Example #1
def test_equiv_logps_ref_np(n_theta_sampling=1000):
    """Check equivalence of log marinal likelihood of ref and np impls.
    """
    n_confounders = 6
    n_samples = 100
    rng = np.random.RandomState(0)

    hparamss = define_hparam_searchspace(n_theta_sampling=n_theta_sampling,
                                         prior_indvdls=['t'])

    n_theta_sampling = hparamss[0]['n_theta_sampling']
    P_M1 = hparamss[0]['P_M1']
    P_M2 = hparamss[0]['P_M2']
    prior_indvdl = hparamss[0]['prior_indvdl']

    # ---- Generate samples ----
    data = _gen_samples(n_confounders, n_samples, rng)
    xs = data['xs']
    causality_true = data['causality_true']

    # ---- Inference ----
    _, logPs_ref = bayesmixedlingam_ref(xs, n_theta_sampling, P_M1, P_M2,
                                        prior_indvdl, rng)
    print(logPs_ref)
    _, logPs_np = bayesmixedlingam_np(xs, hparamss, rng)
    print(logPs_np)
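    # Suggested follow-up (not in the original test): report how closely the
    # two implementations agree. An explicit tolerance/assertion is omitted,
    # since the intended numerical accuracy is an assumption.
    diff = np.max(np.abs(np.asarray(logPs_ref) - np.asarray(logPs_np)))
    print('max abs difference between ref and np log-marginals: %g' % diff)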
Example #2
def test_sampling(n_mc_samples=100):
    # Disabled test: the code below is unreachable and kept for reference.
    raise NotImplementedError('This function is not implemented.')

    rng = np.random.RandomState(0)

    # ---- Generate samples ----
    data = gen_samples(n_confounders=1, n_samples=100, rng=rng)
    xs = data['xs']
    causality_true = data['causality_true']

    # ---- Get a hyperparameter set ----
    hparamss = define_hparam_searchspace(n_mc_samples=n_mc_samples)
    hparams = hparamss[200]

    # ---- MC sampling ----
    logp_np, traces_np = comp_logP_bmlingam_np(xs, hparams, rng)
    logp_pm2, traces_pm2 = comp_logP_bmlingam_pm2(xs, hparams, rng)
    # logp_pm3, traces_pm3 = comp_logP_bmlingam_pm3(xs, hparams, rng)

    return {
        'logp_np': logp_np,
        'traces_np': traces_np,
        'logp_pm2': logp_pm2,
        'traces_pm2': traces_pm2,
        # 'logp_pm3': logp_pm3,
        # 'traces_pm3': traces_pm3,
    }
Example #3
def infer_causality(xs, infer_params, varnames=None, verbose=1):
    """Infer causality based on samples given pair of columns in data.
    """
    assert (type(infer_params) == InferParams)

    if varnames is None:
        varnames = ['var1', 'var2']

    hparamss = define_hparam_searchspace(infer_params)
    sampling_mode = infer_params.sampling_mode
    hparams_best, post_prob, ll, hparams_rev, post_prob_rev, ll_rev = \
        find_best_model(xs, hparamss, sampling_mode)
    causality = hparams_best['causality']

    x1_name = varnames[0]
    x2_name = varnames[1]
    if causality == [1, 2]:
        src, dst = x1_name, x2_name
    else:
        src, dst = x2_name, x1_name

    result = {
        'Inferred causality': '{} -> {}'.format(src, dst),
        '2 * (log(p(M)) - log(p(M_rev)))': '{}'.format(2 * (ll - ll_rev))
    }

    if 1 <= verbose:
        print(json.dumps(result, indent=2, sort_keys=True))

    if 2 <= verbose:
        print('---- Inference for variables "%s" and "%s" ----' %
              (x1_name, x2_name))
        print(
            'Inferred  : %s -> %s (posterior prob: %1.3f, loglikelihood: %1.3f)'
            % (src, dst, post_prob, ll))
        print(
            '(best_rev): %s -> %s (posterior prob: %1.3f, loglikelihood: %1.3f)'
            % (dst, src, post_prob_rev, ll_rev))
        print('')
        print('Hyper parameters of the optimal model:')
        show_hparams(hparams_best)
        print('')
        print('Hyper parameters of the reverse optimal model:')
        show_hparams(hparams_rev)
        print('')

    return {
        'x1_name': x1_name,
        'x2_name': x2_name,
        'xs': xs,
        'causality': causality,
        'causality_str': ('%s -> %s' % (src, dst)),
        'post_prob': post_prob,
        'hparams': hparams_best,
        'post_prob_rev': post_prob_rev,
        'hparams_rev': hparams_rev
    }
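A minimal usage sketch for infer_causality; the import path, the InferParams defaults, and the toy data are assumptions, not taken from the examples above:

import numpy as np
# Hypothetical import path; adjust to the actual package layout.
from bmlingam import InferParams, infer_causality

rng = np.random.RandomState(0)
x1 = rng.laplace(size=100)
x2 = 0.8 * x1 + rng.laplace(size=100)  # x1 -> x2 by construction
xs = np.vstack([x1, x2]).T             # shape (n_samples, 2)

result = infer_causality(xs, InferParams(), varnames=['x1', 'x2'])
print(result['causality_str'], result['post_prob'])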
Example #4
def _estimate_hparams(xs, infer_params):
    assert (type(infer_params) == InferParams)

    sampling_mode = infer_params.sampling_mode
    hparamss = define_hparam_searchspace(infer_params)
    results = find_best_model(xs, hparamss, sampling_mode)
    hparams_best = results[0]
    bf = results[2] - results[5]  # Log Bayes factor (difference of log marginal likelihoods)

    return hparams_best, bf
Example #5
def test_find_best_model(verbose=False):
    gen_data_params = GenDataParams(n_samples=200,
                                    mu1_dist=5.0,
                                    mu2_dist=10.0,
                                    f1_coef=[1.0, 1.0, 1.5],
                                    f2_coef=[1.0, 2.0, 0.5],
                                    conf_dist=[['laplace'], ['exp'],
                                               ['uniform']],
                                    e1_dist=['laplace'],
                                    e2_dist=['laplace'],
                                    e1_std=3.0,
                                    e2_std=3.0,
                                    fix_causality=False,
                                    seed=0)

    # gen_data_params = deepcopy(gen_data_params_default)
    gen_data_params.n_samples = 200
    gen_data_params.n_confounders = 3
    gen_data_params.dists_e1 = ['laplace']
    gen_data_params.dists_e2 = ['laplace']
    gen_data_params.dist_be1 = 'be1=9.0'
    gen_data_params.dist_be2 = 'be2=9.0'
    gen_data_params.dist_bf1s = '1., 1., 1.5'
    gen_data_params.dist_bf2s = '1., 2., 0.5'
    gen_data_params.dists_conf = [['laplace'], ['exp'], ['uniform']]
    gen_data_params.dist_mu1 = 'mu1=5.0'
    gen_data_params.dist_mu2 = 'mu2=10.0'

    data = gen_artificial_data(gen_data_params)
    xs = data['xs']

    infer_params = infer_params1()
    sampling_mode = infer_params.sampling_mode
    hparamss = define_hparam_searchspace(infer_params)
    result1 = find_best_model(xs, hparamss, sampling_mode)
    print(result1)

    infer_params = infer_params2()
    sampling_mode = infer_params.sampling_mode
    hparamss = define_hparam_searchspace(infer_params)
    result2 = find_best_model(xs, hparamss, sampling_mode)
    print(result2)
Example #6
def _test_bmlingam_main(comp_logP_func,
                        test_params,
                        show_result=False,
                        tied_sampling=False,
                        assertive=True):
    """Test estimation using Bayesian mixed LiNGAM model.

    This function is invoked from test_bmlingam_np() and test_bmlingam_pymc(). 
    """
    t_start = time.time()

    # ---- Testing parameters ----
    n_confounderss = test_params['n_confounderss']
    n_trials = test_params['n_trials']
    min_corrects = test_params['min_corrects']
    n_samples = test_params['n_samples']
    max_c = test_params['max_c']
    n_mc_samples = test_params['n_mc_samples']
    normalize_samples = test_params['normalize_samples']
    prior_indvdls = test_params['prior_indvdls']

    # ---- Hyperparameter search space ----
    hparamss = define_hparam_searchspace(tied_sampling=tied_sampling,
                                         max_c=max_c,
                                         n_mc_samples=n_mc_samples,
                                         prior_indvdls=prior_indvdls)

    # ---- Do test ----
    rng = np.random.RandomState(0)
    for i in range(len(n_confounderss)):
        n_corrects = _eval_bmlingam(comp_logP_func,
                                    n_confounderss[i],
                                    n_trials,
                                    n_samples,
                                    hparamss,
                                    rng,
                                    show_result=show_result,
                                    tied_sampling=tied_sampling,
                                    normalize_samples=normalize_samples)

        if show_result:
            print(('n_confounders=%d, %d correct inferences ' +
                   'out of %d trials') % (n_confounderss[i], n_corrects, n_trials))

        if assertive:
            ok_(min_corrects[i] <= n_corrects)

    if show_result:
        print('')
        print('Program finished at %s' % time.strftime("%c"))
        print('Elapsed time: %.1f [sec]' % (time.time() - t_start))
        print('')

    return
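For orientation, the test_params dictionary read above needs the keys below; the values shown for max_c and n_mc_samples are illustrative assumptions, the others mirror the defaults used elsewhere in these examples:

test_params = {
    'n_confounderss': [0, 1, 6, 12],  # numbers of confounders to test
    'n_trials': 10,                   # inferences per setting
    'min_corrects': [6, 6, 6, 6],     # pass thresholds per setting
    'n_samples': 100,                 # observations per trial
    'max_c': 1.0,                     # assumed value
    'n_mc_samples': 1000,             # assumed value
    'normalize_samples': False,
    'prior_indvdls': ['t'],
}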
Example #7
def estimate_hparams(xs, infer_params):
    """Estimate hyperparameters with the largest marginal likelihood value.
    """
    assert (type(infer_params) == InferParams)

    sampling_mode = infer_params.sampling_mode
    hparamss = define_hparam_searchspace(infer_params)
    results = find_best_model(xs, hparamss, sampling_mode)
    hparams_best = results[0]
    bf = results[2] - results[5]  # Log Bayes factor (difference of log marginal likelihoods)

    return hparams_best, bf
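A sketch of how the return values could be used; the decision rule is illustrative, and the second value is a difference of log marginal likelihoods (a log Bayes factor):

# Hypothetical usage; xs is an (n_samples, 2) array and infer_params an
# InferParams instance, as in the examples above.
hparams_best, log_bf = estimate_hparams(xs, infer_params)
if log_bf > 0:
    print('Causality %s preferred (log Bayes factor: %.3f)'
          % (hparams_best['causality'], log_bf))
else:
    print('Reversed model preferred (log Bayes factor: %.3f)' % log_bf)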
Example #8
def bmlingam_causality(csv_file, result_dir, is_out_optmodelfile, col_names,
                       infer_params, optmodel_files):
    """Infer causality of all pairs in the data. 
    """
    assert (type(infer_params) == InferParams)

    if type(optmodel_files) is str:
        optmodel_files = [optmodel_files]

    print('---- Algorithm parameters ----')
    print('Number of MC samples: %d' % infer_params.n_mc_samples)
    hparamss = define_hparam_searchspace(infer_params)
    print('Number of candidate models: %d' % len(hparamss))
    print('')

    # Load data and infer causality
    df = load_data(csv_file, col_names)  # Pandas dataframe

    # Get all possible pairs of variables
    pairs = _get_pairs(len(df.columns))

    # Check optimal model files
    if optmodel_files is not None:
        assert (len(optmodel_files) == len(pairs))
        optmodel_files_ = optmodel_files

    # Infer causality over all variable pairs
    data = df.values  # df.as_matrix() was removed in recent pandas versions
    varnames = df.columns.values
    results = [
        infer_causality(data[:, pair], infer_params, varnames[list(pair)])
        for pair in pairs
    ]

    # Summarize inference
    table_causal = _make_table_causal(results)

    # Set optimal model files
    if optmodel_files is None:
        if result_dir is not None:
            optmodel_files_ = [
                _get_optmodel_file(result, result_dir) for result in results
            ]
        else:
            optmodel_files_ = []

    # Conditions to save results (and optimal models)
    cond_save_results = (result_dir is not None) and (0 < len(result_dir))
    cond_save_optmodels = 0 < len(optmodel_files_) and is_out_optmodelfile

    # Save results
    if cond_save_results:
        result_file = result_dir + sep + 'causality.csv'
        table_causal.to_csv(result_file)
        print('Inferred causality table was saved as %s.' % result_file)

    # Save optimal models
    if cond_save_optmodels:
        for result, optmodel_file in zip(results, optmodel_files_):
            save_pklz(optmodel_file, result)
            print('Optimal model was saved as %s.' % optmodel_file)
Example #9
def eval_find_best_model(n_confounderss=[0, 1, 6, 12],
                         n_trials=10,
                         n_samples=100,
                         min_correctss=[6, 6, 6, 6],
                         prior_indvdlss=[['t'], ['gauss'], ['gg']],
                         dists_noise=['laplace', 'gg'],
                         show_progress=False,
                         show_results=True,
                         betas_indvdl=[.25, .5, .75, 1.],
                         betas_noise=[.25, .5, .75, 1.],
                         standardize=False,
                         sample_coef='r2intervals',
                         n_mc_samples=10000,
                         sampling_mode='normal'):
    """Test estimation using Bayesian mixed LiNGAM model.

    By default, the tests run over numbers of confounders 0, 1, 6 and 12. 
    Each test passes if the number of correct inferences reaches the 
    threshold given for that setting. 

    The testing parameters are as follows:

    .. code:: python

        n_confounderss = [0, 1, 6, 12] # Number of confounders
        n_trials = 10 # Number of trials (inferences)
        min_correctss = [6, 6, 6, 6] # Lower thresholds of correct inferences
        n_samples = 100 # Number of observations

    The default set of hyperparameters is used for the empirical Bayesian 
    estimation (:py:func:`lingam.define_hparam_searchspace`). 
    """
    # ---- Program started ----
    t_start = time.time()
    print('Program started at %s\n' % time.strftime("%c"))

    print('Test parameters')
    print('  n_confounderss: %s' % str(n_confounderss))
    print('  n_samples     : %d' % n_samples)
    print('  min_correctss : %s' % str(min_correctss))
    print('  sampling_mode : %s' % str(sampling_mode))
    print('')
    print('Model search space')
    print('  prior_indvdlss: %s' % prior_indvdlss)
    print('  dists_noise   : %s' % dists_noise)
    print('  betas_indvdl  : %s' % str(betas_indvdl))
    print('  betas_noise   : %s' % str(betas_noise))
    print('  standardize   : %s' % str(standardize))
    print('')

    # ---- Test parameters ----
    test_paramss = [{
        'n_trials': n_trials,
        'min_corrects': min_corrects,
        'gen_data_params': GenDataParams(n_confounders=n_confounders,
                                         n_samples=n_samples,
                                         sample_coef=sample_coef)
    } for (n_confounders, min_corrects) in zip(n_confounderss, min_correctss)]

    # ---- Hyperparameter search spaces ----
    hparamsss = [
        define_hparam_searchspace(
            InferParams(standardize=standardize,
                        n_mc_samples=n_mc_samples,
                        prior_indvdls=prior_indvdls,
                        dist_noise=dist_noise,
                        betas_indvdl=betas_indvdl,
                        betas_noise=betas_noise))
        for prior_indvdls in prior_indvdlss for dist_noise in dists_noise
    ]

    # ---- Loop over experimental conditions ----
    n_confounders_ = []
    prior_indvdl_ = []
    dist_noise_ = []
    n_corrects_ = []

    for i, test_params in enumerate(test_paramss):
        for j, hparamss in enumerate(hparamsss):
            if show_progress:
                t_start_local = time.time()
                print('---- test_params (%d/%d), hparamss (%d/%d) ----' %
                      (i + 1, len(test_paramss), j + 1, len(hparamsss)))
                print('Num. of candidate models: %d' % len(hparamss))

            # Causality inference
            n_corrects = _test_find_best_model_main(
                test_params,
                hparamss,
                show_progress=show_progress,
                sampling_mode=sampling_mode)

            # Append result to table
            n_confounders_.append(test_params['gen_data_params'].n_confounders)
            prior_indvdl_.append(hparamss[0]['prior_indvdl'])
            dist_noise_.append(hparamss[0]['dist_noise'])
            n_corrects_.append(n_corrects)

            if show_progress:
                print('Elapsed time: %.1f [sec]\n' %
                      (time.time() - t_start_local))

    # ---- Program finished ----
    print('Program finished at %s' % time.strftime("%c"))
    print('Elapsed time: %.1f [sec]\n' % (time.time() - t_start))

    if show_results:
        df = pd.DataFrame({
            'n_confounders': n_confounders_,
            'prior_indvdl': prior_indvdl_,
            'dist_noise': dist_noise_,
            'n_corrects': n_corrects_,
        })

        return df
    else:
        return None
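When show_results is True, the function returns a pandas DataFrame with columns n_confounders, prior_indvdl, dist_noise and n_corrects; a quick way to summarize it (the aggregation itself is illustrative):

# Hypothetical post-processing of the returned summary DataFrame.
df = eval_find_best_model(n_trials=10, show_progress=True)
print(df.groupby(['prior_indvdl', 'dist_noise'])['n_corrects'].mean())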