def test_equiv_logps_ref_np(n_theta_sampling=1000):
    """Check equivalence of log marginal likelihood of ref and np impls.

    Runs both bayesmixedlingam_ref and bayesmixedlingam_np on the same
    artificial data and prints the resulting log marginal likelihoods for
    visual comparison.

    Parameters
    ----------
    n_theta_sampling : int
        Number of parameter samples requested when building the
        hyperparameter search space.
    """
    # Fixed defects: docstring typo ("marinal" -> "marginal") and removal
    # of the unused local `causality_true`.
    n_confounders = 6
    n_samples = 100
    rng = np.random.RandomState(0)

    hparamss = define_hparam_searchspace(n_theta_sampling=n_theta_sampling,
                                         prior_indvdls=['t'])
    # Use the values actually stored in the first candidate model so that
    # both implementations see identical hyperparameters.
    n_theta_sampling = hparamss[0]['n_theta_sampling']
    P_M1 = hparamss[0]['P_M1']
    P_M2 = hparamss[0]['P_M2']
    prior_indvdl = hparamss[0]['prior_indvdl']

    # ---- Generate samples ----
    data = _gen_samples(n_confounders, n_samples, rng)
    xs = data['xs']

    # ---- Inference ----
    _, logPs_ref = bayesmixedlingam_ref(xs, n_theta_sampling, P_M1, P_M2,
                                        prior_indvdl, rng)
    print(logPs_ref)

    _, logPs_np = bayesmixedlingam_np(xs, hparamss, rng)
    print(logPs_np)
def test_sampling(n_mc_samples=100): raise NotImplementedError('This function is not implemented.') rng = np.random.RandomState(0) # ---- Generate samples ---- data = gen_samples(n_confounders=1, n_samples=100, rng=rng) xs = data['xs'] causality_true = data['causality_true'] # ---- Get a hyperparameter set ---- hparamss = define_hparam_searchspace(n_mc_samples=n_mc_samples) hparams = hparamss[200] # ---- MC sampling ---- logp_np, traces_np = comp_logP_bmlingam_np(xs, hparams, rng) logp_pm2, traces_pm2 = comp_logP_bmlingam_pm2(xs, hparams, rng) # logp_pm3, traces_pm3 = comp_logP_bmlingam_pm3(xs, hparams, rng) return { 'logp_np': logp_np, 'traces_np': traces_np, 'logp_pm2': logp_pm2, 'traces_pm2': traces_pm2, # 'logp_pm3': logp_pm3, # 'traces_pm3': traces_pm3, }
def infer_causality(xs, infer_params, varnames=None, verbose=1):
    """Infer causality based on samples given pair of columns in data.

    Parameters
    ----------
    xs : array-like
        Observations of the two variables (columns).
    infer_params : InferParams
        Parameters controlling the model search.
    varnames : list of str, optional
        Names of the two variables; defaults to ['var1', 'var2'].
    verbose : int
        0: silent, 1: JSON summary, 2: detailed report of both the best
        and the reverse-best models.

    Returns
    -------
    dict
        Inference summary including the inferred causal direction, the
        posterior probability and the optimal hyperparameters.
    """
    assert (type(infer_params) == InferParams)

    if varnames is None:
        varnames = ['var1', 'var2']

    hparamss = define_hparam_searchspace(infer_params)
    best, post_prob, ll, best_rev, post_prob_rev, ll_rev = \
        find_best_model(xs, hparamss, infer_params.sampling_mode)
    causality = best['causality']

    x1_name, x2_name = varnames[0], varnames[1]
    src, dst = ((x1_name, x2_name) if causality == [1, 2]
                else (x2_name, x1_name))

    # NOTE(review): key text preserved verbatim (including the 'Infered'
    # spelling) since downstream consumers may look it up by name.
    result = {
        'Infered causality': '{} -> {}'.format(src, dst),
        '2 * log(p(M)) - log(p(M_rev))': '{}'.format(2 * (ll - ll_rev))
    }

    if verbose >= 1:
        print(json.dumps(result, indent=2, sort_keys=True))

    if verbose >= 2:
        print('---- Inference for variables "%s" and "%s" ----' %
              (x1_name, x2_name))
        print(
            'Inferred : %s -> %s (posterior prob: %1.3f, loglikelihood: %1.3f)'
            % (src, dst, post_prob, ll))
        print(
            '(best_rev): %s -> %s (posterior prob: %1.3f, loglikelihood: %1.3f)'
            % (dst, src, post_prob_rev, ll_rev))
        print('')
        print('Hyper parameters of the optimal model:')
        show_hparams(best)
        print('')
        print('Hyper parameters of the reverse optimal model:')
        show_hparams(best_rev)
        print('')

    return {
        'x1_name': x1_name,
        'x2_name': x2_name,
        'xs': xs,
        'causality': causality,
        'causality_str': ('%s -> %s' % (src, dst)),
        'post_prob': post_prob,
        'hparams': best,
        'post_prob_rev': post_prob_rev,
        'hparams_rev': best_rev
    }
def _estimate_hparams(xs, infer_params):
    """Private alias of :func:`estimate_hparams` (kept for callers).

    The original body duplicated estimate_hparams line-for-line; it now
    delegates to the public function so the model-search logic lives in
    exactly one place.

    Returns
    -------
    tuple
        (hparams_best, bayes_factor), as returned by estimate_hparams.
    """
    return estimate_hparams(xs, infer_params)
def test_find_best_model(verbose=False):
    """Smoke test: run find_best_model on artificial data.

    Builds one artificial data set with three confounders and runs the
    model search under two inference-parameter presets, printing each
    result.
    """
    gen_data_params = GenDataParams(n_samples=200,
                                    mu1_dist=5.0,
                                    mu2_dist=10.0,
                                    f1_coef=[1.0, 1.0, 1.5],
                                    f2_coef=[1.0, 2.0, 0.5],
                                    conf_dist=[['laplace'], ['exp'],
                                               ['uniform']],
                                    e1_dist=['laplace'],
                                    e2_dist=['laplace'],
                                    e1_std=3.0,
                                    e2_std=3.0,
                                    fix_causality=False,
                                    seed=0)

    # Legacy-style parameter fields, applied in the original assignment
    # order (dict preserves insertion order).
    overrides = {
        'n_samples': 200,
        'n_confounders': 3,
        'dists_e1': ['laplace'],
        'dists_e2': ['laplace'],
        'dist_be1': 'be1=9.0',
        'dist_be2': 'be2=9.0',
        'dist_bf1s': '1., 1., 1.5',
        'dist_bf2s': '1., 2., 0.5',
        'dists_conf': [['laplace'], ['exp'], ['uniform']],
        'dist_mu1': 'mu1=5.0',
        'dist_mu2': 'mu2=10.0',
    }
    for attr, value in overrides.items():
        setattr(gen_data_params, attr, value)

    data = gen_artificial_data(gen_data_params)
    xs = data['xs']

    # Run the search under both presets and print each result.
    for make_infer_params in (infer_params1, infer_params2):
        infer_params = make_infer_params()
        sampling_mode = infer_params.sampling_mode
        hparamss = define_hparam_searchspace(infer_params)
        result = find_best_model(xs, hparamss, sampling_mode)
        print(result)
def _test_bmlingam_main(comp_logP_func, test_params, show_result=False,
                        tied_sampling=False, assertive=True):
    """Test estimation using Bayesian mixed LiNGAM model.

    This function is invoked from test_bmlingam_np() and
    test_bmlingam_pymc().

    Parameters
    ----------
    comp_logP_func : callable
        Function computing the log marginal likelihood of a model.
    test_params : dict
        Testing parameters; see the keys unpacked below.
    show_result : bool
        If True, print per-condition results and timing.
    tied_sampling : bool
        Backward-compatibility flag forwarded to the search space and to
        _eval_bmlingam.
    assertive : bool
        If True, assert that each condition reaches its threshold of
        correct inferences (via nose's ok_).
    """
    t_start = time.time()

    # ---- Testing parameters ----
    n_confounderss = test_params['n_confounderss']
    n_trials = test_params['n_trials']
    min_corrects = test_params['min_corrects']
    n_samples = test_params['n_samples']
    max_c = test_params['max_c']
    n_mc_samples = test_params['n_mc_samples']
    normalize_samples = test_params['normalize_samples']
    prior_indvdls = test_params['prior_indvdls']

    # ---- Hyperparameter search space ----
    hparamss = define_hparam_searchspace(tied_sampling=tied_sampling,
                                         max_c=max_c,
                                         n_mc_samples=n_mc_samples,
                                         prior_indvdls=prior_indvdls)

    # ---- Do test ----
    rng = np.random.RandomState(0)

    # Bug fix: the original looped with Python 2's `xrange`, which raises
    # NameError under Python 3 (the file otherwise uses print() as a
    # function); enumerate gives the same iteration order.
    for i, n_confounders in enumerate(n_confounderss):
        n_corrects = _eval_bmlingam(comp_logP_func, n_confounders, n_trials,
                                    n_samples, hparamss, rng,
                                    show_result=show_result,
                                    tied_sampling=tied_sampling,
                                    normalize_samples=normalize_samples)

        if show_result:
            print(('n_confounders=%d, %d correct inferences ' +
                   'out of 10 trials') % (n_confounders, n_corrects))

        if assertive:
            ok_(min_corrects[i] <= n_corrects)

    if show_result:
        print('')
        print('Program finished at %s' % time.strftime("%c"))
        print('Elapsed time: %.1f [sec]' % (time.time() - t_start))
        print('')

    return
def estimate_hparams(xs, infer_params):
    """Estimate hyperparameters with the largest marginal likelihood value.

    Parameters
    ----------
    xs : array-like
        Observations of the variable pair.
    infer_params : InferParams
        Parameters controlling the model search.

    Returns
    -------
    tuple
        (hparams_best, bayes_factor) where bayes_factor is the difference
        between the log likelihood of the best model and that of the best
        reverse model.
    """
    assert (type(infer_params) == InferParams)

    search_space = define_hparam_searchspace(infer_params)
    # find_best_model returns:
    #   (hparams_best, post_prob, ll, hparams_rev, post_prob_rev, ll_rev)
    results = find_best_model(xs, search_space, infer_params.sampling_mode)

    bayes_factor = results[2] - results[5]
    return results[0], bayes_factor
def bmlingam_causality(csv_file, result_dir, is_out_optmodelfile, col_names,
                       infer_params, optmodel_files):
    """Infer causality of all pairs in the data.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file containing observations.
    result_dir : str or None
        Directory where the causality table (and, by default, optimal
        model files) are saved; None or '' disables saving.
    is_out_optmodelfile : bool
        If True, save the optimal model of each variable pair.
    col_names : list of str or None
        Names of the columns to load from the CSV file.
    infer_params : InferParams
        Parameters of the inference.
    optmodel_files : str, list of str, or None
        Explicit file name(s) for the optimal model files; when None they
        are derived from result_dir.
    """
    assert (type(infer_params) == InferParams)

    if type(optmodel_files) is str:
        optmodel_files = [optmodel_files]

    print('---- Algorithm parameters ----')
    print('Number of MC samples: %d' % infer_params.n_mc_samples)

    hparamss = define_hparam_searchspace(infer_params)
    print('Number of candidate models: %d' % len(hparamss))
    print('')

    # Load data and infer causality
    df = load_data(csv_file, col_names)  # Pandas dataframe

    # Get all possible pairs of variables
    pairs = _get_pairs(len(df.columns))

    # Check optimal model files
    if optmodel_files is not None:
        assert (len(optmodel_files) == len(pairs))
        optmodel_files_ = optmodel_files

    # Infer causality over all variable pairs.
    # Bug fix: DataFrame.as_matrix() was deprecated in pandas 0.23 and
    # removed in 1.0; .values is the equivalent supported in all versions.
    data = df.values
    varnames = df.columns.values

    results = [
        infer_causality(data[:, pair], infer_params, varnames[list(pair)])
        for pair in pairs
    ]

    # Summarize inference
    table_causal = _make_table_causal(results)

    # Set optimal model files
    if optmodel_files is None:
        if result_dir is not None:
            optmodel_files_ = [
                _get_optmodel_file(result, result_dir) for result in results
            ]
        else:
            optmodel_files_ = []

    # Conditions to save results (and optimal models)
    cond_save_results = (result_dir is not None) and (0 < len(result_dir))
    cond_save_optmodels = 0 < len(optmodel_files_) and is_out_optmodelfile

    # Save results
    if cond_save_results:
        result_file = result_dir + sep + 'causality.csv'
        table_causal.to_csv(result_file)
        print('Inferred causality table was saved as %s.' % result_file)

    # Save optimal models
    if cond_save_optmodels:
        for result, optmodel_file in zip(results, optmodel_files_):
            save_pklz(optmodel_file, result)
            print('Optimal model was saved as %s.' % optmodel_file)
def eval_find_best_model(n_confounderss=[0, 1, 6, 12], n_trials=10,
                         n_samples=100, min_correctss=[6, 6, 6, 6],
                         prior_indvdlss=[['t'], ['gauss'], ['gg']],
                         dists_noise=['laplace', 'gg'], show_progress=False,
                         show_results=True, betas_indvdl=[.25, .5, .75, 1.],
                         betas_noise=[.25, .5, .75, 1.], standardize=False,
                         sample_coef='r2intervals', n_mc_samples=10000,
                         sampling_mode='normal'):
    """Test estimation using Bayesian mixed LiNGAM model.

    The tests run over numbers of confounders: 0, 1, 6 and 12. Each of the
    tests passes if the ratio of correct estimations is greater than a
    threshold for each of settings. The testing parameters are as follows:

    .. code:: python

        n_confounderss = [0, 1, 6, 12]  # Number of confounders
        n_trials = 10  # Number of trials (inferences)
        min_correctss = [6, 6, 6, 6]  # Lower threshold of correct inferences
        n_samples = 100  # Number of observations

    The default set of hyperparameters are used to do empirical Bayesian
    estimation (:py:func:`lingam.define_hparam_searchspace`).

    Returns
    -------
    pandas.DataFrame or None
        One row per (n_confounders, prior_indvdl, dist_noise) condition
        with the number of correct inferences, when show_results is True;
        otherwise None.
    """
    # Fixed defects: the docstring referenced a nonexistent
    # `tied_sampling` argument and misnamed `min_correctss`.
    # NOTE(review): the list defaults above are mutable but are only read,
    # never mutated, so sharing across calls is harmless here.

    # ---- Program started ----
    t_start = time.time()
    print('Program started at %s\n' % time.strftime("%c"))
    print('Test parameters')
    print(' n_confounderss: %s' % str(n_confounderss))
    print(' n_samples : %d' % n_samples)
    print(' min_correctss : %s' % str(min_correctss))
    print(' sampling_mode : %s' % str(sampling_mode))
    print('')
    print('Model search space')
    print(' prior_indvdlss: %s' % prior_indvdlss)
    print(' dists_noise : %s' % dists_noise)
    print(' betas_indvdl : %s' % str(betas_indvdl))
    print(' betas_noise : %s' % str(betas_noise))
    print(' standardize : %s' % str(standardize))
    print('')

    # ---- Test parameters ----
    test_paramss = [{
        'n_trials': n_trials,
        'min_corrects': min_corrects,
        'gen_data_params': GenDataParams(n_confounders=n_confounders,
                                         n_samples=n_samples,
                                         sample_coef=sample_coef)
    } for (n_confounders, min_corrects) in zip(n_confounderss, min_correctss)]

    # ---- Hyperparameter search spaces ----
    # One search space per (prior_indvdls, dist_noise) combination.
    hparamsss = [
        define_hparam_searchspace(
            InferParams(standardize=standardize,
                        n_mc_samples=n_mc_samples,
                        prior_indvdls=prior_indvdls,
                        dist_noise=dist_noise,
                        betas_indvdl=betas_indvdl,
                        betas_noise=betas_noise))
        for prior_indvdls in prior_indvdlss for dist_noise in dists_noise
    ]

    # ---- Loop over experimental conditions ----
    n_confounders_ = []
    prior_indvdl_ = []
    dist_noise_ = []
    n_corrects_ = []

    for i, test_params in enumerate(test_paramss):
        for j, hparamss in enumerate(hparamsss):
            if show_progress:
                t_start_local = time.time()
                print('---- test_params (%d/%d), hparamss (%d/%d) ----' %
                      (i + 1, len(test_paramss), j + 1, len(hparamsss)))
                print('Num. of candidate models: %d' % len(hparamss))

            # Causality inference
            n_corrects = _test_find_best_model_main(
                test_params, hparamss, show_progress=show_progress,
                sampling_mode=sampling_mode)

            # Append result to table
            n_confounders_.append(
                test_params['gen_data_params'].n_confounders)
            prior_indvdl_.append(hparamss[0]['prior_indvdl'])
            dist_noise_.append(hparamss[0]['dist_noise'])
            n_corrects_.append(n_corrects)

            if show_progress:
                print('Elapsed time: %.1f [sec]\n' %
                      (time.time() - t_start_local))

    # ---- Program finished ----
    print('Program finished at %s' % time.strftime("%c"))
    print('Elapsed time: %.1f [sec]\n' % (time.time() - t_start))

    if show_results:
        df = pd.DataFrame({
            'n_confounders': n_confounders_,
            'prior_indvdl': prior_indvdl_,
            'dist_noise': dist_noise_,
            'n_corrects': n_corrects_,
        })
        return df
    else:
        return None