def test_full_estimation(self):
    """Smoke-run WBIC-based causality inference on artificial data.

    Generates one artificial dataset with Laplace noise and a single
    confounder term, runs ``_infer_causality_with_posterior`` on it and
    prints the inferred direction together with the WBIC values.  Then
    repeats data generation and inference for seed 0 through a local
    helper and prints the inferred causality.  Nothing is asserted; the
    test only checks that the pipeline runs.
    """
    n_confs = 1
    total_noise = 0.5
    # Per-confounder coefficient, scaled so that the total stays fixed.
    noise_scale = total_noise / np.sqrt(n_confs)
    flip = False

    gen_data_params = GenDataParams(
        n_samples=100,
        e1_dist=['laplace'],
        e1_std=3.0,
        e2_dist=['laplace'],
        e2_std=3.0,
        f1_coef=[noise_scale] * n_confs,
        f2_coef=[noise_scale] * n_confs,
        conf_dist=[['all'] for _ in range(n_confs)],
        fix_causality=True,  # x1 -> x2 (b21 is non-zero)
        seed=0)

    generated = gen_artificial_data(gen_data_params)
    print('b_true = {}'.format(generated['b']))
    observations = generated['xs']
    print(observations)

    # Flip causal direction (swap the two observed columns) when requested.
    if not flip:
        print('{} (not flipped)'.format(generated['causality_true']))
    else:
        observations = np.vstack(
            (observations[:, 1], observations[:, 0])).T
        print('{} (flipped)'.format(generated['causality_true']))

    infer_params = FullBayesInferParams(
        metric='wbic', n_mc_samples=10000, vb_seed=1, ic_seed=1)
    outcome = _infer_causality_with_posterior(observations, infer_params)

    print("inferered causality: {}".format(outcome['causality']))
    print("WBIC of estimated causality model {} : {}".format(
        outcome['causality'], outcome['metric']))
    print("WBIC of reverese model: {}".format(outcome['metric_rev']))

    def _causality_for_seed(seed):
        # Regenerate the data with the given seed (this mutates the
        # enclosing ``gen_data_params``) and infer the causal direction.
        seeded_params = FullBayesInferParams(
            metric='wbic', n_mc_samples=10000, vb_seed=1, ic_seed=3)
        gen_data_params.seed = seed
        seeded_data = gen_artificial_data(gen_data_params)
        seeded_outcome = _infer_causality_with_posterior(
            seeded_data['xs'], seeded_params)
        return seeded_outcome['causality']

    for seed in range(1):
        print(_causality_for_seed(seed))
def _infer_causality_wbic_bml(seed):
    """Generate artificial data for ``seed`` and return the inferred causality.

    ``gen_data_params`` is not a parameter of this function; it is read
    from the surrounding scope and its ``seed`` attribute is overwritten
    on every call.  Inference uses the WBIC metric with fixed
    ``vb_seed=1`` and ``ic_seed=3``.
    """
    infer_params = FullBayesInferParams(
        metric='wbic', n_mc_samples=10000, vb_seed=1, ic_seed=3)

    # Re-seed the shared generation parameters before sampling new data.
    gen_data_params.seed = seed
    generated = gen_artificial_data(gen_data_params)

    outcome = _infer_causality_with_posterior(generated['xs'], infer_params)
    return outcome['causality']
def _gen_artificial_data_csv(csv_file, gen_data_params):
    """Generate artificial data and save the observations as a CSV file.

    The header row names which column is the source and which is the
    destination, chosen from ``data['causality_true']``: ``[1, 2]`` gives
    ``x1_src,x2_dst``; anything else gives ``x1_dst,x2_src``.
    """
    generated = gen_artificial_data(gen_data_params)
    observations = generated['xs']

    column_names = (
        'x1_src,x2_dst' if generated['causality_true'] == [1, 2]
        else 'x1_dst,x2_src')

    # comments='' keeps numpy from prefixing the header line with '# '.
    np.savetxt(
        csv_file,
        observations,
        fmt='%s',
        delimiter=',',
        header=column_names,
        comments='')
    print('Artificial data is generated and saved as %s' % csv_file)
def make_testdata(csv_file, gen_data_params):
    """Write artificially generated observations to ``csv_file``.

    The observations are converted to fixed-width byte strings (at most
    20 characters each) and stacked under a header row whose labels
    encode the true causal direction: ``x1_src, x2_dst`` when
    ``data['causality_true']`` equals ``[1, 2]``, otherwise
    ``x1_dst, x2_src``.
    """
    generated = gen_artificial_data(gen_data_params)
    cells = generated['xs'].astype('S20')

    column_names = (
        ['x1_src', 'x2_dst'] if generated['causality_true'] == [1, 2]
        else ['x1_dst', 'x2_src'])

    # The header is written as an ordinary first row of the table.
    table = np.vstack((column_names, cells))
    np.savetxt(csv_file, table, fmt='%s', delimiter=',')
    print('Made artificial data and saved as %s.' % csv_file)
def gen_artificial_data_given_cond(ix_trial, cond):
    """Generate one artificial dataset for an experimental condition.

    Starts from a deep copy of ``gen_data_params_default`` (so the
    defaults are left untouched) and overrides it with values read from
    ``cond``: ``'n_confs'``, ``'n_samples'``, ``'data_noise_type'`` and
    ``'totalnoise'``.  ``ix_trial`` is used as the random seed.

    Returns whatever ``gen_artificial_data`` returns for those parameters.
    """
    n_confs = cond['n_confs']

    params = deepcopy(gen_data_params_default)
    params.n_samples = cond['n_samples']
    params.conf_dist = [['all'] for _ in range(n_confs)]
    params.e1_dist = [cond['data_noise_type']]
    params.e2_dist = [cond['data_noise_type']]

    # Spread the total noise over the confounders: each coefficient is
    # totalnoise / sqrt(n_confs).
    per_conf_scale = cond['totalnoise'] / np.sqrt(n_confs)
    params.f1_coef = [per_conf_scale for _ in range(n_confs)]
    params.f2_coef = [per_conf_scale for _ in range(n_confs)]

    # The trial index doubles as the seed.
    params.seed = ix_trial
    return gen_artificial_data(params)
def test_gen_artificial_data(plot=False, n_confounders=1):
    """Only check the function runs without error.

    Builds ``GenDataParams`` with a uniform ``e1`` distribution and
    ``n_confounders`` confounder terms, generates data, asserts that the
    observations have shape ``(n_samples, 2)``, prints a few standard
    deviations and the parameter object, and draws three figures
    (scatter of observations, histogram of errors, histogram of
    confounders).

    Parameters
    ----------
    plot : bool
        NOTE(review): currently unused -- the figures are created
        regardless of this flag.  Left as is to keep existing behaviour.
    n_confounders : int
        Number of entries in ``f1_coef``, ``f2_coef`` and ``conf_dist``.
    """
    gen_data_params = GenDataParams(
        e1_dist=['uniform'],
        f1_coef=['r2intervals' for _ in range(n_confounders)],
        f2_coef=['r2intervals' for _ in range(n_confounders)],
        conf_dist=[['all'] for _ in range(n_confounders)])
    n_samples = gen_data_params.n_samples

    data = gen_artificial_data(gen_data_params)
    xs = data['xs']
    confs = data['confs']

    assert xs.shape == (n_samples, 2)

    print('xs.shape = {}'.format(xs.shape))
    print('std(x1) = {}'.format(np.std(xs[:, 0])))
    print('std(x2) = {}'.format(np.std(xs[:, 1])))
    # Assumes ``confs`` has at least two columns -- TODO confirm against
    # gen_artificial_data.
    print('std(conf1) = {}'.format(np.std(confs[:, 0])))
    print('std(conf2) = {}'.format(np.std(confs[:, 1])))
    print('')
    pprint(vars(gen_data_params))

    plt.figure()
    plt.scatter(xs[:, 0], xs[:, 1])
    plt.xlabel('x1')
    # Fixed: this was a second xlabel() call, which overwrote 'x1' and
    # left the y axis unlabelled.
    plt.ylabel('x2')
    plt.title('Observations')

    plt.figure()
    plt.hist(data['es'])
    plt.title('Errors')

    plt.figure()
    plt.hist(data['confs'])
    plt.title('Confounders')