def gpdc(request):
    """Return a GPDC independence test with a fixed configuration.

    ``request`` is accepted but never read by the body (presumably the
    pytest fixture request object -- confirm against the decorator, which
    is not visible here).
    """
    settings = dict(
        mask_type=None,
        significance='analytic',
        fixed_thres=0.1,
        sig_samples=1000,
        sig_blocklength=1,
        confidence='bootstrap',
        conf_lev=0.9,
        conf_samples=100,
        conf_blocklength=None,
        recycle_residuals=False,
        verbosity=0,
    )
    return GPDC(**settings)
def setUp(self):
    """Create the shared data set and the two independence-test objects.

    Seeds numpy's global RNG with 42, draws a 3-variable process from
    ``pp.var_process`` and stores a ``ParCorr`` and a ``GPDC`` instance on
    ``self`` for the individual tests to use.
    """
    numpy.random.seed(42)

    autocorr = 0.6
    cross_coeff = 0.6
    n_steps = 1000

    # True graph
    links_coeffs = {
        0: [((0, -1), autocorr)],
        1: [((1, -1), autocorr), ((0, -1), cross_coeff)],
        2: [((2, -1), autocorr), ((1, -1), cross_coeff)],
    }

    generated = pp.var_process(links_coeffs, T=n_steps)
    self.data, self.true_parents_coeffs = generated
    # Kept from the original: unpacking requires a two-dimensional array.
    n_steps, n_vars = self.data.shape

    par_corr_settings = dict(
        use_mask=False,
        mask_type=None,
        significance='analytic',
        fixed_thres=None,
        sig_samples=10000,
        sig_blocklength=3,
        confidence='analytic',
        conf_lev=0.9,
        conf_samples=10000,
        conf_blocklength=1,
        recycle_residuals=False,
        verbosity=0,
    )
    self.ci_par_corr = ParCorr(**par_corr_settings)

    gpdc_settings = dict(
        significance='analytic',
        sig_samples=1000,
        sig_blocklength=1,
        confidence='bootstrap',
        conf_lev=0.9,
        conf_samples=100,
        conf_blocklength=None,
        use_mask=False,
        mask_type='y',
        recycle_residuals=False,
        verbosity=0,
    )
    self.ci_gpdc = GPDC(**gpdc_settings)
def calculate(para_setup):
    """Generate one synthetic data set and run one causal-discovery method.

    ``para_setup`` is a pair ``(para_setup_string, sam)``. The string holds
    14 fields separated by ``'-'`` (model, N, n_links, min_coeff, coeff,
    auto, contemp_fraction, frac_unobserved, max_true_lag, T, ci_test,
    method, pc_alpha, tau_max); ``sam`` is used as the model seed unless the
    module-level ``verbosity`` exceeds 999.

    The function reads the module-level names ``verbosity`` and
    ``plot_data``, which are not defined in this block.

    Returns a dict with the keys ``'true_graph'``, ``'val_min'``,
    ``'max_cardinality'``, ``'computation_time'`` and ``'graph'``.

    Raises ValueError for an unknown model, CI test or method, and when no
    stationary model was found.
    """

    para_setup_string, sam = para_setup

    # NOTE(review): fields are split on '-', so a negative number inside the
    # string would be split as well -- confirm callers never pass one.
    paras = para_setup_string.split('-')
    paras = [w.replace("'", "") for w in paras]

    model = str(paras[0])
    N = int(paras[1])
    n_links = int(paras[2])
    min_coeff = float(paras[3])
    coeff = float(paras[4])
    auto = float(paras[5])
    contemp_fraction = float(paras[6])
    frac_unobserved = float(paras[7])
    max_true_lag = int(paras[8])
    T = int(paras[9])

    ci_test = str(paras[10])
    method = str(paras[11])
    pc_alpha = float(paras[12])
    tau_max = int(paras[13])

    #############################################
    ## Data
    #############################################

    # Coupling functions attached to the links: identity and a nonlinear one.
    def lin_f(x): return x

    def f2(x): return (x + 5. * x**2 * np.exp(-x**2 / 20.))

    if model == 'autobidirected':
        # verbosity values above 999 double as an explicit seed override.
        if verbosity > 999:
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        random_state = np.random.RandomState(model_seed)

        # Fixed four-variable model; variable 1 is dropped from the
        # observed data below.
        links = {
            0: [((0, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            1: [],
            2: [((2, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            3: [((3, -1), auto, lin_f), ((2, -1), min_coeff, lin_f)],
        }
        observed_vars = [0, 2, 3]

        noises = [random_state.randn for j in range(len(links))]

        data_all, nonstationary = mod.generate_nonlinear_contemp_timeseries(
            links=links, T=T, noises=noises, random_state=random_state)
        data = data_all[:, observed_vars]

    elif 'random' in model:
        # NOTE(review): a 'random' model that is neither 'lineargaussian'
        # nor 'nonlinearmixed' leaves coupling_funcs/noise_types/noise_sigma
        # unassigned and fails later with a NameError.
        if 'lineargaussian' in model:
            coupling_funcs = [lin_f]
            noise_types = ['gaussian']  #, 'weibull', 'uniform']
            noise_sigma = (0.5, 2)

        elif 'nonlinearmixed' in model:
            coupling_funcs = [lin_f, f2]
            noise_types = ['gaussian', 'gaussian', 'weibull']
            noise_sigma = (0.5, 2)

        if coeff < min_coeff:
            min_coeff = coeff
        # Candidate coupling strengths in steps of 0.1, with both signs.
        couplings = list(np.arange(min_coeff, coeff + 0.1, 0.1))
        couplings += [-c for c in couplings]

        # Candidate autocorrelation coefficients in steps of 0.05.
        auto_deps = list(np.arange(max(0., auto - 0.3), auto + 0.01, 0.05))

        # Models may be non-stationary. Hence, we iterate over a number of seeds
        # to find a stationary one regarding network topology, noises, etc
        if verbosity > 999:
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        for ir in range(1000):
            # np.random.seed(model_seed)
            random_state = np.random.RandomState(model_seed)

            # Enlarge the model so that N variables remain after removing
            # the unobserved fraction.
            N_all = math.floor((N / (1. - frac_unobserved)))
            n_links_all = math.ceil(n_links / N * N_all)
            observed_vars = np.sort(
                random_state.choice(range(N_all),
                                    size=math.ceil(
                                        (1. - frac_unobserved) * N_all),
                                    replace=False)).tolist()

            links = mod.generate_random_contemp_model(
                N=N_all,
                L=n_links_all,
                coupling_coeffs=couplings,
                coupling_funcs=coupling_funcs,
                auto_coeffs=auto_deps,
                tau_max=max_true_lag,
                contemp_fraction=contemp_fraction,
                # num_trials=1000,
                random_state=random_state)

            # Noise generators; every method draws from the random_state of
            # the current iteration and scales the sample by self.sigma.
            class noise_model:
                def __init__(self, sigma=1):
                    self.sigma = sigma

                def gaussian(self, T):
                    # Zero-mean gaussian sample of length T scaled by sigma
                    return self.sigma * random_state.randn(T)

                def weibull(self, T):
                    # Weibull sample (shape a=2), centred and normalised
                    # with the analytic mean and variance, scaled by sigma
                    a = 2
                    mean = scipy.special.gamma(1. / a + 1)
                    variance = scipy.special.gamma(
                        2. / a + 1) - scipy.special.gamma(1. / a + 1)**2
                    return self.sigma * (random_state.weibull(a=a, size=T) -
                                         mean) / np.sqrt(variance)

                def uniform(self, T):
                    # Uniform sample on [0, 1), centred and normalised with
                    # mean 0.5 and variance 1/12, scaled by sigma
                    mean = 0.5
                    variance = 1. / 12.
                    return self.sigma * (random_state.uniform(size=T) -
                                         mean) / np.sqrt(variance)

            # One noise generator per variable, with a random type and a
            # sigma drawn uniformly from the noise_sigma interval.
            noises = []
            for j in links:
                noise_type = random_state.choice(noise_types)
                sigma = noise_sigma[0] + (
                    noise_sigma[1] - noise_sigma[0]) * random_state.rand()
                noises.append(getattr(noise_model(sigma), noise_type))

            if 'discretebinom' in model:
                # NOTE(review): n_binom stays unassigned when the model name
                # contains neither 'binom2' nor 'binom4'.
                if 'binom2' in model:
                    n_binom = 2
                elif 'binom4' in model:
                    n_binom = 4

                data_all_check, nonstationary = discretized_scp(
                    links=links, T=T + 10000, n_binom=n_binom,
                    random_state=random_state)
            else:
                data_all_check, nonstationary = mod.generate_nonlinear_contemp_timeseries(
                    links=links, T=T + 10000, noises=noises,
                    random_state=random_state)

            # If the model is stationary, break the loop
            if not nonstationary:
                # Only the first T of the T + 10000 generated samples are kept.
                data_all = data_all_check[:T]
                data = data_all[:, observed_vars]
                break
            else:
                print("Trial %d: Not a stationary model" % ir)
                model_seed += 10000
    else:
        raise ValueError("model %s not known" % model)

    # Still set when every one of the 1000 seeds gave a non-stationary model.
    if nonstationary:
        raise ValueError("No stationary model found: %s" % model)

    true_graph = utilities._get_pag_from_dag(links,
                                             observed_vars=observed_vars,
                                             tau_max=tau_max,
                                             verbosity=verbosity)[1]

    if verbosity > 0:
        print("True Links")
        for j in links:
            print(j, links[j])
        print("observed_vars = ", observed_vars)
        print("True PAG")
        if tau_max > 0:
            for lag in range(tau_max + 1):
                print(true_graph[:, :, lag])
        else:
            print(true_graph.squeeze())

    if plot_data:
        print("PLOTTING")
        for j in range(N):
            # ax = fig.add_subplot(N,1,j+1)
            pyplot.plot(data[:, j])

        pyplot.show()

    # Timing starts here, so data generation is not included.
    computation_time_start = time.time()

    dataframe = pp.DataFrame(data)

    #############################################
    ## Methods
    #############################################

    # Specify conditional independence test object
    if ci_test == 'par_corr':
        cond_ind_test = ParCorr(significance='analytic',
                                recycle_residuals=True)
    elif ci_test == 'cmi_knn':
        cond_ind_test = CMIknn(knn=0.1, sig_samples=500, sig_blocklength=1)
    elif ci_test == 'gp_dc':
        cond_ind_test = GPDC(recycle_residuals=True)
    elif ci_test == 'discg2':
        cond_ind_test = DiscG2()
    else:
        raise ValueError("CI test not recognized.")

    if 'lpcmci' in method:
        method_paras = method.split('_')
        # The second '_'-separated token carries the iteration count after a
        # 7-character prefix (presumably 'nprelim' -- confirm with callers).
        n_preliminary_iterations = int(method_paras[1][7:])

        if 'prelimonly' in method:
            prelim_only = True
        else:
            prelim_only = False

        lpcmci = LPCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)

        lpcmcires = lpcmci.run_lpcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_p_non_ancestral=3,
            n_preliminary_iterations=n_preliminary_iterations,
            prelim_only=prelim_only,
            verbosity=verbosity)

        # Results are read from the object's attributes, not the return value.
        graph = lpcmci.graph
        val_min = lpcmci.val_min_matrix
        max_cardinality = lpcmci.cardinality_matrix

    elif method == 'svarfci':
        svarfci = SVARFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        svarfcires = svarfci.run_svarfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_cond_px=0,
            max_p_dsep=3,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarfci.graph
        val_min = svarfci.val_min_matrix
        max_cardinality = svarfci.cardinality_matrix

    elif method == 'svarrfci':
        svarrfci = SVARRFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        svarrfcires = svarrfci.run_svarrfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarrfci.graph
        val_min = svarrfci.val_min_matrix
        max_cardinality = svarrfci.cardinality_matrix
    else:
        raise ValueError("%s not implemented." % method)

    computation_time_end = time.time()
    computation_time = computation_time_end - computation_time_start

    return {
        'true_graph': true_graph,
        'val_min': val_min,
        'max_cardinality': max_cardinality,

        # Method results
        'computation_time': computation_time,
        'graph': graph,
    }
study_data = millet_prices # give custom NAN value for tigramite to interpret mssng = 99999 study_data = study_data.copy().fillna(mssng) dataframe = pp.DataFrame(study_data.values, var_names= study_data.columns, missing_flag = mssng) tp.plot_timeseries(dataframe) parcorr = ParCorr(significance='analytic') gpdc = GPDC(significance='analytic', gp_params=None) pcmci_gpdc = PCMCI( dataframe=dataframe, cond_ind_test=gpdc, verbosity=0) pcmci = PCMCI( dataframe=dataframe, cond_ind_test=parcorr, verbosity=1) # min_lag, max_lag = 1,6 results = pcmci.run_pcmci(tau_min = min_lag, tau_max=max_lag, pc_alpha=None) # q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
def test(dataframes,max_lags=[4],alpha=[None],tests=['ParCorr'],limit=1):
    '''
    Run PCMCI on randomly drawn dataframes and log the timing of each run.

    For every requested conditional independence test, entries are drawn at
    random from ``dataframes`` until ``limit`` runs have been recorded for
    each distinct value of ``entry[1]``. After every successful run the
    accumulated results are written to
    ``results/performance_sample_sizes.csv``.

    Args:
        dataframes: A list of indexable entries. Index 2 is the TIGRAMITE
            dataframe; indices 3, 0 and 1 are written to the
            'representation', 'complexity' and 'sample_size' columns.
            The list is shuffled in place.
        max_lags: Maximum numbers of lags to consider for the lagged time
            series. The default list is only read, never modified.
        alpha: Significance levels passed as ``pc_alpha`` and
            ``alpha_level``. The default list is only read, never modified.
        tests: Names of the conditional independence tests to run; the
            recognised names are 'RCOT', 'GPDC', 'ParCorr' and 'CMIknn'.
        limit: Number of runs to record per distinct ``entry[1]`` value.

    Returns:
        None. Results are only written to the CSV file.
    '''
    test_results = []
    random.shuffle(dataframes)

    # Nothing to sample from: the original looped forever here because
    # random.sample() kept raising inside the catch-all handler.
    if not dataframes:
        return

    # Instantiate only the requested tests, always in this fixed order.
    tests_to_evaluate = []
    if 'RCOT' in tests:
        tests_to_evaluate.append(['RCOT', RCOT()])
    if 'GPDC' in tests:
        tests_to_evaluate.append(['GPDC', GPDC()])
    if 'ParCorr' in tests:
        tests_to_evaluate.append(['ParCorr', ParCorr(significance='analytic')])
    if 'CMIknn' in tests:
        tests_to_evaluate.append(['CMIknn', CMIknn()])

    # Runs recorded so far per distinct entry[1] value.
    unique_complexities = list(set(entry[1] for entry in dataframes))
    counts = dict.fromkeys(unique_complexities, 0)

    columns = ['representation', 'complexity', 'sample_size', 'max_lag',
               'test', 'alpha', 'valid_links_at_alpha', 'learning_time']

    for test_name, ci_test in tests_to_evaluate:
        # NOTE(review): `stop` is reset once per test only, so after the
        # first (lag, alpha) pair has filled the quota the remaining pairs
        # are skipped -- confirm whether that is intended.
        stop = False
        for l in max_lags:
            for a in alpha:
                while not stop:
                    try:
                        i = random.sample(dataframes, 1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                dataframe=i[2],
                                cond_ind_test=ci_test,
                                verbosity=0)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)
                            q_matrix = pcmci.get_corrected_pvalues(
                                p_matrix=results['p_matrix'],
                                fdr_method='fdr_bh')
                            parents = pcmci.return_significant_parents(
                                pq_matrix=q_matrix,
                                val_matrix=results['val_matrix'],
                                alpha_level=a)['parents']
                            # Total number of significant links over all
                            # variables.
                            valid_links = sum(
                                len(links) for links in parents.values())
                            test_results.append(
                                [i[3], i[0], i[1], l, test_name, a,
                                 valid_links, time_lapse])
                            results_df = pd.DataFrame(test_results,
                                                      columns=columns)
                            print('results ready to be saved')
                            # Rewritten after every run so partial results
                            # survive an interruption.
                            results_df.to_csv(
                                'results/performance_sample_sizes.csv',
                                index=False)
                            counts[i[1]] += 1
                        # `>=` also terminates for a non-positive limit.
                        if all(value >= limit for value in counts.values()):
                            stop = True
                    except Exception as err:
                        # Best effort: skip the failing draw and try another
                        # one, but let KeyboardInterrupt/SystemExit through
                        # and show what went wrong.
                        print('Hoopla!')
                        print(repr(err))
        for key in unique_complexities:
            counts[key] = 0
class TestCondInd():  #unittest.TestCase):
    """Numerical checks for the conditional independence tests.

    The methods rely on names defined outside this class (``pp``,
    ``ParCorr``, ``GPDC``, ``CMIknn``, ``CMIsymb``, ``_construct_array``,
    ``_par_corr_to_cmi`` and a module-level ``verbosity``). ``setUp`` must
    run before any test method because it creates ``self.ci_par_corr`` and
    ``self.ci_gpdc``.

    All ``print`` calls use the single-argument call form, which behaves
    the same on Python 2 and Python 3.
    """

    def setUp(self):
        """Seed numpy's RNG and build the shared data and test objects."""
        auto = 0.6
        coeff = 0.6
        T = 1000
        numpy.random.seed(42)
        # True graph
        links_coeffs = {
            0: [((0, -1), auto)],
            1: [((1, -1), auto), ((0, -1), coeff)],
            2: [((2, -1), auto), ((1, -1), coeff)],
        }

        self.data, self.true_parents_coeffs = pp.var_process(links_coeffs,
                                                             T=T)
        T, N = self.data.shape

        self.ci_par_corr = ParCorr(use_mask=False,
                                   mask_type=None,
                                   significance='analytic',
                                   fixed_thres=None,
                                   sig_samples=10000,
                                   sig_blocklength=3,
                                   confidence='analytic',
                                   conf_lev=0.9,
                                   conf_samples=10000,
                                   conf_blocklength=1,
                                   recycle_residuals=False,
                                   verbosity=0)

        self.ci_gpdc = GPDC(significance='analytic',
                            sig_samples=1000,
                            sig_blocklength=1,
                            confidence='bootstrap',
                            conf_lev=0.9,
                            conf_samples=100,
                            conf_blocklength=None,
                            use_mask=False,
                            mask_type='y',
                            recycle_residuals=False,
                            verbosity=0)

    def test_construct_array(self):
        """Check the array and xyz vector returned by _construct_array."""
        data = numpy.array([[0, 10, 20, 30],
                            [1, 11, 21, 31],
                            [2, 12, 22, 32],
                            [3, 13, 23, 33],
                            [4, 14, 24, 34],
                            [5, 15, 25, 35],
                            [6, 16, 26, 36]])
        data_mask = numpy.array([[0, 1, 1, 0],
                                 [0, 0, 0, 0],
                                 [1, 0, 0, 0],
                                 [0, 0, 1, 1],
                                 [0, 0, 0, 0],
                                 [0, 0, 0, 0],
                                 [0, 0, 0, 0]], dtype='bool')

        X = [(1, -1)]
        Y = [(0, 0)]
        Z = [(0, -1), (1, -2), (2, 0)]

        tau_max = 2

        # The same expectation is asserted for all three calls below.
        expected_array = numpy.array([[13, 14, 15],
                                      [4, 5, 6],
                                      [3, 4, 5],
                                      [12, 13, 14],
                                      [24, 25, 26]])
        expected_xyz = numpy.array([0, 1, 2, 2, 2])

        # No masking
        res = _construct_array(
            X=X, Y=Y, Z=Z,
            tau_max=tau_max,
            use_mask=False,
            data=data,
            mask=data_mask,
            missing_flag=None,
            mask_type=None, verbosity=verbosity)
        print(res[0])
        numpy.testing.assert_almost_equal(res[0], expected_array)
        numpy.testing.assert_almost_equal(res[1], expected_xyz)

        # masking y
        # NOTE(review): the expected array equals the unmasked one although
        # use_mask=True -- confirm this expectation is still valid.
        res = _construct_array(
            X=X, Y=Y, Z=Z,
            tau_max=tau_max,
            use_mask=True,
            data=data,
            mask=data_mask,
            mask_type=['y'], verbosity=verbosity)
        print(res[0])
        numpy.testing.assert_almost_equal(res[0], expected_array)
        numpy.testing.assert_almost_equal(res[1], expected_xyz)

        # masking all
        res = _construct_array(
            X=X, Y=Y, Z=Z,
            tau_max=tau_max,
            use_mask=True,
            data=data,
            mask=data_mask,
            mask_type=['x', 'y', 'z'], verbosity=verbosity)
        print(res[0])
        numpy.testing.assert_almost_equal(res[0], expected_array)
        numpy.testing.assert_almost_equal(res[1], expected_xyz)

    def test_missing_values(self):
        """Check _construct_array when one entry equals missing_flag."""
        data = numpy.array([[0, 10, 20, 30],
                            [1, 11, 21, 31],
                            [2, 12, 22, 32],
                            [3, 13, 999, 33],
                            [4, 14, 24, 34],
                            [5, 15, 25, 35],
                            [6, 16, 26, 36]])
        data_mask = numpy.zeros((7, 4), dtype='bool')

        X = [(1, -2)]
        Y = [(0, 0)]
        Z = [(2, -1)]

        tau_max = 1

        # Missing values
        res = _construct_array(
            X=X, Y=Y, Z=Z,
            tau_max=tau_max,
            use_mask=False,
            data=data,
            mask=data_mask,
            missing_flag=999,
            mask_type=['y'], verbosity=verbosity)

        numpy.testing.assert_almost_equal(
            res[0], numpy.array([[10, 14],
                                 [2, 6],
                                 [21, 25]]))

    def test_bootstrap_vs_analytic_confidence_parcorr(self):
        """Bootstrap and analytic ParCorr confidence bounds must agree."""
        cov = numpy.array([[1., 0.3], [0.3, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov, size=150).T

        val = numpy.corrcoef(array)[0, 1]
        dim, T = array.shape
        xyz = numpy.array([0, 1])

        conf_ana = self.ci_par_corr.get_analytic_confidence(
            df=T - dim,
            value=val,
            conf_lev=self.ci_par_corr.conf_lev)

        conf_boots = self.ci_par_corr.get_bootstrap_confidence(
            array, xyz,
            dependence_measure=self.ci_par_corr.get_dependence_measure,
            conf_samples=self.ci_par_corr.conf_samples,
            conf_blocklength=self.ci_par_corr.conf_blocklength,
            conf_lev=self.ci_par_corr.conf_lev,
        )

        print(conf_ana)
        print(conf_boots)

        numpy.testing.assert_allclose(numpy.array(conf_ana),
                                      numpy.array(conf_boots),
                                      atol=0.01)

    def test_shuffle_vs_analytic_significance_parcorr(self):
        """Shuffle and analytic ParCorr p-values must agree."""
        cov = numpy.array([[1., 0.04], [0.04, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov, size=250).T

        val = numpy.corrcoef(array)[0, 1]
        dim, T = array.shape
        xyz = numpy.array([0, 1])

        pval_ana = self.ci_par_corr.get_analytic_significance(value=val,
                                                              T=T, dim=dim)

        pval_shuffle = self.ci_par_corr.get_shuffle_significance(
            array, xyz, val)

        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.01)

    def test__parcorr_get_single_residuals(self):
        """ParCorr residuals must recover the noise of the target row."""
        target_var = 0
        true_residual = numpy.random.randn(4, 1000)

        array = numpy.copy(true_residual)

        # Add a linear dependence of row 0 on rows 2 and 3.
        array[0] += 0.5 * array[2:].sum(axis=0)

        est_residual = self.ci_par_corr._get_single_residuals(
            array, target_var,
            standardize=False,
            return_means=False)

        numpy.testing.assert_allclose(est_residual, true_residual[0],
                                      atol=0.01)

    def test_par_corr(self):
        """The ParCorr estimate must match the constructed correlation."""
        val_ana = 0.6
        T = 1000
        array = numpy.random.randn(5, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(
            mean=numpy.zeros(2),
            cov=cov, size=T).T

        # Generate some confounding
        array[0] += 0.5 * array[2:].sum(axis=0)
        array[1] += 0.7 * array[2:].sum(axis=0)

        dim, T = array.shape
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = self.ci_par_corr.get_dependence_measure(array, xyz)

        print(val_est)
        print(val_ana)

        numpy.testing.assert_allclose(numpy.array(val_ana),
                                      numpy.array(val_est),
                                      atol=0.02)

    def test__gpdc_get_single_residuals(self):
        """The GPDC mean prediction must follow the nonlinear dependence."""
        ci_test = self.ci_gpdc

        c = .3
        T = 1000

        numpy.random.seed(42)

        def func(x):
            return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

        array = numpy.random.randn(3, T)
        array[1] += c * func(array[2])
        xyz = numpy.array([0, 1] + [2 for i in range(array.shape[0] - 2)])

        target_var = 1

        dim, T = array.shape
        c_std = c
        # Keep a copy in case the call below modifies its input.
        array_orig = numpy.copy(array)

        (est_residual, pred) = ci_test._get_single_residuals(
            array, target_var,
            standardize=False,
            return_means=True)

        # Testing that in the center the fit is good
        center = numpy.where(numpy.abs(array_orig[2]) < .7)[0]
        # .round() is applied to the array before printing; on Python 3 the
        # previous form would have called it on print()'s None result.
        print(pred[center][:10].round(2))
        print((c_std * func(array_orig[2][center])[:10]).round(2))
        numpy.testing.assert_allclose(pred[center],
                                      c_std * func(array_orig[2][center]),
                                      atol=0.2)

    def plot__gpdc_get_single_residuals(self):
        """Save a scatter plot comparing GPDC and ParCorr mean predictions.

        Not collected as a test (the name does not start with ``test``).
        The figure is written to a hard-coded absolute path.
        """
        ci_test = self.ci_gpdc

        a = 0.
        c = .3
        T = 500

        # Each key refers to a variable and the incoming links are supplied
        # as a list of format [((driver, lag), coeff), ...]
        links_coeffs = {
            0: [((0, -1), a)],
            1: [((1, -1), a), ((0, -1), c)],
        }

        numpy.random.seed(42)
        data, true_parents_neighbors = pp.var_process(
            links_coeffs,
            use='inv_inno_cov', T=T)
        dataframe = pp.DataFrame(data)
        ci_test.set_dataframe(dataframe)

        def func(x):
            return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

        true_residual = numpy.random.randn(3, T)
        array = numpy.copy(true_residual)
        array[1] += c * func(array[2])
        xyz = numpy.array([0, 1] + [2 for i in range(array.shape[0] - 2)])
        # Single formatted string reproduces the space-separated output of
        # the former multi-argument print statement.
        print('xyz  %s %s' % (xyz, numpy.where(xyz == 1)))
        target_var = 1

        dim, T = array.shape
        c_std = c
        array_orig = numpy.copy(array)

        import matplotlib
        from matplotlib import pyplot
        (est_residual, pred) = ci_test._get_single_residuals(
            array, target_var,
            standardize=False,
            return_means=True)
        (resid_, pred_parcorr) = self.ci_par_corr._get_single_residuals(
            array, target_var,
            standardize=False,
            return_means=True)

        fig = pyplot.figure()
        ax = fig.add_subplot(111)
        ax.scatter(array_orig[2], array_orig[1])
        ax.scatter(array_orig[2], pred, color='red')
        ax.scatter(array_orig[2], pred_parcorr, color='green')
        ax.plot(numpy.sort(array_orig[2]),
                c_std * func(numpy.sort(array_orig[2])), color='black')

        pyplot.savefig('/home/jakobrunge/test/gpdctest.pdf')

    def test_shuffle_vs_analytic_significance_gpdc_dependent(self):
        """Shuffle and analytic GPDC p-values agree (correlation 0.2).

        This method used to share its name with the test below, so the
        later definition replaced it and it never ran.
        """
        cov = numpy.array([[1., 0.2], [0.2, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov, size=245).T

        dim, T = array.shape
        xyz = numpy.array([0, 1])

        val = self.ci_gpdc.get_dependence_measure(array, xyz)

        pval_ana = self.ci_gpdc.get_analytic_significance(value=val,
                                                          T=T, dim=dim)

        pval_shuffle = self.ci_gpdc.get_shuffle_significance(array, xyz,
                                                             val)

        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.05)

    def test_shuffle_vs_analytic_significance_gpdc(self):
        """Shuffle and analytic GPDC p-values agree (correlation 0.01)."""
        cov = numpy.array([[1., 0.01], [0.01, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov, size=300).T

        dim, T = array.shape
        xyz = numpy.array([0, 1])

        val = self.ci_gpdc.get_dependence_measure(array, xyz)

        pval_ana = self.ci_gpdc.get_analytic_significance(value=val,
                                                          T=T, dim=dim)

        pval_shuffle = self.ci_gpdc.get_shuffle_significance(array, xyz,
                                                             val)

        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.05)

    def test_cmi_knn(self):
        """The CMIknn estimate must match the value implied by val_ana."""
        ci_cmi_knn = CMIknn(use_mask=False,
                            mask_type=None,
                            significance='shuffle_test',
                            fixed_thres=None,
                            sig_samples=10000,
                            sig_blocklength=3,
                            knn=10,
                            confidence='bootstrap',
                            conf_lev=0.9,
                            conf_samples=10000,
                            conf_blocklength=1,
                            verbosity=0)

        val_ana = 0.6
        T = 10000
        numpy.random.seed(42)
        array = numpy.random.randn(5, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(
            mean=numpy.zeros(2),
            cov=cov, size=T).T

        # Generate some confounding
        if len(array) > 2:
            array[0] += 0.5 * array[2:].sum(axis=0)
            array[1] += 0.7 * array[2:].sum(axis=0)

        dim, T = array.shape
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = ci_cmi_knn.get_dependence_measure(array, xyz)

        print(val_est)
        print(_par_corr_to_cmi(val_ana))

        numpy.testing.assert_allclose(
            numpy.array(_par_corr_to_cmi(val_ana)),
            numpy.array(val_est),
            atol=0.02)

    def test_trafo2uniform(self):
        """_trafo2uniform must give an approximately flat histogram."""
        T = 1000
        array = numpy.random.randn(2, T)

        bins = 10

        uniform = self.ci_gpdc._trafo2uniform(array)

        for i in range(array.shape[0]):
            print(uniform[i].shape)
            hist, edges = numpy.histogram(uniform[i], bins=bins,
                                          density=True)
            print(hist / float(bins))  #, edges
            numpy.testing.assert_allclose(numpy.ones(bins) / float(bins),
                                          hist / float(bins),
                                          atol=0.01)

    def test_cmi_symb(self):
        """The CMIsymb estimate must match the value implied by val_ana."""
        ci_cmi_symb = CMIsymb(use_mask=False,
                              mask_type=None,
                              significance='shuffle_test',
                              fixed_thres=None,
                              sig_samples=10000,
                              sig_blocklength=3,
                              confidence='bootstrap',
                              conf_lev=0.9,
                              conf_samples=10000,
                              conf_blocklength=1,
                              verbosity=0)

        val_ana = 0.6
        T = 100000
        numpy.random.seed(None)
        array = numpy.random.randn(3, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(
            mean=numpy.zeros(2),
            cov=cov, size=T).T

        # Generate some confounding
        if len(array) > 2:
            array[0] += 0.5 * array[2:].sum(axis=0)
            array[1] += 0.7 * array[2:].sum(axis=0)

        # Transform to symbolic data
        array = pp.quantile_bin_array(array.T, bins=16).T

        dim, T = array.shape
        # NOTE(review): xyz has five entries but the array has three rows --
        # confirm whether this should be [0, 1, 2].
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = ci_cmi_symb.get_dependence_measure(array, xyz)

        print(val_est)
        print(_par_corr_to_cmi(val_ana))

        numpy.testing.assert_allclose(
            numpy.array(_par_corr_to_cmi(val_ana)),
            numpy.array(val_est),
            atol=0.02)
def caus_gpdc(data, var_names):
    """Run PCMCI with a masked GPDC test and plot the time series graph.

    Rows whose index modulo 72 is below 30 or above 47 are masked in
    columns 0, 1, 2, 9, 10 and 11, so ``data`` needs at least 12 columns.
    The mask now covers every row of ``data``; the former hard-coded row
    count of 68904 raised IndexError for shorter inputs and left the rows
    of longer inputs unmasked.

    Side effects: writes the GPDC null distributions to
    ``dc_nulldists.npz`` in the working directory, prints the significant
    links and draws a time series graph.

    Args:
        data: Two-dimensional array, one row per time step.
        var_names: Variable names passed to PCMCI and to the plot.

    Returns:
        Tuple ``(results, link_matrix)``: the dict returned by
        ``run_pcmci`` and the ``'link_matrix'`` entry of the
        significant-parents result at alpha level 0.01.
    """
    import numpy as np
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import GPDC

    # 1.0 marks a masked row, 0.0 an unmasked one (float, as before).
    position = np.arange(len(data)) % 72
    data_mask_row = ((position < 30) | (position > 47)).astype(float)

    # Only the listed columns are masked; all others stay unmasked.
    masked_columns = [0, 1, 2, 9, 10, 11]
    data_mask = np.zeros(data.shape)
    data_mask[:, masked_columns] = data_mask_row[:, np.newaxis]

    dataframe = pp.DataFrame(data, mask=data_mask)

    gpdc = GPDC(significance='analytic', gp_params=None,
                use_mask=True, mask_type='y')
    # Precompute the null distributions and point the test at the file.
    gpdc.generate_and_save_nulldists(sample_sizes=range(495, 501),
                                     null_dist_filename='dc_nulldists.npz')
    gpdc.null_dist_filename = 'dc_nulldists.npz'
    pcmci_gpdc = PCMCI(dataframe=dataframe, cond_ind_test=gpdc,
                       var_names=var_names, verbosity=1)

    results = pcmci_gpdc.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    q_matrix = pcmci_gpdc.get_corrected_pvalues(
        p_matrix=results['p_matrix'], fdr_method='fdr_bh')

    pcmci_gpdc._print_significant_links(p_matrix=results['p_matrix'],
                                        q_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=0.01)

    link_matrix = pcmci_gpdc._return_significant_parents(
        pq_matrix=q_matrix,
        val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )

    return results, link_matrix