def test_pcmci(self):
    """Run PCMCI on self.data and check the recovered parents match ground truth."""
    # Strict significance settings for this test.
    pc_alpha = 0.05  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
    tau_max = 2
    alpha_level = 0.01

    pcmci = PCMCI(
        dataframe=pp.DataFrame(self.data),
        cond_ind_test=ParCorr(verbosity=verbosity),
        verbosity=verbosity,
    )
    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha)

    # Keep only the parents that are significant at alpha_level.
    significant = pcmci._return_significant_parents(
        pq_matrix=results['p_matrix'],
        val_matrix=results['val_matrix'],
        alpha_level=alpha_level)
    assert_graphs_equal(significant['parents'], self.true_parents)
class PCMCIPlugin:
    """Plugin wrapping a PCMCI causal-discovery workflow.

    ``input()``  reads tab-separated link specifications,
    ``run()``    simulates a VAR process from them and runs PCMCI,
    ``output()`` prints the significant links found.
    """

    def input(self, inputfile):
        """Parse link coefficients from *inputfile*.

        Each line is ``var<TAB>driver<TAB>lag<TAB>coeff``.  Populates
        ``self.links_coeffs`` as ``{var: [((driver, lag), coeff), ...]}``.
        """
        self.links_coeffs = {}
        # 'with' guarantees the file handle is closed (original leaked it).
        with open(inputfile, 'r') as infile:
            for line in infile:
                contents = line.split('\t')
                var = int(contents[0])
                driver = int(contents[1])
                lag = int(contents[2])
                coeff = float(contents[3])
                # setdefault replaces the manual "if not var in dict" dance.
                self.links_coeffs.setdefault(var, []).append(
                    ((driver, lag), coeff))

    def run(self):
        """Simulate data from the parsed links and run PCMCI (ParCorr test)."""
        data, _ = pp.var_process(self.links_coeffs, T=1000)
        dataframe = pp.DataFrame(data)
        cond_ind_test = ParCorr()
        self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)

    def output(self, outputfile):
        """Print links significant at alpha=0.05.

        NOTE(review): *outputfile* is accepted but unused (results go to
        stdout) — kept for interface compatibility.
        """
        self.pcmciobj.print_significant_links(
            p_matrix=self.results['p_matrix'],
            val_matrix=self.results['val_matrix'],
            alpha_level=0.05)
def issue38():
    """Reproduce tigramite issue #38: run PCMCI with CMIknn on the example CSV."""
    # The example data file lives next to this module.
    csv_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'tigramite_issue_38_input_example.csv')
    df = pd.read_csv(csv_path, index_col=0)
    print(df)

    frame = DataFrame(
        data=df.values,
        mask=None,
        missing_flag=None,
        var_names=df.columns,
        datatime=None,
    )
    indep_test = CMIknn()

    # Restrict estimation to the columns whose label contains 'i'.
    chosen = [label for label in df.columns if 'i' in label]
    chosen_ix = [df.columns.get_loc(label) for label in chosen]

    print(("Init PCMCI with:"
           f"dataframe={frame},"
           f"cond_ind_test={indep_test},"
           f"selected_variables={chosen_ix},"
           f"verbosity=10,"))
    pcmci = PCMCI(
        dataframe=frame,
        cond_ind_test=indep_test,
        selected_variables=chosen_ix,
        verbosity=10,
    )
    print("Running PCMCI...")
    pcmci.run_pcmci(tau_max=24, pc_alpha=0.1)
    print("Done successfully! No errors!")
correlations = pcmci.get_lagged_dependencies(tau_max=3) lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations, setup_args={ 'var_names': headers, 'x_base': 5, 'y_base': .5 }) if verbose > 1: if display_images: lag_func_matrix.savefig() if save_images: lag_func_matrix.savefig("lag_func.png") pcmci.verbosity = 1 results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None) #Print results print("p-values") print(results['p_matrix'].round(3)) print("MCI partial correlations") print(results['val_matrix'].round(2)) #Save results to file p_matrix = results['p_matrix'] with open("p-values.csv", "w") as csv_file: writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL) #[[[1 2 3]]] Three brackets to get through.
pc_alpha = pcA_set2 pc_alpha_name = 'set2' elif p == 4: pc_alpha = pcA_set3 pc_alpha_name = 'set3' elif p == 5: pc_alpha = pcA_set4 pc_alpha_name = 'set4' elif p == 6: pc_alpha = pcA_none pc_alpha_name = 'none' # ====================================================================================================================== results = pcmci.run_pcmci( tau_max=tau_max, pc_alpha=pc_alpha, tau_min=tau_min, max_combinations=1) #selected_links = dictionary/None #results = pcmci.run_pcmci(selected_links =None, tau_max=tau_max, pc_alpha = pc_alpha, tau_min = tau_min,save_iterations=False, max_conds_dim=None, max_combinations=1, max_conds_py=None, max_conds_px=None) #selected_links = dictionary/None #results = pcmci.run_pcmci(selected_links = dictionary, tau_max=tau_max, pc_alpha = pc_alpha, tau_min = tau_min,save_iterations=False, max_conds_dim=None, max_combinations=1, max_conds_py=None, max_conds_px=None) #selected_links = dictionary/None q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh') pcmci._print_significant_links( p_matrix=results['p_matrix'], q_matrix=q_matrix, #results['p_matrix'] val_matrix=results['val_matrix'], alpha_level=alpha_level) sig = pcmci._return_significant_parents(
a = nc_file.variables["NAOIN"][:] b = nc_file.variables["NAOIS"][:] data[:, 4] = a - b data_mask = np.zeros(data.shape) for t in range(1, T + 1): if (t % 73) >= 12 and (t % 73) <= 30: data_mask[t - 1, :] = True # Initialize dataframe object, specify time axis and variable names var_names = ['WPSH', 'IO', 'WNP', 'ENSO', 'NAO'] dataframe = pp.DataFrame(data, mask=data_mask) parcorr = ParCorr(significance='analytic', mask_type='xyz') pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr) results = pcmci.run_pcmci(tau_max=12, pc_alpha=0.03) # Correct p-values q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh') # Plotting link_matrix = pcmci.return_significant_parents( pq_matrix=q_matrix, val_matrix=results['val_matrix'], alpha_level=0.03)['link_matrix'] tp.plot_graph(val_matrix=results['val_matrix'], link_matrix=link_matrix, var_names=var_names) """ tp.plot_time_series_graph(val_matrix=results['val_matrix'],
# In[7]: correlations = pcmci.get_lagged_dependencies(tau_max=20, val_only=True)['val_matrix'] lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations, setup_args={ 'figsize': (6, 6), 'var_names': var_names, 'x_base': 10, 'y_base': .5 }) # In[8]: pcmci.verbosity = 0 results = pcmci.run_pcmci(tau_max=8, pc_alpha=None) # In[9]: print("p-values") print(results['p_matrix'].round(3)) print("MCI partial correlations") print(results['val_matrix'].round(2)) # In[10]: q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh') pcmci.print_significant_links(p_matrix=results['p_matrix'], q_matrix=q_matrix, val_matrix=results['val_matrix'],
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    """Run the PCMCI algorithm (tigramite 4) for train/test split *s*.

    Builds the full data array from the response variable (RV) plus all
    precursor-region time series, optionally merges external precursor time
    series, attaches train/test masks, and estimates parents with
    PCMCI + ParCorr.

    NOTE(review): formatting reconstructed from a whitespace-mangled source;
    indentation of nested blocks was inferred — verify against the original.

    Parameters
    ----------
    ex : dict
        Experiment settings; keys read here include 'SaveTF', 'fig_subpath',
        'pcA_sets', 'pcA_set', 'alpha_level_tig', 'RV_name', 'vars',
        'import_prec_ts', 'precursor_ts', 'tfreq', 'sstartdate', 'senddate',
        'startyear', 'endyear', 'tigr_tau_max', 'max_comb_actors', 'params'.
    outdic_actors : dict
        Maps variable name -> actor object with per-split ``ts_corr``.
    s : int
        Index of the train/test split.
    df_splits : pandas.DataFrame
        Train/test bookkeeping with 'TrainIsTrue' and 'RV_mask' per split.
    map_proj
        Map projection, only forwarded to the region-printing helper.

    Returns
    -------
    (df, df_data)
        Bookkeeping DataFrame of precursors and the assembled data
        DataFrame (with 'TrainIsTrue' and 'RV_mask' columns appended).
    """
    #=====================================================================================
    #
    # 4) PCMCI-algorithm
    #
    #=====================================================================================

    # save output: redirect stdout to a buffer so the verbose PCMCI log can
    # be written to a text file at the end.
    if ex['SaveTF'] == True:
        # from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'), 'w+')
    #%%
    # amount of text printed:
    verbosity = 3

    # alpha level for independence test within the pc procedure (finding parents)
    pc_alpha = ex['pcA_sets'][ex['pcA_set']]
    # alpha level for multiple linear regression model while conditining on parents of
    # parents
    alpha_level = ex['alpha_level_tig']

    print('run tigramite 4, run.pcmci')
    print(('alpha level(s) for independence tests within the pc procedure'
          '(finding parents): {}'.format(pc_alpha)))
    print(('alpha level for multiple linear regression model while conditining '
           'on parents of parents: {}'.format(ex['alpha_level_tig'])))

    # Retrieve traintest info
    traintest = df_splits

    # load Response Variable class
    RV = ex[ex['RV_name']]
    # create list with all actors, these will be merged into the fulldata array
    allvar = ex['vars'][0]
    var_names_corr = []
    actorlist = []
    cols = [[RV.name]]

    for var in allvar[:]:
        print(var)
        actor = outdic_actors[var]
        if actor.ts_corr[s].size != 0:
            ts_train = actor.ts_corr[s].values
            actorlist.append(ts_train)
            # create array which numbers the regions
            var_idx = allvar.index(var)
            n_regions = actor.ts_corr[s].shape[1]
            actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                              for i in range(n_regions)]
            # Array of corresponing regions with var_names_corr (first entry is RV)
            var_names_corr = var_names_corr + actor.var_info
            cols.append(list(actor.ts_corr[s].columns))
            index_dates = actor.ts_corr[s].index
    var_names_corr.insert(0, RV.name)

    # stack actor time-series together:
    fulldata = np.concatenate(tuple(actorlist), axis=1)
    print(('There are {} regions in total'.format(fulldata.shape[1])))
    # add the full 1D time series of interest as first entry:
    fulldata = np.column_stack((RV.RVfullts, fulldata))
    df_data = pd.DataFrame(fulldata, columns=flatten(cols), index=index_dates)

    if ex['import_prec_ts'] == True:
        # Optionally merge externally computed precursor time series.
        var_names_full = var_names_corr.copy()
        for d in ex['precursor_ts']:
            path_data = d[1]
            if len(path_data) > 1:
                path_data = ''.join(list(path_data))
            # skip first col because it is the RV ts
            df_data_ext = func_fc.load_hdf5(
                path_data)['df_data'].iloc[:, 1:].loc[s]
            # keep only float columns (the actual time series)
            cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                    df_data_ext.dtypes == 'float32')
            cols_ext = list(df_data_ext.columns[cols_ts])
            # cols_ext must be of format '{}_{int}_{}'; relabel any
            # non-numeric middle part with a fresh integer label >= 100.
            lab_int = 100
            for i, c in enumerate(cols_ext):
                char = c.split('_')[1]
                if char.isdigit():
                    pass
                else:
                    cols_ext[i] = c.replace(char, str(lab_int)) + char
                    lab_int += 1
            df_data_ext = df_data_ext[cols_ext]
            to_freq = ex['tfreq']
            if to_freq != 1:
                # resample the external series to the experiment's frequency
                start_end_date = (ex['sstartdate'], ex['senddate'])
                start_end_year = (ex['startyear'], ex['endyear'])
                df_data_ext = functions_pp.time_mean_bins(df_data_ext,
                                                          to_freq,
                                                          start_end_date,
                                                          start_end_year,
                                                          seldays='part')[0]
            # df_data_ext = functions_pp.time_mean_bins(df_data_ext,
            #                                          ex, ex['tfreq'],
            #                                          seldays='part')[0]
            # Expand var_names_corr
            n = var_names_full[-1][0] + 1
            add_n = n + len(cols_ext)
            n_var_idx = var_names_full[-1][-1] + 1
            for i in range(n, add_n):
                var_names_full.append([i, cols_ext[i - n], n_var_idx])
            df_data = df_data.merge(df_data_ext,
                                    left_index=True, right_index=True)
    else:
        var_names_full = var_names_corr

    # Dates belonging to the training part of this split, and the subset of
    # those that are response-variable dates.
    bool_train = traintest.loc[s]['TrainIsTrue']
    bool_RV_train = np.logical_and(bool_train, traintest.loc[s]['RV_mask'])
    dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
    dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index
    RVfull_train = RV.RVfullts.sel(time=dates_train)
    datesfull_train = pd.to_datetime(RVfull_train.time.values)
    data = df_data.loc[datesfull_train].values
    print((data.shape))

    # get RV datamask (same shape als data)
    data_mask = [
        True if d in dates_RV_train else False for d in datesfull_train
    ]
    data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

    # add traintest mask to fulldata
    # dates_all = pd.to_datetime(RV.RVfullts.index)
    # dates_RV = pd.to_datetime(RV.RV_ts.index)
    dates_all = pd.to_datetime(RV.RVfullts.time.values)
    dates_RV = pd.to_datetime(RV.RV_ts.time.values)
    df_data['TrainIsTrue'] = [
        True if d in datesfull_train else False for d in dates_all
    ]
    df_data['RV_mask'] = [True if d in dates_RV else False for d in dates_all]

    # ======================================================================================================================
    # tigramite 3
    # ======================================================================================================================
    T, N = data.shape  # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names_full)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================
    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=4)
    # selected_variables : list of integers, optional (default: range(N))
    # Specify to estimate parents only for selected variables. If None is
    # passed, parents are estimated for all variables.
    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                              pc_alpha=pc_alpha,
                              tau_min=0,
                              max_combinations=ex['max_comb_actors'])

    # Benjamini-Hochberg FDR correction of the MCI p-values.
    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')
    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    # returns all parents, not just causal precursors (of lag>0)
    sig = rgcpd.return_sign_parents(pcmci,
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=alpha_level)
    all_parents = sig['parents']
    # link_matrix = sig['link_matrix']
    links_RV = all_parents[0]
    df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)
    #%%
    rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                      outdic_actors, map_proj, ex)
    #%%
    # Flush the buffered log to disk and restore stdout.
    if ex['SaveTF'] == True:
        if sys.version[:1] == '3':
            fname = f's{s}_' + ex['params'] + '.txt'
            file = io.open(os.path.join(ex['fig_subpath'], fname), mode='w+')
            file.write(f.getvalue())
            file.close()
            f.close()
        elif sys.version[:1] == '2':
            f.close()
        sys.stdout = orig_stdout

    return df, df_data
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    """Run PCMCI with the RCOT test on *data* and write significant edges to CSV.

    Args:
        data: iterable of records (e.g. an itertools.chain); materialized
            into a numpy array before use.
        dt: time axis passed to ``pp.DataFrame`` as ``datatime``.
        index: identifier appended to every output row and to the CSV name.
        headers: variable names, one per column of *data*.
        T_data, N_data: unused; kept for interface compatibility.
        maxlag: maximum lag ``tau_max`` for PCMCI.

    Returns:
        list of ``[effect, cause, index]`` rows for links with p < 0.05.
    """
    tau_max = maxlag

    print("======")
    # Incoming records may be a lazy iterable; materialize into an ndarray.
    data = np.array(list(data))
    print("data len is ")
    print(len(data))

    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # Collect directed edges: a (cause, effect) pair is kept as soon as any
    # lag has p < 0.05, excluding self-links.
    result_arr = []
    for index_cause, item in enumerate(results['p_matrix']):
        print("index is")
        print(index)
        print("item is")
        print(item)
        print("cause is")
        cause = headers[index_cause]
        print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            print("effect arr is ")
            print(arr)
            print("effect name is")
            effect = headers[index_effect]
            print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    # One CSV per shard, named by *index*.
    with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))

    return result_arr
parcorr = ParCorr(significance='analytic') gpdc = GPDC(significance='analytic', gp_params=None) pcmci_gpdc = PCMCI( dataframe=dataframe, cond_ind_test=gpdc, verbosity=0) pcmci = PCMCI( dataframe=dataframe, cond_ind_test=parcorr, verbosity=1) # min_lag, max_lag = 1,6 results = pcmci.run_pcmci(tau_min = min_lag, tau_max=max_lag, pc_alpha=None) # q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh') # pcmci.print_significant_links( p_matrix = results['p_matrix'], q_matrix = q_matrix, val_matrix = results['val_matrix'], alpha_level = 0.05) link_matrix = pcmci.return_significant_links(pq_matrix = results['p_matrix'], val_matrix=results['val_matrix'], alpha_level=0.05)['link_matrix'] tp.plot_graph( val_matrix=results['val_matrix'], link_matrix=link_matrix, var_names=study_data.columns,
def test(dataframes, max_lags=None, alpha=None, tests=None, limit=1):
    '''
    This function performs the PCMCI algorithm for all the dataframes received
    as parameters, given the hyper-parameters of the conditional independence
    test.

    Args:
        dataframes: A list of TIGRAMITE dataframes
        max_lags: Maximum number of lags to consider for the lagged time series
        alpha: Significance level to perform the parent test
        tests: A list of conditional independence tests to be performed
        limit: A limit for the instances to be considered

    Returns:
    '''
    # Replace mutable default arguments (original used [4], [None], ['ParCorr']).
    max_lags = [4] if max_lags is None else max_lags
    alpha = [None] if alpha is None else alpha
    tests = ['ParCorr'] if tests is None else tests

    test_results = []
    random.shuffle(dataframes)

    # Instantiate the requested conditional-independence tests.
    tests_to_evaluate = []
    if 'RCOT' in tests:
        tests_to_evaluate.append(['RCOT', RCOT()])
    if 'GPDC' in tests:
        tests_to_evaluate.append(['GPDC', GPDC()])
    if 'ParCorr' in tests:
        tests_to_evaluate.append(['ParCorr', ParCorr(significance='analytic')])
    if 'CMIknn' in tests:
        tests_to_evaluate.append(['CMIknn', CMIknn()])

    # counts[complexity] tracks how many instances of each complexity level
    # have been evaluated for the current configuration.
    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {c: 0 for c in unique_complexities}

    # ci_test renamed from 'test' — the original shadowed the function name.
    for ci_test in tests_to_evaluate:
        stop = False
        for l in max_lags:
            for a in alpha:
                # Sample instances until 'limit' of every complexity is done.
                while not stop:
                    try:
                        i = random.sample(dataframes, 1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(dataframe=i[2],
                                          cond_ind_test=ci_test[1],
                                          verbosity=0)
                            # correlations = pcmci.get_lagged_dependencies(tau_max=20)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)
                            q_matrix = pcmci.get_corrected_pvalues(
                                p_matrix=results['p_matrix'],
                                fdr_method='fdr_bh')
                            valid_parents = list(
                                pcmci.return_significant_parents(
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=a)['parents'].values())
                            # Count all significant parent links.
                            flat_list = [item for sublist in valid_parents
                                         for item in sublist]
                            valid_links = len(flat_list)
                            test_results.append([i[3], i[0], i[1], l,
                                                 ci_test[0], a, valid_links,
                                                 time_lapse])
                            results_df = pd.DataFrame(
                                test_results,
                                columns=['representation', 'complexity',
                                         'sample_size', 'max_lag', 'test',
                                         'alpha', 'valid_links_at_alpha',
                                         'learning_time'])
                            print('results ready to be saved')
                            results_df.to_csv(
                                'results/performance_sample_sizes.csv',
                                index=False)
                            counts[i[1]] += 1
                            if all(value == limit
                                   for value in counts.values()):
                                stop = True
                    # Original used a bare ``except:`` which also swallowed
                    # KeyboardInterrupt/SystemExit; narrowed to Exception.
                    except Exception:
                        print('Hoopla!')
                # Reset per-complexity counters for the next configuration.
                for i in unique_complexities:
                    counts[i] = 0
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    """Baseline PCMCI run (RCOT test); writes significant edges to one CSV.

    Args:
        data: 2D array-like of observations passed straight to ``pp.DataFrame``.
        dt: time axis passed as ``datatime``.
        index: identifier appended to every output row.
        headers: variable names, one per column of *data*.
        T_data, N_data: unused; kept for interface compatibility.
        maxlag: maximum lag ``tau_max`` for PCMCI.

    Returns:
        list of ``[effect, cause, index]`` rows for links with p < 0.05.
    """
    # there is another tau_max in lagged dependencies that might be much longer!
    tau_max = maxlag

    print("======")
    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # Collect directed edges: a (cause, effect) pair is kept as soon as any
    # lag has p < 0.05, excluding self-links.
    result_arr = []
    for index_cause, item in enumerate(results['p_matrix']):
        cause = headers[index_cause]
        for index_effect, arr in enumerate(item):
            effect = headers[index_effect]
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))

    print(result_arr)
    return result_arr
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0,
              tau_max=1, pc_alpha=None, alpha_level=0.05, max_conds_dim=4,
              max_combinations=1, max_conds_py=None, max_conds_px=None,
              verbosity=4):
    """Run PCMCI (tigramite 4, ParCorr) on *data* for train/test split *s*.

    If *path_outsub2* is not False, all verbose PCMCI output printed during
    the run is captured in an in-memory buffer and written to
    ``split_{s}_PCMCI_out.txt`` inside that directory.

    NOTE(review): formatting reconstructed from a whitespace-mangled source.

    Parameters
    ----------
    data : ndarray
        2D array (time, variables) — shape is read via ``data.shape``.
    data_mask : ndarray
        Mask passed to ``pp.DataFrame``; presumably marks response-variable
        samples (mask_type='y') — TODO confirm against caller.
    var_names : list
        Variable names for the tigramite DataFrame.
    path_outsub2 : str or False
        Output directory for the captured log, or False to print normally.
    s : int
        Split index, used only in the log file name.
    tau_min, tau_max, pc_alpha, max_conds_dim, max_combinations,
    max_conds_py, max_conds_px
        Forwarded to ``PCMCI.run_pcmci``.
    alpha_level : float
        Significance level for reporting links.
    verbosity : int
        Verbosity for ParCorr and PCMCI.

    Returns
    -------
    (pcmci, q_matrix, results)
        The PCMCI object, FDR-corrected p-values, and the raw result dict.
    """
    #%%
    if path_outsub2 is not False:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
        # from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()
    #%%
    # ======================================================================================================================
    # tigramite 4
    # ======================================================================================================================
    T, N = data.shape  # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================
    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=verbosity)
    # selected_variables : list of integers, optional (default: range(N))
    # Specify to estimate parents only for selected variables. If None is
    # passed, parents are estimated for all variables.
    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=tau_max,
                              pc_alpha=pc_alpha,
                              tau_min=tau_min,
                              max_conds_dim=max_conds_dim,
                              max_combinations=max_combinations,
                              max_conds_px=max_conds_px,
                              max_conds_py=max_conds_py)

    # Benjamini-Hochberg FDR correction of the MCI p-values.
    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')
    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)
    #%%
    # Flush the captured log to disk and restore stdout.
    if path_outsub2 is not False:
        file = io.open(txt_fname, mode='w+')
        file.write(f.getvalue())
        file.close()
        f.close()
        sys.stdout = orig_stdout

    return pcmci, q_matrix, results
def caus_gpdc(data, var_names):
    """Run PCMCI with the GPDC independence test on seasonally masked data.

    Args:
        data: 2D array (time, variables); the seasonal mask is applied to
            columns 0-2 and 9-11.
        var_names: variable names used for the time-series-graph plot.

    Returns:
        (results, link_matrix): PCMCI result dict and the boolean matrix of
        links significant at alpha_level=0.01.
    """
    # Local imports kept (as in the original); unused ones removed.
    import numpy as np
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import GPDC

    # Mask time steps outside positions 30..47 of each 72-step cycle.
    # The original hard-coded range(68904); use len(data) so the mask always
    # covers the whole series regardless of its length.
    data_mask_row = np.zeros(len(data))
    for i in range(len(data)):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True

    data_mask = np.zeros(data.shape)
    for col in (0, 1, 2, 9, 10, 11):
        data_mask[:, col] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    # tp.plot_timeseries(data, datatime, var_names, use_mask=True,
    #                    mask=data_mask, grey_masked_samples='data')

    # Precompute GPDC null distributions and cache them on disk.
    gpdc = GPDC(significance='analytic', gp_params=None,
                use_mask=True, mask_type='y')
    gpdc.generate_and_save_nulldists(sample_sizes=range(495, 501),
                                     null_dist_filename='dc_nulldists.npz')
    gpdc.null_dist_filename = 'dc_nulldists.npz'

    pcmci_gpdc = PCMCI(dataframe=dataframe,
                       cond_ind_test=gpdc,
                       var_names=var_names,
                       verbosity=1)
    results = pcmci_gpdc.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # FDR-correct the p-values, report and plot the significant links.
    q_matrix = pcmci_gpdc.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                                fdr_method='fdr_bh')
    pcmci_gpdc._print_significant_links(p_matrix=results['p_matrix'],
                                        q_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=0.01)
    link_matrix = pcmci_gpdc._return_significant_parents(
        pq_matrix=q_matrix,
        val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix