class PCMCIPlugin:
    """Plugin that builds a link-coefficient table, generates data from it
    and runs PCMCI on the generated data.

    The three methods are meant to be called in order: ``input`` ->
    ``run`` -> ``output``.
    """

    def input(self, inputfile):
        """Read link coefficients from a tab-separated text file.

        Every non-blank line must contain at least four tab-separated
        fields: ``var`` (int), ``driver`` (int), ``lag`` (int) and
        ``coeff`` (float).  The result is stored in ``self.links_coeffs``
        as ``{var: [((driver, lag), coeff), ...]}``, preserving file order.

        Blank lines (e.g. a trailing newline at the end of the file) are
        skipped.

        Raises:
            ValueError: if a field cannot be converted to int/float.
            IndexError: if a non-blank line has fewer than four fields.
        """
        self.links_coeffs = {}
        # ``with`` guarantees the handle is closed even if a line is malformed.
        with open(inputfile, 'r') as infile:
            for line in infile:
                if not line.strip():
                    continue
                contents = line.split('\t')
                var = int(contents[0])
                driver = int(contents[1])
                lag = int(contents[2])
                # float() tolerates the trailing newline of the last field.
                coeff = float(contents[3])
                self.links_coeffs.setdefault(var, []).append(
                    ((driver, lag), coeff))

    def run(self):
        """Generate data from ``self.links_coeffs`` and run PCMCI on it.

        Data is produced by ``pp.var_process(..., T=1000)``, wrapped in a
        ``pp.DataFrame`` and analysed with a ``ParCorr`` test via
        ``PCMCI.run_pcmci(tau_max=2, pc_alpha=None)``.  The PCMCI object and
        its results are kept on ``self.pcmciobj`` and ``self.results``.
        """
        data, _ = pp.var_process(self.links_coeffs, T=1000)
        dataframe = pp.DataFrame(data)
        cond_ind_test = ParCorr()
        self.pcmciobj = PCMCI(dataframe=dataframe,
                              cond_ind_test=cond_ind_test)
        self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)

    def output(self, outputfile):
        """Report the significant links at ``alpha_level=0.05``.

        Reporting is delegated to ``PCMCI.print_significant_links``.
        NOTE(review): ``outputfile`` is accepted but not used by this method.
        """
        self.pcmciobj.print_significant_links(
            p_matrix=self.results['p_matrix'],
            val_matrix=self.results['val_matrix'],
            alpha_level=0.05)
def a_run_pcmciplus(a_pcmciplus, a_pcmciplus_params):
    """Run PCMCI+ with the given configuration.

    Args:
        a_pcmciplus: 5-tuple ``(dataframe, true_graph, links_coeffs,
            tau_min, tau_max)``.
        a_pcmciplus_params: 5-tuple ``(pc_alpha, contemp_collider_rule,
            conflict_resolution, reset_lagged_links, cond_ind_test_class)``
            where ``cond_ind_test_class`` is ``'oracle_ci'`` or
            ``'par_corr'``.

    Returns:
        ``(results['graph'], true_graph)`` - the estimated graph and the
        expected one.

    Raises:
        ValueError: if ``cond_ind_test_class`` is not a supported name.
    """
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = a_pcmciplus

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()
    else:
        # Fail here with a clear message instead of an UnboundLocalError
        # further down when cond_ind_test is first used.
        raise ValueError(
            "Unknown cond_ind_test_class: {!r}".format(cond_ind_test_class))

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=2)
    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )

    # Print true links
    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)

    # Return the results and the expected result
    return results['graph'], true_graph
writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL) #[[[1 2 3]]] Three brackets to get through. for sector in val_matrix: print("sector: ", sector) for row in sector: print("row: ", row) writer.writerow(row) writer.writerow([]) q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh') pcmci.print_significant_links(p_matrix=results['p_matrix'], q_matrix=q_matrix, val_matrix=results['val_matrix'], alpha_level=0.01) link_matrix = pcmci.return_significant_parents( pq_matrix=q_matrix, val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix'] graph = tp.plot_graph( val_matrix=results['val_matrix'], link_matrix=link_matrix, var_names=headers, link_colorbar_label='cross-MCI', node_colorbar_label='auto-MCI', ) if verbose > 1:
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    """Assemble the data for split ``s``, run PCMCI and collect the parents
    of the response variable.

    Args:
        ex: settings dictionary.  Keys read here: ``SaveTF``,
            ``fig_subpath``, ``params``, ``pcA_sets``, ``pcA_set``,
            ``alpha_level_tig``, ``RV_name`` (and the entry it names),
            ``vars``, ``import_prec_ts``, ``precursor_ts``, ``tfreq``,
            ``sstartdate``, ``senddate``, ``startyear``, ``endyear``,
            ``tigr_tau_max``, ``max_comb_actors``.
        outdic_actors: mapping from variable name to an actor object whose
            ``ts_corr[s]`` provides ``.values``, ``.columns``, ``.index``,
            ``.shape`` and ``.size``.
        s: key of the train/test split to process.
        df_splits: object indexed with ``.loc[s]`` that yields the
            ``'TrainIsTrue'`` and ``'RV_mask'`` columns.
        map_proj: passed through to ``rgcpd.print_particular_region_new``.

    Returns:
        ``(df, df_data)``: the output of ``rgcpd.bookkeeping_precursors``
        and the assembled data frame (with ``'TrainIsTrue'`` and
        ``'RV_mask'`` columns added).

    When ``ex['SaveTF'] == True`` everything printed during the run is
    captured and, on success, written to
    ``<fig_subpath>/s<s>_<params>.txt``.  ``sys.stdout`` is restored even
    if the run raises.
    """
    # `== True` is kept deliberately so only True/1 enable saving.
    save_output = ex['SaveTF'] == True
    f = None
    if save_output:
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'),
                                  'w+')

    try:
        # amount of text printed:
        verbosity = 3

        # alpha level for independence test within the pc procedure
        # (finding parents)
        pc_alpha = ex['pcA_sets'][ex['pcA_set']]
        # alpha level for multiple linear regression model while
        # conditioning on parents of parents
        alpha_level = ex['alpha_level_tig']
        print('run tigramite 4, run.pcmci')
        print(('alpha level(s) for independence tests within the pc procedure'
               '(finding parents): {}'.format(pc_alpha)))
        print((
            'alpha level for multiple linear regression model while conditining on parents of '
            'parents: {}'.format(ex['alpha_level_tig'])))

        # Retrieve traintest info
        traintest = df_splits

        # load Response Variable class
        RV = ex[ex['RV_name']]
        # create list with all actors, these will be merged into the
        # fulldata array
        allvar = ex['vars'][0]
        var_names_corr = []
        actorlist = []
        cols = [[RV.name]]

        for var in allvar[:]:
            print(var)
            actor = outdic_actors[var]
            if actor.ts_corr[s].size != 0:
                ts_train = actor.ts_corr[s].values
                actorlist.append(ts_train)
                # create array which numbers the regions
                var_idx = allvar.index(var)
                n_regions = actor.ts_corr[s].shape[1]
                actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                                  for i in range(n_regions)]
                # Array of corresponding regions with var_names_corr
                # (first entry is RV)
                var_names_corr = var_names_corr + actor.var_info
                cols.append(list(actor.ts_corr[s].columns))
                index_dates = actor.ts_corr[s].index
        var_names_corr.insert(0, RV.name)

        # stack actor time-series together:
        fulldata = np.concatenate(tuple(actorlist), axis=1)
        print(('There are {} regions in total'.format(fulldata.shape[1])))
        # add the full 1D time series of interest as first entry:
        fulldata = np.column_stack((RV.RVfullts, fulldata))
        df_data = pd.DataFrame(fulldata, columns=flatten(cols),
                               index=index_dates)

        if ex['import_prec_ts'] == True:
            var_names_full = var_names_corr.copy()
            for d in ex['precursor_ts']:
                path_data = d[1]
                if len(path_data) > 1:
                    path_data = ''.join(list(path_data))
                # skip first col because it is the RV ts
                df_data_ext = func_fc.load_hdf5(
                    path_data)['df_data'].iloc[:, 1:].loc[s]
                cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                        df_data_ext.dtypes == 'float32')
                cols_ext = list(df_data_ext.columns[cols_ts])
                # cols_ext must be of format '{}_{int}_{}'
                lab_int = 100
                for i, c in enumerate(cols_ext):
                    char = c.split('_')[1]
                    if char.isdigit():
                        pass
                    else:
                        cols_ext[i] = c.replace(char, str(lab_int)) + char
                        lab_int += 1
                # NOTE(review): if a label was rewritten above, the rewritten
                # name is what gets looked up here - confirm such columns
                # exist in the loaded frame.
                df_data_ext = df_data_ext[cols_ext]
                to_freq = ex['tfreq']
                if to_freq != 1:
                    start_end_date = (ex['sstartdate'], ex['senddate'])
                    start_end_year = (ex['startyear'], ex['endyear'])
                    df_data_ext = functions_pp.time_mean_bins(
                        df_data_ext, to_freq,
                        start_end_date,
                        start_end_year,
                        seldays='part')[0]
                # Expand var_names_corr
                n = var_names_full[-1][0] + 1
                add_n = n + len(cols_ext)
                n_var_idx = var_names_full[-1][-1] + 1
                for i in range(n, add_n):
                    var_names_full.append([i, cols_ext[i - n], n_var_idx])
                df_data = df_data.merge(df_data_ext, left_index=True,
                                        right_index=True)
        else:
            var_names_full = var_names_corr

        bool_train = traintest.loc[s]['TrainIsTrue']
        bool_RV_train = np.logical_and(bool_train,
                                       traintest.loc[s]['RV_mask'])
        dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
        dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index

        RVfull_train = RV.RVfullts.sel(time=dates_train)
        datesfull_train = pd.to_datetime(RVfull_train.time.values)
        data = df_data.loc[datesfull_train].values
        print((data.shape))

        # get RV datamask (same shape as data): one flag per training date,
        # repeated for every column
        data_mask = [d in dates_RV_train for d in datesfull_train]
        data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

        # add traintest mask to fulldata
        dates_all = pd.to_datetime(RV.RVfullts.time.values)
        dates_RV = pd.to_datetime(RV.RV_ts.time.values)
        df_data['TrainIsTrue'] = [d in datesfull_train for d in dates_all]
        df_data['RV_mask'] = [d in dates_RV for d in dates_all]

        # Initialize dataframe object (needed for tigramite functions)
        dataframe = pp.DataFrame(data=data, mask=data_mask,
                                 var_names=var_names_full)

        # pc algorithm: only parents for selected_variables are calculated
        parcorr = ParCorr(significance='analytic',
                          mask_type='y',
                          verbosity=verbosity)

        # selected_variables : list of integers, optional (default: range(N))
        #    Specify to estimate parents only for selected variables. If None
        #    is passed, parents are estimated for all variables.
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=parcorr,
                      selected_variables=None,
                      verbosity=4)

        results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                                  pc_alpha=pc_alpha,
                                  tau_min=0,
                                  max_combinations=ex['max_comb_actors'])

        # multiple testing problem: correct the p-values ('fdr_bh')
        q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                               fdr_method='fdr_bh')

        pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                      q_matrix=q_matrix,
                                      val_matrix=results['val_matrix'],
                                      alpha_level=alpha_level)

        # returns all parents, not just causal precursors (of lag>0)
        sig = rgcpd.return_sign_parents(pcmci,
                                        pq_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=alpha_level)

        all_parents = sig['parents']

        # parents of variable 0, which is the RV (first column of the data)
        links_RV = all_parents[0]

        df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)

        rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                          outdic_actors, map_proj, ex)

        if save_output and sys.version[:1] == '3':
            # persist everything that was printed during this run
            fname = f's{s}_' + ex['params'] + '.txt'
            with io.open(os.path.join(ex['fig_subpath'], fname),
                         mode='w+') as file:
                file.write(f.getvalue())
    finally:
        # Always hand stdout back, otherwise a failure above would silence
        # every later print in the process.
        if save_output:
            sys.stdout = orig_stdout
            if f is not None:
                f.close()

    return df, df_data
def test_order_independence_pcmciplus(a_pcmciplus_order_independence,
                                      a_pcmciplus_params_order_independence):
    """Check that the PCMCI+ graph does not depend on the variable order.

    PCMCI+ is run once on the data as given, then once for every
    permutation of the columns.  For each permutation the reference graph
    is re-indexed with ``np.take`` along axes 0 and 1 and must equal the
    graph obtained on the permuted data.

    Args:
        a_pcmciplus_order_independence: 5-tuple ``(dataframe, true_graph,
            links_coeffs, tau_min, tau_max)``.
        a_pcmciplus_params_order_independence: 5-tuple ``(pc_alpha,
            contemp_collider_rule, conflict_resolution, reset_lagged_links,
            cond_ind_test_class)`` with ``cond_ind_test_class`` being
            ``'oracle_ci'`` or ``'par_corr'``.

    Raises:
        ValueError: if ``cond_ind_test_class`` is not a supported name.
        AssertionError: if a permuted run disagrees with the reference.
    """
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = \
        a_pcmciplus_order_independence

    data = dataframe.values
    _, N = data.shape

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params_order_independence

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()
    else:
        # Fail here with a clear message instead of an UnboundLocalError
        # further down when cond_ind_test is first used.
        raise ValueError(
            "Unknown cond_ind_test_class: {!r}".format(cond_ind_test_class))

    # Identical settings are used for the reference and every permuted run.
    run_kwargs = dict(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=1)

    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)

    results = pcmci.run_pcmciplus(**run_kwargs)
    correct_results = results['graph']

    for perm in itertools.permutations(range(N)):
        print(perm)
        data_new = np.copy(data[:, perm])
        dataframe = pp.DataFrame(data_new, var_names=list(perm))
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=1)
        results = pcmci.run_pcmciplus(**run_kwargs)

        # Re-index the reference graph to the permuted variable order.
        tmp = np.take(correct_results, perm, axis=0)
        back_converted_result = np.take(tmp, perm, axis=1)

        # Print per-lag diagnostics before the assertion below fires.
        for tau in range(tau_max + 1):
            if not np.allclose(results['graph'][:, :, tau],
                               back_converted_result[:, :, tau]):
                print(tau)
                print(results['graph'][:, :, tau])
                print(back_converted_result[:, :, tau])
                print(back_converted_result[:, :, tau] -
                      results['graph'][:, :, tau])
                print(perm)

        np.testing.assert_equal(results['graph'], back_converted_result)
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0,
              tau_max=1, pc_alpha=None, alpha_level=0.05, max_conds_dim=4,
              max_combinations=1, max_conds_py=None, max_conds_px=None,
              verbosity=4):
    """Run PCMCI with a ``ParCorr`` test and FDR-corrected p-values.

    Args:
        data: 2-D array; its shape is unpacked into two dimensions.
        data_mask: mask handed to ``pp.DataFrame`` together with ``data``.
        var_names: variable names handed to ``pp.DataFrame``.
        path_outsub2: directory for the log file, or ``False`` to leave
            stdout untouched.  Any other value enables capturing.
        s: split identifier, used in the log file name
            ``split_<s>_PCMCI_out.txt``.
        tau_min, tau_max, pc_alpha, max_conds_dim, max_combinations,
        max_conds_py, max_conds_px: forwarded to ``PCMCI.run_pcmci``.
        alpha_level: forwarded to ``PCMCI.print_significant_links``.
        verbosity: forwarded to both ``ParCorr`` and ``PCMCI``.

    Returns:
        ``(pcmci, q_matrix, results)``: the PCMCI object, the p-values
        corrected with ``fdr_method='fdr_bh'`` and the ``run_pcmci`` result.

    While capturing is enabled everything printed is buffered and, on
    success, written to the log file.  ``sys.stdout`` is restored even if
    the run raises.
    """
    capture = path_outsub2 is not False
    if capture:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()

    try:
        # Time, Regions - unpacking fails fast unless data is 2-D
        T, N = data.shape

        # Initialize dataframe object (needed for tigramite functions)
        dataframe = pp.DataFrame(data=data, mask=data_mask,
                                 var_names=var_names)

        # pc algorithm: only parents for selected_variables are calculated
        parcorr = ParCorr(significance='analytic',
                          mask_type='y',
                          verbosity=verbosity)

        # selected_variables : list of integers, optional (default: range(N))
        #    Specify to estimate parents only for selected variables. If None
        #    is passed, parents are estimated for all variables.
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=parcorr,
                      selected_variables=None,
                      verbosity=verbosity)

        results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha,
                                  tau_min=tau_min,
                                  max_conds_dim=max_conds_dim,
                                  max_combinations=max_combinations,
                                  max_conds_px=max_conds_px,
                                  max_conds_py=max_conds_py)

        # multiple testing problem: correct the p-values ('fdr_bh')
        q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                               fdr_method='fdr_bh')

        pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                      q_matrix=q_matrix,
                                      val_matrix=results['val_matrix'],
                                      alpha_level=alpha_level)

        if capture:
            # persist everything that was printed during this run
            with io.open(txt_fname, mode='w+') as file:
                file.write(f.getvalue())
    finally:
        # Always hand stdout back, otherwise a failure above would silence
        # every later print in the process.
        if capture:
            sys.stdout = orig_stdout
            f.close()

    return pcmci, q_matrix, results
# usecols=['P_F', 'SWC_F_MDS_1', 'TA_F']) data = data.replace(-9999, np.nan) data = data.fillna(data.mean()).values #var_names = [r'$TA$', r'$PA$', r'$P$', r'$T$', r'$SWC$'] #var_names = [r'$P$', r'$SWC$'] #var_names = [r'$P$', r'$SWC$', r'$TA$'] var_names = [r'$P$', r'$SWC$', r'$TA$', r'$PA$', r'$T$'] # print data dataframe = pp.DataFrame(np.array(data), var_names=var_names) tp.plot_timeseries(dataframe) plt.show() # main for PCMCI parcorr = ParCorr() pcmci_parcorr = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=2) results = pcmci_parcorr.run_pcmci(tau_max=2, pc_alpha=0.2) pcmci_parcorr.print_significant_links(p_matrix=results['p_matrix'], val_matrix=results['val_matrix'], alpha_level=0.01) link_matrix = pcmci_parcorr.return_significant_parents( pq_matrix=results['p_matrix'], val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix'] # Plot time series graph tp.plot_time_series_graph(figsize=(6, 3), val_matrix=results['val_matrix'], link_matrix=link_matrix, var_names=var_names, link_colorbar_label='MCI')