def test_pcmci(self):
    # Setting up strict test level
    pc_alpha = 0.05  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
    tau_max = 2
    alpha_level = 0.01

    dataframe = pp.DataFrame(self.data)
    cond_ind_test = ParCorr(verbosity=verbosity)
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=verbosity)

    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha)

    parents = pcmci._return_significant_parents(
        pq_matrix=results['p_matrix'],
        val_matrix=results['val_matrix'],
        alpha_level=alpha_level)['parents']

    assert_graphs_equal(parents, self.true_parents)
def test_mci(self):
    # Setting up strict test level
    pc_alpha = 0.05  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
    tau_max = 2
    alpha_level = 0.01

    dataframe = pp.DataFrame(self.data)
    cond_ind_test = ParCorr(verbosity=verbosity)
    pcmci = PCMCI(selected_variables=None,
                  dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=verbosity)

    results = pcmci.run_mci(selected_links=None,
                            tau_min=1,
                            tau_max=tau_max,
                            parents=self.true_parents,
                            max_conds_py=None,
                            max_conds_px=None)

    parents = pcmci._return_significant_parents(
        pq_matrix=results['p_matrix'],
        val_matrix=results['val_matrix'],
        alpha_level=alpha_level)['parents']

    assert_graphs_equal(parents, self.true_parents)
def test_pc_stable_max_conds_dim(self):
    # Setting up strict test level
    pc_alpha = 0.05  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
    tau_max = 2
    alpha_level = 0.01

    dataframe = pp.DataFrame(self.data)
    cond_ind_test = ParCorr(verbosity=verbosity)
    pcmci = PCMCI(selected_variables=None,
                  dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=verbosity)

    pcmci.run_pc_stable(selected_links=None,
                        tau_min=1,
                        tau_max=tau_max,
                        save_iterations=False,
                        pc_alpha=pc_alpha,
                        max_conds_dim=2,
                        max_combinations=1)

    parents = pcmci.all_parents
    assert_graphs_equal(parents, self.true_parents)
def create_tigramite_dataframe(dataset, exclude=None):
    '''Creates a TIGRAMITE dataframe from a pandas dataframe.

    Args:
        dataset: A pandas dataframe with a timestamp column and numeric
            measures.
        exclude: A list of columns to be excluded from the TIGRAMITE
            dataframe.

    Returns:
        A TIGRAMITE dataframe and the list of variable names used.
    '''
    var_list = list(dataset)
    if exclude is not None:
        if all(i in var_list for i in exclude):
            for i in exclude:
                var_list.remove(i)
        else:
            raise ValueError('Not all columns to exclude exist in the '
                             'dataframe; check the spelling.')

    data = dataset[var_list]
    if 'timestamp' in list(dataset):
        datatime = dataset['timestamp']
    else:
        raise ValueError("The dataframe must contain a 'timestamp' column; "
                         'check the spelling.')

    dataframe = pp.DataFrame(data.values,
                             datatime=datatime.values,
                             var_names=var_list)
    return dataframe, var_list
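# A minimal usage sketch for create_tigramite_dataframe. The column names and
# random data below are hypothetical; any pandas dataframe with a 'timestamp'
# column and numeric columns would do. Note that 'timestamp' itself should be
# excluded so that only numeric columns end up in the data array.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'timestamp': pd.date_range('2020-01-01', periods=100, freq='D'),
    'x': np.random.randn(100),
    'y': np.random.randn(100),
})
dataframe, var_list = create_tigramite_dataframe(df, exclude=['timestamp'])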
def gen_data_frame(links_coeffs, time, seed_val):
    # Set the random seed
    np.random.seed(seed_val)
    # Generate the data
    data, _ = pp.var_process(links_coeffs, T=time)
    # Get the true parents
    true_parents = _get_parent_graph(links_coeffs)
    return pp.DataFrame(data), true_parents
def pcmci_setup(data):
    dataframe = pp.DataFrame(data.values, var_names=list(data.columns))
    parcorr = ParCorr(significance='analytic')
    pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)
    return pcmci
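# A minimal usage sketch for pcmci_setup, assuming a pandas DataFrame of
# shape (T, N) with numeric columns; the column names and data here are
# illustrative, not part of the original code.
import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.randn(500, 3), columns=['x0', 'x1', 'x2'])
pcmci = pcmci_setup(data)
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)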
def _set_dataframe(self, dataset):
    dataframe = pp.DataFrame(dataset)
    # Set the data for this iteration of the algorithm
    self.dataframe = dataframe
    # Store the shape of the data in the T and N variables
    self.T, self.N = self.dataframe.values.shape
    # Some checks
    if (np.any(np.array(self.selected_variables) < 0) or
            np.any(np.array(self.selected_variables) >= self.N)):
        raise ValueError("selected_variables must be within 0..N-1")
def a_sample(request):
    # Set the parameters
    links_coeffs, time, seed_val = request.param
    # Set the random seed
    np.random.seed(seed_val)
    # Generate the data
    data, _ = pp.var_process(links_coeffs, T=time)
    # Get the true parents
    true_parents = _get_parent_graph(links_coeffs)
    return pp.DataFrame(data), true_parents
def a_pcmciplus_order_independence(request):
    # Set the parameters
    links_coeffs, time, seed_val = request.param
    # Retrieve lags
    tau_min, tau_max = pp._get_minmax_lag(links_coeffs)
    # Generate the data
    data, _ = pp.structural_causal_process(links=links_coeffs,
                                           T=time,
                                           noises=None,
                                           seed=seed_val)
    # Get the true graph
    true_graph = pp.links_to_graph(links_coeffs, tau_max=tau_max)
    return pp.DataFrame(data), true_graph, links_coeffs, tau_min, tau_max
def bootstrapping_ar_model(model, num_bs=200, seed=52):
    T, N = model.data.shape

    # Standardize the data
    std_data = np.zeros_like(model.data)
    for i in range(N):
        std_data[:, i] = ((model.data[:, i] - model.data[:, i].mean())
                          / model.data[:, i].std())

    # Initial model coefficients
    phi = model.phi
    tau_max = phi.shape[0] - 1

    # Residuals of the fitted AR model
    residuals = np.zeros((T - tau_max, N))
    for i in range(T - tau_max):
        model_eval = np.zeros((1, N))
        for tau in range(1, tau_max + 1):
            model_eval += np.dot(phi[tau], std_data[i + tau_max - tau])
        residuals[i, :] = std_data[i + tau_max, :] - model_eval

    # Generate bootstrap data by resampling residuals with replacement
    bs_models = []
    ts_indexes = np.arange(residuals.shape[0])
    np.random.seed(seed)
    for _ in range(num_bs):
        bs_residuals = residuals[np.random.choice(ts_indexes, size=T,
                                                  replace=True), :]
        # Build the bootstrap series from the fitted coefficients + residuals
        bs_x = np.zeros((T, N))
        for t in range(T):
            if t < tau_max:
                bs_x[t, :] = bs_residuals[t, :]
            else:
                model_eval = np.zeros((1, N))
                for tau in range(1, tau_max + 1):
                    model_eval += np.dot(phi[tau], bs_x[t - tau])
                bs_x[t, :] = model_eval + bs_residuals[t, :]

        # Fit a model to the bootstrap data; note that `parent_dict` is
        # expected to be defined at module level.
        bs_med = LinearMediation(dataframe=pp.DataFrame(data=bs_x))
        bs_med.fit_model(all_parents=parent_dict['parents'])
        bs_models.append(bs_med)

    return bs_models
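# A minimal usage sketch for bootstrapping_ar_model, assuming `parent_dict`
# (a dict with a 'parents' entry, e.g. from a PCMCI run) is defined at module
# level and `data` is a (T, N) numpy array; all names here are illustrative.
med = LinearMediation(dataframe=pp.DataFrame(data=data))
med.fit_model(all_parents=parent_dict['parents'])
bs_models = bootstrapping_ar_model(med, num_bs=200, seed=52)
# e.g. a bootstrap distribution of one link coefficient:
coeffs = [m.get_coeff(i=0, tau=1, j=1) for m in bs_models]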
def calculate(para_setup):

    para_setup_string, sam = para_setup

    paras = para_setup_string.split('-')
    paras = [w.replace("'", "") for w in paras]

    model = str(paras[0])
    N = int(paras[1])
    n_links = int(paras[2])
    min_coeff = float(paras[3])
    coeff = float(paras[4])
    auto = float(paras[5])
    contemp_fraction = float(paras[6])
    frac_unobserved = float(paras[7])
    max_true_lag = int(paras[8])
    T = int(paras[9])

    ci_test = str(paras[10])
    method = str(paras[11])
    pc_alpha = float(paras[12])
    tau_max = int(paras[13])

    #############################################
    ##  Data
    #############################################
    def lin_f(x):
        return x

    def f2(x):
        return x + 5. * x**2 * np.exp(-x**2 / 20.)

    if model == 'autobidirected':
        if verbosity > 999:
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        random_state = np.random.RandomState(model_seed)

        links = {
            0: [((0, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            1: [],
            2: [((2, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            3: [((3, -1), auto, lin_f), ((2, -1), min_coeff, lin_f)],
        }
        observed_vars = [0, 2, 3]

        noises = [random_state.randn for j in range(len(links))]

        data_all, nonstationary = mod.generate_nonlinear_contemp_timeseries(
            links=links, T=T, noises=noises, random_state=random_state)
        data = data_all[:, observed_vars]

    elif 'random' in model:
        if 'lineargaussian' in model:
            coupling_funcs = [lin_f]
            noise_types = ['gaussian']  # , 'weibull', 'uniform']
            noise_sigma = (0.5, 2)

        elif 'nonlinearmixed' in model:
            coupling_funcs = [lin_f, f2]
            noise_types = ['gaussian', 'gaussian', 'weibull']
            noise_sigma = (0.5, 2)

        if coeff < min_coeff:
            min_coeff = coeff
        couplings = list(np.arange(min_coeff, coeff + 0.1, 0.1))
        couplings += [-c for c in couplings]

        auto_deps = list(np.arange(max(0., auto - 0.3), auto + 0.01, 0.05))

        # Models may be non-stationary. Hence, we iterate over a number of
        # seeds to find a stationary one regarding network topology, noises,
        # etc.
        if verbosity > 999:
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        for ir in range(1000):
            random_state = np.random.RandomState(model_seed)

            N_all = math.floor(N / (1. - frac_unobserved))
            n_links_all = math.ceil(n_links / N * N_all)
            observed_vars = np.sort(random_state.choice(
                range(N_all),
                size=math.ceil((1. - frac_unobserved) * N_all),
                replace=False)).tolist()

            links = mod.generate_random_contemp_model(
                N=N_all,
                L=n_links_all,
                coupling_coeffs=couplings,
                coupling_funcs=coupling_funcs,
                auto_coeffs=auto_deps,
                tau_max=max_true_lag,
                contemp_fraction=contemp_fraction,
                random_state=random_state)

            class noise_model:
                def __init__(self, sigma=1):
                    self.sigma = sigma

                def gaussian(self, T):
                    # Get zero-mean unit variance gaussian distribution
                    return self.sigma * random_state.randn(T)

                def weibull(self, T):
                    # Get zero-mean sigma variance weibull distribution
                    a = 2
                    mean = scipy.special.gamma(1. / a + 1)
                    variance = (scipy.special.gamma(2. / a + 1)
                                - scipy.special.gamma(1. / a + 1)**2)
                    return (self.sigma
                            * (random_state.weibull(a=a, size=T) - mean)
                            / np.sqrt(variance))

                def uniform(self, T):
                    # Get zero-mean sigma variance uniform distribution
                    mean = 0.5
                    variance = 1. / 12.
                    return (self.sigma
                            * (random_state.uniform(size=T) - mean)
                            / np.sqrt(variance))

            noises = []
            for j in links:
                noise_type = random_state.choice(noise_types)
                sigma = (noise_sigma[0]
                         + (noise_sigma[1] - noise_sigma[0])
                         * random_state.rand())
                noises.append(getattr(noise_model(sigma), noise_type))

            if 'discretebinom' in model:
                if 'binom2' in model:
                    n_binom = 2
                elif 'binom4' in model:
                    n_binom = 4

                data_all_check, nonstationary = discretized_scp(
                    links=links, T=T + 10000, n_binom=n_binom,
                    random_state=random_state)
            else:
                data_all_check, nonstationary = \
                    mod.generate_nonlinear_contemp_timeseries(
                        links=links, T=T + 10000, noises=noises,
                        random_state=random_state)

            # If the model is stationary, break the loop
            if not nonstationary:
                data_all = data_all_check[:T]
                data = data_all[:, observed_vars]
                break
            else:
                print("Trial %d: Not a stationary model" % ir)
                model_seed += 10000
    else:
        raise ValueError("model %s not known" % model)

    if nonstationary:
        raise ValueError("No stationary model found: %s" % model)

    true_graph = utilities._get_pag_from_dag(links,
                                             observed_vars=observed_vars,
                                             tau_max=tau_max,
                                             verbosity=verbosity)[1]

    if verbosity > 0:
        print("True Links")
        for j in links:
            print(j, links[j])
        print("observed_vars = ", observed_vars)
        print("True PAG")
        if tau_max > 0:
            for lag in range(tau_max + 1):
                print(true_graph[:, :, lag])
        else:
            print(true_graph.squeeze())

    if plot_data:
        print("PLOTTING")
        for j in range(N):
            pyplot.plot(data[:, j])
        pyplot.show()

    computation_time_start = time.time()

    dataframe = pp.DataFrame(data)

    #############################################
    ##  Methods
    #############################################
    # Specify conditional independence test object
    if ci_test == 'par_corr':
        cond_ind_test = ParCorr(significance='analytic',
                                recycle_residuals=True)
    elif ci_test == 'cmi_knn':
        cond_ind_test = CMIknn(knn=0.1, sig_samples=500, sig_blocklength=1)
    elif ci_test == 'gp_dc':
        cond_ind_test = GPDC(recycle_residuals=True)
    elif ci_test == 'discg2':
        cond_ind_test = DiscG2()
    else:
        raise ValueError("CI test not recognized.")

    if 'lpcmci' in method:
        method_paras = method.split('_')
        n_preliminary_iterations = int(method_paras[1][7:])

        if 'prelimonly' in method:
            prelim_only = True
        else:
            prelim_only = False

        lpcmci = LPCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)

        lpcmcires = lpcmci.run_lpcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_p_non_ancestral=3,
            n_preliminary_iterations=n_preliminary_iterations,
            prelim_only=prelim_only,
            verbosity=verbosity)

        graph = lpcmci.graph
        val_min = lpcmci.val_min_matrix
        max_cardinality = lpcmci.cardinality_matrix

    elif method == 'svarfci':
        svarfci = SVARFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        svarfcires = svarfci.run_svarfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_cond_px=0,
            max_p_dsep=3,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarfci.graph
        val_min = svarfci.val_min_matrix
        max_cardinality = svarfci.cardinality_matrix

    elif method == 'svarrfci':
        svarrfci = SVARRFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        svarrfcires = svarrfci.run_svarrfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarrfci.graph
        val_min = svarrfci.val_min_matrix
        max_cardinality = svarrfci.cardinality_matrix
    else:
        raise ValueError("%s not implemented." % method)

    computation_time_end = time.time()
    computation_time = computation_time_end - computation_time_start

    return {
        'true_graph': true_graph,
        'val_min': val_min,
        'max_cardinality': max_cardinality,
        # Method results
        'computation_time': computation_time,
        'graph': graph,
    }
np.random.seed(42)
links_coeffs = {0: [((0, -1), 0.7), ((1, -1), -0.8)],
                1: [((1, -1), 0.8), ((3, -1), 0.8)],
                2: [((2, -1), 0.5), ((1, -2), 0.5), ((3, -3), 0.6)],
                3: [((3, -1), 0.4)],
                }
T = 1000
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)


# In[3]:

T, N = data.shape
var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$']
dataframe = pp.DataFrame(data,
                         datatime=np.arange(len(data)),
                         var_names=var_names)


# In[4]:

data.shape


# In[5]:

tp.plot_timeseries(dataframe)
plt.show()


# In[6]:

parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0,
              tau_max=1, pc_alpha=None, alpha_level=0.05, max_conds_dim=4,
              max_combinations=1, max_conds_py=None, max_conds_px=None,
              verbosity=4):

    if path_outsub2 is not False:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()

    # ==========================================================================
    # tigramite 4
    # ==========================================================================
    T, N = data.shape  # Time, Regions

    # ==========================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ==========================================================================
    dataframe = pp.DataFrame(data=data, mask=data_mask, var_names=var_names)

    # ==========================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ==========================================================================
    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)

    # ==========================================================================
    # multiple testing problem:
    # ==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=verbosity)
    # selected_variables : list of integers, optional (default: range(N))
    #     Specify to estimate parents only for selected variables. If None is
    #     passed, parents are estimated for all variables.

    # selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=tau_max,
                              pc_alpha=pc_alpha,
                              tau_min=tau_min,
                              max_conds_dim=max_conds_dim,
                              max_combinations=max_combinations,
                              max_conds_px=max_conds_px,
                              max_conds_py=max_conds_py)

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    if path_outsub2 is not False:
        file = io.open(txt_fname, mode='w+')
        file.write(f.getvalue())
        file.close()
        f.close()
        sys.stdout = orig_stdout

    return pcmci, q_matrix, results
def test_order_independence_pcmciplus(a_pcmciplus_order_independence,
                                      a_pcmciplus_params_order_independence):
    # Unpack the dataframe, the true graph, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = \
        a_pcmciplus_order_independence
    data = dataframe.values
    T, N = data.shape

    # Unpack the parameters
    (pc_alpha,
     contemp_collider_rule,
     conflict_resolution,
     reset_lagged_links,
     cond_ind_test_class,
     ) = a_pcmciplus_params_order_independence

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=1)
    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)

    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None)
    correct_results = results['graph']

    # The graph estimate must be invariant under permutations of the variables
    for perm in itertools.permutations(range(N)):
        print(perm)
        data_new = np.copy(data[:, perm])
        dataframe = pp.DataFrame(data_new, var_names=list(perm))
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=1)
        results = pcmci.run_pcmciplus(
            selected_links=None,
            tau_min=tau_min,
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            contemp_collider_rule=contemp_collider_rule,
            conflict_resolution=conflict_resolution,
            reset_lagged_links=reset_lagged_links,
            max_conds_dim=None,
            max_conds_py=None,
            max_conds_px=None)

        tmp = np.take(correct_results, perm, axis=0)
        back_converted_result = np.take(tmp, perm, axis=1)

        for tau in range(tau_max + 1):
            if not np.allclose(results['graph'][:, :, tau],
                               back_converted_result[:, :, tau]):
                print(tau)
                print(results['graph'][:, :, tau])
                print(back_converted_result[:, :, tau])
                print(back_converted_result[:, :, tau]
                      - results['graph'][:, :, tau])
                print(perm)

        np.testing.assert_equal(results['graph'], back_converted_result)
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    #=====================================================================
    # 4) PCMCI-algorithm
    #=====================================================================

    # save output
    if ex['SaveTF'] == True:
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'),
                                  'w+')

    # amount of text printed:
    verbosity = 3

    # alpha level for independence test within the pc procedure (finding
    # parents)
    pc_alpha = ex['pcA_sets'][ex['pcA_set']]
    # alpha level for multiple linear regression model while conditioning on
    # parents of parents
    alpha_level = ex['alpha_level_tig']
    print('run tigramite 4, run.pcmci')
    print(('alpha level(s) for independence tests within the pc procedure '
           '(finding parents): {}'.format(pc_alpha)))
    print(('alpha level for multiple linear regression model while '
           'conditioning on parents of parents: {}'.format(
               ex['alpha_level_tig'])))

    # Retrieve traintest info
    traintest = df_splits

    # load Response Variable class
    RV = ex[ex['RV_name']]
    # create list with all actors, these will be merged into the fulldata
    # array
    allvar = ex['vars'][0]
    var_names_corr = []
    actorlist = []
    cols = [[RV.name]]

    for var in allvar[:]:
        print(var)
        actor = outdic_actors[var]
        if actor.ts_corr[s].size != 0:
            ts_train = actor.ts_corr[s].values
            actorlist.append(ts_train)
            # create array which numbers the regions
            var_idx = allvar.index(var)
            n_regions = actor.ts_corr[s].shape[1]
            actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                              for i in range(n_regions)]
            # Array of corresponding regions with var_names_corr
            # (first entry is RV)
            var_names_corr = var_names_corr + actor.var_info
            cols.append(list(actor.ts_corr[s].columns))
            index_dates = actor.ts_corr[s].index
    var_names_corr.insert(0, RV.name)

    # stack actor time-series together:
    fulldata = np.concatenate(tuple(actorlist), axis=1)
    print('There are {} regions in total'.format(fulldata.shape[1]))
    # add the full 1D time series of interest as first entry:
    fulldata = np.column_stack((RV.RVfullts, fulldata))
    df_data = pd.DataFrame(fulldata, columns=flatten(cols), index=index_dates)

    if ex['import_prec_ts'] == True:
        var_names_full = var_names_corr.copy()
        for d in ex['precursor_ts']:
            path_data = d[1]
            if len(path_data) > 1:
                path_data = ''.join(list(path_data))
            # skip first col because it is the RV ts
            df_data_ext = func_fc.load_hdf5(
                path_data)['df_data'].iloc[:, 1:].loc[s]
            cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                    df_data_ext.dtypes == 'float32')
            cols_ext = list(df_data_ext.columns[cols_ts])
            # cols_ext must be of format '{}_{int}_{}'
            lab_int = 100
            for i, c in enumerate(cols_ext):
                char = c.split('_')[1]
                if char.isdigit():
                    pass
                else:
                    cols_ext[i] = c.replace(char, str(lab_int)) + char
                    lab_int += 1

            df_data_ext = df_data_ext[cols_ext]
            to_freq = ex['tfreq']
            if to_freq != 1:
                start_end_date = (ex['sstartdate'], ex['senddate'])
                start_end_year = (ex['startyear'], ex['endyear'])
                df_data_ext = functions_pp.time_mean_bins(df_data_ext,
                                                          to_freq,
                                                          start_end_date,
                                                          start_end_year,
                                                          seldays='part')[0]

            # Expand var_names_corr
            n = var_names_full[-1][0] + 1
            add_n = n + len(cols_ext)
            n_var_idx = var_names_full[-1][-1] + 1
            for i in range(n, add_n):
                var_names_full.append([i, cols_ext[i - n], n_var_idx])
            df_data = df_data.merge(df_data_ext,
                                    left_index=True,
                                    right_index=True)
    else:
        var_names_full = var_names_corr

    bool_train = traintest.loc[s]['TrainIsTrue']
    bool_RV_train = np.logical_and(bool_train, traintest.loc[s]['RV_mask'])
    dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
    dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index

    RVfull_train = RV.RVfullts.sel(time=dates_train)
    datesfull_train = pd.to_datetime(RVfull_train.time.values)
    data = df_data.loc[datesfull_train].values
    print(data.shape)

    # get RV datamask (same shape as data)
    data_mask = [True if d in dates_RV_train else False
                 for d in datesfull_train]
    data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

    # add traintest mask to fulldata
    dates_all = pd.to_datetime(RV.RVfullts.time.values)
    dates_RV = pd.to_datetime(RV.RV_ts.time.values)
    df_data['TrainIsTrue'] = [True if d in datesfull_train else False
                              for d in dates_all]
    df_data['RV_mask'] = [True if d in dates_RV else False
                          for d in dates_all]

    # ==========================================================================
    # tigramite 4
    # ==========================================================================
    T, N = data.shape  # Time, Regions

    # ==========================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ==========================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names_full)

    # ==========================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ==========================================================================
    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)

    # ==========================================================================
    # multiple testing problem:
    # ==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=4)
    # selected_variables : list of integers, optional (default: range(N))
    #     Specify to estimate parents only for selected variables. If None is
    #     passed, parents are estimated for all variables.

    # selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                              pc_alpha=pc_alpha,
                              tau_min=0,
                              max_combinations=ex['max_comb_actors'])

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    # returns all parents, not just causal precursors (of lag > 0)
    sig = rgcpd.return_sign_parents(pcmci,
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=alpha_level)

    all_parents = sig['parents']

    links_RV = all_parents[0]

    df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)

    rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                      outdic_actors, map_proj, ex)

    if ex['SaveTF'] == True:
        if sys.version[:1] == '3':
            fname = f's{s}_' + ex['params'] + '.txt'
            file = io.open(os.path.join(ex['fig_subpath'], fname), mode='w+')
            file.write(f.getvalue())
            file.close()
            f.close()
        elif sys.version[:1] == '2':
            f.close()
        sys.stdout = orig_stdout

    return df, df_data
# Example data, here the real dataset can be loaded as a numpy array of shape
# (T, N)
numpy.random.seed(42)     # Fix random seed
links_coeffs = {0: [((0, -1), 0.7)],
                1: [((1, -1), 0.8), ((0, -1), 0.8)],
                2: [((2, -1), 0.5), ((1, -2), 0.5)],
                }

T = 500     # time series length
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)
T, N = data.shape

# Initialize dataframe object
dataframe = pp.DataFrame(data)

# Optionally specify variable names
var_names = [r'$X^0$', r'$X^1$', r'$X^2$']

# Significance level in condition-selection step. If a list of levels is
# provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via
# model-selection.
pc_alpha = 0.2  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
selected_variables = range(N)

# Maximum time lag
tau_max = 3

# Optional minimum time lag in MCI step (in PC-step this is 1)
tau_min = 0
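# A minimal sketch of the pc_alpha model selection mentioned above: passing a
# list of levels (or None) to run_pcmci lets PCMCI pick the optimal level per
# variable. It assumes `dataframe`, `tau_min`, and `tau_max` from the snippet
# above; ParCorr is one possible conditional independence test.
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=ParCorr())
results = pcmci.run_pcmci(tau_min=tau_min, tau_max=tau_max,
                          pc_alpha=[0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5])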
def _get_pag_from_dag(links_coeffs, observed_vars=None, tau_max=None,
                      verbosity=0):
    """Computes PAG over observed variables from DAG on full variable set.

    Uses OracleCI tests based on ancestors in DAG to obtain skeleton and
    sepsets. Then applies FCI rules (including collider rule).
    """

    if verbosity > 0:
        print("Running _get_pag_from_dag:\n\n1. Ancestors search")

    N_all = len(links_coeffs)

    # If tau_max is None, compute from links_coeffs
    _, max_lag_links = _get_minmax_lag(links_coeffs)
    if tau_max is None:
        tau_max = max_lag_links
    else:
        if max_lag_links > tau_max:
            raise ValueError("tau_max must be >= maximum lag in "
                             "links_coeffs; choose tau_max=None")

    if observed_vars is None:
        observed_vars = range(N_all)
    else:
        if not set(observed_vars).issubset(set(range(N_all))):
            raise ValueError("observed_vars must be subset of range(N_all).")

    N = len(observed_vars)

    # Init cond_ind_test class
    cond_ind_test = OracleCI(links_coeffs)

    # Init graph and sepsets
    graph_dict = {j: {(i, -tau): "o-o"
                      for i in range(N)
                      for tau in range(tau_max + 1)
                      if tau > 0 or j != i}
                  for j in range(N)}
    sepsets = {j: {(i, -tau): {}
                   for i in range(N)
                   for tau in range(tau_max + 1)
                   if (tau > 0 or i < j)}
               for j in range(N)}

    sepset_answers = {}

    # We will enumerate the observed variables with (i, j), which refers to
    # the index in the pag_graph, while x, y iterate through the observed
    # variables in the underlying DAG

    # Loop over the observed variables
    for j, y in enumerate(observed_vars):
        for i, x in enumerate(observed_vars):
            for tau in range(0, tau_max + 1):
                if (x, -tau) != (y, 0):

                    dag_anc_y, _ = cond_ind_test._get_non_blocked_ancestors(
                        Y=[(y, 0)], conds=None, mode='max_lag',
                        max_lag=tau_max)
                    # Only consider observed ancestors
                    pag_anc_y = [anc for anc in dag_anc_y[(y, 0)]
                                 if anc[0] in observed_vars]

                    dag_anc_x, _ = cond_ind_test._get_non_blocked_ancestors(
                        Y=[(x, -tau)], conds=None, mode='max_lag',
                        max_lag=tau_max)
                    # Only consider observed ancestors
                    pag_anc_x = [anc for anc in dag_anc_x[(x, -tau)]
                                 if anc[0] in observed_vars]

                    Z = list(set([z for z in pag_anc_y + pag_anc_x
                                  if z != (y, 0) and z != (x, -tau)]))

                    separated = cond_ind_test._is_dsep(X=[(x, -tau)],
                                                       Y=[(y, 0)],
                                                       Z=Z,
                                                       max_lag=None)

                    # If X and Y are connected given Z, mark a link
                    if not separated and tau == 0:
                        graph_dict[j][(i, -tau)] = "o-o"
                    elif not separated and tau > 0:
                        graph_dict[j][(i, -tau)] = "o->"
                    # If X and Y are separated given Z, mark absence of links
                    # and store sepset
                    else:
                        graph_dict[j][(i, -tau)] = ""

                        # Translate sepset to (i, j)-space
                        S = frozenset((observed_vars.index(cond[0]), cond[1])
                                      for cond in Z)
                        sepsets[j][(i, -tau)] = {(S, "")}
                        if tau == 0:
                            sepsets[i][(j, 0)] = {(S, "")}

                        if tau > 0 or (tau == 0 and i < j):
                            X_type = (i, -tau)
                            Y_type = (j, 0)
                        else:
                            X_type = (j, 0)
                            Y_type = (i, 0)

                        for s in S:
                            sepset_answers[(X_type, s, Y_type)] = False

                        for k, tau in product(range(N),
                                              range(0, tau_max + 1)):
                            if sepset_answers.get(
                                    (X_type, (k, -tau), Y_type)) is None:
                                sepset_answers[(X_type, (k, -tau),
                                                Y_type)] = True

    if verbosity > 0:
        print("2. FCI orientation rules")

    # Initialize SVARFCI with dummy data
    svarfci = SVARFCI(dataframe=pp.DataFrame(np.zeros((N + 1, N))),
                      cond_ind_test=cond_ind_test)
    svarfci._initialize(tau_max=tau_max,
                        pc_alpha=0.01,
                        max_cond_px=np.inf,
                        max_p_global=np.inf,
                        max_p_dsep=np.inf,
                        max_q_global=np.inf,
                        max_pds_set=np.inf,
                        fix_all_edges_before_final_orientation=False,
                        verbosity=verbosity)
    svarfci._oracle = True

    # Update graph_dict and sepsets
    svarfci.graph_dict = graph_dict
    svarfci.sepsets = sepsets

    # Run *all* rules
    svarfci._B_not_in_SepSet_AC_given_answers = sepset_answers
    svarfci._run_fci_orientation_phase()

    # Also return array version of the PAG graph
    pag_graph = svarfci._dict2graph()

    return svarfci.graph_dict, pag_graph
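# A minimal usage sketch for _get_pag_from_dag. It mirrors the 3-tuple links
# format used in calculate() above (link, coefficient, function); the concrete
# links and the choice of variable 1 as unobserved are illustrative only.
def lin_f(x):
    return x

links = {0: [((0, -1), 0.5, lin_f)],
         1: [((0, -1), 0.6, lin_f)],
         2: [((1, -1), 0.7, lin_f)]}
# Variable 1 is latent; the PAG is computed over variables 0 and 2.
graph_dict, pag_graph = _get_pag_from_dag(links,
                                          observed_vars=[0, 2],
                                          tau_max=2)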
fulldata = pp.time_bin_with_mask(fulldata,
                                 time_bin_length=time_bin_length)[0]
fulldata_mask = pp.time_bin_with_mask(
    fulldata_mask, time_bin_length=time_bin_length)[0] > 0.
print("Fulldata after binning shape = %s" % str(fulldata.shape))
print("Fulldata after binning masked shape = %s" % str(fulldata_mask.shape))

dataframe = pp.DataFrame(fulldata, mask=fulldata_mask)
print("Fulldata shape = %s" % str(dataframe.values.shape))
print("Unmasked samples %d" % (dataframe.mask[:, 0] == False).sum())

T, N = dataframe.values.shape

resdict = {"CI_params": {'significance': 'analytic',
                         'use_mask': True,
                         'mask_type': ['y'],
                         'recycle_residuals': False,
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    T = T_data
    N = N_data
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2

    print("======")
    # data arrives as an itertools.chain object, not a numpy array
    data = np.array(list(data))
    print("data len is ")
    print(len(data))

    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # output edges
    result_arr = []
    for index_cause, item in enumerate(results['p_matrix']):
        cause = headers[index_cause]
        for index_effect, arr in enumerate(item):
            effect = headers[index_effect]
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))

    return result_arr
numpy.random.seed(42)     # Fix random seed
links_coeffs = {0: [((0, -1), 0.7)],
                1: [((1, -1), 0.8), ((0, -1), 0.8)],
                2: [((2, -1), 0.5), ((1, -2), 0.5)],
                }

T = 500     # time series length
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)
T, N = data.shape

# Optionally specify variable names
var_names = [r'$X^0$', r'$X^1$', r'$X^2$']

# Initialize dataframe object
dataframe = pp.DataFrame(data, var_names=var_names)

# Significance level in condition-selection step. If a list of levels is
# provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via
# model-selection.
pc_alpha = 0.2  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
selected_variables = range(N)

# Maximum time lag
tau_max = 3

# Optional minimum time lag in MCI step (in PC-step this is 1)
tau_min = 0

# Maximum cardinality of conditions in PC condition-selection step. The
# recommended default choice is None to leave it unrestricted.
def build_link_pcmci_noself(p_data_values, p_agent_names, p_var_sou,
                            p_var_tar):
    """Build links by n-column data."""
    [times_num, agent_num] = p_data_values.shape
    # set the data for PCMCI
    data_frame = pp.DataFrame(p_data_values,
                              var_names=p_agent_names,
                              missing_flag=BaseConfig.BACKGROUND_VALUE)
    # new PCMCI
    pcmci = PCMCI(dataframe=data_frame, cond_ind_test=ParCorr())
    # run PCMCI
    alpha_level = 0.01
    results_pcmci = pcmci.run_pcmciplus(tau_min=0, tau_max=2,
                                        pc_alpha=alpha_level)
    # get the result
    graph_pcmci = results_pcmci['graph']
    q_matrix = results_pcmci['q_matrix']
    p_matrix = results_pcmci['p_matrix']
    val_matrix = results_pcmci['val_matrix']
    conf_matrix = results_pcmci['conf_matrix']
    ambiguous_triples = results_pcmci['ambiguous_triples']

    # filter these links
    links_df = pd.DataFrame(columns=('VarSou', 'VarTar', 'Source', 'Target',
                                     'TimeLag', 'Strength', 'Unoriented'))
    if graph_pcmci is not None:
        sig_links = (graph_pcmci != "") * (graph_pcmci != "<--")
    elif q_matrix is not None:
        sig_links = (q_matrix <= alpha_level)
    else:
        sig_links = (p_matrix <= alpha_level)

    for j in range(agent_num):
        links = {(p[0], -p[1]): np.abs(val_matrix[p[0], j, abs(p[1])])
                 for p in zip(*np.where(sig_links[:, j, :]))}
        # Sort by value
        sorted_links = sorted(links, key=links.get, reverse=True)
        for p in sorted_links:
            VarSou = p_var_sou
            VarTar = p_var_tar
            Source = p_agent_names[j]
            Target = p_agent_names[p[0]]
            TimeLag = p[1]
            Strength = val_matrix[p[0], j, abs(p[1])]
            Unoriented = None
            if graph_pcmci is not None:
                if p[1] == 0 and graph_pcmci[j, p[0], 0] == "o-o":
                    Unoriented = 1  # "unoriented link"
                elif graph_pcmci[p[0], j, abs(p[1])] == "x-x":
                    Unoriented = 1  # "unclear orientation due to conflict"
                else:
                    Unoriented = 0
            links_df = links_df.append(
                pd.DataFrame({'VarSou': [VarSou],
                              'VarTar': [VarTar],
                              'Source': [Source],
                              'Target': [Target],
                              'TimeLag': [TimeLag],
                              'Strength': [Strength],
                              'Unoriented': [Unoriented]}),
                ignore_index=True)

    # remove the self-correlation edges
    links_df = links_df.loc[links_df['Source'] != links_df['Target']]
    return links_df
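# A minimal usage sketch for build_link_pcmci_noself, assuming BaseConfig
# defines the missing-data flag used above. The array shape and the agent and
# variable names are illustrative: rows are time steps, columns are agents.
import numpy as np

values = np.random.randn(300, 3)
links_df = build_link_pcmci_noself(values,
                                   p_agent_names=['a0', 'a1', 'a2'],
                                   p_var_sou='varA',
                                   p_var_tar='varB')
print(links_df.head())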
senegal_millet_file = 'pricedata/SenegalGEIWSMillet.csv'
millet_prices = GEIWS_prices(senegal_millet_file)
millet_prices = subtract_rolling_mean(adjust_seasonality(millet_prices))
study_data = millet_prices

# give custom NaN value for tigramite to interpret
mssng = 99999
study_data = study_data.copy().fillna(mssng)
dataframe = pp.DataFrame(study_data.values,
                         var_names=study_data.columns,
                         missing_flag=mssng)
tp.plot_timeseries(dataframe)

parcorr = ParCorr(significance='analytic')
gpdc = GPDC(significance='analytic', gp_params=None)
pcmci_gpdc = PCMCI(dataframe=dataframe, cond_ind_test=gpdc, verbosity=0)
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)
def init_pcmci(df_data, significance='analytic', mask_type='y',
               selected_variables=None, verbosity=5):
    '''
    Initializes a pcmci object for each training set. This allows to plot
    lagged cross-correlations, which help to identify a reasonable tau_max.

    Parameters
    ----------
    df_data : pandas DataFrame
        df_data is retrieved by running rg.get_ts_prec().
    significance : str, optional
        The default is 'analytic'.
    mask_type : str, optional
        The default is 'y'.
    verbosity : int, optional
        The default is 5.
    selected_variables : list of integers, optional (default: None)
        Specify to estimate parents only for selected variables. If None is
        passed, parents are estimated for all variables.

    Returns
    -------
    Dictionary of format {split: pcmci}.
    '''
    splits = df_data.index.levels[0]
    pcmci_dict = {}
    RV_mask = df_data['RV_mask']
    for s in range(splits.size):
        TrainIsTrue = df_data['TrainIsTrue'].loc[s]
        df_data_s = df_data.loc[s][TrainIsTrue == True]
        df_data_s = df_data_s.dropna(axis=1, how='all')
        if any(df_data_s.isna().values.flatten()):
            if verbosity > 0:
                print('Warning: nans detected')

        var_names = [k for k in df_data_s.columns
                     if k not in ['TrainIsTrue', 'RV_mask']]
        df_data_s = df_data_s.loc[:, var_names]
        data = df_data_s.values
        data_mask = ~RV_mask.loc[s][TrainIsTrue == True].values
        # indices with mask == False are used (with mask_type 'y')
        data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

        # create dataframe in Tigramite format
        dataframe = pp.DataFrame(data=data,
                                 mask=data_mask,
                                 var_names=var_names)
        parcorr = ParCorr(significance=significance,
                          mask_type=mask_type,
                          verbosity=0)
        parcorr.verbosity = verbosity  # avoid printing init text each time

        # pc algorithm: only parents for selected_variables are calculated
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=parcorr,
                      verbosity=verbosity)
        pcmci_dict[s] = pcmci
    return pcmci_dict
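# A minimal usage sketch for init_pcmci, assuming `df_data` has the
# (split, time) multi-index with 'TrainIsTrue' and 'RV_mask' columns described
# in the docstring. As noted there, the lagged dependencies of one split can
# guide the choice of tau_max.
pcmci_dict = init_pcmci(df_data, verbosity=0)
correlations = pcmci_dict[0].get_lagged_dependencies(tau_max=10)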
def run(self):
    data, _ = pp.var_process(self.links_coeffs, T=1000)
    dataframe = pp.DataFrame(data)
    cond_ind_test = ParCorr()
    self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
    self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)
# ==========================================================================
# new mask
# ==========================================================================
print(data.shape)

data_mask = np.ones(data.shape, dtype='bool')
for i in range(4):  # take into account 4 months starting from june=5
    data_mask[5 + i::12, :] = False

T, N = data.shape

# ==========================================================================
# Initialize dataframe object (needed for tigramite functions)
# ==========================================================================
dataframe = pp.DataFrame(data=data, mask=data_mask)

# Specify time axis and variable names
datatime = np.arange(len(data))

# ==========================================================================
# pc algorithm: only parents for selected_variables are calculated
# (here entry[0] = PoV)
# ==========================================================================
parcorr = ParCorr(significance='analytic',
                  use_mask=True,
                  mask_type='y',
                  verbosity=2)
pcmci = PCMCI(dataframe=dataframe,
              cond_ind_test=parcorr,
def plot__gpdc_get_single_residuals(self):

    ci_test = self.ci_gpdc
    # ci_test = self.ci_par_corr

    a = 0.
    c = .3
    T = 500
    # Each key refers to a variable and the incoming links are supplied as a
    # list of format [((driver, lag), coeff), ...]
    links_coeffs = {0: [((0, -1), a)],
                    1: [((1, -1), a), ((0, -1), c)],
                    }

    numpy.random.seed(42)
    data, true_parents_neighbors = pp.var_process(links_coeffs,
                                                  use='inv_inno_cov',
                                                  T=T)
    dataframe = pp.DataFrame(data)
    ci_test.set_dataframe(dataframe)

    def func(x):
        return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

    true_residual = numpy.random.randn(3, T)
    array = numpy.copy(true_residual)
    array[1] += c * func(array[2])
    xyz = numpy.array([0, 1] + [2 for i in range(array.shape[0] - 2)])
    print('xyz ', xyz, numpy.where(xyz == 1))
    target_var = 1

    dim, T = array.shape
    c_std = c  # / array[1].std()
    array_orig = numpy.copy(array)

    import matplotlib
    from matplotlib import pyplot

    (est_residual, pred) = ci_test._get_single_residuals(
        array, target_var, standardize=False, return_means=True)
    (resid_, pred_parcorr) = self.ci_par_corr._get_single_residuals(
        array, target_var, standardize=False, return_means=True)

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.scatter(array_orig[2], array_orig[1])
    ax.scatter(array_orig[2], pred, color='red')
    ax.scatter(array_orig[2], pred_parcorr, color='green')
    ax.plot(numpy.sort(array_orig[2]),
            c_std * func(numpy.sort(array_orig[2])),
            color='black')
    pyplot.savefig('/home/jakobrunge/test/gpdctest.pdf')
    m_y_indices = [m_y_data.columns.get_loc('Month'),
                   m_y_data.columns.get_loc('Year')]
    m_y_data = (m_y_data.interpolate(method='linear', limit=inter_max_gap)
                if interpolate == True else m_y_data)
    data_filled = m_y_data.fillna(mssng)
else:
    pass

data_filled = (data_filled[study_vars]
               if use_study_vars == True else data_filled)

T, N = data_filled.shape

dataframe = pp.DataFrame(data_filled.values,
                         var_names=data_filled.columns,
                         missing_flag=mssng)

# links to study: all lags between steps_ahead and tau_max, excluding the
# target itself
selected_links = {i: list(chain.from_iterable(
                      [[(j, k) for k in range(-tau_max, -steps_ahead + 1)]
                       for j in range(N) if j != target]))
                  for i in range(N)}
# remove month and year conditions
def test_construct_array(cstrct_array_params):
    # Unpack the parameters
    (x_nds, y_nds, z_nds), tau_max, missing_vals, mask_type = \
        cstrct_array_params
    # Make some fake data
    data = np.arange(1000).reshape(10, 100).T
    # Get the needed parameters from the data
    T, N = data.shape
    max_lag = 2 * tau_max
    n_times = T - max_lag

    # When testing masking and missing value flags, we will remove time
    # slices, starting with the earliest slice. This counter keeps track of
    # how many rows have been masked.
    n_rows_masked = 0

    # Make a fake mask
    data_mask = np.zeros_like(data, dtype='bool')
    if mask_type is not None:
        for var, nodes in zip(['x', 'y', 'z'], [x_nds, y_nds, z_nds]):
            if var in mask_type:
                # Get the first node
                a_nd, a_tau = nodes[0]
                # Mask the first value of this node
                data_mask[a_tau - n_times + n_rows_masked, a_nd] = True
                n_rows_masked += 1

    # Choose fake missing value as the earliest time entry in the first
    # z-node from the original (non-shifted) data that is not cut off by
    # max_lag or masked values from the first z-node
    missing_flag = None
    if missing_vals:
        # Get the node index
        a_nd, _ = z_nds[0]
        # Select the earliest non-cutoff entry from the unshifted data set
        earliest_time = max_lag + n_rows_masked
        missing_flag = data[earliest_time, a_nd]
        # Record that the row with this value and all rows up to max_lag
        # after this value have been cut off as well
        n_rows_masked += max_lag + 1

    # Construct the array
    data_f = pp.DataFrame(data, data_mask, missing_flag)
    array, xyz = data_f.construct_array(x_nds, y_nds, z_nds,
                                        tau_max=tau_max,
                                        mask_type=mask_type,
                                        verbosity=VERBOSITY)
    # Ensure x_nds, y_nds, z_nds are unique
    x_nds = list(OrderedDict.fromkeys(x_nds))
    y_nds = list(OrderedDict.fromkeys(y_nds))
    z_nds = list(OrderedDict.fromkeys(z_nds))
    z_nds = [node for node in z_nds
             if (node not in x_nds) and (node not in y_nds)]

    # Get the expected results
    expect_array = np.array(
        [list(range(data[time - n_times, node],
                    data[time - n_times, node] + n_times))
         for node, time in x_nds + y_nds + z_nds])
    expect_xyz = np.array([0 for _ in x_nds] +
                          [1 for _ in y_nds] +
                          [2 for _ in z_nds])
    # Apply the mask, which always blocks the latest time of the 0th node of
    # the masked variable, which removes the first n time slices in the
    # returned array
    expect_array = expect_array[:, n_rows_masked:]
    # Test the results
    np.testing.assert_almost_equal(array, expect_array)
    np.testing.assert_almost_equal(xyz, expect_xyz)
with open("test.csv", "w", newline="") as csvfile:
    data_writer = csv.writer(csvfile,
                             delimiter=delimiter,
                             quotechar=quotechar)
    for line in data:
        data_writer.writerow(line)

exit()

T, N = data.shape

# Initialize dataframe object, specify time axis and variable names
dataframe = pp.DataFrame(data,
                         datatime=np.arange(len(data)),
                         var_names=headers)
if verbose > 0:
    plot = tp.plot_timeseries(dataframe)[0]
    if display_images:
        plot.show()
    if save_images:
        plot.savefig("timeseries.png")

parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)

correlations = pcmci.get_lagged_dependencies(tau_max=3)
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    T = T_data
    N = N_data

    # Run settings
    # there is another tau_max in lagged dependencies that might be much
    # longer!
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2

    print("======")

    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # output edges
    result_arr = []
    for index_cause, item in enumerate(results['p_matrix']):
        cause = headers[index_cause]
        for index_effect, arr in enumerate(item):
            effect = headers[index_effect]
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))

    print(result_arr)
    return result_arr