correlations = pcmci.get_lagged_dependencies(tau_max=3) lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations, setup_args={ 'var_names': headers, 'x_base': 5, 'y_base': .5 }) if verbose > 1: if display_images: lag_func_matrix.savefig() if save_images: lag_func_matrix.savefig("lag_func.png") pcmci.verbosity = 1 results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None) #Print results print("p-values") print(results['p_matrix'].round(3)) print("MCI partial correlations") print(results['val_matrix'].round(2)) #Save results to file p_matrix = results['p_matrix'] with open("p-values.csv", "w") as csv_file: writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag, alpha=0.05):
    """Run PCMCI (RCOT conditional-independence test) on one data partition
    and write the significant causal edges to a per-partition CSV.

    Args:
        data: Iterable of observation rows (may be a lazy iterable such as
            itertools.chain); materialized into a numpy array before use.
        dt: Time axis passed to the tigramite DataFrame as ``datatime``.
        index: Partition index; recorded with every output edge and used to
            name the output file ``pcmci_para_out{index}.csv``.
        headers: Variable names, aligned with the columns of ``data``.
        T_data: Number of time steps (kept for interface compatibility;
            not used by the computation).
        N_data: Number of variables (kept for interface compatibility;
            not used by the computation).
        maxlag: Maximum lag ``tau_max`` passed to PCMCI.
        alpha: Significance threshold used both as ``pc_alpha`` and as the
            p-value cut on the MCI matrix. Defaults to 0.05, the value that
            was previously hard-coded.

    Returns:
        List of ``[effect, cause, index]`` rows, one per significant
        directed edge (self-links excluded).
    """
    tau_max = maxlag
    print("======")
    # The caller may hand us a lazy iterable (e.g. itertools.chain), so
    # force it into a numpy array before building the tigramite DataFrame.
    data = np.array(list(data))
    print("data len is ")
    print(len(data))

    # Initialize dataframe object, specify time axis and variable names.
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=alpha)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # p_matrix[i, j, tau] is the p-value of the link "variable i at lag tau
    # -> variable j", so the first axis indexes the cause and the second
    # the effect. Record each (effect, cause) pair at most once: the first
    # lag below the threshold wins and the inner scan stops.
    result_arr = []
    for index_cause, effect_pvals in enumerate(results['p_matrix']):
        cause = headers[index_cause]
        for index_effect, lag_pvals in enumerate(effect_pvals):
            effect = headers[index_effect]
            for pval in lag_pvals:
                if pval < alpha and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))
    return result_arr
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag, alpha=0.05):
    """Baseline PCMCI run (RCOT conditional-independence test) over the
    full dataset; writes the significant causal edges to
    ``pcmci_baseline_out.csv``.

    Args:
        data: Observation matrix accepted by ``pp.DataFrame`` (rows are
            time steps, columns are variables).
        dt: Time axis passed to the tigramite DataFrame as ``datatime``.
        index: Run identifier recorded with every output edge.
        headers: Variable names, aligned with the columns of ``data``.
        T_data: Number of time steps (kept for interface compatibility;
            not used by the computation).
        N_data: Number of variables (kept for interface compatibility;
            not used by the computation).
        maxlag: Maximum lag ``tau_max`` passed to PCMCI. Note the lagged-
            dependency analysis elsewhere may use a much longer horizon.
        alpha: Significance threshold used both as ``pc_alpha`` and as the
            p-value cut on the MCI matrix. Defaults to 0.05, the value that
            was previously hard-coded.

    Returns:
        List of ``[effect, cause, index]`` rows, one per significant
        directed edge (self-links excluded).
    """
    tau_max = maxlag
    print("======")

    # Initialize dataframe object, specify time axis and variable names.
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)

    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)
    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=alpha)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # p_matrix[i, j, tau] is the p-value of the link "variable i at lag tau
    # -> variable j": first axis = cause, second axis = effect. Each
    # (effect, cause) pair is recorded at most once — the first lag below
    # the threshold wins and the inner scan stops.
    result_arr = []
    for index_cause, effect_pvals in enumerate(results['p_matrix']):
        cause = headers[index_cause]
        for index_effect, lag_pvals in enumerate(effect_pvals):
            effect = headers[index_effect]
            for pval in lag_pvals:
                if pval < alpha and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))
    print(result_arr)
    return result_arr
def test(dataframes, max_lags=(4,), alpha=(None,), tests=('ParCorr',), limit=1):
    '''
    Perform the PCMCI algorithm on randomly sampled dataframes for every
    combination of conditional-independence test, maximum lag and
    significance level, appending results to a CSV file after each run.

    Args:
        dataframes: A list of entries indexed as i[0]..i[3] below; judging
            by the result columns these look like (complexity, sample_size,
            tigramite_dataframe, representation) — TODO confirm against
            the caller.
        max_lags: Iterable of maximum lags for the lagged time series.
            (Defaults are now immutable tuples to avoid the shared
            mutable-default-argument pitfall; callers passing lists are
            unaffected.)
        alpha: Iterable of significance levels to perform the parent test.
        tests: Iterable of conditional-independence test names among
            'RCOT', 'GPDC', 'ParCorr', 'CMIknn'.
        limit: A limit for the instances to be considered per complexity.

    Returns:
        None. Rows are accumulated in memory and rewritten to
        'results/performance_sample_sizes.csv' after every evaluation.
    '''
    test_results = []
    random.shuffle(dataframes)
    total = limit * len(max_lags) * len(alpha) * len(tests)
    data_frame_iter = iter(dataframes)

    # Instantiate only the requested conditional-independence tests.
    tests_to_evaluate = []
    if 'RCOT' in tests:
        tests_to_evaluate.append(['RCOT', RCOT()])
    if 'GPDC' in tests:
        tests_to_evaluate.append(['GPDC', GPDC()])
    if 'ParCorr' in tests:
        tests_to_evaluate.append(['ParCorr', ParCorr(significance='analytic')])
    if 'CMIknn' in tests:
        tests_to_evaluate.append(['CMIknn', CMIknn()])

    # Per-complexity counters that cap how many instances are evaluated.
    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {c: 0 for c in unique_complexities}

    # NOTE: loop variable renamed from `test` — it shadowed this function.
    for test_name, cond_ind_test in tests_to_evaluate:
        for l in max_lags:
            for a in alpha:
                # Reset the stop flag per (lag, alpha) combination.
                # Previously it was set once per test, so after the first
                # combination filled its quota every later combination's
                # `while not stop` exited immediately and was skipped,
                # despite the counts being reset below.
                stop = False
                while not stop:
                    try:
                        i = random.sample(dataframes, 1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                dataframe=i[2],
                                cond_ind_test=cond_ind_test,
                                verbosity=0)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)

                            q_matrix = pcmci.get_corrected_pvalues(
                                p_matrix=results['p_matrix'],
                                fdr_method='fdr_bh')
                            valid_parents = list(
                                pcmci.return_significant_parents(
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=a)['parents'].values())
                            # Count all significant links across variables.
                            valid_links = len(
                                [item for sublist in valid_parents
                                 for item in sublist])

                            test_results.append(
                                [i[3], i[0], i[1], l, test_name, a,
                                 valid_links, time_lapse])
                            results_df = pd.DataFrame(
                                test_results,
                                columns=['representation', 'complexity',
                                         'sample_size', 'max_lag', 'test',
                                         'alpha', 'valid_links_at_alpha',
                                         'learning_time'])
                            print('results ready to be saved')
                            # Rewrite the whole file each time so a crash
                            # mid-sweep loses nothing already computed.
                            results_df.to_csv(
                                'results/performance_sample_sizes.csv',
                                index=False)

                            counts[i[1]] += 1
                            if all(value == limit
                                   for value in counts.values()):
                                stop = True
                    except Exception as err:
                        # Narrowed from a bare `except:` that swallowed
                        # every error (including KeyboardInterrupt) with
                        # only 'Hoopla!'; at least surface the cause now.
                        print('Hoopla! ({})'.format(err))
                # Reset the per-complexity quota so the next combination
                # evaluates its own set of instances.
                for c in unique_complexities:
                    counts[c] = 0
def caus(data, var_names, mask_columns=(0, 1, 2, 9, 10, 11)):
    """Run PCMCI with the ParCorr test on cyclically masked data and plot
    the resulting time-series graph of significant links.

    Rows whose position within a 72-step cycle falls outside steps 30..47
    are masked (presumably selecting a daily time window — TODO confirm
    the sampling rate against the data producer).

    Args:
        data: 2-D numpy array of shape (time, variables).
        var_names: Variable names aligned with the columns of ``data``.
        mask_columns: Column indices the cyclic row mask is applied to.
            Defaults to the previously hard-coded (0, 1, 2, 9, 10, 11).

    Returns:
        Tuple ``(results, link_matrix)`` — the raw ``run_pcmci`` output
        dict and the boolean matrix of FDR-corrected significant links.
    """
    import numpy as np
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr

    # Build the cyclic row mask over the full data length. The previous
    # version iterated a hard-coded range(68904): it raised IndexError for
    # shorter inputs and left longer inputs only partially masked.
    data_mask_row = np.zeros(len(data))
    for i in range(len(data)):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True

    data_mask = np.zeros(data.shape)
    for col in mask_columns:
        data_mask[:, col] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    datatime = np.arange(len(data))

    # Mask only the 'y' (effect) variable so masked samples never serve
    # as prediction targets.
    parcorr = ParCorr(significance='analytic', use_mask=True, mask_type='y')
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  var_names=var_names,
                  verbosity=1)
    pcmci.verbosity = 1
    results = pcmci.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # FDR-correct the p-values, then report and keep only links that stay
    # significant at alpha_level=0.01.
    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')
    # NOTE(review): _print_significant_links / _return_significant_parents
    # are private tigramite APIs — pin the tigramite version, or move to
    # the public equivalents when upgrading.
    pcmci._print_significant_links(p_matrix=results['p_matrix'],
                                   q_matrix=q_matrix,
                                   val_matrix=results['val_matrix'],
                                   alpha_level=0.01)
    link_matrix = pcmci._return_significant_parents(
        pq_matrix=q_matrix,
        val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix