Example #1
    def test_pcmci(self):
        # Set up a strict test level
        pc_alpha = 0.05  # alternatives: [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
        tau_max = 2
        alpha_level = 0.01
        # verbosity is module-level in the original test suite; defined here so the excerpt runs standalone
        verbosity = 0

        dataframe = pp.DataFrame(self.data)

        cond_ind_test = ParCorr(verbosity=verbosity)

        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=verbosity)

        results = pcmci.run_pcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
        )

        parents = pcmci._return_significant_parents(
            pq_matrix=results['p_matrix'],
            val_matrix=results['val_matrix'],
            alpha_level=alpha_level)['parents']

        # print parents
        # print self.true_parents
        assert_graphs_equal(parents, self.true_parents)
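The assertion above presupposes that self.data and self.true_parents were prepared elsewhere in the test class. A minimal setup sketch, assuming a toy linear VAR process generated with pp.var_process (the coefficients, seed and sample size are illustrative assumptions, not taken from the original suite):

import numpy as np
from tigramite import data_processing as pp

def setUp(self):
    # three variables driven by a known set of lagged linear links
    links_coeffs = {0: [((0, -1), 0.7)],
                    1: [((1, -1), 0.8), ((0, -1), 0.8)],
                    2: [((2, -1), 0.5), ((1, -2), 0.5)]}
    np.random.seed(42)
    self.data, _ = pp.var_process(links_coeffs, T=1000)
    # ground-truth parent sets implied by links_coeffs, compared by assert_graphs_equal
    self.true_parents = {0: [(0, -1)],
                         1: [(1, -1), (0, -1)],
                         2: [(2, -1), (1, -2)]}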
Example #2
class PCMCIPlugin:
    def input(self, inputfile):
        self.links_coeffs = {}
        # each line: variable <tab> driver <tab> lag <tab> coefficient
        with open(inputfile, 'r') as infile:
            for line in infile:
                contents = line.split('\t')
                var = int(contents[0])
                driver = int(contents[1])
                lag = int(contents[2])
                coeff = float(contents[3])
                if var not in self.links_coeffs:
                    self.links_coeffs[var] = []
                self.links_coeffs[var].append(((driver, lag), coeff))

    def run(self):
        data, _ = pp.var_process(self.links_coeffs, T=1000)
        dataframe = pp.DataFrame(data)
        cond_ind_test = ParCorr()
        self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)

    def output(self, outputfile):
        self.pcmciobj.print_significant_links(
            p_matrix=self.results['p_matrix'],
            val_matrix=self.results['val_matrix'],
            alpha_level=0.05)
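For reference, input() above parses one tab-separated line per causal link in the order variable, driver, lag, coefficient, with lags given as negative integers to match the (driver, lag) convention used by pp.var_process. A hedged usage sketch; the file name and values are hypothetical:

# links.tsv, tab-separated columns: var  driver  lag  coeff
# 0    0    -1    0.7
# 1    1    -1    0.8
# 1    0    -1    0.8

plugin = PCMCIPlugin()
plugin.input('links.tsv')
plugin.run()
plugin.output('pcmci_significant_links.txt')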
Example #3
def issue38():
    dpath = os.path.dirname(os.path.abspath(__file__))
    fname = 'tigramite_issue_38_input_example.csv'
    fpath = os.path.join(dpath, fname)
    df = pd.read_csv(fpath, index_col=0)
    print(df)
    data = df.values

    tdf = DataFrame(
        data=data,
        mask=None,
        missing_flag=None,
        var_names=df.columns,
        datatime=None,
    )

    indp_test = CMIknn(
        # knn=None,
        # shuffle_neighbors=None,
        # transform=None,
        # significance=None,
    )

    selected_variables = [col_lbl for col_lbl in df.columns if 'i' in col_lbl]
    selected_variables_ix = [
        df.columns.get_loc(lbl) for lbl in selected_variables
    ]
    print(("Init PCMCI with:"
           f"dataframe={tdf},"
           f"cond_ind_test={indp_test},"
           f"selected_variables={selected_variables_ix},"
           f"verbosity=10,"))
    pcmci = PCMCI(
        dataframe=tdf,
        cond_ind_test=indp_test,
        selected_variables=selected_variables_ix,
        verbosity=10,
    )

    max_lag = 24
    alpha = 0.1

    print("Running PCMCI...")
    pcmci.run_pcmci(tau_max=max_lag, pc_alpha=alpha)

    print("Done successfully! No errors!")
Example #4
correlations = pcmci.get_lagged_dependencies(tau_max=3)
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
                                       'var_names': headers,
                                       'x_base': 5,
                                       'y_base': .5
                                   })

if verbose > 1:
    if display_images:
        lag_func_matrix.savefig()
    if save_images:
        lag_func_matrix.savefig("lag_func.png")

pcmci.verbosity = 1
results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None)

#Print results
print("p-values")
print(results['p_matrix'].round(3))
print("MCI partial correlations")
print(results['val_matrix'].round(2))

#Save results to file
p_matrix = results['p_matrix']
with open("p-values.csv", "w") as csv_file:
    writer = csv.writer(csv_file,
                        delimiter=",",
                        quotechar="|",
                        quoting=csv.QUOTE_MINIMAL)
    #[[[1 2 3]]] Three brackets to get through.
    # p_matrix has shape (variable, variable, lag); write each 2-D slice row by row,
    # separating slices with a blank line.
    for sector in p_matrix:
        for row in sector:
            writer.writerow(row)
        writer.writerow([])
Example #5
            pc_alpha = pcA_set2
            pc_alpha_name = 'set2'
        elif p == 4:
            pc_alpha = pcA_set3
            pc_alpha_name = 'set3'
        elif p == 5:
            pc_alpha = pcA_set4
            pc_alpha_name = 'set4'
        elif p == 6:
            pc_alpha = pcA_none
            pc_alpha_name = 'none'

        # ======================================================================================================================
        results = pcmci.run_pcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            tau_min=tau_min,
            max_combinations=1)  #selected_links = dictionary/None
        #results = pcmci.run_pcmci(selected_links =None, tau_max=tau_max, pc_alpha = pc_alpha, tau_min = tau_min,save_iterations=False,  max_conds_dim=None, max_combinations=1, max_conds_py=None, max_conds_px=None) #selected_links = dictionary/None
        #results = pcmci.run_pcmci(selected_links =  dictionary, tau_max=tau_max, pc_alpha = pc_alpha, tau_min = tau_min,save_iterations=False,  max_conds_dim=None, max_combinations=1, max_conds_py=None, max_conds_px=None) #selected_links = dictionary/None

        q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                               fdr_method='fdr_bh')

        pcmci._print_significant_links(
            p_matrix=results['p_matrix'],
            q_matrix=q_matrix,  #results['p_matrix']
            val_matrix=results['val_matrix'],
            alpha_level=alpha_level)

        sig = pcmci._return_significant_parents(
            pq_matrix=q_matrix,
            val_matrix=results['val_matrix'],
            alpha_level=alpha_level)
Example #6
a = nc_file.variables["NAOIN"][:]
b = nc_file.variables["NAOIS"][:]
data[:, 4] = a - b

data_mask = np.zeros(data.shape)
for t in range(1, T + 1):
    if (t % 73) >= 12 and (t % 73) <= 30:
        data_mask[t - 1, :] = True

# Initialize dataframe object, specify time axis and variable names
var_names = ['WPSH', 'IO', 'WNP', 'ENSO', 'NAO']
dataframe = pp.DataFrame(data, mask=data_mask, var_names=var_names)

parcorr = ParCorr(significance='analytic', mask_type='xyz')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr)
results = pcmci.run_pcmci(tau_max=12, pc_alpha=0.03)

# Correct p-values
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                       fdr_method='fdr_bh')

# Plotting
link_matrix = pcmci.return_significant_parents(
    pq_matrix=q_matrix, val_matrix=results['val_matrix'],
    alpha_level=0.03)['link_matrix']

tp.plot_graph(val_matrix=results['val_matrix'],
              link_matrix=link_matrix,
              var_names=var_names)
"""
tp.plot_time_series_graph(val_matrix=results['val_matrix'],
# In[7]:

correlations = pcmci.get_lagged_dependencies(tau_max=20,
                                             val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
                                       'figsize': (6, 6),
                                       'var_names': var_names,
                                       'x_base': 10,
                                       'y_base': .5
                                   })

# In[8]:

pcmci.verbosity = 0
results = pcmci.run_pcmci(tau_max=8, pc_alpha=None)

# In[9]:

print("p-values")
print(results['p_matrix'].round(3))
print("MCI partial correlations")
print(results['val_matrix'].round(2))

# In[10]:

q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                       fdr_method='fdr_bh')
pcmci.print_significant_links(p_matrix=results['p_matrix'],
                              q_matrix=q_matrix,
                              val_matrix=results['val_matrix'])
Example #8
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    #=====================================================================================
    #
    # 4) PCMCI-algorithm
    #
    #=====================================================================================

    # save output
    if ex['SaveTF'] == True:
        #        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'),
                                  'w+')

    #%%
    # amount of text printed:
    verbosity = 3

    # alpha level for the independence tests within the PC procedure (finding parents)
    pc_alpha = ex['pcA_sets'][ex['pcA_set']]
    # alpha level for the multiple linear regression model while conditioning on parents of
    # parents
    alpha_level = ex['alpha_level_tig']
    print('run tigramite 4, run.pcmci')
    print(('alpha level(s) for independence tests within the pc procedure '
           '(finding parents): {}'.format(pc_alpha)))
    print((
        'alpha level for multiple linear regression model while conditioning on parents of '
        'parents: {}'.format(ex['alpha_level_tig'])))

    # Retrieve traintest info
    traintest = df_splits

    # load Response Variable class
    RV = ex[ex['RV_name']]
    # create a list with all actors; these will be merged into the fulldata array
    allvar = ex['vars'][0]
    var_names_corr = []
    actorlist = []
    cols = [[RV.name]]

    for var in allvar[:]:
        print(var)
        actor = outdic_actors[var]
        if actor.ts_corr[s].size != 0:
            ts_train = actor.ts_corr[s].values
            actorlist.append(ts_train)
            # create array which numbers the regions
            var_idx = allvar.index(var)
            n_regions = actor.ts_corr[s].shape[1]
            actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                              for i in range(n_regions)]
            # Array of corresponding regions with var_names_corr (first entry is RV)
            var_names_corr = var_names_corr + actor.var_info
            cols.append(list(actor.ts_corr[s].columns))
            index_dates = actor.ts_corr[s].index
    var_names_corr.insert(0, RV.name)

    # stack actor time-series together:
    fulldata = np.concatenate(tuple(actorlist), axis=1)

    print(('There are {} regions in total'.format(fulldata.shape[1])))
    # add the full 1D time series of interest as first entry:

    fulldata = np.column_stack((RV.RVfullts, fulldata))
    df_data = pd.DataFrame(fulldata, columns=flatten(cols), index=index_dates)

    if ex['import_prec_ts'] == True:
        var_names_full = var_names_corr.copy()
        for d in ex['precursor_ts']:
            path_data = d[1]
            if len(path_data) > 1:
                path_data = ''.join(list(path_data))
            # skip first col because it is the RV ts
            df_data_ext = func_fc.load_hdf5(
                path_data)['df_data'].iloc[:, 1:].loc[s]
            cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                    df_data_ext.dtypes == 'float32')
            cols_ext = list(df_data_ext.columns[cols_ts])
            # cols_ext must be of format '{}_{int}_{}'
            lab_int = 100
            for i, c in enumerate(cols_ext):
                char = c.split('_')[1]
                if char.isdigit():
                    pass
                else:
                    cols_ext[i] = c.replace(char, str(lab_int)) + char
                    lab_int += 1

            df_data_ext = df_data_ext[cols_ext]
            to_freq = ex['tfreq']
            if to_freq != 1:
                start_end_date = (ex['sstartdate'], ex['senddate'])
                start_end_year = (ex['startyear'], ex['endyear'])
            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
                                                      to_freq,
                                                      start_end_date,
                                                      start_end_year,
                                                      seldays='part')[0]
            #            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
            #                                                     ex, ex['tfreq'],
            #                                                     seldays='part')[0]
            # Expand var_names_corr
            n = var_names_full[-1][0] + 1
            add_n = n + len(cols_ext)
            n_var_idx = var_names_full[-1][-1] + 1
            for i in range(n, add_n):
                var_names_full.append([i, cols_ext[i - n], n_var_idx])
            df_data = df_data.merge(df_data_ext,
                                    left_index=True,
                                    right_index=True)
    else:
        var_names_full = var_names_corr

    bool_train = traintest.loc[s]['TrainIsTrue']
    bool_RV_train = np.logical_and(bool_train, traintest.loc[s]['RV_mask'])
    dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
    dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index

    RVfull_train = RV.RVfullts.sel(time=dates_train)
    datesfull_train = pd.to_datetime(RVfull_train.time.values)
    data = df_data.loc[datesfull_train].values
    print((data.shape))

    # get RV datamask (same shape as data)
    data_mask = [
        True if d in dates_RV_train else False for d in datesfull_train
    ]
    data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

    # add traintest mask to fulldata
    #    dates_all = pd.to_datetime(RV.RVfullts.index)
    #    dates_RV  = pd.to_datetime(RV.RV_ts.index)
    dates_all = pd.to_datetime(RV.RVfullts.time.values)
    dates_RV = pd.to_datetime(RV.RV_ts.time.values)
    df_data['TrainIsTrue'] = [
        True if d in datesfull_train else False for d in dates_all
    ]
    df_data['RV_mask'] = [True if d in dates_RV else False for d in dates_all]

    # ======================================================================================================================
    # tigramite 3
    # ======================================================================================================================

    T, N = data.shape  # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names_full)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=4)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                              pc_alpha=pc_alpha,
                              tau_min=0,
                              max_combinations=ex['max_comb_actors'])

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    # returns all parents, not just causal precursors (of lag>0)
    sig = rgcpd.return_sign_parents(pcmci,
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=alpha_level)

    all_parents = sig['parents']
    #    link_matrix = sig['link_matrix']

    links_RV = all_parents[0]

    df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)
    #%%

    rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                      outdic_actors, map_proj, ex)

    #%%
    if ex['SaveTF'] == True:
        if sys.version[:1] == '3':
            fname = f's{s}_' + ex['params'] + '.txt'
            file = io.open(os.path.join(ex['fig_subpath'], fname), mode='w+')
            file.write(f.getvalue())
            file.close()
            f.close()
        elif sys.version[:1] == '2':
            f.close()
        sys.stdout = orig_stdout

    return df, df_data
Example #9
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):

    T = T_data
    N = N_data
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    data = np.array(list(data))
    print("data len is ")
    print(len(data))
    # data = np.fromiter(data, float)
    # print(data)
    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # print("inside def pcmci_causality")

    # output edges
    result_arr = []
    # result_arr.append(["effect","cause"])

    for index_cause, item in enumerate(results['p_matrix']):
        print("index is")
        print(index)
        print("item is")
        print(item)
        print("cause is")
        cause = headers[index_cause]
        print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            print("effect arr is ")
            print(arr)
            print("effect name is")
            effect = headers[index_effect]
            print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

        with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
            for row in result_arr:
                f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    return result_arr
Example #10
parcorr = ParCorr(significance='analytic')

gpdc = GPDC(significance='analytic', gp_params=None)

pcmci_gpdc = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=gpdc,
    verbosity=0)

pcmci = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=parcorr,
    verbosity=1)
#
min_lag, max_lag  = 1,6
results = pcmci.run_pcmci(tau_min = min_lag, tau_max=max_lag, pc_alpha=None)
#
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
#
pcmci.print_significant_links(
        p_matrix = results['p_matrix'], 
        q_matrix = q_matrix,
        val_matrix = results['val_matrix'],
        alpha_level = 0.05)

link_matrix = pcmci.return_significant_links(pq_matrix = results['p_matrix'],
                        val_matrix=results['val_matrix'], alpha_level=0.05)['link_matrix']
tp.plot_graph(
    val_matrix=results['val_matrix'],
    link_matrix=link_matrix,
    var_names=study_data.columns)
Example #11
def test(dataframes, max_lags=[4], alpha=[None], tests=['ParCorr'], limit=1):
    ''' Performs the PCMCI algorithm for all the dataframes received as parameters,
        given the hyper-parameters of the conditional independence test.
    Args:
        dataframes: A list of TIGRAMITE dataframes
        max_lags: Maximum number of lags to consider for the lagged time series
        alpha: Significance level to perform the parent test
        tests: A list of conditional independence tests to be performed
        limit: A limit for the instances to be considered

    Returns:

    '''
    test_results = []
    random.shuffle(dataframes)
    total = limit*len(max_lags)*len(alpha)*len(tests)
    data_frame_iter = iter(dataframes)

    tests_to_evaluate=[]
    if 'RCOT' in tests:
        rcot = RCOT()
        tests_to_evaluate.append(['RCOT',rcot])
    if 'GPDC' in tests:
        gpdc = GPDC()
        tests_to_evaluate.append(['GPDC', gpdc])
    if 'ParCorr' in tests:
        parcorr = ParCorr(significance='analytic')
        tests_to_evaluate.append(['ParCorr',parcorr])
    if 'CMIknn' in tests:
        cmiknn = CMIknn()
        tests_to_evaluate.append(['CMIknn',cmiknn])


    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {}
    for i in unique_complexities:
        counts[i] = 0

    for test in tests_to_evaluate:
        stop = False
        for l in max_lags:
            for a in alpha:
                while not stop:
                    try:
                        i = random.sample(dataframes,1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                    dataframe=i[2],
                                    cond_ind_test=test[1],
                                    verbosity=0)
                             # correlations = pcmci.get_lagged_dependencies(tau_max=20)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)

                            q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
                            valid_parents = list(pcmci.return_significant_parents(pq_matrix=q_matrix,
                                                                                  val_matrix=results['val_matrix'],
                                                                                  alpha_level=a)['parents'].values())

                            flat_list = []
                            for sublist in valid_parents:
                                for item in sublist:
                                    flat_list.append(item)

                            valid_links = len(flat_list)

                            test_results.append([i[3], i[0], i[1], l,test[0],a,valid_links,time_lapse])

                            results_df = pd.DataFrame(test_results,
                                                              columns=['representation', 'complexity', 'sample_size', 'max_lag','test','alpha','valid_links_at_alpha',
                                                                       'learning_time'])
                            print('results ready to be saved')
                            results_df.to_csv(
                                        'results/performance_sample_sizes.csv',
                                        index=False)

                            counts[i[1]] += 1
                            if all(value == limit for value in counts.values()):
                                stop = True

                    except Exception:
                        print('Hoopla!')

                for i in unique_complexities:
                    counts[i] = 0
Example #12
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    T = T_data
    N = N_data
    # Run settings
    # there is another tau_max in lagged dependencies that might be much longer!
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # Save results to file
    # p_matrix = results['p_matrix']
    # with open("p-values_baseline.csv", "w") as csv_file:
    #     writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
    #     # [[[1 2 3]]] Three brackets to get through.
    #     for sector in p_matrix:
    #         print("sector: ", sector)
    #         for row in sector:
    #             print("row: ", row)
    #             writer.writerow(row)
    #         writer.writerow([])
    #
    # print("inside def pcmci_causality")

    # output edges
    result_arr = []

    for index_cause, item in enumerate(results['p_matrix']):
        # print("index is")
        # print(index)
        # print("item is")
        # print(item)
        # print("cause is")
        cause = headers[index_cause]
        # print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            # print("effect arr is ")
            # print(arr)
            # print("effect name is")
            effect = headers[index_effect]
            # print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    print(result_arr)

    return result_arr
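A hedged call sketch for pcmci_causality above; the synthetic inputs are illustrative assumptions, and RCOT additionally requires tigramite's optional R/rpy2 dependencies to be installed:

import numpy as np

T, N = 200, 3
headers = ['x0', 'x1', 'x2']
data = np.random.randn(T, N)   # stand-in for the real observations
dt = np.arange(T)              # time axis passed to pp.DataFrame as datatime
edges = pcmci_causality(data, dt, index=0, headers=headers,
                        T_data=T, N_data=N, maxlag=2)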
Example #13
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0, tau_max=1, 
              pc_alpha=None, alpha_level=0.05, max_conds_dim=4, max_combinations=1, 
              max_conds_py=None, max_conds_px=None, verbosity=4):
    

    
    #%%
    if path_outsub2 is not False:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
#        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()
    #%%            
    # ======================================================================================================================
    # tigramite 4
    # ======================================================================================================================

    T, N = data.shape # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data, mask=data_mask, var_names=var_names)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci   = PCMCI(dataframe=dataframe,
                    cond_ind_test=parcorr,
                    selected_variables=None,
                    verbosity=verbosity)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha, tau_min=tau_min,
                              max_conds_dim=max_conds_dim, 
                              max_combinations=max_combinations,
                              max_conds_px=max_conds_px,
                              max_conds_py=max_conds_py)

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                   q_matrix=q_matrix,
                                   val_matrix=results['val_matrix'],
                                   alpha_level=alpha_level)
    #%%
    if path_outsub2 is not False:
        file = io.open(txt_fname, mode='w+')
        file.write(f.getvalue())
        file.close()
        f.close()

        sys.stdout = orig_stdout


    return pcmci, q_matrix, results
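A hedged usage sketch for run_pcmci above; the synthetic data, mask and split label are illustrative assumptions, not taken from the original pipeline:

import numpy as np

T, N = 500, 3
data = np.random.randn(T, N)
data_mask = np.zeros((T, N), dtype=bool)   # nothing masked out
var_names = ['RV', 'prec1', 'prec2']

# path_outsub2=False keeps the PCMCI output on stdout instead of writing a txt file
pcmci, q_matrix, results = run_pcmci(data, data_mask, var_names,
                                     path_outsub2=False, s=0,
                                     tau_max=2, pc_alpha=0.05)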
Example #14
def caus_gpdc(data, var_names):
    import numpy as np
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    import sklearn

    import tigramite
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
    from tigramite.models import LinearMediation, Prediction

    data_mask_row = np.zeros(len(data))
    for i in range(68904):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True
    data_mask = np.zeros(data.shape)

    data_mask[:, 0] = data_mask_row
    data_mask[:, 1] = data_mask_row
    data_mask[:, 2] = data_mask_row
    data_mask[:, 9] = data_mask_row
    data_mask[:, 10] = data_mask_row
    data_mask[:, 11] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    datatime = np.arange(len(data))

    # tp.plot_timeseries(data, datatime, var_names, use_mask=True,
    #                    mask=data_mask, grey_masked_samples='data')

    gpdc = GPDC(significance='analytic',
                gp_params=None,
                use_mask=True,
                mask_type='y')
    gpdc.generate_and_save_nulldists(sample_sizes=range(495, 501),
                                     null_dist_filename='dc_nulldists.npz')
    gpdc.null_dist_filename = 'dc_nulldists.npz'
    pcmci_gpdc = PCMCI(dataframe=dataframe,
                       cond_ind_test=gpdc,
                       var_names=var_names,
                       verbosity=1)

    # correlations = pcmci.get_lagged_dependencies(tau_max=20)
    # lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
    #                                    setup_args={'var_names':var_names,
    #                                    'x_base':5, 'y_base':.5})

    results = pcmci_gpdc.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # print("p-values")
    # print (results['p_matrix'].round(3))
    # print("MCI partial correlations")
    # print (results['val_matrix'].round(2))

    q_matrix = pcmci_gpdc.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                                fdr_method='fdr_bh')
    pcmci_gpdc._print_significant_links(p_matrix=results['p_matrix'],
                                        q_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=0.01)

    link_matrix = pcmci_gpdc._return_significant_parents(
        pq_matrix=q_matrix, val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix