コード例 #1
0
class PCMCIPlugin:
    def input(self, inputfile):
        self.links_coeffs = {}
        infile = open(inputfile, 'r')
        for line in infile:
            contents = line.split('\t')
            var = int(contents[0])
            driver = int(contents[1])
            lag = int(contents[2])
            coeff = float(contents[3])
            if (not var in self.links_coeffs):
                self.links_coeffs[var] = []
            self.links_coeffs[var].append(((driver, lag), coeff))

    def run(self):
        data, _ = pp.var_process(self.links_coeffs, T=1000)
        dataframe = pp.DataFrame(data)
        cond_ind_test = ParCorr()
        self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)

    def output(self, outputfile):
        self.pcmciobj.print_significant_links(
            p_matrix=self.results['p_matrix'],
            val_matrix=self.results['val_matrix'],
            alpha_level=0.05)
コード例 #2
0
def a_run_pcmciplus(a_pcmciplus, a_pcmciplus_params):
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = a_pcmciplus

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=2)
    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )
    # Print true links
    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)
    # Return the results and the expected result
    return results['graph'], true_graph
コード例 #3
0
ファイル: main.py プロジェクト: yangxhcaf/cybertraining
    writer = csv.writer(csv_file,
                        delimiter=",",
                        quotechar="|",
                        quoting=csv.QUOTE_MINIMAL)
    #[[[1 2 3]]] Three brackets to get through.
    for sector in val_matrix:
        print("sector: ", sector)
        for row in sector:
            print("row: ", row)
            writer.writerow(row)
        writer.writerow([])

q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                       fdr_method='fdr_bh')
pcmci.print_significant_links(p_matrix=results['p_matrix'],
                              q_matrix=q_matrix,
                              val_matrix=results['val_matrix'],
                              alpha_level=0.01)

link_matrix = pcmci.return_significant_parents(
    pq_matrix=q_matrix, val_matrix=results['val_matrix'],
    alpha_level=0.01)['link_matrix']

graph = tp.plot_graph(
    val_matrix=results['val_matrix'],
    link_matrix=link_matrix,
    var_names=headers,
    link_colorbar_label='cross-MCI',
    node_colorbar_label='auto-MCI',
)

if verbose > 1:
コード例 #4
0
ファイル: wrapper_RGCPD_tig.py プロジェクト: yangxhcaf/RGCPD
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    #=====================================================================================
    #
    # 4) PCMCI-algorithm
    #
    #=====================================================================================

    # save output
    if ex['SaveTF'] == True:
        #        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'),
                                  'w+')

#%%
# amount of text printed:
    verbosity = 3

    # alpha level for independence test within the pc procedure (finding parents)
    pc_alpha = ex['pcA_sets'][ex['pcA_set']]
    # alpha level for multiple linear regression model while conditining on parents of
    # parents
    alpha_level = ex['alpha_level_tig']
    print('run tigramite 4, run.pcmci')
    print(('alpha level(s) for independence tests within the pc procedure'
           '(finding parents): {}'.format(pc_alpha)))
    print((
        'alpha level for multiple linear regression model while conditining on parents of '
        'parents: {}'.format(ex['alpha_level_tig'])))

    # Retrieve traintest info
    traintest = df_splits

    # load Response Variable class
    RV = ex[ex['RV_name']]
    # create list with all actors, these will be merged into the fulldata array
    allvar = ex['vars'][0]
    var_names_corr = []
    actorlist = []
    cols = [[RV.name]]

    for var in allvar[:]:
        print(var)
        actor = outdic_actors[var]
        if actor.ts_corr[s].size != 0:
            ts_train = actor.ts_corr[s].values
            actorlist.append(ts_train)
            # create array which numbers the regions
            var_idx = allvar.index(var)
            n_regions = actor.ts_corr[s].shape[1]
            actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                              for i in range(n_regions)]
            # Array of corresponing regions with var_names_corr (first entry is RV)
            var_names_corr = var_names_corr + actor.var_info
            cols.append(list(actor.ts_corr[s].columns))
            index_dates = actor.ts_corr[s].index
    var_names_corr.insert(0, RV.name)

    # stack actor time-series together:
    fulldata = np.concatenate(tuple(actorlist), axis=1)

    print(('There are {} regions in total'.format(fulldata.shape[1])))
    # add the full 1D time series of interest as first entry:

    fulldata = np.column_stack((RV.RVfullts, fulldata))
    df_data = pd.DataFrame(fulldata, columns=flatten(cols), index=index_dates)

    if ex['import_prec_ts'] == True:
        var_names_full = var_names_corr.copy()
        for d in ex['precursor_ts']:
            path_data = d[1]
            if len(path_data) > 1:
                path_data = ''.join(list(path_data))
            # skip first col because it is the RV ts
            df_data_ext = func_fc.load_hdf5(
                path_data)['df_data'].iloc[:, 1:].loc[s]
            cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                    df_data_ext.dtypes == 'float32')
            cols_ext = list(df_data_ext.columns[cols_ts])
            # cols_ext must be of format '{}_{int}_{}'
            lab_int = 100
            for i, c in enumerate(cols_ext):
                char = c.split('_')[1]
                if char.isdigit():
                    pass
                else:
                    cols_ext[i] = c.replace(char, str(lab_int)) + char
                    lab_int += 1

            df_data_ext = df_data_ext[cols_ext]
            to_freq = ex['tfreq']
            if to_freq != 1:
                start_end_date = (ex['sstartdate'], ex['senddate'])
                start_end_year = (ex['startyear'], ex['endyear'])
            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
                                                      to_freq,
                                                      start_end_date,
                                                      start_end_year,
                                                      seldays='part')[0]
            #            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
            #                                                     ex, ex['tfreq'],
            #                                                     seldays='part')[0]
            # Expand var_names_corr
            n = var_names_full[-1][0] + 1
            add_n = n + len(cols_ext)
            n_var_idx = var_names_full[-1][-1] + 1
            for i in range(n, add_n):
                var_names_full.append([i, cols_ext[i - n], n_var_idx])
            df_data = df_data.merge(df_data_ext,
                                    left_index=True,
                                    right_index=True)
    else:
        var_names_full = var_names_corr

    bool_train = traintest.loc[s]['TrainIsTrue']
    bool_RV_train = np.logical_and(bool_train, traintest.loc[s]['RV_mask'])
    dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
    dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index

    RVfull_train = RV.RVfullts.sel(time=dates_train)
    datesfull_train = pd.to_datetime(RVfull_train.time.values)
    data = df_data.loc[datesfull_train].values
    print((data.shape))

    # get RV datamask (same shape als data)
    data_mask = [
        True if d in dates_RV_train else False for d in datesfull_train
    ]
    data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

    # add traintest mask to fulldata
    #    dates_all = pd.to_datetime(RV.RVfullts.index)
    #    dates_RV  = pd.to_datetime(RV.RV_ts.index)
    dates_all = pd.to_datetime(RV.RVfullts.time.values)
    dates_RV = pd.to_datetime(RV.RV_ts.time.values)
    df_data['TrainIsTrue'] = [
        True if d in datesfull_train else False for d in dates_all
    ]
    df_data['RV_mask'] = [True if d in dates_RV else False for d in dates_all]

    # ======================================================================================================================
    # tigramite 3
    # ======================================================================================================================

    T, N = data.shape  # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names_full)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=4)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                              pc_alpha=pc_alpha,
                              tau_min=0,
                              max_combinations=ex['max_comb_actors'])

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    # returns all parents, not just causal precursors (of lag>0)
    sig = rgcpd.return_sign_parents(pcmci,
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=alpha_level)

    all_parents = sig['parents']
    #    link_matrix = sig['link_matrix']

    links_RV = all_parents[0]

    df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)
    #%%

    rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                      outdic_actors, map_proj, ex)

    #%%
    if ex['SaveTF'] == True:
        if sys.version[:1] == '3':
            fname = f's{s}_' + ex['params'] + '.txt'
            file = io.open(os.path.join(ex['fig_subpath'], fname), mode='w+')
            file.write(f.getvalue())
            file.close()
            f.close()
        elif sys.version[:1] == '2':
            f.close()
        sys.stdout = orig_stdout

    return df, df_data
コード例 #5
0
def test_order_independence_pcmciplus(a_pcmciplus_order_independence,
                                      a_pcmciplus_params_order_independence):
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = \
        a_pcmciplus_order_independence

    data = dataframe.values
    T, N = data.shape

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params_order_independence

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=1)
    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)

    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )
    correct_results = results['graph']

    for perm in itertools.permutations(range(N)):

        print(perm)
        data_new = np.copy(data[:, perm])
        dataframe = pp.DataFrame(data_new, var_names=list(perm))
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=1)
        results = pcmci.run_pcmciplus(
            selected_links=None,
            tau_min=tau_min,
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            contemp_collider_rule=contemp_collider_rule,
            conflict_resolution=conflict_resolution,
            reset_lagged_links=reset_lagged_links,
            max_conds_dim=None,
            max_conds_py=None,
            max_conds_px=None,
        )

        tmp = np.take(correct_results, perm, axis=0)
        back_converted_result = np.take(tmp, perm, axis=1)

        for tau in range(tau_max + 1):
            if not np.allclose(results['graph'][:, :, tau],
                               back_converted_result[:, :, tau]):
                print(tau)
                print(results['graph'][:, :, tau])
                print(back_converted_result[:, :, tau])
                print(back_converted_result[:, :, tau] -
                      results['graph'][:, :, tau])
                print(perm)

        # np.allclose(results['graph'], back_converted_result)
        np.testing.assert_equal(results['graph'], back_converted_result)
コード例 #6
0
ファイル: wrapper_PCMCI.py プロジェクト: yangxhcaf/RGCPD
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0, tau_max=1, 
              pc_alpha=None, alpha_level=0.05, max_conds_dim=4, max_combinations=1, 
              max_conds_py=None, max_conds_px=None, verbosity=4):
    

    
    #%%
    if path_outsub2 is not False:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
#        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()
    #%%            
    # ======================================================================================================================
    # tigramite 4
    # ======================================================================================================================

    T, N = data.shape # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data, mask=data_mask, var_names=var_names)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci   = PCMCI(dataframe=dataframe,
                    cond_ind_test=parcorr,
                    selected_variables=None,
                    verbosity=verbosity)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha, tau_min=tau_min,
                              max_conds_dim=max_conds_dim, 
                              max_combinations=max_combinations,
                              max_conds_px=max_conds_px,
                              max_conds_py=max_conds_py)

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                   q_matrix=q_matrix,
                                   val_matrix=results['val_matrix'],
                                   alpha_level=alpha_level)
    #%%
    if path_outsub2 is not False:
        file = io.open(txt_fname, mode='w+')
        file.write(f.getvalue())
        file.close()
        f.close()

        sys.stdout = orig_stdout


    return pcmci, q_matrix, results
コード例 #7
0
#                   usecols=['P_F', 'SWC_F_MDS_1', 'TA_F'])
data = data.replace(-9999, np.nan)
data = data.fillna(data.mean()).values
#var_names = [r'$TA$', r'$PA$', r'$P$', r'$T$', r'$SWC$']
#var_names = [r'$P$', r'$SWC$']
#var_names = [r'$P$', r'$SWC$', r'$TA$']
var_names = [r'$P$', r'$SWC$', r'$TA$', r'$PA$', r'$T$']

# print data
dataframe = pp.DataFrame(np.array(data), var_names=var_names)
tp.plot_timeseries(dataframe)
plt.show()

# main for PCMCI
parcorr = ParCorr()
pcmci_parcorr = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=2)
results = pcmci_parcorr.run_pcmci(tau_max=2, pc_alpha=0.2)
pcmci_parcorr.print_significant_links(p_matrix=results['p_matrix'],
                                      val_matrix=results['val_matrix'],
                                      alpha_level=0.01)
link_matrix = pcmci_parcorr.return_significant_parents(
    pq_matrix=results['p_matrix'],
    val_matrix=results['val_matrix'],
    alpha_level=0.01)['link_matrix']
# Plot time series graph
tp.plot_time_series_graph(figsize=(6, 3),
                          val_matrix=results['val_matrix'],
                          link_matrix=link_matrix,
                          var_names=var_names,
                          link_colorbar_label='MCI')