def test_mci(self):

        # Setting up strict test level
        pc_alpha = 0.05  #[0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
        tau_max = 2
        alpha_level = 0.01

        dataframe = pp.DataFrame(self.data)

        cond_ind_test = ParCorr(verbosity=verbosity)

        pcmci = PCMCI(selected_variables=None,
                      dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=verbosity)

        results = pcmci.run_mci(
            selected_links=None,
            tau_min=1,
            tau_max=tau_max,
            parents=self.true_parents,
            max_conds_py=None,
            max_conds_px=None,
        )

        parents = pcmci._return_significant_parents(
            pq_matrix=results['p_matrix'],
            val_matrix=results['val_matrix'],
            alpha_level=alpha_level,
        )['parents']
        # print parents
        # print _get_parent_graph(true_parents)
        assert_graphs_equal(parents, self.true_parents)
    def test_pcmci(self):
        # Setting up strict test level
        pc_alpha = 0.05  #[0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
        tau_max = 2
        alpha_level = 0.01

        dataframe = pp.DataFrame(self.data)

        cond_ind_test = ParCorr(verbosity=verbosity)

        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=verbosity)

        results = pcmci.run_pcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
        )

        parents = pcmci._return_significant_parents(
            pq_matrix=results['p_matrix'],
            val_matrix=results['val_matrix'],
            alpha_level=alpha_level)['parents']

        # print parents
        # print self.true_parents
        assert_graphs_equal(parents, self.true_parents)
Beispiel #3
0
class PCMCIPlugin:
    def input(self, inputfile):
        self.links_coeffs = {}
        infile = open(inputfile, 'r')
        for line in infile:
            contents = line.split('\t')
            var = int(contents[0])
            driver = int(contents[1])
            lag = int(contents[2])
            coeff = float(contents[3])
            if (not var in self.links_coeffs):
                self.links_coeffs[var] = []
            self.links_coeffs[var].append(((driver, lag), coeff))

    def run(self):
        data, _ = pp.var_process(self.links_coeffs, T=1000)
        dataframe = pp.DataFrame(data)
        cond_ind_test = ParCorr()
        self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)

    def output(self, outputfile):
        self.pcmciobj.print_significant_links(
            p_matrix=self.results['p_matrix'],
            val_matrix=self.results['val_matrix'],
            alpha_level=0.05)
Beispiel #4
0
def time_lagged_correlation():
    """Runs the time-lagged correlation analysis experiment.

    This function alculates the time-lagged correlation between the variables for lags of up to 48 hours and plots the
    result as a scatterplot matrix.
    """
    var_names = [
        "dayOfYear", "minuteOfYear", "minuteOfDay", "dayOfWeek", "isWeekend",
        "humidity_sensor", "temperature", "precip_intensity", "cloud_cover",
        "p1", "p2", "dew_point", "wind_speed"
    ]
    tau_min = 0
    tau_max = 48
    dataframe, var_list = generate_dataframe(var_names)
    print(f"Variable names: {var_names}")
    ci_test = ParCorr(significance='analytic')
    pcmci = PCMCI(dataframe=dataframe, cond_ind_test=ci_test, verbosity=1)
    correlations = pcmci.get_lagged_dependencies(tau_min=tau_min,
                                                 tau_max=tau_max)
    lag_func_matrix = tp.plot_lagfuncs(
        name="experiments/causal_discovery/results/time_lagged_correlation.png",
        val_matrix=correlations,
        setup_args={
            'var_names': var_names,
            'figsize': (50, 25),
            'label_fontsize': 12,
            'label_space_top': 0.025,
            'label_space_left': 0.05,
            'lag_units': 'hours',
            'x_base': 6,
            'y_base': .5
        })
    print(lag_func_matrix)
Beispiel #5
0
def run_pc_stable_parallel(j):
    """Wrapper around PCMCI.run_pc_stable estimating the parents for a single 
    variable j.

    Parameters
    ----------
    j : int
        Variable index.

    Returns
    -------
    j, pcmci_of_j, parents_of_j : tuple
        Variable index, PCMCI object, and parents of j
    """

    # CondIndTest is initialized globally below
    # Further parameters of PCMCI as described in the documentation can be
    # supplied here:
    pcmci_of_j = PCMCI(dataframe=dataframe,
                       cond_ind_test=cond_ind_test,
                       selected_variables=[j],
                       verbosity=verbosity)

    # Run PC condition-selection algorithm. Also here further parameters can be
    # specified:
    parents_of_j = pcmci_of_j.run_pc_stable(
        selected_links=selected_links,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
    )

    # We return also the PCMCI object because it may contain pre-computed
    # results can be re-used in the MCI step (such as residuals or null
    # distributions)
    return j, pcmci_of_j, parents_of_j
    def test_pc_stable_max_conds_dim(self):

        # Setting up strict test level
        pc_alpha = 0.05  #[0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
        tau_max = 2
        alpha_level = 0.01

        # true_parents_here = {0: [],
        #                1: [(1, -1), (0, -1)],
        #                2: []
        #                }

        dataframe = pp.DataFrame(self.data)

        cond_ind_test = ParCorr(verbosity=verbosity)

        pcmci = PCMCI(selected_variables=None,
                      dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=verbosity)

        pcmci.run_pc_stable(
            selected_links=None,
            tau_min=1,
            tau_max=tau_max,
            save_iterations=False,
            pc_alpha=pc_alpha,
            max_conds_dim=2,
            max_combinations=1,
        )

        parents = pcmci.all_parents
        # print parents
        # print _get_parent_graph(true_parents)
        assert_graphs_equal(parents, self.true_parents)
Beispiel #7
0
def run_pc_stable_parallel(j, dataframe, cond_ind_test, params):
    """Wrapper around PCMCI.run_pc_stable estimating the parents for a single 
    variable j.

    Parameters
    ----------
    j : int
        Variable index.

    Returns
    -------
    j, pcmci_of_j, parents_of_j : tuple
        Variable index, PCMCI object, and parents of j
    """

    N = dataframe.values.shape[1]

    # CondIndTest is initialized globally below
    # Further parameters of PCMCI as described in the documentation can be
    # supplied here:
    pcmci_of_j = PCMCI(
        dataframe=dataframe,
        cond_ind_test=cond_ind_test,
        selected_variables=[j],
        # var_names=var_names,
        verbosity=verbosity)

    # Run PC condition-selection algorithm. Also here further parameters can be
    # specified:
    if method_arg == 'pcmci':
        parents_of_j = pcmci_of_j.run_pc_stable(
            selected_links=params['selected_links'],
            tau_max=params['tau_max'],
            pc_alpha=params['pc_alpha'],
        )
    elif method_arg == 'gc':
        parents_of_j = {}
        for i in range(N):
            if i == j:
                parents_of_j[i] = [
                    (var, -lag) for var in range(N)
                    for lag in range(params['tau_min'], params['tau_max'] + 1)
                ]
            else:
                parents_of_j[i] = []
    elif method_arg == 'corr':
        parents_of_j = {}
        for i in range(N):
            parents_of_j[i] = []

    # We return also the PCMCI object because it may contain pre-computed
    # results can be re-used in the MCI step (such as residuals or null
    # distributions)
    return j, pcmci_of_j, parents_of_j
Beispiel #8
0
    def __init__(self,
                 dataframe,
                 train_indices,
                 test_indices,
                 prediction_model,
                 cond_ind_test=None,
                 data_transform=None,
                 verbosity=0):

        # Default value for the mask
        mask = dataframe.mask
        if mask is None:
            mask = np.zeros(dataframe.values.shape, dtype='bool')
        # Get the dataframe shape
        T = len(dataframe.values)
        # Have the default dataframe be the training data frame
        train_mask = np.copy(mask)
        train_mask[[t for t in range(T) if t not in train_indices]] = True
        self.dataframe = DataFrame(dataframe.values,
                                   mask=train_mask,
                                   missing_flag=dataframe.missing_flag)
        # Initialize the models baseclass with the training dataframe
        Models.__init__(self,
                        dataframe=self.dataframe,
                        model=prediction_model,
                        data_transform=data_transform,
                        mask_type='y',
                        verbosity=verbosity)

        # Build the testing dataframe as well
        self.test_mask = np.copy(mask)
        self.test_mask[[t for t in range(T) if t not in test_indices]] = True

        # Setup the PCMCI instance
        if cond_ind_test is not None:
            # Force the masking
            cond_ind_test.set_mask_type('y')
            cond_ind_test.verbosity = verbosity
            PCMCI.__init__(self,
                           dataframe=self.dataframe,
                           cond_ind_test=cond_ind_test,
                           selected_variables=None,
                           verbosity=verbosity)

        # Set the member variables
        self.cond_ind_test = cond_ind_test
        # Initialize member varialbes that are set outside
        self.target_predictors = None
        self.selected_targets = None
        self.fitted_model = None
        self.test_array = None
Beispiel #9
0
    def __init__(self,
                 dataframe,
                 train_indices,
                 test_indices,
                 prediction_model,
                 cond_ind_test=None,
                 data_transform=None,
                 verbosity=0):

        # Default value for the mask
        mask = dataframe.mask
        if mask is None:
            mask = np.zeros(dataframe.values.shape, dtype='bool')
        # Get the dataframe shape
        T = len(dataframe.values)
        # Have the default dataframe be the training data frame
        train_mask = np.copy(mask)
        train_mask[[t for t in range(T) if t not in train_indices]] = True
        self.dataframe = DataFrame(dataframe.values,
                                   mask=train_mask,
                                   missing_flag=dataframe.missing_flag)
        # Initialize the models baseclass with the training dataframe
        Models.__init__(self,
                        dataframe=self.dataframe,
                        model=prediction_model,
                        data_transform=data_transform,
                        mask_type='y',
                        verbosity=verbosity)

        # Build the testing dataframe as well
        self.test_mask = np.copy(mask)
        self.test_mask[[t for t in range(T) if t not in test_indices]] = True

        # Setup the PCMCI instance
        if cond_ind_test is not None:
            # Force the masking
            cond_ind_test.set_mask_type('y')
            cond_ind_test.verbosity = verbosity
            PCMCI.__init__(self,
                           dataframe=self.dataframe,
                           cond_ind_test=cond_ind_test,
                           selected_variables=None,
                           verbosity=verbosity)

        # Set the member variables
        self.cond_ind_test = cond_ind_test
        # Initialize member varialbes that are set outside
        self.target_predictors = None
        self.selected_targets = None
        self.fitted_model = None
        self.test_array = None
def a_run_pcmciplus(a_pcmciplus, a_pcmciplus_params):
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = a_pcmciplus

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=2)
    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )
    # Print true links
    print("************************")
    print("\nTrue Graph")
    for lag in range(tau_max):
        print("Lag %d = ", lag)
        print(true_graph[:, :, lag])
    # pcmci.print_significant_links(
    #                             p_matrix=(true_graph != ""),
    #                             val_matrix=true_graph,
    #                             conf_matrix=None,
    #                             q_matrix=None,
    #                             graph=true_graph,
    #                             ambiguous_triples=None,
    #                             alpha_level=0.05)
    # Return the results and the expected result
    return results['graph'], true_graph
Beispiel #11
0
def issue38():
    dpath = os.path.dirname(os.path.abspath(__file__))
    fname = 'tigramite_issue_38_input_example.csv'
    fpath = os.path.join(dpath, fname)
    df = pd.read_csv(fpath, index_col=0)
    print(df)
    data = df.values

    tdf = DataFrame(
        data=data,
        mask=None,
        missing_flag=None,
        var_names=df.columns,
        datatime=None,
    )

    indp_test = CMIknn(
        # knn=None,
        # shuffle_neighbors=None,
        # transform=None,
        # significance=None,
    )

    selected_variables = [col_lbl for col_lbl in df.columns if 'i' in col_lbl]
    selected_variables_ix = [
        df.columns.get_loc(lbl) for lbl in selected_variables
    ]
    print(("Init PCMCI with:"
           f"dataframe={tdf},"
           f"cond_ind_test={indp_test},"
           f"selected_variables={selected_variables_ix},"
           f"verbosity=10,"))
    pcmci = PCMCI(
        dataframe=tdf,
        cond_ind_test=indp_test,
        selected_variables=selected_variables_ix,
        verbosity=10,
    )

    max_lag = 24
    alpha = 0.1

    print("Running PCMCI...")
    pcmci.run_pcmci(tau_max=max_lag, pc_alpha=alpha)

    print("Done successfully! No errors!")
Beispiel #12
0
def pcmci_setup(data):
	dataframe = pp.DataFrame(data.values, var_names=list(data.columns))
	parcorr = ParCorr(significance='analytic')
	pcmci = PCMCI(
		dataframe=dataframe,
		cond_ind_test=parcorr,
		verbosity=1)
	return pcmci
Beispiel #13
0
def a_pcmci(a_sample, request):
    # Unpack the test data and true parent graph
    dataframe, true_parents = a_sample
    # Build the PCMCI instance
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=ParCorr(verbosity=VERBOSITY),
                  verbosity=VERBOSITY)
    # Return the constructed PCMCI, expected results, and common parameters
    return pcmci, true_parents
def run_pc_stable_parallel(j):
    """Wrapper around PCMCI.run_pc_stable estimating the parents for a single 
    variable j.

    Parameters
    ----------
    j : int
        Variable index.

    Returns
    -------
    j, pcmci_of_j, parents_of_j : tuple
        Variable index, PCMCI object, and parents of j
    """

    # CondIndTest is initialized globally below
    # Further parameters of PCMCI as described in the documentation can be
    # supplied here:
    pcmci_of_j = PCMCI(
        dataframe=dataframe,
        cond_ind_test=cond_ind_test,
        selected_variables=[j],
        verbosity=verbosity)

    # Run PC condition-selection algorithm. Also here further parameters can be
    # specified:
    parents_of_j = pcmci_of_j.run_pc_stable(
                      selected_links=selected_links,
                      tau_max=tau_max,
                      pc_alpha=pc_alpha,
            )

    # We return also the PCMCI object because it may contain pre-computed 
    # results can be re-used in the MCI step (such as residuals or null
    # distributions)
    return j, pcmci_of_j, parents_of_j
Beispiel #15
0
def a_pcmci(a_sample, a_test, a_common_params, request):
    # Unpack the test data and true parent graph
    dataframe, true_parents = a_sample
    # Unpack the common parameters
    tau_min, tau_max, sel_link = a_common_params
    # Get the parameters from this request
    select_vars = request.param
    # Build the PCMCI instance
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=a_test,
                  verbosity=VERBOSITY)
    # Select the correct links if they are given
    select_links = _select_links(sel_link, true_parents)
    # Ensure we change the true parents to be the same as the selected links
    if select_links is not None:
        true_parents = select_links
    # Return the constructed PCMCI, expected results, and common parameters
    return pcmci, true_parents, tau_min, tau_max, select_links
def test_alphas(dataframe,
                cond_ind_test,
                alphas,
                var_names,
                tau_min=0,
                tau_max=1,
                selected_links=None):
    """Executes the PCMCI algorithm over a list of different alphas and plots the results.

    Args:
        dataframe: The TIGRAMITE dataframe to use.
        cond_ind_test: The conditional independence test to use.
        alphas: The list of individual alphas.
        var_names: The names of the variables contained in the dataframe.
        tau_min: The minimum lag.
        tau_max: The maximum lag.
        selected_links: Dictionalry specifying whether only selected links should be tested.
    """
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=1)
    for pc_alpha in alphas:
        run_experiment(pcmci, cond_ind_test, pc_alpha, tau_min, tau_max,
                       var_names, selected_links)
Beispiel #17
0
def caus_gpdc(data, var_names):
    import numpy as np
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    import sklearn

    import tigramite
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
    from tigramite.models import LinearMediation, Prediction

    data_mask_row = np.zeros(len(data))
    for i in range(68904):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True
    data_mask = np.zeros(data.shape)

    data_mask[:, 0] = data_mask_row
    data_mask[:, 1] = data_mask_row
    data_mask[:, 2] = data_mask_row
    data_mask[:, 9] = data_mask_row
    data_mask[:, 10] = data_mask_row
    data_mask[:, 11] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    datatime = np.arange(len(data))

    # tp.plot_timeseries(data, datatime, var_names, use_mask=True,
    #                    mask=data_mask, grey_masked_samples='data')

    gpdc = GPDC(significance='analytic',
                gp_params=None,
                use_mask=True,
                mask_type='y')
    gpdc.generate_and_save_nulldists(sample_sizes=range(495, 501),
                                     null_dist_filename='dc_nulldists.npz')
    gpdc.null_dist_filename = 'dc_nulldists.npz'
    pcmci_gpdc = PCMCI(dataframe=dataframe,
                       cond_ind_test=gpdc,
                       var_names=var_names,
                       verbosity=1)

    # correlations = pcmci.get_lagged_dependencies(tau_max=20)
    # lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
    #                                    setup_args={'var_names':var_names,
    #                                    'x_base':5, 'y_base':.5})

    results = pcmci_gpdc.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # print("p-values")
    # print (results['p_matrix'].round(3))
    # print("MCI partial correlations")
    # print (results['val_matrix'].round(2))

    q_matrix = pcmci_gpdc.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                                fdr_method='fdr_bh')
    pcmci_gpdc._print_significant_links(p_matrix=results['p_matrix'],
                                        q_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=0.01)

    link_matrix = pcmci_gpdc._return_significant_parents(
        pq_matrix=q_matrix, val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix
Beispiel #18
0
def run_pcmci(data, data_mask, var_names, path_outsub2, s, tau_min=0, tau_max=1, 
              pc_alpha=None, alpha_level=0.05, max_conds_dim=4, max_combinations=1, 
              max_conds_py=None, max_conds_px=None, verbosity=4):
    

    
    #%%
    if path_outsub2 is not False:
        txt_fname = os.path.join(path_outsub2, f'split_{s}_PCMCI_out.txt')
#        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        sys.stdout = f = io.StringIO()
    #%%            
    # ======================================================================================================================
    # tigramite 4
    # ======================================================================================================================

    T, N = data.shape # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data, mask=data_mask, var_names=var_names)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci   = PCMCI(dataframe=dataframe,
                    cond_ind_test=parcorr,
                    selected_variables=None,
                    verbosity=verbosity)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha, tau_min=tau_min,
                              max_conds_dim=max_conds_dim, 
                              max_combinations=max_combinations,
                              max_conds_px=max_conds_px,
                              max_conds_py=max_conds_py)

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                   q_matrix=q_matrix,
                                   val_matrix=results['val_matrix'],
                                   alpha_level=alpha_level)
    #%%
    if path_outsub2 is not False:
        file = io.open(txt_fname, mode='w+')
        file.write(f.getvalue())
        file.close()
        f.close()

        sys.stdout = orig_stdout


    return pcmci, q_matrix, results
        # Specify time axis and variable names
        datatime = np.arange(len(data))

        # ======================================================================================================================
        # pc algorithm: only parents for selected_variables are calculated (here entry[0] = PoV)
        # ======================================================================================================================

        parcorr = ParCorr(significance='analytic',
                          use_mask=True,
                          mask_type='y',
                          verbosity=2)
        pcmci = PCMCI(
            dataframe=dataframe,
            cond_ind_test=parcorr,
            var_names=var_names,
            selected_variables=
            None,  #[0], # only parents for the monsoon trough rainfall
            verbosity=2)

        # ======================================================================================================================
        # results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha = pc_alpha, tau_min = tau_min, max_combinations=1  )
        # ======================================================================================================================

        if p == 0:
            pc_alpha = pcA_set1a
            pc_alpha_name = str(pcA_set1a)
        elif p == 1:
            pc_alpha = pcA_set1b
            pc_alpha_name = str(pcA_set1b)
        elif p == 2:
Beispiel #20
0
data[:, 3] = nc_file.variables["ENSOI"][:]
a = nc_file.variables["NAOIN"][:]
b = nc_file.variables["NAOIS"][:]
data[:, 4] = a - b

data_mask = np.zeros(data.shape)
for t in range(1, T + 1):
    if (t % 73) >= 12 and (t % 73) <= 30:
        data_mask[t - 1, :] = True

# Initialize dataframe object, specify time axis and variable names
var_names = ['WPSH', 'IO', 'WNP', 'ENSO', 'NAO']
dataframe = pp.DataFrame(data, mask=data_mask)

parcorr = ParCorr(significance='analytic', mask_type='xyz')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr)
results = pcmci.run_pcmci(tau_max=12, pc_alpha=0.03)

# Correct p-values
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                       fdr_method='fdr_bh')

# Plotting
link_matrix = pcmci.return_significant_parents(
    pq_matrix=q_matrix, val_matrix=results['val_matrix'],
    alpha_level=0.03)['link_matrix']

tp.plot_graph(val_matrix=results['val_matrix'],
              link_matrix=link_matrix,
              var_names=var_names)
"""
Beispiel #21
0
 def run(self):
     data, _ = pp.var_process(self.links_coeffs, T=1000)
     dataframe = pp.DataFrame(data)
     cond_ind_test = ParCorr()
     self.pcmciobj = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
     self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)
                         datatime=np.arange(len(data)),
                         var_names=var_names)

# In[4]:

data.shape

# In[5]:

tp.plot_timeseries(dataframe)
plt.show()

# In[6]:

parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)

# In[7]:

correlations = pcmci.get_lagged_dependencies(tau_max=20,
                                             val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
                                       'figsize': (6, 6),
                                       'var_names': var_names,
                                       'x_base': 10,
                                       'y_base': .5
                                   })

# In[8]:
Beispiel #23
0
def init_pcmci(df_data,
               significance='analytic',
               mask_type='y',
               selected_variables=None,
               verbosity=5):
    '''
    First initializing pcmci object for each training set. This allows to plot
    lagged cross-correlations which help to identity a reasonably tau_max.

    Parameters
    ----------
    df_data : pandas DataFrame
        df_data is retrieved by running rg.get_ts_prec().
    significance : str, optional
        DESCRIPTION. The default is 'analytic'.
    mask_type : str, optional
        DESCRIPTION. The default is 'y'.
    verbosity : int, optional
        DESCRIPTION. The default is 4.
    selected_variables : list of integers, optional (default: None)
        Specify to estimate parents only for selected variables. If None is
        passed, parents are estimated for all variables.

    Returns
    -------
    dictionary of format {split:pcmci}.

    '''
    splits = df_data.index.levels[0]
    pcmci_dict = {}
    RV_mask = df_data['RV_mask']
    for s in range(splits.size):

        TrainIsTrue = df_data['TrainIsTrue'].loc[s]
        df_data_s = df_data.loc[s][TrainIsTrue == True]
        df_data_s = df_data_s.dropna(axis=1, how='all')
        if any(df_data_s.isna().values.flatten()):
            if verbosity > 0:
                print('Warnning: nans detected')


#        print(np.unique(df_data_s.isna().values))
        var_names = [
            k for k in df_data_s.columns
            if k not in ['TrainIsTrue', 'RV_mask']
        ]
        df_data_s = df_data_s.loc[:, var_names]
        data = df_data_s.values
        data_mask = ~RV_mask.loc[s][TrainIsTrue == True].values
        # indices with mask == False are used (with mask_type 'y')
        data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

        # create dataframe in Tigramite format
        dataframe = pp.DataFrame(data=data,
                                 mask=data_mask,
                                 var_names=var_names)

        parcorr = ParCorr(significance=significance,
                          mask_type=mask_type,
                          verbosity=0)
        parcorr.verbosity = verbosity  # to avoid print init text each time

        # ======================================================================================================================
        # pc algorithm: only parents for selected_variables are calculated
        # ======================================================================================================================
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=parcorr,
                      verbosity=verbosity)
        pcmci_dict[s] = pcmci
    return pcmci_dict
Beispiel #24
0
def test_order_independence_pcmciplus(a_pcmciplus_order_independence,
                                      a_pcmciplus_params_order_independence):
    # Unpack the pcmci and the true parents, and common parameters
    dataframe, true_graph, links_coeffs, tau_min, tau_max = \
        a_pcmciplus_order_independence

    data = dataframe.values
    T, N = data.shape

    # Unpack the parameters
    (
        pc_alpha,
        contemp_collider_rule,
        conflict_resolution,
        reset_lagged_links,
        cond_ind_test_class,
    ) = a_pcmciplus_params_order_independence

    if cond_ind_test_class == 'oracle_ci':
        cond_ind_test = OracleCI(links_coeffs)
    elif cond_ind_test_class == 'par_corr':
        cond_ind_test = ParCorr()

    # Run the PCMCI algorithm with the given parameters
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=cond_ind_test,
                  verbosity=1)
    print("************************")
    print("\nTrue Graph")
    pcmci.print_significant_links(p_matrix=(true_graph == 0),
                                  val_matrix=true_graph,
                                  conf_matrix=None,
                                  q_matrix=None,
                                  graph=true_graph,
                                  ambiguous_triples=None,
                                  alpha_level=0.05)

    results = pcmci.run_pcmciplus(
        selected_links=None,
        tau_min=tau_min,
        tau_max=tau_max,
        pc_alpha=pc_alpha,
        contemp_collider_rule=contemp_collider_rule,
        conflict_resolution=conflict_resolution,
        reset_lagged_links=reset_lagged_links,
        max_conds_dim=None,
        max_conds_py=None,
        max_conds_px=None,
    )
    correct_results = results['graph']

    for perm in itertools.permutations(range(N)):

        print(perm)
        data_new = np.copy(data[:, perm])
        dataframe = pp.DataFrame(data_new, var_names=list(perm))
        pcmci = PCMCI(dataframe=dataframe,
                      cond_ind_test=cond_ind_test,
                      verbosity=1)
        results = pcmci.run_pcmciplus(
            selected_links=None,
            tau_min=tau_min,
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            contemp_collider_rule=contemp_collider_rule,
            conflict_resolution=conflict_resolution,
            reset_lagged_links=reset_lagged_links,
            max_conds_dim=None,
            max_conds_py=None,
            max_conds_px=None,
        )

        tmp = np.take(correct_results, perm, axis=0)
        back_converted_result = np.take(tmp, perm, axis=1)

        for tau in range(tau_max + 1):
            if not np.allclose(results['graph'][:, :, tau],
                               back_converted_result[:, :, tau]):
                print(tau)
                print(results['graph'][:, :, tau])
                print(back_converted_result[:, :, tau])
                print(back_converted_result[:, :, tau] -
                      results['graph'][:, :, tau])
                print(perm)

        # np.allclose(results['graph'], back_converted_result)
        np.testing.assert_equal(results['graph'], back_converted_result)
Beispiel #25
0
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    T = T_data
    N = N_data
    # Run settings
    # there is another tau_max in lagged dependencies that might be much longer!
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # Save results to file
    # p_matrix = results['p_matrix']
    # with open("p-values_baseline.csv", "w") as csv_file:
    #     writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
    #     # [[[1 2 3]]] Three brackets to get through.
    #     for sector in p_matrix:
    #         print("sector: ", sector)
    #         for row in sector:
    #             print("row: ", row)
    #             writer.writerow(row)
    #         writer.writerow([])
    #
    # print("inside def pcmci_causality")

    # output edges
    result_arr = []

    for index_cause, item in enumerate(results['p_matrix']):
        # print("index is")
        # print(index)
        # print("item is")
        # print(item)
        # print("cause is")
        cause = headers[index_cause]
        # print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            # print("effect arr is ")
            # print(arr)
            # print("effect name is")
            effect = headers[index_effect]
            # print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    print(result_arr)

    return result_arr
Beispiel #26
0
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):

    T = T_data
    N = N_data
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    data = np.array(list(data))
    print("data len is ")
    print(len(data))
    # data = np.fromiter(data, float)
    # print(data)
    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # print("inside def pcmci_causality")

    # output edges
    result_arr = []
    # result_arr.append(["effect","cause"])

    for index_cause, item in enumerate(results['p_matrix']):
        print("index is")
        print(index)
        print("item is")
        print(item)
        print("cause is")
        cause = headers[index_cause]
        print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            print("effect arr is ")
            print(arr)
            print("effect name is")
            effect = headers[index_effect]
            print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

        with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
            for row in result_arr:
                f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    return result_arr
def build_link_pcmci_noself(p_data_values, p_agent_names, p_var_sou,
                            p_var_tar):
    """
    build links by n column data
    """
    [times_num, agent_num] = p_data_values.shape
    # set the data for PCMCI
    data_frame = pp.DataFrame(p_data_values,
                              var_names=p_agent_names,
                              missing_flag=BaseConfig.BACKGROUND_VALUE)
    # new PCMCI
    pcmci = PCMCI(dataframe=data_frame, cond_ind_test=ParCorr())
    # run PCMCI
    alpha_level = 0.01
    results_pcmci = pcmci.run_pcmciplus(tau_min=0,
                                        tau_max=2,
                                        pc_alpha=alpha_level)
    # get the result
    graph_pcmci = results_pcmci['graph']
    q_matrix = results_pcmci['q_matrix']
    p_matrix = results_pcmci['p_matrix']
    val_matrix = results_pcmci['val_matrix']
    conf_matrix = results_pcmci['conf_matrix']
    ambiguous_triples = results_pcmci['ambiguous_triples']
    # filter these links
    links_df = pd.DataFrame(columns=('VarSou', 'VarTar', 'Source', 'Target',
                                     'TimeLag', 'Strength', 'Unoriented'))
    if graph_pcmci is not None:
        sig_links = (graph_pcmci != "") * (graph_pcmci != "<--")
    elif q_matrix is not None:
        sig_links = (q_matrix <= alpha_level)
    else:
        sig_links = (p_matrix <= alpha_level)
    for j in range(agent_num):
        links = {(p[0], -p[1]): np.abs(val_matrix[p[0], j, abs(p[1])])
                 for p in zip(*np.where(sig_links[:, j, :]))}
        # Sort by value
        sorted_links = sorted(links, key=links.get, reverse=True)
        for p in sorted_links:
            VarSou = p_var_sou
            VarTar = p_var_tar
            Source = p_agent_names[j]
            Target = p_agent_names[p[0]]
            TimeLag = p[1]
            Strength = val_matrix[p[0], j, abs(p[1])]
            Unoriented = None
            if graph_pcmci is not None:
                if p[1] == 0 and graph_pcmci[j, p[0], 0] == "o-o":
                    Unoriented = 1
                    # "unoriented link"
                elif graph_pcmci[p[0], j, abs(p[1])] == "x-x":
                    Unoriented = 1
                    # "unclear orientation due to conflict"
                else:
                    Unoriented = 0
            links_df = links_df.append(pd.DataFrame({
                'VarSou': [VarSou],
                'VarTar': [VarTar],
                'Source': [Source],
                'Target': [Target],
                'TimeLag': [TimeLag],
                'Strength': [Strength],
                'Unoriented': [Unoriented]
            }),
                                       ignore_index=True)
    # remove the self correlation edges
    links_df = links_df.loc[links_df['Source'] != links_df['Target']]
    return links_df
Beispiel #28
0
dfresiduals = pd.read_pickle(filename_residuals)

data = dfresiduals[parameters['var_names']].values

T, N = data.shape

# Initialize dataframe object
dataframe = pp.DataFrame(data)

#%%

rcot = RCOT2(significance=parameters['cond_ind_test.significance'],
            num_f=parameters['cond_ind_test.num_f'])
pcmci = PCMCI(dataframe,
              cond_ind_test=rcot,
              selected_variables=parameters['selected_variables'],
              var_names=parameters['var_names'],
              verbosity=10)
              
q_matrix = pcmci.get_corrected_pvalues(p_matrix=p_matrix, fdr_method='fdr_bh')
q_matrix_tsbh = pcmci.get_corrected_pvalues(p_matrix=p_matrix, fdr_method='fdr_tsbh')

#%% print results
pcmci._print_significant_links(
        p_matrix = p_matrix, 
        q_matrix = q_matrix,
        val_matrix = val_matrix,
        alpha_level = 0.1)


#%% get selected parents and fit linear model
Beispiel #29
0
# Initialize dataframe object, specify time axis and variable names
#var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$']
dataframe = pp.DataFrame(data,
                         datatime=np.arange(len(data)),
                         var_names=headers)

if verbose > 0:
    plot = tp.plot_timeseries(dataframe)[0]
    if display_images:
        plot.show()
    if save_images:
        plot.savefig("timeseries.png")

parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=1)

correlations = pcmci.get_lagged_dependencies(tau_max=3)
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
                                       'var_names': headers,
                                       'x_base': 5,
                                       'y_base': .5
                                   })

if verbose > 1:
    if display_images:
        lag_func_matrix.savefig()
    if save_images:
        lag_func_matrix.savefig("lag_func.png")
Beispiel #30
0
def run_PCMCI(ex, outdic_actors, s, df_splits, map_proj):
    #=====================================================================================
    #
    # 4) PCMCI-algorithm
    #
    #=====================================================================================

    # save output
    if ex['SaveTF'] == True:
        #        from contextlib import redirect_stdout
        orig_stdout = sys.stdout
        # buffer print statement output to f
        if sys.version[:1] == '3':
            sys.stdout = f = io.StringIO()
        elif sys.version[:1] == '2':
            sys.stdout = f = open(os.path.join(ex['fig_subpath'], 'old.txt'),
                                  'w+')

#%%
# amount of text printed:
    verbosity = 3

    # alpha level for independence test within the pc procedure (finding parents)
    pc_alpha = ex['pcA_sets'][ex['pcA_set']]
    # alpha level for multiple linear regression model while conditining on parents of
    # parents
    alpha_level = ex['alpha_level_tig']
    print('run tigramite 4, run.pcmci')
    print(('alpha level(s) for independence tests within the pc procedure'
           '(finding parents): {}'.format(pc_alpha)))
    print((
        'alpha level for multiple linear regression model while conditining on parents of '
        'parents: {}'.format(ex['alpha_level_tig'])))

    # Retrieve traintest info
    traintest = df_splits

    # load Response Variable class
    RV = ex[ex['RV_name']]
    # create list with all actors, these will be merged into the fulldata array
    allvar = ex['vars'][0]
    var_names_corr = []
    actorlist = []
    cols = [[RV.name]]

    for var in allvar[:]:
        print(var)
        actor = outdic_actors[var]
        if actor.ts_corr[s].size != 0:
            ts_train = actor.ts_corr[s].values
            actorlist.append(ts_train)
            # create array which numbers the regions
            var_idx = allvar.index(var)
            n_regions = actor.ts_corr[s].shape[1]
            actor.var_info = [[i + 1, actor.ts_corr[s].columns[i], var_idx]
                              for i in range(n_regions)]
            # Array of corresponing regions with var_names_corr (first entry is RV)
            var_names_corr = var_names_corr + actor.var_info
            cols.append(list(actor.ts_corr[s].columns))
            index_dates = actor.ts_corr[s].index
    var_names_corr.insert(0, RV.name)

    # stack actor time-series together:
    fulldata = np.concatenate(tuple(actorlist), axis=1)

    print(('There are {} regions in total'.format(fulldata.shape[1])))
    # add the full 1D time series of interest as first entry:

    fulldata = np.column_stack((RV.RVfullts, fulldata))
    df_data = pd.DataFrame(fulldata, columns=flatten(cols), index=index_dates)

    if ex['import_prec_ts'] == True:
        var_names_full = var_names_corr.copy()
        for d in ex['precursor_ts']:
            path_data = d[1]
            if len(path_data) > 1:
                path_data = ''.join(list(path_data))
            # skip first col because it is the RV ts
            df_data_ext = func_fc.load_hdf5(
                path_data)['df_data'].iloc[:, 1:].loc[s]
            cols_ts = np.logical_or(df_data_ext.dtypes == 'float64',
                                    df_data_ext.dtypes == 'float32')
            cols_ext = list(df_data_ext.columns[cols_ts])
            # cols_ext must be of format '{}_{int}_{}'
            lab_int = 100
            for i, c in enumerate(cols_ext):
                char = c.split('_')[1]
                if char.isdigit():
                    pass
                else:
                    cols_ext[i] = c.replace(char, str(lab_int)) + char
                    lab_int += 1

            df_data_ext = df_data_ext[cols_ext]
            to_freq = ex['tfreq']
            if to_freq != 1:
                start_end_date = (ex['sstartdate'], ex['senddate'])
                start_end_year = (ex['startyear'], ex['endyear'])
            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
                                                      to_freq,
                                                      start_end_date,
                                                      start_end_year,
                                                      seldays='part')[0]
            #            df_data_ext = functions_pp.time_mean_bins(df_data_ext,
            #                                                     ex, ex['tfreq'],
            #                                                     seldays='part')[0]
            # Expand var_names_corr
            n = var_names_full[-1][0] + 1
            add_n = n + len(cols_ext)
            n_var_idx = var_names_full[-1][-1] + 1
            for i in range(n, add_n):
                var_names_full.append([i, cols_ext[i - n], n_var_idx])
            df_data = df_data.merge(df_data_ext,
                                    left_index=True,
                                    right_index=True)
    else:
        var_names_full = var_names_corr

    bool_train = traintest.loc[s]['TrainIsTrue']
    bool_RV_train = np.logical_and(bool_train, traintest.loc[s]['RV_mask'])
    dates_train = traintest.loc[s]['TrainIsTrue'][bool_train].index
    dates_RV_train = traintest.loc[s]['TrainIsTrue'][bool_RV_train].index

    RVfull_train = RV.RVfullts.sel(time=dates_train)
    datesfull_train = pd.to_datetime(RVfull_train.time.values)
    data = df_data.loc[datesfull_train].values
    print((data.shape))

    # get RV datamask (same shape als data)
    data_mask = [
        True if d in dates_RV_train else False for d in datesfull_train
    ]
    data_mask = np.repeat(data_mask, data.shape[1]).reshape(data.shape)

    # add traintest mask to fulldata
    #    dates_all = pd.to_datetime(RV.RVfullts.index)
    #    dates_RV  = pd.to_datetime(RV.RV_ts.index)
    dates_all = pd.to_datetime(RV.RVfullts.time.values)
    dates_RV = pd.to_datetime(RV.RV_ts.time.values)
    df_data['TrainIsTrue'] = [
        True if d in datesfull_train else False for d in dates_all
    ]
    df_data['RV_mask'] = [True if d in dates_RV else False for d in dates_all]

    # ======================================================================================================================
    # tigramite 3
    # ======================================================================================================================

    T, N = data.shape  # Time, Regions
    # ======================================================================================================================
    # Initialize dataframe object (needed for tigramite functions)
    # ======================================================================================================================
    dataframe = pp.DataFrame(data=data,
                             mask=data_mask,
                             var_names=var_names_full)
    # ======================================================================================================================
    # pc algorithm: only parents for selected_variables are calculated
    # ======================================================================================================================

    parcorr = ParCorr(significance='analytic',
                      mask_type='y',
                      verbosity=verbosity)
    #==========================================================================
    # multiple testing problem:
    #==========================================================================
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  selected_variables=None,
                  verbosity=4)

    # selected_variables : list of integers, optional (default: range(N))
    #    Specify to estimate parents only for selected variables. If None is
    #    passed, parents are estimated for all variables.

    # ======================================================================================================================
    #selected_links = dictionary/None
    results = pcmci.run_pcmci(tau_max=ex['tigr_tau_max'],
                              pc_alpha=pc_alpha,
                              tau_min=0,
                              max_combinations=ex['max_comb_actors'])

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')

    pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                  q_matrix=q_matrix,
                                  val_matrix=results['val_matrix'],
                                  alpha_level=alpha_level)

    # returns all parents, not just causal precursors (of lag>0)
    sig = rgcpd.return_sign_parents(pcmci,
                                    pq_matrix=q_matrix,
                                    val_matrix=results['val_matrix'],
                                    alpha_level=alpha_level)

    all_parents = sig['parents']
    #    link_matrix = sig['link_matrix']

    links_RV = all_parents[0]

    df = rgcpd.bookkeeping_precursors(links_RV, var_names_full)
    #%%

    rgcpd.print_particular_region_new(links_RV, var_names_corr, s,
                                      outdic_actors, map_proj, ex)

    #%%
    if ex['SaveTF'] == True:
        if sys.version[:1] == '3':
            fname = f's{s}_' + ex['params'] + '.txt'
            file = io.open(os.path.join(ex['fig_subpath'], fname), mode='w+')
            file.write(f.getvalue())
            file.close()
            f.close()
        elif sys.version[:1] == '2':
            f.close()
        sys.stdout = orig_stdout

    return df, df_data
# give custom NAN value for tigramite to interpret
mssng = 99999
study_data = study_data.copy().fillna(mssng)



    
dataframe = pp.DataFrame(study_data.values, var_names= study_data.columns, missing_flag = mssng)
tp.plot_timeseries(dataframe)
parcorr = ParCorr(significance='analytic')

gpdc = GPDC(significance='analytic', gp_params=None)

pcmci_gpdc = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=gpdc,
    verbosity=0)

pcmci = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=parcorr,
    verbosity=1)
#
min_lag, max_lag  = 1,6
results = pcmci.run_pcmci(tau_min = min_lag, tau_max=max_lag, pc_alpha=None)
#
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
#
pcmci.print_significant_links(
        p_matrix = results['p_matrix'], 
        q_matrix = q_matrix,
def test(dataframes,max_lags=[4],alpha=[None],tests=['ParCorr'],limit=1):
    ''' This function performs the PCMCI algorithm for all the dataframes received as parameters, given the hyper-parameters of the conditional
        independence test
    Args:
        dataframes: A list of TIGRAMITE dataframes
        max_lags: Maximum number of lags to consider for the laggd time series
        alpha: Significance level to perform the parent test
        tests: A list of conditional independence test to be performed
        limit: A limit for the instances to be considered

    Returns:

    '''
    test_results = []
    random.shuffle(dataframes)
    total = limit*len(max_lags)*len(alpha)*len(tests)
    data_frame_iter = iter(dataframes)

    tests_to_evaluate=[]
    if 'RCOT' in tests:
        rcot = RCOT()
        tests_to_evaluate.append(['RCOT',rcot])
    if 'GPDC' in tests:
        gpdc = GPDC()
        tests_to_evaluate.append(['GPDC', gpdc])
    if 'ParCorr' in tests:
        parcorr = ParCorr(significance='analytic')
        tests_to_evaluate.append(['ParCorr',parcorr])
    if 'CMIknn' in tests:
        cmiknn = CMIknn()
        tests_to_evaluate.append(['CMIknn',cmiknn])


    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {}
    for i in unique_complexities:
        counts[i] = 0

    for test in tests_to_evaluate:
        stop = False
        for l in max_lags:
            for a in alpha:
                while not stop:
                    try:
                        i = random.sample(dataframes,1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                    dataframe=i[2],
                                    cond_ind_test=test[1],
                                    verbosity=0)
                             # correlations = pcmci.get_lagged_dependencies(tau_max=20)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)

                            q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
                            valid_parents = list(pcmci.return_significant_parents(pq_matrix=q_matrix,
                                                                                  val_matrix=results['val_matrix'],
                                                                                  alpha_level=a)['parents'].values())

                            flat_list = []
                            for sublist in valid_parents:
                                for item in sublist:
                                    flat_list.append(item)

                            valid_links = len(flat_list)

                            test_results.append([i[3], i[0], i[1], l,test[0],a,valid_links,time_lapse])

                            results_df = pd.DataFrame(test_results,
                                                              columns=['representation', 'complexity', 'sample_size', 'max_lag','test','alpha','valid_links_at_alpha',
                                                                       'learning_time'])
                            print('results ready to be saved')
                            results_df.to_csv(
                                        'results/performance_sample_sizes.csv',
                                        index=False)

                            counts[i[1]] += 1
                            if all(value == limit for value in counts.values()):
                                stop = True

                    except:
                        print('Hoopla!')
                        pass

                for i in unique_complexities:
                    counts[i] = 0