Exemple #1
0
correlations = pcmci.get_lagged_dependencies(tau_max=3)
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
                                   setup_args={
                                       'var_names': headers,
                                       'x_base': 5,
                                       'y_base': .5
                                   })

if verbose > 1:
    if display_images:
        lag_func_matrix.savefig()
    if save_images:
        lag_func_matrix.savefig("lag_func.png")

pcmci.verbosity = 1
results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None)

#Print results
print("p-values")
print(results['p_matrix'].round(3))
print("MCI partial correlations")
print(results['val_matrix'].round(2))

#Save results to file
p_matrix = results['p_matrix']
with open("p-values.csv", "w") as csv_file:
    writer = csv.writer(csv_file,
                        delimiter=",",
                        quotechar="|",
                        quoting=csv.QUOTE_MINIMAL)
Exemple #2
0
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):

    T = T_data
    N = N_data
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    data = np.array(list(data))
    print("data len is ")
    print(len(data))
    # data = np.fromiter(data, float)
    # print(data)
    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # print("inside def pcmci_causality")

    # output edges
    result_arr = []
    # result_arr.append(["effect","cause"])

    for index_cause, item in enumerate(results['p_matrix']):
        print("index is")
        print(index)
        print("item is")
        print(item)
        print("cause is")
        cause = headers[index_cause]
        print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            print("effect arr is ")
            print(arr)
            print("effect name is")
            effect = headers[index_effect]
            print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

        with open("pcmci_para_out{}.csv".format(index), "w", newline='') as f:
            for row in result_arr:
                f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    return result_arr
Exemple #3
0
def pcmci_causality(data, dt, index, headers, T_data, N_data, maxlag):
    T = T_data
    N = N_data
    # Run settings
    # there is another tau_max in lagged dependencies that might be much longer!
    tau_max = maxlag

    # Verbosity:
    # 0 - nothing
    # 1 - final graph only
    # 2 - everything
    verbose_max = 2
    verbose = 2
    print("======")
    # print(list(data))  # got 100 records as itertools.chain object, not numpy df

    # Initialize dataframe object, specify time axis and variable names
    dataframe = pp.DataFrame(data, datatime=dt, var_names=headers)
    print(dataframe.var_names)
    rcot = RCOT(significance='analytic')
    pcmci_rcot = PCMCI(dataframe=dataframe, cond_ind_test=rcot, verbosity=0)

    pcmci_rcot.verbosity = 1
    results = pcmci_rcot.run_pcmci(tau_max=tau_max, pc_alpha=0.05)

    # Print results
    print("p-values")
    print(results['p_matrix'].round(3))
    print("MCI partial correlations")
    print(results['val_matrix'].round(2))

    # Save results to file
    # p_matrix = results['p_matrix']
    # with open("p-values_baseline.csv", "w") as csv_file:
    #     writer = csv.writer(csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
    #     # [[[1 2 3]]] Three brackets to get through.
    #     for sector in p_matrix:
    #         print("sector: ", sector)
    #         for row in sector:
    #             print("row: ", row)
    #             writer.writerow(row)
    #         writer.writerow([])
    #
    # print("inside def pcmci_causality")

    # output edges
    result_arr = []

    for index_cause, item in enumerate(results['p_matrix']):
        # print("index is")
        # print(index)
        # print("item is")
        # print(item)
        # print("cause is")
        cause = headers[index_cause]
        # print(headers[index_cause])
        for index_effect, arr in enumerate(item):
            # print("effect arr is ")
            # print(arr)
            # print("effect name is")
            effect = headers[index_effect]
            # print(headers[index_effect])
            for arrItem in arr:
                if arrItem < 0.05 and cause != effect:
                    result_arr.append([effect, cause, index])
                    print("{} caused by {}".format(effect, cause))
                    break

    with open("pcmci_baseline_out.csv", "w", newline='') as f:
        for row in result_arr:
            f.write("%s\n" % ','.join(str(col) for col in row))
    # print(pcmci)
    print(result_arr)

    return result_arr
def test(dataframes,max_lags=[4],alpha=[None],tests=['ParCorr'],limit=1):
    ''' This function performs the PCMCI algorithm for all the dataframes received as parameters, given the hyper-parameters of the conditional
        independence test
    Args:
        dataframes: A list of TIGRAMITE dataframes
        max_lags: Maximum number of lags to consider for the laggd time series
        alpha: Significance level to perform the parent test
        tests: A list of conditional independence test to be performed
        limit: A limit for the instances to be considered

    Returns:

    '''
    test_results = []
    random.shuffle(dataframes)
    total = limit*len(max_lags)*len(alpha)*len(tests)
    data_frame_iter = iter(dataframes)

    tests_to_evaluate=[]
    if 'RCOT' in tests:
        rcot = RCOT()
        tests_to_evaluate.append(['RCOT',rcot])
    if 'GPDC' in tests:
        gpdc = GPDC()
        tests_to_evaluate.append(['GPDC', gpdc])
    if 'ParCorr' in tests:
        parcorr = ParCorr(significance='analytic')
        tests_to_evaluate.append(['ParCorr',parcorr])
    if 'CMIknn' in tests:
        cmiknn = CMIknn()
        tests_to_evaluate.append(['CMIknn',cmiknn])


    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {}
    for i in unique_complexities:
        counts[i] = 0

    for test in tests_to_evaluate:
        stop = False
        for l in max_lags:
            for a in alpha:
                while not stop:
                    try:
                        i = random.sample(dataframes,1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                    dataframe=i[2],
                                    cond_ind_test=test[1],
                                    verbosity=0)
                             # correlations = pcmci.get_lagged_dependencies(tau_max=20)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)

                            q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
                            valid_parents = list(pcmci.return_significant_parents(pq_matrix=q_matrix,
                                                                                  val_matrix=results['val_matrix'],
                                                                                  alpha_level=a)['parents'].values())

                            flat_list = []
                            for sublist in valid_parents:
                                for item in sublist:
                                    flat_list.append(item)

                            valid_links = len(flat_list)

                            test_results.append([i[3], i[0], i[1], l,test[0],a,valid_links,time_lapse])

                            results_df = pd.DataFrame(test_results,
                                                              columns=['representation', 'complexity', 'sample_size', 'max_lag','test','alpha','valid_links_at_alpha',
                                                                       'learning_time'])
                            print('results ready to be saved')
                            results_df.to_csv(
                                        'results/performance_sample_sizes.csv',
                                        index=False)

                            counts[i[1]] += 1
                            if all(value == limit for value in counts.values()):
                                stop = True

                    except:
                        print('Hoopla!')
                        pass

                for i in unique_complexities:
                    counts[i] = 0
Exemple #5
0
def caus(data, var_names):
    import numpy as np
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    import sklearn

    import tigramite
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
    from tigramite.models import LinearMediation, Prediction

    data_mask_row = np.zeros(len(data))
    for i in range(68904):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True
    data_mask = np.zeros(data.shape)

    data_mask[:, 0] = data_mask_row
    data_mask[:, 1] = data_mask_row
    data_mask[:, 2] = data_mask_row
    data_mask[:, 9] = data_mask_row
    data_mask[:, 10] = data_mask_row
    data_mask[:, 11] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    datatime = np.arange(len(data))

    # tp.plot_timeseries(data, datatime, var_names, use_mask=True,
    #                    mask=data_mask, grey_masked_samples='data')

    parcorr = ParCorr(significance='analytic', use_mask=True, mask_type='y')
    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=parcorr,
                  var_names=var_names,
                  verbosity=1)

    # correlations = pcmci.get_lagged_dependencies(tau_max=20)
    # lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
    #                                    setup_args={'var_names':var_names,
    #                                    'x_base':5, 'y_base':.5})
    pcmci.verbosity = 1
    results = pcmci.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # print("p-values")
    # print (results['p_matrix'].round(3))
    # print("MCI partial correlations")
    # print (results['val_matrix'].round(2))

    q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                           fdr_method='fdr_bh')
    pcmci._print_significant_links(p_matrix=results['p_matrix'],
                                   q_matrix=q_matrix,
                                   val_matrix=results['val_matrix'],
                                   alpha_level=0.01)

    link_matrix = pcmci._return_significant_parents(
        pq_matrix=q_matrix, val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix