Code example #1
def gpdc(request):
    return GPDC(mask_type=None,
                significance='analytic',
                fixed_thres=0.1,
                sig_samples=1000,
                sig_blocklength=1,
                confidence='bootstrap',
                conf_lev=0.9,
                conf_samples=100,
                conf_blocklength=None,
                recycle_residuals=False,
                verbosity=0)
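
A minimal usage sketch, assuming the function above is a pytest fixture (decorated with @pytest.fixture in the source) and that GPDC comes from tigramite.independence_tests; the test body and its assertion are illustrative assumptions, not part of the original:

import numpy as np

def test_gpdc_dependence_measure(gpdc):
    # Two variables as rows; xyz labels them as X=0, Y=1
    rng = np.random.RandomState(0)
    array = rng.randn(2, 250)
    xyz = np.array([0, 1])
    # GPDC's measure is the distance correlation between GP residuals
    val = gpdc.get_dependence_measure(array, xyz)
    assert np.isfinite(val)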
Code example #2
    def setUp(self):

        auto = 0.6
        coeff = 0.6
        T = 1000
        numpy.random.seed(42)
        # True graph
        links_coeffs = {
            0: [((0, -1), auto)],
            1: [((1, -1), auto), ((0, -1), coeff)],
            2: [((2, -1), auto), ((1, -1), coeff)]
        }

        self.data, self.true_parents_coeffs = pp.var_process(links_coeffs, T=T)
        T, N = self.data.shape

        self.ci_par_corr = ParCorr(use_mask=False,
                                   mask_type=None,
                                   significance='analytic',
                                   fixed_thres=None,
                                   sig_samples=10000,
                                   sig_blocklength=3,
                                   confidence='analytic',
                                   conf_lev=0.9,
                                   conf_samples=10000,
                                   conf_blocklength=1,
                                   recycle_residuals=False,
                                   verbosity=0)

        self.ci_gpdc = GPDC(significance='analytic',
                            sig_samples=1000,
                            sig_blocklength=1,
                            confidence='bootstrap',
                            conf_lev=0.9,
                            conf_samples=100,
                            conf_blocklength=None,
                            use_mask=False,
                            mask_type='y',
                            recycle_residuals=False,
                            verbosity=0)
Code example #3
def calculate(para_setup):

    para_setup_string, sam = para_setup

    paras = para_setup_string.split('-')
    paras = [w.replace("'", "") for w in paras]

    model = str(paras[0])
    N = int(paras[1])
    n_links = int(paras[2])
    min_coeff = float(paras[3])
    coeff = float(paras[4])
    auto = float(paras[5])
    contemp_fraction = float(paras[6])
    frac_unobserved = float(paras[7])
    max_true_lag = int(paras[8])
    T = int(paras[9])

    ci_test = str(paras[10])
    method = str(paras[11])
    pc_alpha = float(paras[12])
    tau_max = int(paras[13])

    #############################################
    ##  Data
    #############################################

    def lin_f(x):
        return x

    def f2(x):
        return (x + 5. * x**2 * np.exp(-x**2 / 20.))

    if model == 'autobidirected':
        if verbosity > 999:  # 'verbosity' is a module-level setting of the surrounding script
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        random_state = np.random.RandomState(model_seed)

        links = {
            0: [((0, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            1: [],
            2: [((2, -1), auto, lin_f), ((1, -1), coeff, lin_f)],
            3: [((3, -1), auto, lin_f), ((2, -1), min_coeff, lin_f)],
        }
        observed_vars = [0, 2, 3]

        noises = [random_state.randn for j in range(len(links))]

        data_all, nonstationary = mod.generate_nonlinear_contemp_timeseries(
            links=links, T=T, noises=noises, random_state=random_state)
        data = data_all[:, observed_vars]

    elif 'random' in model:
        if 'lineargaussian' in model:

            coupling_funcs = [lin_f]

            noise_types = ['gaussian']  #, 'weibull', 'uniform']
            noise_sigma = (0.5, 2)

        elif 'nonlinearmixed' in model:

            coupling_funcs = [lin_f, f2]

            noise_types = ['gaussian', 'gaussian', 'weibull']
            noise_sigma = (0.5, 2)

        if coeff < min_coeff:
            min_coeff = coeff
        couplings = list(np.arange(min_coeff, coeff + 0.1, 0.1))
        couplings += [-c for c in couplings]

        auto_deps = list(np.arange(max(0., auto - 0.3), auto + 0.01, 0.05))

        # Models may be nonstationary; iterate over seeds until a stationary
        # realization (network topology, noises, etc.) is found
        if verbosity > 999:
            model_seed = verbosity - 1000
        else:
            model_seed = sam

        for ir in range(1000):
            # np.random.seed(model_seed)
            random_state = np.random.RandomState(model_seed)

            N_all = math.floor((N / (1. - frac_unobserved)))
            n_links_all = math.ceil(n_links / N * N_all)
            observed_vars = np.sort(
                random_state.choice(range(N_all),
                                    size=math.ceil(
                                        (1. - frac_unobserved) * N_all),
                                    replace=False)).tolist()

            links = mod.generate_random_contemp_model(
                N=N_all,
                L=n_links_all,
                coupling_coeffs=couplings,
                coupling_funcs=coupling_funcs,
                auto_coeffs=auto_deps,
                tau_max=max_true_lag,
                contemp_fraction=contemp_fraction,
                # num_trials=1000,
                random_state=random_state)

            class noise_model:
                def __init__(self, sigma=1):
                    self.sigma = sigma

                def gaussian(self, T):
                    # Zero-mean Gaussian samples, scaled to standard deviation sigma
                    return self.sigma * random_state.randn(T)

                def weibull(self, T):
                    # Zero-mean Weibull samples, rescaled to standard deviation sigma
                    a = 2
                    mean = scipy.special.gamma(1. / a + 1)
                    variance = scipy.special.gamma(
                        2. / a + 1) - scipy.special.gamma(1. / a + 1)**2
                    return self.sigma * (random_state.weibull(a=a, size=T) -
                                         mean) / np.sqrt(variance)

                def uniform(self, T):
                    # Zero-mean uniform samples, rescaled to standard deviation sigma
                    mean = 0.5
                    variance = 1. / 12.
                    return self.sigma * (random_state.uniform(size=T) -
                                         mean) / np.sqrt(variance)

            noises = []
            for j in links:
                noise_type = random_state.choice(noise_types)
                sigma = noise_sigma[0] + (
                    noise_sigma[1] - noise_sigma[0]) * random_state.rand()
                noises.append(getattr(noise_model(sigma), noise_type))

            if 'discretebinom' in model:
                if 'binom2' in model:
                    n_binom = 2
                elif 'binom4' in model:
                    n_binom = 4

                data_all_check, nonstationary = discretized_scp(
                    links=links,
                    T=T + 10000,
                    n_binom=n_binom,
                    random_state=random_state)
            else:
                data_all_check, nonstationary = mod.generate_nonlinear_contemp_timeseries(
                    links=links,
                    T=T + 10000,
                    noises=noises,
                    random_state=random_state)

            # If the model is stationary, break the loop
            if not nonstationary:
                data_all = data_all_check[:T]
                data = data_all[:, observed_vars]
                break
            else:
                print("Trial %d: Not a stationary model" % ir)
                model_seed += 10000
    else:
        raise ValueError("model %s not known" % model)

    if nonstationary:
        raise ValueError("No stationary model found: %s" % model)

    true_graph = utilities._get_pag_from_dag(links,
                                             observed_vars=observed_vars,
                                             tau_max=tau_max,
                                             verbosity=verbosity)[1]

    if verbosity > 0:
        print("True Links")
        for j in links:
            print(j, links[j])
        print("observed_vars = ", observed_vars)
        print("True PAG")
        if tau_max > 0:
            for lag in range(tau_max + 1):
                print(true_graph[:, :, lag])
        else:
            print(true_graph.squeeze())

    if plot_data:  # 'plot_data' is a module-level flag of the surrounding script
        print("PLOTTING")
        for j in range(N):
            # ax = fig.add_subplot(N,1,j+1)
            pyplot.plot(data[:, j])

        pyplot.show()

    computation_time_start = time.time()

    dataframe = pp.DataFrame(data)

    #############################################
    ##  Methods
    #############################################

    # Specify conditional independence test object
    if ci_test == 'par_corr':
        cond_ind_test = ParCorr(significance='analytic',
                                recycle_residuals=True)
    elif ci_test == 'cmi_knn':
        cond_ind_test = CMIknn(knn=0.1, sig_samples=500, sig_blocklength=1)
    elif ci_test == 'gp_dc':
        cond_ind_test = GPDC(recycle_residuals=True)
    elif ci_test == 'discg2':
        cond_ind_test = DiscG2()
    else:
        raise ValueError("CI test not recognized.")

    if 'lpcmci' in method:
        method_paras = method.split('_')
        n_preliminary_iterations = int(method_paras[1][7:])

        prelim_only = 'prelimonly' in method

        lpcmci = LPCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)

        lpcmcires = lpcmci.run_lpcmci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_p_non_ancestral=3,
            n_preliminary_iterations=n_preliminary_iterations,
            prelim_only=prelim_only,
            verbosity=verbosity)

        graph = lpcmci.graph
        val_min = lpcmci.val_min_matrix
        max_cardinality = lpcmci.cardinality_matrix

    elif method == 'svarfci':
        svarfci = SVARFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        svarfcires = svarfci.run_svarfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            max_cond_px=0,
            max_p_dsep=3,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarfci.graph
        val_min = svarfci.val_min_matrix
        max_cardinality = svarfci.cardinality_matrix

    elif method == 'svarrfci':
        svarrfci = SVARRFCI(dataframe=dataframe, cond_ind_test=cond_ind_test)

        svarrfcires = svarrfci.run_svarrfci(
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            fix_all_edges_before_final_orientation=True,
            verbosity=verbosity)

        graph = svarrfci.graph
        val_min = svarrfci.val_min_matrix
        max_cardinality = svarrfci.cardinality_matrix
    else:
        raise ValueError("%s not implemented." % method)

    computation_time_end = time.time()
    computation_time = computation_time_end - computation_time_start

    return {
        'true_graph': true_graph,
        'val_min': val_min,
        'max_cardinality': max_cardinality,

        # Method results
        'computation_time': computation_time,
        'graph': graph,
    }
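
A hypothetical invocation of calculate(); the '-'-separated layout is read off the parsing at the top of the function, and the method token 'lpcmci_nprelim4' is an assumption chosen so that int(method_paras[1][7:]) yields the number of preliminary iterations:

para_setup_string = '-'.join([
    'random_lineargaussian',  # model
    '5',                      # N: number of observed variables
    '5',                      # n_links
    '0.2',                    # min_coeff
    '0.5',                    # coeff
    '0.9',                    # auto
    '0.3',                    # contemp_fraction
    '0.0',                    # frac_unobserved
    '2',                      # max_true_lag
    '500',                    # T: sample size
    'par_corr',               # ci_test
    'lpcmci_nprelim4',        # method: LPCMCI with 4 preliminary iterations
    '0.01',                   # pc_alpha
    '5',                      # tau_max
])
results = calculate((para_setup_string, 0))  # sam = 0: sample/seed index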
Code example #4

study_data = millet_prices

# Use a custom missing-value flag that tigramite will interpret
mssng = 99999
study_data = study_data.copy().fillna(mssng)

dataframe = pp.DataFrame(study_data.values,
                         var_names=study_data.columns,
                         missing_flag=mssng)
tp.plot_timeseries(dataframe)
parcorr = ParCorr(significance='analytic')

gpdc = GPDC(significance='analytic', gp_params=None)

pcmci_gpdc = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=gpdc,
    verbosity=0)

pcmci = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=parcorr,
    verbosity=1)
min_lag, max_lag = 1, 6
results = pcmci.run_pcmci(tau_min=min_lag, tau_max=max_lag, pc_alpha=None)
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')

def test(dataframes, max_lags=[4], alpha=[None], tests=['ParCorr'], limit=1):
    '''Perform the PCMCI algorithm on all the dataframes received as
        parameters, for each combination of the given hyper-parameters of the
        conditional independence tests.

    Args:
        dataframes: A list of TIGRAMITE dataframes
        max_lags: Maximum numbers of lags to consider for the lagged time series
        alpha: Significance levels at which to perform the parent test
        tests: A list of conditional independence tests to be performed
        limit: A limit on the number of instances to be considered

    Returns:
        None. Results are written incrementally to
        results/performance_sample_sizes.csv.
    '''
    test_results = []
    random.shuffle(dataframes)
    total = limit*len(max_lags)*len(alpha)*len(tests)
    data_frame_iter = iter(dataframes)

    tests_to_evaluate=[]
    if 'RCOT' in tests:
        rcot = RCOT()
        tests_to_evaluate.append(['RCOT',rcot])
    if 'GPDC' in tests:
        gpdc = GPDC()
        tests_to_evaluate.append(['GPDC', gpdc])
    if 'ParCorr' in tests:
        parcorr = ParCorr(significance='analytic')
        tests_to_evaluate.append(['ParCorr',parcorr])
    if 'CMIknn' in tests:
        cmiknn = CMIknn()
        tests_to_evaluate.append(['CMIknn',cmiknn])


    unique_complexities = list(set(l[1] for l in dataframes))
    counts = {}
    for i in unique_complexities:
        counts[i] = 0

    for test in tests_to_evaluate:
        for l in max_lags:
            for a in alpha:
                # Reset the stop flag for every (lag, alpha) combination,
                # otherwise only the first combination is ever evaluated
                stop = False
                while not stop:
                    try:
                        i = random.sample(dataframes,1)[0]
                        if counts[i[1]] < limit:
                            print('evaluating: ' + str(i[3]))
                            start = time.time()
                            pcmci = PCMCI(
                                    dataframe=i[2],
                                    cond_ind_test=test[1],
                                    verbosity=0)
                             # correlations = pcmci.get_lagged_dependencies(tau_max=20)
                            pcmci.verbosity = 1
                            results = pcmci.run_pcmci(tau_max=l, pc_alpha=a)
                            time_lapse = round(time.time() - start, 2)

                            q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
                            valid_parents = list(pcmci.return_significant_parents(pq_matrix=q_matrix,
                                                                                  val_matrix=results['val_matrix'],
                                                                                  alpha_level=a)['parents'].values())

                            flat_list = []
                            for sublist in valid_parents:
                                for item in sublist:
                                    flat_list.append(item)

                            valid_links = len(flat_list)

                            test_results.append([i[3], i[0], i[1], l,test[0],a,valid_links,time_lapse])

                            results_df = pd.DataFrame(test_results,
                                                              columns=['representation', 'complexity', 'sample_size', 'max_lag','test','alpha','valid_links_at_alpha',
                                                                       'learning_time'])
                            print('results ready to be saved')
                            results_df.to_csv(
                                        'results/performance_sample_sizes.csv',
                                        index=False)

                            counts[i[1]] += 1
                            if all(value == limit for value in counts.values()):
                                stop = True

                    except Exception as exc:
                        # Keep sampling even if a single evaluation fails
                        print('Hoopla!', exc)

                for i in unique_complexities:
                    counts[i] = 0
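
A sketch of how test() might be called; the 4-tuple layout (complexity, sample_size, dataframe, name) is an assumption inferred from the i[0]..i[3] indexing above, and the results/ directory must already exist because the function writes its CSV there:

import numpy as np

values = np.random.randn(500, 3)
dataframes = [
    (1, 500, pp.DataFrame(values), 'toy-run-0'),  # hypothetical entry
]
test(dataframes, max_lags=[4], alpha=[0.05], tests=['ParCorr'], limit=1)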
Code example #6
class TestCondInd():  #unittest.TestCase):
    # def __init__(self):
    #     pass

    def setUp(self):

        auto = 0.6
        coeff = 0.6
        T = 1000
        numpy.random.seed(42)
        # True graph
        links_coeffs = {
            0: [((0, -1), auto)],
            1: [((1, -1), auto), ((0, -1), coeff)],
            2: [((2, -1), auto), ((1, -1), coeff)]
        }

        self.data, self.true_parents_coeffs = pp.var_process(links_coeffs, T=T)
        T, N = self.data.shape

        self.ci_par_corr = ParCorr(use_mask=False,
                                   mask_type=None,
                                   significance='analytic',
                                   fixed_thres=None,
                                   sig_samples=10000,
                                   sig_blocklength=3,
                                   confidence='analytic',
                                   conf_lev=0.9,
                                   conf_samples=10000,
                                   conf_blocklength=1,
                                   recycle_residuals=False,
                                   verbosity=0)

        self.ci_gpdc = GPDC(significance='analytic',
                            sig_samples=1000,
                            sig_blocklength=1,
                            confidence='bootstrap',
                            conf_lev=0.9,
                            conf_samples=100,
                            conf_blocklength=None,
                            use_mask=False,
                            mask_type='y',
                            recycle_residuals=False,
                            verbosity=0)

    def test_construct_array(self):

        data = numpy.array([[0, 10, 20, 30], [1, 11, 21, 31], [2, 12, 22, 32],
                            [3, 13, 23, 33], [4, 14, 24, 34], [5, 15, 25, 35],
                            [6, 16, 26, 36]])
        data_mask = numpy.array(
            [[0, 1, 1, 0], [0, 0, 0, 0], [1, 0, 0, 0], [0, 0, 1, 1],
             [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            dtype='bool')

        X = [(1, -1)]
        Y = [(0, 0)]
        Z = [(0, -1), (1, -2), (2, 0)]

        tau_max = 2

        # No masking
        res = _construct_array(X=X,
                               Y=Y,
                               Z=Z,
                               tau_max=tau_max,
                               use_mask=False,
                               data=data,
                               mask=data_mask,
                               missing_flag=None,
                               mask_type=None,
                               verbosity=verbosity)
        print(res[0])
        numpy.testing.assert_almost_equal(
            res[0],
            numpy.array([[13, 14, 15], [4, 5, 6], [3, 4, 5], [12, 13, 14],
                         [24, 25, 26]]))
        numpy.testing.assert_almost_equal(res[1], numpy.array([0, 1, 2, 2, 2]))

        # masking y
        res = _construct_array(X=X,
                               Y=Y,
                               Z=Z,
                               tau_max=tau_max,
                               use_mask=True,
                               data=data,
                               mask=data_mask,
                               mask_type=['y'],
                               verbosity=verbosity)
        print(res[0])

        numpy.testing.assert_almost_equal(
            res[0],
            numpy.array([[13, 14, 15], [4, 5, 6], [3, 4, 5], [12, 13, 14],
                         [24, 25, 26]]))

        numpy.testing.assert_almost_equal(res[1], numpy.array([0, 1, 2, 2, 2]))

        # masking all
        res = _construct_array(X=X,
                               Y=Y,
                               Z=Z,
                               tau_max=tau_max,
                               use_mask=True,
                               data=data,
                               mask=data_mask,
                               mask_type=['x', 'y', 'z'],
                               verbosity=verbosity)
        print(res[0])

        numpy.testing.assert_almost_equal(
            res[0],
            numpy.array([[13, 14, 15], [4, 5, 6], [3, 4, 5], [12, 13, 14],
                         [24, 25, 26]]))

        numpy.testing.assert_almost_equal(res[1], numpy.array([0, 1, 2, 2, 2]))

    def test_missing_values(self):

        data = numpy.array([
            [0, 10, 20, 30],
            [1, 11, 21, 31],
            [2, 12, 22, 32],
            [3, 13, 999, 33],
            [4, 14, 24, 34],
            [5, 15, 25, 35],
            [6, 16, 26, 36],
        ])
        data_mask = numpy.array(
            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
             [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            dtype='bool')

        X = [(1, -2)]
        Y = [(0, 0)]
        Z = [(2, -1)]

        tau_max = 1

        # Missing values
        res = _construct_array(X=X,
                               Y=Y,
                               Z=Z,
                               tau_max=tau_max,
                               use_mask=False,
                               data=data,
                               mask=data_mask,
                               missing_flag=999,
                               mask_type=['y'],
                               verbosity=verbosity)

        # print res[0]
        numpy.testing.assert_almost_equal(
            res[0], numpy.array([[10, 14], [2, 6], [21, 25]]))

    def test_bootstrap_vs_analytic_confidence_parcorr(self):

        cov = numpy.array([[1., 0.3], [0.3, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov,
                                                 size=150).T

        val = numpy.corrcoef(array)[0, 1]
        # print val
        dim, T = array.shape
        xyz = numpy.array([0, 1])

        conf_ana = self.ci_par_corr.get_analytic_confidence(
            df=T - dim, value=val, conf_lev=self.ci_par_corr.conf_lev)

        conf_boots = self.ci_par_corr.get_bootstrap_confidence(
            array,
            xyz,
            dependence_measure=self.ci_par_corr.get_dependence_measure,
            conf_samples=self.ci_par_corr.conf_samples,
            conf_blocklength=self.ci_par_corr.conf_blocklength,
            conf_lev=self.ci_par_corr.conf_lev,
        )

        print(conf_ana)
        print(conf_boots)

        numpy.testing.assert_allclose(numpy.array(conf_ana),
                                      numpy.array(conf_boots),
                                      atol=0.01)

    def test_shuffle_vs_analytic_significance_parcorr(self):

        cov = numpy.array([[1., 0.04], [0.04, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov,
                                                 size=250).T
        # array = numpy.random.randn(3, 10)
        val = numpy.corrcoef(array)[0, 1]
        # print val
        dim, T = array.shape
        xyz = numpy.array([0, 1])

        pval_ana = self.ci_par_corr.get_analytic_significance(value=val,
                                                              T=T,
                                                              dim=dim)

        pval_shuffle = self.ci_par_corr.get_shuffle_significance(
            array, xyz, val)
        # Adjust p-value for two-sided measures

        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.01)

    def test__parcorr_get_single_residuals(self):

        target_var = 0  #numpy.array([True, False, False, False])
        true_residual = numpy.random.randn(4, 1000)

        array = numpy.copy(true_residual)

        array[0] += 0.5 * array[2:].sum(axis=0)

        est_residual = self.ci_par_corr._get_single_residuals(
            array, target_var, standardize=False, return_means=False)

        # print est_residual[:10]
        # print true_residual[0, :10]
        numpy.testing.assert_allclose(est_residual,
                                      true_residual[0],
                                      atol=0.01)

    def test_par_corr(self):

        val_ana = 0.6
        T = 1000
        array = numpy.random.randn(5, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                        cov=cov,
                                                        size=T).T

        # Generate some confounding
        array[0] += 0.5 * array[2:].sum(axis=0)
        array[1] += 0.7 * array[2:].sum(axis=0)

        # print numpy.corrcoef(array)[0,1]
        # print val
        dim, T = array.shape
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = self.ci_par_corr.get_dependence_measure(array, xyz)

        print(val_est)
        print(val_ana)

        numpy.testing.assert_allclose(numpy.array(val_ana),
                                      numpy.array(val_est),
                                      atol=0.02)

    def test__gpdc_get_single_residuals(self):

        ci_test = self.ci_gpdc
        # ci_test = self.ci_par_corr

        c = .3
        T = 1000

        numpy.random.seed(42)

        def func(x):
            return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

        array = numpy.random.randn(3, T)
        array[1] += c * func(array[2])  #.sum(axis=0)
        xyz = numpy.array([0, 1] + [2 for i in range(array.shape[0] - 2)])

        target_var = 1

        dim, T = array.shape
        # array -= array.mean(axis=1).reshape(dim, 1)
        c_std = c  #/array[1].std()
        # array /= array.std(axis=1).reshape(dim, 1)
        array_orig = numpy.copy(array)

        (est_residual, pred) = ci_test._get_single_residuals(array,
                                                             target_var,
                                                             standardize=False,
                                                             return_means=True)

        # Testing that in the center the fit is good
        center = numpy.where(numpy.abs(array_orig[2]) < .7)[0]
        print(pred[center][:10].round(2))
        print((c_std * func(array_orig[2][center])[:10]).round(2))
        numpy.testing.assert_allclose(pred[center],
                                      c_std * func(array_orig[2][center]),
                                      atol=0.2)

    def plot__gpdc_get_single_residuals(self):

        #######
        ci_test = self.ci_gpdc
        # ci_test = self.ci_par_corr

        a = 0.
        c = .3
        T = 500
        # Each key refers to a variable and the incoming links are supplied as a
        # list of format [((driver, lag), coeff), ...]
        links_coeffs = {
            0: [((0, -1), a)],
            1: [((1, -1), a), ((0, -1), c)],
        }

        numpy.random.seed(42)
        data, true_parents_neighbors = pp.var_process(links_coeffs,
                                                      use='inv_inno_cov',
                                                      T=T)
        dataframe = pp.DataFrame(data)
        ci_test.set_dataframe(dataframe)

        # ci_test.set_tau_max(1)

        # X=[(1, -1)]
        # Y=[(1, 0)]
        # Z=[(0, -1)] + [(1, -tau) for tau in range(1, 2)]
        # array, xyz, XYZ = ci_test.get_array(X, Y, Z,
        #     verbosity=0)]
        # ci_test.run_test(X, Y, Z,)
        def func(x):
            return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

        true_residual = numpy.random.randn(3, T)
        array = numpy.copy(true_residual)
        array[1] += c * func(array[2])  #.sum(axis=0)
        xyz = numpy.array([0, 1] + [2 for i in range(array.shape[0] - 2)])

        print('xyz ', xyz, numpy.where(xyz == 1))
        target_var = 1

        dim, T = array.shape
        # array -= array.mean(axis=1).reshape(dim, 1)
        c_std = c  #/array[1].std()
        # array /= array.std(axis=1).reshape(dim, 1)
        array_orig = numpy.copy(array)

        import matplotlib
        from matplotlib import pyplot
        (est_residual, pred) = ci_test._get_single_residuals(array,
                                                             target_var,
                                                             standardize=False,
                                                             return_means=True)
        (resid_, pred_parcorr) = self.ci_par_corr._get_single_residuals(
            array, target_var, standardize=False, return_means=True)

        fig = pyplot.figure()
        ax = fig.add_subplot(111)
        ax.scatter(array_orig[2], array_orig[1])
        ax.scatter(array_orig[2], pred, color='red')
        ax.scatter(array_orig[2], pred_parcorr, color='green')
        ax.plot(numpy.sort(array_orig[2]),
                c_std * func(numpy.sort(array_orig[2])),
                color='black')

        pyplot.savefig('/home/jakobrunge/test/gpdctest.pdf')

    def test_shuffle_vs_analytic_significance_gpdc(self):

        cov = numpy.array([[1., 0.2], [0.2, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov,
                                                 size=245).T

        dim, T = array.shape
        xyz = numpy.array([0, 1])

        val = self.ci_gpdc.get_dependence_measure(array, xyz)

        pval_ana = self.ci_gpdc.get_analytic_significance(value=val,
                                                          T=T,
                                                          dim=dim)

        pval_shuffle = self.ci_gpdc.get_shuffle_significance(array, xyz, val)

        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.05)

    # Weak-dependence variant of the shuffle-vs-analytic comparison
    def test_shuffle_vs_analytic_significance_gpdc_weak(self):

        cov = numpy.array([[1., 0.01], [0.01, 1.]])
        array = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                 cov=cov,
                                                 size=300).T

        dim, T = array.shape
        xyz = numpy.array([0, 1])

        val = self.ci_gpdc.get_dependence_measure(array, xyz)

        pval_ana = self.ci_gpdc.get_analytic_significance(value=val,
                                                          T=T,
                                                          dim=dim)

        pval_shuffle = self.ci_gpdc.get_shuffle_significance(array, xyz, val)
        print(pval_ana)
        print(pval_shuffle)

        numpy.testing.assert_allclose(numpy.array(pval_ana),
                                      numpy.array(pval_shuffle),
                                      atol=0.05)

    def test_cmi_knn(self):

        ci_cmi_knn = CMIknn(use_mask=False,
                            mask_type=None,
                            significance='shuffle_test',
                            fixed_thres=None,
                            sig_samples=10000,
                            sig_blocklength=3,
                            knn=10,
                            confidence='bootstrap',
                            conf_lev=0.9,
                            conf_samples=10000,
                            conf_blocklength=1,
                            verbosity=0)

        # ci_cmi_knn._trafo2uniform(self, x)

        val_ana = 0.6
        T = 10000
        numpy.random.seed(42)
        array = numpy.random.randn(5, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                        cov=cov,
                                                        size=T).T

        # Generate some confounding
        if len(array) > 2:
            array[0] += 0.5 * array[2:].sum(axis=0)
            array[1] += 0.7 * array[2:].sum(axis=0)

        # print numpy.corrcoef(array)[0,1]
        # print val
        dim, T = array.shape
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = ci_cmi_knn.get_dependence_measure(array, xyz)

        print(val_est)
        print(_par_corr_to_cmi(val_ana))

        numpy.testing.assert_allclose(numpy.array(_par_corr_to_cmi(val_ana)),
                                      numpy.array(val_est),
                                      atol=0.02)

    def test_trafo2uniform(self):

        T = 1000
        # numpy.random.seed(None)
        array = numpy.random.randn(2, T)

        bins = 10

        uniform = self.ci_gpdc._trafo2uniform(array)
        # print uniform

        # import matplotlib
        # from matplotlib import pylab
        for i in range(array.shape[0]):
            print(uniform[i].shape)
            hist, edges = numpy.histogram(uniform[i], bins=bins, density=True)
            # pylab.figure()
            # pylab.hist(uniform[i], color='grey', alpha=0.3)
            # pylab.hist(array[i], alpha=0.3)
            # pylab.show()
            print(hist / float(bins))  #, edges
            numpy.testing.assert_allclose(numpy.ones(bins) / float(bins),
                                          hist / float(bins),
                                          atol=0.01)

    def test_cmi_symb(self):

        ci_cmi_symb = CMIsymb(use_mask=False,
                              mask_type=None,
                              significance='shuffle_test',
                              fixed_thres=None,
                              sig_samples=10000,
                              sig_blocklength=3,
                              confidence='bootstrap',
                              conf_lev=0.9,
                              conf_samples=10000,
                              conf_blocklength=1,
                              verbosity=0)

        val_ana = 0.6
        T = 100000
        numpy.random.seed(None)
        array = numpy.random.randn(3, T)

        cov = numpy.array([[1., val_ana], [val_ana, 1.]])
        array[:2, :] = numpy.random.multivariate_normal(mean=numpy.zeros(2),
                                                        cov=cov,
                                                        size=T).T

        # Generate some confounding
        if len(array) > 2:
            array[0] += 0.5 * array[2:].sum(axis=0)
            array[1] += 0.7 * array[2:].sum(axis=0)

        # Transform to symbolic data
        array = pp.quantile_bin_array(array.T, bins=16).T

        dim, T = array.shape
        xyz = numpy.array([0, 1, 2, 2, 2])

        val_est = ci_cmi_symb.get_dependence_measure(array, xyz)

        print(val_est)
        print(_par_corr_to_cmi(val_ana))

        numpy.testing.assert_allclose(numpy.array(_par_corr_to_cmi(val_ana)),
                                      numpy.array(val_est),
                                      atol=0.02)
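
The class above comments out its unittest.TestCase base, so a minimal driver, assuming the base class is restored as class TestCondInd(unittest.TestCase), would be:

if __name__ == '__main__':
    import unittest
    unittest.main()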
Code example #7
File: circ_caus.py (project: hert3863/DPHIL)
def caus_gpdc(data, var_names):
    import numpy as np
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    import sklearn

    import tigramite
    from tigramite import data_processing as pp
    from tigramite import plotting as tp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
    from tigramite.models import LinearMediation, Prediction

    # Mask every time step outside positions 30-47 of each 72-step cycle
    data_mask_row = np.zeros(len(data), dtype='bool')
    for i in range(len(data)):
        if (i % 72) < 30 or (i % 72) > 47:
            data_mask_row[i] = True
    data_mask = np.zeros(data.shape, dtype='bool')

    data_mask[:, 0] = data_mask_row
    data_mask[:, 1] = data_mask_row
    data_mask[:, 2] = data_mask_row
    data_mask[:, 9] = data_mask_row
    data_mask[:, 10] = data_mask_row
    data_mask[:, 11] = data_mask_row

    dataframe = pp.DataFrame(data, mask=data_mask)
    datatime = np.arange(len(data))

    # tp.plot_timeseries(data, datatime, var_names, use_mask=True,
    #                    mask=data_mask, grey_masked_samples='data')

    gpdc = GPDC(significance='analytic',
                gp_params=None,
                use_mask=True,
                mask_type='y')
    gpdc.generate_and_save_nulldists(sample_sizes=range(495, 501),
                                     null_dist_filename='dc_nulldists.npz')
    gpdc.null_dist_filename = 'dc_nulldists.npz'
    pcmci_gpdc = PCMCI(dataframe=dataframe,
                       cond_ind_test=gpdc,
                       var_names=var_names,
                       verbosity=1)

    # correlations = pcmci.get_lagged_dependencies(tau_max=20)
    # lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
    #                                    setup_args={'var_names':var_names,
    #                                    'x_base':5, 'y_base':.5})

    results = pcmci_gpdc.run_pcmci(tau_max=6, tau_min=1, pc_alpha=0.01)

    # print("p-values")
    # print (results['p_matrix'].round(3))
    # print("MCI partial correlations")
    # print (results['val_matrix'].round(2))

    q_matrix = pcmci_gpdc.get_corrected_pvalues(p_matrix=results['p_matrix'],
                                                fdr_method='fdr_bh')
    pcmci_gpdc._print_significant_links(p_matrix=results['p_matrix'],
                                        q_matrix=q_matrix,
                                        val_matrix=results['val_matrix'],
                                        alpha_level=0.01)

    link_matrix = pcmci_gpdc._return_significant_parents(
        pq_matrix=q_matrix, val_matrix=results['val_matrix'],
        alpha_level=0.01)['link_matrix']

    tp.plot_time_series_graph(
        val_matrix=results['val_matrix'],
        link_matrix=link_matrix,
        var_names=var_names,
        link_colorbar_label='MCI',
    )
    return results, link_matrix
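
A usage sketch for caus_gpdc(); the shapes are assumptions implied by the masking code above (a row count that is a multiple of the 72-step cycle and at least 12 columns, given the hard-coded column indices):

import numpy as np

T, N = 72 * 10, 12
data = np.random.randn(T, N)
var_names = ['var%d' % j for j in range(N)]
results, link_matrix = caus_gpdc(data, var_names)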