def gen_data_frame(links_coeffs, time, seed_val):
    """
    Build a seeded sample data frame together with its true parent graph.

    Parameters
    ----------
    links_coeffs
        Link/coefficient specification handed unchanged to
        ``pp.var_process`` and to ``_get_parent_graph``.
    time
        Forwarded to ``pp.var_process`` as its ``T`` argument.
    seed_val
        Value used to seed NumPy's global random state.

    Returns
    -------
    tuple
        ``(pp.DataFrame(data), true_parents)`` where ``data`` is the first
        item returned by ``pp.var_process`` and ``true_parents`` is the
        result of ``_get_parent_graph(links_coeffs)``.
    """
    # Seed first so the draw below is repeatable
    # (presumably var_process samples from NumPy's global RNG -- confirm).
    np.random.seed(seed_val)
    # Only the generated series is needed; the second return value is dropped
    samples, _unused = pp.var_process(links_coeffs, T=time)
    # The reference graph comes from the coefficients, not from the samples
    parent_graph = _get_parent_graph(links_coeffs)
    frame = pp.DataFrame(samples)
    return frame, parent_graph
def a_sample(request):
    """
    Produce one parametrised sample: a data frame and its true parent graph.

    ``request.param`` is unpacked as ``(links_coeffs, time, seed_val)``.
    NumPy's global random state is seeded with ``seed_val`` before
    ``pp.var_process`` is called with ``links_coeffs`` and ``T=time``.

    Returns
    -------
    tuple
        ``(pp.DataFrame(data), true_parents)`` where ``true_parents`` is
        ``_get_parent_graph(links_coeffs)``.
    """
    link_spec, n_steps, seed = request.param
    # Seed before generating so each parameter set yields a repeatable draw
    # (presumably var_process samples from NumPy's global RNG -- confirm).
    np.random.seed(seed)
    generated = pp.var_process(link_spec, T=n_steps)
    # Exactly two values are expected back; only the series is used here
    series, _ignored = generated
    parent_graph = _get_parent_graph(link_spec)
    return pp.DataFrame(series), parent_graph
def gen_process(a_process):
    """
    Run ``pp.var_process`` for one of the process fixtures.

    ``a_process`` is unpacked into four items: the first is ignored, the
    remaining three are the initial values, the coefficients and the
    expected answer.  The expected answer is only used for its leading
    dimension, which sets the length ``T`` of the generated process.

    Returns
    -------
    tuple
        The two values returned by ``pp.var_process`` called with
        ``use="no_noise"``: the data and the true parents/neighbors.
    """
    _, start_values, link_coeffs, expected = a_process
    # Generate exactly as many steps as the expected answer has rows
    n_steps = expected.shape[0]
    series, parents_neighbors = pp.var_process(
        link_coeffs,
        T=n_steps,
        initial_values=start_values,
        use="no_noise",
    )
    return series, parents_neighbors
def test_noise_generation(covariance_parameters):
    """
    Ensure the covariance parameters are respected when the noise is generated

    ``covariance_parameters`` is unpacked as ``(good_params, covar_matrix)``:
    the parameter set handed to ``pp.var_process`` and the covariance matrix
    the generated data is expected to reproduce.
    """
    # Unpack the parameters and covariance matrix
    good_params, covar_matrix = covariance_parameters
    # Generate noise-only from this parameter set
    data, _ = pp.var_process(good_params,
                             T=10000,
                             use='inno_cov',
                             verbosity=0,
                             initial_values=None)
    # np.cov treats rows as variables by default, hence the transpose
    covar_result = np.cov(data.T)
    err_message = ("Covariance of data does not match covariance implied by "
                   "parameter set")
    # assert_allclose expects (actual, desired): the tolerance is
    # atol + rtol * abs(desired), so the reference matrix must be passed
    # second and the estimate first.
    np.testing.assert_allclose(covar_result,
                               covar_matrix,
                               rtol=1e-1,
                               atol=0.025,
                               verbose=True,
                               err_msg=err_message)
def setUp(self):
    """
    Generate the shared test data from a three-variable chain 0 -> 1 -> 2.

    Sets ``self.data`` and ``self.true_parents_coeffs`` from the two values
    returned by ``pp.var_process``, and ``self.true_parents`` from
    ``_get_parent_graph(self.true_parents_coeffs)``.
    """
    auto = .5
    coeff = 0.6
    T = 1000
    # Fixed seed so every test sees the same data
    # (presumably var_process samples from NumPy's global RNG -- confirm).
    numpy.random.seed(42)
    # True graph: each variable depends on its own previous value, and
    # variables 1 and 2 additionally on the previous value of 0 and 1.
    links_coeffs = {
        0: [((0, -1), auto)],
        1: [((1, -1), auto), ((0, -1), coeff)],
        2: [((2, -1), auto), ((1, -1), coeff)],
    }
    self.data, self.true_parents_coeffs = pp.var_process(links_coeffs, T=T)
    self.true_parents = _get_parent_graph(self.true_parents_coeffs)
def setUp(self):
    """
    Generate the shared test data and the two conditional independence tests.

    Sets ``self.data`` and ``self.true_parents_coeffs`` from the two values
    returned by ``pp.var_process`` for a three-variable chain 0 -> 1 -> 2,
    then creates ``self.ci_par_corr`` (a ``ParCorr`` instance) and
    ``self.ci_gpdc`` (a ``GPDC`` instance) with the keyword arguments
    spelled out below.
    """
    auto = 0.6
    coeff = 0.6
    T = 1000
    # Fixed seed so every test sees the same data
    # (presumably var_process samples from NumPy's global RNG -- confirm).
    numpy.random.seed(42)
    # True graph: each variable depends on its own previous value, and
    # variables 1 and 2 additionally on the previous value of 0 and 1.
    links_coeffs = {
        0: [((0, -1), auto)],
        1: [((1, -1), auto), ((0, -1), coeff)],
        2: [((2, -1), auto), ((1, -1), coeff)],
    }
    self.data, self.true_parents_coeffs = pp.var_process(links_coeffs, T=T)
    self.ci_par_corr = ParCorr(
        use_mask=False,
        mask_type=None,
        significance='analytic',
        fixed_thres=None,
        sig_samples=10000,
        sig_blocklength=3,
        confidence='analytic',
        conf_lev=0.9,
        conf_samples=10000,
        conf_blocklength=1,
        recycle_residuals=False,
        verbosity=0)
    self.ci_gpdc = GPDC(
        significance='analytic',
        sig_samples=1000,
        sig_blocklength=1,
        confidence='bootstrap',
        conf_lev=0.9,
        conf_samples=100,
        conf_blocklength=None,
        use_mask=False,
        mask_type='y',
        recycle_residuals=False,
        verbosity=0)
max_conds_px=max_conds_px, ) return j, results_in_j # Example data, here the real dataset can be loaded as a numpy array of shape # (T, N) numpy.random.seed(42) # Fix random seed links_coeffs = {0: [((0, -1), 0.7)], 1: [((1, -1), 0.8), ((0, -1), 0.8)], 2: [((2, -1), 0.5), ((1, -2), 0.5)], } T = 500 # time series length data, true_parents_neighbors = pp.var_process(links_coeffs, T=T) T, N = data.shape # Initialize dataframe object dataframe = pp.DataFrame(data) # Optionally specify variable names var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$'] # Significance level in condition-selection step. If a list of levels is is # provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via # model-selection. pc_alpha = 0.2 # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5] selected_variables = range(N) #[2] # [2] # [2] # Maximum time lag
def run(self):
    """
    Generate data from ``self.links_coeffs`` and run PCMCI on it.

    A process of length ``T=1000`` is generated with ``pp.var_process``,
    wrapped in a ``pp.DataFrame`` and analysed with a default ``ParCorr``
    test.  The ``PCMCI`` object is stored on ``self.pcmciobj`` and the
    outcome of ``run_pcmci(tau_max=2, pc_alpha=None)`` on ``self.results``.
    """
    # Only the generated series is used; the second return value is dropped
    series, _ = pp.var_process(self.links_coeffs, T=1000)
    self.pcmciobj = PCMCI(
        dataframe=pp.DataFrame(series),
        cond_ind_test=ParCorr(),
    )
    self.results = self.pcmciobj.run_pcmci(tau_max=2, pc_alpha=None)
def plot__gpdc_get_single_residuals(
        self, save_path='/home/jakobrunge/test/gpdctest.pdf'):
    """
    Plot the GPDC and ParCorr fits of a known nonlinear dependency.

    A three-row random array is built in which row 1 depends on row 2
    through ``c * func(row 2)``.  ``_get_single_residuals`` is called on
    that array with ``self.ci_gpdc`` and with ``self.ci_par_corr``, and the
    predicted means of both are scattered (red and green) over the raw data
    together with the true curve (black).

    Parameters
    ----------
    save_path : str, optional
        File the figure is written to.  Defaults to the location that was
        previously hard-coded, so existing callers behave as before.
    """
    from matplotlib import pyplot

    ci_test = self.ci_gpdc

    a = 0.
    c = .3
    T = 500
    # Each key refers to a variable and the incoming links are supplied as a
    # list of format [((driver, lag), coeff), ...]
    links_coeffs = {
        0: [((0, -1), a)],
        1: [((1, -1), a), ((0, -1), c)],
    }

    numpy.random.seed(42)
    # Keep this call ahead of randn() below so the draw order is unchanged
    # (presumably var_process consumes NumPy's global RNG -- confirm).
    data, _ = pp.var_process(links_coeffs, use='inv_inno_cov', T=T)
    ci_test.set_dataframe(pp.DataFrame(data))

    def func(x):
        # x**0 is kept from the original formula; it evaluates to ones
        return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

    true_residual = numpy.random.randn(3, T)
    array = numpy.copy(true_residual)
    array[1] += c * func(array[2])

    xyz = numpy.array([0, 1] + [2] * (array.shape[0] - 2))
    # One pre-formatted string prints the same text on Python 2 and 3
    print('xyz  {0} {1}'.format(xyz, numpy.where(xyz == 1)))

    target_var = 1
    c_std = c

    # Plot from a copy in case _get_single_residuals modifies its input
    array_orig = numpy.copy(array)

    _, pred = ci_test._get_single_residuals(
        array, target_var, standardize=False, return_means=True)
    _, pred_parcorr = self.ci_par_corr._get_single_residuals(
        array, target_var, standardize=False, return_means=True)

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.scatter(array_orig[2], array_orig[1])
    ax.scatter(array_orig[2], pred, color='red')
    ax.scatter(array_orig[2], pred_parcorr, color='green')
    sorted_driver = numpy.sort(array_orig[2])
    ax.plot(sorted_driver, c_std * func(sorted_driver), color='black')

    fig.savefig(save_path)
    # Release the figure so repeated calls do not accumulate open figures
    pyplot.close(fig)
max_conds_px=max_conds_px, ) return j, results_in_j # Example data, here the real dataset can be loaded as a numpy array of shape # (T, N) numpy.random.seed(42) # Fix random seed links_coeffs = {0: [((0, -1), 0.7)], 1: [((1, -1), 0.8), ((0, -1), 0.8)], 2: [((2, -1), 0.5), ((1, -2), 0.5)], } T = 500 # time series length data, true_parents_neighbors = pp.var_process(links_coeffs, T=T) T, N = data.shape # Optionally specify variable names var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$'] # Initialize dataframe object dataframe = pp.DataFrame(data, var_names=var_names) # Significance level in condition-selection step. If a list of levels is is # provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via # model-selection. pc_alpha = 0.2 # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5] selected_variables = range(N) #[2] # [2] # [2] # Maximum time lag