def sample_posterior_M(self, thetaval):
    n = np.shape(self.X)[0]
    m = np.shape(self.Z)[0]
    K = GaussianKernel(thetaval)
    Kxz = K.kernel(self.X, self.Z)
    Kyz = K.kernel(self.Y, self.Z)
    G = Kxz - Kyz
    # Compute the observations
    Delta_val = np.mean(G, axis=0)
    Dzz = squareform(pdist(self.Z, 'sqeuclidean'))
    # Compute the R matrix
    R = np.exp(-Dzz / float(4 * thetaval**2)) + 10**(-8) * np.eye(m)
    H = np.eye(n) - np.ones((n, n)) / np.float(n)
    if self.ifindependent:
        Sigma1 = Kxz.T.dot(H.dot(Kxz)) / (n**2)
        Sigma2 = Kyz.T.dot(H.dot(Kyz)) / (n**2)
        Sigma = Sigma1 + Sigma2 + 10**(-8) * np.eye(m)
    else:
        Sigma = np.transpose(G).dot(H.dot(G)) / np.float(n**2) + 10**(-8) * np.eye(m)
    BF = multivariate_normal.pdf(Delta_val, cov=Sigma) / \
        multivariate_normal.pdf(Delta_val, cov=R + Sigma)
    Prob_M1 = 1 / np.float(BF + 1)
    mm = bernoulli.rvs(Prob_M1, size=1)
    if mm == 0:
        M = 0
    else:
        M = 1
    return BF, M
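# For reference, a minimal standalone sketch of the model-selection step above, outside
# the class: Delta_val, Sigma and R below are placeholder inputs mirroring the quantities
# computed in sample_posterior_M, not values from the original code.
import numpy as np
from scipy.stats import multivariate_normal, bernoulli

m_dim = 5
Delta_val = np.zeros(m_dim)        # observed mean-embedding differences
Sigma = 0.1 * np.eye(m_dim)        # observation covariance (model M0)
R = np.eye(m_dim)                  # prior covariance added under model M1

# Bayes factor p(Delta | M0) / p(Delta | M1), then posterior probability of M1
BF = multivariate_normal.pdf(Delta_val, cov=Sigma) / \
    multivariate_normal.pdf(Delta_val, cov=R + Sigma)
Prob_M1 = 1.0 / (BF + 1.0)
M = bernoulli.rvs(Prob_M1)         # sample the model indicator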
def ridge_error(nlp, nn, ni, sgma=1.0):
    mse_pres = np.zeros(nlp)
    for ii in np.arange(nlp):
        x_tr, y_tr, x_tt, y_tt = dat_gen(nn, ni)
        kernel = GaussianKernel(float(sgma))
        lamda_pre, width_pre = kernel.xvalidate(x_tr, y_tr, method="ridge_regress")
        mse_pres[ii] = err_pre(x_tr, y_tr, x_tt, y_tt, width_pre, lamda_pre)
    mse_pre = np.mean(mse_pres)
    return mse_pre
def disRe_corc(par, eta, lnd_cho, X, Y, Xtst=None, Ytst=None):
    mu_tr, sigma_tr = bag_cor(par[0], eta, X, lnd_cho)
    kernel = GaussianKernel(float(par[1]))
    # return early on branches where predictions / mse are not computed
    # (the original single return referenced undefined names in those cases)
    if Xtst is None:
        beta = kernel.ridge_regress(mu_tr, Y, par[2])
        return beta
    mu_tt, sigma_tt = bag_cor(par[0], eta, Xtst, lnd_cho)
    if Ytst is None:
        beta, prdt = kernel.ridge_regress(mu_tr, Y, par[2], mu_tt)
        return beta, prdt
    beta, prdt, mse = kernel.ridge_regress(mu_tr, Y, par[2], mu_tt, Ytst)
    return beta, prdt, np.sqrt(mse)
def disRe(par, lnd_cho, X, Y, Xtst=None, Ytst=None):
    mu_tr = pln_ebeding(par[0], X, lnd_cho)
    kernel = GaussianKernel(float(par[1]))
    # as in disRe_corc, return early when test inputs / labels are not provided
    if Xtst is None:
        beta = kernel.ridge_regress(mu_tr, Y, par[2])
        return beta
    mu_tt = pln_ebeding(par[0], Xtst, lnd_cho)
    if Ytst is None:
        beta, prdt = kernel.ridge_regress(mu_tr, Y, par[2], mu_tt)
        return beta, prdt
    beta, prdt, mse = kernel.ridge_regress(mu_tr, Y, par[2], mu_tt, Ytst)
    return beta, prdt, np.sqrt(mse)
def rff_error(nlp, nn, ni, D, sgma=1.0):
    mse_rffs = np.zeros(nlp)
    for ii in np.arange(nlp):
        x_tr, y_tr, x_tt, y_tt = dat_gen(nn, ni)
        kernel = GaussianKernel(float(sgma))
        kernel.rff_generate(D)
        lamda_rff, width_rff = kernel.xvalidate(x_tr, y_tr, method="ridge_regress_rff")
        mse_rffs[ii] = err_rff(x_tr, y_tr, x_tt, y_tt, width_rff, lamda_rff, D)
    mse_rff = np.mean(mse_rffs)
    return mse_rff
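# A possible comparison driver for the two helpers above; the argument values are
# illustrative placeholders and assume dat_gen and the err_* helpers used internally
# are available in this module.
if __name__ == "__main__":
    nlp, nn, ni, D = 10, 200, 100, 50
    mse_exact = ridge_error(nlp, nn, ni, sgma=1.0)    # exact kernel ridge regression
    mse_approx = rff_error(nlp, nn, ni, D, sgma=1.0)  # random Fourier feature approximation
    print 'exact ridge MSE:', mse_exact, ' rff ridge MSE:', mse_approx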
def UnitTestBagKernel(which_bag_kernel):
    num_bagsX = 20
    num_bagsY = 30
    shift = 2.0
    dim = 3
    bagsize = 50
    qvar = 0.6
    baglistx = list()
    baglisty = list()
    for _ in range(num_bagsX):
        muX = np.sqrt(qvar) * np.random.randn(1, dim)
        baglistx.append(muX + np.sqrt(1 - qvar) * np.random.randn(bagsize, dim))
    for _ in range(num_bagsY):
        muY = np.sqrt(qvar) * np.random.randn(1, dim)
        muY[:, 0] = muY[:, 0] + shift
        baglisty.append(muY + np.sqrt(1 - qvar) * np.random.randn(bagsize, dim))
    data_kernel = GaussianKernel(1.0)
    bag_kernel = which_bag_kernel(data_kernel)
    bag_kernel.show_kernel_matrix(baglistx + baglisty)
    print '...successfully visualised kernel matrix on bags.'
    bag_kernel.rff_generate(dim=dim)
    bagmmd = bag_kernel.estimateMMD_rff(baglistx, baglisty)
    print '...successfully computed rff mmd on bags; value: ', bagmmd
    response_y = np.random.randn(num_bagsX)
    bag_kernel.ridge_regress_rff(baglistx, response_y)
    print '...successfully ran rff ridge regression on bags.'
    print 'unit test ran for ', bag_kernel.__str__()
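# Example invocation. LinearBagKernel (used elsewhere in this section) is constructed from
# a single data kernel, so it is a plausible argument here; that it implements every method
# exercised by the unit test, and its import path, are assumptions.
if __name__ == "__main__":
    UnitTestBagKernel(LinearBagKernel)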
def get_bag_median_sqdist(feats, indiv_bw, num_freq=250, seed=23):
    data_gauss_kernel = GaussianKernel(sigma=indiv_bw)
    gauss_kernel = LinearBagKernel(data_gauss_kernel)
    # TODO: check this is correct, works for list
    np.random.seed(seed)
    gauss_kernel.rff_generate(mdata=num_freq, dim=feats.ndim)
    # the original passed an undefined `batch` here; the `feats` argument is assumed
    # to be the intended input
    means = gauss_kernel.rff_expand(feats)
    # the original called self.get_median_sqdist, but there is no `self` in this
    # standalone function; a module-level get_median_sqdist helper is assumed
    bag_median_sqdist = get_median_sqdist(means)
    return bag_median_sqdist
def compute_average_BF(n, m, nsim, Xmean, Xstd, Ymean, Ystd, Ydist='Normal',
                       thetaval=np.linspace(0.001, 60, 100), rdseed=12231):
    if thetaval is None:
        ntheta = 1
    else:
        ntheta = np.shape(thetaval)[0]
    BFmat = np.zeros((nsim, ntheta))
    ProbM1mat = np.zeros((nsim, ntheta))
    for ll in range(nsim):
        np.random.seed(rdseed)
        X = np.reshape(normal(Xmean, Xstd, n), (n, 1))
        Zx = normal(Xmean, Xstd, int(m / 2))
        if Ydist == 'Normal':
            Y = np.reshape(normal(Ymean, Ystd, n), (n, 1))
            Zy = normal(Ymean, Ystd, int(m / 2))
        elif Ydist == 'Laplace':
            Y = np.reshape(laplace(Ymean, Ystd, n), (n, 1))
            Zy = laplace(Ymean, Ystd, int(m / 2))
        else:
            raise NotImplementedError
        Z = np.reshape(np.concatenate((Zx, Zy)), (m, 1))
        if thetaval is None:
            K = GaussianKernel()
            XY = np.reshape(np.concatenate((X, Y)), (2 * n, 1))
            median_heuristic_theta = K.get_sigma_median_heuristic(XY)
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, np.array([median_heuristic_theta]),
                                                 Independent=True)
        else:
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, thetaval, Independent=True)
            median_heuristic_theta = None
        BFmat[ll, :] = BF_val.reshape(-1)
        ProbM1mat[ll, :] = prob_M1_val.reshape(-1)
        rdseed += 1
    return BFmat, ProbM1mat, median_heuristic_theta
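# Illustrative call of compute_average_BF: small sample sizes, a handful of simulations,
# and the default grid of kernel widths. The numeric values are placeholders, not
# settings used in the original experiments.
BFmat, ProbM1mat, med_theta = compute_average_BF(n=100, m=20, nsim=5,
                                                 Xmean=0.0, Xstd=1.0,
                                                 Ymean=0.0, Ystd=1.0,
                                                 Ydist='Laplace')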
def err_computing(nlp, nn, ni, D_ftr, sgma0, lamda0):
    ### the error for ridge regression
    err_pres = np.zeros(nlp)
    ### the error for the rff
    err_rffs = np.zeros(nlp)
    ### the error difference between the two regressions
    err_pre_rffs = np.zeros(nlp)
    for num in range(nlp):
        ### generating data
        xs, ys, xt, yt = dat_gen(nn, ni)
        ### run xvalidation
        kernel = GaussianKernel(sgma0)
        lamda_pre, width_pre = kernel.xvalidate(xs, ys, method="ridge_regress")
        kernel.rff_generate(D_ftr)
        lamda_rff, width_rff = kernel.xvalidate(xs, ys, method="ridge_regress_rff")
        ### perform ridge regression
        y_pre, err_pre0 = err_pre(xs, ys, xt, yt, width_pre, lamda_pre)
        err_pres[num] = err_pre0
        ### perform random fourier features
        y_rff, err_rff0 = err_rff(xs, ys, xt, yt, width_rff, lamda_rff, D_ftr)
        err_rffs[num] = err_rff0
        ### comparing the difference between the two predictions
        err_pre_rffs[num] = np.linalg.norm(y_pre - y_rff)**2 / ni
    ### the mean square error for ridge regression
    mse_pre = np.mean(err_pres)
    ### the mean square error for rff
    mse_rff = np.mean(err_rffs)
    ### the mean square error for the difference between ridge and rff
    mse_pre_rff = np.mean(err_pre_rffs)
    results = np.array([mse_pre, mse_rff, mse_pre_rff])
    return results
#Set data parameters
offset_data = 1.0
lengthscale_data = 1.0 / 3.0  #fixed to be a third of the range of the dataset
sigma_data = 1.0
random_noise = np.random.normal(loc=0.0, scale=1.0, size=None)

#Set initial parameters
#gamma = 1. #to check
#lengthscale_initial = np.sqrt(1/(2*gamma)) + random_noise
lengthscale_initial = lengthscale_data + random_noise
offset_initial = offset_data + random_noise
sigma_initial = sigma_data + random_noise

inputs = np.linspace(0, 1, num=N_all)[:, np.newaxis]
sigma = GaussianKernel(sigma=lengthscale_data).kernel(inputs, inputs)  #the lengthscale in kerpy is called sigma
#np.savetxt("../../Workspace/updated_AutoGP/R_plots/inputs.csv", inputs, header="inputs", delimiter=",")

# There is a problem of numerical precision
#pert = np.zeros((N_all,N_all))
#np.fill_diagonal(pert, 0.001)
#print('perturbation',pert)
#print('this is the covariance used to generate the data', sigma)
#print('this is the covariance shape', sigma.shape)
#print('its cholesky', np.linalg.cholesky(sigma+pert))

#sigma = MaternKernel(width = lengthscale_data, nu = 1.5, sigma = sigma_data).kernel(inputs,inputs) #Matern 3_2
#sigma = MaternKernel(width = lengthscale_data, nu = 2.5, sigma = sigma_data).kernel(inputs,inputs) #Matern 5_2
#sigma = sk.rbf_kernel(inputs, inputs)
#sigma = sk.rbf_kernel(inputs, inputs, gamma = 50)
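# The commented-out lines above hint at a jitter fix for the numerical-precision problem
# when factorising the covariance; a minimal sketch of that step is below (the jitter
# magnitude is an assumption, not a value from the original code):
jitter = 1e-6 * np.eye(N_all)
chol_sigma = np.linalg.cholesky(sigma + jitter)     # may fail without jitter if sigma is near-singular
f_sample = chol_sigma.dot(np.random.randn(N_all))   # one function draw from N(0, sigma)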
def estimate_skeleton(data_matrix, alpha, **kwargs):
    # originally the first argument is indep_test_func
    # now this version uses the HSIC Spectral Test for independence
    # and KRESIT for conditional independence.
    """Estimate a skeleton graph from the statistics information.

    Args:
        indep_test_func: the function name for a conditional independency test.
        data_matrix: data (as a numpy array).
        alpha: the significance level.
        kwargs:
            'max_reach': maximum value of l (see the code). The value depends on
                the underlying distribution.
            'method': if 'stable' given, use the stable-PC algorithm (see [Colombo2014]).
            other parameters may be passed depending on the indep_test_func()s.
    Returns:
        g: a skeleton graph (as a networkx.Graph).
        sep_set: a separation set (as a 2D-array of set()).

    [Colombo2014] Diego Colombo and Marloes H Maathuis. Order-independent
    constraint-based causal structure learning. In The Journal of Machine Learning
    Research, Vol. 15, pp. 3741-3782, 2014.
    """
    def method_stable(kwargs):
        return ('method' in kwargs) and kwargs['method'] == "stable"

    node_ids = range(data_matrix.shape[1])
    g = _create_complete_graph(node_ids)
    node_size = data_matrix.shape[1]
    sep_set = [[set() for i in range(node_size)] for j in range(node_size)]

    X_idx_list_init = []
    Y_idx_list_init = []
    Z_idx_list_init = []
    pval_list_init = []
    l = 0
    completed_xy_idx_init = 0
    completed_z_idx_init = 0
    remove_edges_current = []

    results_filename = kwargs['results_filename']
    myfolder = "pcalg_results/"
    save_filename = myfolder + results_filename + ".bin"
    #print("save_filename:", save_filename)
    #sys.exit(1)
    if not os.path.exists(myfolder):
        os.mkdir(myfolder)
    elif os.path.exists(save_filename):
        load_f = open(save_filename, "r")
        [X_idx_list_init, Y_idx_list_init, Z_idx_list_init, pval_list_init,
         l, completed_xy_idx_init, completed_z_idx_init,
         remove_edges_current, g] = load(load_f)
        load_f.close()
        print("Found existing results")

    X_idx_list = X_idx_list_init
    Y_idx_list = Y_idx_list_init
    Z_idx_list = Z_idx_list_init
    pval_list = pval_list_init
    completed_xy_idx = completed_xy_idx_init
    completed_z_idx = completed_z_idx_init

    while True:
        cont = False
        remove_edges = remove_edges_current
        perm_iteration_list = list(permutations(node_ids, 2))
        length_iteration_list = len(perm_iteration_list)
        for ij in arange(completed_xy_idx, length_iteration_list):
            (i, j) = perm_iteration_list[ij]
            adj_i = g.neighbors(i)
            if j not in adj_i:
                continue
            else:
                adj_i.remove(j)
                pass
            if len(adj_i) >= l:
                _logger.debug('testing %s and %s' % (i, j))
                _logger.debug('neighbors of %s are %s' % (i, str(adj_i)))
                if len(adj_i) < l:
                    continue
                cc = list(combinations(adj_i, l))
                length_cc = len(cc)
                for kk in arange(completed_z_idx, length_cc):
                    k = cc[kk]
                    _logger.debug('indep prob of %s and %s with subset %s' % (i, j, str(k)))
                    if l == 0:  # independence testing
                        print("independence testing", (i, j))
                        data_x = data_matrix[:, [i]]
                        data_y = data_matrix[:, [j]]
                        num_samples = np.shape(data_matrix)[0]
                        kernelX_hsic = GaussianKernel(1.)
                        kernelY_hsic = GaussianKernel(1.)
                        kernelX_use_median_hsic = True
                        kernelY_use_median_hsic = True
                        myspectraltestobj = HSICSpectralTestObject(num_samples, None,
                                                                   kernelX_hsic, kernelY_hsic,
                                                                   kernelX_use_median=kernelX_use_median_hsic,
                                                                   kernelY_use_median=kernelY_use_median_hsic,
                                                                   num_nullsims=1000, unbiased=False)
                        p_val, _ = myspectraltestobj.compute_pvalue_with_time_tracking(data_x, data_y)
                        X_idx_list.append((i))
                        Y_idx_list.append((j))
                        Z_idx_list.append((0))
                        pval_list.append((p_val))
                    else:  # conditional independence testing
                        print("conditional independence testing", (i, j, k))
                        data_x = data_matrix[:, [i]]
                        data_y = data_matrix[:, [j]]
                        data_z = data_matrix[:, k]
                        num_samples = np.shape(data_matrix)[0]
                        #kernelX = GaussianKernel(1.)
                        #kernelY = GaussianKernel(1.)
                        #kernelX_use_median = True
                        #kernelY_use_median = True
                        #kernelX = LinearKernel()
                        #kernelY = LinearKernel()
                        kernelX = kwargs['kernelX']
                        kernelY = kwargs['kernelY']
                        kernelZ = GaussianKernel(1.)
                        kernelX_use_median = kwargs['kernelX_use_median']
                        kernelY_use_median = kwargs['kernelY_use_median']
                        kernelRxz = kwargs['kernelRxz']
                        kernelRyz = kwargs['kernelRyz']
                        kernelRxz_use_median = kwargs['kernelRxz_use_median']
                        kernelRyz_use_median = kwargs['kernelRyz_use_median']
                        RESIT_type = kwargs['RESIT_type']
                        optimise_lambda_only = kwargs['optimise_lambda_only']
                        grid_search = kwargs['grid_search']
                        GD_optimise = kwargs['GD_optimise']
                        num_lambdaval = 30
                        lambda_val = 10**np.linspace(-6, 1, num=num_lambdaval)
                        z_bandwidth = None
                        #num_bandwidth = 20
                        #z_bandwidth = 10**np.linspace(-5,1,num = num_bandwidth)
                        mytestobj = TwoStepCondTestObject(num_samples, None,
                                                          kernelX, kernelY, kernelZ,
                                                          kernelX_use_median=kernelX_use_median,
                                                          kernelY_use_median=kernelY_use_median,
                                                          kernelZ_use_median=True,
                                                          kernelRxz=kernelRxz, kernelRyz=kernelRyz,
                                                          kernelRxz_use_median=kernelRxz_use_median,
                                                          kernelRyz_use_median=kernelRyz_use_median,
                                                          RESIT_type=RESIT_type,
                                                          num_shuffles=800,
                                                          lambda_val=lambda_val, lambda_X=None, lambda_Y=None,
                                                          optimise_lambda_only=optimise_lambda_only,
                                                          sigmasq_vals=z_bandwidth, sigmasq_xz=1., sigmasq_yz=1.,
                                                          K_folds=5, grid_search=grid_search,
                                                          GD_optimise=GD_optimise, learning_rate=0.1, max_iter=300,
                                                          initial_lambda_x=0.5, initial_lambda_y=0.5,
                                                          initial_sigmasq=1)
                        p_val, _ = mytestobj.compute_pvalue(data_x, data_y, data_z)
                        X_idx_list.append((i))
                        Y_idx_list.append((j))
                        Z_idx_list.append(k)
                        pval_list.append((p_val))
                    completed_z_idx = kk + 1
                    save_f = open(save_filename, "w")
                    dump([X_idx_list, Y_idx_list, Z_idx_list, pval_list, l,
                          completed_xy_idx, completed_z_idx,
                          remove_edges, g], save_f)
                    save_f.close()
                    _logger.debug('p_val is %s' % str(p_val))
                    if p_val > alpha:
                        if g.has_edge(i, j):
                            _logger.debug('p: remove edge (%s, %s)' % (i, j))
                            if method_stable(kwargs):
                                remove_edges.append((i, j))
                            else:
                                g.remove_edge(i, j)
                            pass
                        sep_set[i][j] |= set(k)
                        sep_set[j][i] |= set(k)
                        break
                    pass
                completed_z_idx = 0
                completed_xy_idx = ij + 1
                cont = True
                pass
            pass
        l += 1
        completed_xy_idx = 0
        if method_stable(kwargs):
            g.remove_edges_from(remove_edges)
        if cont is False:
            break
        if ('max_reach' in kwargs) and (l > kwargs['max_reach']):
            break
        save_f = open(save_filename, "w")
        dump([X_idx_list, Y_idx_list, Z_idx_list, pval_list, l,
              completed_xy_idx, completed_z_idx,
              remove_edges, g], save_f)
        save_f.close()
        pass
    return (g, sep_set)
    entro = list()
    m0 = 0.0
    for ii in np.arange(l):
        pois = np.random.normal(m0, sd[ii], n)
        en = 0.5 * np.log(2 * np.pi * np.e * (sd[ii]**2))
        sample.append(pois)
        entro.append(en)
    return sample, entro

sam1d_tr, entro1d_tr = sam1d_gen(5, 10)
print sam1d_tr
#print entro1d_tr
sam1d_tt, entro1d_tt = sam1d_gen(2, 5)

##########################################################
# conduct the ridge regression
data_gamma = 1.0
bag_gamma = 1.0
data_kernel = GaussianKernel(data_gamma)
print data_kernel.kernel(sam1d_tr)
bag_kernel = GaussianBagKernel(data_kernel, bag_gamma)

#standard distribution regression - computes full kernel matrices
#coeff,ypred=bag_kernel.ridge_regress(sam1d_tr,entro1d_tr,lmbda=0.01,Xtst=sam1d_tt)
#or distribution regression with random features
#bag_kernel.rff_generate(50,60,dim=dim) #50 random features for bag_kernel, 60 for data_kernel
#coeff,ypred=bag_kernel.ridge_regress_rff(baglistX,y,Xtst=baglistXtst)
num_samples, D = shape(data)
#assume that x corresponds to all but the last column in the file
data_x = data[:, :(D - 1)]
#and that y is just the last column
data_y = data[:, D - 1]
#need to ensure data_y is a 2d array
data_y = reshape(data_y, (num_samples, 1))

'''
print "shape of data_x:", shape(data_x)
print "shape of data_y:", shape(data_y)
'''

'''
First, we need to specify the kernels for X and Y. We will use Gaussian kernels --
the default value of the width parameter is 1.0; the widths can either be kept fixed
or set by a median heuristic based on the data when running a test.
'''
kernelX = GaussianKernel()
kernelY = GaussianKernel()

'''
HSICSpectralTestObject / HSICPermutationTestObject:
===================================================
num_samples: Integer value -- the number of data samples.
data_generator: If we use simulated data, which function to use to generate data for
                repeated tests to investigate power; examples are given in SimDataGen.py,
                e.g. data_generator = SimDataGen.LargeScale;
                the default value is None (if only a single test will be run).
kernelX, kernelY: The kernel functions to use for X and Y respectively
                (examples are included in the kerpy folder),
                e.g. kernelX = GaussianKernel(); alternatively, for a kernel with a
                fixed width: kernelY = GaussianKernel(float(1.5)).
kernelX_use_median, kernelY_use_median: "True" or "False" -- whether the median heuristic
                should be used to select the kernel bandwidth.
rff: "True" or "False" -- whether random Fourier features should be used.
def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("num_samples", type=int,
                        help="total # of samples")
    parser.add_argument("--num_rfx", type=int,
                        help="number of random features of the data X", default=30)
    parser.add_argument("--num_rfy", type=int,
                        help="number of random features of the data Y", default=30)
    parser.add_argument("--num_inducex", type=int,
                        help="number of inducing variables of the data X", default=30)
    parser.add_argument("--num_inducey", type=int,
                        help="number of inducing variables of the data Y", default=30)
    parser.add_argument("--num_shuffles", type=int,
                        help="number of shuffles", default=800)
    parser.add_argument("--blocksize", type=int,
                        help="# of samples per block (includes X and Y) when using a block-based test",
                        default=20)
    parser.add_argument("--dimX", type=int,
                        help="dimensionality of the data X", default=3)
    parser.add_argument("--dimZ", type=int,
                        help="dimensionality of the data Z (i.e. the conditioning variable)", default=7)
    parser.add_argument("--kernelX", const=LinearKernel(), default=GaussianKernel(1.),
                        action='store_const',
                        help="Linear kernel (default GaussianKernel(1.))?")
    parser.add_argument("--kernelY", const=LinearKernel(), default=GaussianKernel(1.),
                        action='store_const',
                        help="Linear kernel (default GaussianKernel(1.))?")
    parser.add_argument("--kernelX_use_median", action="store_true",
                        help="should median heuristic be used for X?", default=False)
    parser.add_argument("--kernelY_use_median", action="store_true",
                        help="should median heuristic be used for Y?", default=False)
    parser.add_argument("--kernelRxz", const=GaussianKernel(1.), default=LinearKernel(),
                        action='store_const',
                        help="Gaussian kernel GaussianKernel(1.) (default LinearKernel)?")
    parser.add_argument("--kernelRyz", const=GaussianKernel(1.), default=LinearKernel(),
                        action='store_const',
                        help="Gaussian kernel GaussianKernel(1.) (default LinearKernel)?")
    parser.add_argument("--kernelRxz_use_median", action="store_true",
                        help="should median heuristic be used for residuals Rxz?", default=False)
    parser.add_argument("--kernelRyz_use_median", action="store_true",
                        help="should median heuristic be used for residuals Ryz?", default=False)
    parser.add_argument("--RESIT_type", action="store_true",
                        help="conditional testing using RESIT?", default=False)
    parser.add_argument("--optimise_lambda_only", action="store_false",
                        help="optimise lambdas only?", default=True)
    parser.add_argument("--grid_search", action="store_false",
                        help="optimise hyperparameters through grid search?", default=True)
    parser.add_argument("--GD_optimise", action="store_true",
                        help="optimise hyperparameters through gradient descent?", default=False)
    parser.add_argument("--results_filename", type=str,
                        help="name of the file to save results?", default="testing")
    parser.add_argument("--figure_filename", type=str,
                        help="name of the file to save the causal graph?", default="testing")
    parser.add_argument("--data_filename", type=str,
                        help="name of the file to load data from?", default="testing")
    #parser.add_argument("--dimY", type=int,
    #                    help="dimensionality of the data Y", default=3)
    parser.add_argument("--hypothesis", type=str,
                        help="is null or alternative true in this experiment? [null, alter]",
                        default="alter")
    parser.add_argument("--nullvarmethod", type=str,
                        help="how to estimate asymptotic variance under null? [direct, permutation, across]",
                        default="direct")
    parser.add_argument("--streaming", action="store_true",
                        help="should data be streamed (rather than all loaded into memory)?",
                        default=False)
    parser.add_argument("--rff", action="store_true",
                        help="should random features be used?", default=False)
    parser.add_argument("--induce_set", action="store_true",
                        help="should inducing variables be used?", default=False)
    args = parser.parse_args()
    return args
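# A hypothetical invocation of a script using this parser (the script name below is a
# placeholder, not part of the repository); every flag shown corresponds to an argument
# defined above:
#
#   python run_kresit_pcalg.py 500 --kernelX_use_median --kernelY_use_median \
#       --results_filename myrun --figure_filename myrun_graph --data_filename mydata
#
# which inside the script resolves to:
if __name__ == "__main__":
    args = parse_arguments()
    print args.num_samples, args.dimZ, args.grid_search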
data_generating_function = SimDataGen.LargeScale
data_generating_function_null = SimDataGen.turn_into_null(SimDataGen.LargeScale)
args = ProcessingObject.parse_arguments()

'''unpack the arguments needed:'''
num_samples = args.num_samples
hypothesis = args.hypothesis
dimX = args.dimX
kernelX_use_median = args.kernelX_use_median
kernelY_use_median = args.kernelY_use_median
blocksize = args.blocksize  #currently, we are using the same blocksize for both X and Y

# A temporary set up for the kernels:
kernelX = GaussianKernel(1.)
kernelY = GaussianKernel(1.)

if hypothesis == "alter":
    data_generator = lambda num_samples: data_generating_function(num_samples, dimension=dimX)
elif hypothesis == "null":
    data_generator = lambda num_samples: data_generating_function_null(num_samples, dimension=dimX)
else:
    raise NotImplementedError()

test_object = HSICBlockTestObject(num_samples, data_generator, kernelX, kernelY,
                                  kernelX_use_median=kernelX_use_median,
                                  kernelY_use_median=kernelY_use_median,
                                  nullvarmethod='permutation',
                                  blocksize=blocksize)
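# A hedged sketch of actually running the block test. compute_pvalue_with_time_tracking
# is the interface used for the spectral test elsewhere in this section; that
# HSICBlockTestObject exposes the same method (and generates data itself when none is
# passed) is an assumption.
pvalue, elapsed_time = test_object.compute_pvalue_with_time_tracking()
print "HSIC block test p-value:", pvalue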
pvals_RESIT = np.reshape(np.zeros(num_trials), (num_trials, 1))

# computing Type I error (Null model is true)
for jj in xrange(num_trials):
    #print "number of trial:", jj
    data_x = np.reshape(np.zeros(num_samples), (num_samples, 1))
    noise_x = np.reshape(normal(0, 1, np.shape(data_z)[0]), (np.shape(data_z)[0], 1))
    coin_flip_x = np.random.choice([0, 1], replace=True, size=num_samples)
    data_x[coin_flip_x == 0] = (data_z[coin_flip_x == 0] - 10)**2
    data_x[coin_flip_x == 1] = -(data_z[coin_flip_x == 1] - 10)**2 + 35
    data_x = data_x + noise_x

    # KRESIT:
    kernelX = GaussianKernel(1.)
    kernelY = GaussianKernel(1.)
    kernelZ = GaussianKernel(1.)
    mytestobject = TwoStepCondTestObject(num_samples, None, kernelX, kernelY, kernelZ,
                                         kernelX_use_median=True, kernelY_use_median=True,
                                         kernelZ_use_median=True,
                                         kernelRxz=LinearKernel(), kernelRyz=LinearKernel(),
                                         kernelRxz_use_median=False, kernelRyz_use_median=False,
                                         RESIT_type=False,
def err_pre(xs, ys, xt, yt, sgma=1.0, lamda=0.1):
    kernel = GaussianKernel(float(sgma))
    aa, y_pre, err_pre0 = kernel.ridge_regress(xs, ys, lamda, Xtst=xt, ytst=yt)
    return y_pre, err_pre0
def err_rff(xs, ys, xt, yt, sgma=1.0, lamda=0.1, D=50):
    kernel = GaussianKernel(float(sgma))
    kernel.rff_generate(D)
    bb, y_rff, err_rff0 = kernel.ridge_regress_rff(xs, ys, lamda, Xtst=xt, ytst=yt)
    return y_rff, err_rff0
kernelRxz_use_median = args.kernelRxz_use_median  #Default: False
kernelRyz_use_median = args.kernelRyz_use_median  #Default: False
RESIT_type = args.RESIT_type  #Default: False
optimise_lambda_only = args.optimise_lambda_only  #Default: True
grid_search = args.grid_search  #Default: True
GD_optimise = args.GD_optimise  #Default: False

data_generator = lambda num_samples: data_generating_function(num_samples, dimension=dimZ)

num_lambdaval = 30
lambda_val = 10**np.linspace(-6, -1, num=num_lambdaval)
#num_bandwidth = 20
#z_bandwidth = 10**np.linspace(-5,1,num = num_bandwidth)
z_bandwidth = None
kernelZ = GaussianKernel(1.)

test_object = TwoStepCondTestObject(num_samples, data_generator, kernelX, kernelY, kernelZ,
                                    kernelX_use_median=kernelX_use_median,
                                    kernelY_use_median=kernelY_use_median,
                                    kernelZ_use_median=True,
                                    kernelRxz=kernelRxz, kernelRyz=kernelRyz,
                                    kernelRxz_use_median=kernelRxz_use_median,
                                    kernelRyz_use_median=kernelRyz_use_median,
                                    RESIT_type=RESIT_type,
                                    num_shuffles=800,