def build_environmental(self):
    """Build the squared-exponential covariance for the environmental term.

    ``self.X`` is split with ``self.train_set`` (assumed to be a boolean
    row mask -- TODO confirm): selected rows become the training inputs and
    the remaining rows are passed to ``SQExpCov`` as ``Xstar``. When no rows
    are held out, ``Xstar`` is passed as ``None``.

    Returns:
        The ``SQExpCov`` instance, with ``act_length`` set to ``False``.
    """
    Xtrain = self.X[self.train_set, :]
    Xstar = self.X[~self.train_set, :]
    # An empty row selection keeps the column count, so its shape is
    # (0, n_columns) and never equals (0, 0) when X has columns; check the
    # number of rows instead.
    if Xstar.shape[0] == 0:
        Xstar = None
    environmental_cov = SQExpCov(Xtrain, Xstar=Xstar)
    environmental_cov.act_length = False
    return environmental_cov
def setUp(self):
    """Create two SQExpCov terms on seeded random inputs and their SumCov."""
    np.random.seed(1)
    first_inputs = np.random.randn(10, 5)
    second_inputs = np.random.randn(10, 8)
    self._X1, self._X2 = first_inputs, second_inputs
    self._cov1 = SQExpCov(first_inputs)
    self._cov2 = SQExpCov(second_inputs)
    self._cov = SumCov(self._cov1, self._cov2)
def simulate_local(self):
    """Add a rescaled SQExpCov kernel on ``self.X`` to ``self.covar``.

    The kernel length is set from ``self.l2``; the kernel matrix is then
    multiplied by ``covar_rescaling_factor_efficient`` of itself before
    being accumulated.
    """
    local_cov = SQExpCov(self.X)
    local_cov.length = self.l2
    kernel = local_cov.K()
    # Both updates are in place: the array returned by K() is scaled
    # directly, and self.covar is modified rather than rebound.
    kernel *= covar_rescaling_factor_efficient(kernel)
    self.covar += kernel
class TestSumCov(unittest.TestCase):
    """Checks SumCov against the two SQExpCov terms it is built from."""

    def setUp(self):
        """Build two SQExpCov terms on seeded random inputs and sum them."""
        np.random.seed(1)
        x_first = np.random.randn(10, 5)
        x_second = np.random.randn(10, 8)
        self._X1, self._X2 = x_first, x_second
        self._cov1 = SQExpCov(x_first)
        self._cov2 = SQExpCov(x_second)
        self._cov = SumCov(self._cov1, self._cov2)

    def test_sum_combination(self):
        """K() of the sum must equal the sum of the terms' K()."""
        expected = self._cov1.K() + self._cov2.K()
        actual = self._cov.K()
        np.testing.assert_almost_equal(expected, actual)

    def test_Kgrad(self):
        """The error mcheck_grad reports for K vs. K_grad_i must be ~0."""
        combined = self._cov

        def func(x, i):
            combined.setParams(x)
            return combined.K()

        def grad(x, i):
            combined.setParams(x)
            return combined.K_grad_i(i)

        start = combined.getParams()
        np.testing.assert_almost_equal(mcheck_grad(func, grad, start), 0.)

    def test_use_to_predict_exception(self):
        """Assigning ``use_to_predict`` must raise NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            self._cov.use_to_predict = 1.
def test_input(self):
    """SQExpCov must reject non-finite values and non-array input."""
    # Non-finite entries are expected to raise ValueError.
    for bad_value in (np.inf, np.nan):
        with self.assertRaises(ValueError):
            SQExpCov(np.array([[bad_value]]))
    # A plain string is expected to raise NotArrayConvertibleError.
    with self.assertRaises(NotArrayConvertibleError):
        SQExpCov("Ola meu querido.")
class TestProd(unittest.TestCase):
    """Checks ProdCov against the three SQExpCov terms it is built from."""

    def setUp(self):
        """Build three SQExpCov terms on seeded random inputs and their product."""
        # Seed the global RNG so that a failing gradient/Hessian check can be
        # reproduced; the other test classes in this file seed the same way.
        np.random.seed(1)
        self._X1 = np.random.randn(10, 5)
        self._X2 = np.random.randn(10, 8)
        self._X3 = np.random.randn(10, 7)
        self._cov1 = SQExpCov(self._X1)
        self._cov2 = SQExpCov(self._X2)
        self._cov3 = SQExpCov(self._X3)
        self._cov = ProdCov(self._cov1, self._cov2, self._cov3)

    def test_sum_combination(self):
        """K() of the product must equal the element-wise product of the terms' K().

        The method name is historical; what is checked is a product.
        """
        K1 = self._cov1.K() * self._cov2.K() * self._cov3.K()
        K2 = self._cov.K()
        np.testing.assert_almost_equal(K1, K2)

    def test_Kgrad(self):
        """The error mcheck_grad reports for K vs. K_grad_i must be ~0."""
        cov = self._cov

        def func(x, i):
            cov.setParams(x)
            return cov.K()

        def grad(x, i):
            cov.setParams(x)
            return cov.K_grad_i(i)

        x0 = cov.getParams()
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.)

    def test_Khess(self):
        """For each parameter j, check K_hess_i_j(j, i) against K_grad_i(j)."""
        cov = self._cov
        for j in range(cov.getNumberParams()):
            # The closures read the loop variable j; they are only used
            # inside this iteration, so they always see the current j.
            def func(x, i):
                cov.setParams(x)
                return cov.K_grad_i(j)

            def grad(x, i):
                cov.setParams(x)
                return cov.K_hess_i_j(j, i)

            x0 = cov.getParams()
            err = mcheck_grad(func, grad, x0)
            np.testing.assert_almost_equal(err, 0.)

    def test_use_to_predict_exception(self):
        """Assigning ``use_to_predict`` must raise NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            self._cov.use_to_predict = 1.
def setUp(self):
    """Build three SQExpCov terms on seeded random inputs and their ProdCov."""
    # Seed the global RNG so that the random fixtures, and therefore any
    # test failure, are reproducible from run to run.
    np.random.seed(1)
    self._X1 = np.random.randn(10, 5)
    self._X2 = np.random.randn(10, 8)
    self._X3 = np.random.randn(10, 7)
    self._cov1 = SQExpCov(self._X1)
    self._cov2 = SQExpCov(self._X2)
    self._cov3 = SQExpCov(self._X3)
    self._cov = ProdCov(self._cov1, self._cov2, self._cov3)
def setUp(self):
    """Seed NumPy's global RNG and build an SQExpCov on a 10x5 random input."""
    np.random.seed(1)
    inputs = np.random.randn(10, 5)
    self._X = inputs
    self._cov = SQExpCov(inputs)
class TestSQExp(unittest.TestCase):
    """Tests for SQExpCov: distances, parameter activation, gradients, input checks."""

    def setUp(self):
        """Build an SQExpCov on a seeded 10x5 random input."""
        np.random.seed(1)
        self._X = np.random.randn(10, 5)
        self._cov = SQExpCov(self._X)

    def test_setX_retE(self):
        """E() must match squared pairwise Euclidean distances, also after X is replaced."""
        X1 = self._X
        np.random.seed(2)
        X2 = np.random.randn(10, 5)
        E1 = sp.spatial.distance.pdist(X1, 'euclidean')**2
        E1 = sp.spatial.distance.squareform(E1)
        E2 = sp.spatial.distance.pdist(X2, 'euclidean')**2
        E2 = sp.spatial.distance.squareform(E2)
        np.testing.assert_almost_equal(E1, self._cov.E())
        self._cov.X = X2
        np.testing.assert_almost_equal(E2, self._cov.E())

    def test_param_activation(self):
        """The number of parameters must follow the act_scale / act_length flags."""
        cov = self._cov
        for act_scale, act_length, n_params in (
            (False, False, 0),
            (False, True, 1),
            (True, False, 1),
            (True, True, 2),
        ):
            cov.act_scale = act_scale
            cov.act_length = act_length
            self.assertEqual(len(cov.getParams()), n_params)

        # With both parameters deactivated, only an empty parameter vector
        # is accepted and no gradient index is valid.
        cov.act_scale = False
        cov.act_length = False
        cov.setParams(np.array([]))
        with self.assertRaises(ValueError):
            cov.setParams(np.array([0]))
        with self.assertRaises(ValueError):
            cov.K_grad_i(0)
        with self.assertRaises(ValueError):
            cov.K_grad_i(1)

    def test_Kgrad(self):
        """Check K_grad_i(0) against scale and K_grad_i(1) against length."""
        def func(x, i):
            self._cov.scale = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.scale = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.scale])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.)

        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K()

        def grad(x, i):
            # Must vary the same parameter as func (length, not scale).
            self._cov.length = x[i]
            return self._cov.K_grad_i(1)

        # Start from the current value of the parameter being varied and
        # actually assert on the reported error.
        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.)

    def test_Kgrad_activation(self):
        """With one parameter deactivated, K_grad_i(0) refers to the active one."""
        self._cov.act_length = False

        def func(x, i):
            self._cov.scale = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.scale = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.scale])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.)

        self._cov.act_scale = False
        self._cov.act_length = True

        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.length = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.)

    def test_input(self):
        """SQExpCov must reject non-finite values and non-array input."""
        with self.assertRaises(ValueError):
            SQExpCov(np.array([[np.inf]]))
        with self.assertRaises(ValueError):
            SQExpCov(np.array([[np.nan]]))
        with self.assertRaises(NotArrayConvertibleError):
            SQExpCov("Ola meu querido.")
class TestSQExp(unittest.TestCase):
    """Tests for SQExpCov: distances, activation, gradients, Hessians, input checks."""

    def setUp(self):
        """Build an SQExpCov on a seeded 10x5 random input."""
        np.random.seed(1)
        self._X = np.random.randn(10, 5)
        self._cov = SQExpCov(self._X)

    def test_setX_retE(self):
        """E() must match squared pairwise Euclidean distances, also after X is replaced."""
        X1 = self._X
        np.random.seed(2)
        X2 = np.random.randn(10, 5)
        E1 = sp.spatial.distance.pdist(X1, "euclidean") ** 2
        E1 = sp.spatial.distance.squareform(E1)
        E2 = sp.spatial.distance.pdist(X2, "euclidean") ** 2
        E2 = sp.spatial.distance.squareform(E2)
        np.testing.assert_almost_equal(E1, self._cov.E())
        self._cov.X = X2
        np.testing.assert_almost_equal(E2, self._cov.E())

    def test_param_activation(self):
        """The number of parameters must follow the act_scale / act_length flags."""
        cov = self._cov
        for act_scale, act_length, n_params in (
            (False, False, 0),
            (False, True, 1),
            (True, False, 1),
            (True, True, 2),
        ):
            cov.act_scale = act_scale
            cov.act_length = act_length
            self.assertEqual(len(cov.getParams()), n_params)

        # With both parameters deactivated, only an empty parameter vector
        # is accepted and no gradient index is valid.
        cov.act_scale = False
        cov.act_length = False
        cov.setParams(np.array([]))
        with self.assertRaises(ValueError):
            cov.setParams(np.array([0]))
        with self.assertRaises(ValueError):
            cov.K_grad_i(0)
        with self.assertRaises(ValueError):
            cov.K_grad_i(1)

    def test_Kgrad(self):
        """Check K_grad_i(0) against scale and K_grad_i(1) against length."""

        def func(x, i):
            self._cov.scale = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.scale = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.scale])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0)

        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K()

        def grad(x, i):
            # Must vary the same parameter as func (length, not scale).
            self._cov.length = x[i]
            return self._cov.K_grad_i(1)

        # Start from the current value of the parameter being varied and
        # actually assert on the reported error.
        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0)

    def test_Kgrad_activation(self):
        """With one parameter deactivated, K_grad_i(0) refers to the active one."""
        self._cov.act_length = False

        def func(x, i):
            self._cov.scale = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.scale = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.scale])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0)

        self._cov.act_scale = False
        self._cov.act_length = True

        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K()

        def grad(x, i):
            self._cov.length = x[i]
            return self._cov.K_grad_i(0)

        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0)

    def test_Khess(self):
        """Check K_hess_i_j entries (0,0), (0,1) and (1,1) against K_grad_i."""

        # (0, 0): vary scale, differentiate the scale gradient.
        def func(x, i):
            self._cov.scale = x[i]
            return self._cov.K_grad_i(0)

        def grad(x, i):
            self._cov.scale = x[i]
            return self._cov.K_hess_i_j(0, 0)

        x0 = np.array([self._cov.scale])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0, decimal=5)

        # (0, 1): vary length, differentiate the scale gradient.
        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K_grad_i(0)

        def grad(x, i):
            self._cov.length = x[i]
            return self._cov.K_hess_i_j(0, 1)

        # The varied parameter is length, so start from its current value.
        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0, decimal=5)

        # (1, 1): vary length, differentiate the length gradient.
        def func(x, i):
            self._cov.length = x[i]
            return self._cov.K_grad_i(1)

        def grad(x, i):
            self._cov.length = x[i]
            return self._cov.K_hess_i_j(1, 1)

        x0 = np.array([self._cov.length])
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0.0, decimal=5)

    def test_input(self):
        """SQExpCov must reject non-finite values and non-array input."""
        with self.assertRaises(ValueError):
            SQExpCov(np.array([[np.inf]]))
        with self.assertRaises(ValueError):
            SQExpCov(np.array([[np.nan]]))
        with self.assertRaises(NotArrayConvertibleError):
            SQExpCov("Ola meu querido.")
def build_model(Kinship, phenotype, N_cells, X, cell_types, test_set=None,
                intrinsic=True, environment=True,
                environmental_cell_types=None, affected_cell_type=None,
                by_effective_type=True):
    """Assemble a GP whose covariance is a sum of noise, environment and intrinsic terms.

    Args:
        Kinship: passed unchanged to ``ZKZCov`` for the environment terms.
        phenotype: values used for the mean term (training rows only when a
            boolean ``test_set`` is given).
        N_cells: total number of cells (training plus test).
        X: array indexed by row as ``X[mask, :]``; used as input of the
            local-noise ``SQExpCov`` and passed to ``ZKZCov``.
        cell_types: per-cell type labels. Labels are used directly as list
            indices below, so they are assumed to be the integers
            ``0..n_types-1`` -- TODO confirm against callers.
        test_set: optional boolean mask of held-out cells. Any other value
            (``None`` or a non-boolean array) means all cells are used for
            training.
        intrinsic: if true, add a ``FixedCov`` built from
            ``build_cell_type_kinship``.
        environment: if true, add ``ZKZCov`` environment terms.
        environmental_cell_types: if not ``None``, only this effector type is
            used (only consulted when ``by_effective_type`` is true).
        affected_cell_type: if not ``None``, only this receiving type is used.
        by_effective_type: if true, one environment term per
            (receiving, effector) pair; otherwise one per receiving type
            built with the ``'all'`` effector.

    Returns:
        Tuple ``(gp, noise_covs, local_noise_cov, env_covs, intrinsic_cov)``.
        ``env_covs`` is ``None`` when ``environment`` is false and
        ``intrinsic_cov`` is ``None`` when ``intrinsic`` is false.
    """
    if test_set is not None and test_set.dtype == bool:
        training_mask = ~test_set
        X_training = X[training_mask, :]
        X_test = X[test_set, :]
        mean_training = phenotype[training_mask]
        N_cells_training = int(np.count_nonzero(training_mask))
        N_cells_test = N_cells - N_cells_training
        cell_types_training = cell_types[training_mask]
        cell_types_test = cell_types[test_set]
    else:
        X_training = X
        X_test = None
        mean_training = phenotype
        N_cells_training = N_cells
        N_cells_test = 0
        cell_types_training = cell_types

    rm_diag = True

    # list cell types
    cell_type_list = np.unique(cell_types)
    n_types = len(cell_type_list)

    # local_noise
    local_noise_cov = SQExpCov(X_training, Xstar=X_test)
    local_noise_cov.setPenalty(mu=50., sigma=50.)

    # The running sum starts from the local-noise term; every further term
    # is folded in with SumCov in the order it is created.
    cov = local_noise_cov

    # noise: one diagonal FixedCov per cell type
    noise_covs = [None] * n_types
    for t in cell_type_list:
        cells_selection = (cell_types_training == t) * np.eye(N_cells_training)
        if N_cells_test == 0:
            noise_Kcross = None
        else:
            # TODO: adapt to multiple cell types
            noise_Kcross = np.zeros([N_cells_test, N_cells_training])
        noise_covs[t] = FixedCov(cells_selection, noise_Kcross)
        cov = SumCov(cov, noise_covs[t])

    # environment effect: for each pair of cell types
    # t1 is the receiving type, t2 is the effector
    if environment:
        if by_effective_type:
            env_covs = [[None] * n_types for _ in range(n_types)]
        else:
            env_covs = [None] * n_types

        for t1 in cell_type_list:
            if affected_cell_type is not None and affected_cell_type != t1:
                continue

            if by_effective_type:
                for t2 in cell_type_list:
                    # select only the environmental cell type if not all
                    if environmental_cell_types is not None and environmental_cell_types != t2:
                        continue
                    interaction_matrix = build_interaction_matrix(t1, t2, cell_types)
                    env_cov = ZKZCov(X, Kinship, rm_diag, interaction_matrix, test_set)
                    env_covs[t1][t2] = env_cov
                    env_cov.setPenalty(mu=200., sigma=50.)
                    cov = SumCov(cov, env_cov)
            else:
                interaction_matrix = build_interaction_matrix(t1, 'all', cell_types)
                env_cov = ZKZCov(X, Kinship, rm_diag, interaction_matrix, test_set)
                env_covs[t1] = env_cov
                env_cov.setPenalty(mu=200., sigma=50.)
                cov = SumCov(cov, env_cov)
    else:
        env_covs = None

    if intrinsic:
        K = build_cell_type_kinship(cell_types_training)
        # Set the cross term explicitly instead of relying on whatever value
        # the noise loop above happened to leave behind.
        if N_cells_test != 0:
            intrinsic_Kcross = build_cell_type_kinship(cell_types_test, cell_types_training)
        else:
            intrinsic_Kcross = None
        intrinsic_cov = FixedCov(K, intrinsic_Kcross)
        cov = SumCov(cov, intrinsic_cov)
    else:
        intrinsic_cov = None

    # mean term
    mean = MeanBase(mean_training)

    # define GP
    gp = limix.core.gp.GP(covar=cov, mean=mean)

    print('GP created ')

    return gp, noise_covs, local_noise_cov, env_covs, intrinsic_cov
# NumPy is used directly: the aliases of NumPy functions in the top-level
# scipy namespace (sp.linspace, sp.sin, sp.randn, ...) are deprecated.
import numpy as np

# generate data
N = 400
X = np.linspace(0, 2, N)[:, np.newaxis]
v_noise = 0.01
Y = np.sin(X) + np.sqrt(v_noise) * np.random.randn(N, 1)

# for out-of-sample predictions
Xstar = np.linspace(0, 2, 1000)[:, np.newaxis]

# define mean term
W = 1. * (np.random.rand(N, 2) < 0.2)
mean = lin_mean(Y, W)

# define covariance matrices
sqexp = SQExpCov(X, Xstar=Xstar)
noise = FixedCov(np.eye(N))
covar = SumCov(sqexp, noise)

# define gp
gp = GP(covar=covar, mean=mean)

# initialize params
sqexp.scale = 1e-4
sqexp.length = 1
noise.scale = Y.var()

# optimize
gp.optimize(calc_ste=True)

# predict out-of-sample
Ystar = gp.predict()

# print optimized values and standard errors
def run_individual_model(model, expression_file, position_file, output_directory,
                         permute_positions=False, random_start_point=False):
    """Fit one GP per expression column and write the fitted parameters to disk.

    Args:
        model: ``'full'`` (includes the direct ``FixedCov(Kinship)`` term) or
            ``'env'`` (direct term left out and its scale reported as 0).
        expression_file: space-delimited text file; the first line holds the
            column (protein) names, the remaining lines the values.
        position_file: comma-delimited text file with one row per cell.
        output_directory: directory the result files are written into.
        permute_positions: if true, the rows of the position matrix are
            randomly permuted and ``'_permuted'`` is added to the output name.
        random_start_point: if true, the environment term's length and scale
            start from uniform random draws instead of ``length = 200``.

    Raises:
        Exception: if ``model`` is neither ``'full'`` nor ``'env'``, or if
            the two input files have a different number of rows.

    Side effects:
        Writes ``<output_directory>/inferred_parameters_<model>[_permuted].txt``
        and a companion ``..._loglik`` file. Columns whose optimisation fails
        keep all-zero parameters and a zero log likelihood.
    """
    rm_diag = True

    # Compare by value: `is` tests object identity and is not reliable for
    # strings that are equal but not the same object.
    if model not in ('full', 'env'):
        raise Exception('model not understood. Please specify a model between full and env')

    # read phenotypes data
    with open(expression_file, 'r') as f:
        prot_tmp = f.readline()
    # Strip only the line terminator; slicing off the last character would
    # truncate the final name when the header has no trailing newline.
    protein_names = prot_tmp.rstrip('\n').split(' ')
    protein_names = np.reshape(protein_names, [len(protein_names), 1])
    phenotypes = np.loadtxt(expression_file, delimiter=' ', skiprows=1)

    # read position data
    X = np.genfromtxt(position_file, delimiter=',')
    if permute_positions:
        X = X[np.random.permutation(X.shape[0]), :]
    if X.shape[0] != phenotypes.shape[0]:
        raise Exception('cell number inconsistent between position and epression levels ')

    # define output file name
    output_file = output_directory + '/inferred_parameters_' + model
    if permute_positions:
        output_file += '_permuted.txt'
    else:
        output_file += '.txt'

    N_cells = phenotypes.shape[0]
    n_phenotypes = phenotypes.shape[1]

    parameters = np.zeros([n_phenotypes, 6])
    log_lik = np.zeros(n_phenotypes)

    for phen in range(n_phenotypes):
        # NOTE: this slice is a view, so the standardisation below also
        # rewrites column `phen` of `phenotypes` for later iterations.
        phenotype = phenotypes[:, phen]
        phenotype -= phenotype.mean()
        phenotype /= phenotype.std()
        phenotype = np.reshape(phenotype, [N_cells, 1])

        # Kinship is built from all the other columns.
        phenotypes_tmp = np.delete(phenotypes, phen, axis=1)
        phenotypes_tmp = normalise(phenotypes_tmp)
        Kinship = phenotypes_tmp.dot(phenotypes_tmp.transpose())
        Kinship -= np.linalg.eigvalsh(Kinship).min() * np.eye(N_cells)
        Kinship *= covar_rescaling_factor(Kinship)

        # create all the covariance terms
        direct_cov = FixedCov(Kinship)

        # noise
        noise_cov = FixedCov(np.eye(N_cells))

        # local_noise
        local_noise_cov = SQExpCov(X)
        local_noise_cov.length = 100
        local_noise_cov.act_length = False

        # environment effect
        environment_cov = ZKZCov(X, Kinship, rm_diag)

        # mean term
        mean = MeanBase(phenotype)

        # defining model
        cov = SumCov(noise_cov, local_noise_cov)
        cov = SumCov(cov, environment_cov)

        if random_start_point:
            environment_cov.length = np.random.uniform(10, 300)
            environment_cov.scale = np.random.uniform(1, 15)
        else:
            environment_cov.length = 200

        if model == 'full':
            cov = SumCov(cov, direct_cov)
        else:
            direct_cov.scale = 0

        # define and optimise GP
        gp = GP(covar=cov, mean=mean)

        try:
            gp.optimize()
        except Exception:
            # Best effort: report the failure and move on to the next
            # column, without swallowing KeyboardInterrupt / SystemExit.
            print('optimisation', str(phen), 'failed')
            continue

        log_lik[phen] = gp.LML()

        # rescale each terms to sample variance one
        # direct cov: unnecessary as fixed covariance rescaled before optimisation
        # local noise covariance
        tmp = covar_rescaling_factor(local_noise_cov.K() / local_noise_cov.scale)
        local_noise_cov.scale /= tmp
        # env effect
        tmp = covar_rescaling_factor(environment_cov.K() / environment_cov.scale**2)
        environment_cov.scale = environment_cov.scale**2 / tmp

        parameters[phen, :] = [direct_cov.scale,
                               noise_cov.scale,
                               local_noise_cov.scale,
                               local_noise_cov.length,
                               environment_cov.scale,
                               environment_cov.length]

    result_header = ' '.join(['direct_scale',
                              'noise_scale',
                              'local_noise_scale',
                              'local_noise_length',
                              'environment_scale',
                              'environment_length'])
    with open(output_file, 'w') as f:
        np.savetxt(f,
                   np.hstack((protein_names, parameters)),
                   delimiter=' ',
                   header=result_header,
                   fmt='%s',
                   comments='')

    log_lik_file = output_file + '_loglik'
    with open(log_lik_file, 'w') as f:
        np.savetxt(f, log_lik)