def simulate(self, cterms=None, interactions_size=None):
    """Draw one synthetic phenotype vector from a GP fitted to the real data.

    A ``Model1`` is trained on ``self.Y`` / ``self.X`` with the requested
    covariance terms.  The intrinsic, environmental and noise covariance
    matrices of the fitted model are summed and rescaled with
    ``covar_rescaling_factor_efficient``.  The interactions covariance is
    left out of the simulation unless ``interactions_size`` is given.

    Args:
        cterms: covariance terms passed to ``Model1`` as ``cov_terms``.
            ``None`` (the default) stands for
            ``['intrinsic', 'environmental', 'interactions']``.
        interactions_size: ``None`` to simulate without a cross-talk term,
            otherwise a number strictly between 0 and 1.  The rescaled
            interactions covariance is weighted by
            ``interactions_size / (1 - interactions_size)`` before being
            added to the other terms.

    Returns:
        One draw from ``np.random.multivariate_normal`` with zero mean and
        the assembled covariance matrix.

    Raises:
        AssertionError: if ``interactions_size`` is given but is not
            strictly between 0 and 1.
    """
    # A fresh list per call: a list literal in the signature would be shared
    # (and mutable) across every call.
    if cterms is None:
        cterms = ['intrinsic', 'environmental', 'interactions']

    # train gp with requested terms
    model = Model1(self.Y, self.X, norm='quantile', oos_predictions=0.,
                   cov_terms=cterms, kin_from=self.kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    # simulate from gp after removing interactions term
    k = (model.covar_terms['intrinsic'].K()
         + model.covar_terms['environmental'].K()
         + model.covar_terms['noise'].K())
    k = k * covar_rescaling_factor_efficient(k)

    # manually add a cross-talk term
    if interactions_size is not None:
        assert 0. < interactions_size < 1., 'interactions size must be between 0 and 1 '

        # Out-of-place arithmetic, in case K() hands back an array that the
        # model still holds on to.
        cross_talk = model.covar_terms['interactions'].K()
        cross_talk = cross_talk * covar_rescaling_factor_efficient(cross_talk)
        cross_talk = cross_talk * (interactions_size / (1. - interactions_size))
        k = k + cross_talk

    return np.random.multivariate_normal(np.zeros(k.shape[0]), k)
def run(data_dir, protein_index, output_dir, normalisation='quantile',
        permute=False):
    """Fit the intrinsic + environmental + interactions model for one protein.

    Reads ``<data_dir>/expressions.txt`` and ``<data_dir>/positions.txt``
    through ``utils.read_data``, trains a ``Model1`` on the selected protein
    and hands the fitted model to ``write_variance_explained`` and
    ``write_LL_grid``.

    Args:
        data_dir: directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: column of the phenotype matrix to model; every other
            column is passed to the model as ``kin_from``.
        output_dir: forwarded to the ``write_*`` helpers.
        normalisation: forwarded to ``Model1`` as ``norm``.
        permute: if true, the rows of ``X`` are shuffled with
            ``np.random.permutation`` before fitting.

    Raises:
        ValueError: if ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1``.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # range() is an immutable sequence on Python 3, so materialise a list
    # before dropping the target column.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # full model: all three covariance terms at once
    cterms = ['intrinsic', 'environmental', 'interactions']
    model = Model1(phenotype, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from, cv_ix=0)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(0) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)
def run_indiv(protein_names, phenotypes, X, protein_index, output_dir,
              normalisation, permute):
    """Fit the three-term model for one protein on data already in memory.

    Prints a short banner naming the protein, trains a ``Model1`` with the
    intrinsic, environmental and interactions terms, and passes the fitted
    model to ``write_variance_explained``.

    Args:
        protein_names: indexed as ``protein_names[protein_index, :]``; the
            first element of that row is used as the protein label.
        phenotypes: matrix whose column ``protein_index`` is the modelled
            phenotype; the remaining columns are given to the model as
            ``kin_from``.
        X: per-sample matrix handed to ``Model1``; rows are shuffled when
            ``permute`` is true.
        protein_index: column of ``phenotypes`` to model.
        output_dir: forwarded to ``write_variance_explained``.
        normalisation: forwarded to ``Model1`` as ``norm``.
        permute: whether to shuffle the rows of ``X`` before fitting.
    """
    separator = '-------------------------------------------'
    protein_name = protein_names[protein_index, :]

    print(separator)
    print('running model for ', protein_name[0])
    print(separator)

    # Target column, and every other column as the kinship source.
    n_proteins = phenotypes.shape[1]
    other_columns = [col for col in range(n_proteins) if col != protein_index]
    phenotype = phenotypes[:, protein_index]
    kin_from = phenotypes[:, other_columns]

    # Optionally shuffle the cells.
    n_cells = X.shape[0]
    if permute:
        shuffled_rows = np.random.permutation(n_cells)
        X = X[shuffled_rows, :]

    # All three covariance terms are fitted together.
    model = Model1(
        phenotype,
        X,
        norm=normalisation,
        oos_predictions=0.,
        cov_terms=['intrinsic', 'environmental', 'interactions'],
        kin_from=kin_from,
        cv_ix=0,
    )
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + '0' + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, bootstrap_index,
        normalisation='standard', permute=False):
    """Fit nested models on a null simulation derived from one protein.

    Reads ``<data_dir>/expressions.txt`` and ``<data_dir>/positions.txt``
    through ``utils.read_data`` and simulates a phenotype with
    ``FromRealSimulation(...).simulate()``.  It then:

    1. fits a ``Model1`` with the intrinsic and environmental terms and
       writes the results under the ``..._local`` prefix;
    2. adds the interactions term and refits five times, once starting from
       the fitted parameters of step 1 and four times from randomly
       perturbed copies (each parameter multiplied by a uniform factor in
       [0.8, 1.2]).  The parameters of the restart with the smallest
       ``model.gp.LML()`` are restored before the results are written under
       the ``..._interactions`` prefix.

    Args:
        data_dir: directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: column of the phenotype matrix to model; every other
            column is passed on as ``kin_from``.
        output_dir: forwarded to the ``write_*`` helpers.
        bootstrap_index: only used to build the output file prefixes.
        normalisation: forwarded to ``Model1`` as ``norm``.
        permute: if true, the rows of ``X`` are shuffled before simulating.

    Raises:
        ValueError: if ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1``.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # range() is an immutable sequence on Python 3, so materialise a list
    # before dropping the target column.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate()

    # run model on simulated data: intrinsic and environmental terms
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_local'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # Fitted parameters of the two-term model, used to initialise the
    # restarts of the larger model below.
    int_param = model.intrinsic_cov.getParams()
    env_param = model.environmental_cov.getParams()
    noise_param = model.noise_cov.getParams()

    # add cell-cell interactions
    model.add_cov(['interactions'])

    best_lml = np.inf
    saved_params = None
    for i in range(5):
        if i == 0:
            # First restart: start exactly from the two-term solution.
            int_bk = int_param
            env_bk = env_param
            noise_bk = noise_param
            scale_interactions = True
        else:
            # Later restarts: jitter every parameter by up to +/-20%.
            # NOTE(review): the original drew these from `s.random`, which
            # looks like a SciPy alias of numpy.random - confirm.
            int_bk = int_param * np.random.uniform(0.8, 1.2, len(int_param))
            env_bk = env_param * np.random.uniform(0.8, 1.2, len(env_param))
            noise_bk = noise_param * np.random.uniform(0.8, 1.2,
                                                       len(noise_param))
            scale_interactions = False

        model.set_initCovs({
            'intrinsic': int_bk,
            'noise': noise_bk,
            'environmental': env_bk,
        })
        if scale_interactions:
            model.set_scale_down(['interactions'])
        else:
            model.use_scale_down = False

        model.reset_params()
        model.train_gp(grid_size=10)

        # Keep the restart with the smallest LML() value.
        lml = model.gp.LML()
        if lml < best_lml:
            best_lml = lml
            saved_params = model.gp.getParams()

    # If no restart produced a finite, comparable LML the model simply keeps
    # the parameters of the last fit.
    if saved_params is not None:
        model.gp.setParams(saved_params)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, interactions_size,
        normalisation='standard', permute=False):
    """Fit nested models on a simulation with a planted interactions effect.

    ``interactions_size`` is an integer code.  Its last decimal digit, divided
    by 10, is the interactions size handed to
    ``FromRealSimulation.simulate``.  The remaining leading digits, divided by
    10, give the fraction of cells that is *dropped* before simulating, so
    ``23`` means interactions size 0.3 on a random 80% subset of the cells.
    The unmodified code is used in the output file prefixes.

    Two models are fitted on the simulated phenotype: intrinsic +
    environmental (prefix ``..._environmental``), then the same model with
    the interactions term added (prefix ``..._interactions``).

    Args:
        data_dir: directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: column of the phenotype matrix to model; every other
            column is passed on as ``kin_from``.
        output_dir: forwarded to the ``write_*`` helpers.
        interactions_size: integer code described above (anything ``int()``
            accepts).
        normalisation: forwarded to ``Model1`` as ``norm``.
        permute: if true, the rows of ``X`` are shuffled after down-sampling.

    Raises:
        ValueError: if ``protein_index`` is not a valid column, or if the
            code implies keeping none (or more than all) of the cells.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # range() is an immutable sequence on Python 3, so materialise a list
    # before dropping the target column.
    other_columns = list(range(phenotypes.shape[1]))
    other_columns.remove(protein_index)
    kin_from = phenotypes[:, other_columns]

    # Decode both quantities from the ORIGINAL code.  The earlier version
    # derived the down-sampling fraction from the already-overwritten
    # interactions_size, which made it always 1.
    boot_ix = interactions_size
    code = int(interactions_size)
    interactions_size = float(code % 10) / 10.
    down_sampling = 1 - float(code // 10) / 10.
    if not 0. < down_sampling <= 1.:
        raise ValueError(
            'interactions_size code %r leaves no cells after down sampling'
            % (boot_ix,))
    # NOTE(review): a code whose last digit is 0 yields interactions_size
    # 0.0, which simulate() appears to reject - confirm this is intended.

    # down sampling: keep a sorted random subset of the cells
    n_sel = int(round(down_sampling * X.shape[0]))
    keep = np.sort(np.random.choice(X.shape[0], n_sel, replace=False))
    X = X[keep, :]
    phenotype = phenotype[keep]
    kin_from = kin_from[keep, :]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # simulate a phenotype with the requested interactions size
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate(interactions_size=interactions_size)

    # run model on simulated data: all but interactions
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # adding interactions
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, bootstrap_index,
        normalisation='quantile', N_fold=5, permute=False):
    """Fit three nested models for one protein with held-out predictions.

    Reads ``<data_dir>/expressions.txt`` and ``<data_dir>/positions.txt``
    through ``utils.read_data``, then trains a ``Model1`` with
    ``oos_predictions = 1 / N_fold`` and ``cv_ix = bootstrap_index``.  The
    model is fitted three times, adding one covariance term each time:

    1. intrinsic only (prefix ``..._intrinsic``);
    2. + environmental (prefix ``..._environmental``);
    3. + interactions (prefix ``..._interactions``).

    After each fit the model is handed to ``write_variance_explained`` and
    ``write_pred``.  The first stage also calls ``write_LL``; the later two
    call ``write_LL_grid``.

    Args:
        data_dir: directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: column of the phenotype matrix to model; every other
            column is passed on as ``kin_from``.
        output_dir: forwarded to the ``write_*`` helpers.
        bootstrap_index: passed to ``Model1`` as ``cv_ix`` and used in the
            output file prefixes.
        normalisation: forwarded to ``Model1`` as ``norm``.
        N_fold: number of folds; its reciprocal is passed to ``Model1`` as
            ``oos_predictions``.
        permute: if true, the rows of ``X`` are shuffled before fitting.

    Raises:
        ValueError: if ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1``.
        ZeroDivisionError: if ``N_fold`` is 0.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # range() is an immutable sequence on Python 3, so materialise a list
    # before dropping the target column.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # fraction of samples requested for out-of-sample prediction
    oos = 1. / N_fold

    # intrinsic term
    cterms = ['intrinsic']
    model = Model1(phenotype, X, norm=normalisation, oos_predictions=oos,
                   cov_terms=cterms, kin_from=kin_from,
                   cv_ix=bootstrap_index)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_intrinsic'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # add environmental term
    model.add_cov(['environmental'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)

    # add interactions term
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)