def run(data_dir, protein_index, output_dir, normalisation='quantile',
        permute=False):
    """Fit the three-term model for one protein and write the results.

    Reads ``expressions.txt`` and ``positions.txt`` from ``data_dir`` via
    ``utils.read_data``, takes column ``protein_index`` of the phenotype
    matrix as the response and every other column as ``kin_from``, trains a
    ``Model1`` with the intrinsic, environmental and interactions covariance
    terms, and writes the variance explained and the log-likelihood grid.

    Args:
        data_dir: Directory holding ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column of the phenotype matrix (and row of the
            protein-name table) to model.
        output_dir: Forwarded unchanged to the ``write_*`` helpers.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: When true, the rows of ``X`` are shuffled before fitting;
            the phenotype rows are left in their original order.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # Every phenotype column except the modelled one. A real list is built
    # first because a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # intrinsic, environmental and interactions terms fitted together
    cterms = ['intrinsic', 'environmental', 'interactions']
    model = Model1(phenotype, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from, cv_ix=0)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(0) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, normalisation='quantile',
        permute=False):
    """Run ``run_indiv`` for one, several, or all proteins.

    The expression and position files are read once from ``data_dir`` and
    the resulting arrays are handed to ``run_indiv`` for each selected
    protein.

    Args:
        data_dir: Directory holding ``expressions.txt`` and
            ``positions.txt``.
        protein_index: ``None`` to process every protein, a single integer
            index, or an iterable of integer indices.
        output_dir: Forwarded unchanged to ``run_indiv``.
        normalisation: Forwarded unchanged to ``run_indiv``.
        permute: Forwarded unchanged to ``run_indiv``.
    """
    import numbers

    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    if protein_index is None:
        indices = range(len(protein_names))
    elif isinstance(protein_index, numbers.Integral):
        # A bare integer is treated as a one-element selection instead of
        # failing with "object is not iterable".
        indices = [protein_index]
    else:
        indices = protein_index

    # A separate loop name keeps the ``protein_index`` argument intact.
    for index in indices:
        run_indiv(protein_names, phenotypes, X, index, output_dir,
                  normalisation, permute)
def run(data_dir, protein_index, output_dir, bootstrap_index,
        normalisation='standard', permute=False):
    """Fit models with and without interactions on a null simulation.

    Steps:

    1. Read the data and split the phenotype matrix into the response
       (column ``protein_index``) and ``kin_from`` (all other columns).
    2. Draw a simulated response with ``FromRealSimulation(...).simulate()``.
    3. Train a ``Model1`` with intrinsic and environmental terms and write
       its results under the ``_local`` file prefix.
    4. Add the interactions term and train it five times from different
       initial covariance parameters, keeping the parameters that give the
       lowest ``model.gp.LML()``; write the results under the
       ``_interactions`` file prefix.

    Args:
        data_dir: Directory holding ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column of the phenotype matrix to model.
        output_dir: Forwarded unchanged to the ``write_*`` helpers.
        bootstrap_index: Only used to build the output file prefix.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: When true, the rows of ``X`` are shuffled before the
            simulation; the phenotype rows are left in place.

    Raises:
        RuntimeError: If none of the restarts yields an LML below ``inf``
            (for example when every value is NaN).
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # Every phenotype column except the modelled one. A real list is built
    # first because a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate()

    # run model on simulated data: intrinsic and environmental terms
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_local'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # Fitted parameters of the two-term model seed the restarts below.
    int_param = model.intrinsic_cov.getParams()
    env_param = model.environmental_cov.getParams()
    noise_param = model.noise_cov.getParams()

    # add cell-cell interactions
    model.add_cov(['interactions'])

    best_LL = np.inf
    best_params = None
    for i in range(5):
        if i == 0:
            # First attempt: start exactly from the two-term optimum and
            # scale the new interactions term down.
            int_bk = int_param
            env_bk = env_param
            noise_bk = noise_param
            scale_interactions = True
        else:
            # Later attempts: jitter each parameter by a factor drawn
            # uniformly from [0.8, 1.2).
            int_bk = int_param * np.random.uniform(0.8, 1.2, len(int_param))
            env_bk = env_param * np.random.uniform(0.8, 1.2, len(env_param))
            noise_bk = noise_param * np.random.uniform(0.8, 1.2,
                                                       len(noise_param))
            scale_interactions = False

        model.set_initCovs({
            'intrinsic': int_bk,
            'noise': noise_bk,
            'environmental': env_bk
        })
        if scale_interactions:
            model.set_scale_down(['interactions'])
        else:
            model.use_scale_down = False

        model.reset_params()
        model.train_gp(grid_size=10)

        # NOTE(review): lower LML is treated as better, as in the original
        # comparison -- confirm LML() is the quantity being minimised.
        current_LL = model.gp.LML()
        if current_LL < best_LL:
            best_LL = current_LL
            best_params = model.gp.getParams()

    if best_params is None:
        raise RuntimeError(
            'no restart produced an LML below infinity; nothing to restore')
    model.gp.setParams(best_params)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, interactions_size,
        normalisation='standard', permute=False):
    """Simulate with a given interaction size on down-sampled cells and fit.

    ``interactions_size`` is an integer code whose decimal digits carry two
    settings:

    * units digit ``u``: ``u / 10`` is passed to
      ``FromRealSimulation.simulate`` as ``interactions_size``;
    * tens digit ``t``: a fraction ``1 - t / 10`` of the rows is kept.

    The raw code is also used, unchanged, in the output file prefix.

    A model with intrinsic and environmental terms is trained first and
    written under the ``_environmental`` prefix; the interactions term is
    then added, the model retrained, and the results written under the
    ``_interactions`` prefix.

    Args:
        data_dir: Directory holding ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column of the phenotype matrix to model.
        output_dir: Forwarded unchanged to the ``write_*`` helpers.
        interactions_size: Integer code as described above (anything
            accepted by ``int()``).
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: When true, the rows of ``X`` are shuffled after
            down-sampling; the phenotype rows are left in place.

    Raises:
        ValueError: If the code leaves no rows after down-sampling.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # Every phenotype column except the modelled one. A real list is built
    # first because a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # Decode both settings from the raw code BEFORE overwriting
    # ``interactions_size``; deriving the down-sampling factor from the
    # already-scaled value would always give 1 (no down-sampling).
    boot_ix = interactions_size
    run_code = int(interactions_size)
    tens_digit = run_code // 10
    interactions_size = float(run_code % 10) / 10.

    # down sampling: integer arithmetic keeps the sample size an exact int,
    # which ``np.random.choice`` requires for ``size``.
    n_sel = (10 - tens_digit) * X.shape[0] // 10
    if n_sel < 1:
        raise ValueError(
            'interactions_size code %r leaves no samples after '
            'down-sampling' % (boot_ix,))
    sel = np.sort(np.random.choice(X.shape[0], n_sel, replace=False))
    X = X[sel, :]
    phenotype = phenotype[sel]
    kin_from = kin_from[sel, :]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate(interactions_size=interactions_size)

    # run model on simulated data: all but interactions
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim, X, norm=normalisation, oos_predictions=0.,
                   cov_terms=cterms, kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # adding interactions
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
def run(data_dir, protein_index, output_dir, bootstrap_index,
        normalisation='quantile', N_fold=5, permute=False):
    """Fit the model in three stages with out-of-sample predictions.

    The model is trained with the intrinsic term only, then with the
    environmental term added, then with the interactions term added. After
    each stage the variance explained, the predictions and a log-likelihood
    output are written with a prefix built from the protein name,
    ``bootstrap_index`` and the stage name.

    Args:
        data_dir: Directory holding ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column of the phenotype matrix to model.
        output_dir: Forwarded unchanged to the ``write_*`` helpers.
        bootstrap_index: Passed to ``Model1`` as ``cv_ix`` and used in the
            output file prefix.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        N_fold: ``1 / N_fold`` is passed to ``Model1`` as
            ``oos_predictions`` (presumably the held-out fraction --
            confirm against ``Model1``).
        permute: When true, the rows of ``X`` are shuffled before fitting;
            the phenotype rows are left in place.

    Raises:
        ZeroDivisionError: If ``N_fold`` is 0.
    """
    # reading all data
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]

    # Every phenotype column except the modelled one. A real list is built
    # first because a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # share of the data handed to Model1 as ``oos_predictions``
    oos = 1. / N_fold

    # intrinsic term
    cterms = ['intrinsic']
    model = Model1(phenotype, X, norm=normalisation, oos_predictions=oos,
                   cov_terms=cterms, kin_from=kin_from,
                   cv_ix=bootstrap_index)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_intrinsic'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # add local (environmental) term
    model.add_cov(['environmental'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)

    # add crowding (interactions) term
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)