コード例 #1
0
def run(data_dir,
        protein_index,
        output_dir,
        normalisation='quantile',
        permute=False):
    """Fit the intrinsic + environmental + interactions model for one protein.

    Reads ``<data_dir>/expressions.txt`` and ``<data_dir>/positions.txt``
    through ``utils.read_data``, takes column ``protein_index`` of the
    phenotype matrix as the response, fits a ``Model1`` with all three
    covariance terms and writes the variance-explained and LL-grid outputs
    under the prefix ``<protein name>_0_interactions``.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Integer column index of the protein to model.
        output_dir: Directory handed to the ``write_*`` helpers.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted before
            fitting (the phenotype rows are left in place).
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # range() is an immutable sequence on Python 3 and has no .remove();
    # materialise it as a list so the target column can be dropped.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    # every phenotype column except the one being modelled
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # intrinsic, environmental and interactions terms, fitted jointly
    ####################################################################
    cterms = ['intrinsic', 'environmental', 'interactions']
    model = Model1(phenotype,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from,
                   cv_ix=0)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(0) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)
コード例 #2
0
def run(data_dir,
        protein_index,
        output_dir,
        normalisation='quantile',
        permute=False):
    """Run ``run_indiv`` for every requested protein.

    The expression and position tables are loaded once from ``data_dir``
    and shared across all calls.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Iterable of protein indices to process, or ``None``
            to process every index in ``range(len(protein_names))``.
        output_dir: Forwarded unchanged to ``run_indiv``.
        normalisation: Forwarded unchanged to ``run_indiv``.
        permute: Forwarded unchanged to ``run_indiv``.
    """
    # load the shared inputs once
    names, phenos, coords = utils.read_data(data_dir + '/expressions.txt',
                                            data_dir + '/positions.txt')

    # None means "all proteins"; otherwise iterate what the caller gave us
    if protein_index is None:
        targets = range(len(names))
    else:
        targets = protein_index

    for idx in targets:
        run_indiv(names, phenos, coords, idx, output_dir, normalisation,
                  permute)
コード例 #3
0
ファイル: sim_null.py プロジェクト: gabora/svca
def run(data_dir,
        protein_index,
        output_dir,
        bootstrap_index,
        normalisation='standard',
        permute=False):
    """Simulate a phenotype from real data and fit models with/without interactions.

    Steps, in order:

    1. Load expressions and positions from ``data_dir`` via
       ``utils.read_data``.
    2. Simulate a response with ``FromRealSimulation(X, phenotype,
       kin_from).simulate()``.
    3. Fit ``Model1`` with the intrinsic and environmental terms and write
       results under ``<protein>_<bootstrap_index>_local``.
    4. Add the interactions term and refit five times (first from the
       previously fitted parameters, then from randomly perturbed copies),
       keep the fit with the smallest ``model.gp.LML()`` and write results
       under ``<protein>_<bootstrap_index>_interactions``.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Integer column index of the protein to model.
        output_dir: Directory handed to the ``write_*`` helpers.
        bootstrap_index: Only used to label the output file prefix.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted before
            simulating and fitting.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # range() is an immutable sequence on Python 3 and has no .remove();
    # materialise it as a list so the target column can be dropped.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    ####################################################################
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate()

    # run model on simulated data
    ####################################################################
    # intrinsic and environmental term
    ####################################################################
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_local'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # parameters of the model without interactions; used below to seed the
    # restarts of the larger model
    int_param = model.intrinsic_cov.getParams()
    env_param = model.environmental_cov.getParams()
    noise_param = model.noise_cov.getParams()

    ####################################################################
    # add cell-cell interactions
    ####################################################################
    model.add_cov(['interactions'])

    # np.inf rather than the np.Inf alias, which NumPy 2.0 removed
    LL = np.inf
    for i in range(5):
        if i == 0:
            # first attempt: start exactly from the previous fit
            int_bk = int_param
            env_bk = env_param
            noise_bk = noise_param
            scale_interactions = True
        else:
            # later attempts: jitter each parameter by a factor in [0.8, 1.2)
            int_bk = int_param * np.random.uniform(0.8, 1.2, len(int_param))
            env_bk = env_param * np.random.uniform(0.8, 1.2, len(env_param))
            noise_bk = noise_param * np.random.uniform(0.8, 1.2,
                                                       len(noise_param))
            scale_interactions = False
        model.set_initCovs({
            'intrinsic': int_bk,
            'noise': noise_bk,
            'environmental': env_bk
        })
        if scale_interactions:
            model.set_scale_down(['interactions'])
        else:
            model.use_scale_down = False

        model.reset_params()
        model.train_gp(grid_size=10)
        # keep the restart with the smallest LML value
        # NOTE(review): presumably LML() is a negative log marginal
        # likelihood, so smaller is better -- confirm against the GP class.
        lml = model.gp.LML()
        if lml < LL:
            LL = lml
            saved_params = model.gp.getParams()

    # restore the best restart before writing results
    model.gp.setParams(saved_params)
    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
コード例 #4
0
ファイル: simul_env.py プロジェクト: ryanccarelli/svca
def run(data_dir,
        protein_index,
        output_dir,
        interactions_size,
        normalisation='standard',
        permute=False):
    """Simulate with a given interaction effect size and fit nested models.

    ``interactions_size`` is an integer code that is decoded as follows:

    * units digit ``u``  -> simulated interaction size ``u / 10``
    * tens digit ``t``   -> fraction of cells kept ``1 - t / 10``

    The undecoded value is also used to label the output files.

    After optional down-sampling and permutation, a response is simulated
    with ``FromRealSimulation`` and ``Model1`` is fitted first with the
    intrinsic and environmental terms (outputs ``..._environmental``) and
    then with the interactions term added (outputs ``..._interactions``).

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Integer column index of the protein to model.
        output_dir: Directory handed to the ``write_*`` helpers.
        interactions_size: Encoded index, see above; anything ``int()``
            accepts.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted after
            down-sampling.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # range() is an immutable sequence on Python 3 and has no .remove();
    # materialise it as a list so the target column can be dropped.
    other_cols = list(range(phenotypes.shape[1]))
    other_cols.remove(protein_index)
    kin_from = phenotypes[:, other_cols]

    # Decode both quantities from the ORIGINAL index. Previously the
    # down-sampling fraction was derived from the already-rescaled
    # interactions_size (< 1), which always truncated to 0 and silently
    # disabled down-sampling.
    boot_ix = interactions_size
    encoded = int(boot_ix)
    interactions_size = float(encoded % 10) / 10.
    down_sampling = 1 - float(encoded // 10) / 10.

    # down sampling: keep a random subset of cells, in their original order
    # (the sample size must be an integer for np.random.choice)
    n_sel = int(round(down_sampling * X.shape[0]))
    rows = np.sort(np.random.choice(X.shape[0], n_sel, replace=False))
    X = X[rows, :]
    phenotype = phenotype[rows]
    kin_from = kin_from[rows, :]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    ####################################################################
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate(interactions_size=interactions_size)

    # run model on simulated data
    ####################################################################
    # all but interactions
    ####################################################################
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    ####################################################################
    # adding interactions
    ####################################################################
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
コード例 #5
0
ファイル: run_cv.py プロジェクト: gabora/svca
def run(data_dir,
        protein_index,
        output_dir,
        bootstrap_index,
        normalisation='quantile',
        N_fold=5,
        permute=False):
    """Fit three nested models for one protein with held-out predictions.

    A ``Model1`` is created with the intrinsic term only, then the
    environmental and interactions terms are added one at a time. After
    each fit the variance explained, the predictions and the likelihood
    output are written under
    ``<protein>_<bootstrap_index>_{intrinsic,environmental,interactions}``.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Integer column index of the protein to model.
        output_dir: Directory handed to the ``write_*`` helpers.
        bootstrap_index: Passed to ``Model1`` as ``cv_ix`` and used in the
            output file prefix.
        normalisation: Forwarded to ``Model1`` as ``norm``.
        N_fold: Number of folds; ``1 / N_fold`` is passed to ``Model1`` as
            ``oos_predictions``.
        permute: If true, the rows of ``X`` are randomly permuted before
            fitting.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)
    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # range() is an immutable sequence on Python 3 and has no .remove();
    # materialise it as a list so the target column can be dropped.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # out-of-sample fraction implied by the number of folds
    oos = 1. / N_fold

    # intrinsic term
    ####################################################################
    cterms = ['intrinsic']
    model = Model1(phenotype,
                   X,
                   norm=normalisation,
                   oos_predictions=oos,
                   cov_terms=cterms,
                   kin_from=kin_from,
                   cv_ix=bootstrap_index)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_intrinsic'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # add environmental term
    ####################################################################
    model.add_cov(['environmental'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)

    # add interactions term
    ####################################################################
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)