Beispiel #1
0
    def simulate(self, cterms=None, interactions_size=None):
        """Draw one simulated phenotype vector from a GP fitted to the stored data.

        A ``Model1`` is trained on ``self.Y`` / ``self.X`` / ``self.kin_from``
        with the requested covariance terms. The sample is then drawn from a
        zero-mean multivariate normal whose covariance is the rescaled sum of
        the fitted intrinsic, environmental and noise matrices, i.e. without
        the fitted interactions term. Optionally an interactions term of a
        chosen relative size is added back by hand.

        Args:
            cterms: Covariance term names passed to ``Model1`` as
                ``cov_terms``. ``None`` (the default) means
                ``['intrinsic', 'environmental', 'interactions']``.
            interactions_size: ``None`` to simulate without interactions, or a
                number strictly between 0 and 1 controlling the weight of the
                manually added interactions covariance.

        Returns:
            The array returned by ``np.random.multivariate_normal``, with one
            entry per row of the covariance matrix.

        Raises:
            AssertionError: If ``interactions_size`` is given but is not
                strictly between 0 and 1.
            KeyError: Presumably, if a covariance term read below was not
                part of the trained model (e.g. 'interactions' missing from
                ``cterms`` while ``interactions_size`` is set).
        """
        # Validate before the expensive training step. An explicit raise is
        # used instead of ``assert`` so the check survives ``python -O``; the
        # exception type and message are kept for existing callers.
        if interactions_size is not None and not 0. < interactions_size < 1.:
            raise AssertionError('interactions size must be between 0 and 1 ')

        if cterms is None:
            # Build a fresh list per call: a list used as a default argument
            # would be shared between calls and could be mutated downstream.
            cterms = ['intrinsic', 'environmental', 'interactions']

        # train gp with requested terms
        model = Model1(self.Y, self.X, norm='quantile', oos_predictions=0.,
                       cov_terms=cterms, kin_from=self.kin_from)
        model.reset_params()
        model.train_gp(grid_size=10)

        # simulate from gp after removing interactions term
        # NOTE(review): the 'noise' term is not in the default ``cterms``;
        # presumably Model1 always adds it - confirm.
        k = (model.covar_terms['intrinsic'].K() +
             model.covar_terms['environmental'].K() +
             model.covar_terms['noise'].K())
        k *= covar_rescaling_factor_efficient(k)

        # manually add a cross-talk term
        if interactions_size is not None:
            interactions_k = model.covar_terms['interactions'].K()
            # Not done in place, in case K() hands back a cached matrix.
            interactions_k = (interactions_k *
                              covar_rescaling_factor_efficient(interactions_k))
            # Odds-style weight s / (1 - s); presumably this makes the
            # interactions term a fraction s of the total variance once both
            # matrices have been rescaled - TODO confirm.
            k += interactions_k * (interactions_size /
                                   (1. - interactions_size))

        return np.random.multivariate_normal(np.zeros(k.shape[0]), k)
Beispiel #2
0
def run(data_dir,
        protein_index,
        output_dir,
        normalisation='quantile',
        permute=False):
    """Fit the intrinsic + environmental + interactions model for one protein.

    Reads ``expressions.txt`` and ``positions.txt`` from ``data_dir``, uses
    column ``protein_index`` of the phenotype matrix as the target and all
    remaining columns as ``kin_from``, trains a ``Model1`` and hands it to
    ``write_variance_explained`` and ``write_LL_grid``.

    Args:
        data_dir: Directory containing the two input files.
        protein_index: Column index of the protein to model.
        output_dir: Passed to the ``write_*`` helpers.
        normalisation: Passed to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted before
            fitting (the phenotypes are left in place).

    Raises:
        ValueError: If ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1`` (raised by ``list.remove``).
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # Every column except the target one. ``list(...)`` is required because
    # a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # intrinsic, environmental and interactions terms
    ####################################################################
    cterms = ['intrinsic', 'environmental', 'interactions']
    model = Model1(phenotype,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from,
                   cv_ix=0)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(0) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)
Beispiel #3
0
def run_indiv(protein_names, phenotypes, X, protein_index, output_dir,
              normalisation, permute):
    """Train the three-term model for a single protein on preloaded data.

    The target is column ``protein_index`` of ``phenotypes``; every other
    column is passed to ``Model1`` as ``kin_from``. When ``permute`` is true
    the rows of ``X`` are shuffled first. The trained model is handed to
    ``write_variance_explained`` together with ``output_dir``.
    """
    target_name = protein_names[protein_index, :]

    rule = '-------------------------------------------'
    print(rule)
    print('running model for ', target_name[0])
    print(rule)

    target = phenotypes[:, protein_index]
    other_columns = [
        col for col in range(phenotypes.shape[1]) if col != protein_index
    ]
    background = phenotypes[:, other_columns]

    n_cells = X.shape[0]

    # optionally shuffle the cells
    if permute:
        X = X[np.random.permutation(n_cells), :]

    # intrinsic, environmental and interactions terms in one model
    model = Model1(target,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=['intrinsic', 'environmental', 'interactions'],
                   kin_from=background,
                   cv_ix=0)
    model.reset_params()
    model.train_gp(grid_size=10)

    write_variance_explained(
        model, output_dir,
        target_name[0] + '_' + str(0) + '_interactions')
Beispiel #4
0
def run(data_dir,
        protein_index,
        output_dir,
        bootstrap_index,
        normalisation='standard',
        permute=False):
    """Null simulation for one protein: fit without, then with, interactions.

    A phenotype is simulated with ``FromRealSimulation(...).simulate()``
    (called without ``interactions_size``). A ``Model1`` with intrinsic and
    environmental terms is trained on it and written out under the
    ``'_local'`` prefix. The interactions term is then added and the model is
    retrained from five starting points: first the fitted parameters of the
    smaller model, then four random perturbations of them. The restart with
    the lowest ``model.gp.LML()`` is kept and written out under the
    ``'_interactions'`` prefix.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column index of the protein to model.
        output_dir: Passed to the ``write_*`` helpers.
        bootstrap_index: Only used to build the output file prefix.
        normalisation: Passed to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted.

    Raises:
        ValueError: If ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1`` (raised by ``list.remove``).
        RuntimeError: If no restart produced an LML below infinity.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # Every column except the target one. ``list(...)`` is required because
    # a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    ####################################################################
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate()

    # run model on simulated data
    ####################################################################
    # intrinsic and environmental term
    ####################################################################
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_local'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # Fitted parameters of the smaller model; used to initialise the restarts.
    int_param = model.intrinsic_cov.getParams()
    env_param = model.environmental_cov.getParams()
    noise_param = model.noise_cov.getParams()

    ####################################################################
    # add cell-cell interactions
    ####################################################################
    model.add_cov(['interactions'])

    LL = np.inf
    saved_params = None
    for i in range(5):
        if i == 0:
            # First restart: start exactly from the smaller model's optimum.
            int_bk = int_param
            env_bk = env_param
            noise_bk = noise_param
            scale_interactions = True
        else:
            # Later restarts: jitter each parameter by a factor in [0.8, 1.2).
            int_bk = int_param * np.random.uniform(0.8, 1.2, len(int_param))
            env_bk = env_param * np.random.uniform(0.8, 1.2, len(env_param))
            noise_bk = noise_param * np.random.uniform(0.8, 1.2,
                                                       len(noise_param))
            scale_interactions = False
        model.set_initCovs({
            'intrinsic': int_bk,
            'noise': noise_bk,
            'environmental': env_bk
        })
        if scale_interactions:
            model.set_scale_down(['interactions'])
        else:
            model.use_scale_down = False

        model.reset_params()
        model.train_gp(grid_size=10)
        # Lower is better here, as in the original comparison.
        # NOTE(review): presumably LML() is a negative log marginal
        # likelihood - confirm.
        current_lml = model.gp.LML()
        if current_lml < LL:
            LL = current_lml
            saved_params = model.gp.getParams()

    if saved_params is None:
        raise RuntimeError(
            'no restart produced a finite LML; cannot select parameters')

    model.gp.setParams(saved_params)
    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
Beispiel #5
0
def run(data_dir,
        protein_index,
        output_dir,
        interactions_size,
        normalisation='standard',
        permute=False):
    """Simulate with a given interactions size / down-sampling and refit.

    ``interactions_size`` is an integer code (anything ``int()`` accepts).
    Its last digit ``d`` sets the simulated interactions size to ``d / 10``.
    The remaining leading part ``t = code // 10`` sets the fraction of cells
    kept to ``1 - t / 10``. The code is also used verbatim in the output
    file prefixes.

    A phenotype is simulated on the down-sampled data with
    ``FromRealSimulation``. A ``Model1`` with intrinsic and environmental
    terms is then trained and written out, the interactions term is added,
    and the model is retrained and written out again.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column index of the protein to model.
        output_dir: Passed to the ``write_*`` helpers.
        interactions_size: Integer code as described above.
        normalisation: Passed to ``Model1`` as ``norm``.
        permute: If true, the rows of ``X`` are randomly permuted after
            down-sampling.

    Raises:
        ValueError: If ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1`` (raised by ``list.remove``), or
            if the code implies a kept fraction outside ``(0, 1]``.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)

    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # Every column except the target one. ``list(...)`` is required because
    # a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # Keep the raw code for the file names, then decode both settings from
    # it. Decoding must use the raw code: the name ``interactions_size`` is
    # rebound to a fraction below. ``//`` keeps integer division on Python 3.
    boot_ix = interactions_size
    code = int(boot_ix)
    # NOTE(review): a code ending in 0 yields interactions_size == 0.0;
    # confirm FromRealSimulation.simulate accepts that value.
    interactions_size = float(code % 10) / 10.
    down_sampling = 1 - float(code // 10) / 10.
    if not 0. < down_sampling <= 1.:
        raise ValueError(
            'code %r implies keeping a fraction %r of the cells; '
            'expected a value in (0, 1]' % (boot_ix, down_sampling))

    # down sampling
    # Rounded to an int: the sample size must be integral, and rounding
    # avoids losing a cell to floating-point error (e.g. 1 - 0.9).
    n_sel = int(round(down_sampling * X.shape[0]))
    sel = np.sort(np.random.choice(X.shape[0], n_sel, replace=False))
    X = X[sel, :]
    phenotype = phenotype[sel]
    kin_from = kin_from[sel, :]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # do null simulation
    ####################################################################
    sim = FromRealSimulation(X, phenotype, kin_from)
    Y_sim = sim.simulate(interactions_size=interactions_size)

    # run model on simulated data
    ####################################################################
    # all but interactions
    ####################################################################
    cterms = ['intrinsic', 'environmental']
    model = Model1(Y_sim,
                   X,
                   norm=normalisation,
                   oos_predictions=0.,
                   cov_terms=cterms,
                   kin_from=kin_from)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    ####################################################################
    # adding interactions
    ####################################################################
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(boot_ix) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)
Beispiel #6
0
def run(data_dir,
        protein_index,
        output_dir,
        bootstrap_index,
        normalisation='quantile',
        N_fold=5,
        permute=False):
    """Fit nested models for one protein with out-of-sample predictions.

    Trains a ``Model1`` with the intrinsic term only, then adds the
    environmental term, then the interactions term, retraining after each
    addition. After every stage the variance explained, the predictions and
    the log-likelihood output are written through the ``write_*`` helpers
    with a stage-specific file prefix.

    Args:
        data_dir: Directory containing ``expressions.txt`` and
            ``positions.txt``.
        protein_index: Column index of the protein to model.
        output_dir: Passed to the ``write_*`` helpers.
        bootstrap_index: Passed to ``Model1`` as ``cv_ix`` and used in the
            output file prefixes.
        normalisation: Passed to ``Model1`` as ``norm``.
        N_fold: ``1 / N_fold`` is passed to ``Model1`` as
            ``oos_predictions``.
        permute: If true, the rows of ``X`` are randomly permuted.

    Raises:
        ValueError: If ``protein_index`` is not one of
            ``0 .. phenotypes.shape[1] - 1`` (raised by ``list.remove``).
        ZeroDivisionError: If ``N_fold`` is 0.
    """
    # reading all data
    ####################################################################
    expression_file = data_dir + '/expressions.txt'
    position_file = data_dir + '/positions.txt'
    protein_names, phenotypes, X = utils.read_data(expression_file,
                                                   position_file)
    protein_name = protein_names[protein_index, :]
    phenotype = phenotypes[:, protein_index]
    # Every column except the target one. ``list(...)`` is required because
    # a Python 3 ``range`` object has no ``remove`` method.
    sel = list(range(phenotypes.shape[1]))
    sel.remove(protein_index)
    kin_from = phenotypes[:, sel]

    # permuting cells
    if permute:
        perm = np.random.permutation(X.shape[0])
        X = X[perm, :]

    # value handed to Model1 as oos_predictions
    # (presumably the held-out fraction per fold - TODO confirm)
    oos = 1. / N_fold

    # intrinsic term
    ####################################################################
    cterms = ['intrinsic']
    model = Model1(phenotype,
                   X,
                   norm=normalisation,
                   oos_predictions=oos,
                   cov_terms=cterms,
                   kin_from=kin_from,
                   cv_ix=bootstrap_index)
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(bootstrap_index) + '_intrinsic'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL(model, output_dir, file_prefix)

    # add environmental term
    ####################################################################
    model.add_cov(['environmental'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_environmental'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)

    # add interactions term
    ####################################################################
    model.add_cov(['interactions'])
    model.reset_params()
    model.train_gp(grid_size=10)

    file_prefix = protein_name[0] + '_' + str(
        bootstrap_index) + '_interactions'
    write_variance_explained(model, output_dir, file_prefix)
    write_pred(model, output_dir, file_prefix)
    write_LL_grid(model, output_dir, file_prefix)