コード例 #1
0
    def simulate_local(self):
        """Add a rescaled ``SQExpCov`` kernel built on ``self.X`` to ``self.covar``.

        An ``SQExpCov`` is constructed on ``self.X`` with its ``length`` set to
        ``self.l2``. Its kernel matrix ``K()`` is multiplied in place by the
        factor returned by ``covar_rescaling_factor_efficient`` and the result
        is accumulated into ``self.covar`` (also in place).
        """
        local_cov = SQExpCov(self.X)
        local_cov.length = self.l2

        kernel = local_cov.K()
        # The factor is computed from the unscaled kernel, then applied in place.
        rescaling = covar_rescaling_factor_efficient(kernel)
        kernel *= rescaling

        self.covar += kernel
コード例 #2
0
ファイル: demo_gp_regression.py プロジェクト: xypan1232/limix
    Xstar = sp.linspace(0,2,1000)[:,sp.newaxis]

    # define mean term
    W = 1. * (sp.rand(N, 2) < 0.2)
    mean = lin_mean(Y, W)

    # define covariance matrices
    sqexp = SQExpCov(X, Xstar = Xstar)
    noise = FixedCov(sp.eye(N))
    covar  = SumCov(sqexp, noise)

    # define gp
    gp = GP(covar=covar,mean=mean)
    # initialize params
    sqexp.scale = 1e-4
    sqexp.length = 1
    noise.scale = Y.var()
    # optimize
    gp.optimize(calc_ste=True)
    # predict out-of-sample
    Ystar = gp.predict()

    # print optimized values and standard errors
    print('weights of fixed effects')
    print(mean.b[0, 0], '+/-', mean.b_ste[0, 0])
    print(mean.b[1, 0], '+/-', mean.b_ste[1, 0])
    print('scale of sqexp')
    print(sqexp.scale, '+/-', sqexp.scale_ste)
    print('length of sqexp')
    print(sqexp.length, '+/-', sqexp.length_ste)
    print('scale of fixed')
コード例 #3
0
def run_individual_model(model, expression_file, position_file, output_directory,
                         permute_positions=False, random_start_point=False):
    """Fit one GP per phenotype column and write the fitted covariance parameters.

    For every column of the expression matrix a GP is built whose covariance is
    the sum of a fixed identity (noise) term, an ``SQExpCov`` term on the
    positions (local noise), a ``ZKZCov`` term (environment) and, for the
    ``'full'`` model only, a ``FixedCov`` term built from the remaining columns
    (direct). After optimisation the scales and lengths are collected, one row
    per phenotype, and written to disk together with the log marginal
    likelihoods returned by ``gp.LML()``.

    Parameters
    ----------
    model : str
        Either ``'full'`` or ``'env'``. With ``'full'`` the direct covariance
        term is added to the model; with ``'env'`` it is left out and its scale
        is set to 0.
    expression_file : str
        Path to a space-delimited text file. The first line holds the protein
        names; the remaining rows are loaded with ``np.loadtxt``.
    position_file : str
        Path to a comma-delimited file loaded with ``np.genfromtxt``. It must
        have as many rows as the expression matrix.
    output_directory : str
        Directory receiving ``inferred_parameters_<model>.txt`` (or
        ``..._permuted.txt`` when ``permute_positions`` is set) and the matching
        ``<output file>_loglik`` file.
    permute_positions : bool, optional
        If true, the rows of the position matrix are randomly permuted.
    random_start_point : bool, optional
        If true, the environment term starts from a length drawn uniformly from
        [10, 300) and a scale drawn uniformly from [1, 15); otherwise its
        length starts at 200.

    Raises
    ------
    ValueError
        If ``model`` is neither ``'full'`` nor ``'env'``.
    Exception
        If the position and expression files disagree on the number of rows.

    Notes
    -----
    Phenotypes whose optimisation fails are reported on stdout and skipped;
    their parameter row and log likelihood stay at 0.
    """
    rm_diag = True

    # Compare by value. Identity checks against string literals (``is not``)
    # wrongly reject equal strings that are not the same interned object.
    if model not in ('full', 'env'):
        raise ValueError('model not understood. Please specify a model between full and env')

    # read phenotypes data
    with open(expression_file, 'r') as f:
        header_line = f.readline()
    # Strip the line terminator ("\n" or "\r\n") instead of blindly dropping the
    # last character, which would leave a stray "\r" or truncate the last name.
    protein_names = header_line.rstrip('\r\n').split(' ')
    protein_names = np.reshape(protein_names, [len(protein_names), 1])
    phenotypes = np.loadtxt(expression_file, delimiter=' ', skiprows=1)

    # read position data
    X = np.genfromtxt(position_file, delimiter=',')
    if permute_positions:
        X = X[np.random.permutation(X.shape[0]), :]
    if X.shape[0] != phenotypes.shape[0]:
        raise Exception('cell number inconsistent between position and epression levels ')

    # define output file name
    output_file = output_directory + '/inferred_parameters_' + model
    output_file += '_permuted.txt' if permute_positions else '.txt'

    N_cells, n_phenotypes = phenotypes.shape[0], phenotypes.shape[1]

    # One row per phenotype; columns follow the order of ``result_header`` below.
    parameters = np.zeros([n_phenotypes, 6])
    log_lik = np.zeros(n_phenotypes)

    for phen in range(n_phenotypes):

        # NOTE: basic slicing returns a view, so the in-place standardisation
        # below also rewrites this column of ``phenotypes``.
        phenotype = phenotypes[:, phen]
        phenotype -= phenotype.mean()
        phenotype /= phenotype.std()
        phenotype = np.reshape(phenotype, [N_cells, 1])

        # Build the cell-by-cell matrix from all *other* phenotypes.
        phenotypes_tmp = np.delete(phenotypes, phen, axis=1)
        phenotypes_tmp = normalise(phenotypes_tmp)

        Kinship = phenotypes_tmp.dot(phenotypes_tmp.transpose())
        # Shift the spectrum by the smallest eigenvalue before rescaling.
        Kinship -= np.linalg.eigvalsh(Kinship).min() * np.eye(N_cells)
        Kinship *= covar_rescaling_factor(Kinship)

        # create all the covariance terms
        direct_cov = FixedCov(Kinship)

        # noise
        noise_cov = FixedCov(np.eye(N_cells))

        # local_noise
        local_noise_cov = SQExpCov(X)
        local_noise_cov.length = 100
        local_noise_cov.act_length = False
        # environment effect
        environment_cov = ZKZCov(X, Kinship, rm_diag)

        # mean term
        mean = MeanBase(phenotype)

        #######################################################################
        # defining model
        #######################################################################
        cov = SumCov(noise_cov, local_noise_cov)
        cov = SumCov(cov, environment_cov)
        if random_start_point:
            environment_cov.length = np.random.uniform(10, 300)
            environment_cov.scale = np.random.uniform(1, 15)
        else:
            environment_cov.length = 200

        if model == 'full':
            cov = SumCov(cov, direct_cov)
        else:
            # Term is not part of the model; report its scale as 0.
            direct_cov.scale = 0

        # define and optimise GP
        gp = GP(covar=cov, mean=mean)

        try:
            gp.optimize()
        except Exception:
            # Best effort: skip this phenotype but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except`` would swallow them).
            print('optimisation', str(phen), 'failed')
            continue

        log_lik[phen] = gp.LML()

        # rescale each term to sample variance one
        # direct cov: unnecessary as fixed covariance rescaled before optimisation
        # local noise covariance
        tmp = covar_rescaling_factor(local_noise_cov.K() / local_noise_cov.scale)
        local_noise_cov.scale /= tmp
        # env effect
        tmp = covar_rescaling_factor(environment_cov.K() / environment_cov.scale**2)
        environment_cov.scale = environment_cov.scale**2 / tmp

        parameters[phen, :] = [direct_cov.scale,
                               noise_cov.scale,
                               local_noise_cov.scale,
                               local_noise_cov.length,
                               environment_cov.scale,
                               environment_cov.length]

    result_header = ' '.join(['direct_scale',
                              'noise_scale',
                              'local_noise_scale',
                              'local_noise_length',
                              'environment_scale',
                              'environment_length'])

    with open(output_file, 'w') as f:
        np.savetxt(f,
                   np.hstack((protein_names, parameters)),
                   delimiter=' ',
                   header=result_header,
                   fmt='%s',
                   comments='')

    log_lik_file = output_file + '_loglik'
    with open(log_lik_file, 'w') as f:
        np.savetxt(f, log_lik)