Esempio n. 1
0
def simple_unsupervised_demo():
    """Run PEER unsupervised inference on the demo expression matrix.

    Loads ``data/expression.csv``, fits a PEER model with 20 hidden
    factors (the default priors are set explicitly as a demonstration),
    then plots the factor relevance (ARD precisions) and saves the
    figure to ``demo_simple.pdf``.

    Relies on module-level names ``SP`` (scipy), ``peer``, ``PL``
    (pylab) and ``plot_Alpha`` being importable/defined elsewhere in
    this module.
    """
    # Single-argument print(...) works identically under Python 2 and 3
    # (the original used the Python-2-only print statement).
    print("Simple PEER application. All default prior values are set explicitly as demonstration.")
    y = SP.loadtxt("data/expression.csv", delimiter=",")
    K = 20                    # number of hidden factors to learn
    Nmax_iterations = 100     # cap on variational update iterations
    model = peer.PEER()

    # set data and parameters
    model.setNk(K)            # number of factors for learning
    model.setPhenoMean(y)     # data for inference
    # set priors (these are the default settings of PEER)
    model.setPriorAlpha(0.001, 0.1)
    model.setPriorEps(0.1, 10.)
    model.setNmax_iterations(Nmax_iterations)
    # perform inference
    model.update()

    # investigate results
    X = model.getX()            # factors
    W = model.getW()            # weights
    Alpha = model.getAlpha()    # ARD precision parameters
    Yc = model.getResiduals()   # corrected dataset

    # plot variance of factors - in this case, we expect a natural elbow
    # where there are 5 active factors, as 5 were simulated
    plot_Alpha(Alpha)
    PL.savefig("demo_simple.pdf")
    print("Plotted factor relevance")
    PL.show()
Esempio n. 2
0
def get_simple_model_object(K=10,
                            Nmax_iterations=100,
                            expr_file="data/expression.csv"):
    """Build (but do not fit) a PEER model for a CSV expression file.

    Loads the comma-separated expression matrix from *expr_file*,
    attaches it to a fresh PEER model, configures the factor count and
    the iteration cap, and returns the unfitted model.
    """
    expression = SP.loadtxt(expr_file, delimiter=",")
    model = peer.PEER()
    model.setNk(K)                   # number of hidden factors to learn
    model.setPhenoMean(expression)   # phenotype data for inference
    model.setNmax_iterations(Nmax_iterations)
    return model
Esempio n. 3
0
def runpeer(phenotype, K, iterations, cov=None):
    """Fit a PEER model and return ``(residuals, factors)``.

    phenotype  : samples-by-genes expression matrix
    K          : number of hidden factors to infer
    iterations : maximum number of update iterations
    cov        : optional sample covariate matrix
    """
    model = peer.PEER()
    model.setPhenoMean(phenotype)
    if cov is not None:
        model.setCovariates(cov)
    model.setNk(K)
    model.setNmax_iterations(iterations)
    model.update()
    return model.getResiduals(), model.getX()
def RunPEER(infile, covfile):
    """Run PEER on an expression matrix with sample covariates.

    Reads two tab-delimited siblings of *infile* / *covfile*
    (``<stem>.NoHead.trans.tab`` and ``<stem>.NoHead.tab``) from the
    directory named by the module-level global ``cwd``
    (NOTE(review): ``cwd`` is not defined in this function — confirm it
    is set at module level before calling), fits a 10-factor PEER model
    with covariates, and returns
    ``(residuals, precision, factors, weights)``.
    """
    # Progress markers kept from the original debug trace, converted to
    # the print() form that works under both Python 2 and 3.
    print('1')
    import peer
    print('2')
    import scipy as SP
    print('3')
    expr = SP.loadtxt(
        '%s/%s' %
        (cwd, '.'.join(infile.split('.')[:-1]) + '.NoHead.trans.tab'),
        delimiter='\t'
    )
    covs = SP.loadtxt(
        '%s/%s' % (cwd, '.'.join(covfile.split('.')[:-1]) + '.NoHead.tab'),
        delimiter='\t'
    )
    # (The original contained bare no-op expressions such as
    # ``expr.shape`` and ``model.getTolerance()`` — REPL leftovers with
    # no effect in a script; removed.)
    model = peer.PEER()
    model.setPhenoMean(expr)   # N samples x G genes
    model.setCovariates(covs)
    model.setNk(10)            # number of hidden factors
    # Defaults kept: tolerance = 0.001, variance tolerance = 0.00001.
    # In general keep the bound tolerance fairly high but the variation
    # tolerance quite low compared to the variance of the expression
    # matrix; PEER's default priors are Alpha(0.001, 0.1), Eps(0.1, 10).
    model.setNmax_iterations(1000)  # default = 1000

    model.update()

    factors = model.getX()
    weights = model.getW()
    precision = model.getAlpha()
    residuals = model.getResiduals()
    return (residuals, precision, factors, weights)
Esempio n. 5
0
def get_simple_model_object(K=10,
                            Nmax_iterations=100,
                            expr_file="data/expression.csv"):
    """Build an unfitted PEER model from a tab-delimited expression file.

    The file is read with pandas (the second line serves as the header),
    the ``Name`` column is dropped, ``Description`` becomes the index,
    and the table is transposed to samples-by-genes before being handed
    to a fresh PEER model configured with *K* factors and
    *Nmax_iterations* update iterations.
    """
    frame = pd.read_csv(expr_file, delimiter='\t', header=1)
    frame = (frame
             .drop(columns=['Name'])
             .set_index('Description')
             .transpose())
    model = peer.PEER()
    model.setNk(K)                    # number of hidden factors to learn
    model.setPhenoMean(frame.values)  # data for inference
    model.setNmax_iterations(Nmax_iterations)
    return model
Esempio n. 6
0
def compute_and_store_peer_factors(df, peer_result_file, K=15):
    """Fit PEER factors for *df* and write one row per individual.

    df : individuals-by-genes DataFrame, e.g.
         #       Gene1   Gene2
         # Ind1  0.4     6
         # Ind2  14      0.5
    peer_result_file : output path; each line is
         ``individual_id<TAB>factor_1<TAB>...<TAB>factor_K``
    K : number of hidden factors to learn.

    Returns the N x K matrix of inferred factors.
    """
    # NOTE(review): the original set Nmax_iterations = 100 and then
    # immediately overwrote it with 3 (apparent debug leftover). The
    # effective value 3 is preserved here — confirm whether 100 was the
    # intended production setting.
    Nmax_iterations = 3
    model = peer.PEER()

    model.setNk(K)                 # number of factors for learning
    model.setPhenoMean(df.values)  # data for inference
    # set priors (these are the default settings of PEER)
    model.setPriorAlpha(0.001, 0.1)
    model.setPriorEps(0.1, 10.)
    model.setNmax_iterations(Nmax_iterations)
    # perform inference
    model.update()

    # investigate results
    X = model.getX()           # factors (N x K)
    W = model.getW()           # weights
    Alpha = model.getAlpha()   # ARD precision parameters
    Yc = model.getResiduals()  # corrected dataset

    # Plotting is commented out upstream; the message is kept for parity
    # with the original, now via print() (valid in Python 2 and 3).
    print("Plotted factor relevance")

    ids = list(df.index)
    with open(peer_result_file, 'w') as outfile:
        for i in range(len(X)):
            individual_factors = '\t'.join(str(e) for e in X[i])
            outfile.write('%s\t%s\n' % (ids[i], individual_factors))
    return X
Esempio n. 7
0
def peerModel(expressFile, factorNum):
    """Fit and return a PEER model with *factorNum* hidden factors.

    expressFile is passed straight to ``setPhenoMean`` — despite the
    name it is the expression matrix itself, not a path.
    """
    fitted = peer.PEER()
    fitted.setPhenoMean(expressFile)
    fitted.setNk(factorNum)
    fitted.update()
    return fitted
Esempio n. 8
0
######################################################
## load folder_name, batch names
######################################################

# Location and naming pieces of the HipSci proteomics quantification
# dataset; the filter/field strings are forwarded to the loader below.
folder_data='/nfs/research2/hipsci/processed_data/proteomics/'
file_protein_quantification='hipsci.proteomics.maxquant.uniprot.TMT_batch_14.20170517'
filter_genes='_genes_filtered'
filter_lines="_lines_filtered_unique"
field_data="Reporter intensity corrected_regbatch"
# load_protein_mrna is a project helper defined elsewhere; only the
# protein table is requested (only_protein=True).
data=load_protein_mrna( filter_genes=filter_genes,filter_lines=filter_lines,file_protein_quantification=file_protein_quantification,\
        folder_data=folder_data,field_data=field_data,only_protein=True)

## Run PEER ### Input matrix: N rows and G columns, where N is the number of samples, and G is the number of genes.

#Cov=np.vstack([np.array([f5p['metadata/lines/'][covariate][:] ==c for c in np.setdiff1d(f5p['metadata/lines/'][covariate][:],'')]) for covariate in ['gender','batch_2_ID']]).astype(float).T
model = peer.PEER()
#model.setCovariates(Cov) # N x C matrix

# Log-transform the intensities; non-finite entries (log of zero or
# missing values) are zeroed out before inference.
expr=np.log(np.copy(data['protein_intensity'].values))
expr[~np.isfinite(expr)]=0
#model.setPhenoMean(data['protein_intensity'].values[:,np.isfinite(np.sum(data['protein_intensity'].values,0))][:,:1000])
model.setPhenoMean(expr)
model.setAdd_mean(True)
# To infer K hidden confounders, define K and number of iterations
K=10;model.setNk(int(K)) # or PEER_setNk(model,number_of_covs)
model.setNmax_iterations(1000)
model.update()
# Pull out the fitted quantities: factors (N x K), weights, ARD
# precisions, and the PEER-corrected expression matrix.
factors = model.getX(); weights = model.getW(); precision = model.getAlpha(); expr_peer = model.getResiduals()

# Append the inferred PEER factors (columns peer_0 .. peer_<K-1>) to the
# line metadata and write the combined table out tab-delimited.
# write_data is a project helper defined elsewhere.
write_data( folder_data+file_protein_quantification+"_lines_metadata_peer"+filter_lines+".txt",        mat=np.hstack([data['line_meta'].values,factors]),\
        header= np.hstack([data['line_meta'].columns.values,['peer_'+str(i) for i in np.arange(factors.shape[1])]]),delim='\t')