def simple_unsupervised_demo():
    """Run PEER on the demo expression data with all default priors set explicitly.

    Loads ``data/expression.csv``, fits K=20 hidden factors, then plots the
    factor relevance (ARD precision) profile to ``demo_simple.pdf``.
    Relies on module-level ``SP`` (scipy), ``peer``, ``PL`` (pylab) and the
    ``plot_Alpha`` helper.
    """
    # print() call form instead of the Python-2-only print statement,
    # so the module also parses under Python 3.
    print("Simple PEER application. All default prior values are set explicitly as demonstration.")
    y = SP.loadtxt("data/expression.csv", delimiter=",")
    K = 20
    Nmax_iterations = 100
    model = peer.PEER()
    # set data and parameters
    model.setNk(K)         # number of factors to learn
    model.setPhenoMean(y)  # data for inference
    # set priors (these are the default settings of PEER)
    model.setPriorAlpha(0.001, 0.1)
    model.setPriorEps(0.1, 10.)
    model.setNmax_iterations(Nmax_iterations)
    # perform inference
    model.update()
    # investigate results
    X = model.getX()           # factors
    W = model.getW()           # weights
    Alpha = model.getAlpha()   # ARD precision parameters
    Yc = model.getResiduals()  # corrected dataset
    # plot variance of factors - in this case, we expect a natural elbow where
    # there are 5 active factors, as 5 were simulated
    plot_Alpha(Alpha)
    PL.savefig("demo_simple.pdf")
    print("Plotted factor relevance")
    PL.show()
def get_simple_model_object(K=10, Nmax_iterations=100, expr_file="data/expression.csv"):
    """Build a PEER model preloaded with CSV expression data, ready for update().

    Parameters use PEER's conventions: *K* hidden factors, at most
    *Nmax_iterations* inference iterations. Returns the unfitted model.
    """
    expression = SP.loadtxt(expr_file, delimiter=",")
    peer_model = peer.PEER()
    peer_model.setNk(K)                  # number of hidden factors
    peer_model.setPhenoMean(expression)  # phenotype matrix for inference
    peer_model.setNmax_iterations(Nmax_iterations)
    return peer_model
def runpeer(phenotype, K, iterations, cov=None):
    """Fit PEER on *phenotype* and return ``(residuals, factors)``.

    *cov* is an optional covariate matrix; when given it is handed to the
    model before the factor count and iteration limit are set.
    """
    m = peer.PEER()
    m.setPhenoMean(phenotype)
    if cov is not None:
        m.setCovariates(cov)
    m.setNk(K)
    m.setNmax_iterations(iterations)
    m.update()
    # residuals first, inferred factors second — same order as the original.
    return m.getResiduals(), m.getX()
def RunPEER(infile, covfile):
    """Run PEER with covariates on preprocessed tab-delimited matrices.

    Expects header-free companion files derived from *infile* / *covfile*
    (extension swapped for ``.NoHead.trans.tab`` / ``.NoHead.tab``) to exist
    under the module-level global ``cwd``.

    Returns a tuple ``(residuals, precision, factors, weights)``.
    """
    import peer
    import scipy as SP

    # Derive the preprocessed file names by replacing the last extension.
    expr_path = '%s/%s' % (cwd, '.'.join(infile.split('.')[:-1]) + '.NoHead.trans.tab')
    expr = SP.loadtxt(expr_path, delimiter='\t')
    cov_path = '%s/%s' % (cwd, '.'.join(covfile.split('.')[:-1]) + '.NoHead.tab')
    covs = SP.loadtxt(cov_path, delimiter='\t')

    model = peer.PEER()
    model.setPhenoMean(expr)   # expression matrix (rows assumed samples — TODO confirm)
    model.setCovariates(covs)  # known covariates for the same samples
    model.setNk(10)            # number of hidden factors
    # Defaults left in place: tolerance=0.001, variance tolerance=0.00001,
    # priors Alpha(0.001, 0.1) and Eps(0.1, 10). Keep the bound tolerance
    # fairly high but the variance tolerance low relative to the expression
    # matrix variance.
    model.setNmax_iterations(1000)  # default = 1000
    model.update()

    factors = model.getX()
    weights = model.getW()
    precision = model.getAlpha()
    residuals = model.getResiduals()
    return (residuals, precision, factors, weights)
def get_simple_model_object(K=10, Nmax_iterations=100, expr_file="data/expression.csv"):
    """Build a PEER model from a GCT-style tab-delimited expression file.

    NOTE(review): this redefines ``get_simple_model_object`` from earlier in
    the file (the CSV-loading variant); at import time the last definition wins.
    """
    # Read the table (header on the second line), drop the gene-id column,
    # index by gene description, then transpose so rows become samples —
    # assumes the file's columns are samples; TODO confirm.
    table = pd.read_csv(expr_file, delimiter='\t', header=1)
    table = table.drop(columns=['Name']).set_index('Description').transpose()
    model = peer.PEER()
    # set data and parameters
    model.setNk(K)                   # number of hidden factors
    model.setPhenoMean(table.values) # data for inference
    model.setNmax_iterations(Nmax_iterations)
    return model
def compute_and_store_peer_factors(df, peer_result_file, K=15):
    """Fit PEER factors on *df* and write them to *peer_result_file*.

    *df* layout (index = individual ids, columns = genes)::

              Gene1 Gene2
        Ind1    0.4     6
        Ind2     14   0.5

    Writes one tab-separated line per individual (id followed by its K
    factor values) and returns the factor matrix ``X``.
    """
    # NOTE(review): the original assigned 100 and immediately overwrote it
    # with this debug value; raise the cap for real runs.
    Nmax_iterations = 3
    model = peer.PEER()
    model.setNk(K)                 # number of factors to learn
    model.setPhenoMean(df.values)  # data for inference
    # set priors (these are the default settings of PEER)
    model.setPriorAlpha(0.001, 0.1)
    model.setPriorEps(0.1, 10.)
    model.setNmax_iterations(Nmax_iterations)
    # perform inference
    model.update()
    # Inferred factors, one row per individual. The original also fetched
    # weights, ARD precisions and residuals into unused locals; dropped.
    X = model.getX()
    ids = list(df.index)
    with open(peer_result_file, 'w') as outfile:
        for i, row in enumerate(X):
            factor_values = '\t'.join(str(e) for e in row)
            outfile.write('%s\t%s\n' % (ids[i], factor_values))
    return X
def peerModel(expressFile, factorNum):
    """Construct, fit and return a PEER model with *factorNum* hidden factors.

    *expressFile* is passed straight to ``setPhenoMean`` — despite the name
    it is used as phenotype data, not a path; TODO confirm with callers.
    """
    fitted = peer.PEER()
    fitted.setPhenoMean(expressFile)  # phenotype data for inference
    fitted.setNk(factorNum)           # number of hidden factors
    fitted.update()                   # run inference before returning
    return fitted
###################################################### ## load folder_name, batch names ###################################################### folder_data='/nfs/research2/hipsci/processed_data/proteomics/' file_protein_quantification='hipsci.proteomics.maxquant.uniprot.TMT_batch_14.20170517' filter_genes='_genes_filtered' filter_lines="_lines_filtered_unique" field_data="Reporter intensity corrected_regbatch" data=load_protein_mrna( filter_genes=filter_genes,filter_lines=filter_lines,file_protein_quantification=file_protein_quantification,\ folder_data=folder_data,field_data=field_data,only_protein=True) ## Run PEER ### Input matrix: N rows and G columns, where N is the number of samples, and G is the number of genes. #Cov=np.vstack([np.array([f5p['metadata/lines/'][covariate][:] ==c for c in np.setdiff1d(f5p['metadata/lines/'][covariate][:],'')]) for covariate in ['gender','batch_2_ID']]).astype(float).T model = peer.PEER() #model.setCovariates(Cov) # N x C matrix expr=np.log(np.copy(data['protein_intensity'].values)) expr[~np.isfinite(expr)]=0 #model.setPhenoMean(data['protein_intensity'].values[:,np.isfinite(np.sum(data['protein_intensity'].values,0))][:,:1000]) model.setPhenoMean(expr) model.setAdd_mean(True) # To infer K hidden confounders, define K and number of iterations K=10;model.setNk(int(K)) # or PEER_setNk(model,number_of_covs) model.setNmax_iterations(1000) model.update() factors = model.getX(); weights = model.getW(); precision = model.getAlpha(); expr_peer = model.getResiduals() write_data( folder_data+file_protein_quantification+"_lines_metadata_peer"+filter_lines+".txt", mat=np.hstack([data['line_meta'].values,factors]),\ header= np.hstack([data['line_meta'].columns.values,['peer_'+str(i) for i in np.arange(factors.shape[1])]]),delim='\t')