def find_vstructures(bfile, pfile, gfile, cisfile, assoc0file, window, vfile, startTraitIdx, nTraits):
    """
    searching for v-structures and saving them

    input:
    bfile         :   basename handed to bedReader.BedReader
    pfile         :   phenotype file handed to phenoReader.PhenoReaderFile
    gfile         :   basename of the gene-correlation results (gfile.pv is read)
    cisfile       :   file containing cis anchors
    assoc0file    :   basename of the initial association scan (assoc0file.pv is read)
    window        :   forwarded as `window` to gnetlmm.GNetLMM
    vfile         :   basename of the v-structure file (vfile.csv is passed to save_vstructures)
    startTraitIdx :   forwarded to model.find_vstructures
    nTraits       :   forwarded to model.find_vstructures
    """
    # NOTE(review): a later definition with the same name but a longer
    # signature exists further down; if both live in the same module, that
    # one replaces this one at import time -- confirm whether this is dead code.
    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    model = gnetlmm.GNetLMM(pheno_reader, geno_reader, window=window)

    # attach the pre-computed result files the model reads from
    model.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    model.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))
    model.load_cis_anchors(cisfile)

    model.find_vstructures(startTraitIdx, nTraits)
    model.save_vstructures(vfile + '.csv')
def gene_has_anchor(bfile, pfile, assoc0, anchor_thresh, anchorfile, window, cis=True):
    """
    tests if a gene has a cis anchor

    input:
    bfile         :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile         :   phenotype file
    assoc0        :   basefilename of initial association scan (assoc0.pv is read)
    anchor_thresh :   threshold for anchor-associations
    anchorfile    :   filename for saving the anchor associations
    window        :   maximal distance between cis-snp and gene
    cis           :   if set, look for cis-associations only (default: True)
    """
    pheno_reader = phenoReaderFile.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    model = gnetlmm.GNetLMM(pheno_reader, geno_reader, window=window)

    # the anchor test is based on the p-values of the initial scan
    model.set_assoc0_reader(reader.FileReader(assoc0 + '.pv'))

    model.gene_has_anchor(anchor_thresh, cis)
    model.save_anchors(anchorfile)
def find_vstructures(bfile, pfile, gfile, anchorfile, assoc0file, window, vfile,
                     startTraitIdx, nTraits, corr_thresh, ind_thresh, max_genes):
    """
    searching for v-structures and saving them

    input:
    bfile         :   basename handed to bedReader.BedReader
    pfile         :   phenotype file
    gfile         :   basename of the correlation file (gfile.pv is read)
    anchorfile    :   file containing anchors
    assoc0file    :   basename of the initial association scan (assoc0file.pv is read)
    window        :   window size
    vfile         :   basename of the file containing v-structures
    startTraitIdx :   index of first trait to be analyzed
    nTraits       :   number of traits to be analyzed
    corr_thresh   :   q-value for calling a correlation significant
    ind_thresh    :   q-value for calling a correlation not significant
    max_genes     :   maximal number of genes in the conditioning set
    """
    pheno_reader = phenoReaderFile.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    model = gnetlmm.GNetLMM(
        pheno_reader,
        geno_reader,
        window=window,
        thresh_corr=corr_thresh,
        thresh_ind=ind_thresh,
    )

    # attach the pre-computed result files the model reads from
    model.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    model.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))
    model.load_anchors(anchorfile)

    model.find_vstructures(startTraitIdx, nTraits, max_genes)

    # a finite nTraits gets the start index appended to the output basename
    out_base = vfile
    if np.isfinite(nTraits):
        out_base = out_base + ".startTrait_%d" % startTraitIdx
    model.save_vstructures(out_base + '.csv')
def plot_power(bfile, pfile, assoc0file, assocfile, plotfile, window):
    """
    plotting ROC curves of the LMM and GNetLMM scans

    input:
    bfile      :   basename handed to bedReader.BedReader
    pfile      :   phenotype file (pfile.Agene is loaded as the network)
    assoc0file :   basename of the initial association scan (assoc0file.pv is read)
    assocfile  :   basename of the GNetLMM association scan (assocfile.pv is read)
    plotfile   :   forwarded as `fn` to plotting.plotROCcurve
    window     :   forwarded to get_groundtruth
    """
    methods = ['LMM', 'GNetLMM']

    # reading out p-values and turning them into -log10 scores
    score = {}
    for method, basename in zip(methods, (assoc0file, assocfile)):
        pv_reader = reader.FileReader(basename + '.pv')
        score[method] = -np.log10(pv_reader.getMatrix())

    # get network
    Agene = np.loadtxt(pfile + '.Agene')

    # gene info
    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    gene_start = pheno_reader.getGeneStart()
    gene_end = pheno_reader.getGeneEnd()  # not used below; call kept as in the original
    gene_chrom = pheno_reader.getGeneChrom()

    # snp info
    bed_reader = bedReader.BedReader(bfile)
    snp_pos = bed_reader.getSnpPos()
    snp_chrom = bed_reader.getSnpChrom()

    P_cis, P_trans = get_groundtruth(Agene, snp_pos, snp_chrom, gene_start, gene_chrom, window)

    # delete cis-associations
    for method_score in score.values():
        method_score[P_cis] = 0

    # compute receiver operator characteristics
    FPR = {}
    TPR = {}
    for method in score:
        fpr, tpr = roc.roc(P_trans, score[method])
        FPR[method] = fpr
        TPR[method] = tpr

    # plotting results
    plotting.plotROCcurve(methods, TPR, FPR, xlim=(0, 0.05), ylim=(0, 0.42), fn=plotfile)
def create_nice_output(vfile, bfile, pfile, assoc0file, assocfile, blockfile, outfile):
    """
    creating human readable output file

    Writes one tab-separated row per (v-structure, anchor snp) pair to
    outfile.csv, preceded by a header row.

    input:
    vfile      :   basename of the v-structure file (vfile.csv is read)
    bfile      :   basename handed to bedReader.BedReader
    pfile      :   phenotype file
    assoc0file :   basename of the initial association scan (assoc0file.pv is read)
    assocfile  :   basename of the GNetLMM results (assocfile.csv is read)
    blockfile  :   basename of the BlockLMM results (blockfile.csv is read),
                   or None to omit all BlockLMM columns
    outfile    :   basename of the output file (outfile.csv is written)
    """
    preader = phenoReaderFile.PhenoReaderFile(pfile)
    greader = bedReader.BedReader(bfile)
    snp_ids = greader.getSnpIds()
    gene_ids = preader.getGeneIds()

    vstruct = vstructures.VstructureFile(vfile + '.csv')
    assoc0Reader = reader.FileReader(assoc0file + '.pv')

    assocReader = assoc_results.AssocResultsList()
    assocReader.load_csv(assocfile + '.csv')

    blockReader = None
    if blockfile is not None:
        blockReader = assoc_results.AssocResultsList()
        blockReader.load_csv(blockfile + '.csv')

    # (attribute on the results object, column label) of the optional columns
    var_columns = (
        ('var_snps', 'varSnp'),
        ('var_covs', 'varCovs'),
        ('var_genes', 'varGenes'),
    )
    # result sets that contribute optional columns, in output order
    methods = [(assocReader, 'GnetLMM')]
    if blockReader is not None:
        methods.append((blockReader, 'BlockLMM'))

    header = "Anchor Snp\t Anchor Gene\t Focal Gene\t Orthogonal Genes\t pv(LMM) \t pv(GNetLMM)"
    if blockReader is not None:
        header += " \t pv(BlockLMM)"
    for results, label in methods:
        for attr, column in var_columns:
            if getattr(results, attr) is not None:
                header += '\t %s(%s)' % (column, label)
    header += "\n"

    # the context manager closes the file even if a row fails to format
    with open(outfile + '.csv', 'w') as f:
        f.write(header)

        for idx_focal_gene, idx_anchor_snp, idx_orth_gene, idx_anchor_gene in vstruct.iterator(full=True):
            focal_gene = ",".join(gene_ids[idx_focal_gene])
            orth_gene = ",".join(gene_ids[idx_orth_gene])
            anchor_snps = snp_ids[idx_anchor_snp]

            # the focal-gene part of the row selection does not depend on the anchor snp
            gnet_focal = assocReader.focal_gene == idx_focal_gene
            block_focal = None
            if blockReader is not None:
                block_focal = blockReader.focal_gene == idx_focal_gene

            for i, anchor_snp in enumerate(anchor_snps):
                # best effort: the anchor gene column is left empty when it
                # cannot be looked up (KeyboardInterrupt/SystemExit still propagate)
                try:
                    anchor_gene = gene_ids[idx_anchor_gene[i]]
                except Exception:
                    anchor_gene = ""

                pv_lmm = assoc0Reader.getRows([idx_anchor_snp[i]])[:, idx_focal_gene][0, 0]

                # NOTE(review): the '%.4e' formatting below presumably relies on
                # each mask selecting exactly one result row -- confirm.
                idx_gnet = np.logical_and(gnet_focal, assocReader.snp_anchor == idx_anchor_snp[i])
                masks = [idx_gnet]
                if blockReader is not None:
                    idx_block = np.logical_and(block_focal, blockReader.snp_anchor == idx_anchor_snp[i])
                    masks.append(idx_block)

                line = "%s\t%s\t%s\t%s\t%.4e\t%.4e" % (
                    anchor_snp, anchor_gene, focal_gene, orth_gene,
                    pv_lmm, assocReader.pv[idx_gnet])
                if blockReader is not None:
                    line += "\t%.4e" % (blockReader.pv[idx_block],)

                for (results, _), mask in zip(methods, masks):
                    for attr, _ in var_columns:
                        values = getattr(results, attr)
                        if values is not None:
                            line += '\t%.4e' % (values[mask],)

                line += "\n"
                f.write(line)