def command(self, gene_list, n_top):
    """Return the first ``n_top`` 'MF' enrichment rows for ``gene_list``.

    The genes are sent to g:Profiler as an ordered query with FDR
    correction. Rows whose column 9 equals ``'MF'`` are kept (presumably
    the molecular-function GO domain -- confirm against the g:Profiler
    row layout) and the first ``n_top`` of them are reported.

    Parameters
    ----------
    gene_list : sequence of gene identifiers passed to ``gprofile``.
    n_top : maximum number of rows to return.

    Returns
    -------
    numpy.ndarray
        One ``(name, p-value)`` pair per row, taken from columns -3 and 2
        of the response. An empty array when nothing was selected.
        NOTE(review): if response rows mix strings and numbers, NumPy
        stores every cell as a string, so p-values come back as text.
    """
    from gprofiler import GProfiler
    import numpy as np

    gp = GProfiler("")
    r0 = gp.gprofile(gene_list, correction_method=GProfiler.THR_FDR, ordered=True)
    r0 = np.array(r0)
    if r0.size == 0:
        # An empty response becomes a 1-D array, which cannot be indexed
        # by column; return what the "no MF rows" path returns instead.
        return np.array([])

    r0 = r0[r0[:, 9] == 'MF']
    name_out = r0[0:n_top, -3]
    p_out = r0[0:n_top, 2]
    return np.array(list(zip(name_out, p_out)))
def command(self, gene_list, n_top):
    """Return the first ``n_top`` 'MF' enrichment rows as a DataFrame.

    The genes are sent to g:Profiler as an ordered query with FDR
    correction. Rows whose column 9 equals ``'MF'`` are kept (presumably
    the molecular-function GO domain -- confirm against the g:Profiler
    row layout) and the first ``n_top`` of them are reported.

    Parameters
    ----------
    gene_list : sequence of gene identifiers passed to ``gprofile``.
    n_top : maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Name"`` (response column -3) and ``"p-values"``
        (response column 2). The frame has these two columns and no rows
        when the response is empty, contains no 'MF' rows, or ``n_top``
        selects nothing.
    """
    from gprofiler import GProfiler
    import numpy as np

    columns = ["Name", "p-values"]
    gp = GProfiler("")
    r0 = gp.gprofile(gene_list, correction_method=GProfiler.THR_FDR, ordered=True)
    r0 = np.array(r0)
    if r0.size == 0:
        # An empty response becomes a 1-D array that cannot be indexed by
        # column; report "no rows" instead of raising IndexError.
        return pd.DataFrame(columns=columns)

    r0 = r0[r0[:, 9] == 'MF']
    name_out = r0[0:n_top, -3]
    p_out = r0[0:n_top, 2]
    data = np.array(list(zip(name_out, p_out)))
    if data.size == 0:
        # A 1-D empty array does not fit a two-column frame and would make
        # the DataFrame constructor raise a shape-mismatch ValueError.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(data=data, columns=columns)
def profile_genes_with_active_sites(enriched_genes, background=None) -> DataFrame:
    """Run a g:Profiler query for ``enriched_genes`` and tabulate the result.

    Args:
        enriched_genes: sized collection of genes to query; an empty
            collection short-circuits without contacting the service.
        background: optional custom background forwarded as ``custom_bg``.

    Returns:
        A DataFrame whose columns are the header row of the response and
        whose rows are the remaining response rows. An empty DataFrame
        when there is nothing to query or the response is falsy.
    """
    if len(enriched_genes) != 0:
        client = GProfiler('ActiveDriverDB', want_header=True)
        result = client.gprofile(enriched_genes, custom_bg=background)
        if result:
            # With want_header=True the first row carries the column names.
            column_names, *records = result
            return DataFrame(records, columns=column_names)
    return DataFrame()
def gsea_connected_components(G, outdir):
    """
    Run GProfiler enrichment (GO:BP only) on every connected component of G.

    A directed graph is converted to an undirected one first. The result
    for component number ``i`` is written to ``<outdir>/enrich_<i>.tsv``;
    a component whose output file already exists is not queried again.

    Returns
    -------
    rv : list of (set, str)
      tuples of the component's gene set and the path of its enrichment
      output file
    """
    # NOTE(review): the source's indentation was lost; this assumes every
    # component is reported, including those whose file already existed.
    client = GProfiler("FluPath/0.1")
    undirected = G.to_undirected() if nx.is_directed(G) else G
    components = list(nx.connected_components(undirected))

    results = []
    for index, component in enumerate(components):
        # TODO how are http errors handled?
        out_path = os.path.join(outdir, "enrich_{}.tsv".format(index))
        if not os.path.exists(out_path):
            enrichment = client.gprofile(component, src_filter=['GO:BP'])
            write_enrich(enrichment, out_path)
        results.append((component, out_path))
    return results
def lookup_enrichment(gene_set):
    """Return the g:Profiler enrichment for ``gene_set``, ignoring ``None`` entries.

    Args:
        gene_set: iterable of gene identifiers; ``None`` items are dropped
            before the query is sent.

    Returns:
        Whatever ``GProfiler.gprofile`` returns for the cleaned gene list.
    """
    genes = []
    for gene in gene_set:
        if gene is not None:
            genes.append(gene)
    client = GProfiler("GTEx/wj")
    return client.gprofile(genes)
#df.to_csv(join(out_path, node_name + "_sorted.csv")) print(df_all.shape) if df_all.shape[0] < args.n_genes: # raise Exception("cell type '{}' has less than {} significant genes!".format(cell_type, args.n_genes)) print("cell type '{}' has less than {} significant genes!".format( cell_type, args.n_genes)) continue df_all = df_all.head(args.n_genes) gene_list_all = df_all['EntrezID'].astype('str').values mg = mygene.MyGeneInfo() result = mg.getgenes(gene_list_all, fields='symbol', species='mouse') gene_list_all = [d['symbol'] for d in result] results_all = gp.gprofile(gene_list_all, organism="mmusculus", ordered=args.ordered, correction_method=get_correction_method( args.correction), src_filter=[args.go_branch], custom_bg=background_gene_set) print("\t # results returned (all genes) = {}".format( len(results_all))) # if len(results) == 0: # continue filepath = join(args.output_folder, "{}_all.tex".format(cell_type.replace(':', '_'))) table_tex = write_to_table(cell_type, info_dict, results_all, filepath, args.rows) overall_table_all_f.write(table_tex) overall_table_all_f.write("\n") filepath = join(args.output_folder, "{}_all_full.tex".format(cell_type.replace(':', '_')))
class GOEnrichmentTester():
    """Compare g:Profiler enrichment of a gene list with per-tumour-type gene sets.

    Tumour-type gene sets are read from a COSMIC-style TSV file with
    ``loadCOSMIC``; enrichment terms come from ``GProfiler.gprofile``.

    Attributes set by ``loadCOSMIC`` / ``makeGOList``:
      result      -- {"somatic": {tumour: [genes]}, "germline": {tumour: [genes]}}
      diseaseList -- tumour types in first-seen order
      GOList      -- {tumour: [term dicts sorted by ascending p-value]}
    """

    def __init__(self):
        self.gp = GProfiler("COSSY++/1.5")

    def getGoTerms(self, genelist):
        """Query g:Profiler and return one dict per result row.

        Each dict has the keys "pvalue", "id", "category" and "term",
        read from columns 2, 8, 9 and 11 of the row respectively.
        No category filtering is applied.
        """
        return [
            {"pvalue": row[2], "id": row[8], "category": row[9], "term": row[11]}
            for row in self.gp.gprofile(query=genelist)
        ]

    def readTSV(self, fname):
        """Read a tab-separated file into a list of {header: value} dicts.

        The header is the line starting with ``Gene Symbol``. Double
        quotes are removed from every field, the line terminator is
        stripped so the last column is clean, and blank lines are
        skipped. A data row with fewer fields than the header raises
        IndexError; rows met before the header yield empty dicts.
        """
        records = []
        headers = []
        with open(fname) as reader:
            for line in reader:
                # Drop the terminator so it does not leak into the last
                # field (or into the last header name).
                line = line.rstrip("\r\n")
                if not line:
                    continue
                values = [x.replace("\"", "") for x in line.split("\t")]
                if line.startswith("Gene Symbol"):
                    headers = values
                    continue
                records.append({name: values[i] for i, name in enumerate(headers)})
        return records

    def loadCOSMIC(self, fname):
        """Load tumour-type gene sets from ``fname`` and build ``GOList``.

        For every record the comma-separated "Tumour Types(Somatic)" and
        "Tumour Types(Germline)" fields are split; the record's gene
        symbol is appended to each named tumour type. Empty names are
        ignored. Finishes by calling ``makeGOList``.
        """
        self.result = {"somatic": {}, "germline": {}}
        self.diseaseList = []
        sources = (
            ("somatic", "Tumour Types(Somatic)"),
            ("germline", "Tumour Types(Germline)"),
        )
        for rec in self.readTSV(fname=fname):
            geneSymbol = rec["Gene Symbol"]
            for origin, column in sources:
                for tumorType in rec[column].strip().split(","):
                    tumorType = tumorType.strip()
                    if tumorType == "":
                        continue
                    self.result[origin].setdefault(tumorType, []).append(geneSymbol)
                    if tumorType not in self.diseaseList:
                        self.diseaseList.append(tumorType)
        self.makeGOList()

    def getGenes(self, disease):
        """Return somatic genes followed by germline genes for ``disease``.

        Unknown tumour types give an empty list. The returned list is a
        new object, so callers may modify it freely.
        """
        somaticGenes = self.result["somatic"].get(disease, [])
        germlineGenes = self.result["germline"].get(disease, [])
        return somaticGenes + germlineGenes

    def makeGOList(self):
        """Fill ``GOList`` with p-value-sorted enrichment terms per tumour type.

        Prints one "." per tumour type as a progress marker.
        """
        self.GOList = {}
        for tumorType in self.diseaseList:
            # Parenthesised form prints the same text on Python 2 and 3.
            print(".")
            genes = self.getGenes(tumorType)
            self.GOList[tumorType] = sorted(self.getGoTerms(genes),
                                            key=itemgetter("pvalue"))

    def writeCOSMICGO(self, fname):
        """Dump ``GOList`` to ``fname`` as indented JSON."""
        with open(fname, "w") as w:
            json.dump(self.GOList, w, indent=4)

    def corr(self, genes, disease):
        """Cross-correlate term ranks of ``genes`` against ``disease``.

        Terms enriched for ``genes`` and the stored terms of ``disease``
        are each ranked by ascending p-value. Only terms present in both
        rankings are compared, so the two rank vectors always refer to
        the same terms in the same (alphabetical) order.

        Returns the ``numpy.correlate(..., "same")`` result for the two
        rank vectors, or an empty array when no term is shared.
        Raises KeyError when ``disease`` is not in ``GOList``.
        """
        by_pvalue = itemgetter("pvalue")
        inputRank = self._firstRanks(sorted(self.getGoTerms(genes), key=by_pvalue))
        answerGO = sorted(
            (x for x in self.GOList[disease] if x["term"] in inputRank),
            key=by_pvalue)
        answerRank = self._firstRanks(answerGO)

        # Every answer term was filtered to be an input term, so these
        # keys are exactly the shared terms.
        shared = sorted(answerRank)
        if not shared:
            # numpy.correlate rejects empty input.
            return np.array([])
        inputGO_ranks = [inputRank[term] for term in shared]
        answerGO_ranks = [answerRank[term] for term in shared]
        return np.correlate(inputGO_ranks, answerGO_ranks, "same")

    @staticmethod
    def _firstRanks(goTerms):
        """Map each term name to the position of its first occurrence."""
        ranks = {}
        for position, entry in enumerate(goTerms):
            ranks.setdefault(entry["term"], position)
        return ranks

    def pvaluecomp(self, a, b):
        """cmp-style comparator on the "pvalue" key: returns -1, 0 or 1.

        No longer used internally (sorting uses a key function); kept so
        existing callers keep working.
        """
        x = a['pvalue']
        y = b['pvalue']
        if x > y:
            return 1
        elif x < y:
            return -1
        else:
            return 0