Exemple #1
0
 def command(self, gene_list, n_top):
     """Return (name, p-value) pairs for the first ``n_top`` 'MF' enrichment rows.

     Queries g:Profiler with ``gene_list`` as an ordered query using the
     ``GProfiler.THR_FDR`` correction method, keeps the rows whose column 9
     equals ``'MF'`` and pairs column -3 (used as the term name) with
     column 2 (used as the p-value).

     Parameters
     ----------
     gene_list : gene identifiers passed straight to ``GProfiler.gprofile``.
     n_top : maximum number of 'MF' rows to keep.

     Returns
     -------
     numpy.ndarray
         One ``(name, p-value)`` row per kept result; an empty array when
         the query returns nothing or no row is tagged 'MF'.
     """
     from gprofiler import GProfiler
     import numpy as np

     gp = GProfiler("")
     rows = gp.gprofile(gene_list,
                        correction_method=GProfiler.THR_FDR,
                        ordered=True)

     # An empty response turns into a 1-D array, which the 2-D column
     # indexing below cannot handle, so bail out with the same empty array
     # that the "no 'MF' rows" path produces.
     if rows is None or len(rows) == 0:
         return np.array([])

     r0 = np.array(rows)
     r0 = r0[r0[:, 9] == 'MF']
     name_out = r0[0:n_top, -3]
     p_out = r0[0:n_top, 2]
     return np.array(list(zip(name_out, p_out)))
Exemple #2
0
    def command(self, gene_list, n_top):
        """Return the first ``n_top`` 'MF' enrichment rows as a DataFrame.

        Queries g:Profiler with ``gene_list`` as an ordered query using the
        ``GProfiler.THR_FDR`` correction method, keeps the rows whose column 9
        equals ``'MF'`` and pairs column -3 (used as the term name) with
        column 2 (used as the p-value).

        Parameters
        ----------
        gene_list : gene identifiers passed straight to ``GProfiler.gprofile``.
        n_top : maximum number of 'MF' rows to keep.

        Returns
        -------
        pandas.DataFrame
            Columns ``"Name"`` and ``"p-values"``, one row per kept result.
            The frame is empty (but keeps both columns) when the query
            returns nothing or no row is tagged 'MF'.
        """
        from gprofiler import GProfiler
        import numpy as np
        # Imported locally like the other dependencies of this method so the
        # ``pd`` name is guaranteed to be bound.
        import pandas as pd

        columns = ["Name", "p-values"]

        gp = GProfiler("")
        rows = gp.gprofile(gene_list,
                           correction_method=GProfiler.THR_FDR,
                           ordered=True)

        # An empty response turns into a 1-D array, which the 2-D column
        # indexing below cannot handle.
        if rows is None or len(rows) == 0:
            return pd.DataFrame(columns=columns)

        r0 = np.array(rows)
        r0 = r0[r0[:, 9] == 'MF']
        name_out = r0[0:n_top, -3]
        p_out = r0[0:n_top, 2]

        pairs = list(zip(name_out, p_out))
        # np.array([]) is 1-D and does not fit two named columns, so build the
        # empty frame explicitly instead of going through numpy.
        if not pairs:
            return pd.DataFrame(columns=columns)

        return pd.DataFrame(data=np.array(pairs), columns=columns)
Exemple #3
0
def profile_genes_with_active_sites(enriched_genes,
                                    background=None) -> DataFrame:
    """Run a g:Profiler query for ``enriched_genes`` and tabulate the response.

    ``background`` is forwarded as the ``custom_bg`` argument of the query.
    The profiler is created with ``want_header=True``; the first item of the
    response is used as the column names and the remaining items as rows.

    An empty ``DataFrame`` is returned without querying when
    ``enriched_genes`` has length zero, and also when the response is falsy.
    """
    # Length check (not truthiness) so array-like inputs are accepted too.
    if len(enriched_genes) == 0:
        return DataFrame()

    profiler = GProfiler('ActiveDriverDB', want_header=True)
    result = profiler.gprofile(enriched_genes, custom_bg=background)
    if not result:
        return DataFrame()

    # Peel the header off the front; everything after it is data.
    items = iter(result)
    column_names = next(items)
    records = list(items)
    return DataFrame(records, columns=column_names)
Exemple #4
0
def gsea_connected_components(G, outdir):
    """
    Run a g:Profiler 'GO:BP' enrichment query for every connected component of G.

    A directed graph is converted to an undirected one first. For component
    number ``i`` the result is written with ``write_enrich`` to
    ``<outdir>/enrich_<i>.tsv``; the query is skipped when that file already
    exists.

    Returns
    -------
    rv : list of (set, str)
        For each component, the gene set that was queried and the path of
        its enrichment output file.
    """
    results = []
    profiler = GProfiler("FluPath/0.1")
    graph = G.to_undirected() if nx.is_directed(G) else G

    for index, component in enumerate(list(nx.connected_components(graph))):
        # TODO how are http errors handled?
        out_path = os.path.join(outdir, "enrich_{}.tsv".format(index))
        if not os.path.exists(out_path):
            write_enrich(profiler.gprofile(component, src_filter=['GO:BP']),
                         out_path)
        results.append((component, out_path))

    return results
def lookup_enrichment(gene_set):
    """Query g:Profiler for ``gene_set`` after dropping ``None`` entries.

    Returns whatever ``GProfiler.gprofile`` returns for the cleaned list.
    """
    known_genes = []
    for gene in gene_set:
        if gene is not None:
            known_genes.append(gene)

    profiler = GProfiler("GTEx/wj")
    return profiler.gprofile(known_genes)
Exemple #6
0
 #df.to_csv(join(out_path, node_name + "_sorted.csv"))
 print(df_all.shape)
 if df_all.shape[0] < args.n_genes:
     # raise Exception("cell type '{}' has less than {} significant genes!".format(cell_type, args.n_genes))
     print("cell type '{}' has less than {} significant genes!".format(
         cell_type, args.n_genes))
     continue
 df_all = df_all.head(args.n_genes)
 gene_list_all = df_all['EntrezID'].astype('str').values
 mg = mygene.MyGeneInfo()
 result = mg.getgenes(gene_list_all, fields='symbol', species='mouse')
 gene_list_all = [d['symbol'] for d in result]
 results_all = gp.gprofile(gene_list_all,
                           organism="mmusculus",
                           ordered=args.ordered,
                           correction_method=get_correction_method(
                               args.correction),
                           src_filter=[args.go_branch],
                           custom_bg=background_gene_set)
 print("\t # results returned (all genes) = {}".format(
     len(results_all)))
 # if len(results) == 0:
 #     continue
 filepath = join(args.output_folder,
                 "{}_all.tex".format(cell_type.replace(':', '_')))
 table_tex = write_to_table(cell_type, info_dict, results_all, filepath,
                            args.rows)
 overall_table_all_f.write(table_tex)
 overall_table_all_f.write("\n")
 filepath = join(args.output_folder,
                 "{}_all_full.tex".format(cell_type.replace(':', '_')))
class GOEnrichmentTester():
    """Build per-disease GO term rankings from a COSMIC gene table via g:Profiler.

    Typical flow: ``loadCOSMIC`` reads a tab-separated gene table, groups the
    gene symbols by tumour type and fills ``self.GOList`` with the enrichment
    terms of every tumour type (sorted by p-value); ``corr`` then compares the
    term ranking of an arbitrary gene list with the ranking stored for one
    disease.

    Attributes set by ``loadCOSMIC``:
        result      -- {"somatic": {tumour: [genes]}, "germline": {tumour: [genes]}}
        diseaseList -- tumour types in order of first appearance
        GOList      -- {tumour: [term dicts sorted by ascending p-value]}
    """

    def __init__(self):
        self.gp = GProfiler("COSSY++/1.5")

    def getGoTerms(self, genelist):
        """Query g:Profiler for ``genelist`` and return one dict per result row.

        Each dict has the keys ``pvalue`` (column 2), ``id`` (column 8),
        ``category`` (column 9) and ``term`` (column 11) of the raw row.
        Rows of every category are kept; nothing is filtered out here.
        """
        res = self.gp.gprofile(query=genelist)
        return [
            {"pvalue": row[2], "id": row[8], "category": row[9], "term": row[11]}
            for row in res
        ]

    def readTSV(self, fname):
        """Read a tab-separated file into a list of ``{header: value}`` dicts.

        The header is the line starting with ``Gene Symbol``. Double quotes
        are removed from every field. Line terminators are stripped so the
        last header/value of a line does not carry a trailing newline, and
        blank lines as well as lines preceding the header are skipped.

        Raises IndexError when a data line has fewer fields than the header.
        """
        records = []
        with open(fname) as reader:
            headers = []

            for line in reader:
                line = line.rstrip("\r\n")
                if not line:
                    continue

                values = [x.replace("\"", "") for x in line.split("\t")]
                if line.startswith("Gene Symbol"):
                    headers = values
                    continue

                # No header seen yet: there are no column names to map to.
                if not headers:
                    continue

                records.append(
                    {name: values[i] for i, name in enumerate(headers)})

        return records

    def _registerTumors(self, origin, tumorField, geneSymbol):
        """Add ``geneSymbol`` to every tumour type listed in the comma-separated ``tumorField``."""
        for tumorType in (x.strip() for x in tumorField.strip().split(",")):
            if tumorType == "":
                continue

            self.result[origin].setdefault(tumorType, []).append(geneSymbol)

            if tumorType not in self.diseaseList:
                self.diseaseList.append(tumorType)

    def loadCOSMIC(self, fname):
        """Load a COSMIC gene table and rebuild ``result``, ``diseaseList`` and ``GOList``.

        Reads the columns ``Gene Symbol``, ``Tumour Types(Somatic)`` and
        ``Tumour Types(Germline)``; the tumour columns are comma-separated
        lists. Finishes by calling ``makeGOList``, which queries g:Profiler
        once per tumour type.
        """
        self.result = {"somatic": {}, "germline": {}}
        self.diseaseList = []

        for rec in self.readTSV(fname=fname):
            geneSymbol = rec["Gene Symbol"]
            self._registerTumors("somatic", rec["Tumour Types(Somatic)"], geneSymbol)
            self._registerTumors("germline", rec["Tumour Types(Germline)"], geneSymbol)

        self.makeGOList()

    def getGenes(self, disease):
        """Return the somatic genes followed by the germline genes of ``disease``.

        Unknown diseases yield an empty list. The returned list is a new
        object, so callers may modify it freely.
        """
        somaticGenes = self.result["somatic"].get(disease, [])
        germlineGenes = self.result["germline"].get(disease, [])
        return somaticGenes + germlineGenes

    def makeGOList(self):
        """Fill ``self.GOList`` with the p-value-sorted GO terms of every known disease."""
        self.GOList = {}

        for tumorType in self.diseaseList:
            # Progress marker: one dot per g:Profiler query.
            print(".")
            goTerms = self.getGoTerms(self.getGenes(tumorType))
            # key= gives the same (stable, ascending p-value) order that the
            # former cmp=self.pvaluecomp did, and also works on Python 3.
            self.GOList[tumorType] = sorted(goTerms, key=itemgetter("pvalue"))

    def writeCOSMICGO(self, fname):
        """Dump ``self.GOList`` to ``fname`` as indented JSON."""
        with open(fname, "w") as w:
            json.dump(self.GOList, w, indent=4)

    def _ranksByTerm(self, terms):
        """Return each term's first position in ``terms``, listed in alphabetical term order."""
        firstIndex = {}
        for position, term in enumerate(terms):
            firstIndex.setdefault(term, position)
        return [firstIndex[term] for term in sorted(terms)]

    def corr(self, genes, disease):
        """Cross-correlate the GO term rankings of ``genes`` and of ``disease``.

        Both term lists are ranked by ascending p-value and restricted to the
        terms they have in common, so the two rank vectors (ordered
        alphabetically by term) describe the same terms. The vectors are then
        passed to ``np.correlate(..., "same")``.

        Returns the array produced by ``np.correlate``, or an empty array
        when the two term sets share nothing.

        Raises KeyError when ``disease`` is not in ``self.GOList``.
        """
        byPvalue = itemgetter("pvalue")
        reference = self.GOList[disease]
        referenceTerms = set(x["term"] for x in reference)

        inputGO = sorted(self.getGoTerms(genes), key=byPvalue)
        inputGO_terms = [x["term"] for x in inputGO if x["term"] in referenceTerms]

        sharedTerms = set(inputGO_terms)
        answerGO = sorted([x for x in reference if x["term"] in sharedTerms],
                          key=byPvalue)
        answerGO_terms = [x["term"] for x in answerGO]

        inputGO_ranks = self._ranksByTerm(inputGO_terms)
        answerGO_ranks = self._ranksByTerm(answerGO_terms)

        # np.correlate rejects empty operands.
        if not inputGO_ranks or not answerGO_ranks:
            return np.array([])

        return np.correlate(inputGO_ranks, answerGO_ranks, "same")

    def pvaluecomp(self, a, b):
        """Three-way comparison of two term dicts by their ``pvalue`` (-1, 0 or 1).

        Kept for callers that still need a cmp-style function; the class
        itself sorts with ``key=itemgetter("pvalue")``.
        """
        x = a['pvalue']
        y = b['pvalue']
        if x > y:
            return 1
        elif x < y:
            return -1
        else:
            return 0