コード例 #1
0
def save_enrichment_set():
    """Run Enrichr on every cluster of six result files and save the term lists.

    For each ``(key, file)`` pair the clusters are loaded with ``read_file``;
    up to the first 1000 entries of element ``[2]`` of each cluster are sent
    to ``gp.enrichr``. The ``Term`` column of each result is stored under
    ``"<key>-<cluster>"`` and the collected mapping is written to
    ``enrich-cluster/full_result_dic.csv`` with ``write_file``.

    The run is best-effort: a cluster whose enrichment fails is reported on
    stdout and skipped, so one bad cluster does not abort the whole run.
    """
    # NOTE(review): the library is chosen by its position (53) in the list
    # returned by gseapy -- confirm which library is intended, since the
    # position is not guaranteed to stay the same.
    lib = gp.get_library_name('Human')[53]

    files = [
        ("gcn-hom-hom", "enrich/gcn-hom-hom.csv"),
        ("gcn-hom-onto", "enrich/gcn-hom-onto.csv"),
        ("gcn-onto-onto", "enrich/gcn-onto-onto.csv"),
        ("gae-hom-hom", "enrich/gae-hom-hom.csv"),
        ("gae-hom-onto", "enrich/gae-hom-onto.csv"),
        ("gae-onto-onto", "enrich/gae-onto-onto.csv"),
    ]

    enrich_set = {}
    for key, file in files:
        print(file)
        cluster_data = read_file(file)
        for i in cluster_data:
            genes = cluster_data[i][2]
            print(len(genes))
            name = f"{key}-{i}"
            try:
                enr = gp.enrichr(
                    gene_list=list(genes)[:1000],
                    gene_sets=lib,
                    organism='Human',
                    cutoff=0.05,
                ).results
                term = enr['Term'].to_list()
            except Exception as exc:
                # Still skip the cluster, but say so instead of hiding it.
                # Catching Exception (not a bare except) lets Ctrl-C through.
                print(f"enrichment failed for {name}: {exc!r}")
                continue
            enrich_set[name] = term
            print(enr)

    write_file("enrich-cluster/full_result_dic.csv", enrich_set)
コード例 #2
0
def save_enrichment(x):
    """Run Enrichr on every cluster found under directory ``x`` and export to Excel.

    Side effects:
      * writes the indexed list of available Human libraries to ``gensets.txt``;
      * writes all result rows with ``P-value < 0.05`` to sheet ``sheet1`` of
        ``enrich-cluster/full-results.xlsx``, tagged with a ``model`` column
        (1-6, one per input file) and a ``cluster`` column.

    Parameters:
        x: directory prefix (string) containing the six ``*.csv`` cluster files.

    A cluster whose enrichment call fails is reported on stdout and skipped.
    """
    lib = gp.get_library_name('Human')

    with open('gensets.txt', 'w') as f:
        for index, library_name in enumerate(lib):
            f.write("%s %s\n" % (index, library_name))
    # NOTE(review): the library is chosen by position in the list above --
    # confirm which one is intended (gensets.txt lists the indices).
    lib = lib[53]

    files = [(1, x+"/gcn-hom-hom.csv"), (2, x+"/gcn-hom-onto.csv"),
             (3, x+"/gcn-onto-onto.csv"), (4, x+"/gae-hom-hom.csv"),
             (5, x+"/gae-hom-onto.csv"), (6, x+"/gae-onto-onto.csv")]

    frames = []
    for key, file in files:
        print(file)
        cluster_data = read_file_2(file)
        for i in cluster_data:
            try:
                enr = gp.enrichr(
                    gene_list=list(cluster_data[i][2]),
                    gene_sets=lib,
                    organism='Human',
                    cutoff=0.05,
                ).results
            except Exception as exc:
                # Skip this cluster. Falling through would hit an unbound
                # `enr` on the first failure, or re-append the previous
                # cluster's result under the wrong labels.
                print(f"enrichment failed for model {key}, cluster {i}: {exc!r}")
                continue
            enr['model'] = key
            enr['cluster'] = i
            frames.append(enr)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames) if frames else pd.DataFrame()
    if 'P-value' in df.columns:
        df = df[df['P-value'] < 0.05]

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.0.
    with pd.ExcelWriter('enrich-cluster/full-results.xlsx') as writer:
        df.to_excel(writer, sheet_name="sheet1")
コード例 #3
0
    def enrichment_analysis(self, library, output):
        """
        Run an Enrichr query on the genes of both groups and return its results.

        The identifiers in ``self.genes1`` and ``self.genes2`` are translated
        through ``self.mapping``, concatenated, and submitted in a single
        ``gseapy.enrichr`` call with a cutoff of 0.05.

        Parameters:
        -----------
        library - Enrichr library to be used. Recommendations:
            - 'GO_Molecular_Function_2018'
            - 'GO_Biological_Process_2018'
            - 'GO_Cellular_Component_2018'
            The full list is returned by gseapy.get_library_name().

        output - directory name passed to gseapy as ``outdir``

        Returns:
        --------
        The ``results`` attribute of the object returned by ``gseapy.enrichr``.

        Raises:
        -------
        AssertionError - if ``library`` is not an available library, or if
            neither ``self.convert`` is True nor ``self.origID`` is "symbol".
        """
        available = gseapy.get_library_name()
        assert library in available, "the library is not available, check gseapy.get_library_name() for available options"
        assert (self.convert == True) or (self.origID == "symbol"), \
            "EnrichR accepts only gene names as an input, thus please set 'convert' to True and indicate the original gene ID"

        # Group 1 first, then group 2, each translated through the mapping.
        query_genes = [self.mapping[gene] for gene in self.genes1]
        query_genes += [self.mapping[gene] for gene in self.genes2]

        enrichment = gseapy.enrichr(
            gene_list=query_genes,
            description='pathway',
            gene_sets=library,
            cutoff=0.05,
            outdir=output,
        )
        return enrichment.results
コード例 #4
0
    def __init__(self, **kwargs):
        """Initialise the parent class, then cache the gseapy library names.

        All keyword arguments are forwarded unchanged to the parent
        initialiser. If ``self.path`` does not exist, the class docstring is
        printed. The list returned by ``gseapy.get_library_name()`` is stored
        in ``self.libraries``.
        """
        super().__init__(**kwargs)

        missing = not self.path.exists()
        if missing:
            print(self.__doc__)

        # gseapy is imported here, inside the method, not at module level.
        import gseapy
        available = gseapy.get_library_name()
        self.libraries = available
コード例 #5
0
import pandas as pd
import gseapy as gp
import requests
import sleep
import matplotlib.pyplot as plt
from gseapy.parser import Biomart
from gseapy.plot import barplot, dotplot


# Example script: run an Enrichr query against KEGG_2019_Human for a gene list
# stored on disk. All input paths are absolute paths on the author's machine.

# Tab-separated gene list without a header row. NOTE(review): this DataFrame is
# not used below -- the enrichr call is given the file path directly.
gene_list = pd.read_csv("/Users/sunxueyan/Downloads/GSEApy-master/tests/data/gene_list.txt",header=None, sep="\t")
gene_list1 = pd.read_csv("/Users/sunxueyan/Downloads/non_geneID.csv")
# The return value is discarded, so this only shows output in an interactive session.
gene_list1.head()

# Collapse the frame with squeeze(), strip whitespace from each entry, and
# convert to a plain list. `glist` is only referenced by the commented-out
# alternative in the enrichr call below.
glist = gene_list1.squeeze().str.strip().tolist()
names = gp.get_library_name() # default: Human

# NOTE(review): `s` is not referenced anywhere in the visible script, and it is
# not passed to gseapy -- confirm whether this session is still needed.
s = requests.session()
s.keep_alive = False



# Submit the query; results are written under outdir and the returned object
# is kept in `enr`.
enr = gp.enrichr(gene_list="/Users/sunxueyan/Downloads/GSEApy-master/tests/data/gene_list.txt",
     # or gene_list=glist
     description='',
     gene_sets=['KEGG_2019_Human'],
     outdir='test/enrichr_kegg',
     cutoff=0.5 # test dataset, use lower value from range(0,1)
    )


コード例 #6
0
ファイル: GSEApy.py プロジェクト: tdayris/yawn
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("cls_file", help="Path to a cls file", type=str)

    parser.add_argument(
        "gct_file",
        help="Path to a gmt-like file with second line discarded",
        type=str)

    parser.add_argument("-g",
                        "--gene_set",
                        help="Gene set name"
                        " (default: %(default)s)",
                        type=str,
                        choices=gp.get_library_name(),
                        default='KEGG_2016')

    parser.add_argument("-p",
                        "--permutation_type",
                        help="Type of permutation used within GSEA"
                        " (default: %(default)s)",
                        type=str,
                        choices=['gene_set', 'phenotype'],
                        default='phenotype')

    parser.add_argument("-o",
                        "--output_dir",
                        help="Path to output_dir"
                        " (default: %(default)s)",
                        type=str,
コード例 #7
0
from EnrichmentAnalysis.enrichment_utils import read_file, read_file_2, write_file


def reduce_genesets():
    """Drop selected gene sets whose key is absent from the gae-hom-hom results.

    Reads ``enrich_red/gae-hom-hom.csv`` and ``enrich_red/selected_genesets.csv``
    with ``read_file``, keeps only the entries of the latter whose key is also
    in the former, and writes the filtered mapping back over
    ``enrich_red/selected_genesets.csv`` (the input file is overwritten).
    """
    reference = read_file("enrich_red/gae-hom-hom.csv")
    selected = read_file("enrich_red/selected_genesets.csv")
    kept = {name: selected[name] for name in selected if name in reference}
    write_file("enrich_red/selected_genesets.csv", kept)


# reduce_genesets()

# NOTE(review): the library is chosen by its position (53) in the list returned
# by gseapy -- confirm which library is intended.
lib = gp.get_library_name('Human')[53]
# One tuple per result file: (base file name, <flag>, <selector>).
# NOTE(review): judging by the loop below, the second field presumably picks
# the hom/onto neighbour file and the third picks the rank file -- confirm.
files = [("gae-hom-hom", 1, 1), ("gae-hom-onto", 1, 2),
         ("gae-onto-onto", 2, 3), ("jcd-hom-hom", 1, 4),
         ("jcd-onto-onto", 2, 5)]
# Forward slashes: the previous "data\ms-project\..." literal depended on the
# invalid escapes "\m" and "\d" and only resolved as a path on Windows.
data_desc = read_file_2("data/ms-project/data-description.csv")

for i in files:
    file_name = "enrich_red/" + i[0] + ".csv"
    file = read_file(file_name)
    # if i[1] == 1:
    #     neigh = read_file_2("data/ms-project/neig_len_hom.csv")
    # else:
    #     neigh = read_file_2("data/ms-project/neig_len_onto.csv")

    if i[2] == 1:
        rank = read_file_2(