def save_enrichment_set():
    """Run Enrichr on every cluster of every model and save the term lists.

    For each ``(key, file)`` pair built below, the cluster table is loaded
    with ``read_file``.  Element ``[2]`` of each cluster record is turned into
    a list, truncated to its first 1000 entries and submitted to
    ``gp.enrichr``.  The ``Term`` column of the result is stored under the
    name ``"<key>-<cluster>"`` and the complete mapping is finally handed to
    ``write_file`` with the path ``enrich-cluster/full_result_dic.csv``.

    A cluster whose query (or term extraction) fails is reported on stdout
    and skipped, so one bad request does not abort the whole run.
    """
    # NOTE(review): the library is selected by its position in the list
    # returned by gseapy; presumably that order can change between Enrichr
    # releases -- confirm that index 53 is still the intended library.
    lib = gp.get_library_name('Human')[53]

    keys = (
        "gcn-hom-hom",
        "gcn-hom-onto",
        "gcn-onto-onto",
        "gae-hom-hom",
        "gae-hom-onto",
        "gae-onto-onto",
    )
    files = [(key, f"enrich/{key}.csv") for key in keys]

    enrich_set = {}
    for key, file in files:
        print(file)
        cluster_data = read_file(file)
        for i in cluster_data:
            genes = cluster_data[i][2]
            print(len(genes))
            try:
                enr = gp.enrichr(
                    gene_list=list(genes)[:1000],
                    gene_sets=lib,
                    organism='Human',
                    cutoff=0.05,
                ).results
                terms = enr['Term'].to_list()
            except Exception as exc:
                # Best effort: keep going with the remaining clusters, but
                # say which one failed instead of swallowing it silently.
                # ``Exception`` (not a bare except) lets Ctrl-C still work.
                print(f"enrichr failed for {key}-{i}: {exc!r}")
                continue
            enrich_set[f"{key}-{i}"] = terms
            print(enr)

    write_file("enrich-cluster/full_result_dic.csv", enrich_set)
def save_enrichment(x):
    """Run Enrichr for every cluster file under ``x`` and export the hits.

    Side effects:
      * writes ``gensets.txt`` with one ``"<index> <library name>"`` line per
        library returned by ``gp.get_library_name('Human')``;
      * writes ``enrich-cluster/full-results.xlsx`` (sheet ``sheet1``) holding
        all result rows with ``P-value < 0.05``, each tagged with the
        ``model`` number (1-6) and ``cluster`` key it came from.

    Args:
        x: Directory prefix that contains the six ``<model>.csv`` cluster
            files listed below.

    A cluster whose Enrichr query fails is reported on stdout and skipped.
    If no query succeeds, an empty sheet is written.
    """
    lib = gp.get_library_name('Human')
    # Dump the index -> name table so the hard-coded index below can be
    # checked by hand.
    with open('gensets.txt', 'w') as f:
        f.writelines(f"{index} {name}\n" for index, name in enumerate(lib))
    # NOTE(review): positional selection; presumably the list order can
    # change between Enrichr releases -- confirm index 53 against gensets.txt.
    lib = lib[53]

    model_names = (
        "gcn-hom-hom",
        "gcn-hom-onto",
        "gcn-onto-onto",
        "gae-hom-hom",
        "gae-hom-onto",
        "gae-onto-onto",
    )
    files = [
        (model, f"{x}/{name}.csv")
        for model, name in enumerate(model_names, start=1)
    ]

    frames = []
    for key, file in files:
        print(file)
        cluster_data = read_file_2(file)
        for i in cluster_data:
            try:
                enr = gp.enrichr(
                    gene_list=list(cluster_data[i][2]),
                    gene_sets=lib,
                    organism='Human',
                    cutoff=0.05,
                ).results
            except Exception as exc:
                # Skip this cluster.  Falling through would either hit an
                # undefined ``enr`` or re-append the previous cluster's table
                # under the wrong model/cluster labels.
                print(f"enrichr failed for model {key}, cluster {i}: {exc!r}")
                continue
            enr['model'] = key
            enr['cluster'] = i
            frames.append(enr)

    if frames:
        # One concat instead of repeated DataFrame.append (removed in
        # pandas 2.0, and quadratic when called in a loop).
        df = pd.concat(frames)
        df = df[df['P-value'] < 0.05]
    else:
        df = pd.DataFrame()

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.0.
    with pd.ExcelWriter('enrich-cluster/full-results.xlsx') as writer:
        df.to_excel(writer, sheet_name="sheet1")
def enrichment_analysis(self, library, output):
    """
    Run an Enrichr query on the genes of both gene sets and return its results.

    Every entry of self.genes1 and then of self.genes2 is looked up in
    self.mapping; the combined list is sent to gseapy.enrichr with a
    cutoff of 0.05.

    Parameters:
    -----------
    library - Enrichr library to be used. Recommendations:
        - 'GO_Molecular_Function_2018'
        - 'GO_Biological_Process_2018'
        - 'GO_Cellular_Component_2018'
        for more options check available libraries by typing gseapy.get_library_name()

    output - directory name where results should be saved

    Returns:
    --------
    the `results` attribute of the object returned by gseapy.enrichr

    Raises:
    -------
    AssertionError - if `library` is not among the available libraries, or if
        neither self.convert == True nor self.origID == "symbol" holds
    """
    available = gseapy.get_library_name()
    assert library in available, "the library is not available, check gseapy.get_library_name() for available options"

    ids_usable = (self.convert == True) or (self.origID == "symbol")
    assert ids_usable, "EnrichR accepts only gene names as an input, thus please set 'convert' to True and indicate the original gene ID"

    # Names of the first gene set, followed by those of the second.
    query_genes = [self.mapping[gene] for gene in self.genes1]
    query_genes.extend(self.mapping[gene] for gene in self.genes2)

    enrichment = gseapy.enrichr(
        gene_list=query_genes,
        description='pathway',
        gene_sets=library,
        cutoff=0.05,
        outdir=output,
    )
    return enrichment.results
def __init__(self, **kwargs):
    """Initialise the parent class and record the available Enrichr libraries.

    All keyword arguments are forwarded unchanged to the parent initialiser.
    When ``self.path`` does not exist, the class docstring is printed.
    ``self.libraries`` is set to whatever ``gseapy.get_library_name()``
    returns.
    """
    super().__init__(**kwargs)

    path_is_missing = not self.path.exists()
    if path_is_missing:
        print(self.__doc__)

    # gseapy is imported when the instance is created, not at module import.
    import gseapy

    self.libraries = gseapy.get_library_name()
# Script: load two gene lists with pandas, fetch the Enrichr library names and
# run one Enrichr query against 'KEGG_2019_Human', writing to 'test/enrichr_kegg'.
import pandas as pd
import gseapy as gp
import requests
# NOTE(review): `sleep` is not a standard-library module; presumably
# `from time import sleep` was intended -- confirm before running.
import sleep
import matplotlib.pyplot as plt
from gseapy.parser import Biomart
from gseapy.plot import barplot, dotplot

# Tab-separated gene list without a header row (absolute, machine-specific path).
gene_list = pd.read_csv("/Users/sunxueyan/Downloads/GSEApy-master/tests/data/gene_list.txt",header=None, sep="\t")
gene_list1 = pd.read_csv("/Users/sunxueyan/Downloads/non_geneID.csv")
# The return value is discarded, so this line has no visible effect when run
# as a script (presumably left over from an interactive session).
gene_list1.head()
# Squeeze the frame, strip whitespace from each entry and convert to a list.
glist = gene_list1.squeeze().str.strip().tolist()
names = gp.get_library_name() # default: Human
# NOTE(review): `s` is not passed to any later call in this block, so it is
# unclear whether this setting affects the Enrichr request -- confirm.
s = requests.session()
s.keep_alive = False
# The query reads its genes from the file path; `glist` is the in-memory alternative.
enr = gp.enrichr(gene_list="/Users/sunxueyan/Downloads/GSEApy-master/tests/data/gene_list.txt",
                 # or gene_list=glist
                 description='',
                 gene_sets=['KEGG_2019_Human'],
                 outdir='test/enrichr_kegg',
                 cutoff=0.5 # test dataset, use lower value from range(0,1)
                )
if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("cls_file", help="Path to a cls file", type=str) parser.add_argument( "gct_file", help="Path to a gmt-like file with second line discarded", type=str) parser.add_argument("-g", "--gene_set", help="Gene set name" " (default: %(default)s)", type=str, choices=gp.get_library_name(), default='KEGG_2016') parser.add_argument("-p", "--permutation_type", help="Type of permutation used within GSEA" " (default: %(default)s)", type=str, choices=['gene_set', 'phenotype'], default='phenotype') parser.add_argument("-o", "--output_dir", help="Path to output_dir" " (default: %(default)s)", type=str,
from EnrichmentAnalysis.enrichment_utils import read_file, read_file_2, write_file def reduce_genesets(): sample = read_file("enrich_red/gae-hom-hom.csv") red = read_file("enrich_red/selected_genesets.csv") temp = {} for i in red: if i in sample: temp[i] = red[i] write_file("enrich_red/selected_genesets.csv", temp) # reduce_genesets() lib = gp.get_library_name('Human')[53] files = [("gae-hom-hom", 1, 1), ("gae-hom-onto", 1, 2), ("gae-onto-onto", 2, 3), ("jcd-hom-hom", 1, 4), ("jcd-onto-onto", 2, 5)] data_desc = read_file_2("data\ms-project\data-description.csv") for i in files: file_name = "enrich_red/" + i[0] + ".csv" file = read_file(file_name) # if i[1] == 1: # neigh = read_file_2("data/ms-project/neig_len_hom.csv") # else: # neigh = read_file_2("data/ms-project/neig_len_onto.csv") if i[2] == 1: rank = read_file_2(