def test_example():
    """Run a GOEA on the nbt.3102 study ids and write four xlsx reports.

    Results with ``p_fdr_bh < 0.05`` and the complete result list are each
    written twice: once with the study mapping passed as ``itemid2name`` and
    once without it.
    """
    taxid = 10090  # Mouse study

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)

    # Enrichment on the study ids; keep a significant subset as well
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]

    # (output file, results, extra keywords), written in this order
    reports = (
        ("nbt3102_sig_symbols.xlsx", results_sig, {"itemid2name": id2symbol}),
        ("nbt3102_sig_geneids.xlsx", results_sig, {}),
        ("nbt3102_all_symbols.xlsx", results_all, {"itemid2name": id2symbol}),
        ("nbt3102_all_geneids.xlsx", results_all, {}),
    )
    for fout_xlsx, results, kws in reports:
        goeaobj.wr_xlsx(fout_xlsx, results, **kws)
def test_example(prt=sys.stdout):
    """Exercise GoeaResults on two forms of significant GOEA results, then plot.

    The significant results (``p_fdr_bh < 0.05``) are wrapped in GoeaResults
    both directly and after conversion by ``MgrNtGOEAs.get_goea_nts_all``;
    each wrapper is handed to ``run`` together with ``prt``.

    Args:
        prt: Stream forwarded to ``run``. Defaults to ``sys.stdout``.
    """
    taxid = 10090  # Mouse study

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)

    # Enrichment on the study ids
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]
    results_nts = MgrNtGOEAs(results_sig).get_goea_nts_all()

    # Manage both result forms: the records first, then the converted form
    for results in (results_sig, results_nts):
        run(GoeaResults(results), prt)

    # Plot the significant results
    plot_results(
        os.path.join(REPO, "test_plot_objgoearesults_{NS}.png"),
        results_sig,
        id2symbol=id2symbol)
def test_example():
    """Plot significant GOEA results three ways: plot_results, plt_goids, plot_gos.

    One ``plot_results`` call covers all significant results; then, for each
    entry of ``get_ns2gos``, the GO ids are plotted with ``plt_goids`` (using
    a GoSubDag) and with ``plot_gos`` (using the GO DAG).
    """
    taxid = 10090  # Mouse study

    # Population ids, study mapping, enrichment object, and its GO DAG
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)
    godag = goeaobj.obo_dag

    # Enrichment on the study ids
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]
    results_nts = MgrNtGOEAs(results_sig).get_goea_nts_all()
    assert results_nts
    ns2goids = get_ns2gos(results_sig)

    # Sub-DAG built from the GO ids of the significant results
    gosubdag = GoSubDag({res.GO for res in results_sig}, godag)

    plot_results(
        "test_plot_goids_a_goea_{NS}.png",
        results_sig,
        id2symbol=id2symbol,
        parentcnt=True,
        childcnt=True)
    for nspc, goids in ns2goids.items():
        plt_goids(gosubdag, "test_plot_goids_b_{NS}.png".format(NS=nspc), goids)
        plot_gos("test_plot_goids_c_{NS}.png".format(NS=nspc), goids, godag)
def test_example():
    """Check that run_study yields nothing for empty or unknown study ids.

    Two studies are run: an empty set and a set holding only ``'BADVAL'``.
    Each must produce a falsy result. The second result is then passed to
    ``GoeaPrintFunctions.print_results`` with ``pval=None``.
    """
    taxid = 10090  # Mouse study

    # Population ids and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    # The study file is still read here, but its mapping is not used below
    get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)

    # Case 1: no study genes at all
    results = goeaobj.run_study(set())
    assert not results, 'NO STUDY GENES TEST FAILED: {R}'.format(R=results)

    # Case 2: the only study id is the string 'BADVAL'
    results = goeaobj.run_study({'BADVAL'})
    assert not results, 'NO VALID STUDY GENES TEST FAILED: {R}'.format(R=results)

    # Print the results of the second case
    printer = GoeaPrintFunctions()
    printer.print_results(results, pval=None)
    printer.print_date()
def test_example():
    """Run GOEA on an empty study and on a study of one bad id; expect no results.

    After both checks pass, the result of the last run is printed through
    ``GoeaPrintFunctions.print_results`` with ``pval=None``, followed by
    ``print_date``.
    """
    taxid = 10090  # Mouse study

    pop_ids = GeneID2nt_mus.keys()
    # Mapping is read but never consulted in this test
    get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)

    # (study ids, failure message) pairs, run in this order
    cases = (
        (set(), 'NO STUDY GENES TEST FAILED: {R}'),
        ({'BADVAL'}, 'NO VALID STUDY GENES TEST FAILED: {R}'),
    )
    results = None
    for study_ids, failmsg in cases:
        results = goeaobj.run_study(study_ids)
        assert not results, failmsg.format(R=results)

    # `results` now holds the outcome of the 'BADVAL' run
    printer = GoeaPrintFunctions()
    printer.print_results(results, pval=None)
    printer.print_date()
def _get_results(godag, propagate_counts, relationships, prt=sys.stdout):
    """Build a GOEnrichmentStudy for the mouse data and return its run_study output.

    Args:
        godag: GO DAG object handed to GOEnrichmentStudy.
        propagate_counts: Forwarded as the ``propagate_counts`` keyword.
        relationships: Forwarded as the ``relationships`` keyword.
        prt: Stream forwarded to ``run_study``. Defaults to ``sys.stdout``.

    Returns:
        Whatever ``run_study`` returns for the nbt.3102 study.
    """
    taxid = 10090  # Mouse study
    population = set(GeneID2nt_mus.keys())
    associations = get_assoc_ncbi_taxids([taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")

    # Fixed settings plus the two caller-controlled options
    study_kws = {
        'propagate_counts': propagate_counts,
        'relationships': relationships,
        'alpha': 0.05,
        'methods': ['fdr_bh'],
    }
    goeaobj = GOEnrichmentStudy(population, associations, godag, **study_kws)
    return goeaobj.run_study(study, prt=prt)
def _get_results(godag, propagate_counts, relationships, prt=sys.stdout):
    """Run one GOEA over the nbt.3102 study with the given options.

    Args:
        godag: GO DAG object passed to GOEnrichmentStudy.
        propagate_counts: Value for GOEnrichmentStudy's ``propagate_counts``.
        relationships: Value for GOEnrichmentStudy's ``relationships``.
        prt: Stream given to ``run_study``. Defaults to ``sys.stdout``.

    Returns:
        The value returned by ``run_study``.
    """
    mouse_taxid = 10090
    pop_ids = set(GeneID2nt_mus.keys())
    geneid2gos = get_assoc_ncbi_taxids([mouse_taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    return GOEnrichmentStudy(
        pop_ids,
        geneid2gos,
        godag,
        propagate_counts=propagate_counts,
        relationships=relationships,
        alpha=0.05,
        methods=['fdr_bh'],
    ).run_study(study, prt=prt)
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Collect ``get_pval_uncorr`` output for each named p-value function.

    The GO DAG is loaded from ``go-basic.obo`` in the current working
    directory. One GOEnrichmentStudy is created per name, with the name
    passed as ``pvalcalc``.

    Args:
        pvalfnc_names: Iterable of names, each used as ``pvalcalc``.
        prt: Stream passed to ``get_godag`` and ``get_pval_uncorr``.
            Defaults to ``sys.stdout``.

    Returns:
        dict keyed by name, holding that study's ``get_pval_uncorr`` result.
    """
    mouse_taxid = 10090
    fin_obo = os.path.join(os.getcwd(), "go-basic.obo")
    godag = get_godag(fin_obo, prt, loading_bar=None)
    pop_ids = GeneID2nt_mus.keys()
    geneid2gos = get_assoc_ncbi_taxids([mouse_taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    return {
        name: GOEnrichmentStudy(
            pop_ids,
            geneid2gos,
            godag,
            propagate_counts=False,
            alpha=0.05,
            methods=None,
            pvalcalc=name,
        ).get_pval_uncorr(study, prt)
        for name in pvalfnc_names
    }
def test_example():
    """Run a GOEA and print the significant results via the enrichment object.

    Results with ``p_fdr_bh < 0.05`` are passed to
    ``goeaobj.print_results``.
    """
    mouse_taxid = 10090

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, mouse_taxid)

    # Enrichment on the study ids, then print only the significant part
    results_all = goeaobj.run_study(study_ids)
    goeaobj.print_results(
        [res for res in results_all if res.p_fdr_bh < 0.05])
def test_example():
    """Run a GOEA and write a filtered subset of results to two xlsx files.

    A result is kept when ``p_fdr_bh < 0.05`` and its ``study_count``
    exceeds one tenth of its ``study_n``. The subset is written once with
    the study mapping as ``itemid2name`` and once without it.
    """
    mouse_taxid = 10090

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, mouse_taxid)

    # Enrichment on the study ids; both filter conditions applied in one pass
    results_all = goeaobj.run_study(study_ids)
    results_sub = [
        res for res in results_all
        if res.p_fdr_bh < 0.05 and res.study_count > res.study_n/10
    ]

    # Same subset, with and without the item-name mapping
    goeaobj.wr_xlsx("nbt3102_symbols.xlsx", results_sub, itemid2name=id2symbol)
    goeaobj.wr_xlsx("nbt3102_geneids.xlsx", results_sub)
def test_example():
    """Write significant GOEA results to a tsv file and echo them to stdout.

    The file ``goea_results.tsv`` is written under ``REPO`` and must exist
    afterwards. The same results are then sent to ``sys.stdout`` with
    ``prt_tsv``.
    """
    tsv_path = os.path.join(REPO, 'goea_results.tsv')
    mouse_taxid = 10090

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, mouse_taxid)

    # Enrichment on the study ids
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]

    # File output first, then the same content on stdout
    goeaobj.wr_tsv(tsv_path, results_sig)
    assert os.path.exists(tsv_path)
    goeaobj.prt_tsv(sys.stdout, results_sig)
def test_example():
    """Run a GOEA and print the significant results via GoeaPrintFunctions.

    Results with ``p_fdr_bh < 0.05`` go to ``print_results``, followed by a
    call to ``print_date``.
    """
    mouse_taxid = 10090

    # Population ids, study mapping, and the enrichment object
    pop_ids = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, mouse_taxid)

    # Enrichment on the study ids
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]

    printer = GoeaPrintFunctions()
    printer.print_results(results_sig)
    printer.print_date()
def test_example(log=sys.stdout):
    """Run a GOEA on the nbt.3102 study, then write reports and many plots.

    After the enrichment run the function writes xlsx/txt result files and
    works through five follow-up items: words in GO names tied to many study
    genes, three GO terms singled out for the paper, one plot per
    significant GO term, two hand-curated GO id lists, and the intersection
    of significant study items with cell-cycle genes.

    Args:
        log: Stream handed to the word/gene report helpers and to the
            cell-cycle gene lookup. Defaults to ``sys.stdout``.
    """
    # --------------------------------------------------------------------
    # Gene Ontology Enrichment Analysis (GOEA)
    # --------------------------------------------------------------------
    taxid = 10090  # Mouse study
    pop_ids = GeneID2nt_mus.keys()
    # Used both as the study given to run_study and as id2symbol for plots
    geneid2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)

    results_all = goeaobj.run_study(geneid2symbol)
    results_sig = [rec for rec in results_all if rec.p_fdr_bh < 0.05]
    compare_results(results_all)
    sig_items = get_study_items(results_sig)

    # Result files
    goeaobj.wr_xlsx("nbt3102.xlsx", results_sig)
    goeaobj.wr_txt("nbt3102_sig.txt", results_sig)
    goeaobj.wr_txt("nbt3102_all.txt", results_all)

    # --------------------------------------------------------------------
    # Further examination of GOEA results...
    # --------------------------------------------------------------------
    godag = goeaobj.obo_dag
    # For review: Figures can be saved in .jpg, .gif, .tif or .eps, at 150 dpi
    dpi_review = 150

    # --------------------------------------------------------------------
    # Item 1) Words in GO names associated with large numbers of study genes
    # --------------------------------------------------------------------
    prt_word2genecnt("nbt3102_genecnt_GOword.txt", results_sig, log)
    # Curated words; 'ribosom' is a stem covering 'ribosomal' and 'ribosome'
    words = [
        'RNA', 'translation', 'mitochondr', 'ribosom',
        'adhesion', 'endoplasmic', 'nucleotide', 'apoptotic', 'myelin',
    ]
    word2ns2gos = get_word2NS2gos(words, results_sig)
    go2res = {rec.GO: rec for rec in results_sig}
    prt_word_GO_genes(
        "nbt3102_GO_word_genes.txt", word2ns2gos, go2res, geneid2symbol, log)

    # One plot per word, per namespace present for that word, per format
    for word, ns2gos in word2ns2gos.items():
        # Only the "RNA" plots are also written as tif and eps
        fmts = ['png', 'tif', 'eps'] if word == "RNA" else ['png']
        for nspc in ['BP', 'MF', 'CC']:
            if nspc not in ns2gos:
                continue
            goid2goobj = {goid: go2res[goid].goterm for goid in ns2gos[nspc]}
            # dpi: 150 for review, 1200 for publication
            for dpi_word in [150]:
                for fmt in fmts:
                    fout_img = "nbt3102_{WORD}_{NS}_dpi{DPI}.{FMT}".format(
                        WORD=word, NS=nspc, DPI=dpi_word, FMT=fmt)
                    plot_goid2goobj(
                        fout_img,
                        goid2goobj,  # source GOs and their GOTerm object
                        items_p_line=3,
                        study_items=6,  # Max gene symbols printed per GO term
                        id2symbol=geneid2symbol,
                        goea_results=results_all,  # pvals used for coloring
                        dpi=dpi_word)

    # --------------------------------------------------------------------
    # Item 2) Explore findings of Nature paper:
    #
    #     Gene Ontology (GO) enrichment analysis showed that the
    #     differentially expressed genes contained statistically
    #     significant enrichments of genes involved in
    #         glycolysis,
    #         cellular response to IL-4 stimulation and
    #         positive regulation of B-cell proliferation
    # --------------------------------------------------------------------
    paper_goids = [
        'GO:0006096',  # BP 4.24e-12 10 glycolytic process
        'GO:0071353',  # BP 7.45e-06 5 cellular response to interleukin-4
        'GO:0030890',  # BP 8.22e-07 7 positive regulation of B cell proliferation
    ]
    plot_gos("nbt3102_GOs.png", paper_goids, godag, dpi=dpi_review)
    plot_gos(
        "nbt3102_GOs_genecnt.png", paper_goids, godag,
        goea_results=results_all, dpi=dpi_review)
    plot_gos(
        "nbt3102_GOs_genelst.png", paper_goids, godag,
        study_items=True, goea_results=results_all, dpi=dpi_review)
    plot_gos(
        "nbt3102_GOs_symlst.png", paper_goids, godag,
        study_items=True, id2symbol=geneid2symbol,
        goea_results=results_all, dpi=dpi_review)
    plot_gos(
        "nbt3102_GOs_symlst_trunc.png", paper_goids, godag,
        study_items=5, id2symbol=geneid2symbol,
        goea_results=results_all, dpi=dpi_review)
    plot_gos(
        "nbt3102_GOs_GO0005743.png", ["GO:0005743"], godag,
        items_p_line=2, study_items=6, id2symbol=geneid2symbol,
        goea_results=results_all, dpi=dpi_review)

    # --------------------------------------------------------------------
    # Item 3) Create one GO sub-plot per significant GO term from study
    # --------------------------------------------------------------------
    for rec in results_sig:
        fout_png = "nbt3102_{NS}_{GO}.png".format(
            GO=rec.GO.replace(':', '_'), NS=rec.NS)
        plot_goid2goobj(
            fout_png,
            {rec.GO: rec.goterm},  # source GO and its GOTerm object
            study_items=15,  # Max gene symbols printed per GO term
            id2symbol=geneid2symbol,
            goea_results=results_all,  # pvals used for coloring
            dpi=dpi_review)

    # --------------------------------------------------------------------
    # Item 4) Explore using manually curated lists of GO terms
    # --------------------------------------------------------------------
    ribosome_goids = [
        'GO:0030529',  # CC D03 intracellular ribonucleoprotein complex (42 genes)
        'GO:0015934',  # CC D05 large ribosomal subunit (4 genes)
        'GO:0015935',  # CC D05 small ribosomal subunit (13 genes)
        'GO:0022625',  # CC D06 cytosolic large ribosomal subunit (16 genes)
        'GO:0022627',  # CC D06 cytosolic small ribosomal subunit (19 genes)
        'GO:0036464',  # CC D06 cytoplasmic ribonucleoprotein granule (4 genes)
        'GO:0005840',  # CC D05 ribosome (35 genes)
        'GO:0005844',  # CC D04 polysome (6 genes)
    ]
    # This one plot is given the significant results, not the full list
    plot_gos(
        "nbt3102_CC_ribosome.png", ribosome_goids, godag,
        study_items=6, id2symbol=geneid2symbol, items_p_line=3,
        goea_results=results_sig, dpi=dpi_review)

    rna_goids = [
        'GO:0003723',  # MF D04 RNA binding (32 genes)
        'GO:0044822',  # MF D05 poly(A) RNA binding (86 genes)
        'GO:0003729',  # MF D06 mRNA binding (11 genes)
        'GO:0019843',  # MF D05 rRNA binding (6 genes)
        'GO:0003746',  # MF D06 translation elongation factor activity (5 genes)
    ]
    plot_gos(
        "nbt3102_MF_RNA_genecnt.png", rna_goids, godag,
        goea_results=results_all, dpi=150)
    for dpi_out in [150, 1200]:  # 150 for review, 1200 for publication
        plot_gos(
            "nbt3102_MF_RNA_dpi{DPI}.png".format(DPI=dpi_out), rna_goids, godag,
            study_items=6, id2symbol=geneid2symbol, items_p_line=3,
            goea_results=results_all, dpi=dpi_out)

    # --------------------------------------------------------------------
    # Item 5) Are any significant geneids related to cell cycle?
    # --------------------------------------------------------------------
    import test_genes_cell_cycle as CC
    cell_cycle_genes = CC.get_genes_cell_cycle(taxid, log=log)
    cell_cycle_sig = cell_cycle_genes.intersection(sig_items)
    CC.prt_genes("nbt3102_cell_cycle.txt", cell_cycle_sig, taxid, log=None)
def test_example(log=sys.stdout):
    """Run the nbt.3102 GOEA and produce its report files and plots.

    The run is followed by result files (xlsx, txt) and five follow-up
    items: GO-name word reports and plots, plots of three GO terms named for
    the paper, one plot per significant GO term, plots of two curated GO id
    lists, and a cell-cycle gene report.

    Args:
        log: Stream passed to ``prt_word2genecnt``, ``prt_word_GO_genes``
            and ``get_genes_cell_cycle``. Defaults to ``sys.stdout``.
    """
    # --------------------------------------------------------------------
    # Gene Ontology Enrichment Analysis (GOEA)
    # --------------------------------------------------------------------
    taxid = 10090  # Mouse study
    population = GeneID2nt_mus.keys()
    # The same object serves as the study and as the id2symbol plot argument
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goeaobj = get_goeaobj("fdr_bh", population, taxid)

    res_all = goeaobj.run_study(study)
    res_sig = [res for res in res_all if res.p_fdr_bh < 0.05]
    compare_results(res_all)
    study_items_sig = get_study_items(res_sig)

    # Result files
    goeaobj.wr_xlsx("nbt3102.xlsx", res_sig)
    goeaobj.wr_txt("nbt3102_sig.txt", res_sig)
    goeaobj.wr_txt("nbt3102_all.txt", res_all)

    # --------------------------------------------------------------------
    # Further examination of GOEA results...
    # --------------------------------------------------------------------
    obo = goeaobj.obo_dag
    # For review: Figures can be saved in .jpg, .gif, .tif or .eps, at 150 dpi
    dpi = 150
    # Keywords shared by the plots that show symbols with all-result coloring
    sym_kws = {'id2symbol': study, 'goea_results': res_all, 'dpi': dpi}

    # --------------------------------------------------------------------
    # Item 1) Words in GO names associated with large numbers of study genes
    # --------------------------------------------------------------------
    prt_word2genecnt("nbt3102_genecnt_GOword.txt", res_sig, log)
    # Curated words; 'ribosom' stands in for 'ribosomal' and 'ribosome'
    freq_words = [
        'RNA',
        'translation',
        'mitochondr',
        'ribosom',
        'adhesion',
        'endoplasmic',
        'nucleotide',
        'apoptotic',
        'myelin',
    ]
    word2nspc2gos = get_word2NS2gos(freq_words, res_sig)
    goid2res = {res.GO: res for res in res_sig}
    prt_word_GO_genes(
        "nbt3102_GO_word_genes.txt", word2nspc2gos, goid2res, study, log)

    for word, nspc2gos in word2nspc2gos.items():
        # tif and eps copies are made for the "RNA" word only
        fmts = ['png', 'tif', 'eps'] if word == "RNA" else ['png']
        for nspc in ['BP', 'MF', 'CC']:
            if nspc not in nspc2gos:
                continue
            goid2term = {go: goid2res[go].goterm for go in nspc2gos[nspc]}
            # dpi: 150 for review, 1200 for publication
            for word_dpi in [150]:
                for fmt in fmts:
                    plot_goid2goobj(
                        "nbt3102_{WORD}_{NS}_dpi{DPI}.{FMT}".format(
                            WORD=word, NS=nspc, DPI=word_dpi, FMT=fmt),
                        goid2term,  # source GOs and their GOTerm object
                        items_p_line=3,
                        study_items=6,  # Max gene symbols printed per GO term
                        id2symbol=study,
                        goea_results=res_all,  # pvals used for coloring
                        dpi=word_dpi)

    # --------------------------------------------------------------------
    # Item 2) Explore findings of Nature paper:
    #
    #     Gene Ontology (GO) enrichment analysis showed that the
    #     differentially expressed genes contained statistically
    #     significant enrichments of genes involved in
    #         glycolysis,
    #         cellular response to IL-4 stimulation and
    #         positive regulation of B-cell proliferation
    # --------------------------------------------------------------------
    goids_paper = [
        'GO:0006096',  # BP 4.24e-12 10 glycolytic process
        'GO:0071353',  # BP 7.45e-06 5 cellular response to interleukin-4
        'GO:0030890',  # BP 8.22e-07 7 positive regulation of B cell proliferation
    ]
    plot_gos("nbt3102_GOs.png", goids_paper, obo, dpi=dpi)
    plot_gos("nbt3102_GOs_genecnt.png", goids_paper, obo,
             goea_results=res_all, dpi=dpi)
    plot_gos("nbt3102_GOs_genelst.png", goids_paper, obo,
             study_items=True, goea_results=res_all, dpi=dpi)
    plot_gos("nbt3102_GOs_symlst.png", goids_paper, obo,
             study_items=True, **sym_kws)
    plot_gos("nbt3102_GOs_symlst_trunc.png", goids_paper, obo,
             study_items=5, **sym_kws)
    plot_gos("nbt3102_GOs_GO0005743.png", ["GO:0005743"], obo,
             items_p_line=2, study_items=6, **sym_kws)

    # --------------------------------------------------------------------
    # Item 3) Create one GO sub-plot per significant GO term from study
    # --------------------------------------------------------------------
    for res in res_sig:
        png = "nbt3102_{NS}_{GO}.png".format(
            GO=res.GO.replace(':', '_'), NS=res.NS)
        # study_items caps the gene symbols printed in each GO term
        plot_goid2goobj(png, {res.GO: res.goterm}, study_items=15, **sym_kws)

    # --------------------------------------------------------------------
    # Item 4) Explore using manually curated lists of GO terms
    # --------------------------------------------------------------------
    goids_ribosome = [
        'GO:0030529',  # CC D03 intracellular ribonucleoprotein complex (42 genes)
        'GO:0015934',  # CC D05 large ribosomal subunit (4 genes)
        'GO:0015935',  # CC D05 small ribosomal subunit (13 genes)
        'GO:0022625',  # CC D06 cytosolic large ribosomal subunit (16 genes)
        'GO:0022627',  # CC D06 cytosolic small ribosomal subunit (19 genes)
        'GO:0036464',  # CC D06 cytoplasmic ribonucleoprotein granule (4 genes)
        'GO:0005840',  # CC D05 ribosome (35 genes)
        'GO:0005844',  # CC D04 polysome (6 genes)
    ]
    # Unlike the other plots, this one receives only the significant results
    plot_gos("nbt3102_CC_ribosome.png", goids_ribosome, obo,
             study_items=6, id2symbol=study, items_p_line=3,
             goea_results=res_sig, dpi=dpi)

    goids_rna = [
        'GO:0003723',  # MF D04 RNA binding (32 genes)
        'GO:0044822',  # MF D05 poly(A) RNA binding (86 genes)
        'GO:0003729',  # MF D06 mRNA binding (11 genes)
        'GO:0019843',  # MF D05 rRNA binding (6 genes)
        'GO:0003746',  # MF D06 translation elongation factor activity (5 genes)
    ]
    plot_gos("nbt3102_MF_RNA_genecnt.png", goids_rna, obo,
             goea_results=res_all, dpi=150)
    for out_dpi in [150, 1200]:  # 150 for review, 1200 for publication
        plot_gos("nbt3102_MF_RNA_dpi{DPI}.png".format(DPI=out_dpi),
                 goids_rna, obo,
                 study_items=6, id2symbol=study, items_p_line=3,
                 goea_results=res_all, dpi=out_dpi)

    # --------------------------------------------------------------------
    # Item 5) Are any significant geneids related to cell cycle?
    # --------------------------------------------------------------------
    import test_genes_cell_cycle as CC
    genes_cc = CC.get_genes_cell_cycle(taxid, log=log)
    genes_cc_sig = genes_cc.intersection(study_items_sig)
    CC.prt_genes("nbt3102_cell_cycle.txt", genes_cc_sig, taxid, log=None)