Beispiel #1
0
def test_example():
    """Run a mouse GOEA and plot the significant GO terms in three ways.

    The significant records (p_fdr_bh < 0.05) are plotted with plot_results,
    then, per namespace, with plt_goids (using a GoSubDag) and plot_gos
    (using the full GO DAG held by the GOEA object).
    """
    mouse_taxid = 10090  # Mouse study

    # Population ids, study id->symbol data, and the GOEA object
    population = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study = id2symbol.keys()
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)
    godag = goea.obo_dag

    # Run the enrichment and keep only the significant records
    results_all = goea.run_study(study)
    results_sig = []
    for res in results_all:
        if res.p_fdr_bh < 0.05:
            results_sig.append(res)
    results_nt = MgrNtGOEAs(results_sig).get_goea_nts_all()
    assert results_nt
    ns2gos = get_ns2gos(results_sig)

    # Plot a) straight from the GOEA records; the "{NS}" placeholder is passed
    # through to plot_results unformatted.
    sig_goids = {res.GO for res in results_sig}
    gosubdag = GoSubDag(sig_goids, godag)
    plot_results(
        "test_plot_goids_a_goea_{NS}.png",
        results_sig,
        id2symbol=id2symbol,
        parentcnt=True,
        childcnt=True,
    )

    # Plots b) and c): one image per namespace for each plotting function
    for namespace, goids in ns2gos.items():
        fout_subdag = "test_plot_goids_b_{NS}.png".format(NS=namespace)
        fout_godag = "test_plot_goids_c_{NS}.png".format(NS=namespace)
        plt_goids(gosubdag, fout_subdag, goids)
        plot_gos(fout_godag, goids, godag)
Beispiel #2
0
def test_example(prt=sys.stdout):
    """Exercise GoeaResults on GOEA records and on their namedtuple form, then plot.

    Args:
        prt: Stream forwarded to run() for each GoeaResults object.
    """
    mouse_taxid = 10090  # Mouse study

    # Population ids, study id->symbol data, and the GOEA object
    population = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study = id2symbol.keys()
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)

    # Run the enrichment; keep significant records and their namedtuple version
    results_all = goea.run_study(study)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]
    results_nt = MgrNtGOEAs(results_sig).get_goea_nts_all()

    # Manage both representations of the results with GoeaResults
    for records in (results_sig, results_nt):
        run(GoeaResults(records), prt)

    # Plot the significant records; "{NS}" is passed to plot_results unformatted
    plot_results(
        os.path.join(REPO, "test_plot_objgoearesults_{NS}.png"),
        results_sig,
        id2symbol=id2symbol,
    )
def test_wr_methods(log=sys.stdout):
    """Write a subset of GOEA fields through two writer paths and compare the TSVs.

    Args:
        log: Stream forwarded to both write helpers.

    Raises:
        AssertionError: If 'nbt3102_subset_obj.tsv' and 'nbt3102_subset_nt.tsv'
            do not compare equal.
    """
    def _is_significant(ntres):
        """Keep a result only when its FDR-corrected p-value is below 0.05."""
        return ntres.p_fdr_bh < 0.05

    # 1. GOEA: load the population, build the GOEA object, run the study.
    #    The object returned by get_geneid2symbol is passed directly as the study.
    mouse_taxid = 10090  # Mouse study
    population = GeneID2nt_mus.keys()  # Mouse protein-coding genes
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    results = goea.run_study(study, keep_if=_is_significant)

    # 2. Writer parameters. The names inside the format string are the
    #    namedtuple field names; prt_flds limits output to those fields.
    pattern = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    params = dict(
        prtfmt=pattern,                      # text output format
        fld2fmt={'p_fdr_bh' : '{:8.2e}'},    # p-value format for tsv and xlsx
        prt_flds=get_fmtflds(pattern),       # subset of fields to print
    )

    # 2a. Writers reached through the GOEA object
    _wr_3fmt_goeaobj(results, goea, params, log)
    # 2b. Writers applied directly to the list of namedtuples
    _wr_3fmt_wrtbl(results, params, log)

    # Both paths are expected to have produced identical TSV files
    assert filecmp.cmp('nbt3102_subset_obj.tsv', 'nbt3102_subset_nt.tsv')
def test_example():
    """Run a mouse GOEA and write four xlsx files: sig/all x symbols/geneids."""
    mouse_taxid = 10090  # Mouse study

    # Population ids, study id->symbol data, and the GOEA object
    population = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study = id2symbol.keys()
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)

    # Run the enrichment and pick out the significant records
    results_all = goea.run_study(study)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]

    # Each row: output file, records to write, extra keyword arguments.
    # Passing itemid2name writes study genes by name; omitting it leaves the ids.
    with_symbols = {'itemid2name': id2symbol}
    outputs = (
        ("nbt3102_sig_symbols.xlsx", results_sig, with_symbols),
        ("nbt3102_sig_geneids.xlsx", results_sig, {}),
        ("nbt3102_all_symbols.xlsx", results_all, with_symbols),
        ("nbt3102_all_geneids.xlsx", results_all, {}),
    )
    for fout_xlsx, records, kws in outputs:
        goea.wr_xlsx(fout_xlsx, records, **kws)
Beispiel #5
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Collect the result of _get_pval_uncorr for each named p-value function.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc`` to a
            fresh GOEnrichmentStudy.
        prt: Stream forwarded to the download, study-loading and p-value calls.

    Returns:
        dict mapping each name to what ``_get_pval_uncorr`` returned for it.
    """
    mouse_taxid = 10090  # Mouse study
    fin_obo = download_go_basic_obo(prt=prt)
    godag = GODag(fin_obo)
    population = GeneID2nt_mus.keys()
    assoc = get_assoc_ncbi_taxids([mouse_taxid])
    study = _get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx", prt)

    def _pvals_for(pvalcalc):
        """Build a study that uses *pvalcalc* and return its p-values."""
        study_obj = GOEnrichmentStudy(
            population,
            assoc,
            godag,
            propagate_counts=False,
            alpha=0.05,
            methods=None,
            pvalcalc=pvalcalc,
        )
        return study_obj._get_pval_uncorr(study, prt)

    return {name: _pvals_for(name) for name in pvalfnc_names}
Beispiel #6
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Run one GOEnrichmentStudy per p-value function name and gather the p-values.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc``.
        prt: Stream forwarded to the download, study-loading and p-value calls.

    Returns:
        dict keyed by name, holding the value ``_get_pval_uncorr`` returned.
    """
    mouse_taxid = 10090  # Mouse study
    godag = GODag(download_go_basic_obo(prt=prt))
    population = GeneID2nt_mus.keys()
    assoc = get_assoc_ncbi_taxids([mouse_taxid])
    study = _get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx", prt)

    # Settings shared by every study; only pvalcalc varies between runs
    fixed_opts = {'propagate_counts': False, 'alpha': 0.05, 'methods': None}

    name2pvals = {}
    for pvalcalc in pvalfnc_names:
        study_obj = GOEnrichmentStudy(
            population, assoc, godag, pvalcalc=pvalcalc, **fixed_opts)
        name2pvals[pvalcalc] = study_obj._get_pval_uncorr(study, prt)
    return name2pvals
Beispiel #7
0
def get_goea_results(keep_if=None):
    """Run a mouse GOEA and return the results with the objects used to produce them.

    Args:
        keep_if: Optional predicate forwarded to ``run_study``. When None,
            a predicate that keeps records with p_fdr_bh < 0.05 is used.

    Returns:
        dict with keys 'goea_results', 'goeaobj', 'geneids_study',
        'geneids_pop' and 'obo_dag'.
    """
    def _is_significant(ntres):
        """Keep a result only when its FDR-corrected p-value is below 0.05."""
        return ntres.p_fdr_bh < 0.05

    selector = _is_significant if keep_if is None else keep_if

    # Load the population ids and build the GOEA object
    mouse_taxid = 10090  # Mouse study
    population = GeneID2nt_mus.keys()  # Mouse protein-coding genes
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)

    # Run the study; the object from get_geneid2symbol is passed as-is
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    results = goea.run_study(study, keep_if=selector)

    return {
        'goea_results' : results,
        'goeaobj' : goea,
        'geneids_study' : study,
        'geneids_pop' : population,
        'obo_dag' : goea.obo_dag,
    }
Beispiel #8
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Gather the result of get_pval_uncorr for each named p-value function.

    The GO DAG is read from "go-basic.obo" in the current working directory.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc``.
        prt: Stream forwarded to get_godag and to get_pval_uncorr.

    Returns:
        dict keyed by name, holding the value ``get_pval_uncorr`` returned.
    """
    mouse_taxid = 10090  # Mouse study
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), prt, loading_bar=None)
    population = GeneID2nt_mus.keys()
    assoc = get_assoc_ncbi_taxids([mouse_taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")

    def _run(pvalcalc):
        """Build a study that uses *pvalcalc* and return its p-values."""
        return GOEnrichmentStudy(
            population,
            assoc,
            godag,
            propagate_counts=False,
            alpha=0.05,
            methods=None,
            pvalcalc=pvalcalc,
        ).get_pval_uncorr(study, prt)

    return dict((name, _run(name)) for name in pvalfnc_names)
def get_goea_results(keep_if=None):
    """Run a mouse GOEA and bundle the results with the inputs that produced them.

    Args:
        keep_if: Optional predicate forwarded to ``run_study``; when None,
            records with p_fdr_bh < 0.05 are kept.

    Returns:
        dict with keys 'goea_results', 'goeaobj', 'geneids_study',
        'geneids_pop' and 'obo_dag'.
    """
    if keep_if is None:
        # Default filter: keep only significant results
        keep_if = lambda rec: rec.p_fdr_bh < 0.05

    bundle = {}

    # Initialize: population gene ids and the GOEA object
    mouse_taxid = 10090  # Mouse study
    population = GeneID2nt_mus.keys()  # Mouse protein-coding genes
    goea = get_goeaobj("fdr_bh", population, mouse_taxid)

    # Run the study; the object from get_geneid2symbol is passed as-is
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    bundle['goea_results'] = goea.run_study(study, keep_if=keep_if)

    bundle['goeaobj'] = goea
    bundle['geneids_study'] = study
    bundle['geneids_pop'] = population
    bundle['obo_dag'] = goea.obo_dag
    return bundle
Beispiel #10
0
def test_example(log=sys.stdout):
    """Run a mouse GOEA on the nbt.3102 study genes, then write and plot the results.

    After the enrichment run the results are written to xlsx/txt files and
    examined in five steps: words in GO names, the GO terms reported in the
    paper, one plot per significant term, hand-curated GO lists, and
    cell-cycle genes.

    Args:
        log: Stream handed to the word-report helpers and to the cell-cycle
            gene lookup.
    """
    taxid = 10090  # Mouse study

    # ----------------------------------------------------------------
    # Gene Ontology Enrichment Analysis (GOEA)
    # ----------------------------------------------------------------
    population = GeneID2nt_mus.keys()
    id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goea = get_goeaobj("fdr_bh", population, taxid)
    # The object from get_geneid2symbol is passed as the study as-is
    results_all = goea.run_study(id2symbol)
    results_sig = []
    for res in results_all:
        if res.p_fdr_bh < 0.05:
            results_sig.append(res)
    compare_results(results_all)
    sig_geneids = get_study_items(results_sig)

    # Write GOEA results to files
    goea.wr_xlsx("nbt3102.xlsx", results_sig)
    goea.wr_txt("nbt3102_sig.txt", results_sig)
    goea.wr_txt("nbt3102_all.txt", results_all)
    # The plot_results() plots of all significant GO terms stay disabled (large plots).

    # ----------------------------------------------------------------
    # Further examination of GOEA results...
    # ----------------------------------------------------------------
    godag = goea.obo_dag
    review_dpi = 150  # For review: Figures can be saved in .jpg, .gif, .tif or .eps, at 150 dpi
    # Keyword arguments shared by plots that print symbols and color by all p-values
    sym_all = {'id2symbol': id2symbol, 'goea_results': results_all}

    # ----------------------------------------------------------------
    # Item 1) Words in GO names associated with large numbers of study genes
    # ----------------------------------------------------------------
    # Which GO-name words go with the largest number of study genes?
    prt_word2genecnt("nbt3102_genecnt_GOword.txt", results_sig, log)
    # Curated selection of such words ('ribosom' covers 'ribosomal', 'ribosome')
    freq_words = [
        'RNA', 'translation', 'mitochondr', 'ribosom',
        'adhesion', 'endoplasmic', 'nucleotide', 'apoptotic', 'myelin',
    ]
    # GO ids, per word and namespace, for the chosen words
    word2ns2gos = get_word2NS2gos(freq_words, results_sig)
    go2res = {}
    for res in results_sig:
        go2res[res.GO] = res
    # Report the words, the significant GO terms for each word, and study genes
    prt_word_GO_genes("nbt3102_GO_word_genes.txt", word2ns2gos, go2res, id2symbol, log)
    # Plot each set of GO ids along with study gene info
    for word, ns2gos in word2ns2gos.items():
        # Only the "RNA" plots are also saved as tif and eps
        if word == "RNA":
            fmts = ['png', 'tif', 'eps']
        else:
            fmts = ['png']
        for namespace in ['BP', 'MF', 'CC']:
            if namespace not in ns2gos:
                continue
            goid2goobj = {goid: go2res[goid].goterm for goid in ns2gos[namespace]}
            # dpi: 150 for review, 1200 for publication (only 150 is generated)
            for plot_dpi in [150]:
                for fmt in fmts:
                    fout = "nbt3102_{WORD}_{NS}_dpi{DPI}.{FMT}".format(
                        WORD=word, NS=namespace, DPI=plot_dpi, FMT=fmt)
                    plot_goid2goobj(
                        fout,
                        goid2goobj,       # source GOs and their GOTerm object
                        items_p_line=3,
                        study_items=6,    # Max number of gene symbols per GO term
                        dpi=plot_dpi,
                        **sym_all)

    # ----------------------------------------------------------------
    # Item 2) Explore findings of Nature paper:
    #
    #     Gene Ontology (GO) enrichment analysis showed that the
    #     differentially expressed genes contained statistically
    #     significant enrichments of genes involved in
    #         glycolysis,
    #         cellular response to IL-4 stimulation and
    #         positive regulation of B-cell proliferation
    # ----------------------------------------------------------------
    paper_goids = [
        'GO:0006096', # BP 4.24e-12 10 glycolytic process
        'GO:0071353', # BP 7.45e-06  5 cellular response to interleukin-4
        'GO:0030890', # BP 8.22e-07  7 positive regulation of B cell proliferation
    ]
    plot_gos("nbt3102_GOs.png", paper_goids, godag, dpi=review_dpi)
    plot_gos("nbt3102_GOs_genecnt.png", paper_goids, godag,
             goea_results=results_all, dpi=review_dpi)
    plot_gos("nbt3102_GOs_genelst.png", paper_goids, godag,
             study_items=True, goea_results=results_all, dpi=review_dpi)
    plot_gos("nbt3102_GOs_symlst.png", paper_goids, godag,
             study_items=True, dpi=review_dpi, **sym_all)
    plot_gos("nbt3102_GOs_symlst_trunc.png", paper_goids, godag,
             study_items=5, dpi=review_dpi, **sym_all)
    plot_gos("nbt3102_GOs_GO0005743.png", ["GO:0005743"], godag,
             items_p_line=2, study_items=6, dpi=review_dpi, **sym_all)

    # ----------------------------------------------------------------
    # Item 3) Create one GO sub-plot per significant GO term from study
    # ----------------------------------------------------------------
    for res in results_sig:
        fout_png = "nbt3102_{NS}_{GO}.png".format(GO=res.GO.replace(':', '_'), NS=res.NS)
        plot_goid2goobj(
            fout_png,
            {res.GO: res.goterm},  # source GO and its GOTerm object
            study_items=15,        # Max number of gene symbols per GO term
            dpi=review_dpi,
            **sym_all)

    # ----------------------------------------------------------------
    # Item 4) Explore using manually curated lists of GO terms
    # ----------------------------------------------------------------
    ribosome_goids = [
        'GO:0030529', # CC D03 intracellular ribonucleoprotein complex (42 genes)
        'GO:0015934', # CC D05 large ribosomal subunit (4 genes)
        'GO:0015935', # CC D05 small ribosomal subunit (13 genes)
        'GO:0022625', # CC D06 cytosolic large ribosomal subunit (16 genes)
        'GO:0022627', # CC D06 cytosolic small ribosomal subunit (19 genes)
        'GO:0036464', # CC D06 cytoplasmic ribonucleoprotein granule (4 genes)
        'GO:0005840', # CC D05 ribosome (35 genes)
        'GO:0005844', # CC D04 polysome (6 genes)
    ]
    # This plot is given the significant results only, unlike the others
    plot_gos("nbt3102_CC_ribosome.png", ribosome_goids, godag,
             study_items=6, id2symbol=id2symbol, items_p_line=3,
             goea_results=results_sig, dpi=review_dpi)

    rna_goids = [
        'GO:0003723', # MF D04 RNA binding (32 genes)
        'GO:0044822', # MF D05 poly(A) RNA binding (86 genes)
        'GO:0003729', # MF D06 mRNA binding (11 genes)
        'GO:0019843', # MF D05 rRNA binding (6 genes)
        'GO:0003746', # MF D06 translation elongation factor activity (5 genes)
    ]
    plot_gos("nbt3102_MF_RNA_genecnt.png", rna_goids, godag,
             goea_results=results_all, dpi=150)
    for pub_dpi in [150, 1200]:  # 150 for review, 1200 for publication
        plot_gos("nbt3102_MF_RNA_dpi{DPI}.png".format(DPI=pub_dpi), rna_goids, godag,
                 study_items=6, items_p_line=3, dpi=pub_dpi, **sym_all)

    # ----------------------------------------------------------------
    # Item 5) Are any significant geneids related to cell cycle?
    # ----------------------------------------------------------------
    import test_genes_cell_cycle as CC
    cell_cycle_genes = CC.get_genes_cell_cycle(taxid, log=log)
    cell_cycle_sig = cell_cycle_genes.intersection(sig_geneids)
    CC.prt_genes("nbt3102_cell_cycle.txt", cell_cycle_sig, taxid, log=None)
Beispiel #11
0
# Data will be stored in this variable
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import goatools
from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.obo_parser import GODag
from goatools.associations import read_ncbi_gene2go
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.go_enrichment import GOEnrichmentStudy

# Fetch the GO ontology and the NCBI gene2go associations, then load each
# from the path its download helper returned rather than a repeated literal.
obo_fname = download_go_basic_obo()
gene2go = download_ncbi_associations()
obodag = GODag(obo_fname)
geneid2gos_mouse = read_ncbi_gene2go(gene2go, taxids=[10090])  # 10090: mouse

# Empty mapping; nothing in this section fills it (presumably populated
# further down — confirm against the rest of the script).
geneid2symbol = {}

print("{N:,} annotated mouse genes".format(N=len(geneid2gos_mouse)))
# A dict keys view has no .head(); show the first five gene IDs instead.
print(list(GeneID2nt_mus.keys())[:5])

goeaobj = GOEnrichmentStudy(
    GeneID2nt_mus.keys(),  # List of mouse protein-coding genes
    geneid2gos_mouse,  # geneid/GO associations
    obodag,  # Ontologies
    propagate_counts=False,
    alpha=0.05,  # default significance cut-off
    methods=['fdr_bh'])  # default multipletest correction method
Beispiel #12
0
def test_example(log=sys.stdout):
    """Run a mouse GOEA on the nbt.3102 study genes, then write and plot the results.

    The enrichment results are written to xlsx/txt files and then examined
    in five steps: words in GO names, the GO terms reported in the paper,
    one plot per significant term, hand-curated GO lists, and cell-cycle genes.

    Args:
        log: Stream handed to the word-report helpers and to the cell-cycle
            gene lookup.
    """
    taxid = 10090  # Mouse study

    # ================================================================
    # Gene Ontology Enrichment Analysis (GOEA)
    # ================================================================
    pop_ids = GeneID2nt_mus.keys()
    study_id2sym = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    goea = get_goeaobj("fdr_bh", pop_ids, taxid)
    # The object from get_geneid2symbol is passed as the study as-is
    recs_all = goea.run_study(study_id2sym)
    recs_sig = [rec for rec in recs_all if rec.p_fdr_bh < 0.05]
    compare_results(recs_all)
    study_items_sig = get_study_items(recs_sig)

    # Write GOEA results to files
    goea.wr_xlsx("nbt3102.xlsx", recs_sig)
    for fout_txt, recs in (("nbt3102_sig.txt", recs_sig), ("nbt3102_all.txt", recs_all)):
        goea.wr_txt(fout_txt, recs)
    # The plot_results() plots of all significant GO terms stay disabled (large plots).

    # ================================================================
    # Further examination of GOEA results...
    # ================================================================
    godag = goea.obo_dag
    base_dpi = 150  # For review: Figures can be saved in .jpg, .gif, .tif or .eps, at 150 dpi

    # ----------------------------------------------------------------
    # Item 1) Words in GO names associated with large numbers of study genes
    # ----------------------------------------------------------------
    # Which GO-name words go with the largest number of study genes?
    prt_word2genecnt("nbt3102_genecnt_GOword.txt", recs_sig, log)
    # Curated selection of such words ('ribosom' covers 'ribosomal', 'ribosome')
    chosen_words = [
        'RNA', 'translation', 'mitochondr', 'ribosom', 'adhesion',
        'endoplasmic', 'nucleotide', 'apoptotic', 'myelin',
    ]
    # GO ids, per word and namespace, for the chosen words
    word2ns2gos = get_word2NS2gos(chosen_words, recs_sig)
    go2rec = {rec.GO: rec for rec in recs_sig}
    # Report the words, the significant GO terms for each word, and study genes
    prt_word_GO_genes(
        "nbt3102_GO_word_genes.txt", word2ns2gos, go2rec, study_id2sym, log)
    # Plot each set of GO ids along with study gene info
    fname_pat = "nbt3102_{WORD}_{NS}_dpi{DPI}.{FMT}"
    for word, ns2gos in word2ns2gos.items():
        # Only the "RNA" plots are also saved as tif and eps
        fmts = ['png', 'tif', 'eps'] if word == "RNA" else ['png']
        for nspc in ['BP', 'MF', 'CC']:
            if nspc not in ns2gos:
                continue
            src_go2obj = {}
            for goid in ns2gos[nspc]:
                src_go2obj[goid] = go2rec[goid].goterm
            # dpi: 150 for review, 1200 for publication (only 150 is generated)
            for img_dpi in [150]:
                for fmt in fmts:
                    plot_goid2goobj(
                        fname_pat.format(WORD=word, NS=nspc, DPI=img_dpi, FMT=fmt),
                        src_go2obj,  # source GOs and their GOTerm object
                        items_p_line=3,
                        study_items=6,  # Max number of gene symbols per GO term
                        id2symbol=study_id2sym,  # Contains GeneID-to-Symbol
                        goea_results=recs_all,  # pvals used for GO Term coloring
                        dpi=img_dpi)

    # ----------------------------------------------------------------
    # Item 2) Explore findings of Nature paper:
    #
    #     Gene Ontology (GO) enrichment analysis showed that the
    #     differentially expressed genes contained statistically
    #     significant enrichments of genes involved in
    #         glycolysis,
    #         cellular response to IL-4 stimulation and
    #         positive regulation of B-cell proliferation
    # ----------------------------------------------------------------
    paper_goids = [
        'GO:0006096',  # BP 4.24e-12 10 glycolytic process
        'GO:0071353',  # BP 7.45e-06  5 cellular response to interleukin-4
        'GO:0030890',  # BP 8.22e-07  7 positive regulation of B cell proliferation
    ]
    # Each row: output image and the keyword arguments that vary between plots
    paper_plots = (
        ("nbt3102_GOs.png", {}),
        ("nbt3102_GOs_genecnt.png", {'goea_results': recs_all}),
        ("nbt3102_GOs_genelst.png",
         {'study_items': True, 'goea_results': recs_all}),
        ("nbt3102_GOs_symlst.png",
         {'study_items': True, 'id2symbol': study_id2sym, 'goea_results': recs_all}),
        ("nbt3102_GOs_symlst_trunc.png",
         {'study_items': 5, 'id2symbol': study_id2sym, 'goea_results': recs_all}),
    )
    for fout_img, plot_kws in paper_plots:
        plot_gos(fout_img, paper_goids, godag, dpi=base_dpi, **plot_kws)
    plot_gos(
        "nbt3102_GOs_GO0005743.png", ["GO:0005743"], godag,
        items_p_line=2, study_items=6,
        id2symbol=study_id2sym, goea_results=recs_all, dpi=base_dpi)

    # ----------------------------------------------------------------
    # Item 3) Create one GO sub-plot per significant GO term from study
    # ----------------------------------------------------------------
    for rec in recs_sig:
        goid_tag = rec.GO.replace(':', '_')
        plot_goid2goobj(
            "nbt3102_{NS}_{GO}.png".format(GO=goid_tag, NS=rec.NS),
            {rec.GO: rec.goterm},  # source GO and its GOTerm object
            study_items=15,  # Max number of gene symbols per GO term
            id2symbol=study_id2sym,  # Contains GeneID-to-Symbol
            goea_results=recs_all,  # pvals used for GO Term coloring
            dpi=base_dpi)

    # ----------------------------------------------------------------
    # Item 4) Explore using manually curated lists of GO terms
    # ----------------------------------------------------------------
    cc_ribosome_goids = [
        'GO:0030529',  # CC D03 intracellular ribonucleoprotein complex (42 genes)
        'GO:0015934',  # CC D05 large ribosomal subunit (4 genes)
        'GO:0015935',  # CC D05 small ribosomal subunit (13 genes)
        'GO:0022625',  # CC D06 cytosolic large ribosomal subunit (16 genes)
        'GO:0022627',  # CC D06 cytosolic small ribosomal subunit (19 genes)
        'GO:0036464',  # CC D06 cytoplasmic ribonucleoprotein granule (4 genes)
        'GO:0005840',  # CC D05 ribosome (35 genes)
        'GO:0005844',  # CC D04 polysome (6 genes)
    ]
    # This plot is given the significant results only, unlike the others
    plot_gos(
        "nbt3102_CC_ribosome.png", cc_ribosome_goids, godag,
        study_items=6, id2symbol=study_id2sym, items_p_line=3,
        goea_results=recs_sig, dpi=base_dpi)

    mf_rna_goids = [
        'GO:0003723',  # MF D04 RNA binding (32 genes)
        'GO:0044822',  # MF D05 poly(A) RNA binding (86 genes)
        'GO:0003729',  # MF D06 mRNA binding (11 genes)
        'GO:0019843',  # MF D05 rRNA binding (6 genes)
        'GO:0003746',  # MF D06 translation elongation factor activity (5 genes)
    ]
    plot_gos(
        "nbt3102_MF_RNA_genecnt.png", mf_rna_goids, godag,
        goea_results=recs_all, dpi=150)
    for out_dpi in [150, 1200]:  # 150 for review, 1200 for publication
        fout_rna = "nbt3102_MF_RNA_dpi{DPI}.png".format(DPI=out_dpi)
        plot_gos(
            fout_rna, mf_rna_goids, godag,
            study_items=6, id2symbol=study_id2sym, items_p_line=3,
            goea_results=recs_all, dpi=out_dpi)

    # ----------------------------------------------------------------
    # Item 5) Are any significant geneids related to cell cycle?
    # ----------------------------------------------------------------
    import test_genes_cell_cycle as CC
    cycle_genes = CC.get_genes_cell_cycle(taxid, log=log)
    cycle_genes_sig = cycle_genes.intersection(study_items_sig)
    CC.prt_genes("nbt3102_cell_cycle.txt", cycle_genes_sig, taxid, log=None)