Example #1
0
def test_gosearch(log=sys.stdout):
    """Test GoSearch class with per-taxid annotations and with no annotations.

    Args:
        log: Writable stream passed to GoSearch as its ``log`` argument.
    """
    taxids = [9606, 10090]
    # Download ontologies and annotations, if necessary
    fin_go_obo = os.path.join(REPO, "go-basic.obo")
    download_go_basic_obo(fin_go_obo, loading_bar=None)
    # Because get_assoc_ncbi_taxids returns id2gos, we will opt to
    # use the (optional) multi-level dictionary separate associations by taxid
    # taxid2asscs contains both GO2GeneIDs and GeneID2GOs.
    taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids(taxids, taxid2asscs=taxid2asscs, loading_bar=None)

    # Initialize GO-search helper object with obo and annotations(go2items)
    for taxid in taxids:
        obj = GoSearch(fin_go_obo, go2items=taxid2asscs[taxid]['GO2GeneIDs'], log=log)
        assert len(obj.obo_dag) > 40000
    # Bind the annotation-free object so the check below inspects it,
    # not the last object created inside the loop.
    obj_noassc = GoSearch(fin_go_obo, dict(), log=log)
    assert len(obj_noassc.obo_dag) > 40000
def test_write_summary_cnts(log=sys.stdout):
    """Write level/depth summary counts for the whole DAG, then per taxid.

    Args:
        log: Writable stream that receives the headings and is handed
            to RptLevDepth.
    """
    dag = _get_obodag()
    reporter = RptLevDepth(dag, log)

    # Summary over every GO term in the DAG
    log.write("\nSummary for all Ontologies:\n")
    reporter.write_summary_cnts_all()

    # Nested mapping passed to get_assoc_ncbi_taxids to receive
    # associations separated by taxid (human, fly, and mouse)
    assc_by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids([9606, 7227, 10090], taxid2asscs=assc_by_taxid)

    heading = "\nSummary for Ontologies for taxid({T}):\n"
    for taxid, assc in assc_by_taxid.items():
        # First pass: summary driven by GO IDs
        log.write(heading.format(T=taxid))
        goids = assc['GO2GeneIDs'].keys()
        reporter.write_summary_cnts(goids)
        # Second pass: same heading, summary driven by GO term objects
        log.write(heading.format(T=taxid))
        reporter.write_summary_cnts_goobjs([dag[goid] for goid in goids])
def test_write_summary_cnts(log=sys.stdout):
    """Report level/depth counts for all GO terms and for each taxid's GO terms.

    Args:
        log: Writable stream used for headings and given to RptLevDepth.
    """
    godag = _get_obodag()
    rpt = RptLevDepth(godag, log)

    log.write("\nSummary for all Ontologies:\n")
    rpt.write_summary_cnts_all()

    # human, fly, and mouse
    species = [9606, 7227, 10090]
    # Filled through the taxid2asscs keyword of get_assoc_ncbi_taxids
    per_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids(species, taxid2asscs=per_taxid)

    for taxid, assc in per_taxid.items():
        title = "\nSummary for Ontologies for taxid({T}):\n".format(T=taxid)
        # Report from the GO IDs themselves
        log.write(title)
        ids = assc['GO2GeneIDs'].keys()
        rpt.write_summary_cnts(ids)
        # Report again from the GO objects looked up in the DAG
        log.write(title)
        terms = [godag[goid] for goid in ids]
        rpt.write_summary_cnts_goobjs(terms)
Example #4
0
def get_genes_cell_cycle(taxid=9606, log=sys.stdout):
    """Return the items GoSearch associates with 'cell cycle' GO terms.

    GO terms matching 'cell cycle' are collected, those matching
    'cell cycle.independent' are removed, and children of the remaining
    terms are added. Search output is written to
    ``cell_cycle_gos_<taxid>.log`` in the current directory.

    Args:
        taxid: Taxonomy ID whose associations are searched.
        log: Stream for a short summary; pass None to suppress it.

    Returns:
        The result of ``GoSearch.get_items`` for the final GO set.
    """
    # Download ontologies and annotations, if necessary
    obo_file = "go-basic.obo"
    if not os.path.exists(obo_file):
        wget.download("http://geneontology.org/ontology/go-basic.obo")
    # Nested mapping handed to get_assoc_ncbi_taxids so associations
    # come back separated by taxid
    assc_by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids([taxid], taxid2asscs=assc_by_taxid)

    searcher = GoSearch(obo_file, go2items=assc_by_taxid[taxid]['GO2GeneIDs'])
    pat_cell_cycle = re.compile(r'cell cycle', flags=re.IGNORECASE)
    # Used to drop false hits such as GO:0005764 (lysosome), which only
    # matches through the phrase 'cell cycle-independent'
    pat_independent = re.compile(r'cell cycle.independent', flags=re.IGNORECASE)

    report_file = "cell_cycle_gos_{TAXID}.log".format(TAXID=taxid)
    with open(report_file, "w") as prt:
        hits = searcher.get_matching_gos(pat_cell_cycle, prt=prt)
        false_hits = searcher.get_matching_gos(pat_independent, gos=hits, prt=prt)
        kept = hits.difference(false_hits)
        # Add children GOs of cell cycle GOs
        gos_all = searcher.add_children_gos(kept)
        if log is not None:
            log.write('    taxid {TAXID:>5}\n'.format(TAXID=taxid))
            log.write('    FOUND {N:>5} GOs:   {F}\n'.format(N=len(gos_all), F=report_file))
    # Get the items (Entrez GeneIDs) for the cell cycle GOs
    return searcher.get_items(gos_all)
def get_goeaobj(method, geneids_pop, taxid):
    """Build a GOEnrichmentStudy from ontologies, associations, and population.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    godag = get_godag()
    geneid2gos = get_assoc_ncbi_taxids([taxid])
    return GOEnrichmentStudy(
        geneids_pop,
        geneid2gos,
        godag,
        propagate_counts=False,
        alpha=0.05,
        methods=[method],
    )
Example #6
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Map each p-value function name to the result of ``_get_pval_uncorr``.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc``.
        prt: Stream passed to the download, study-ID loader, and p-value call.

    Returns:
        dict keyed by the entries of ``pvalfnc_names``.
    """
    mouse_taxid = 10090  # Mouse study
    godag = GODag(download_go_basic_obo(prt=prt))
    population = GeneID2nt_mus.keys()
    geneid2gos = get_assoc_ncbi_taxids([mouse_taxid])
    study = _get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx", prt)

    name2pvals = {}
    # One study object per p-value function; corrections are disabled (methods=None)
    for name in pvalfnc_names:
        study_obj = GOEnrichmentStudy(
            population,
            geneid2gos,
            godag,
            propagate_counts=False,
            alpha=0.05,
            methods=None,
            pvalcalc=name,
        )
        name2pvals[name] = study_obj._get_pval_uncorr(study, prt)
    return name2pvals
def test_ncbi_gene2go(log=sys.stdout):
    """Report and sanity-check association counts per taxid.

    Args:
        log: Writable stream that receives one report line per taxid.
    """
    # Human(9606), mouse(10090), and fly(7227)
    # NOTE(review): this relies on get_assoc_ncbi_taxids returning a mapping
    # keyed by taxid whose values hold 'GeneID2GOs' and 'GO2GeneIDs' --
    # confirm against the library version in use.
    assc_by_taxid = get_assoc_ncbi_taxids([9606, 10090, 7227])
    line = "{N:>5} GOs and {M:>5} annotated GeneIDs for tax_id: {TAXID:>6}\n"
    for taxid, asscs in assc_by_taxid.items():
        n_genes = len(asscs['GeneID2GOs'])
        n_gos = len(asscs['GO2GeneIDs'])
        log.write(line.format(TAXID=taxid, N=n_gos, M=n_genes))
        # Basic check to ensure gene2go was downloaded and data was returned.
        assert n_genes > 11000
        assert n_gos > 6000
Example #8
0
def get_goeaobj(method, geneids_pop, taxid):
    """Create a GOEnrichmentStudy for one taxid and one multiple-test method.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    # The ontology file is expected in (or loaded into) the current directory
    obo_path = os.path.join(os.getcwd(), "go-basic.obo")
    dag = get_godag(obo_path, loading_bar=None)
    geneid2gos = get_assoc_ncbi_taxids([taxid], loading_bar=None)
    study_kws = {'propagate_counts': False, 'alpha': 0.05, 'methods': [method]}
    return GOEnrichmentStudy(geneids_pop, geneid2gos, dag, **study_kws)
Example #9
0
def test_i96():
    """Try to reproduce issue#96; currently runs without error.

    The issue reported:
    ValueError("All values in table must be nonnegative.
    The test passes if ``run_study`` completes; its results are not inspected.
    """
    # Genes: study set and population
    study = _get_geneids()
    population = GeneID2nt.keys()
    # Databases: human associations and the GO DAG
    geneid2gos = get_assoc_ncbi_taxids([9606], loading_bar=None)
    dag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    study_obj = GOEnrichmentStudy(population, geneid2gos, dag, methods=['fdr_bh'])
    # Run GOEA Gene Ontology Enrichment Analysis
    study_obj.run_study(study)
Example #10
0
def get_goeaobj(method, geneids_pop, taxid):
    """Return a GOEnrichmentStudy built from the GO DAG and one taxid's associations.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    dag = get_godag()
    associations = get_assoc_ncbi_taxids([taxid])
    options = dict(propagate_counts=False, alpha=0.05, methods=[method])
    study_obj = GOEnrichmentStudy(geneids_pop, associations, dag, **options)
    return study_obj
def get_genes_cell_cycle(taxid=9606, log=sys.stdout):
    """Collect the items annotated to 'cell cycle' GO terms for one taxid.

    Matching GO terms (minus 'cell cycle.independent' matches, plus
    children) are found with GoSearch; search output is written to
    ``cell_cycle_gos_<taxid>.log``.

    Args:
        taxid: Taxonomy ID whose associations are searched.
        log: Stream for a two-line summary, or None for no summary.

    Returns:
        The result of ``GoSearch.get_items`` for the final GO set.
    """
    fin_obo = "go-basic.obo"
    # Fetch the ontology only when it is not already present
    if not os.path.exists(fin_obo):
        wget.download("http://geneontology.org/ontology/go-basic.obo")

    # get_assoc_ncbi_taxids fills this multi-level dictionary by taxid
    asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids([taxid], taxid2asscs=asscs)
    go_search = GoSearch(fin_obo, go2items=asscs[taxid]['GO2GeneIDs'])

    re_cc = re.compile(r'cell cycle', flags=re.IGNORECASE)
    log_name = "cell_cycle_gos_{TAXID}.log".format(TAXID=taxid)
    with open(log_name, "w") as prt:
        # Every GO term mentioning 'cell cycle'
        all_cc = go_search.get_matching_gos(re_cc, prt=prt)
        # GO:0005764 (lysosome) appears only because of the phrase
        # 'cell cycle-independent'; such terms are searched for and removed
        re_cc_ind = re.compile(r'cell cycle.independent', flags=re.IGNORECASE)
        not_cc = go_search.get_matching_gos(re_cc_ind, gos=all_cc, prt=prt)
        # Extend the cleaned set with children GO terms
        gos_all = go_search.add_children_gos(all_cc.difference(not_cc))
        if log is not None:
            log.write('    taxid {TAXID:>5}\n'.format(TAXID=taxid))
            found_line = '    FOUND {N:>5} GOs:   {F}\n'
            log.write(found_line.format(N=len(gos_all), F=log_name))
    # Items (Entrez GeneIDs) for the cell cycle GOs
    items = go_search.get_items(gos_all)
    return items
Example #12
0
def get_goeaobj(method, geneids_pop, taxid):
    """Load ontologies and associations, then build a GOEnrichmentStudy.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    dag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    gene2gos = get_assoc_ncbi_taxids([taxid], loading_bar=None)
    return GOEnrichmentStudy(
        geneids_pop, gene2gos, dag,
        propagate_counts=False, alpha=0.05, methods=[method])
def _get_results(godag, propagate_counts, relationships, prt=sys.stdout):
    """Run a GOEA on the mouse study genes and return ``run_study``'s results.

    Args:
        godag: GO DAG passed to GOEnrichmentStudy.
        propagate_counts: Forwarded to GOEnrichmentStudy.
        relationships: Forwarded to GOEnrichmentStudy.
        prt: Stream passed to ``run_study``.
    """
    mouse_taxid = 10090  # Mouse study
    population = set(GeneID2nt_mus.keys())
    gene2gos = get_assoc_ncbi_taxids([mouse_taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_obj = GOEnrichmentStudy(
        population,
        gene2gos,
        godag,
        propagate_counts=propagate_counts,
        relationships=relationships,
        alpha=0.05,
        methods=['fdr_bh'],
    )
    results = study_obj.run_study(study, prt=prt)
    return results
Example #14
0
def get_goeaobj(method, geneids_pop, taxid):
    """Load: ontologies, associations, and population geneids.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    fin_obo = "go-basic.obo"
    if not os.path.isfile(fin_obo):
        # wget.download takes a bare URL, not a shell command line
        wget.download("http://geneontology.org/ontology/go-basic.obo")
    obo_dag = GODag(fin_obo)
    assoc_geneid2gos = get_assoc_ncbi_taxids([taxid])
    goeaobj = GOEnrichmentStudy(geneids_pop,
                                assoc_geneid2gos,
                                obo_dag,
                                propagate_counts=False,
                                alpha=0.05,
                                methods=[method])
    return goeaobj
Example #15
0
def test_write_summary_cnts(log=sys.stdout):
    """Write level/depth summaries for the full DAG, per taxid, and as LaTeX.

    Args:
        log: Writable stream for headings, RptLevDepth output, and the
            LaTeX table.
    """
    dag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    reporter = RptLevDepth(dag, log)

    # Summary over every GO term in the DAG
    log.write("\nSummary for all Ontologies:\n")
    reporter.write_summary_cnts_all()

    # Associations for human, fly, and mouse, separated by taxid
    assc_by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids([9606, 7227, 10090], taxid2asscs=assc_by_taxid, loading_bar=None)

    heading = "\nSummary for Ontologies for taxid({T}):\n"
    for taxid, assc in assc_by_taxid.items():
        # Summary from GO IDs
        log.write(heading.format(T=taxid))
        goids = assc['GO2GeneIDs'].keys()
        reporter.write_summary_cnts(goids)
        # Summary from GO objects; .get() yields None for IDs absent from the DAG
        log.write(heading.format(T=taxid))
        reporter.write_summary_cnts_goobjs([dag.get(goid) for goid in goids])

    # Print GO depth count table for full GO DAG in LaTeX format
    reporter.prttex_summary_cnts_all(prt=log)
def test_ncbi_gene2go(log=sys.stdout):
    """Write GO/GeneID counts per taxid and assert they exceed minimum sizes.

    Args:
        log: Writable stream that receives one report line per taxid.
    """
    # NOTE(review): assumes get_assoc_ncbi_taxids returns a taxid-keyed
    # mapping with 'GeneID2GOs' and 'GO2GeneIDs' entries -- confirm against
    # the library version in use.
    # Taxids: human(9606), mouse(10090), and fly(7227)
    results = get_assoc_ncbi_taxids([9606, 10090, 7227])
    for taxid, asscs in results.items():
        gene_cnt = len(asscs['GeneID2GOs'])
        go_cnt = len(asscs['GO2GeneIDs'])
        report = "{N:>5} GOs and {M:>5} annotated GeneIDs for tax_id: {TAXID:>6}\n".format(
            TAXID=taxid, N=go_cnt, M=gene_cnt)
        log.write(report)
        # Basic check to ensure gene2go was downloaded and data was returned.
        assert gene_cnt > 11000
        assert go_cnt > 6000
def test_write_summary_cnts(log=sys.stdout):
    """Report level/depth counts for the DAG and for each taxid's GO IDs.

    Args:
        log: Writable stream for headings, RptLevDepth output, and the
            LaTeX table.
    """
    obo_path = os.path.join(os.getcwd(), "go-basic.obo")
    dag = get_godag(obo_path, loading_bar=None)
    rpt = RptLevDepth(dag, log)

    log.write("\nSummary for all Ontologies:\n")
    rpt.write_summary_cnts_all()

    # human, fly, and mouse
    species = [9606, 7227, 10090]
    per_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids(species, taxid2asscs=per_taxid, loading_bar=None)
    # Fail early when no associations were loaded
    assert per_taxid, 'taxid2asscs EMPTY'

    for taxid, assc in per_taxid.items():
        title = "\nSummary for Ontologies for taxid({T}):\n".format(T=taxid)
        # Counts from the GO IDs
        log.write(title)
        ids = assc['GO2IDs'].keys()
        rpt.write_summary_cnts(ids)
        # Counts from the GO objects fetched with .get()
        log.write(title)
        terms = [dag.get(goid) for goid in ids]
        rpt.write_summary_cnts_goobjs(terms)
    # Print GO depth count table for full GO DAG in LaTeX format
    rpt.prttex_summary_cnts_all(prt=log)
Example #18
0
def get_goeaobj(method, geneids_pop, taxid):
    """Load: ontologies, associations, and population geneids.

    Args:
        method: Name placed, alone, in the ``methods`` list.
        geneids_pop: Population gene IDs passed to GOEnrichmentStudy.
        taxid: Taxonomy ID whose associations are fetched.

    Returns:
        The constructed GOEnrichmentStudy object.
    """
    fin_obo = "go-basic.obo"
    if not os.path.isfile(fin_obo):
        # wget.download takes a bare URL, not a shell command line
        wget.download("http://geneontology.org/ontology/go-basic.obo")
    obo_dag = GODag(fin_obo)
    assoc_geneid2gos = get_assoc_ncbi_taxids([taxid])
    goeaobj = GOEnrichmentStudy(
        geneids_pop,
        assoc_geneid2gos,
        obo_dag,
        propagate_counts=False,
        alpha=0.05,
        methods=[method])
    return goeaobj
def _get_results(godag, propagate_counts, relationships, prt=sys.stdout):
    """Run a GOEA for the mouse study and return what ``run_study`` returns.

    Args:
        godag: GO DAG passed to GOEnrichmentStudy.
        propagate_counts: Forwarded to GOEnrichmentStudy.
        relationships: Forwarded to GOEnrichmentStudy.
        prt: Stream passed to ``run_study``.
    """
    taxid_mouse = 10090  # Mouse study
    pop_ids = set(GeneID2nt_mus.keys())
    assoc = get_assoc_ncbi_taxids([taxid_mouse], loading_bar=None)
    study_ids = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_kws = {
        'propagate_counts': propagate_counts,
        'relationships': relationships,
        'alpha': 0.05,
        'methods': ['fdr_bh'],
    }
    return GOEnrichmentStudy(pop_ids, assoc, godag, **study_kws).run_study(study_ids, prt=prt)
Example #20
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Return ``_get_pval_uncorr`` results keyed by p-value function name.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc``.
        prt: Stream passed to the download, study-ID loader, and p-value call.

    Returns:
        dict keyed by the entries of ``pvalfnc_names``.
    """
    taxid_mouse = 10090  # Mouse study
    dag = GODag(download_go_basic_obo(prt=prt))
    pop_ids = GeneID2nt_mus.keys()
    assoc = get_assoc_ncbi_taxids([taxid_mouse])
    study_ids = _get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx", prt)

    # Options shared by every study object; only pvalcalc varies
    common_kws = {'propagate_counts': False, 'alpha': 0.05, 'methods': None}
    results = {}
    for fnc_name in pvalfnc_names:
        study_obj = GOEnrichmentStudy(pop_ids, assoc, dag, pvalcalc=fnc_name, **common_kws)
        results[fnc_name] = study_obj._get_pval_uncorr(study_ids, prt)
    return results
Example #21
0
def _get_pvals(pvalfnc_names, prt=sys.stdout):
    """Map each p-value function name to the result of ``get_pval_uncorr``.

    Args:
        pvalfnc_names: Iterable of names, each passed as ``pvalcalc``.
        prt: Stream passed to ``get_godag`` and ``get_pval_uncorr``.

    Returns:
        dict keyed by the entries of ``pvalfnc_names``.
    """
    mouse_taxid = 10090  # Mouse study
    obo_path = os.path.join(os.getcwd(), "go-basic.obo")
    dag = get_godag(obo_path, prt, loading_bar=None)
    population = GeneID2nt_mus.keys()
    gene2gos = get_assoc_ncbi_taxids([mouse_taxid], loading_bar=None)
    study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")

    name2pvals = {}
    # Multiple-test corrections are disabled (methods=None)
    for name in pvalfnc_names:
        study_obj = GOEnrichmentStudy(
            population,
            gene2gos,
            dag,
            propagate_counts=False,
            alpha=0.05,
            methods=None,
            pvalcalc=name,
        )
        name2pvals[name] = study_obj.get_pval_uncorr(study, prt)
    return name2pvals
def test_ncbi_gene2go(log=sys.stdout):
    """Report GO associations to Entrez GeneIDs per taxid; download if necessary.

    Writes a table of GO and GeneID counts for human(9606), mouse(10090),
    and fly(7227), asserts minimum counts, and calls ``rpt_coverage`` for
    the taxids that have a protein-coding gene table.

    Example report generated with Feb 22, 2013 download of:
      NCBI Gene tables and associations in gene2go

         49672 items found in gene2go from NCBI's ftp server

         taxid    GOs GeneIDs  Description
         ----- ------ -------  -----------
         10090 16,807  18,971  all DNA items
          7227  7,022  12,019  all DNA items
          7227  6,956  10,590  76% GO coverage of 13,919 protein-coding genes
          9606 16,299  18,680  all DNA items
          9606 16,296  18,253  87% GO coverage of 20,913 protein-coding genes

    Args:
        log: Writable stream that receives the report.
    """
    # Multi-level dictionary that receives associations separated by taxid
    assc_by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    species = [9606, 10090, 7227]
    # The return value is the simple id2gos dictionary
    id2gos = get_assoc_ncbi_taxids(species, taxid2asscs=assc_by_taxid, loading_bar=None)
    log.write("  {N} items found in gene2go from NCBI's ftp server\n".format(N=len(id2gos)))
    # Protein-coding gene tables exist only for human and fly here
    protein_coding = {9606: GeneID2nt_hsa, 7227: GeneID2nt_dme}

    log.write("   taxid    GOs GeneIDs  Description\n")
    log.write("   ----- ------ -------  -----------\n")
    row = "  {TAXID:>6} {N:>6,} {M:>7,}  all DNA items\n"
    for taxid, asscs in assc_by_taxid.items():
        n_genes = len(asscs['GeneID2GOs'])
        n_gos = len(asscs['GO2GeneIDs'])
        log.write(row.format(TAXID=taxid, N=n_gos, M=n_genes))
        # Basic check to ensure gene2go was downloaded and data was returned.
        assert n_genes > 11000
        assert n_gos > 6000
        if taxid in protein_coding:
            rpt_coverage(taxid, asscs, protein_coding[taxid], log)
def test_ncbi_gene2go(log=sys.stdout):
    """Check and report GO associations to Entrez GeneIDs; download if necessary.

    For human(9606), mouse(10090), and fly(7227) a row of GO and GeneID
    counts is written and minimum counts are asserted. ``rpt_coverage``
    is called for taxids with a protein-coding gene table.

    Example report generated with Feb 22, 2013 download of:
      NCBI Gene tables and associations in gene2go

         49672 items found in gene2go from NCBI's ftp server

         taxid    GOs GeneIDs  Description
         ----- ------ -------  -----------
         10090 16,807  18,971  all DNA items
          7227  7,022  12,019  all DNA items
          7227  6,956  10,590  76% GO coverage of 13,919 protein-coding genes
          9606 16,299  18,680  all DNA items
          9606 16,296  18,253  87% GO coverage of 20,913 protein-coding genes

    Args:
        log: Writable stream that receives the report.
    """
    # Receives associations separated by taxid
    per_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    # The call returns the simple id2gos dictionary
    id2gos = get_assoc_ncbi_taxids(
        taxids=[9606, 10090, 7227],
        taxid2asscs=per_taxid,
    )
    found_msg = "  {N} items found in gene2go from NCBI's ftp server\n"
    log.write(found_msg.format(N=len(id2gos)))
    taxid2pc = {9606: GeneID2nt_hsa, 7227: GeneID2nt_dme}

    # Table header
    log.write("   taxid    GOs GeneIDs  Description\n")
    log.write("   ----- ------ -------  -----------\n")
    for taxid, asscs in per_taxid.items():
        gene_cnt = len(asscs['GeneID2GOs'])
        go_cnt = len(asscs['GO2GeneIDs'])
        log.write("  {TAXID:>6} {N:>6,} {M:>7,}  all DNA items\n".format(
            TAXID=taxid, N=go_cnt, M=gene_cnt))
        # Basic check to ensure gene2go was downloaded and data was returned.
        assert gene_cnt > 11000
        assert go_cnt > 6000
        # Coverage report only where a protein-coding table is available
        if taxid in taxid2pc:
            rpt_coverage(taxid, asscs, taxid2pc[taxid], log)