def __init__(self, taxid, fin_gene2go, fin_gobasic):
    """Load gene2go associations for one taxid and the GO DAG.

    Both file names are resolved relative to REPO.

    Args:
        taxid: Single taxonomy id; passed to read_ncbi_gene2go as a one-item list.
        fin_gene2go: File name of the NCBI gene2go annotation file.
        fin_gobasic: File name of the GO obo file handed to get_godag.
    """
    # Annotations: fetch step first, then read the associations for this taxid
    path_gene2go = os.path.join(REPO, fin_gene2go)
    dnld_ncbi_gene_file(path_gene2go, loading_bar=None)
    self.gene2go = read_ncbi_gene2go(path_gene2go, [taxid])

    # Ontology: result of get_godag is kept on the instance
    path_obo = os.path.join(REPO, fin_gobasic)
    self.godag = get_godag(path_obo, loading_bar=None)
Exemple #2
0
    def __init__(self, taxid, fin_gene2go, fin_gobasic):
        """Populate ``self.gene2go`` and ``self.godag`` from files under REPO.

        Args:
            taxid: Taxonomy id whose associations are read (wrapped in a list
                for read_ncbi_gene2go).
            fin_gene2go: gene2go annotation file name, joined onto REPO.
            fin_gobasic: GO obo file name, joined onto REPO.
        """
        gene2go_file = os.path.join(REPO, fin_gene2go)
        # dnld_ncbi_gene_file runs before the read so the file is in place
        dnld_ncbi_gene_file(gene2go_file, loading_bar=None)
        self.gene2go = read_ncbi_gene2go(gene2go_file, [taxid])

        self.godag = get_godag(os.path.join(REPO, fin_gobasic), loading_bar=None)
def _dnld_anno(file_anno):
    """Make sure the annotation file is present, downloading it when absent.

    Args:
        file_anno: Path of the annotation file.

    Raises:
        AssertionError: If the file is still missing after the download call
            ("MISSING ANNO"), or its size is not above 1000000 bytes ("BAD ANNO").
    """
    if not os.path.exists(file_anno):
        # Nothing on disk yet: fetch it, then confirm a regular file appeared
        dnld_ncbi_gene_file(file_anno, loading_bar=None)
        assert os.path.isfile(file_anno), "MISSING ANNO({F})".format(F=file_anno)
    # Size check applies to both a pre-existing and a freshly downloaded file
    assert os.path.getsize(file_anno) > 1000000, "BAD ANNO({F})".format(F=file_anno)
Exemple #4
0
def _get_id2gos(file_assc, taxids, log):
    """Read gene2go associations for the given taxids and return them by taxid.

    Args:
        file_assc: Association file name, resolved relative to REPO.
        taxids: Taxonomy ids forwarded to read_ncbi_gene2go.
        log: Writable stream; one summary line is written to it.

    Returns:
        The nested defaultdict passed to read_ncbi_gene2go as ``taxid2asscs``
        (expected to be populated by that call).
    """
    path_assc = os.path.join(REPO, file_assc)
    dnld_ncbi_gene_file(path_assc, loading_bar=None)

    # Three levels of defaultdict ending in sets
    by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    num_items = len(read_ncbi_gene2go(path_assc, taxids, taxid2asscs=by_taxid))

    msg = "  {N} items found in gene2go from NCBI's ftp server\n"
    log.write(msg.format(N=num_items))
    return by_taxid
def _dnld_anno(file_anno):
    """Download the annotation file unless it already exists, then sanity-check it.

    Args:
        file_anno: Path of the annotation file.

    Raises:
        AssertionError: "BAD ANNO" when the file is not larger than 1000000
            bytes; "MISSING ANNO" when no file exists after the download call.
    """
    min_bytes = 1000000

    def _assert_big_enough():
        """Fail when the file on disk is not larger than min_bytes."""
        assert os.path.getsize(file_anno) > min_bytes, "BAD ANNO({F})".format(F=file_anno)

    already_there = os.path.exists(file_anno)
    if already_there:
        _assert_big_enough()
        return
    dnld_ncbi_gene_file(file_anno, loading_bar=None)
    assert os.path.isfile(file_anno), "MISSING ANNO({F})".format(F=file_anno)
    _assert_big_enough()
def _get_evidencecodes(fin_gene2go):
    """Get all evidence codes found in a gene2go annotation file.

    Only the evidence column (fourth tab-separated field) is collected;
    qualifiers are not.

    Args:
        fin_gene2go: File name of the gene2go file, resolved relative to REPO.
            A falsy value (None, '') falls back to 'gene2go'.

    Returns:
        Set of evidence-code strings seen on the data lines.

    Raises:
        AssertionError: If a data line carries an evidence field shorter than
            two characters.
    """
    # Honor the caller's file name instead of overwriting it with a constant
    fin = os.path.join(REPO, fin_gene2go or 'gene2go')
    dnld_ncbi_gene_file(fin, force_dnld=False, loading_bar=False)
    evs = set()
    with open(fin) as ifstrm:
        for line in ifstrm:
            if line.startswith('#'):  # Comment line, no data
                continue
            flds = line.rstrip().split('\t')
            if len(flds) < 5:
                continue
            # Leading fields: taxid_curr, geneid, go_id, evidence, qualifier
            evidence = flds[3]
            assert len(evidence) >= 2, flds
            evs.add(evidence)
    print('{N} evidence codes in {FIN}'.format(N=len(evs), FIN=fin))
    return evs
def _get_evidencecodes(fin_gene2go):
    """Collect the distinct evidence codes present in a gene2go file.

    The fourth tab-separated field of each data line is taken as the evidence
    code. Qualifiers (fifth field) are not collected.

    Args:
        fin_gene2go: gene2go file name, joined onto REPO. When falsy, the
            default name 'gene2go' is used.

    Returns:
        Set of evidence-code strings.

    Raises:
        AssertionError: If an evidence field has fewer than two characters.
    """
    # Use the argument rather than a hard-coded name; keep 'gene2go' as default
    path_gene2go = os.path.join(REPO, fin_gene2go if fin_gene2go else 'gene2go')
    dnld_ncbi_gene_file(path_gene2go, force_dnld=False, loading_bar=False)
    codes = set()
    with open(path_gene2go) as ifstrm:
        # Skip '#' comment lines; split the rest on tabs after stripping the EOL
        rows = (ln.rstrip().split('\t') for ln in ifstrm if not ln.startswith('#'))
        for flds in rows:
            if len(flds) >= 5:
                evidence = flds[3]
                assert len(evidence) >= 2, flds
                codes.add(evidence)
    print('{N} evidence codes in {FIN}'.format(N=len(codes), FIN=path_gene2go))
    return codes
def test_i96():
    """Run a GOEA in an attempt to reproduce issue#96 (currently passes)."""
    # Error being chased: ValueError("All values in table must be nonnegative.
    print('CWD', os.getcwd())

    # Gene sets: study ids and the population they are drawn from
    study_ids = _get_geneids()
    population_ids = GENEID2NT.keys()

    print(os.getcwd())

    # Associations for taxid 9606 from the gene2go file under REPO
    path_gene2go = os.path.join(REPO, 'gene2go')
    dnld_ncbi_gene_file(path_gene2go, loading_bar=None)
    gene2go = read_ncbi_gene2go(path_gene2go, [9606])

    # GO DAG from the go-basic obo file under REPO
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)

    # Gene Ontology Enrichment Analysis using the 'fdr_bh' method
    goeaobj = GOEnrichmentStudy(
        population_ids,
        gene2go,
        godag,
        methods=['fdr_bh'],
    )
    results_goeas = goeaobj.run_study(study_ids)
Exemple #9
0
def test_i96():
    """Exercise the GOEA path reported in issue#96; no failure at present."""
    # Target error: ValueError("All values in table must be nonnegative.
    print('CWD', os.getcwd())
    ids_study = _get_geneids()
    ids_population = GENEID2NT.keys()

    # Load the databases: gene2go associations (taxid 9606) and the GO DAG
    print(os.getcwd())
    fin_gene2go = os.path.join(REPO, 'gene2go')
    dnld_ncbi_gene_file(fin_gene2go, loading_bar=None)
    assc = read_ncbi_gene2go(fin_gene2go, [9606])

    fin_dag = os.path.join(REPO, "go-basic.obo")
    dag = get_godag(fin_dag, loading_bar=None)

    # Build the enrichment study, then run it on the study ids
    goea = GOEnrichmentStudy(ids_population, assc, dag, methods=['fdr_bh'])
    results_goeas = goea.run_study(ids_study)