def __init__(self, taxid, fin_gene2go, fin_gobasic):
    """Load gene2go associations for one taxid, plus the GO DAG.

    Both file names are joined onto REPO before use.

    Args:
        taxid: Taxonomy id; passed to read_ncbi_gene2go as a one-item list.
        fin_gene2go: Name of the NCBI gene2go file, relative to REPO.
        fin_gobasic: Name of the GO obo file, relative to REPO.
    """
    gene2go_path = os.path.join(REPO, fin_gene2go)
    # Hand the path to the NCBI download helper before reading it
    dnld_ncbi_gene_file(gene2go_path, loading_bar=None)
    self.gene2go = read_ncbi_gene2go(gene2go_path, [taxid])
    self.godag = get_godag(os.path.join(REPO, fin_gobasic), loading_bar=None)
def _dnld_anno(file_anno):
    """Make sure the annotation file is present, downloading it if absent.

    Args:
        file_anno: Path of the annotation file.

    Raises:
        AssertionError: If the file is not a regular file after the download
            call, or if its size is not greater than 1000000 bytes.
    """
    if not os.path.exists(file_anno):
        # Nothing on disk yet: fetch it, then confirm that a file appeared
        dnld_ncbi_gene_file(file_anno, loading_bar=None)
        assert os.path.isfile(file_anno), "MISSING ANNO({F})".format(F=file_anno)
    # Same size check whether the file was already there or just downloaded
    assert os.path.getsize(file_anno) > 1000000, "BAD ANNO({F})".format(F=file_anno)
def _get_id2gos(file_assc, taxids, log):
    """Read gene2go associations and return them in a taxid2asscs mapping.

    Args:
        file_assc: Name of the gene2go file, relative to REPO.
        taxids: Taxonomy ids forwarded to read_ncbi_gene2go.
        log: Object with a ``write`` method; receives a one-line item count.

    Returns:
        The nested defaultdict that was passed to read_ncbi_gene2go as
        ``taxid2asscs``.
    """
    assc_path = os.path.join(REPO, file_assc)
    dnld_ncbi_gene_file(assc_path, loading_bar=None)
    # Three nested defaultdict levels; the innermost values are sets
    by_taxid = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    num_items = len(read_ncbi_gene2go(assc_path, taxids, taxid2asscs=by_taxid))
    log.write(" {N} items found in gene2go from NCBI's ftp server\n".format(N=num_items))
    return by_taxid
def _get_evidencecodes(fin_gene2go):
    """Collect every distinct evidence code found in a gene2go file.

    The file name is resolved relative to REPO. Previously the argument was
    discarded and 'gene2go' was always used; callers passing 'gene2go' see
    no change.

    Args:
        fin_gene2go: Name of the gene2go file, relative to REPO.

    Returns:
        A set of the evidence-code strings (4th tab-separated column).

    Raises:
        AssertionError: If an evidence field is shorter than 2 characters.
    """
    gene2go_path = os.path.join(REPO, fin_gene2go)
    dnld_ncbi_gene_file(gene2go_path, force_dnld=False, loading_bar=False)
    evs = set()
    with open(gene2go_path) as ifstrm:
        for line in ifstrm:
            # Lines starting with '#' are comments, not data
            if line.startswith('#'):
                continue
            flds = line.rstrip().split('\t')
            # Leading columns: taxid, geneid, go_id, evidence, qualifier
            if len(flds) >= 5:
                evidence = flds[3]
                assert len(evidence) >= 2, flds
                evs.add(evidence)
    print('{N} evidence codes in {FIN}'.format(N=len(evs), FIN=gene2go_path))
    return evs
def test_i96():
    """Try to reproduce issue#96; currently runs without raising."""
    # Error being chased: ValueError("All values in table must be nonnegative.")

    # Study and population gene ids
    print('CWD', os.getcwd())
    study_ids = _get_geneids()
    population_ids = GENEID2NT.keys()

    # Associations (taxid 9606) and the GO DAG
    print(os.getcwd())
    gene2go_path = os.path.join(REPO, 'gene2go')
    dnld_ncbi_gene_file(gene2go_path, loading_bar=None)
    gene2go = read_ncbi_gene2go(gene2go_path, [9606])
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)

    # Run the GOEA; the test passes as long as no exception is raised
    goeaobj = GOEnrichmentStudy(population_ids, gene2go, godag, methods=['fdr_bh'])
    goeaobj.run_study(study_ids)