#pVOGdb.addPvogs_old(pVOGlines,LOG_H) pVOGdb.addPvogs(PVOG_LIB_DIR, LOG_H) if CHATTY: print("pVOGs have been recorded") LOG_H.write("%s%s%s\n" % ("There are ", len(pVOGdb.pVOGlist), " pVOGs")) accessionCount = pVOGdb.getAccessionCount() LOG_H.write("%s%s\n" % ("The total number of accessions is ", accessionCount)) # Visit each pVOG and find the fasta sequence that corresponds to each member accession # Modify the header of each identified fasta to reflect its membership in the pVOG cluster if CHATTY: print("Searching sequences for each pVOG-associated accession") # Create a fasta object (to be replicated as needed) nextFasta = dbPrep_fastaSequence.fasta() # For each pVOG, get its associated peptide accessions, then # Find that fasta in the ncbi database subset, and # Tag the fasta header with the pVOG information accnCount = 0 for pVOG in pVOGdb.pVOGlist: foundCount = 0 missingCount = 0 missingList = [] for accession in pVOG.accessionList: # For each accession (approx 200k of them, members of pVOG groups) accnCount += 1 if CHATTY: print("Processing pVOG", pVOG.pVOGid, "and accession", accession) LOG_H.write( "%s%s%s%s\n" %
import timeit
import datetime
import dbPrep_fastaSequence

# Module-level debug switch; the commented-out assignment is the "on" setting.
#DEBUG = True
DEBUG = False

# VERBOSE defaults to off and is turned on only when the environment variable
# dbPrep_VERBOSE exists and holds the exact string 'True'.
# NOTE(review): `os` is used here but no `import os` is visible in this
# excerpt -- confirm it is imported earlier in the file.
VERBOSE = False
if "dbPrep_VERBOSE" in os.environ.keys():
    if os.environ["dbPrep_VERBOSE"] == 'True':
        VERBOSE = True

# Module-level flags; they are only assigned, never read, in the visible excerpt.
DO_GENE = True
DO_PROTEIN = True

# A single fasta object created once, at import time, at module scope.
fastaObj = dbPrep_fastaSequence.fasta()


class VOG(object):
    """Record for one VOG: identifier, annotation, accessions and counts."""

    def __init__(self):
        """Create an empty record: blank strings, empty list, zero counts."""
        self.VOGid = ''  # e.g., VOG0334
        self.VOGannotation = ''
        self.accnList = []  # list of accession numbers
        self.geneCount = 0
        self.peptideCount = 0  # ??? are these different?

    def printAll(self):
        """Print the record's fields to stdout, one labelled line per field.

        NOTE(review): peptideCount is not printed in the visible portion of
        this method -- confirm whether that is intentional.
        """
        print("VOG identifier:", self.VOGid)
        print("VOG annotation:", self.VOGannotation)
        print("accnList:", self.accnList)
        print("geneCount:", self.geneCount)