def mrnaExt(ID):
    """Fetch the GenBank record for a nucleotide sequence ID.

    Args:
        ID: identifier forwarded to ``Entrez.efetch`` as ``id`` for the
            ``nucleotide`` database.

    Returns:
        The record object that ``SeqIO.read`` builds from the fetched
        GenBank-formatted entry.
    """
    recData = Entrez.efetch(db="nucleotide", id=ID, rettype="gb", warning=False)
    try:
        return SeqIO.read(recData, 'genbank')
    finally:
        # Release the network handle even if parsing raises.
        recData.close()
def _parseCdsRange(line):
    """Return ``[start, end]`` parsed from a GenBank CDS feature line.

    The leading blanks and the ``CDS`` key are removed, the remainder is
    split on ``..`` and the first two pieces are converted to integers
    after dropping any ``<`` / ``>`` markers around them.

    Raises:
        ValueError: if the line does not contain a plain ``start..end``
            location (for example a ``join(...)`` or ``complement(...)``
            location, or a line with no ``..`` at all).
    """
    # str.lstrip takes a *character set*: this removes any leading run of
    # blanks and the letters C, D and S, leaving the location text.
    bounds = line.lstrip(' CDS ').split('..')
    if len(bounds) < 2:
        raise ValueError("no 'start..end' location in %r" % line)
    return [int(bounds[0].strip('<>')), int(bounds[1].strip('<>'))]


def cdsExt(ID, geneName):
    """Fetch a GenBank record and trim its sequence to the CDS region.

    The GenBank text for ``ID`` is downloaded and scanned line by line for
    CDS feature lines. When several lines parse successfully, the range
    from the last one is used. The record is then fetched again, parsed
    with ``SeqIO.read`` and its ``seq`` is replaced by the slice
    ``seq[start - 1:end]``.

    Side effects:
        Opens ``Align/<geneName up to the first '.'>.log`` in append mode
        and, when the GenBank text contains ``LOW QUALITY PROTEIN``, writes
        a note about ``ID`` to it. Prints a message for every CDS line
        whose location cannot be parsed.

    Args:
        ID: identifier forwarded to ``Entrez.efetch`` as ``id``.
        geneName: gene file name; the part before the first ``.`` names
            the log file.

    Returns:
        The record object from ``SeqIO.read`` with ``seq`` restricted to
        the CDS range.

    Raises:
        ValueError: if no CDS line with a parsable ``start..end`` range is
            found in the GenBank text.
    """
    handle = Entrez.efetch(db="nucleotide", id=ID, rettype='gb', retmode='text')
    try:
        retdata = handle.read()
    finally:
        handle.close()

    # The log file is opened (and therefore created) even when there is
    # nothing to report, matching the long-standing behaviour.
    with open("Align/" + geneName.split('.')[0] + ".log", "a") as fp:
        if 'LOW QUALITY PROTEIN' in retdata:
            fp.write('%s CDS is of low quality\n' % ID)

    cdsRange = None
    for obj in retdata.split('\n'):
        if ' CDS ' not in obj:
            continue
        try:
            # Only overwrite on success so an earlier good range survives
            # a later unparsable CDS line.
            cdsRange = _parseCdsRange(obj)
        except ValueError:
            print(
                "Problem found while extracting cds from %s. Please report this issue to ambuj (at) ufl (dot) edu"
                % obj)

    if cdsRange is None:
        # Fail with a clear message before spending a second network call.
        raise ValueError("No usable CDS range found in GenBank record %s" % ID)

    recData = Entrez.efetch(db="nucleotide", id=ID, rettype="gb", warning=False)
    try:
        record = SeqIO.read(recData, 'genbank')
    finally:
        recData.close()

    # Parsed start is shifted down by one to form the slice start; the
    # parsed end is used as the (exclusive) slice stop.
    record.seq = record.seq[cdsRange[0] - 1:cdsRange[1]]
    return record