def embl_gb_fasta(raw, ds, path=None):
    """Parse every EMBL, GenBank or FASTA record found in a text blob.

    The text is dedented and split into candidate records with a regular
    expression (FASTA entries start with ``>``; flat-file entries start with
    ``LOCUS`` or ``ID`` and end with a ``//`` line).  Each candidate is then
    tried as EMBL, GenBank and FASTA, in that order; candidates that none of
    the three parsers accept are skipped silently.

    Parameters
    ----------
    raw : str
        Text containing zero or more sequence records.
    ds : bool
        If truthy, each record is converted with ``from_SeqRecord``:
        to ``_GenbankFile`` when ``path`` is also truthy, otherwise to
        ``_Dseqrecord``.  If falsy, the objects returned by ``_SeqIO.read``
        are returned as they are (with their features re-wrapped, see below).
    path : optional
        Forwarded to ``_GenbankFile.from_SeqRecord`` when ``ds`` is truthy.

    Returns
    -------
    list
        One entry per successfully parsed record, in order of appearance.
    """
    pattern = (
        r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))"
        r"|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"
    )
    result_list = []
    # Two trailing newlines let the FASTA look-ahead (``\n\n``) terminate the
    # last record of the input.
    rawseqs = _re.findall(
        pattern, _textwrap.dedent(raw + "\n\n"), flags=_re.MULTILINE
    )

    for rawseq in rawseqs:
        # Initial topology guess: the literal word on the record's first
        # line.  It stays in force for EMBL and FASTA records; a successful
        # GenBank parse overrides it below.
        circular = "circular" in rawseq.splitlines()[0]

        # ``with`` guarantees the in-memory handle is closed even if a parser
        # raises something other than ValueError.
        with _io.StringIO(rawseq) as handle:
            try:
                parsed = _SeqIO.read(
                    handle, "embl", alphabet=_IUPACAmbiguousDNA()
                )
            except ValueError:
                handle.seek(0)
                try:
                    parsed = _SeqIO.read(
                        handle, "genbank", alphabet=_IUPACAmbiguousDNA()
                    )
                    # Second pass over the same text to read the raw
                    # residue type from the record parser.
                    handle.seek(0)
                    residue_type = _RecordParser().parse(handle).residue_type
                    circular = (
                        "circular" in residue_type
                        or parsed.annotations.get("topology") == "circular"
                    )
                except ValueError:
                    handle.seek(0)
                    try:
                        parsed = _SeqIO.read(
                            handle, "fasta", alphabet=_IUPACAmbiguousDNA()
                        )
                    except ValueError:
                        # No parser accepted this chunk; it is dropped below.
                        parsed = ""

        if not parsed:
            continue

        # Imported lazily so nothing is imported when no record parses.
        from copy import deepcopy as _deepcopy  # TODO: clean up !
        from pydna.seqfeature import SeqFeature as _SeqFeature

        # Replace every feature by a ``_SeqFeature`` instance carrying a deep
        # copy of the original feature's attributes.
        converted = []
        for feature in parsed.features:
            clone = _SeqFeature()
            clone.__dict__ = _deepcopy(feature.__dict__)
            converted.append(clone)
        parsed.features = converted

        if ds and path:
            record = _GenbankFile.from_SeqRecord(
                parsed, linear=not circular, circular=circular, path=path
            )
        elif ds:
            record = _Dseqrecord.from_SeqRecord(
                parsed, linear=not circular, circular=circular
            )
        else:
            record = parsed
        result_list.append(record)

    return result_list
def embl_gb_fasta(raw, ds, path=None):
    """Parse every EMBL, GenBank or FASTA record found in a text blob.

    The text is dedented and split into candidate records with a regular
    expression (FASTA entries start with ``>``; flat-file entries start with
    ``LOCUS`` or ``ID`` and end with a ``//`` line).  Each candidate is then
    tried as EMBL, GenBank and FASTA, in that order; candidates that none of
    the three parsers accept are skipped silently.

    A record is treated as circular when either its parsed ``topology``
    annotation contains "circular" or the word ``circular`` appears as a
    whitespace-separated token on the record's first line (both tests are
    case-insensitive).

    Parameters
    ----------
    raw : str
        Text containing zero or more sequence records.
    ds : bool
        If truthy, each record is converted with ``from_SeqRecord``:
        to ``_GenbankFile`` when ``path`` is also truthy, otherwise to
        ``_Dseqrecord``.  If falsy, the objects returned by ``_SeqIO.read``
        are returned as they are (with their features re-wrapped, see below).
    path : optional
        Forwarded to ``_GenbankFile.from_SeqRecord`` when ``ds`` is truthy.

    Returns
    -------
    list
        One entry per successfully parsed record, in order of appearance.
    """
    pattern = (
        r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|"
        r"LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"
    )
    result_list = []
    # Two trailing newlines let the FASTA look-ahead (``\n\n``) terminate the
    # last record of the input.
    rawseqs = _re.findall(
        pattern, _textwrap.dedent(raw + "\n\n"), flags=_re.MULTILINE
    )

    for rawseq in rawseqs:
        # ``with`` guarantees the in-memory handle is closed even if a parser
        # raises something other than ValueError.
        with _io.StringIO(rawseq) as handle:
            try:
                parsed = _SeqIO.read(handle, "embl")
            except ValueError:
                handle.seek(0)
                try:
                    parsed = _SeqIO.read(handle, "genbank")
                except ValueError:
                    handle.seek(0)
                    try:
                        parsed = _SeqIO.read(handle, "fasta")
                    except ValueError:
                        # No parser accepted this chunk; it is dropped below.
                        parsed = ""

        if not parsed:
            continue

        # Honour the topology annotation for whichever format parsed, not
        # only GenBank.  Previously an EMBL record relied on the first-line
        # token test alone, which an ID line written as "...; circular; ..."
        # fails because the token is "circular;".
        # NOTE(review): assumes the EMBL parser stores the ID-line topology
        # in ``annotations["topology"]`` - confirm for the Biopython version
        # in use.  Records without the annotation yield "none" here.
        topology = str(parsed.annotations.get("topology")).lower()
        # hack to pick up topology from malformed files
        first_line_tokens = rawseq.splitlines()[0].lower().split()
        circular = "circular" in topology or "circular" in first_line_tokens

        # Imported lazily so nothing is imported when no record parses.
        from copy import deepcopy as _deepcopy  # TODO: clean up !
        from pydna.seqfeature import SeqFeature as _SeqFeature

        # Replace every feature by a ``_SeqFeature`` instance carrying a deep
        # copy of the original feature's attributes.
        converted = []
        for feature in parsed.features:
            clone = _SeqFeature()
            clone.__dict__ = _deepcopy(feature.__dict__)
            converted.append(clone)
        parsed.features = converted

        if ds and path:
            record = _GenbankFile.from_SeqRecord(
                parsed, linear=not circular, circular=circular, path=path
            )
        elif ds:
            record = _Dseqrecord.from_SeqRecord(
                parsed, linear=not circular, circular=circular
            )
        else:
            record = parsed
        result_list.append(record)

    return result_list