def __init__(self, target, mode="w"):
    """Create the writer object.

    Arguments:
     - target - output stream, or path to the output file
     - mode - "w" for text output (default) or "wb" for binary output

    A stream is recognised by writing an empty string (or empty bytes
    for "wb") to it; anything without a ``write`` method is treated as
    a path and opened with the given mode.

    Raises StreamModeError if the stream rejects the empty write with a
    TypeError, and RuntimeError for any other value of mode.
    """
    # Select the empty value to write and the matching complaint up front.
    if mode == "w":
        probe = ""
        complaint = "File must be opened in text mode."
    elif mode == "wb":
        probe = b""
        complaint = "File must be opened in binary mode."
    else:
        raise RuntimeError("Unknown mode '%s'" % mode)

    try:
        target.write(probe)
    except TypeError:
        # the stream was opened in the other (text/binary) mode
        raise StreamModeError(complaint) from None
    except AttributeError:
        # no write method, so target is a path
        handle = open(target, mode)
    else:
        handle = target

    self._target = target
    self.handle = handle
def __init__(self, target, mode="w"):
    """Create the writer object.

    Arguments:
     - target - output stream, path to the output file, or None when the
       writer is only used to format strings (no stream is set up then)
     - mode - "w" for text output (default) or "wb" for binary output

    A stream is recognised by writing an empty string (or empty bytes
    for "wb") to it; anything without a ``write`` method is treated as
    a path and opened with the given mode.

    Raises StreamModeError if the stream rejects the empty write with a
    TypeError, and RuntimeError for any other value of mode.
    """
    if target is not None:
        # Select the empty value to write and the matching complaint.
        if mode == "w":
            probe = ""
            complaint = "File must be opened in text mode."
        elif mode == "wb":
            probe = b""
            complaint = "File must be opened in binary mode."
        else:
            raise RuntimeError("Unknown mode '%s'" % mode)

        try:
            target.write(probe)
        except TypeError:
            # the stream was opened in the other (text/binary) mode
            raise StreamModeError(complaint) from None
        except AttributeError:
            # no write method, so target is a path
            stream = open(target, mode)
        else:
            stream = target
        self.stream = stream

    self._target = target
def SimpleFastaParser(source):
    """Iterate over Fasta records as string tuples.

    Arguments:
     - source - input stream opened in text mode, or a path to a file

    Each record is returned as a tuple of two strings: the FASTA title
    line (without the leading '>' character and without trailing
    whitespace), and the sequence with line breaks, spaces and carriage
    returns removed. The title line is not split into an identifier
    (the first word) and a comment or description.

    >>> with open("Fasta/dups.fasta") as handle:
    ...     for values in SimpleFastaParser(handle):
    ...         print(values)
    ...
    ('alpha', 'ACGTA')
    ('beta', 'CGTC')
    ('gamma', 'CCGCC')
    ('alpha (again - this is a duplicate entry to test the indexing code)', 'ACGTA')
    ('delta', 'CGCGC')

    """
    try:
        handle = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        handle = source
        if handle.read(0) != "":
            raise StreamModeError(
                "Fasta files must be opened in text mode"
            ) from None

    try:
        # Anything before the first '>' line (blank lines, comments) is skipped.
        for line in handle:
            if line[0] == ">":
                title = line[1:].rstrip()
                break
        else:
            # never saw a title line - probably an empty file
            return

        # Collect sequence lines until the next title line. Trailing
        # whitespace is stripped per line; internal spaces and embedded \r
        # (possible in mangled files not read in universal newlines mode)
        # are removed when the record is emitted.
        chunks = []
        for line in handle:
            if line[0] != ">":
                chunks.append(line.rstrip())
                continue
            yield title, "".join(chunks).replace(" ", "").replace("\r", "")
            title = line[1:].rstrip()
            chunks = []

        # the last record has no following title line to trigger it
        yield title, "".join(chunks).replace(" ", "").replace("\r", "")
    finally:
        # only close what this function opened itself
        if handle is not source:
            handle.close()
def __init__(self, source, alphabet=None, mode="t", fmt=None):
    """Create a SequenceIterator object.

    Arguments:
     - source - input file stream, or path to input file
     - alphabet - no longer used, should be None
     - mode - "t" if the data must be read in text mode (default),
       "b" if it must be read in binary mode
     - fmt - name of the file format, only used in the error message
       raised when a stream was opened in the wrong mode

    This method MAY be overridden by any subclass.

    Note when subclassing:
     - there should be a single non-optional argument, the source.
     - you do not have to require an alphabet.
     - you can add additional optional arguments.

    Raises ValueError if alphabet is not None, or if a stream is given
    together with a mode other than "t" or "b"; raises StreamModeError
    if a stream is given that was opened in the wrong mode.
    """
    if alphabet is not None:
        raise ValueError("The alphabet argument is no longer supported")
    try:
        self.stream = open(source, "r" + mode)
        self.should_close_stream = True
    except TypeError:  # not a path, assume we received a stream
        # A zero-length read tells text streams ("") from binary ones (b"").
        if mode == "t":
            if source.read(0) != "":
                raise StreamModeError(
                    "%s files must be opened in text mode." % fmt
                ) from None
        elif mode == "b":
            if source.read(0) != b"":
                raise StreamModeError(
                    "%s files must be opened in binary mode." % fmt
                ) from None
        else:
            # "from None" hides the unrelated TypeError raised by open()
            raise ValueError("Unknown mode '%s'" % mode) from None
        self.stream = source
        self.should_close_stream = False
    try:
        self.records = self.parse(self.stream)
    except Exception:
        # do not leak a file we opened ourselves if parsing fails to start
        if self.should_close_stream:
            self.stream.close()
        raise
def IgIterator(source, alphabet=single_letter_alphabet):
    """Iterate over IntelliGenetics records (as SeqRecord objects).

    Arguments:
     - source - file-like object opened in text mode, or a path to a file
     - alphabet - optional alphabet

    The optional free format file header lines (which start with two
    semi-colons) are ignored.

    The free format commentary lines at the start of each record (which
    start with a semi-colon) are recorded as a single string with embedded
    new line characters in the SeqRecord's annotations dictionary under the
    key 'comment'.

    Examples
    --------
    >>> with open("IntelliGenetics/TAT_mase_nuc.txt") as handle:
    ...     for record in IgIterator(handle):
    ...         print("%s length %i" % (record.id, len(record)))
    ...
    A_U455 length 303
    B_HXB2R length 306
    C_UG268A length 267
    D_ELI length 309
    F_BZ163A length 309
    O_ANT70 length 342
    O_MVP5180 length 348
    CPZGAB length 309
    CPZANT length 309
    A_ROD length 390
    B_EHOA length 420
    D_MM251 length 390
    STM_STM length 387
    VER_AGM3 length 354
    GRI_AGM677 length 264
    SAB_SAB1C length 219
    SYK_SYK length 330

    """
    try:
        stream = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != "":
            raise StreamModeError(
                "IntelliGenetics files must be opened in text mode."
            ) from None

    try:
        yield from _parse(stream, alphabet)
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def __init__(self, source, mode="t", fmt=None):
    """Create an AlignmentIterator object.

    Arguments:
     - source - input file stream, or path to input file
     - mode - "t" if the data must be read in text mode (default),
       "b" if it must be read in binary mode
     - fmt - name of the file format, only used in the error message
       raised when a stream was opened in the wrong mode

    This method MAY be overridden by any subclass.

    Note when subclassing:
     - there should be a single non-optional argument, the source.
     - you can add additional optional arguments.
    """
    try:
        stream = open(source, "r" + mode)
    except TypeError:
        # not a path, assume we received a stream; a zero-length read
        # distinguishes text streams ("") from binary streams (b"")
        if mode == "t":
            if source.read(0) != "":
                raise StreamModeError(
                    "%s files must be opened in text mode." % fmt
                ) from None
        elif mode == "b":
            if source.read(0) != b"":
                raise StreamModeError(
                    "%s files must be opened in binary mode." % fmt
                ) from None
        else:
            raise ValueError("Unknown mode '%s'" % mode) from None
        stream = source
        opened_here = False
    else:
        opened_here = True

    self.stream = stream
    self.should_close_stream = opened_here

    try:
        self.alignments = self.parse(self.stream)
    except Exception:
        # do not leak a file we opened ourselves if parsing fails to start
        if self.should_close_stream:
            self.stream.close()
        raise
def __init__(self, stream_or_path, namespace=None):
    """Create the object and initialize the XML parser.

    Arguments:
     - stream_or_path - path to the file, or a stream opened in binary mode
     - namespace - accepted, but not used by this method

    The input is fed to the parser in blocks of ``self.BLOCK`` until the
    content handler reports a seqXMLversion. That value and the handler's
    source, sourceVersion, ncbiTaxID and speciesName are then stored on
    the object.

    Raises StreamModeError if a stream is given that was not opened in
    binary mode, and ValueError if the input ends before a seqXMLversion
    was found.
    """
    self.parser = sax.make_parser()
    content_handler = ContentHandler()
    self.parser.setContentHandler(content_handler)
    self.parser.setFeature(handler.feature_namespaces, True)

    try:
        opened = open(stream_or_path, "rb")
    except TypeError:
        # Not a path, assume we received a stream.
        # Make sure we got a binary handle. If we got a text handle, then
        # the parser will still run but unicode characters will be garbled
        # if the text handle was opened with a different encoding than the
        # one specified in the XML file. With a binary handle, the correct
        # encoding is picked up by the parser from the XML file.
        if stream_or_path.read(0) != b"":
            raise StreamModeError(
                "SeqXML files should be opened in binary mode"
            ) from None
        self.handle = stream_or_path
        self.should_close_handle = False
    else:
        # we received a path and opened the file ourselves
        self.handle = opened
        self.should_close_handle = True

    # Keep feeding blocks to the parser until the seqXML element with the
    # seqXMLversion has been seen.
    block_size = self.BLOCK
    version = None
    try:
        while version is None:
            chunk = self.handle.read(block_size)
            if not chunk:
                # ran out of data before finding the seqXML element
                if content_handler.startElementNS is None:
                    raise ValueError("Empty file.")
                raise ValueError("XML file contains no data.")
            self.parser.feed(chunk)
            version = content_handler.seqXMLversion
    except Exception:
        # do not leak a file we opened ourselves
        if self.should_close_handle:
            self.handle.close()
        raise

    self.seqXMLversion = version
    self.source = content_handler.source
    self.sourceVersion = content_handler.sourceVersion
    self.ncbiTaxID = content_handler.ncbiTaxID
    self.speciesName = content_handler.speciesName
def SnapGeneIterator(source):
    """Parse a SnapGene file and return a SeqRecord object.

    Argument source is a file-like object opened in binary mode, or a
    path to a file.

    Note that a SnapGene file can only contain one sequence, so this
    iterator will always return a single record.

    Raises StreamModeError if a stream is given that was not opened in
    binary mode, and ValueError if the file has no packets, does not
    start with a cookie packet, or leaves the record without a sequence.
    """
    try:
        stream = open(source, "rb")
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != b"":
            raise StreamModeError(
                "SnapGene files must be opened in binary mode."
            ) from None

    record = SeqRecord(None)
    try:
        packets = _iterate(stream)

        # The very first packet must be the cookie packet (type 0x09).
        try:
            first_packet = next(packets)
        except StopIteration:
            raise ValueError("Empty file.") from None
        packet_type, length, data = first_packet
        if packet_type != 0x09:
            raise ValueError(
                "The file does not start with a SnapGene cookie packet")
        _parse_cookie_packet(length, data, record)

        # Remaining packets are dispatched by type; unknown types are skipped.
        for packet_type, length, data in packets:
            process = _packet_handlers.get(packet_type)
            if process is None:
                continue
            process(length, data, record)
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()

    if not record.seq:
        raise ValueError("No DNA packet in file")

    yield record
def GckIterator(source):
    """Parse a GCK file and return a SeqRecord object.

    Argument source is a file-like object opened in binary mode, or a
    path to a file.

    Note that a GCK file can only contain one sequence, so this
    iterator will always return a single record.

    Raises StreamModeError if a stream is given that was not opened in
    binary mode.
    """
    try:
        stream = open(source, "rb")
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != b"":
            raise StreamModeError(
                "GCK files must be opened in binary mode."
            ) from None

    try:
        yield from _parse(stream)
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def FastaTwoLineParser(source):
    """Iterate over no-wrapping Fasta records as string tuples.

    Arguments:
     - source - input stream opened in text mode, or a path to a file

    Functionally the same as SimpleFastaParser but with a strict
    interpretation of the FASTA format as exactly two lines per
    record, the greater-than-sign identifier with description,
    and the sequence with no line wrapping.

    Any line wrapping will raise an exception, as will excess blank
    lines (other than the special case of a zero-length sequence
    as the second line of a record).

    Examples
    --------
    This file uses two lines per FASTA record:

    >>> with open("Fasta/aster_no_wrap.pro") as handle:
    ...     for title, seq in FastaTwoLineParser(handle):
    ...         print("%s = %s..." % (title, seq[:3]))
    ...
    gi|3298468|dbj|BAA31520.1| SAMIPF = GGH...

    This equivalent file uses line wrapping:

    >>> with open("Fasta/aster.pro") as handle:
    ...     for title, seq in FastaTwoLineParser(handle):
    ...         print("%s = %s..." % (title, seq[:3]))
    ...
    Traceback (most recent call last):
       ...
    ValueError: Expected FASTA record starting with '>' character. Perhaps this file is using FASTA line wrapping? Got: 'MTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI'

    """
    try:
        handle = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        handle = source
        if handle.read(0) != "":
            raise StreamModeError("Fasta files must be opened in text mode") from None

    try:
        line = None  # stays None only for an empty file
        awaiting_sequence = False  # True once a title line has been read
        for line in handle:
            if awaiting_sequence:
                if line[0] == ">":
                    raise ValueError(
                        "Two '>' FASTA lines in a row. Missing sequence line "
                        "if this is strict two-line-per-record FASTA format. "
                        f"Have '>{title}' and '{line}'"
                    )
                yield title, line.strip()
            else:
                if line[0] != ">":
                    raise ValueError(
                        "Expected FASTA record starting with '>' character. "
                        "Perhaps this file is using FASTA line wrapping? "
                        f"Got: '{line}'"
                    )
                title = line[1:].rstrip()
            awaiting_sequence = not awaiting_sequence

        if awaiting_sequence:
            # the file ended right after a title line
            raise ValueError(
                "Missing sequence line at end of file if this is strict "
                f"two-line-per-record FASTA format. Have title line '{line}'"
            )
        if line is not None:
            assert line[0] != ">", "line[0] == '>' ; this should be impossible!"
    finally:
        # only close what this function opened itself
        if handle is not source:
            handle.close()
def XdnaIterator(source):
    """Parse a Xdna file and return a SeqRecord object.

    Argument source is a file-like object in binary mode or a path to a file.

    Note that this is an "iterator" in name only since an Xdna file always
    contain a single sequence.

    Raises StreamModeError if a stream is given that was not opened in
    binary mode, and ValueError for an empty file, a truncated header,
    an unsupported version or an unknown sequence type.
    """
    try:
        stream = open(source, "rb")
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != b"":
            raise StreamModeError(
                "Xdna files must be opened in binary mode."
            ) from None

    # Parse fixed-size header and do some rudimentary checks
    #
    # The "neg_length" value is the length of the part of the sequence
    # before the nucleotide considered as the "origin" (nucleotide number 1,
    # which in DNA Strider is not always the first nucleotide).
    # Biopython's SeqRecord has no such concept of a sequence origin as far
    # as I know, so we ignore that value. SerialCloner has no such concept
    # either and always generates files with a neg_length of zero.
    try:
        header = stream.read(112)
        if not header:
            raise ValueError("Empty file.")
        if len(header) < 112:
            raise ValueError(
                "Improper header, cannot read 112 bytes from handle")

        header_fields = unpack(">BBB25xII60xI12x", header)
        version, seq_type, topology, length, neg_length, com_length = header_fields
        if version != 0:
            raise ValueError("Unsupported XDNA version")
        if seq_type not in _seq_types:
            raise ValueError("Unknown sequence type")

        # The sequence and the comment are found in all XDNA files
        sequence = _read(stream, length).decode("ASCII")
        comment = _read(stream, com_length).decode("ASCII")

        # Try to derive a name from the first "word" of the comment
        name = comment.partition(" ")[0]

        record = SeqRecord(
            Seq(sequence, _seq_types[seq_type]),
            description=comment,
            name=name,
            id=name,
        )
        if topology in _seq_topologies:
            record.annotations["topology"] = _seq_topologies[topology]

        if len(stream.read(1)) == 1:
            # This is an XDNA file with an optional annotation section.

            # Skip the overhangs as I don't know how to represent
            # them in the SeqRecord model.
            _read_overhang(stream)  # right-side overhang
            _read_overhang(stream)  # left-side overhang

            # A one-byte feature count is followed by that many features
            (num_features,) = unpack(">B", _read(stream, 1))
            for _ in range(num_features):
                _read_feature(stream, record)

        yield record
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def NibIterator(source, alphabet=None):
    """Iterate over a nib file and yield a SeqRecord.

        - source - a file-like object or a path to a file in the nib file
          format as defined by UCSC; the file must be opened in binary mode.
        - alphabet - always ignored.

    Note that a nib file always contains only one sequence record.
    The sequence of the resulting SeqRecord object should match the sequence
    generated by Jim Kent's nibFrag utility run with the -masked option.

    This function is used internally via the Bio.SeqIO functions:

    >>> from Bio import SeqIO
    >>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
    >>> print("%s %i" % (record.seq, len(record)))
    nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50

    You can also call it directly:

    >>> with open("Nib/test_even_bigendian.nib", "rb") as handle:
    ...     for record in NibIterator(handle):
    ...         print("%s %i" % (record.seq, len(record)))
    ...
    nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50

    """
    if alphabet is not None:
        raise ValueError("Alphabets are ignored.")

    try:
        stream = open(source, "rb")
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != b"":
            raise StreamModeError(
                "nib files must be opened in binary mode."
            ) from None

    try:
        magic = stream.read(4)
        if not magic:
            raise ValueError("Empty file.")

        # The byte order in which the signature was written is used to
        # decode the length field that follows it.
        signature = magic.hex()
        if signature == "3a3de96b":
            byteorder = "little"  # little-endian
        elif signature == "6be93d3a":
            byteorder = "big"  # big-endian
        else:
            raise ValueError("unexpected signature in nib header")

        length = int.from_bytes(stream.read(4), byteorder)

        # Every hex digit of the remaining data encodes one nucleotide, so
        # an odd-length sequence carries one extra padding digit.
        indices = stream.read().hex()
        if len(indices) != length + length % 2:
            raise ValueError("Unexpected file size")
        indices = indices[:length]

        if not set(indices).issubset("0123489abc"):
            raise ValueError("Unexpected sequence data found in file")

        table = str.maketrans("0123489abc", "TCAGNtcagn")
        yield SeqRecord(Seq(indices.translate(table)))
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def PirIterator(source):
    """Iterate over a PIR file and yield SeqRecord objects.

    Arguments:
     - source - file-like object opened in text mode, or a path to a file

    Any text before the first line starting with '>' is skipped. Each
    record consists of a '>XX;identifier' line (XX being a sequence type
    known to this module), a description line, and sequence lines ending
    in a '*' terminator. The sequence type is stored in the record's
    annotations dictionary under the key 'PIR-type'.

    Raises StreamModeError if a stream is given that was not opened in
    text mode, and ValueError if a record does not start with a valid
    '>XX;' prefix or its sequence lacks the '*' terminator.

    Examples
    --------
    >>> with open("NBRF/DMB_prot.pir") as handle:
    ...    for record in PirIterator(handle):
    ...        print("%s length %i" % (record.id, len(record)))
    HLA:HLA00489 length 263
    HLA:HLA00490 length 94
    HLA:HLA00491 length 94
    HLA:HLA00492 length 80
    HLA:HLA00493 length 175
    HLA:HLA01083 length 188

    """
    try:
        handle = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        handle = source
        if handle.read(0) != "":
            # The check above requires a text stream, so say so.
            raise StreamModeError(
                "PIR files must be opened in text mode.") from None

    try:
        # Skip any text before the first record (e.g. blank lines, comments)
        for line in handle:
            if line[0] == ">":
                break
        else:
            return  # Premature end of file, or just empty?

        # "line" always holds the header of the record being parsed; it is
        # set to None once the input is exhausted.
        while line is not None:
            pir_type = line[1:3]
            # Slice rather than index so that a truncated header is reported
            # as a format error instead of an IndexError.
            if pir_type not in _pir_alphabets or line[3:4] != ";":
                raise ValueError(
                    "Records should start with '>XX;' where XX is a valid sequence type"
                )
            identifier = line[4:].strip()
            description = handle.readline().strip()

            lines = []
            for line in handle:
                if line[0] == ">":
                    break
                # Remove trailing whitespace, and any internal spaces
                lines.append(line.rstrip().replace(" ", ""))
            else:
                line = None
            seq = "".join(lines)
            # endswith also copes with a record without any sequence lines
            if not seq.endswith("*"):
                # Note the * terminator is present on nucleotide sequences too,
                # it is not a stop codon!
                raise ValueError(
                    "Sequences in PIR files should include a * terminator!")

            # Return the record and then continue...
            record = SeqRecord(
                Seq(seq[:-1], _pir_alphabets[pir_type]),
                id=identifier,
                name=identifier,
                description=description,
            )
            record.annotations["PIR-type"] = pir_type
            yield record
    finally:
        # only close what this function opened itself
        if handle is not source:
            handle.close()
def PdbSeqresIterator(source):
    """Return SeqRecord objects for each chain in a PDB file.

    Arguments:
     - source - input stream opened in text mode, or a path to a file

    The sequences are derived from the SEQRES lines in the
    PDB file header, not the atoms of the 3D structure.

    Specifically, these PDB records are handled: DBREF, SEQADV, SEQRES, MODRES

    See: http://www.wwpdb.org/documentation/format23/sect3.html

    This gets called internally via Bio.SeqIO for the SEQRES based interpretation
    of the PDB file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.pdb", "pdb-seqres"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...     print(record.dbxrefs)
    ...
    Record id 1A8O:A, chain A
    ['UNP:P12497', 'UNP:POL_HV1N5']

    Equivalently,

    >>> with open("PDB/1A8O.pdb") as handle:
    ...     for record in PdbSeqresIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...         print(record.dbxrefs)
    ...
    Record id 1A8O:A, chain A
    ['UNP:P12497', 'UNP:POL_HV1N5']

    Note the chain is recorded in the annotations dictionary, and any PDB DBREF
    lines are recorded in the database cross-references list.
    """
    # Late-binding import to avoid circular dependency on SeqIO in Bio.SeqUtils
    from Bio.SeqUtils import seq1

    # Both are keyed by chain identifier.
    chains = collections.defaultdict(list)
    metadata = collections.defaultdict(list)

    try:
        stream = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != "":
            raise StreamModeError("PDB files must be opened in text mode.") from None

    try:
        rec_name = None  # stays None only if the file has no lines at all
        for line in stream:
            rec_name = line[0:6].strip()
            if rec_name == "SEQRES":
                # NB: We only actually need chain ID and the residues here;
                # commented bits are placeholders from the wwPDB spec.
                # Serial number of the SEQRES record for the current chain.
                # Starts at 1 and increments by one each line.
                # Reset to 1 for each chain.
                # ser_num = int(line[8:10])
                # Chain identifier. This may be any single legal character,
                # including a blank which is used if there is only one chain.
                chain_key = line[11]
                # Number of residues in the chain (repeated on every record)
                # num_res = int(line[13:17])
                chains[chain_key].extend(
                    seq1(res, custom_map=protein_letters_3to1)
                    for res in line[19:].split()
                )
            elif rec_name == "DBREF":
                # ID code of this entry (PDB ID)
                pdb_id = line[7:11]
                # Chain identifier.
                chain_key = line[12]
                # Initial sequence number of the PDB sequence segment.
                # seq_begin = int(line[14:18])
                # Initial insertion code of the PDB sequence segment.
                # icode_begin = line[18]
                # Ending sequence number of the PDB sequence segment.
                # seq_end = int(line[20:24])
                # Ending insertion code of the PDB sequence segment.
                # icode_end = line[24]
                # Sequence database name.
                database = line[26:32].strip()
                # Sequence database accession code.
                db_acc = line[33:41].strip()
                # Sequence database identification code.
                db_id_code = line[42:54].strip()
                # Initial sequence number of the database segment.
                # db_seq_begin = int(line[55:60])
                # Insertion code of initial residue of the segment, if PDB is the
                # reference.
                # db_icode_begin = line[60]
                # Ending sequence number of the database segment.
                # db_seq_end = int(line[62:67])
                # Insertion code of the ending residue of the segment, if PDB is the
                # reference.
                # db_icode_end = line[67]
                entry = {
                    "pdb_id": pdb_id,
                    "database": database,
                    "db_acc": db_acc,
                    "db_id_code": db_id_code,
                }
                metadata[chain_key].append(entry)
            # ENH: 'SEQADV' 'MODRES'

        if rec_name is None:
            raise ValueError("Empty file.")

        # One record per chain, in sorted chain identifier order.
        for chn_id in sorted(chains):
            record = SeqRecord(Seq("".join(chains[chn_id]), generic_protein))
            record.annotations = {"chain": chn_id}
            if chn_id not in metadata:
                record.id = chn_id
            else:
                references = metadata[chn_id]
                # id, name and description come from the first DBREF entry
                first = references[0]
                record.id = record.name = "%s:%s" % (first["pdb_id"], chn_id)
                record.description = "%s:%s %s" % (
                    first["database"],
                    first["db_acc"],
                    first["db_id_code"],
                )
                for ref in references:
                    record.dbxrefs.append("%s:%s" % (ref["database"], ref["db_acc"]))
                    record.dbxrefs.append(
                        "%s:%s" % (ref["database"], ref["db_id_code"])
                    )
            yield record
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def AbiIterator(source, alphabet=None, trim=False):
    """Return an iterator for the Abi file format.

    Arguments:
     - source - file-like object opened in binary mode, or a path to a file
     - alphabet - optional alphabet; protein and RNA alphabets are rejected.
       If None, a DNA alphabet is picked depending on whether the PBAS2
       sequence contains any of the letters K, Y, W, M, R or S.
     - trim - if True, the record is passed through _abi_trim before it
       is yielded (not applied when the file has no PBAS1/PBAS2 tag)

    A single record is yielded. Its annotations hold the extracted tags,
    the run start and finish times, and all raw tag data under the key
    'abif_raw'.

    Raises StreamModeError if a stream is given that was not opened in
    binary mode, ValueError for an invalid alphabet or an empty file,
    and OSError if the file does not start with b"ABIF".
    """
    # Only DNA alphabets make sense for ABI files.
    if alphabet is not None:
        base_alphabet = Alphabet._get_base_alphabet(alphabet)
        if isinstance(base_alphabet, Alphabet.ProteinAlphabet):
            raise ValueError("Invalid alphabet, ABI files do not hold proteins.")
        if isinstance(base_alphabet, Alphabet.RNAAlphabet):
            raise ValueError("Invalid alphabet, ABI files do not hold RNA.")

    try:
        stream = open(source, "rb")
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != b"":
            raise StreamModeError("ABI files must be opened in binary mode.") from None

    try:
        # check if input file is a valid Abi file
        marker = stream.read(4)
        if not marker:
            # handle empty file gracefully
            raise ValueError("Empty file.")
        if marker != b"ABIF":
            raise OSError("File should start ABIF, not %r" % marker)

        # dirty hack for handling time information
        times = {"RUND1": "", "RUND2": "", "RUNT1": "", "RUNT2": ""}

        # every annotation to be extracted starts out as None
        annot = dict.fromkeys(_EXTRACT.values())

        # parse header and extract data from directories
        header = struct.unpack(_HEADFMT, stream.read(struct.calcsize(_HEADFMT)))

        # Set default sample ID value, which we expect to be present in most
        # cases in the SMPL1 tag, but may be missing.
        sample_id = "<unknown id>"

        raw = {}
        for tag_name, tag_number, tag_data in _abi_parse_header(header, stream):
            key = tag_name + str(tag_number)
            raw[key] = tag_data

            if key == "PBAS2":
                # PBAS2 is base-called sequence, only available in 3530
                seq = tag_data.decode()
                if alphabet is None:
                    if set(seq).isdisjoint("KYWMRS"):
                        alphabet = unambiguous_dna
                    else:
                        alphabet = ambiguous_dna
            elif key == "PCON2":
                # PCON2 is quality values of base-called sequence
                qual = [ord(letter) for letter in tag_data.decode()]
            elif key == "SMPL1":
                # SMPL1 is sample id entered before sequencing run, it must be
                # a string.
                sample_id = _get_string_tag(tag_data)
            elif key in times:
                times[key] = tag_data
            elif key in _EXTRACT:
                annot[_EXTRACT[key]] = tag_data

        # set time annotations
        annot["run_start"] = "%s %s" % (times["RUND1"], times["RUNT1"])
        annot["run_finish"] = "%s %s" % (times["RUND2"], times["RUNT2"])

        # raw data (for advanced end users benefit)
        annot["abif_raw"] = raw

        # a file without any base-called sequence tag is treated as fsa
        is_fsa_file = "PBAS1" not in raw and "PBAS2" not in raw

        # use the file name as SeqRecord.name if available
        extension = ".fsa" if is_fsa_file else ".ab1"
        try:
            file_name = basename(stream.name).replace(extension, "")
        except AttributeError:
            file_name = ""

        if is_fsa_file:
            sample_id = _get_string_tag(raw.get("LIMS1"), sample_id)
            description = _get_string_tag(raw.get("CTID1"), "<unknown description>")
            record = SeqRecord(
                Seq(""),
                id=sample_id,
                name=file_name,
                description=description,
                annotations=annot,
            )
        else:
            record = SeqRecord(
                Seq(seq, alphabet),
                id=sample_id,
                name=file_name,
                description="",
                annotations=annot,
                letter_annotations={"phred_quality": qual},
            )

        if trim and not is_fsa_file:
            yield _abi_trim(record)
        else:
            yield record
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()
def TabIterator(source, alphabet=single_letter_alphabet):
    """Iterate over tab separated lines as SeqRecord objects.

    Each line of the file should contain one tab only, dividing the line
    into an identifier and the full sequence.

    Arguments:
     - source - file-like object opened in text mode, or a path to a file
     - alphabet - optional alphabet

    The first field is taken as the record's .id and .name (regardless of
    any spaces within the text) and the second field is the sequence.

    Any blank lines are ignored.

    Examples
    --------
    >>> with open("GenBank/NC_005816.tsv") as handle:
    ...     for record in TabIterator(handle):
    ...         print("%s length %i" % (record.id, len(record)))
    gi|45478712|ref|NP_995567.1| length 340
    gi|45478713|ref|NP_995568.1| length 260
    gi|45478714|ref|NP_995569.1| length 64
    gi|45478715|ref|NP_995570.1| length 123
    gi|45478716|ref|NP_995571.1| length 145
    gi|45478717|ref|NP_995572.1| length 357
    gi|45478718|ref|NP_995573.1| length 138
    gi|45478719|ref|NP_995574.1| length 312
    gi|45478720|ref|NP_995575.1| length 99
    gi|45478721|ref|NP_995576.1| length 90

    """
    try:
        stream = open(source)
    except TypeError:
        # not a path, so assume an already opened stream
        stream = source
        if stream.read(0) != "":
            raise StreamModeError(
                "Tab-separated plain-text files must be opened in text mode."
            ) from None

    try:
        for line in stream:
            fields = line.split("\t")
            if len(fields) != 2:
                # not exactly one tab on this line
                if line.strip() == "":
                    # It's a blank line, ignore it
                    continue
                raise ValueError(
                    "Each line should have one tab separating the"
                    " title and sequence, this line has %i tabs: %r"
                    % (line.count("\t"), line)
                )
            title = fields[0].strip()
            seq = fields[1].strip()  # removes the trailing new line
            yield SeqRecord(Seq(seq, alphabet), id=title, name=title, description="")
    finally:
        # only close what this function opened itself
        if stream is not source:
            stream.close()