def fasta_iter(handle, parse_description=True):
    """
    Generate one record per entry read from a FASTA file handle.

    handle is a file-like object opened for reading; it is consumed
    through readline(). Each record comes from _screed_record_dict()
    and is filled in with the keys 'name', 'description' and 'sequence'.

    When parse_description is true, the header text after '>' is cut at
    its first space into name and description; otherwise the whole
    header becomes the name and the description is left empty. Both are
    stripped of surrounding whitespace.

    Raises IOError when a line expected to be a header does not begin
    with '>' once stripped.
    """
    header = handle.readline()
    while header:
        record = _screed_record_dict()

        header = header.strip()
        if not header.startswith('>'):
            raise IOError("Bad FASTA format: no '>' at beginning of line")

        title = header[1:]
        if parse_description:
            # partition() leaves the description empty when the header
            # contains no space at all.
            name, _, description = title.partition(' ')
        else:
            name, description = title, ''
        record['name'] = name.strip()
        record['description'] = description.strip()

        # Gather sequence lines until EOF or the next header. The test
        # for '>' is made on the raw (unstripped) line.
        chunks = []
        following = handle.readline()
        while following:
            if following.startswith('>'):
                break
            chunks.append(following.strip())
            following = handle.readline()
        record['sequence'] = ''.join(chunks)

        yield record
        header = following
def fastq_iter(handle, line=None, parse_description=True):
    """
    Iterator over the given FASTQ file handle returning records.

    handle is a handle to a file opened for reading; it is consumed
    through readline(). line, when given, is used as the first header
    line instead of reading one from handle.

    Each record comes from _screed_record_dict() and is filled in with
    the keys 'name', 'annotations', 'sequence' and 'accuracy'. When
    parse_description is true, the header text after '@' is cut at its
    first space into name and annotations; otherwise the whole header is
    the name and annotations is empty.

    Iteration stops at the first empty (after stripping) line found
    where a header is expected.

    Raises IOError if a header does not start with '@', if the file ends
    before the '+' (or '#') separator line of a record, or if the
    sequence and accuracy strings differ in length.
    """
    if line is None:
        line = handle.readline()
    line = line.strip()

    while line:
        data = _screed_record_dict()

        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Split the header into name and (optional) annotations;
        # partition() yields empty annotations when there is no space.
        header = line[1:]
        if parse_description:
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''
        data['name'] = name
        data['annotations'] = annotations

        # Extract the sequence lines, up to the separator line.
        sequence = []
        while True:
            raw = handle.readline()
            if not raw:
                # readline() returns an empty string at EOF forever, so a
                # truncated record would otherwise never leave this loop.
                raise IOError("Bad FASTQ format: unexpected end of file "
                              "before '+' separator line")
            line = raw.strip()
            if line.startswith(('+', '#')):
                break
            sequence.append(line)
        data['sequence'] = ''.join(sequence)

        # Extract the accuracy lines. Reading is bounded by the sequence
        # length rather than by a marker character.
        accuracy = []
        seqlen = len(data['sequence'])
        aclen = 0
        line = handle.readline().strip()
        while line and aclen < seqlen:
            accuracy.append(line)
            aclen += len(line)
            line = handle.readline().strip()
        data['accuracy'] = ''.join(accuracy)

        if len(data['accuracy']) != seqlen:
            raise IOError('sequence and accuracy strings must be '
                          'of equal length')

        yield data
def fastq_iter(handle, line=None, parse_description=True):
    """
    Iterator over the given FASTQ file handle returning records.

    handle is a handle to a file opened for reading; it is consumed
    through readline(). line, when given, is used as the first header
    line instead of reading one from handle.

    Each record comes from _screed_record_dict() and is filled in with
    the keys 'name', 'annotations', 'sequence' and 'quality'. When
    parse_description is true, the header text after '@' is cut at its
    first space into name and annotations; otherwise the whole header is
    the name and annotations is empty.

    Iteration stops at the first empty (after stripping) line found
    where a header is expected.

    Raises IOError if a header does not start with '@', if the file ends
    before the '+' (or '#') separator line of a record, or if the
    sequence and quality strings differ in length.
    """
    if line is None:
        line = handle.readline()
    line = line.strip()

    while line:
        data = _screed_record_dict()

        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Split the header into name and (optional) annotations;
        # partition() yields empty annotations when there is no space.
        header = line[1:]
        if parse_description:
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''
        data['name'] = name
        data['annotations'] = annotations

        # Extract the sequence lines, up to the separator line.
        sequence = []
        while True:
            raw = handle.readline()
            if not raw:
                # readline() returns an empty string at EOF forever, so a
                # truncated record would otherwise never leave this loop.
                raise IOError("Bad FASTQ format: unexpected end of file "
                              "before '+' separator line")
            line = raw.strip()
            if line.startswith(('+', '#')):
                break
            sequence.append(line)
        data['sequence'] = ''.join(sequence)

        # Extract the quality lines. Reading is bounded by the sequence
        # length rather than by a marker character.
        quality = []
        seqlen = len(data['sequence'])
        aclen = 0
        line = handle.readline().strip()
        while line and aclen < seqlen:
            quality.append(line)
            aclen += len(line)
            line = handle.readline().strip()
        data['quality'] = ''.join(quality)

        if len(data['quality']) != seqlen:
            raise IOError('sequence and quality strings must be '
                          'of equal length')

        yield data
def fasta_iter(handle, parse_description=True, line=None):
    """
    Generate one record per entry read from a FASTA file handle.

    handle is a file-like object opened for reading; it is consumed
    through readline(). line, when given, is taken as the first header
    line instead of reading one from handle.

    Each record comes from _screed_record_dict() and is filled in with
    the keys 'name', 'description' and 'sequence'. When
    parse_description is true, the header text after '>' is cut at its
    first space into name and description; otherwise the whole header
    becomes the name and the description is left empty. Both are
    stripped of surrounding whitespace.

    Raises IOError when a line expected to be a header does not begin
    with '>' once stripped.
    """
    header = handle.readline() if line is None else line
    while header:
        record = _screed_record_dict()

        header = header.strip()
        if not header.startswith('>'):
            raise IOError("Bad FASTA format: no '>' at beginning of line")

        title = header[1:]
        if parse_description:
            # partition() leaves the description empty when the header
            # contains no space at all.
            name, _, description = title.partition(' ')
        else:
            name, description = title, ''
        record['name'] = name.strip()
        record['description'] = description.strip()

        # Gather sequence lines until EOF or the next header. The test
        # for '>' is made on the raw (unstripped) line.
        chunks = []
        following = handle.readline()
        while following:
            if following.startswith('>'):
                break
            chunks.append(following.strip())
            following = handle.readline()
        record['sequence'] = ''.join(chunks)

        yield record
        header = following