def my_fastq_iter(handle, line=None, parse_description=False):
    """
    Iterator over the given FASTQ file handle returning records.

    handle is a handle to a file opened for reading; it must support
    readline() and tell().

    Yields (record, pos) tuples. record is built as
    Record(name, sequence, quality=quality), where name is the header
    line with the leading '@' removed. pos is the value of handle.tell()
    taken immediately before the record's header line was read.

    The line and parse_description arguments are accepted but never
    consulted by this implementation: reading always starts at the
    handle's current position, and the header is never split into
    name and annotations.

    Raises IOError when a record's first line does not start with '@'
    or when its third line is not exactly '+'.

    CTB: this relies on each FASTQ record being exactly 4 lines.
    """
    while True:
        pos = handle.tell()
        raw = handle.readline()
        if not raw:
            # readline() returned nothing: end of input.
            return

        # Convert before inspecting the line. On a binary handle the raw
        # line is bytes, and bytes.startswith('@') raises TypeError.
        # NOTE(review): assumes to_str returns text - confirm.
        header = to_str(raw.strip())
        if not header.startswith('@'):
            # Explicit raise rather than assert, so the check survives
            # running under `python -O`.
            raise IOError(
                "Bad FASTQ format: no '@' at beginning of line: {}".format(
                    header))
        name = header[1:]

        sequence = to_str(handle.readline().strip())

        plus = to_str(handle.readline().strip())
        if plus != '+':
            raise IOError(
                "Bad FASTQ format: expected '+' separator line, "
                "got: {}".format(plus))

        quality = to_str(handle.readline().strip())

        yield Record(name, sequence, quality=quality), pos
def my_fastq_iter(handle, line=None, parse_description=False):
    """
    Iterate over the records of the given FASTQ file handle.

    handle is a handle to a file opened for reading. line, when given,
    is used as the first header line instead of reading one from
    handle. With parse_description set, the header (minus its '@') is
    split at the first space into name and annotations; otherwise the
    whole header becomes the name and annotations is ''.

    Sequence and quality data may each span several lines. Iteration
    stops at the first empty (after stripping) header line.

    Yields Record(name=..., annotations=..., sequence=..., quality=...).

    Raises IOError if a header does not start with '@' or if the
    sequence and quality strings differ in length.
    """
    def read_stripped():
        # One stripped line from the handle, normalised through to_str.
        return to_str(handle.readline().strip())

    if line is None:
        line = handle.readline()
    header = to_str(line.strip())

    while header:
        if not header.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Name and (optional) annotations.
        title = header[1:]
        if parse_description:
            name, _, annotations = title.partition(' ')
        else:
            name, annotations = title, ''

        # Sequence lines run until an empty line or one starting with
        # '+' or '#'.
        seq_parts = []
        current = read_stripped()
        while current and not current.startswith(('+', '#')):
            seq_parts.append(current)
            current = read_stripped()
        sequence = ''.join(seq_parts)

        # The line that ended the sequence loop is discarded; quality
        # lines are gathered from the next line until they cover the
        # sequence length or an empty line is hit.
        qual_parts = []
        gathered = 0
        wanted = len(sequence)
        current = read_stripped()
        while current != '' and gathered < wanted:
            qual_parts.append(current)
            gathered += len(current)
            current = read_stripped()
        quality = ''.join(qual_parts)

        if len(sequence) != len(quality):
            raise IOError(
                'sequence and quality strings must be of equal length')

        yield Record(name=name, annotations=annotations,
                     sequence=sequence, quality=quality)

        # The line left over from the quality loop is the next header.
        header = current
def my_fasta_iter(handle, parse_description=False, line=None):
    """
    Iterate over the records of the given FASTA file handle.

    handle is a handle to a file opened for reading; it must support
    readline() and tell(). line, when given, is used as the first
    header line instead of reading one from handle. With
    parse_description set, the header (minus its '>') is split at the
    first space into name and description; otherwise the whole header
    is the name and the description is ''. Both are stripped.

    Yields (record, start) tuples. record is
    Record(name=..., description=..., sequence=...), with the sequence
    lines stripped and joined. start is the handle.tell() value taken
    just before the record's header line was read; for the first record
    it is the handle position when iteration begins.

    Raises IOError if a header line does not start with '>'.
    """
    record_start = handle.tell()
    if line is None:
        line = handle.readline()

    while line:
        header = to_str(line.strip())
        if not header.startswith('>'):
            raise IOError(
                "Bad FASTA format: no '>' at beginning of line: {}".format(
                    header))

        # Name and optional description.
        title = header[1:]
        if parse_description:
            name, _, description = title.partition(' ')
        else:
            name, description = title, ''

        # Gather sequence lines, remembering the offset in front of each
        # line read so the next header's position is known.
        chunks = []
        next_start = handle.tell()
        line = to_str(handle.readline())
        while line and not line.startswith('>'):
            chunks.append(line.strip())
            next_start = handle.tell()
            line = to_str(handle.readline())

        yield Record(name=name.strip(),
                     description=description.strip(),
                     sequence=''.join(chunks)), record_start
        record_start = next_start
def my_fasta_iter(handle, parse_description=False, line=None):
    """
    Iterator over the given FASTA file handle, returning records.

    handle is a handle to a file opened for reading (readline() and
    tell() are used). If line is supplied it is taken as the first
    header line; otherwise one is read from handle.

    Each item yielded is a (Record, offset) pair. The Record receives
    the keyword arguments name, description and sequence. The
    description is non-empty only when parse_description is true and
    the header contains a space. offset is the handle.tell() value
    recorded before that record's header line was read; the first
    record uses the position at which iteration started.

    Raises IOError when a header line lacks the leading '>'.
    """
    offset = handle.tell()
    current = handle.readline() if line is None else line

    while current:
        current = to_str(current.strip())
        if not current.startswith('>'):
            raise IOError("Bad FASTA format: no '>' at beginning of line: {}".format(current))

        # One field means there is no description; two means name plus
        # description.
        if parse_description:
            fields = current[1:].split(' ', 1)
        else:
            fields = [current[1:]]
        name = fields[0].strip()
        description = fields[1].strip() if len(fields) > 1 else ''

        # Read sequence lines until EOF or the next header, noting the
        # offset in front of every line read.
        pieces = []
        while True:
            upcoming = handle.tell()
            current = to_str(handle.readline())
            if not current or current.startswith('>'):
                break
            pieces.append(current.strip())

        yield Record(name=name, description=description,
                     sequence=''.join(pieces)), offset
        offset = upcoming