コード例 #1
0
ファイル: fasta.py プロジェクト: adina/screed
def fasta_iter(handle, parse_description=True):
    """
    Generator producing one record for each entry of a FASTA file.

    handle is a file-like object opened for reading; it is consumed one
    line at a time through readline(). Each record is created by
    _screed_record_dict() and filled with the keys 'name', 'description'
    and 'sequence'.

    When parse_description is true the header text after '>' is cut at
    its first space: the left part is the name, the right part the
    description. Otherwise the whole header is the name and the
    description is empty. Both values are stripped of surrounding
    whitespace.

    Raises IOError if a record does not start with a '>' header line.
    """
    current = handle.readline()
    while current:
        data = _screed_record_dict()

        current = current.strip()
        if not current.startswith('>'):
            raise IOError("Bad FASTA format: no '>' at beginning of line")

        title = current[1:]
        if parse_description:
            # partition() returns an empty remainder when the header has
            # no space, i.e. when there is no description.
            name, _, description = title.partition(' ')
        else:
            name, description = title, ''

        data['name'] = name.strip()
        data['description'] = description.strip()

        # Gather sequence lines until the next header or end of input;
        # the line that ends this loop is the next record's header.
        pieces = []
        while True:
            current = handle.readline()
            if not current or current.startswith('>'):
                break
            pieces.append(current.strip())

        data['sequence'] = ''.join(pieces)
        yield data
コード例 #2
0
ファイル: fastq.py プロジェクト: brtaylor92/screed
def fastq_iter(handle, line=None, parse_description=True):
    """
    Iterator over the given FASTQ file handle returning records. handle
    is a handle to a file opened for reading.

    line, when not None, is used as the first header line instead of
    reading one from handle. Iteration stops at the first empty
    (whitespace-only) line found where a header is expected, or at end
    of input.

    Each record is created by _screed_record_dict() and carries the keys
    'name', 'annotations', 'sequence' and 'accuracy'. When
    parse_description is true the header text after '@' is split at its
    first space into name and annotations; otherwise the whole header is
    the name and annotations is empty.

    Raises IOError if a header line does not start with '@', if the
    input ends before the '+' (or '#') separator line of a record, or if
    the sequence and accuracy strings differ in length.
    """
    if line is None:
        line = handle.readline()
    line = line.strip()
    while line:
        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Grab the name and (optional) annotations; partition() leaves
        # the annotations empty when the header contains no space.
        header = line[1:]
        if parse_description:
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''

        # Extract the sequence lines up to the separator. readline()
        # returns '' only at end of input, so the raw line is tested
        # before stripping: a blank line inside the sequence is still
        # tolerated, but a truncated file raises instead of looping
        # forever.
        chunks = []
        while True:
            raw = handle.readline()
            if not raw:
                raise IOError("Bad FASTQ format: unexpected end of file "
                              "before '+' separator line")
            line = raw.strip()
            if line.startswith(('+', '#')):
                break
            chunks.append(line)
        sequence = ''.join(chunks)

        # Extract the accuracy lines. Lines are counted by length rather
        # than recognised by prefix because an accuracy line may itself
        # begin with '@'. When the loop ends, `line` already holds the
        # next record's header (or '' at the end of the records).
        chunks = []
        seqlen = len(sequence)
        aclen = 0
        line = handle.readline().strip()
        while line and aclen < seqlen:
            chunks.append(line)
            aclen += len(line)
            line = handle.readline().strip()
        accuracy = ''.join(chunks)

        if len(sequence) != len(accuracy):
            raise IOError('sequence and accuracy strings must be '
                          'of equal length')

        # The record is built only after it has been validated.
        data = _screed_record_dict()
        data['name'] = name
        data['annotations'] = annotations
        data['sequence'] = sequence
        data['accuracy'] = accuracy
        yield data
コード例 #3
0
def fastq_iter(handle, line=None, parse_description=True):
    """
    Iterator over the given FASTQ file handle returning records. handle
    is a handle to a file opened for reading.

    line, when not None, is used as the first header line instead of
    reading one from handle. Iteration stops at the first empty
    (whitespace-only) line found where a header is expected, or at end
    of input.

    Each record is created by _screed_record_dict() and carries the keys
    'name', 'annotations', 'sequence' and 'quality'. When
    parse_description is true the header text after '@' is split at its
    first space into name and annotations; otherwise the whole header is
    the name and annotations is empty.

    Raises IOError if a header line does not start with '@', if the
    input ends before the '+' (or '#') separator line of a record, or if
    the sequence and quality strings differ in length.
    """
    if line is None:
        line = handle.readline()
    line = line.strip()
    while line:
        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Grab the name and (optional) annotations; partition() leaves
        # the annotations empty when the header contains no space.
        header = line[1:]
        if parse_description:
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''

        # Extract the sequence lines up to the separator. readline()
        # returns '' only at end of input, so the raw line is tested
        # before stripping: a blank line inside the sequence is still
        # tolerated, but a truncated file raises instead of looping
        # forever.
        chunks = []
        while True:
            raw = handle.readline()
            if not raw:
                raise IOError("Bad FASTQ format: unexpected end of file "
                              "before '+' separator line")
            line = raw.strip()
            if line.startswith(('+', '#')):
                break
            chunks.append(line)
        sequence = ''.join(chunks)

        # Extract the quality lines. Lines are counted by length rather
        # than recognised by prefix because a quality line may itself
        # begin with '@'. When the loop ends, `line` already holds the
        # next record's header (or '' at the end of the records).
        chunks = []
        seqlen = len(sequence)
        aclen = 0
        line = handle.readline().strip()
        while line and aclen < seqlen:
            chunks.append(line)
            aclen += len(line)
            line = handle.readline().strip()
        quality = ''.join(chunks)

        if len(sequence) != len(quality):
            raise IOError('sequence and quality strings must be '
                          'of equal length')

        # The record is built only after it has been validated.
        data = _screed_record_dict()
        data['name'] = name
        data['annotations'] = annotations
        data['sequence'] = sequence
        data['quality'] = quality
        yield data
コード例 #4
0
ファイル: fasta.py プロジェクト: luizirber/screed
def fasta_iter(handle, parse_description=True, line=None):
    """
    Generator producing one record for each entry of a FASTA file.

    handle is a file-like object opened for reading; it is consumed one
    line at a time through readline(). line, when not None, is used as
    the first header line instead of reading one from handle. Each
    record is created by _screed_record_dict() and filled with the keys
    'name', 'description' and 'sequence'.

    When parse_description is true the header text after '>' is cut at
    its first space: the left part is the name, the right part the
    description. Otherwise the whole header is the name and the
    description is empty. Both values are stripped of surrounding
    whitespace.

    Raises IOError if a record does not start with a '>' header line.
    """
    current = handle.readline() if line is None else line

    while current:
        data = _screed_record_dict()

        current = current.strip()
        if not current.startswith('>'):
            raise IOError("Bad FASTA format: no '>' at beginning of line")

        title = current[1:]
        if parse_description:
            # partition() returns an empty remainder when the header has
            # no space, i.e. when there is no description.
            name, _, description = title.partition(' ')
        else:
            name, description = title, ''

        data['name'] = name.strip()
        data['description'] = description.strip()

        # Gather sequence lines until the next header or end of input;
        # the line that ends this loop is the next record's header.
        pieces = []
        while True:
            current = handle.readline()
            if not current or current.startswith('>'):
                break
            pieces.append(current.strip())

        data['sequence'] = ''.join(pieces)
        yield data