Ejemplo n.º 1
0
def format_read_as_fna(read, qual=False):
    """Render one read from a binary SFF file as a FASTA-style record.

    The record is a header line (read name, clipped length, xy
    location, region and run timestamp, the last three decoded from
    the read name) followed by a single data line.  The data line
    holds the clipped bases, or the space-separated clipped quality
    scores when qual is True.
    """
    # TODO: Move to PyCogent
    read_name = read['Name']

    # Clip points follow Roche's convention: 1-based, with an inclusive
    # right end.  For a 0-based, half-open slice the left point is
    # lowered by one and the right point is used unchanged.
    #
    # Known quirk, kept on purpose: when the cycle count is adjusted so
    # that no bases remain past clip_qual_left, the Roche clipping
    # routine sets clip_qual_left equal to the number of bases.  With
    # the conversion above this yields a clipped sequence of length 1
    # rather than the expected 0.  Roche's own sffinfo program shows
    # the same effect, so it is left as is to stay consistent with the
    # reference implementation.
    clip_start = read['clip_qual_left'] - 1
    clip_stop = read['clip_qual_right']

    timestamp, _, region, location = decode_accession(read_name)

    # Header pieces, in the order they appear on the description line.
    header_fields = (
        '>%s' % read_name,
        ' length=%d' % (clip_stop - clip_start),
        ' xy=%04d_%04d' % location,
        ' region=%d' % region,
        ' run=R_%d_%02d_%02d_%02d_%02d_%02d_' % timestamp,
        '\n',
    )

    record = StringIO()
    for field in header_fields:
        record.write(field)

    if qual:
        clipped_scores = read['quality_scores'][clip_start:clip_stop]
        payload = ' '.join(['%d' % score for score in clipped_scores])
    else:
        payload = read['Bases'][clip_start:clip_stop]
    record.write(payload)
    record.write('\n')

    return record.getvalue()
Ejemplo n.º 2
0
def format_read_as_fna(read, qual=False):
    """Render one read from a binary SFF file as a FASTA-style record.

    The record is a header line (read name, clipped length, xy
    location, region and run timestamp, the last three decoded from
    the read name) followed by a single data line.  The data line
    holds the clipped bases, or the space-separated clipped quality
    scores when qual is True.
    """
    # TODO: Move to PyCogent
    read_name = read['Name']

    # Clip points follow Roche's convention: 1-based, with an inclusive
    # right end.  For a 0-based, half-open slice the left point is
    # lowered by one and the right point is used unchanged.
    #
    # Known quirk, kept on purpose: when the cycle count is adjusted so
    # that no bases remain past clip_qual_left, the Roche clipping
    # routine sets clip_qual_left equal to the number of bases.  With
    # the conversion above this yields a clipped sequence of length 1
    # rather than the expected 0.  Roche's own sffinfo program shows
    # the same effect, so it is left as is to stay consistent with the
    # reference implementation.
    clip_start = read['clip_qual_left'] - 1
    clip_stop = read['clip_qual_right']

    timestamp, _, region, location = decode_accession(read_name)

    # Header pieces, in the order they appear on the description line.
    header_fields = (
        '>%s' % read_name,
        ' length=%d' % (clip_stop - clip_start),
        ' xy=%04d_%04d' % location,
        ' region=%d' % region,
        ' run=R_%d_%02d_%02d_%02d_%02d_%02d_' % timestamp,
        '\n',
    )

    record = StringIO()
    for field in header_fields:
        record.write(field)

    if qual:
        clipped_scores = read['quality_scores'][clip_start:clip_stop]
        payload = ' '.join(['%d' % score for score in clipped_scores])
    else:
        payload = read['Bases'][clip_start:clip_stop]
    record.write(payload)
    record.write('\n')

    return record.getvalue()
Ejemplo n.º 3
0
 def test_decode_accession(self):
     """decode_accession returns the expected fields for a known name."""
     accession = 'GA202I001ER3QL'
     expected = ((2010, 1, 22, 13, 28, 56), '0', 1, (1843, 859))
     observed = decode_accession(accession)
     self.assertEqual(observed, expected)
Ejemplo n.º 4
0
 def test_decode_accession(self):
     """decode_accession returns the expected fields for a known name."""
     accession = 'GA202I001ER3QL'
     expected = ((2010, 1, 22, 13, 28, 56), '0', 1, (1843, 859))
     observed = decode_accession(accession)
     self.assertEqual(observed, expected)