def format_read_as_fna(read, qual=False):
    """Format a single read from a binary SFF file as a FASTA record.

    If qual is True, output the qual values instead of the bases.

    Parameters
    ----------
    read : mapping
        A parsed SFF read.  The keys 'Name', 'clip_qual_left' and
        'clip_qual_right' are always read; 'quality_scores' is read when
        qual is True and 'Bases' otherwise.
    qual : bool
        Select quality scores (space-separated integers) instead of bases
        for the second line of the record.

    Returns
    -------
    str
        Two newline-terminated lines: a header of the form
        '>NAME length=N xy=XXXX_YYYY region=R run=R_Y_MM_DD_hh_mm_ss_'
        followed by the clipped bases or quality scores.
    """
    # TODO: Move to PyCogent
    name = read['Name']
    values = read['quality_scores'] if qual else read['Bases']

    # Roche uses 1-based indexing, where the right index is inclusive.
    # To transform to 0-based indices, where the right index is not
    # inclusive, we subtract 1 from the left index, but leave the
    # right index intact.
    #
    # The SFF format defines a clip value of 0 as "no clipping".  Taken
    # literally, a left clip of 0 would become a start index of -1 and a
    # right clip of 0 would produce an empty slice, so 0 is mapped to the
    # first base and to the full read length, respectively.
    start_idx = max(read['clip_qual_left'], 1) - 1
    end_idx = read['clip_qual_right'] or len(values)

    # A surprising result is produced if the number of cycles are
    # adjusted such that no bases remain past clip_qual_left.  In the
    # clipping routine, the Roche software sets clip_qual_left to be
    # equal to the number of bases.  Using our indexing scheme, the
    # resulting sequence is of length 1 after clipping (one would
    # expect a length of 0).  We would fix this issue, if the effect
    # were not present in the output from Roche's sffinfo program.  We
    # retain this arguably incorrect behavior to be consistent with
    # the reference implementation.
    timestamp, _, region, location = decode_accession(name)
    header = ''.join([
        '>%s' % name,
        ' length=%d' % (end_idx - start_idx),
        ' xy=%04d_%04d' % location,
        ' region=%d' % region,
        ' run=R_%d_%02d_%02d_%02d_%02d_%02d_' % timestamp,
    ])

    clipped = values[start_idx:end_idx]
    if qual:
        body = ' '.join(['%d' % score for score in clipped])
    else:
        body = clipped
    return header + '\n' + body + '\n'
def test_decode_accession(self):
    """decode_accession returns the expected 4-tuple for a known accession.

    The result is compared against a timestamp 6-tuple, a second field
    of '0', a region number and a two-element location.
    """
    accession = 'GA202I001ER3QL'
    expected = ((2010, 1, 22, 13, 28, 56), '0', 1, (1843, 859))
    self.assertEqual(decode_accession(accession), expected)
def test_decode_accession(self):
    """Check every field decode_accession extracts from a sample accession."""
    # Expected pieces, named in the order they appear in the result.
    timestamp = (2010, 1, 22, 13, 28, 56)
    xy_location = (1843, 859)

    observed = decode_accession('GA202I001ER3QL')

    self.assertEqual(observed, (timestamp, '0', 1, xy_location))