def _generator_to_fasta(obj, fh, qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', max_width=None,
                        lowercase=None):
    """Write the records produced from ``obj`` as FASTA (and optionally QUAL).

    Parameters
    ----------
    obj : iterable
        Passed straight to ``_format_fasta_like_records``, which yields
        ``(header, sequence string, quality scores)`` tuples.
    fh : file-like
        Receives one ``>header``/sequence record per yielded tuple.
    qual : file-like or None, optional
        When not ``None``, a matching ``>header``/quality-scores record is
        written to it for every sequence.
        NOTE(review): the ``FileSentinel`` default is presumably swapped for a
        real file handle or ``None`` by the caller before this runs -- confirm.
    id_whitespace_replacement, description_newline_replacement, lowercase
        Forwarded unchanged to ``_format_fasta_like_records``.
    max_width : int or None, optional
        When not ``None``, sequence data is split with ``chunk_str`` and
        quality scores are wrapped so that lines honour this width.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than 1.
    """
    write_qual = qual is not None
    qual_wrapper = None

    if max_width is not None:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if write_qual:
            # Build the wrapper once and reuse it for every record (the
            # textwrap docs recommend reusing a TextWrapper instance when it
            # is used many times). It is configured to never break "words"
            # (i.e., integer quality scores) across lines.
            qual_wrapper = textwrap.TextWrapper(width=max_width,
                                                break_long_words=False,
                                                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        write_qual, lowercase)
    for header, seq_str, qual_scores in formatted_records:
        if max_width is not None:
            seq_str = chunk_str(seq_str, max_width, '\n')
        fh.write('>%s\n%s\n' % (header, seq_str))

        if write_qual:
            # ``np.str`` was only an alias of the builtin ``str``; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if qual_wrapper is not None:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
def _generator_to_fasta(obj, fh, qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', max_width=None):
    """Write the records produced from ``obj`` as FASTA (and optionally QUAL).

    Parameters
    ----------
    obj : iterable
        Passed straight to ``_format_fasta_like_records``, which yields
        ``(header, sequence string, quality scores)`` tuples.
    fh : file-like
        Receives one ``>header``/sequence record per yielded tuple.
    qual : file-like or None, optional
        When not ``None``, a matching ``>header``/quality-scores record is
        written to it for every sequence.
        NOTE(review): the ``FileSentinel`` default is presumably swapped for a
        real file handle or ``None`` by the caller before this runs -- confirm.
    id_whitespace_replacement, description_newline_replacement
        Forwarded unchanged to ``_format_fasta_like_records``.
    max_width : int or None, optional
        When not ``None``, sequence data is split with ``_chunk_str`` and
        quality scores are wrapped so that lines honour this width.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than 1.
    """
    write_qual = qual is not None
    qual_wrapper = None

    if max_width is not None:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if write_qual:
            # Build the wrapper once and reuse it for every record (the
            # textwrap docs recommend reusing a TextWrapper instance when it
            # is used many times). It is configured to never break "words"
            # (i.e., integer quality scores) across lines.
            qual_wrapper = textwrap.TextWrapper(
                width=max_width, break_long_words=False,
                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        write_qual)
    for header, seq_str, qual_scores in formatted_records:
        if max_width is not None:
            seq_str = _chunk_str(seq_str, max_width, '\n')
        fh.write('>%s\n%s\n' % (header, seq_str))

        if write_qual:
            # ``np.str`` was only an alias of the builtin ``str``; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if qual_wrapper is not None:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
def test_empty_str_replacement(self):
    # Format the fixture records with empty strings for both replacement
    # arguments, then compare each (header, sequence, quality) tuple against
    # the expected one.
    observed = list(_format_fasta_like_records(self.gen, '', '', False))
    expected = [
        ('', 'ACGT', range(4)),
        ('foobar', 'GAU', None),
        (' foo bar', 'TAG', None),
        ('foo bar baz', 'A', [42]),
    ]

    # Check the count first: zip() below would silently stop at the shorter
    # of the two lists.
    self.assertEqual(len(observed), len(expected))
    for observed_record, expected_record in zip(observed, expected):
        npt.assert_equal(observed_record, expected_record)
def test_empty_sequence(self):
    # The second of the three generated sequences is empty; formatting is
    # expected to raise a ValueError whose message matches '2nd.*empty'.
    def blank_seq_gen():
        seqs = (DNA('A'), Sequence(''), RNA('GG'))
        for seq in seqs:
            yield seq

    expect_error = six.assertRaisesRegex(self, ValueError, '2nd.*empty')
    with expect_error:
        records = _format_fasta_like_records(
            blank_seq_gen(), None, None, False)
        # Exhaust the records inside the context manager so that an error
        # raised lazily during iteration is still caught.
        list(records)
def test_empty_sequence(self):
    # The second of the three generated sequences is empty; formatting is
    # expected to raise a ValueError whose message matches '2nd.*empty'.
    def blank_seq_gen():
        for seq in (DNA('A'), Sequence(''), RNA('GG')):
            yield seq

    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and fall back to the old
    # spelling only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, '2nd.*empty'):
        list(_format_fasta_like_records(blank_seq_gen(), None, None, False))
def test_multi_char_replacement(self):
    # Format the fixture records with multi-character replacement strings
    # ('-.-' and '_-_') and compare each (header, sequence, quality) tuple
    # against the expected one.
    observed = list(
        _format_fasta_like_records(self.gen, '-.-', '_-_', False))
    expected = [
        ('', 'ACGT', range(4)),
        ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
        (' foo_-__-_ bar_-_', 'TAG', None),
        ('foo bar baz', 'A', [42]),
    ]

    # Check the count first: zip() below would silently stop at the shorter
    # of the two lists.
    self.assertEqual(len(observed), len(expected))
    for observed_record, expected_record in zip(observed, expected):
        npt.assert_equal(observed_record, expected_record)
def test_missing_quality_scores(self):
    # The middle sequence is built without 'quality' positional metadata.
    # With the final argument True, formatting is expected to raise a
    # ValueError that names the 2nd sequence and its quality scores.
    def missing_qual_gen():
        for seq in (RNA('A', positional_metadata={'quality': [42]}),
                    Sequence('AG'),
                    DNA('GG', positional_metadata={'quality': [41, 40]})):
            yield seq

    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and fall back to the old
    # spelling only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, '2nd sequence.*quality scores'):
        list(_format_fasta_like_records(missing_qual_gen(), '-', '-', True))
def test_missing_quality_scores(self):
    # The middle sequence is built without a ``quality`` argument. With the
    # final argument True, formatting is expected to raise a ValueError that
    # names the 2nd sequence and its quality scores.
    def missing_qual_gen():
        for seq in (RNASequence('A', quality=[42]),
                    BiologicalSequence('AG'),
                    DNASequence('GG', quality=[41, 40])):
            yield seq

    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and fall back to the old
    # spelling only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, '2nd sequence.*quality scores'):
        list(_format_fasta_like_records(missing_qual_gen(), '-', '-', True))
def test_missing_quality_scores(self):
    # The middle sequence is built without 'quality' positional metadata.
    # With the final argument True, formatting is expected to raise a
    # ValueError that names the 2nd sequence and its quality scores.
    def missing_qual_gen():
        seqs = (
            RNA('A', positional_metadata={'quality': [42]}),
            Sequence('AG'),
            DNA('GG', positional_metadata={'quality': [41, 40]}),
        )
        for seq in seqs:
            yield seq

    expect_error = six.assertRaisesRegex(
        self, ValueError, '2nd sequence.*quality scores')
    with expect_error:
        records = _format_fasta_like_records(
            missing_qual_gen(), '-', '-', True)
        # Exhaust the records inside the context manager so that an error
        # raised lazily during iteration is still caught.
        list(records)
def test_multi_char_replacement(self):
    # Multi-character replacement strings: '-.-' and '_-_' are passed as the
    # two replacement arguments and are expected to show up verbatim in the
    # resulting headers.
    want = [('', 'ACGT', range(4)),
            ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
            (' foo_-__-_ bar_-_', 'TAG', None),
            ('foo bar baz', 'A', [42])]
    got = list(_format_fasta_like_records(self.gen, '-.-', '_-_', False))

    self.assertEqual(len(got), len(want))
    # Compare record by record so a failure points at the offending tuple.
    for got_record, want_record in zip(got, want):
        npt.assert_equal(got_record, want_record)
def test_empty_str_replacement(self):
    # Empty strings are passed for both replacement arguments; the expected
    # headers below are what the formatter should yield in that case.
    want = [('', 'ACGT', range(4)),
            ('foobar', 'GAU', None),
            (' foo bar', 'TAG', None),
            ('foo bar baz', 'A', [42])]
    got = list(_format_fasta_like_records(self.gen, '', '', False))

    self.assertEqual(len(got), len(want))
    # Compare record by record so a failure points at the offending tuple.
    for got_record, want_record in zip(got, want):
        npt.assert_equal(got_record, want_record)
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' '):
    """Write the records produced from ``obj`` to ``fh`` in FASTQ layout.

    Each record is emitted as ``@header``, the sequence string, a ``+``
    separator and the encoded quality string, each followed by a newline.

    Parameters
    ----------
    obj : iterable
        Passed to ``_format_fasta_like_records`` (with quality scores
        requested), which yields ``(header, sequence, quality)`` tuples.
    fh : file-like
        Destination for the FASTQ text.
    variant, phred_offset
        Forwarded as keyword arguments to ``_encode_phred_to_qual``.
    id_whitespace_replacement, description_newline_replacement
        Forwarded unchanged to ``_format_fasta_like_records``.
    """
    records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        True)
    for header, sequence, phred_scores in records:
        encoded_quality = _encode_phred_to_qual(
            phred_scores, variant=variant, phred_offset=phred_offset)
        # Write the pieces one at a time rather than building a single
        # formatted string for the whole record.
        pieces = ('@', header, '\n', sequence, '\n+\n', encoded_quality,
                  '\n')
        for piece in pieces:
            fh.write(piece)
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', lowercase=None):
    """Write the records produced from ``obj`` to ``fh`` in FASTQ layout.

    Each record is emitted as ``@header``, the sequence string, a ``+``
    separator and the encoded quality string, each followed by a newline.

    Parameters
    ----------
    obj : iterable
        Passed to ``_format_fasta_like_records`` (with quality scores
        requested), which yields ``(header, sequence, quality)`` tuples.
    fh : file-like
        Destination for the FASTQ text.
    variant, phred_offset
        Forwarded as keyword arguments to ``_encode_phred_to_qual``.
    id_whitespace_replacement, description_newline_replacement
        Forwarded unchanged to ``_format_fasta_like_records``.
    lowercase
        Forwarded to ``_format_fasta_like_records`` as the ``lowercase``
        keyword argument.
    """
    records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        True, lowercase=lowercase)
    for header, sequence, phred_scores in records:
        encoded_quality = _encode_phred_to_qual(
            phred_scores, variant=variant, phred_offset=phred_offset)
        # Write the pieces one at a time rather than building a single
        # formatted string for the whole record.
        pieces = ('@', header, '\n', sequence, '\n+\n', encoded_quality,
                  '\n')
        for piece in pieces:
            fh.write(piece)
def test_newline_character_in_description_newline_replacement(self):
    # A description replacement string that itself contains a newline is
    # expected to be rejected with a ValueError mentioning
    # 'Newline character'.
    #
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and fall back to the old
    # spelling only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, 'Newline character'):
        list(_format_fasta_like_records(self.gen, None, 'a\nb', False))
def test_newline_character_in_description_newline_replacement(self):
    # A description replacement string that itself contains a newline is
    # expected to be rejected with a ValueError mentioning
    # 'Newline character'.
    bad_replacement = 'a\nb'
    expect_error = six.assertRaisesRegex(
        self, ValueError, 'Newline character')
    with expect_error:
        records = _format_fasta_like_records(
            self.gen, None, bad_replacement, False)
        # Exhaust the records inside the context manager so that an error
        # raised lazily during iteration is still caught.
        list(records)
def test_newline_character_in_id_whitespace_replacement(self):
    # An ID whitespace replacement string that itself contains a newline is
    # expected to be rejected with a ValueError mentioning
    # 'Newline character'.
    #
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and fall back to the old
    # spelling only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
    with assert_raises_regex(ValueError, 'Newline character'):
        list(_format_fasta_like_records(self.gen, '-\n--', ' ', False))
def test_newline_character_in_id_whitespace_replacement(self):
    # An ID whitespace replacement string that itself contains a newline is
    # expected to be rejected with a ValueError mentioning
    # 'Newline character'.
    bad_replacement = '-\n--'
    expect_error = six.assertRaisesRegex(
        self, ValueError, 'Newline character')
    with expect_error:
        records = _format_fasta_like_records(
            self.gen, bad_replacement, ' ', False)
        # Exhaust the records inside the context manager so that an error
        # raised lazily during iteration is still caught.
        list(records)