Exemple #1
0
def _generator_to_fasta(obj,
                        fh,
                        qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ',
                        max_width=None,
                        lowercase=None):
    """Write records generated from ``obj`` to ``fh`` as FASTA text.

    Each record is written as ``>header``, a newline, the sequence string
    and a trailing newline. When ``qual`` is not ``None``, a parallel record
    holding the space-separated quality scores is written to ``qual``.

    Parameters
    ----------
    obj
        Source of records; passed unchanged to
        ``_format_fasta_like_records``.
    fh
        Object with a ``write`` method that receives the sequence records.
    qual
        Object with a ``write`` method that receives the quality records,
        or ``None`` to skip quality output. Defaults to ``FileSentinel``
        (NOTE(review): presumably replaced by a real file handle before
        this function runs -- confirm against the caller).
    id_whitespace_replacement, description_newline_replacement
        Forwarded to ``_format_fasta_like_records``.
    max_width
        If not ``None``, sequence strings are passed through ``chunk_str``
        with this width and a newline separator, and quality lines are
        wrapped to this width without splitting individual scores.
    lowercase
        Forwarded to ``_format_fasta_like_records``.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than 1.
    """
    write_qual = qual is not None
    wrap_lines = max_width is not None

    # Only built when both wrapping and quality output are requested.
    qual_wrapper = None
    if wrap_lines:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if write_qual:
            # define text wrapper for splitting quality scores here for
            # efficiency. textwrap docs recommend reusing a TextWrapper
            # instance when it is used many times. configure text wrapper to
            # never break "words" (i.e., integer quality scores) across lines
            qual_wrapper = textwrap.TextWrapper(width=max_width,
                                                break_long_words=False,
                                                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        write_qual, lowercase)
    for header, seq_str, qual_scores in formatted_records:
        if wrap_lines:
            seq_str = chunk_str(seq_str, max_width, '\n')

        fh.write('>%s\n%s\n' % (header, seq_str))

        if write_qual:
            # Use the builtin ``str`` as the dtype: ``np.str`` was only a
            # deprecated alias for it and no longer exists in NumPy >= 1.24.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if wrap_lines:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
Exemple #2
0
def _generator_to_fasta(obj, fh, qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', max_width=None):
    """Write records generated from ``obj`` to ``fh`` as FASTA text.

    Each record is written as ``>header``, a newline, the sequence string
    and a trailing newline. When ``qual`` is not ``None``, a parallel record
    holding the space-separated quality scores is written to ``qual``.

    Parameters
    ----------
    obj
        Source of records; passed unchanged to
        ``_format_fasta_like_records``.
    fh
        Object with a ``write`` method that receives the sequence records.
    qual
        Object with a ``write`` method that receives the quality records,
        or ``None`` to skip quality output. Defaults to ``FileSentinel``
        (NOTE(review): presumably replaced by a real file handle before
        this function runs -- confirm against the caller).
    id_whitespace_replacement, description_newline_replacement
        Forwarded to ``_format_fasta_like_records``.
    max_width
        If not ``None``, sequence strings are passed through ``_chunk_str``
        with this width and a newline separator, and quality lines are
        wrapped to this width without splitting individual scores.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than 1.
    """
    write_qual = qual is not None
    wrap_lines = max_width is not None

    # Only built when both wrapping and quality output are requested.
    qual_wrapper = None
    if wrap_lines:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if write_qual:
            # define text wrapper for splitting quality scores here for
            # efficiency. textwrap docs recommend reusing a TextWrapper
            # instance when it is used many times. configure text wrapper to
            # never break "words" (i.e., integer quality scores) across lines
            qual_wrapper = textwrap.TextWrapper(
                width=max_width, break_long_words=False,
                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        write_qual)
    for header, seq_str, qual_scores in formatted_records:
        if wrap_lines:
            seq_str = _chunk_str(seq_str, max_width, '\n')

        fh.write('>%s\n%s\n' % (header, seq_str))

        if write_qual:
            # Use the builtin ``str`` as the dtype: ``np.str`` was only a
            # deprecated alias for it and no longer exists in NumPy >= 1.24.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if wrap_lines:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
Exemple #3
0
    def test_empty_str_replacement(self):
        # Both replacement arguments are the empty string here; the records
        # produced from self.gen must match the tuples below one-to-one.
        expected = [
            ('', 'ACGT', range(4)),
            ('foobar', 'GAU', None),
            (' foo bar', 'TAG', None),
            ('foo bar baz', 'A', [42]),
        ]
        observed = list(_format_fasta_like_records(self.gen, '', '', False))

        # Compare lengths first: zip() below would silently drop extras.
        self.assertEqual(len(observed), len(expected))
        for got, want in zip(observed, expected):
            npt.assert_equal(got, want)
Exemple #4
0
    def test_empty_sequence(self):
        # The second yielded sequence is empty; formatting must fail with a
        # ValueError whose message matches '2nd.*empty'.
        def seqs_with_blank():
            for record in (DNA('A'), Sequence(''), RNA('GG')):
                yield record

        with six.assertRaisesRegex(self, ValueError, '2nd.*empty'):
            formatted = _format_fasta_like_records(
                seqs_with_blank(), None, None, False)
            # Drain the result so every record is actually processed.
            list(formatted)
Exemple #5
0
    def test_empty_sequence(self):
        # The second yielded sequence is empty; formatting must fail with a
        # ValueError whose message matches '2nd.*empty'.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
        # assertRaisesRegex and is absent from Python >= 3.12.
        def seqs_with_blank():
            for record in (DNA('A'), Sequence(''), RNA('GG')):
                yield record

        with self.assertRaisesRegexp(ValueError, '2nd.*empty'):
            formatted = _format_fasta_like_records(
                seqs_with_blank(), None, None, False)
            # Drain the result so every record is actually processed.
            list(formatted)
Exemple #6
0
    def test_multi_char_replacement(self):
        # Multi-character replacement strings ('-.-' and '_-_') are passed;
        # the records produced from self.gen must match the tuples below.
        expected = [
            ('', 'ACGT', range(4)),
            ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
            (' foo_-__-_ bar_-_', 'TAG', None),
            ('foo bar baz', 'A', [42]),
        ]
        observed = list(
            _format_fasta_like_records(self.gen, '-.-', '_-_', False))

        # Compare lengths first: zip() below would silently drop extras.
        self.assertEqual(len(observed), len(expected))
        for got, want in zip(observed, expected):
            npt.assert_equal(got, want)
Exemple #7
0
    def test_missing_quality_scores(self):
        # The second sequence is built without 'quality' positional
        # metadata while the final argument is True; the expected failure
        # is a ValueError matching '2nd sequence.*quality scores'.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
        # assertRaisesRegex and is absent from Python >= 3.12.
        def seqs_lacking_quality():
            with_one_score = RNA('A', positional_metadata={'quality': [42]})
            without_scores = Sequence('AG')
            with_two_scores = DNA(
                'GG', positional_metadata={'quality': [41, 40]})
            for record in (with_one_score, without_scores, with_two_scores):
                yield record

        with self.assertRaisesRegexp(ValueError,
                                     '2nd sequence.*quality scores'):
            formatted = _format_fasta_like_records(
                seqs_lacking_quality(), '-', '-', True)
            # Drain the result so every record is actually processed.
            list(formatted)
Exemple #8
0
    def test_missing_quality_scores(self):
        # The second sequence is built without a ``quality`` argument while
        # the final argument is True; the expected failure is a ValueError
        # matching '2nd sequence.*quality scores'.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
        # assertRaisesRegex and is absent from Python >= 3.12.
        def seqs_lacking_quality():
            with_one_score = RNASequence('A', quality=[42])
            without_scores = BiologicalSequence('AG')
            with_two_scores = DNASequence('GG', quality=[41, 40])
            for record in (with_one_score, without_scores, with_two_scores):
                yield record

        with self.assertRaisesRegexp(ValueError,
                                     '2nd sequence.*quality scores'):
            formatted = _format_fasta_like_records(
                seqs_lacking_quality(), '-', '-', True)
            # Drain the result so every record is actually processed.
            list(formatted)
    def test_missing_quality_scores(self):
        # Three sequences are fed in; only the middle one is constructed
        # without ``quality``. With the final argument set to True the call
        # is expected to raise ValueError matching
        # '2nd sequence.*quality scores'.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
        # assertRaisesRegex and is absent from Python >= 3.12.
        def quality_gap_gen():
            first = RNASequence('A', quality=[42])
            second = BiologicalSequence('AG')
            third = DNASequence('GG', quality=[41, 40])
            for item in (first, second, third):
                yield item

        with self.assertRaisesRegexp(ValueError,
                                     '2nd sequence.*quality scores'):
            result = _format_fasta_like_records(
                quality_gap_gen(), '-', '-', True)
            # Exhaust the result so each record is visited.
            list(result)
Exemple #10
0
    def test_missing_quality_scores(self):
        # The second sequence is built without 'quality' positional
        # metadata while the final argument is True; the expected failure
        # is a ValueError matching '2nd sequence.*quality scores'.
        def seqs_lacking_quality():
            with_one_score = RNA('A', positional_metadata={'quality': [42]})
            without_scores = Sequence('AG')
            with_two_scores = DNA(
                'GG', positional_metadata={'quality': [41, 40]})
            for record in (with_one_score, without_scores, with_two_scores):
                yield record

        with six.assertRaisesRegex(self, ValueError,
                                   '2nd sequence.*quality scores'):
            formatted = _format_fasta_like_records(
                seqs_lacking_quality(), '-', '-', True)
            # Drain the result so every record is actually processed.
            list(formatted)
Exemple #11
0
    def test_multi_char_replacement(self):
        # '-.-' and '_-_' are the two replacement strings under test; the
        # formatted output of self.gen is checked record by record.
        wanted_records = [('', 'ACGT', range(4)),
                          ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
                          (' foo_-__-_ bar_-_', 'TAG', None),
                          ('foo bar baz', 'A', [42])]
        actual_records = list(
            _format_fasta_like_records(self.gen, '-.-', '_-_', False))

        # Length check guards against zip() truncating the comparison.
        self.assertEqual(len(actual_records), len(wanted_records))
        for pair in zip(actual_records, wanted_records):
            npt.assert_equal(*pair)
Exemple #12
0
    def test_empty_str_replacement(self):
        # Empty strings are used for both replacement arguments; the
        # formatted output of self.gen is checked record by record.
        wanted_records = [('', 'ACGT', range(4)),
                          ('foobar', 'GAU', None),
                          (' foo bar', 'TAG', None),
                          ('foo bar baz', 'A', [42])]
        actual_records = list(
            _format_fasta_like_records(self.gen, '', '', False))

        # Length check guards against zip() truncating the comparison.
        self.assertEqual(len(actual_records), len(wanted_records))
        for pair in zip(actual_records, wanted_records):
            npt.assert_equal(*pair)
Exemple #13
0
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' '):
    """Write records generated from ``obj`` to ``fh`` as FASTQ text.

    For every record the following pieces are written, each with its own
    ``fh.write`` call: ``'@'``, the header, a newline, the sequence string,
    ``'\\n+\\n'``, the encoded quality string and a final newline.

    Parameters
    ----------
    obj
        Source of records; passed unchanged to
        ``_format_fasta_like_records``.
    fh
        Object with a ``write`` method that receives the output.
    variant, phred_offset
        Forwarded as keyword arguments to ``_encode_phred_to_qual``.
    id_whitespace_replacement, description_newline_replacement
        Forwarded to ``_format_fasta_like_records``.
    """
    # The fourth positional argument is always True here (the FASTA writer
    # passes ``qual is not None`` in the same position).
    records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement, True)
    for header, seq_str, qual_scores in records:
        encoded_qual = _encode_phred_to_qual(
            qual_scores, variant=variant, phred_offset=phred_offset)
        # One write per piece, in this exact order.
        pieces = ('@', header, '\n', seq_str, '\n+\n', encoded_qual, '\n')
        for piece in pieces:
            fh.write(piece)
Exemple #14
0
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', lowercase=None):
    """Write records generated from ``obj`` to ``fh`` as FASTQ text.

    For every record the following pieces are written, each with its own
    ``fh.write`` call: ``'@'``, the header, a newline, the sequence string,
    ``'\\n+\\n'``, the encoded quality string and a final newline.

    Parameters
    ----------
    obj
        Source of records; passed unchanged to
        ``_format_fasta_like_records``.
    fh
        Object with a ``write`` method that receives the output.
    variant, phred_offset
        Forwarded as keyword arguments to ``_encode_phred_to_qual``.
    id_whitespace_replacement, description_newline_replacement
        Forwarded to ``_format_fasta_like_records``.
    lowercase
        Forwarded by keyword to ``_format_fasta_like_records``.
    """
    # The fourth positional argument is always True here (the FASTA writer
    # passes ``qual is not None`` in the same position).
    records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement, True,
        lowercase=lowercase)
    for header, seq_str, qual_scores in records:
        encoded_qual = _encode_phred_to_qual(
            qual_scores, variant=variant, phred_offset=phred_offset)
        # One write per piece, in this exact order.
        pieces = ('@', header, '\n', seq_str, '\n+\n', encoded_qual, '\n')
        for piece in pieces:
            fh.write(piece)
Exemple #15
0
 def test_newline_character_in_description_newline_replacement(self):
     # A replacement string that itself contains a newline is expected to be
     # rejected with a ValueError matching 'Newline character'.
     # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
     # assertRaisesRegex and is absent from Python >= 3.12.
     bad_replacement = 'a\nb'
     with self.assertRaisesRegexp(ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(
             self.gen, None, bad_replacement, False)
         # Drain the result so the records are actually processed.
         list(formatted)
Exemple #16
0
 def test_newline_character_in_description_newline_replacement(self):
     # A replacement string that itself contains a newline is expected to be
     # rejected with a ValueError matching 'Newline character'.
     bad_replacement = 'a\nb'
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(
             self.gen, None, bad_replacement, False)
         # Drain the result so the records are actually processed.
         list(formatted)
Exemple #17
0
 def test_newline_character_in_id_whitespace_replacement(self):
     # A replacement string that itself contains a newline is expected to be
     # rejected with a ValueError matching 'Newline character'.
     # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
     # assertRaisesRegex and is absent from Python >= 3.12.
     bad_replacement = '-\n--'
     with self.assertRaisesRegexp(ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(
             self.gen, bad_replacement, ' ', False)
         # Drain the result so the records are actually processed.
         list(formatted)
Exemple #18
0
 def test_newline_character_in_id_whitespace_replacement(self):
     # A replacement string that itself contains a newline is expected to be
     # rejected with a ValueError matching 'Newline character'.
     bad_replacement = '-\n--'
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(
             self.gen, bad_replacement, ' ', False)
         # Drain the result so the records are actually processed.
         list(formatted)