예제 #1
0
def _generator_to_fasta(obj, fh, qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', max_width=None,
                        lowercase=None):
    """Write the sequences produced by ``obj`` to ``fh`` as FASTA records.

    Every record is written as a ``>header`` line followed by the sequence
    data. When ``qual`` is not ``None`` a parallel record, holding the
    space-separated quality scores under the same header, is written to
    ``qual``.

    Parameters
    ----------
    obj : iterable
        Source of sequences; handed to ``_format_fasta_like_records``.
    fh : writable file-like object
        Destination for the FASTA records.
    qual : writable file-like object or None, optional
        Destination for the quality score records; ``None`` disables
        quality output. NOTE(review): the ``FileSentinel`` default is
        presumably resolved by the caller before this function runs --
        confirm.
    id_whitespace_replacement, description_newline_replacement, lowercase
        Forwarded unchanged to ``_format_fasta_like_records``.
    max_width : int or None, optional
        Line width used to split sequence data (via ``chunk_str``) and to
        wrap quality scores. ``None`` applies no splitting or wrapping.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than one.
    """
    if max_width is not None:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if qual is not None:
            # define text wrapper for splitting quality scores here for
            # efficiency. textwrap docs recommend reusing a TextWrapper
            # instance when it is used many times. configure text wrapper to
            # never break "words" (i.e., integer quality scores) across lines
            qual_wrapper = textwrap.TextWrapper(
                width=max_width, break_long_words=False,
                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        qual is not None, lowercase)
    for header, seq_str, qual_scores in formatted_records:
        if max_width is not None:
            seq_str = chunk_str(seq_str, max_width, '\n')

        fh.write('>%s\n%s\n' % (header, seq_str))

        if qual is not None:
            # ``np.str`` was only an alias of the builtin ``str`` and has
            # been removed from NumPy (1.24+); the builtin is equivalent.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if max_width is not None:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
예제 #2
0
    def test_empty_sequence(self):
        # The 2nd record is an empty Sequence; the formatter must reject it
        # with a ValueError whose message matches '2nd.*empty'.
        def blank_seq_gen():
            for seq in (DNA('A'), Sequence(''), RNA('GG')):
                yield seq

        with self.assertRaisesRegex(ValueError, r'2nd.*empty'):
            records = _format_fasta_like_records(blank_seq_gen(), None, None,
                                                 False)
            list(records)
예제 #3
0
    def test_empty_str_replacement(self):
        # Both replacement arguments are the empty string.
        expected = [
            ("", "ACGT", range(4)),
            ("foobar", "GAU", None),
            (" foo bar", "TAG", None),
            ("foo bar baz", "A", [42]),
        ]
        observed = list(_format_fasta_like_records(self.gen, "", "", False))

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #4
0
def _generator_to_fasta(obj,
                        fh,
                        qual=FileSentinel,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ',
                        max_width=None,
                        lowercase=None):
    """Write the sequences produced by ``obj`` to ``fh`` as FASTA records.

    Every record is written as a ``>header`` line followed by the sequence
    data. When ``qual`` is not ``None`` a parallel record, holding the
    space-separated quality scores under the same header, is written to
    ``qual``.

    Parameters
    ----------
    obj : iterable
        Source of sequences; handed to ``_format_fasta_like_records``.
    fh : writable file-like object
        Destination for the FASTA records.
    qual : writable file-like object or None, optional
        Destination for the quality score records; ``None`` disables
        quality output. NOTE(review): the ``FileSentinel`` default is
        presumably resolved by the caller before this function runs --
        confirm.
    id_whitespace_replacement, description_newline_replacement, lowercase
        Forwarded unchanged to ``_format_fasta_like_records``.
    max_width : int or None, optional
        Line width used to split sequence data (via ``chunk_str``) and to
        wrap quality scores. ``None`` applies no splitting or wrapping.

    Raises
    ------
    ValueError
        If ``max_width`` is given and is less than one.
    """
    if max_width is not None:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width)
        if qual is not None:
            # define text wrapper for splitting quality scores here for
            # efficiency. textwrap docs recommend reusing a TextWrapper
            # instance when it is used many times. configure text wrapper to
            # never break "words" (i.e., integer quality scores) across lines
            qual_wrapper = textwrap.TextWrapper(width=max_width,
                                                break_long_words=False,
                                                break_on_hyphens=False)

    formatted_records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement,
        qual is not None, lowercase)
    for header, seq_str, qual_scores in formatted_records:
        if max_width is not None:
            seq_str = chunk_str(seq_str, max_width, '\n')

        fh.write('>%s\n%s\n' % (header, seq_str))

        if qual is not None:
            # ``np.str`` was only an alias of the builtin ``str`` and has
            # been removed from NumPy (1.24+); the builtin is equivalent.
            qual_str = ' '.join(np.asarray(qual_scores, dtype=str))
            if max_width is not None:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write('>%s\n%s\n' % (header, qual_str))
예제 #5
0
    def test_empty_sequence(self):
        # An empty Sequence sits in the 2nd position of the stream; a
        # ValueError matching '2nd.*empty' is expected.
        def blank_seq_gen():
            sequences = (DNA('A'), Sequence(''), RNA('GG'))
            for sequence in sequences:
                yield sequence

        with self.assertRaisesRegex(ValueError, '2nd.*empty'):
            formatted = _format_fasta_like_records(blank_seq_gen(), None,
                                                   None, False)
            list(formatted)
예제 #6
0
    def test_empty_str_replacement(self):
        # Both replacement arguments are the empty string.
        expected = [
            ('', 'ACGT', range(4)),
            ('foobar', 'GAU', None),
            (' foo bar', 'TAG', None),
            ('foo bar baz', 'A', [42]),
        ]
        observed = list(_format_fasta_like_records(self.gen, '', '', False))

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #7
0
    def test_multi_char_replacement(self):
        # Replacement strings longer than one character: '-.-' and '_-_'.
        expected = [
            ('', 'ACGT', range(4)),
            ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
            (' foo_-__-_ bar_-_', 'TAG', None),
            ('foo bar baz', 'A', [42]),
        ]
        observed = list(
            _format_fasta_like_records(self.gen, '-.-', '_-_', False))

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #8
0
    def test_missing_quality_scores(self):
        # The 2nd sequence carries no 'quality' positional metadata while
        # the final argument to the formatter is True.
        def missing_qual_gen():
            sequences = (
                RNA('A', positional_metadata={'quality': [42]}),
                Sequence('AG'),
                DNA('GG', positional_metadata={'quality': [41, 40]}),
            )
            for sequence in sequences:
                yield sequence

        expected_message = r'2nd sequence.*quality scores'
        with self.assertRaisesRegex(ValueError, expected_message):
            records = _format_fasta_like_records(missing_qual_gen(), '-', '-',
                                                 True)
            list(records)
예제 #9
0
    def test_missing_quality_scores(self):
        # The 2nd sequence carries no 'quality' positional metadata while
        # the final argument to the formatter is True.
        def missing_qual_gen():
            sequences = (
                RNA('A', positional_metadata={'quality': [42]}),
                Sequence('AG'),
                DNA('GG', positional_metadata={'quality': [41, 40]}),
            )
            for sequence in sequences:
                yield sequence

        expected_message = '2nd sequence.*quality scores'
        with self.assertRaisesRegex(ValueError, expected_message):
            records = _format_fasta_like_records(missing_qual_gen(), '-', '-',
                                                 True)
            list(records)
예제 #10
0
    def test_missing_quality_scores(self):
        # The 2nd sequence carries no "quality" positional metadata while
        # the final argument to the formatter is True.
        def missing_qual_gen():
            with_one_score = RNA("A", positional_metadata={"quality": [42]})
            without_scores = Sequence("AG")
            with_two_scores = DNA(
                "GG", positional_metadata={"quality": [41, 40]})
            for sequence in (with_one_score, without_scores, with_two_scores):
                yield sequence

        expected_message = "2nd sequence.*quality scores"
        with self.assertRaisesRegex(ValueError, expected_message):
            records = _format_fasta_like_records(
                missing_qual_gen(), "-", "-", True)
            list(records)
예제 #11
0
    def test_empty_str_replacement(self):
        # Both replacement arguments are the empty string.
        expected = [('', 'ACGT', range(4)), ('foobar', 'GAU', None),
                    (' foo bar', 'TAG', None), ('foo bar baz', 'A', [42])]
        records = _format_fasta_like_records(self.gen, '', '', False)
        observed = list(records)

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #12
0
    def test_multi_char_replacement(self):
        # Replacement strings longer than one character: "-.-" and "_-_".
        expected = [("", "ACGT", range(4)),
                    ("-.--.-foo-.--.--.--.-bar-.-", "GAU", None),
                    (" foo_-__-_ bar_-_", "TAG", None),
                    ("foo bar baz", "A", [42])]
        records = _format_fasta_like_records(self.gen, "-.-", "_-_", False)
        observed = list(records)

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #13
0
    def test_multi_char_replacement(self):
        # Replacement strings longer than one character: '-.-' and '_-_'.
        expected = [('', 'ACGT', range(4)),
                    ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
                    (' foo_-__-_ bar_-_', 'TAG', None),
                    ('foo bar baz', 'A', [42])]
        records = _format_fasta_like_records(self.gen, '-.-', '_-_', False)
        observed = list(records)

        self.assertEqual(len(observed), len(expected))
        for obs_record, exp_record in zip(observed, expected):
            npt.assert_equal(obs_record, exp_record)
예제 #14
0
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', lowercase=None):
    """Write the sequences produced by ``obj`` to ``fh`` as FASTQ records.

    Each record is emitted as an ``@header`` line, the sequence, a ``+``
    line and the quality string returned by ``_encode_phred_to_qual`` for
    the given ``variant`` / ``phred_offset``. The remaining arguments are
    forwarded to ``_format_fasta_like_records``.
    """
    records = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement, True,
        lowercase=lowercase)
    for header, seq_str, qual_scores in records:
        encoded_qual = _encode_phred_to_qual(
            qual_scores, variant=variant, phred_offset=phred_offset)
        # One write per piece, in record order: header line, sequence line,
        # '+' separator line, quality line.
        pieces = ('@', header, '\n', seq_str, '\n+\n', encoded_qual, '\n')
        for piece in pieces:
            fh.write(piece)
예제 #15
0
def _generator_to_fastq(obj, fh, variant=None, phred_offset=None,
                        id_whitespace_replacement='_',
                        description_newline_replacement=' ', lowercase=None):
    """Write the sequences produced by ``obj`` to ``fh`` as FASTQ records.

    Each record is emitted as an ``@header`` line, the sequence, a ``+``
    line and the quality string returned by ``_encode_phred_to_qual`` for
    the given ``variant`` / ``phred_offset``. The remaining arguments are
    forwarded to ``_format_fasta_like_records``.
    """
    record_iter = _format_fasta_like_records(
        obj, id_whitespace_replacement, description_newline_replacement, True,
        lowercase=lowercase)
    for record_header, sequence_text, scores in record_iter:
        quality_text = _encode_phred_to_qual(scores, variant=variant,
                                             phred_offset=phred_offset)
        # Same seven writes per record, in order: '@', header, newline,
        # sequence, '+' separator, quality string, newline.
        for fragment in ('@', record_header, '\n', sequence_text, '\n+\n',
                         quality_text, '\n'):
            fh.write(fragment)
예제 #16
0
    def test_empty_sequence(self):
        # An empty Sequence sits in the 2nd position of the stream; a
        # ValueError matching "2nd.*empty" is expected.
        def blank_seq_gen():
            for seq in (DNA("A"), Sequence(""), RNA("GG")):
                yield seq

        with self.assertRaisesRegex(ValueError, "2nd.*empty"):
            records = _format_fasta_like_records(
                blank_seq_gen(), None, None, False)
            list(records)
예제 #17
0
 def test_newline_character_in_description_newline_replacement(self):
     # A description replacement string that itself contains a newline is
     # expected to raise a ValueError.
     with self.assertRaisesRegex(ValueError, "Newline character"):
         records = _format_fasta_like_records(self.gen, None, "a\nb", False)
         list(records)
예제 #18
0
 def test_newline_character_in_id_whitespace_replacement(self):
     # An ID whitespace replacement string that contains a newline is
     # expected to raise a ValueError.
     with self.assertRaisesRegex(ValueError, "Newline character"):
         records = _format_fasta_like_records(self.gen, "-\n--", " ", False)
         list(records)
예제 #19
0
 def test_newline_character_in_description_newline_replacement(self):
     # A description replacement string that itself contains a newline is
     # expected to raise a ValueError.
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         records = _format_fasta_like_records(self.gen, None, 'a\nb', False)
         list(records)
예제 #20
0
 def test_newline_character_in_id_whitespace_replacement(self):
     # An ID whitespace replacement string that contains a newline is
     # expected to raise a ValueError.
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         records = _format_fasta_like_records(self.gen, '-\n--', ' ', False)
         list(records)
예제 #21
0
 def test_newline_character_in_description_newline_replacement(self):
     # Passing 'a\nb' as the description newline replacement must be
     # rejected with a ValueError matching 'Newline character'.
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(self.gen, None, 'a\nb',
                                                False)
         list(formatted)
예제 #22
0
 def test_newline_character_in_id_whitespace_replacement(self):
     # Passing '-\n--' as the ID whitespace replacement must be rejected
     # with a ValueError matching 'Newline character'.
     with six.assertRaisesRegex(self, ValueError, 'Newline character'):
         formatted = _format_fasta_like_records(self.gen, '-\n--', ' ',
                                                False)
         list(formatted)