Example #1
0
    def test_fasta_to_generator_valid_files(self):
        # Each fixture is a 4-tuple: (expected sequences, reader kwargs,
        # fasta file paths, qual file paths).
        fixtures = (self.empty, self.single, self.multi,
                    self.odd_labels_different_type,
                    self.sequence_collection_different_type,
                    self.lowercase_seqs)

        for expected, kwargs, fasta_fps, qual_fps in fixtures:
            for fasta_fp in fasta_fps:
                # Pass 1: read the fasta file on its own. The expected
                # sequences carry quality scores, so compare against copies
                # with the 'quality' positional metadata removed.
                observed = list(_fasta_to_generator(fasta_fp, **kwargs))
                self.assertEqual(len(observed), len(expected))
                for obs_seq, exp_seq in zip(observed, expected):
                    exp_without_qual = exp_seq.copy()
                    del exp_without_qual.positional_metadata['quality']
                    self.assertEqual(obs_seq, exp_without_qual)

                # Pass 2: pair the same fasta file with each corresponding
                # qual file; now the expected sequences are compared as-is,
                # quality scores included. Together the two passes exercise
                # parsing a standalone fasta file and paired fasta/qual
                # files.
                for qual_fp in qual_fps:
                    observed = list(_fasta_to_generator(
                        fasta_fp, qual=qual_fp, **kwargs))

                    self.assertEqual(len(observed), len(expected))
                    for obs_seq, exp_seq in zip(observed, expected):
                        self.assertEqual(obs_seq, exp_seq)
Example #2
0
    def test_roundtrip_generators(self):
        # test that fasta and qual files can be streamed into memory and back
        # out to disk using generator reader and writer
        filename_pairs = [('empty', 'empty'),
                          ('fasta_multi_seq_roundtrip',
                           'qual_multi_seq_roundtrip')]
        fps = [(get_data_path(fasta_fn), get_data_path(qual_fn))
               for fasta_fn, qual_fn in filename_pairs]

        for fasta_fp, qual_fp in fps:
            with io.open(fasta_fp) as fh:
                exp_fasta = fh.read()
            with io.open(qual_fp) as fh:
                exp_qual = fh.read()

            # Context managers guarantee both in-memory buffers are closed
            # even if the reader or writer raises part-way through. The
            # contents must be captured before the buffers are closed.
            with io.StringIO() as fasta_fh, io.StringIO() as qual_fh:
                _generator_to_fasta(
                    _fasta_to_generator(fasta_fp, qual=qual_fp),
                    fasta_fh, qual=qual_fh)
                obs_fasta = fasta_fh.getvalue()
                obs_qual = qual_fh.getvalue()

            self.assertEqual(obs_fasta, exp_fasta)
            self.assertEqual(obs_qual, exp_qual)
Example #3
0
def _construct_seq(fh, constructor=DNA, seq_num=1):
    """Build the ``seq_num``-th sequence and attach its interval metadata.

    Parameters
    ----------
    fh
        Handle that is first iterated with ``_yield_record`` and then
        handed to ``_fasta_to_generator``.
    constructor
        Forwarded to ``_fasta_to_generator`` as ``constructor``.
    seq_num : int
        1-based position of the wanted record/sequence.

    Returns
    -------
    The sequence returned by ``_get_nth_sequence``, with its
    ``interval_metadata`` set from the matching record's lines (or from an
    empty list when no matching 'data' record was found).
    """
    feature_lines = []
    # The loop intentionally has no ``break``: every record is consumed
    # before ``fh`` is passed on to the FASTA reader below.
    # NOTE(review): presumably this leaves ``fh`` positioned at the
    # sequence section -- confirm against ``_yield_record``.
    for position, record in enumerate(_yield_record(fh), 1):
        data_type, _, record_lines = record
        if position == seq_num and data_type == 'data':
            feature_lines = record_lines

    sequences = _fasta_to_generator(fh, constructor=constructor)
    seq = _get_nth_sequence(sequences, seq_num=seq_num)
    seq.interval_metadata = _parse_record(feature_lines, len(seq))
    return seq
Example #4
0
def _construct_seq(fh, constructor=DNA, seq_num=1):
    """Build the ``seq_num``-th sequence and attach its interval metadata.

    Parameters
    ----------
    fh
        Handle that is first iterated with ``_yield_record`` and then
        handed to ``_fasta_to_generator``.
    constructor
        Forwarded to ``_fasta_to_generator`` as ``constructor``.
    seq_num : int
        1-based position of the wanted record/sequence.

    Returns
    -------
    The sequence returned by ``_get_nth_sequence``, with its
    ``interval_metadata`` set from the matching record's lines (or from an
    empty list when no matching 'data' record was found).
    """
    feature_lines = []
    # The loop intentionally has no ``break``: every record is consumed
    # before ``fh`` is passed on to the FASTA reader below.
    # NOTE(review): presumably this leaves ``fh`` positioned at the
    # sequence section -- confirm against ``_yield_record``.
    for position, record in enumerate(_yield_record(fh), 1):
        data_type, _, record_lines = record
        if position == seq_num and data_type == 'data':
            feature_lines = record_lines

    sequences = _fasta_to_generator(fh, constructor=constructor)
    seq = _get_nth_sequence(sequences, seq_num=seq_num)
    seq.interval_metadata = _parse_record(feature_lines, len(seq))
    return seq
Example #5
0
 def test_fasta_to_generator_invalid_files(self):
     # Each entry of self.invalid_fps is a 4-tuple: (file path, reader
     # kwargs, expected exception type, regex the message must match).
     for invalid_fp, reader_kwargs, exc_type, msg_regex in self.invalid_fps:
         with six.assertRaisesRegex(self, exc_type, msg_regex):
             # Wrapping in list() drains the reader's output so the whole
             # file is processed inside the assertion context.
             records = _fasta_to_generator(invalid_fp, **reader_kwargs)
             list(records)