Example #1
0
    def test_fastq_to_generator_invalid_files_all_variants(self):
        # Each entry in self.invalid_files is expected to fail in the same
        # way regardless of which named variant or explicit phred offset is
        # used to read it.
        variants = ('sanger', 'illumina1.3', 'illumina1.8')
        phred_offsets = (33, 64, 40, 77)

        for path, exc_type, msg_regex in self.invalid_files:
            # named variants first ...
            for name in variants:
                with six.assertRaisesRegex(self, exc_type, msg_regex):
                    list(_fastq_to_generator(path, variant=name))

            # ... then custom phred offsets
            for value in phred_offsets:
                with six.assertRaisesRegex(self, exc_type, msg_regex):
                    list(_fastq_to_generator(path, phred_offset=value))
Example #2
0
    def test_fastq_to_generator_invalid_files_illumina(self):
        # files that should be invalid for illumina1.3 and illumina1.8
        # variants: reading either one must raise ValueError with an
        # "out of range [0, 62]" message
        fps = [get_data_path(fp) for fp in
               ['sanger_full_range_original_sanger.fastq',
                'solexa_full_range_original_solexa.fastq']]

        # Raw string: the backslashes are regex escapes for the literal
        # brackets. In a non-raw literal '\[' and '\]' are invalid string
        # escape sequences and trigger a warning on modern Python.
        out_of_range = r'out of range \[0, 62\]'

        for fp in fps:
            for variant in 'illumina1.3', 'illumina1.8':
                with self.assertRaisesRegex(ValueError, out_of_range):
                    list(_fastq_to_generator(fp, variant=variant))
Example #3
0
 def test_fastq_to_generator_solexa(self):
     # The solexa variant isn't supported yet, so reading with
     # variant='solexa' is expected to raise NotImplementedError even
     # though the input is a valid solexa file.
     solexa_fn = 'solexa_full_range_original_solexa.fastq'
     with self.assertRaises(NotImplementedError):
         records = _fastq_to_generator(get_data_path(solexa_fn),
                                       variant='solexa')
         list(records)
Example #4
0
    def test_fastq_to_generator_valid_files(self):
        # Every valid file, read with each of its keyword-argument sets,
        # must yield sequences equal to the ones built directly from the
        # configuration's components.
        for paths, kwargs_list, components in self.valid_configurations:
            for path in paths:
                for reader_kwargs in kwargs_list:
                    _drop_kwargs(reader_kwargs, 'seq_num')
                    constructor = reader_kwargs.get('constructor', Sequence)

                    # Partials can't be used for this because the read
                    # function below can't operate on them, so 'lowercase'
                    # is passed explicitly to both the constructor and the
                    # reader when the constructor has that attribute.
                    if hasattr(constructor, 'lowercase'):
                        ctor_kwargs = {'lowercase': 'introns'}
                        reader_kwargs['lowercase'] = 'introns'
                    else:
                        ctor_kwargs = {}

                    # Build the expected sequences one component at a time.
                    expected = []
                    for comp in components:
                        metadata = {'id': comp[0], 'description': comp[1]}
                        quality = np.array(comp[3], dtype=np.uint8)
                        expected.append(constructor(
                            comp[2],
                            metadata=metadata,
                            positional_metadata={'quality': quality},
                            **ctor_kwargs))

                    observed = list(_fastq_to_generator(path,
                                                        **reader_kwargs))
                    self.assertEqual(len(expected), len(observed))
                    for obs_seq, exp_seq in zip(observed, expected):
                        self.assertEqual(obs_seq, exp_seq)
Example #5
0
    def test_conversion(self):
        # For each conversion: read the source file with `from_kwargs`,
        # write the records back out with `to_kwargs`, and compare the
        # written text with the contents of the expected file.
        for from_fp, to_fp, kwargs in self.conversions:
            for from_kwargs, to_kwargs in kwargs:
                read_gen = _fastq_to_generator(from_fp, **from_kwargs)

                # The in-memory buffer lives in a `with` block so it is
                # closed even if the writer raises part-way through.
                with io.StringIO() as out_fh:
                    # will issue warning when truncating quality scores
                    with warnings.catch_warnings(record=True):
                        warnings.simplefilter("ignore")
                        _generator_to_fastq(read_gen, out_fh, **to_kwargs)

                    # must be read before the buffer is closed
                    obs = out_fh.getvalue()

                with io.open(to_fp) as exp_fh:
                    exp = exp_fh.read()
                self.assertEqual(obs, exp)
Example #6
0
    def test_fastq_to_generator_valid_files(self):
        # Read each valid file under each keyword-argument set and compare
        # the result, record by record, with sequences constructed from the
        # configuration's components.
        for file_group, kwarg_sets, components in self.valid_configurations:
            for fastq_fp in file_group:
                for reader_kwargs in kwarg_sets:
                    _drop_kwargs(reader_kwargs, 'seq_num')
                    constructor = reader_kwargs.get('constructor', Sequence)

                    # 'lowercase' is always handed to both the constructor
                    # and the reader.
                    ctor_kwargs = {'lowercase': 'introns'}
                    reader_kwargs['lowercase'] = 'introns'

                    expected = []
                    for comp in components:
                        quality = np.array(comp[3], dtype=np.uint8)
                        expected.append(constructor(
                            comp[2],
                            metadata={'id': comp[0],
                                      'description': comp[1]},
                            positional_metadata={'quality': quality},
                            **ctor_kwargs))

                    observed = list(_fastq_to_generator(fastq_fp,
                                                        **reader_kwargs))
                    self.assertEqual(len(expected), len(observed))
                    for obs_seq, exp_seq in zip(observed, expected):
                        self.assertEqual(obs_seq, exp_seq)