def test_fastq_to_generator_invalid_files_all_variants(self):
    # Files that should be invalid for all variants, as well as for custom
    # phred offsets.
    #
    # Each entry of ``self.invalid_files`` is unpacked as
    # (file path, expected exception type, expected message regex); the
    # same error is required regardless of how the quality encoding is
    # selected.
    variants = ('sanger', 'illumina1.3', 'illumina1.8')
    phred_offsets = (33, 64, 40, 77)

    for fp, error_type, error_msg_regex in self.invalid_files:
        for variant in variants:
            # six.assertRaisesRegex is used instead of the
            # ``assertRaisesRegexp`` alias, which is deprecated on
            # Python 3 and removed in Python 3.12.
            with six.assertRaisesRegex(self, error_type,
                                       error_msg_regex):
                # list() consumes whatever the reader returns, so an
                # error raised during iteration surfaces inside the
                # ``with`` block.
                list(_fastq_to_generator(fp, variant=variant))

        for offset in phred_offsets:
            with six.assertRaisesRegex(self, error_type,
                                       error_msg_regex):
                list(_fastq_to_generator(fp, phred_offset=offset))
def test_fastq_to_generator_invalid_files_illumina(self):
    # Files that should be invalid for the illumina1.3 and illumina1.8
    # variants: reading either file with either variant must raise a
    # ValueError whose message matches the out-of-range pattern below.
    fps = [get_data_path(fp) for fp in
           ['sanger_full_range_original_sanger.fastq',
            'solexa_full_range_original_solexa.fastq']]

    # Raw string: the brackets are escaped for the regex engine, and a
    # non-raw literal would contain the invalid escape sequence ``\[``.
    out_of_range_regex = r'out of range \[0, 62\]'

    for fp in fps:
        for variant in 'illumina1.3', 'illumina1.8':
            # six.assertRaisesRegex is used instead of the
            # ``assertRaisesRegexp`` alias, which is deprecated on
            # Python 3 and removed in Python 3.12.
            with six.assertRaisesRegex(self, ValueError,
                                       out_of_range_regex):
                list(_fastq_to_generator(fp, variant=variant))
def test_fastq_to_generator_invalid_files_illumina(self):
    # Files that should be invalid for the illumina1.3 and illumina1.8
    # variants: reading either file with either variant must raise a
    # ValueError whose message matches the out-of-range pattern below.
    fps = [get_data_path(fp) for fp in
           ['sanger_full_range_original_sanger.fastq',
            'solexa_full_range_original_solexa.fastq']]

    # Raw string: the brackets are escaped for the regex engine, and a
    # non-raw literal would contain the invalid escape sequence ``\[``.
    out_of_range_regex = r'out of range \[0, 62\]'

    for fp in fps:
        for variant in 'illumina1.3', 'illumina1.8':
            with six.assertRaisesRegex(self, ValueError,
                                       out_of_range_regex):
                list(_fastq_to_generator(fp, variant=variant))
def test_fastq_to_generator_solexa(self):
    # The solexa variant is not implemented yet, so requesting it is
    # expected to raise NotImplementedError even when the input file is a
    # valid solexa file.
    with self.assertRaises(NotImplementedError):
        solexa_fp = get_data_path(
            'solexa_full_range_original_solexa.fastq')
        records = _fastq_to_generator(solexa_fp, variant='solexa')
        # Consume the result so an error raised during iteration is seen
        # inside the ``with`` block.
        list(records)
def test_fastq_to_generator_valid_files(self):
    # Each entry of ``self.valid_configurations`` is unpacked as
    # (list of file paths, list of reader kwargs dicts, components).
    # Every file is read with every kwargs dict and the result is compared
    # against sequences built directly from ``components``, whose items
    # are indexed as [0] id, [1] description, [2] sequence, [3] quality.
    for file_paths, kwargs_variants, records in self.valid_configurations:
        for file_path in file_paths:
            for read_kwargs in kwargs_variants:
                _drop_kwargs(read_kwargs, 'seq_num')
                seq_type = read_kwargs.get('constructor', Sequence)

                # Can't use partials to carry the ``lowercase`` option
                # because the read function below can't operate on
                # partials, so the option is passed explicitly both to
                # the expected constructor and to the reader.
                extra_kwargs = {}
                if hasattr(seq_type, 'lowercase'):
                    extra_kwargs['lowercase'] = 'introns'
                    read_kwargs['lowercase'] = 'introns'

                expected = []
                for rec in records:
                    expected.append(seq_type(
                        rec[2],
                        metadata={'id': rec[0], 'description': rec[1]},
                        positional_metadata={
                            'quality': np.array(rec[3], dtype=np.uint8)},
                        **extra_kwargs))

                observed = list(
                    _fastq_to_generator(file_path, **read_kwargs))

                # Compare lengths first: zip() below would silently stop
                # at the shorter of the two lists.
                self.assertEqual(len(expected), len(observed))
                for obs_seq, exp_seq in zip(observed, expected):
                    self.assertEqual(obs_seq, exp_seq)
def test_fastq_to_generator_valid_files(self):
    # Each entry of ``self.valid_files`` is unpacked as
    # (file path, list of reader kwargs dicts, components). The file is
    # read once per kwargs dict and compared against sequences built
    # directly from ``components``, whose items are indexed as
    # [0] id, [1] description, [2] sequence, [3] quality.
    for fastq_fp, kwargs_variants, records in self.valid_files:
        for read_kwargs in kwargs_variants:
            _drop_kwargs(read_kwargs, 'seq_num')
            seq_type = read_kwargs.get('constructor', BiologicalSequence)

            expected = []
            for rec in records:
                expected.append(seq_type(rec[2], id=rec[0],
                                         description=rec[1],
                                         quality=rec[3]))

            observed = list(_fastq_to_generator(fastq_fp, **read_kwargs))

            # Compare lengths first: zip() below would silently stop at
            # the shorter of the two lists.
            self.assertEqual(len(expected), len(observed))
            for obs_seq, exp_seq in zip(observed, expected):
                self.assertTrue(obs_seq.equals(exp_seq))
def test_fastq_to_generator_valid_files(self):
    # Read every file in ``self.valid_files`` with each of its reader
    # kwargs dicts and check the result against sequences constructed
    # from the accompanying components. A component is indexed as
    # [0] id, [1] description, [2] sequence, [3] quality.
    for path, kwargs_list, components in self.valid_files:
        for reader_kwargs in kwargs_list:
            _drop_kwargs(reader_kwargs, 'seq_num')
            make_seq = reader_kwargs.get('constructor', BiologicalSequence)

            def build_expected(component):
                # Mirrors what the reader is expected to produce for one
                # record.
                return make_seq(component[2], id=component[0],
                                description=component[1],
                                quality=component[3])

            expected = [build_expected(component)
                        for component in components]
            observed = list(_fastq_to_generator(path, **reader_kwargs))

            # A length mismatch would otherwise be hidden by zip().
            self.assertEqual(len(expected), len(observed))
            for got, want in zip(observed, expected):
                self.assertTrue(got.equals(want))
def test_conversion(self):
    # Each entry of ``self.conversions`` is unpacked as
    # (input path, expected-output path, list of (read kwargs, write
    # kwargs) pairs). The input is read with the read kwargs, written
    # back out with the write kwargs, and the text produced must equal
    # the contents of the expected-output file.
    import io

    for from_fp, to_fp, kwargs in self.conversions:
        for from_kwargs, to_kwargs in kwargs:
            read_gen = _fastq_to_generator(from_fp, **from_kwargs)

            out_fh = StringIO()
            try:
                # will issue warning when truncating quality scores
                with warnings.catch_warnings(record=True):
                    warnings.simplefilter("ignore")
                    _generator_to_fastq(read_gen, out_fh, **to_kwargs)
                obs = out_fh.getvalue()
            finally:
                # Release the buffer even if the writer raises.
                out_fh.close()

            # io.open with newline=None gives universal-newline text
            # reading on both Python 2 and 3. The former ``'U'`` mode flag
            # is deprecated and rejected with ValueError from Python 3.11.
            with io.open(to_fp, newline=None) as exp_fh:
                exp = exp_fh.read()

            self.assertEqual(obs, exp)
def test_fastq_to_generator_valid_files(self):
    # Each entry of ``self.valid_configurations`` is unpacked as
    # (list of file paths, list of reader kwargs dicts, components).
    # Every file is read with every kwargs dict and the result is compared
    # against sequences built directly from ``components``, whose items
    # are indexed as [0] id, [1] description, [2] sequence, [3] quality.
    for file_paths, kwargs_variants, records in self.valid_configurations:
        for file_path in file_paths:
            for read_kwargs in kwargs_variants:
                _drop_kwargs(read_kwargs, 'seq_num')
                seq_type = read_kwargs.get('constructor', Sequence)

                expected = []
                for rec in records:
                    expected.append(seq_type(
                        rec[2],
                        metadata={'id': rec[0], 'description': rec[1]},
                        positional_metadata={
                            'quality': np.array(rec[3], dtype=np.uint8)}))

                observed = list(
                    _fastq_to_generator(file_path, **read_kwargs))

                # Compare lengths first: zip() below would silently stop
                # at the shorter of the two lists.
                self.assertEqual(len(expected), len(observed))
                for obs_seq, exp_seq in zip(observed, expected):
                    self.assertEqual(obs_seq, exp_seq)