def test_read_reentrant_without_splitting(self):
    """Run the reentrant-read assertion against an unsplit ``VcfSource``.

    The sample header and text lines are written to a temporary VCF file,
    and a source over that file is passed, with ``None`` for both
    positions, to ``source_test_utils.assert_reentrant_reads_succeed``.
    """
    with TempDir() as tempdir:
        vcf_path = self._create_temp_vcf_file(
            _SAMPLE_HEADER_LINES + _SAMPLE_TEXT_LINES, tempdir)
        # Tuple layout mirrors the split-based tests:
        # (source, start_position, stop_position).
        source_info = (VcfSource(vcf_path), None, None)
        source_test_utils.assert_reentrant_reads_succeed(source_info)
def test_dynamic_work_rebalancing(self):
    """Exercise exhaustive split-at-fraction checks on a single-split source.

    The sample header and text lines are written to a temporary VCF file.
    The source over it is split with a ``desired_bundle_size`` of 100000,
    which is expected to yield exactly one split. That split's source and
    positions are then passed to
    ``source_test_utils.assert_split_at_fraction_exhaustive``.
    """
    with TempDir() as tempdir:
        file_name = self._create_temp_vcf_file(
            _SAMPLE_HEADER_LINES + _SAMPLE_TEXT_LINES, tempdir)
        source = VcfSource(file_name)
        splits = list(source.split(desired_bundle_size=100000))
        # Use the TestCase assertion instead of a bare ``assert``: it is not
        # stripped under ``python -O`` and reports both values on failure.
        self.assertEqual(1, len(splits))
        only_split = splits[0]
        source_test_utils.assert_split_at_fraction_exhaustive(
            only_split.source,
            only_split.start_position,
            only_split.stop_position)
def test_read_reentrant_after_splitting(self):
    """Run the reentrant-read assertion against the single split of a source.

    The sample header and text lines are written to a temporary VCF file.
    The source over it is split with a ``desired_bundle_size`` of 100000,
    which is expected to yield exactly one split. That split's source and
    positions are then passed to
    ``source_test_utils.assert_reentrant_reads_succeed``.
    """
    with TempDir() as tempdir:
        file_name = self._create_temp_vcf_file(
            _SAMPLE_HEADER_LINES + _SAMPLE_TEXT_LINES, tempdir)
        source = VcfSource(file_name)
        splits = list(source.split(desired_bundle_size=100000))
        # Use the TestCase assertion instead of a bare ``assert``: it is not
        # stripped under ``python -O`` and reports both values on failure.
        self.assertEqual(1, len(splits))
        only_split = splits[0]
        source_test_utils.assert_reentrant_reads_succeed(
            (only_split.source,
             only_split.start_position,
             only_split.stop_position))
def test_read_after_splitting(self):
    """Read every split of the large 4.1 VCF file and check the record total.

    The source over ``valid-4.1-large.vcf`` is split with a
    ``desired_bundle_size`` of 500 and must produce more than one split.
    Each split is read via ``source_test_utils.read_from_source`` and the
    combined results must contain 9882 records.
    """
    vcf_path = get_full_file_path('valid-4.1-large.vcf')
    splits = list(VcfSource(vcf_path).split(desired_bundle_size=500))
    self.assertGreater(len(splits), 1)

    sources_info = [
        (piece.source, piece.start_position, piece.stop_position)
        for piece in splits
    ]
    self.assertGreater(len(sources_info), 1)

    split_records = []
    for split_source, start, stop in sources_info:
        records = source_test_utils.read_from_source(split_source, start, stop)
        split_records.extend(records)
    self.assertEqual(9882, len(split_records))
def _read_records(self, file_or_pattern, **kwargs):
    """Build a ``VcfSource`` and return what ``read_from_source`` reads from it.

    Args:
        file_or_pattern: Forwarded as the first positional argument to
            ``VcfSource``.
        **kwargs: Forwarded unchanged to the ``VcfSource`` constructor.

    Returns:
        The result of ``source_test_utils.read_from_source`` for the
        constructed source.
    """
    vcf_source = VcfSource(file_or_pattern, **kwargs)
    return source_test_utils.read_from_source(vcf_source)