コード例 #1
0
 def test_read_tbi(self):
   """Checks how many blocks `split_bgzf` yields for two test files."""
   # Pairs of (test file name, expected number of blocks).
   expected_block_counts = (
       ('empty.vcf.gz', 0),
       ('Y.vcf.bgz', 19),
   )
   for file_name, expected_count in expected_block_counts:
     file_path = testdata_util.get_full_file_path(file_name)
     # Materialize the result so that its length can be measured.
     found_blocks = list(bgzf_io.split_bgzf(file_path))
     self.assertEqual(len(found_blocks), expected_count)
コード例 #2
0
 def test_pipeline_read_all_multiple_files_large(self):
   """Reads three VCF files with `ReadAllFromVcf` and checks the total count.

   The file paths are fed in through `beam.Create`, and the resulting
   collection is asserted to contain 9900 elements.
   """
   pipeline = TestPipeline()
   input_files = [
       testdata_util.get_full_file_path(file_name)
       for file_name in ('valid-4.0.vcf',
                         'valid-4.1-large.vcf',
                         'valid-4.2.vcf')
   ]
   file_paths = pipeline | 'Create' >> beam.Create(input_files)
   records = file_paths | 'Read' >> ReadAllFromVcf()
   assert_that(records, asserts.count_equals_to(9900))
   pipeline.run()
コード例 #3
0
 def test_read_single_file_large(self):
     """Reads each test file and checks the number of records returned."""
     # Pairs of (test file name, expected number of records).
     expected_record_counts = (
         ('valid-4.0.vcf', 5),
         ('valid-4.0.vcf.gz', 5),
         ('valid-4.0.vcf.bz2', 5),
         ('valid-4.1-large.vcf', 9882),
         ('valid-4.2.vcf', 13),
     )
     for file_name, num_records in expected_record_counts:
         file_path = testdata_util.get_full_file_path(file_name)
         records = self._read_records(file_path)
         self.assertEqual(num_records, len(records))
コード例 #4
0
 def test_read_variants_large_mode(self):
     """Reads 'valid-4.0.vcf' with `PipelineModes.LARGE`; expects 5 items."""
     pipeline = test_pipeline.TestPipeline()
     vcf_path = testdata_util.get_full_file_path('valid-4.0.vcf')
     variants = pipeline_common.read_variants(
         pipeline, [vcf_path], PipelineModes.LARGE, False)
     expected_count = asserts.count_equals_to(5)
     assert_that(variants, expected_count)
     pipeline.run()
コード例 #5
0
 def test_read_single_file_large(self):
     """Reads the header of each test file and checks its field counts.

     For every file, the first element read from a `VcfHeaderSource` must
     expose the expected number of `infos` and `formats` entries.
     """
     # Triples of (test file name, expected #infos, expected #formats).
     expected_header_sizes = (
         ('valid-4.0.vcf', 6, 4),
         ('valid-4.0.vcf.gz', 6, 4),
         ('valid-4.0.vcf.bz2', 6, 4),
         ('valid-4.1-large.vcf', 21, 33),
         ('valid-4.2.vcf', 8, 5),
     )
     for file_name, num_infos, num_formats in expected_header_sizes:
         header_source = VcfHeaderSource(
             testdata_util.get_full_file_path(file_name))
         headers = source_test_utils.read_from_source(header_source)
         first_header = headers[0]
         self.assertEqual(num_infos, len(first_header.infos))
         self.assertEqual(num_formats, len(first_header.formats))
コード例 #6
0
 def test_read_variants_use_1_based_coordinate(self):
     """Reads 'valid-4.0.vcf' with `use_1_based_coordinate=True`.

     Uses `PipelineModes.SMALL` and expects 5 items in the output.
     """
     pipeline = test_pipeline.TestPipeline()
     vcf_path = testdata_util.get_full_file_path('valid-4.0.vcf')
     variants = pipeline_common.read_variants(
         pipeline,
         [vcf_path],
         PipelineModes.SMALL,
         False,
         use_1_based_coordinate=True)
     expected_count = asserts.count_equals_to(5)
     assert_that(variants, expected_count)
     pipeline.run()
コード例 #7
0
 def setUp(self):
     """Stages the bytes of 'Y.vcf.bgz' as a fake GCS object.

     Sets the following attributes:
       _data: Raw bytes of the local test file.
       client: A `gcsio_test.FakeGcsClient`.
       gcs: A `gcsio.GcsIO` built on top of `client`.
       _file_name: The GCS path under which the fake object is registered.
     """
     # Read the file in one call; there is no need to split it into lines
     # only to join them back together.
     with open(testdata_util.get_full_file_path('Y.vcf.bgz'),
               mode='rb') as file_to_read:
         self._data = file_to_read.read()
     self.client = gcsio_test.FakeGcsClient()
     self.gcs = gcsio.GcsIO(self.client)
     self._file_name = 'gs://bucket/test'
     bucket, name = gcsio.parse_gcs_path(self._file_name)
     # NOTE(review): the trailing 1 is passed positionally to FakeFile;
     # presumably it is the object generation -- confirm against gcsio_test.
     self.client.objects.add_file(
         gcsio_test.FakeFile(bucket, name, self._data, 1))
コード例 #8
0
 def test_read_after_splitting(self):
     """Splits a `VcfSource` and checks the splits yield 9882 records.

     The source is split with `desired_bundle_size=500`, more than one
     split is required, and the records read from every split are pooled
     before counting.
     """
     source = VcfSource(
         testdata_util.get_full_file_path('valid-4.1-large.vcf'))
     splits = list(source.split(desired_bundle_size=500))
     self.assertGreater(len(splits), 1)
     sources_info = [
         (bundle.source, bundle.start_position, bundle.stop_position)
         for bundle in splits
     ]
     self.assertGreater(len(sources_info), 1)
     split_records = []
     for sub_source, start, stop in sources_info:
         records = source_test_utils.read_from_source(
             sub_source, start, stop)
         split_records.extend(records)
     self.assertEqual(9882, len(split_records))
コード例 #9
0
 def test_read_single_file_large(self):
     """Reads the estimate for each test file and checks its values.

     For every file, the first element read from a `VcfEstimateSource`
     must report the expected `size_in_bytes` and, once converted with
     `int()`, the expected `estimated_variant_count`.
     """
     # Triples of (test file name, expected variant count, expected size).
     expected_estimates = (
         ('valid-4.0.vcf', 4, 1500),
         ('valid-4.0.vcf.gz', 13, 1454),
         ('valid-4.0.vcf.bz2', 14, 1562),
         ('valid-4.1-large.vcf', 14425, 832396),
         ('valid-4.1-large.vcf.gz', 5498, 313430),
         ('valid-4.2.vcf', 10, 3195),
     )
     for file_name, variant_count, size in expected_estimates:
         estimate_source = VcfEstimateSource(
             testdata_util.get_full_file_path(file_name))
         estimates = source_test_utils.read_from_source(estimate_source)
         self.assertEqual(variant_count,
                          int(estimates[0].estimated_variant_count))
         self.assertEqual(size, estimates[0].size_in_bytes)
コード例 #10
0
 def test_pipeline_read_all_single_file_large(self):
     """Checks 'valid-4.1-large.vcf' yields 9882 records with read-all."""
     large_file = testdata_util.get_full_file_path('valid-4.1-large.vcf')
     expected_records = 9882
     self._assert_pipeline_read_files_record_count_equal(
         large_file, expected_records, use_read_all=True)
コード例 #11
0
 def test_gz(self):
   """Verifies that headers can be parsed from a .gz file."""
   headers = vcf_header_parser.get_vcf_headers(
       testdata_util.get_full_file_path('valid-4.0.vcf.gz'))
   # Both header sections must contain more than one entry.
   num_infos = len(headers.infos)
   num_formats = len(headers.formats)
   self.assertGreater(num_infos, 1)
   self.assertGreater(num_formats, 1)
コード例 #12
0
 def test_get_block_offsets(self):
   """Checks `_get_block_offsets` returns 108 entries for the test index."""
   index_path = testdata_util.get_full_file_path('Y.vcf.bgz.tbi')
   offsets = bgzf_io._get_block_offsets(index_path)
   self.assertEqual(len(offsets), 108)