Exemple #1
0
 def test_create_processed_variant_no_change(self):
     """Checks factory output when alternate-allele INFO splitting is off.

     The produced variant is compared against a hand-built
     `ProcessedVariant`, and the spy counters are checked afterwards: one
     variant counted, no annotation ALT matches and no mismatches.
     """
     sample = self._get_sample_variant()
     header = vcf_header_util.make_header({'A1': '1', 'A2': 'A'})
     spy_counters = _CounterSpyFactory()
     variant_factory = processed_variant.ProcessedVariantFactory(
         header,
         split_alternate_allele_info_fields=False,
         counter_factory=spy_counters)
     actual = variant_factory.create_processed_variant(sample)

     # With splitting disabled, every INFO field is expected to land in
     # `_non_alt_info`, while `_alternate_datas` only carries the
     # alternate bases themselves.
     expected = processed_variant.ProcessedVariant(sample)
     expected._non_alt_info = {
         'A1': 'some data',
         'A2': ['data1', 'data2']
     }
     expected._alternate_datas = [
         processed_variant.AlternateBaseData('A'),
         processed_variant.AlternateBaseData('TT'),
     ]
     self.assertEqual([expected], [actual])

     expected_counts = [
         (CEnum.VARIANT, 1),
         (CEnum.ANNOTATION_ALT_MATCH, 0),
         (CEnum.ANNOTATION_ALT_MISMATCH, 0),
     ]
     for counter_name, expected_count in expected_counts:
         counter = spy_counters.counter_map[counter_name.value]
         self.assertEqual(counter.get_value(), expected_count)
Exemple #2
0
  def test_create_processed_variant_move_alt_info_extra_values(self):
    """Checks handling of an `A2` INFO field with too many values.

    The sample variant's `A2` field is given three values. The test expects
    `create_processed_variant` to raise `ValueError` by default, and to
    succeed when the factory is built with
    `allow_alternate_allele_info_mismatch=True`, in which case only
    'data1' and 'data2' end up attached to the alternates 'A' and 'TT'.
    """
    header_fields = vcf_header_util.make_header({'A1': '1', 'A2': 'A'})
    variant = self._get_sample_variant()
    # Add a value to `A2` (it only has two alternate bases, so this is invalid).
    variant.info['A2'] = ['data1', 'data2', 'data3']

    # Ensure error is raised by default.
    factory = processed_variant.ProcessedVariantFactory(
        header_fields,
        split_alternate_allele_info_fields=True)
    with self.assertRaises(ValueError):
      factory.create_processed_variant(variant)

    # Try again with allow_alternate_allele_info_mismatch=True.
    factory = processed_variant.ProcessedVariantFactory(
        header_fields,
        split_alternate_allele_info_fields=True,
        allow_alternate_allele_info_mismatch=True)
    proc_var = factory.create_processed_variant(variant)
    alt1 = processed_variant.AlternateBaseData('A')
    alt1._info = {'A2': 'data1'}
    alt2 = processed_variant.AlternateBaseData('TT')
    alt2._info = {'A2': 'data2'}
    self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
    # `dict.has_key` no longer exists in Python 3; a membership assertion
    # works on both Python 2.7 and 3 and reports the container on failure.
    self.assertNotIn('A2', proc_var.non_alt_info)
Exemple #3
0
def _get_processed_variant(variant, header_num_dict=None):
    """Builds a processed variant from `variant` using a generated header.

    Args:
      variant: The variant handed to `create_processed_variant`.
      header_num_dict: Optional dict passed to `vcf_header_util.make_header`;
        an empty dict is used when it is missing or empty.

    Returns:
      Whatever `ProcessedVariantFactory.create_processed_variant` returns for
      `variant`.
    """
    # TODO(bashir2): To make this more of a "unit" test, we should create
    # ProcessedVariant instances directly (instead of Variant) and avoid calling
    # create_processed_variant here. Then we should also add cases that
    # have annotation fields.
    num_dict = header_num_dict if header_num_dict else {}
    factory = processed_variant.ProcessedVariantFactory(
        vcf_header_util.make_header(num_dict))
    return factory.create_processed_variant(variant)
Exemple #4
0
 def test_create_processed_variant_move_alt_info(self):
     """Checks that `A2` values are moved onto the alternate data entries.

     With `split_alternate_allele_info_fields=True`, the test expects the
     alternates 'A' and 'TT' to carry 'data1' and 'data2' respectively under
     the `A2` key, and `A2` to be absent from `non_alt_info`.
     """
     variant = self._get_sample_variant()
     header_fields = vcf_header_util.make_header({'A1': '1', 'A2': 'A'})
     factory = processed_variant.ProcessedVariantFactory(
         header_fields, split_alternate_allele_info_fields=True)
     proc_var = factory.create_processed_variant(variant)
     alt1 = processed_variant.AlternateBaseData('A')
     alt1._info = {'A2': 'data1'}
     alt2 = processed_variant.AlternateBaseData('TT')
     alt2._info = {'A2': 'data2'}
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
     # `dict.has_key` no longer exists in Python 3; a membership assertion
     # works on both Python 2.7 and 3 and reports the container on failure.
     self.assertNotIn('A2', proc_var.non_alt_info)
Exemple #5
0
 def test_convert_variant_to_bigquery_row_omit_empty_calls(self):
   """Runs the row conversion with `omit_empty_sample_calls=True`.

   A sample variant with empty calls is turned into a processed variant,
   pushed through a blocking test pipeline, and the resulting rows are
   compared against the single expected row from the fixture.
   """
   sample = self._get_sample_variant_with_empty_calls()
   variant, expected_row, header_num_dict = sample
   factory = processed_variant.ProcessedVariantFactory(
       vcf_header_util.make_header(header_num_dict))
   processed = factory.create_processed_variant(variant)

   test_pipeline = TestPipeline(blocking=True)
   variants = test_pipeline | Create([processed])
   converted_rows = variants | 'ConvertToRow' >> beam.ParDo(
       ConvertVariantToRow(
           self._row_generator, omit_empty_sample_calls=True))
   assert_that(converted_rows, equal_to([expected_row]))
   test_pipeline.run()
Exemple #6
0
  def _get_row_list_from_variant(
      self, variant, header_num_dict=None, allow_incompatible_records=False,
      omit_empty_sample_calls=False, **kwargs):
    """Returns the rows that `self._row_generator` yields for `variant`.

    Args:
      variant: The variant to process and convert.
      header_num_dict: Optional dict passed to `vcf_header_util.make_header`;
        an empty dict is used when it is missing or empty.
      allow_incompatible_records: Forwarded positionally to `get_rows`.
      omit_empty_sample_calls: Forwarded positionally to `get_rows`.
      **kwargs: Extra keyword arguments forwarded to `get_rows`.

    Returns:
      A list holding everything produced by `get_rows`.
    """
    # TODO(bashir2): To make this more of a "unit" test, we should create
    # ProcessedVariant instances directly (instead of Variant) and avoid calling
    # create_processed_variant here. Then we should also add cases that
    # have annotation fields.
    num_dict = header_num_dict if header_num_dict else {}
    factory = processed_variant.ProcessedVariantFactory(
        vcf_header_util.make_header(num_dict))
    processed = factory.create_processed_variant(variant)

    generated_rows = self._row_generator.get_rows(
        processed, allow_incompatible_records,
        omit_empty_sample_calls, **kwargs)
    return list(generated_rows)
 def _get_sample_variant_and_header_with_csq(self):
     """Returns a sample variant with a `CSQ` INFO field and a matching header.

     The variant from `_get_sample_variant` gets three pipe-separated `CSQ`
     entries, and the generated header's `CSQ` description is overwritten
     with a string listing Allele|Consequence|IMPACT|SYMBOL|Gene.

     Returns:
       A `(variant, header_fields)` tuple.
     """
     csq_values = ['A|C1|I1|S1|G1', 'TT|C2|I2|S2|G2', 'A|C3|I3|S3|G3']
     variant = self._get_sample_variant()
     variant.info['CSQ'] = csq_values

     header_fields = vcf_header_util.make_header(
         {'CSQ': '.', 'A1': '1', 'A2': 'A'})
     desc_key = vcf_header_io.VcfParserHeaderKeyConstants.DESC
     header_fields.infos['CSQ'][desc_key] = (
         'some desc Allele|Consequence|IMPACT|SYMBOL|Gene')
     return variant, header_fields
 def test_convert_variant_to_bigquery_row_allow_incompatible_recoreds(self):
   """Runs the row conversion with `allow_incompatible_records=True`.

   A sample variant with incompatible records is turned into a processed
   variant, pushed through a blocking test pipeline, and the resulting rows
   are compared against the single expected row from the fixture.
   """
   sample = self._get_sample_variant_with_incompatible_records()
   variant, expected_row, header_num_dict = sample
   factory = processed_variant.ProcessedVariantFactory(
       vcf_header_util.make_header(header_num_dict))
   processed = factory.create_processed_variant(variant)

   test_pipeline = TestPipeline(blocking=True)
   variants = test_pipeline | Create([processed])
   converted_rows = variants | 'ConvertToRow' >> ParDo(
       ConvertToBigQueryTableRow(
           self._row_generator, allow_incompatible_records=True))
   assert_that(converted_rows, equal_to([expected_row]))
   test_pipeline.run()
Exemple #9
0
 def test_convert_variant_with_sample_name_to_bigquery_row(self):
     """Runs the row conversion with a generator built with call names on.

     `self._row_generator` is replaced by a `VariantCallRowGenerator` created
     with `include_call_name=True`; the sample variant is then converted in a
     blocking test pipeline (with `omit_empty_sample_calls=True`) and the
     output is compared against the single expected row from the fixture.
     """
     self._row_generator = bigquery_row_generator.VariantCallRowGenerator(
         self._schema_descriptor,
         self._conflict_resolver,
         include_call_name=True)
     sample = self._get_sample_variant_with_sample_name()
     variant, expected_row, header_num_dict = sample
     factory = processed_variant.ProcessedVariantFactory(
         vcf_header_util.make_header(header_num_dict))
     processed = factory.create_processed_variant(variant)

     test_pipeline = TestPipeline(blocking=True)
     variants = test_pipeline | Create([processed])
     converted_rows = variants | 'ConvertToRow' >> beam.ParDo(
         ConvertVariantToRow(
             self._row_generator, omit_empty_sample_calls=True))
     assert_that(converted_rows, equal_to([expected_row]))
     test_pipeline.run()
Exemple #10
0
 def test_convert_variant_to_bigquery_row(self):
     """Converts three sample variants in one pipeline and checks the rows.

     The header is built from the union of the three fixtures' header dicts
     (later fixtures overriding earlier ones on key clashes). Each variant is
     processed by its own freshly constructed factory.
     """
     variant_1, row_1, header_num_dict_1 = self._get_sample_variant_1()
     variant_2, row_2, header_num_dict_2 = self._get_sample_variant_2()
     variant_3, row_3, header_num_dict_3 = self._get_sample_variant_3()

     merged_num_dict = header_num_dict_1.copy()
     for extra_num_dict in (header_num_dict_2, header_num_dict_3):
         merged_num_dict.update(extra_num_dict)
     header_fields = vcf_header_util.make_header(merged_num_dict)

     processed_variants = [
         processed_variant.ProcessedVariantFactory(
             header_fields).create_processed_variant(each_variant)
         for each_variant in (variant_1, variant_2, variant_3)
     ]

     test_pipeline = TestPipeline(blocking=True)
     variants = test_pipeline | Create(processed_variants)
     converted_rows = variants | 'ConvertToRow' >> beam.ParDo(
         ConvertVariantToRow(self._row_generator))
     assert_that(converted_rows, equal_to([row_1, row_2, row_3]))
     test_pipeline.run()
def _get_processed_variant(variant, header_num_dict=None):
    """Creates a processed variant for `variant` from a generated header.

    Args:
      variant: The variant handed to `create_processed_variant`.
      header_num_dict: Optional dict passed to `vcf_header_util.make_header`;
        an empty dict is used when it is missing or empty.

    Returns:
      Whatever `ProcessedVariantFactory.create_processed_variant` returns for
      `variant`.
    """
    if not header_num_dict:
        header_num_dict = {}
    header = vcf_header_util.make_header(header_num_dict)
    variant_factory = processed_variant.ProcessedVariantFactory(header)
    return variant_factory.create_processed_variant(variant)