예제 #1
0
    def test_overlapping_three_non_variants(self):
        """Checks that three overlapping non-variants are cut at each boundary.

        The inputs span [0, 10), [3, 5) and [4, 9) on reference '1'. Each
        expected record covers one sub-range and carries the calls of every
        input whose span contains that sub-range.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None)

        # One call per input record, all with genotype [0, 0].
        sample_a = vcfio.VariantCall('1', [0, 0])
        sample_b = vcfio.VariantCall('2', [0, 0])
        sample_c = vcfio.VariantCall('3', [0, 0])

        inputs = []
        for begin, stop, call in ((0, 10, sample_a),
                                  (3, 5, sample_b),
                                  (4, 9, sample_c)):
            record = vcfio.Variant(reference_name='1', start=begin, end=stop)
            record.calls.append(call)
            inputs.append(record)

        # (start, end, calls expected on that slice).
        expected = []
        for begin, stop, calls in ((0, 3, [sample_a]),
                                   (3, 4, [sample_a, sample_b]),
                                   (4, 5, [sample_a, sample_b, sample_c]),
                                   (5, 9, [sample_a, sample_c]),
                                   (9, 10, [sample_a])):
            record = vcfio.Variant(reference_name='1', start=begin, end=stop)
            record.calls.extend(calls)
            expected.append(record)

        actual = list(strategy.get_merged_variants(inputs))
        # Output order is not asserted; both sides are sorted before comparing.
        self.assertEqual(sorted(actual), sorted(expected))
예제 #2
0
    def test_non_variant_split_by_snp(self):
        """Checks that a SNP inside a non-variant splits it into three records.

        The non-variant spans [0, 10) and the SNP sits at [5, 6). The expected
        output is the part before the SNP, the SNP itself (holding both calls)
        and the part after the SNP.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None)

        def make_snp():
            # Fresh object each time so input and expectation share no state.
            return vcfio.Variant(reference_name='1',
                                 start=5,
                                 end=6,
                                 reference_bases='C',
                                 alternate_bases=['A'])

        ref_call = vcfio.VariantCall(name='1', genotype=[0, 0])
        alt_call = vcfio.VariantCall(name='2', genotype=[1, 0])

        block = vcfio.Variant(reference_name='1', start=0, end=10)
        block.calls.append(ref_call)
        snp = make_snp()
        snp.calls.append(alt_call)

        before_snp = vcfio.Variant(reference_name='1', start=0, end=5)
        before_snp.calls.append(ref_call)
        at_snp = make_snp()
        at_snp.calls.extend([ref_call, alt_call])
        after_snp = vcfio.Variant(reference_name='1', start=6, end=10)
        after_snp.calls.append(ref_call)

        actual = list(strategy.get_merged_variants([block, snp]))
        self.assertEqual(sorted(actual),
                         sorted([before_snp, at_snp, after_snp]))
예제 #3
0
    def test_merge_many_different_alternates(self):
        """Checks that same-position records with different alternates are
        returned unmerged.

        Three records share reference, start, end and reference bases but each
        has its own alternate base; the merged output must equal the input.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None)

        variants = []
        for alternate, sample in (('C', 'Sample1'),
                                  ('G', 'Sample2'),
                                  ('T', 'Sample3')):
            record = vcfio.Variant(reference_name='1',
                                   start=1,
                                   end=2,
                                   reference_bases='A',
                                   alternate_bases=[alternate])
            record.calls.append(
                vcfio.VariantCall(name=sample, genotype=[1, 0]))
            variants.append(record)

        merged_variants = list(strategy.get_merged_variants(variants))
        self.assertEqual(sorted(merged_variants), sorted(variants))
예제 #4
0
  def test_get_merged_variants_no_custom_options(self):
    """Checks merging when nothing is moved or copied into the calls.

    The strategy is built with no INFO-key regex and with quality/filter
    copying disabled. A single variant must come back unchanged; merging all
    sample variants must keep the calls as they were and leave the INFO keys
    'A1', 'A2' and 'A3' on the merged variant.
    """
    strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
        info_keys_to_move_to_calls_regex=None,
        copy_quality_to_calls=False,
        copy_filter_to_calls=False)
    variants = self._get_sample_variants()

    # Test single variant merge.
    actual = list(strategy.get_merged_variants([variants[0]]))
    self.assertEqual([variants[0]], actual)

    # Test multiple variant merge.
    merged_variant = list(strategy.get_merged_variants(variants))[0]
    self._assert_common_expected_merged_fields(merged_variant)
    self.assertEqual(
        [vcfio.VariantCall(name='Sample1', genotype=[0, 1],
                           info={'GQ': 20, 'HQ': [10, 20]}),
         vcfio.VariantCall(name='Sample2', genotype=[1, 0],
                           info={'GQ': 10, 'FLAG1': True}),
         vcfio.VariantCall(name='Sample3', genotype=[1, 1]),
         vcfio.VariantCall(name='Sample4', genotype=[1, 0], info={'GQ': 20})],
        merged_variant.calls)
    # Order-insensitive key comparison. `assertItemsEqual` exists only on
    # Python 2, so compare against the sorted keys, which works everywhere.
    self.assertEqual(['A1', 'A2', 'A3'], sorted(merged_variant.info.keys()))
    # 'A1' is accepted with either source variant's value.
    self.assertIn(merged_variant.info['A1'], ('some data', 'some data2'))
    self.assertEqual(['data1', 'data2'], merged_variant.info['A2'])
    self.assertEqual(['data3', 'data4'], merged_variant.info['A3'])
예제 #5
0
  def test_merge_snp_with_non_variant(self):
    """Checks merging a SNP at [5, 6) with a non-variant spanning [0, 10).

    The expected output keeps the non-variant's names, filters and quality on
    the two flanking pieces (without reference bases), and keeps the SNP's own
    fields on the middle piece, which carries both calls.
    """
    strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
        None, None, None)

    def make_snp():
      return vcfio.Variant(
          reference_name='1',
          start=5,
          end=6,
          reference_bases='A',
          alternate_bases=['C'],
          names=['v'],
          filters=['vf'],
          quality=1)

    def make_flank(begin, stop):
      # Expected non-variant slice; note there are no reference_bases here.
      return vcfio.Variant(
          reference_name='1',
          start=begin,
          end=stop,
          alternate_bases=['<NON_REF>'],
          names=['nv'],
          filters=['nvf'],
          quality=2)

    snp = make_snp()
    non_variant = vcfio.Variant(
        reference_name='1',
        start=0,
        end=10,
        reference_bases='G',
        alternate_bases=['<NON_REF>'],
        names=['nv'],
        filters=['nvf'],
        quality=2)

    snp_call = vcfio.VariantCall(name='1', genotype=[1, 0])
    ref_call = vcfio.VariantCall(name='2', genotype=[0, 0])
    snp.calls.append(snp_call)
    non_variant.calls.append(ref_call)

    left_flank = make_flank(0, 5)
    left_flank.calls.append(ref_call)
    right_flank = make_flank(6, 10)
    right_flank.calls.append(ref_call)
    merged_snp = make_snp()
    merged_snp.calls.extend([snp_call, ref_call])

    actual = list(strategy.get_merged_variants([snp, non_variant]))
    expected = [left_flank, right_flank, merged_snp]
    self.assertEqual(sorted(actual), sorted(expected))
예제 #6
0
 def test_get_non_variant_merge_keys(self):
     """Checks the first three merge keys of a non-variant spanning [6, 12).

     The strategy is built with a fourth positional argument of 2 (presumably
     the window size -- confirm against the strategy's constructor). The keys
     must come out as '2:6', '2:8' and '2:10', in that order.
     """
     strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
         None, None, None, 2)
     non_variant = vcfio.Variant(reference_name='2', start=6, end=12)
     key_iterator = strategy.get_merge_keys(non_variant)
     for expected_key in ('2:6', '2:8', '2:10'):
         self.assertEqual(next(key_iterator), expected_key)
    def test_get_merged_variants_move_info_to_calls(self):
        """Checks merging when the INFO key 'A1' is moved into every call.

        The strategy is built with the regex '^A1$' and with quality/filter
        copying disabled. After merging, each call must carry the 'A1' value
        of the variant it came from, and only 'A2' and 'A3' may remain in the
        merged variant's INFO.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            info_keys_to_move_to_calls_regex='^A1$',
            copy_quality_to_calls=False,
            copy_filter_to_calls=False)
        variants = self._get_sample_variants()

        # Test single variant merge.
        single_merged_variant = list(
            strategy.get_merged_variants([variants[0]]))[0]
        self.assertEqual([
            vcfio.VariantCall(sample_id=hash_name('Sample1'),
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  'A1': 'some data'
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample2'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  'A1': 'some data'
                              })
        ], single_merged_variant.calls)

        # Test multiple variant merge.
        merged_variant = list(strategy.get_merged_variants(variants))[0]
        self._assert_common_expected_merged_fields(merged_variant)
        self.assertEqual([
            vcfio.VariantCall(sample_id=hash_name('Sample1'),
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  'A1': 'some data'
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample2'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  'A1': 'some data'
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample3'),
                              genotype=[1, 1],
                              info={'A1': 'some data2'}),
            vcfio.VariantCall(sample_id=hash_name('Sample4'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 20,
                                  'A1': 'some data2'
                              })
        ], merged_variant.calls)
        # Order-insensitive key comparison. `assertItemsEqual` exists only on
        # Python 2, so compare against the sorted keys instead.
        self.assertEqual(['A2', 'A3'], sorted(merged_variant.info.keys()))
        self.assertEqual(['data1', 'data2'], merged_variant.info['A2'])
        self.assertEqual(['data3', 'data4'], merged_variant.info['A3'])
예제 #8
0
    def test_get_snp_merge_keys(self):
        """Checks the first merge key produced for two single-base variants.

        With the strategy built with a fourth positional argument of 2, a
        variant starting at 3 must yield '1:2' first and a variant starting at
        4 must yield '2:4' first.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None, 2)

        odd_start = vcfio.Variant(reference_name='1', start=3, end=4)
        even_start = vcfio.Variant(reference_name='2', start=4, end=5)

        first_key = next(strategy.get_merge_keys(odd_start))
        self.assertEqual(first_key, '1:2')
        first_key = next(strategy.get_merge_keys(even_start))
        self.assertEqual(first_key, '2:4')
예제 #9
0
  def test_merge_2_non_variants(self):
    """Checks merging two overlapping non-variants, [0, 10) and [5, 15).

    The non-overlapping ends must keep the fields of the input they came
    from. The overlap [5, 10) must carry both calls, the combined names and
    filters, and quality 1.
    """
    strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
        None, None, None)

    def make_non_variant(begin, stop, names, filters, quality):
      return vcfio.Variant(
          reference_name='1',
          start=begin,
          end=stop,
          alternate_bases=['<NON_REF>'],
          names=names,
          filters=filters,
          quality=quality)

    first = make_non_variant(0, 10, ['nonv1', 'nonv2'], ['f1', 'f2'], 1)
    second = make_non_variant(5, 15, ['nonv2', 'nonv3'], ['f2', 'f3'], 2)
    first_call = vcfio.VariantCall(name='1', genotype=[0, 0])
    second_call = vcfio.VariantCall(name='2', genotype=[0, 0])
    first.calls.append(first_call)
    second.calls.append(second_call)

    only_first = make_non_variant(0, 5, ['nonv1', 'nonv2'], ['f1', 'f2'], 1)
    only_first.calls.append(first_call)
    only_second = make_non_variant(10, 15, ['nonv2', 'nonv3'], ['f2', 'f3'], 2)
    only_second.calls.append(second_call)
    overlap = make_non_variant(
        5, 10, ['nonv1', 'nonv2', 'nonv3'], ['f1', 'f2', 'f3'], 1)
    overlap.calls.extend([first_call, second_call])

    actual = list(strategy.get_merged_variants([first, second]))
    expected = [only_first, only_second, overlap]
    self.assertEqual(sorted(actual), sorted(expected))
예제 #10
0
    def test_align_non_variant(self):
        """Checks that a lone non-variant is clipped when merged under a key.

        A non-variant spanning [5, 12) is merged under the key '1:8' by a
        strategy built with a fourth positional argument of 2. The only
        expected output is a copy of the input restricted to [8, 10).
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None, 2)

        non_variant = vcfio.Variant(reference_name='1', start=5, end=12)

        # Deep copy so the expectation is independent of the input object.
        clipped = copy.deepcopy(non_variant)
        clipped.start, clipped.end = 8, 10

        merged = list(strategy.get_merged_variants([non_variant], '1:8'))
        self.assertEqual(merged, [clipped])
예제 #11
0
  def test_get_merged_variants_move_everything_to_calls(self):
    """Checks merging when every INFO key, quality and filter go to the calls.

    The strategy is built with the regex '.*' and with quality/filter copying
    enabled. Each call must then carry its source variant's INFO values,
    quality and filters, and the merged variant's INFO must be empty.
    """
    strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
        info_keys_to_move_to_calls_regex='.*',
        copy_quality_to_calls=True,
        copy_filter_to_calls=True)
    variants = self._get_sample_variants()

    # Test single variant merge.
    single_merged_variant = list(strategy.get_merged_variants([variants[0]]))[0]
    self.assertEqual(
        [vcfio.VariantCall(name='Sample1', genotype=[0, 1],
                           info={'GQ': 20, 'HQ': [10, 20],
                                 'A1': 'some data', 'A2': ['data1', 'data2'],
                                 ColumnKeyConstants.QUALITY: 2,
                                 ColumnKeyConstants.FILTER: ['PASS']}),
         vcfio.VariantCall(name='Sample2', genotype=[1, 0],
                           info={'GQ': 10, 'FLAG1': True,
                                 'A1': 'some data', 'A2': ['data1', 'data2'],
                                 ColumnKeyConstants.QUALITY: 2,
                                 ColumnKeyConstants.FILTER: ['PASS']})],
        single_merged_variant.calls)

    # Test multiple variant merge.
    merged_variant = list(strategy.get_merged_variants(variants))[0]
    self._assert_common_expected_merged_fields(merged_variant)
    self.assertEqual(
        [vcfio.VariantCall(name='Sample1', genotype=[0, 1],
                           info={'GQ': 20, 'HQ': [10, 20],
                                 'A1': 'some data', 'A2': ['data1', 'data2'],
                                 ColumnKeyConstants.QUALITY: 2,
                                 ColumnKeyConstants.FILTER: ['PASS']}),
         vcfio.VariantCall(name='Sample2', genotype=[1, 0],
                           info={'GQ': 10, 'FLAG1': True,
                                 'A1': 'some data', 'A2': ['data1', 'data2'],
                                 ColumnKeyConstants.QUALITY: 2,
                                 ColumnKeyConstants.FILTER: ['PASS']}),
         vcfio.VariantCall(name='Sample3', genotype=[1, 1],
                           info={'A1': 'some data2', 'A3': ['data3', 'data4'],
                                 ColumnKeyConstants.QUALITY: 20,
                                 ColumnKeyConstants.FILTER: ['q10']}),
         vcfio.VariantCall(name='Sample4', genotype=[1, 0],
                           info={'GQ': 20,
                                 'A1': 'some data2', 'A3': ['data3', 'data4'],
                                 ColumnKeyConstants.QUALITY: 20,
                                 ColumnKeyConstants.FILTER: ['q10']})],
        merged_variant.calls)
    # Materialize the keys: on Python 3 `dict.keys()` is a view that never
    # compares equal to a list, so the bare comparison could not pass there.
    self.assertEqual([], list(merged_variant.info.keys()))
    def test_merge_one_overlap(self):
        """Checks that only records with identical alternates are merged.

        Four records share one position; the first and fourth both have the
        alternate 'C'. The expected output is one 'C' record holding the calls
        of Sample1 and Sample4, plus the 'G' and 'T' records unchanged.
        """
        strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
            None, None, None)

        def make_record(alternate, *sample_names):
            # All records share position and reference base; only the
            # alternate base and the attached samples vary.
            record = vcfio.Variant(reference_name='1',
                                   start=1,
                                   end=2,
                                   reference_bases='A',
                                   alternate_bases=[alternate])
            for sample_name in sample_names:
                record.calls.append(
                    vcfio.VariantCall(sample_id=hash_name(sample_name),
                                      genotype=[1, 0]))
            return record

        first_c = make_record('C', 'Sample1')
        only_g = make_record('G', 'Sample2')
        only_t = make_record('T', 'Sample3')
        second_c = make_record('C', 'Sample4')
        variants = [first_c, only_g, only_t, second_c]

        merged = make_record('C', 'Sample1', 'Sample4')

        merged_variants = list(strategy.get_merged_variants(variants))
        self.assertEqual(sorted(merged_variants),
                         sorted([merged, only_g, only_t]))
예제 #13
0
def _get_variant_merge_strategy(known_args  # type: argparse.Namespace
                                ):
    # type: (...) -> Optional[variant_merge_strategy.VariantMergeStrategy]
    """Builds the variant merge strategy selected by the parsed arguments.

    Args:
      known_args: Parsed arguments. `variant_merge_strategy` is always read;
        `info_keys_to_move_to_calls_regex`, `copy_quality_to_calls` and
        `copy_filter_to_calls` are read only when a strategy is built.

    Returns:
      None when no strategy is set or it equals `MergeOptions.NONE`;
      otherwise a `MoveToCallsStrategy` or `MergeWithNonVariantsStrategy`
      constructed from the three options above.

    Raises:
      ValueError: If the requested strategy is none of the supported values.
    """
    merge_options = variant_transform_options.MergeOptions
    requested = known_args.variant_merge_strategy

    # Guard clause: an unset/empty value and the explicit NONE option both
    # mean "do not merge".
    if not requested or requested == merge_options.NONE:
        return None

    if requested == merge_options.MOVE_TO_CALLS:
        strategy_class = move_to_calls_strategy.MoveToCallsStrategy
    elif requested == merge_options.MERGE_WITH_NON_VARIANTS:
        strategy_class = (
            merge_with_non_variants_strategy.MergeWithNonVariantsStrategy)
    else:
        raise ValueError('Merge strategy is not supported.')

    # Both strategies take the same three positional arguments.
    return strategy_class(
        known_args.info_keys_to_move_to_calls_regex,
        known_args.copy_quality_to_calls,
        known_args.copy_filter_to_calls)
예제 #14
0
  def test_merge_mnps(self):
    """Checks that two identical multi-base records merge into one.

    Both inputs span [5, 8) with reference 'GTC' and alternates 'G' and
    'GTCG'. The single expected record carries both calls, the combined names
    and filters, and quality 2.
    """
    strategy = merge_with_non_variants_strategy.MergeWithNonVariantsStrategy(
        None, None, None)

    def make_record(names, filters, quality):
      # Position and bases are shared; only names, filters and quality vary.
      return vcfio.Variant(
          reference_name='1',
          start=5,
          end=8,
          reference_bases='GTC',
          alternate_bases=['G', 'GTCG'],
          names=names,
          filters=filters,
          quality=quality)

    first = make_record(['mnp1', 'mnp2'], ['f1', 'f2'], 1)
    second = make_record(['mnp2', 'mnp3'], ['f2', 'f3'], 2)
    first_call = vcfio.VariantCall(name='1', genotype=[1, 2])
    second_call = vcfio.VariantCall(name='2', genotype=[2, 0])

    expected = make_record(
        ['mnp1', 'mnp2', 'mnp3'], ['f1', 'f2', 'f3'], 2)
    expected.calls.extend([first_call, second_call])

    first.calls.append(first_call)
    second.calls.append(second_call)
    actual = list(strategy.get_merged_variants([first, second]))
    self.assertEqual(actual, [expected])