コード例 #1
0
 def test_decode_variants(self):
     """Serialized variants fed to decode_variants come back unchanged."""
     expected = [test_utils.make_variant(start=pos) for pos in (1, 2)]
     serialized = [proto.SerializeToString() for proto in expected]
     decoded = variantutils.decode_variants(serialized)
     # The raw return value is an iterable rather than a list, so it must
     # not compare equal to the list of original protos.
     self.assertNotEqual(decoded, expected)
     # Materializing it yields the original protos, in the same order.
     self.assertEqual(list(decoded), expected)
コード例 #2
0
 def test_variant_position_and_range(self):
     """Check variant_position and variant_range for 1- and 4-base refs."""
     short_ref = test_utils.make_variant(
         chrom='1', start=10, alleles=['A', 'C'])
     long_ref = test_utils.make_variant(
         chrom='1', start=10, alleles=['AGCT', 'C'])
     one_base = ranges.make_range('1', 10, 11)
     four_bases = ranges.make_range('1', 10, 14)
     # The position is expected to be the single base at `start`, whatever
     # the length of the reference allele.
     for variant in (short_ref, long_ref):
         self.assertEqual(one_base, variantutils.variant_position(variant))
     # The range is expected to grow with the reference allele length.
     self.assertEqual(one_base, variantutils.variant_range(short_ref))
     self.assertEqual(four_bases, variantutils.variant_range(long_ref))
コード例 #3
0
 def test_genotype_as_alleles_raises_with_bad_inputs(self):
     """Bad inputs to genotype_as_alleles / genotype_type must raise."""
     # Each thunk is evaluated inside the assertRaises context, so the
     # variant construction happens under the same guard as the call.
     failing_calls = (
         lambda: variantutils.genotype_as_alleles(None),
         lambda: variantutils.genotype_as_alleles(
             test_utils.make_variant(gt=None)),
         lambda: variantutils.genotype_type(None),
     )
     for invoke in failing_calls:
         with self.assertRaises(Exception):
             invoke()
コード例 #4
0
    def test_is_variant_call_no_calls_are_variant(self):
        """Exercise is_variant_call's no_calls_are_variant switch."""
        no_call, hom_ref, het, hom_var = [
            test_utils.make_variant(gt=genotype)
            for genotype in ([-1, -1], [0, 0], [0, 1], [1, 1])
        ]

        # Rows are (variant, no_calls_are_variant, expected result). Only the
        # no-call genotype is expected to change its answer with the flag.
        cases = (
            (no_call, False, False),
            (no_call, True, True),
            (hom_ref, False, False),
            (hom_ref, True, False),
            (het, False, True),
            (het, True, True),
            (hom_var, False, True),
            (hom_var, True, True),
        )
        for variant, flag, expected in cases:
            result = variantutils.is_variant_call(
                variant, no_calls_are_variant=flag)
            self.assertEqual(result, expected)
コード例 #5
0
def _format_test_variant(alleles, call_infos):
  """Builds a test variant on chrom '20' with one call per info mapping.

  Args:
    alleles: Alleles forwarded to test_utils.make_variant.
    call_infos: Iterable of dict-like objects. Each one produces a call named
      'sample<i>' (i being its index) whose info map gets one entry per key.
      A value may be a single number or a list/tuple of numbers.

  Returns:
    The variant created by test_utils.make_variant with the calls added.
  """
  variant = test_utils.make_variant(chrom='20', start=0, alleles=alleles)
  for i, call_info in enumerate(call_infos):
    call = variant.calls.add(call_set_name='sample' + str(i))
    # .items() instead of .iteritems(): the latter only exists on Python 2,
    # while .items() iterates the same pairs on both Python 2 and 3.
    for key, value in call_info.items():
      # Normalize scalars to a one-element list so both shapes share a path.
      if not isinstance(value, (list, tuple)):
        value = [value]
      call.info[key].values.extend(
          [struct_pb2.Value(number_value=v) for v in value])
  return variant
コード例 #6
0
 def setUp(self):
   """Create the writer options, the VCF writer and a two-sample variant."""
   self.out_fname = test_utils.test_tmpfile('output.vcf')
   contigs = [
       core_pb2.ContigInfo(name='Chr1', n_bases=50, pos_in_fasta=0),
       core_pb2.ContigInfo(name='Chr2', n_bases=25, pos_in_fasta=1),
   ]
   self.options = core_pb2.VcfWriterOptions(
       contigs=contigs, sample_names=['Fido', 'Spot'], filters=[])
   self.writer = vcf_writer.VcfWriter.to_file(self.out_fname, self.options)
   self.variant = test_utils.make_variant(
       chrom='Chr1', start=10, alleles=['A', 'C'])
   # One call per sample, added in the same order as sample_names above.
   for sample, genotype in (('Fido', [0, 0]), ('Spot', [0, 1])):
     self.variant.calls.add(genotype=genotype, call_set_name=sample)
コード例 #7
0
 def test_format_filters(self, filters, expected):
     """format_filters(variant) must equal `expected` for these filters."""
     variant = test_utils.make_variant(filters=filters)
     # For the None case, explicitly clear the proto's filter field so the
     # variant carries no filter values at all.
     if filters is None:
         variant.ClearField('filter')
     formatted = variantutils.format_filters(variant)
     self.assertEqual(formatted, expected)
コード例 #8
0
class VariantUtilsTests(parameterized.TestCase):
    """Unit tests for the helper functions exposed by ``variantutils``.

    Most cases are table-driven through ``parameterized.parameters``: each
    row pairs an input (usually built with ``test_utils.make_variant``) with
    the value the helper under test is expected to return.

    NOTE(review): ``assertRaisesRegexp`` is kept for Python 2 compatibility.
    It is a deprecated alias of ``assertRaisesRegex`` in Python 3 and was
    removed in Python 3.12; switch once Python 2 support is dropped.
    """

    def test_decode_variants(self):
        """Serialized variants fed to decode_variants come back unchanged."""
        variants = [
            test_utils.make_variant(start=1),
            test_utils.make_variant(start=2)
        ]
        encoded = [variant.SerializeToString() for variant in variants]
        actual = variantutils.decode_variants(encoded)
        # We have an iterable, so actual isn't equal to variants.
        self.assertNotEqual(actual, variants)
        # Making actual a list now makes it equal.
        self.assertEqual(list(actual), variants)

    def test_variant_position_and_range(self):
        """Check position / range helpers for 1-base and 4-base refs."""
        v1 = test_utils.make_variant(chrom='1', alleles=['A', 'C'], start=10)
        v2 = test_utils.make_variant(chrom='1',
                                     alleles=['AGCT', 'C'],
                                     start=10)
        pos = ranges.make_range('1', 10, 11)
        range_ = ranges.make_range('1', 10, 14)
        v1_range_tuple = ('1', 10, 11)
        v2_range_tuple = ('1', 10, 14)
        self.assertEqual(pos, variantutils.variant_position(v1))
        self.assertEqual(pos, variantutils.variant_position(v2))
        self.assertEqual(pos, variantutils.variant_range(v1))
        self.assertEqual(range_, variantutils.variant_range(v2))
        self.assertEqual(v1_range_tuple, variantutils.variant_range_tuple(v1))
        self.assertEqual(v2_range_tuple, variantutils.variant_range_tuple(v2))

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), 'A/C'),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), 'A/C,T'),
        (test_utils.make_variant(alleles=['A', 'AT']), 'A/AT'),
        (test_utils.make_variant(alleles=['AT', 'A']), 'AT/A'),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), 'AT/A,CT'),
    )
    def test_format_alleles(self, variant, expected):
        """format_alleles renders the alleles as 'REF/ALT1,ALT2'."""
        self.assertEqual(variantutils.format_alleles(variant), expected)

    @parameterized.parameters(
        (None, '.'),
        (['.'], '.'),
        (['PASS'], 'PASS'),
        (['FILTER1', 'FILTER2'], 'FILTER1,FILTER2'),
        (['FILTER1', 'FILTER2', 'FILTER3'], 'FILTER1,FILTER2,FILTER3'),
    )
    def test_format_filters(self, filters, expected):
        """format_filters joins filters with ','; no filters gives '.'."""
        variant = test_utils.make_variant(filters=filters)
        if filters is None:
            variant.ClearField('filter')
        self.assertEqual(variantutils.format_filters(variant), expected)

    @parameterized.parameters(
        # variant => status if we require non_ref genotype / status if we don't.
        (test_utils.make_variant(alleles=['A', 'C']), True, True),
        (test_utils.make_variant(alleles=['A', 'C'], gt=None), True, True),
        (test_utils.make_variant(alleles=['A', 'C', 'AT']), True, True),
        (test_utils.make_variant(alleles=['A']), False, False),
        (test_utils.make_variant(filters=['FAIL']), False, False),
        (test_utils.make_variant(gt=[-1, -1]), False, True),
        (test_utils.make_variant(gt=[0, 0]), False, True),
        (test_utils.make_variant(gt=[0, 1]), True, True),
        (test_utils.make_variant(gt=[1, 1]), True, True),
    )
    def test_is_variant_call(self, variant, expected_req_non_ref,
                             expected_any_genotype):
        """is_variant_call with and without require_non_ref_genotype."""
        # Check that default call checks for genotypes.
        self.assertEqual(variantutils.is_variant_call(variant),
                         expected_req_non_ref)
        # Ask explicitly for genotypes to be included.
        self.assertEqual(
            variantutils.is_variant_call(variant,
                                         require_non_ref_genotype=True),
            expected_req_non_ref)
        # Don't require non_ref genotypes.
        self.assertEqual(
            variantutils.is_variant_call(variant,
                                         require_non_ref_genotype=False),
            expected_any_genotype)

        with self.assertRaises(Exception):
            variantutils.is_variant_call(None)

    def test_is_variant_call_no_calls_are_variant(self):
        """Exercise is_variant_call's no_calls_are_variant switch."""
        def check_is_variant(variant, expected, **kwargs):
            self.assertEqual(variantutils.is_variant_call(variant, **kwargs),
                             expected)

        no_call = test_utils.make_variant(gt=[-1, -1])
        hom_ref = test_utils.make_variant(gt=[0, 0])
        het = test_utils.make_variant(gt=[0, 1])
        hom_var = test_utils.make_variant(gt=[1, 1])

        check_is_variant(no_call, False, no_calls_are_variant=False)
        check_is_variant(no_call, True, no_calls_are_variant=True)
        check_is_variant(hom_ref, False, no_calls_are_variant=False)
        check_is_variant(hom_ref, False, no_calls_are_variant=True)
        check_is_variant(het, True, no_calls_are_variant=False)
        check_is_variant(het, True, no_calls_are_variant=True)
        check_is_variant(hom_var, True, no_calls_are_variant=False)
        check_is_variant(hom_var, True, no_calls_are_variant=True)

    @parameterized.parameters(
        (test_utils.make_variant(filters=None), False),
        (test_utils.make_variant(filters=['.']), False),
        (test_utils.make_variant(filters=['PASS']), False),
        (test_utils.make_variant(filters=['FAIL']), True),
        (test_utils.make_variant(filters=['FAIL1', 'FAIL2']), True),
        # These two are not allowed in VCF, but worth testing our
        # code's behavior
        (test_utils.make_variant(filters=['FAIL1', 'PASS']), True),
        (test_utils.make_variant(filters=['FAIL1', '.']), True),
    )
    def test_is_filtered(self, variant, expected):
        """is_filtered is True when any filter other than PASS/'.' is set."""
        self.assertEqual(variantutils.is_filtered(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']),
         variantutils.VariantType.snp),
        (test_utils.make_variant(alleles=['A', 'C', 'T']),
         variantutils.VariantType.snp),
        (test_utils.make_variant(alleles=['A']), variantutils.VariantType.ref),
        (test_utils.make_variant(alleles=['A', '.']),
         variantutils.VariantType.ref),
        (test_utils.make_variant(alleles=['A', 'AC']),
         variantutils.VariantType.indel),
        (test_utils.make_variant(alleles=['AC', 'A']),
         variantutils.VariantType.indel),
        (test_utils.make_variant(alleles=['A', 'AC', 'ACC']),
         variantutils.VariantType.indel),
        (test_utils.make_variant(alleles=['ACC', 'AC', 'A']),
         variantutils.VariantType.indel),
    )
    def test_variant_type(self, variant, expected):
        """variant_type classifies a variant as snp, ref or indel."""
        self.assertEqual(variantutils.variant_type(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant('chr1', 10), 'chr1:11'),
        (test_utils.make_variant('chr2', 100), 'chr2:101'),
    )
    def test_format_position(self, variant, expected):
        """format_position renders 'chrom:pos' with pos = start + 1."""
        self.assertEqual(variantutils.format_position(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), True),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), True),
        (test_utils.make_variant(alleles=['A', 'AT']), False),
        (test_utils.make_variant(alleles=['AT', 'A']), False),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), False),
        (test_utils.make_variant(alleles=['A', 'C', 'AT']), False),
        (test_utils.make_variant(alleles=['A']), False),
        (test_utils.make_variant(alleles=['A', '.']), False),
    )
    def test_is_snp(self, variant, expected):
        """is_snp is True only when every allele is a single base."""
        self.assertEqual(variantutils.is_snp(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), False),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), False),
        (test_utils.make_variant(alleles=['A', 'AT']), True),
        (test_utils.make_variant(alleles=['AT', 'A']), True),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), True),
        (test_utils.make_variant(alleles=['A', 'C', 'AT']), True),
        (test_utils.make_variant(alleles=['A']), False),
        (test_utils.make_variant(alleles=['A', '.']), False),
    )
    def test_is_indel(self, variant, expected):
        """is_indel is True when any allele is longer than one base."""
        self.assertEqual(variantutils.is_indel(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), False),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), True),
        (test_utils.make_variant(alleles=['A', 'AT']), False),
        (test_utils.make_variant(alleles=['AT', 'A']), False),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), True),
        (test_utils.make_variant(alleles=['A', 'C', 'AT']), True),
    )
    def test_is_multiallelic(self, variant, expected):
        """is_multiallelic is True for variants with two or more alts."""
        self.assertEqual(variantutils.is_multiallelic(variant), expected)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), True),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), False),
        (test_utils.make_variant(alleles=['A', 'AT']), True),
        (test_utils.make_variant(alleles=['AT', 'A']), True),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), False),
        (test_utils.make_variant(alleles=['AT']), False),
    )
    def test_is_biallelic(self, variant, expected):
        """is_biallelic is True for variants with exactly one alt."""
        self.assertEqual(variantutils.is_biallelic(variant), expected)

    @parameterized.parameters(
        (['A', 'C'], ['A', 'C']),
        (['AA', 'CA'], ['A', 'C']),
        (['AAG', 'CAG'], ['A', 'C']),
        (['AAGAG', 'CAGAG'], ['A', 'C']),
        (['AACAG', 'CAGAG'], ['AAC', 'CAG']),
        (['AACAC', 'CAGAG'], ['AACAC', 'CAGAG']),
        (['ACT', 'A'], ['ACT', 'A']),
        (['ACTCT', 'ACT'], ['ACT', 'A']),
        (['ACTCT', 'A'], ['ACTCT', 'A']),
        (['CAG', 'GAG'], ['C', 'G']),
        # Make sure we don't reduce an allele to nothing.
        (['AT', 'ATAT'], ['A', 'ATA']),
        # Tests for multi-allelics.
        # There's one extra T here.
        (['ATT', 'AT', 'ATTT'], ['AT', 'A', 'ATT']),
        # Another single base postfix where we can remove a 'G'.
        (['CAG', 'GAG', 'TCG'], ['CA', 'GA', 'TC']),
        # There are two extra Ts to remove.
        (['ATTT', 'ATT', 'ATTTT'], ['AT', 'A', 'ATT']),
        # One pair can simplify, but not the other, so nothing can reduce.
        (['CAG', 'GAG', 'TCA'], ['CAG', 'GAG', 'TCA']),
        # Example from b/64022627.
        (['CGGCGG', 'CGG', 'CAACGG'], ['CGGC', 'C', 'CAAC']),
    )
    def test_simplify_alleles(self, alleles, expected):
        """simplify_alleles trims the shared suffix, in either arg order."""
        self.assertEqual(variantutils.simplify_alleles(*alleles),
                         tuple(expected))
        self.assertEqual(variantutils.simplify_alleles(*reversed(alleles)),
                         tuple(reversed(expected)))

    @parameterized.parameters(
        (['A', 'C'], ['A', 'C'], NO_MISMATCH),
        (['A', 'AC'], ['A', 'AC'], NO_MISMATCH),
        (['AC', 'A'], ['AC', 'A'], NO_MISMATCH),
        (['AC', 'A', 'ACT'], ['AC', 'A', 'ACT'], NO_MISMATCH),
        (['AC', 'A', 'ACT'], ['AC', 'ACT', 'A'], NO_MISMATCH),
        # Alleles are incompatible, so we have mismatches in both directions.
        (['A', 'C'], ['A', 'T'], {TRUE_MISS, EVAL_MISS}),
        (['A', 'C'], ['G', 'C'], {TRUE_MISS, EVAL_MISS}),
        # Missing alts specific to eval and truth.
        (['A', 'C', 'G'], ['A', 'C'], {EVAL_MISS}),
        (['A', 'C'], ['A', 'C', 'G'], {TRUE_MISS}),
        # Duplicate alleles.
        (['A', 'C', 'C'], ['A', 'C'], {EVAL_DUP}),
        (['A', 'C'], ['A', 'C', 'C'], {TRUE_DUP}),
        (['A', 'C', 'C'], ['A', 'C', 'C'], {EVAL_DUP, TRUE_DUP}),
        # Dups in truth, discordant alleles.
        (['A', 'C'], ['A', 'G', 'G'], {TRUE_DUP, EVAL_MISS, TRUE_MISS}),
        # Simplification of alleles does the right matching.
        (['A', 'C'], ['AA', 'CA'], NO_MISMATCH),  # trailing A.
        # preceding A, doesn't simplify so it's a mismatch.
        (['A', 'C'], ['AA', 'AC'], {EVAL_MISS, TRUE_MISS}),
        # Both a trailing and a preceding A; doesn't simplify, so mismatches.
        (['A', 'C'], ['AAA', 'ACA'], {EVAL_MISS, TRUE_MISS}),
        # Eval has 1 of the two alt alleles, so no eval mismatch.
        (['ACT', 'A'], ['ACTCT', 'ACT', 'A'], {TRUE_MISS}),
        # Eval has extra unmatched alleles, so it's got a mismatch.
        (['ACTCT', 'ACT', 'A'], ['ACT', 'A'], {EVAL_MISS}),
    )
    def test_allele_mismatch(self, a1, a2, expected):
        """allele_mismatches reports the expected set for eval vs truth."""
        v1 = test_utils.make_variant(alleles=a1)
        v2 = test_utils.make_variant(alleles=a2)
        self.assertEqual(variantutils.allele_mismatches(v1, v2), expected)

    @parameterized.parameters(
        (['A', 'C'], False),
        (['A', 'G'], True),
        (['A', 'T'], False),
        (['C', 'G'], False),
        (['C', 'T'], True),
        (['G', 'T'], False),
    )
    def test_is_transition(self, ordered_alleles, expected):
        """is_transition gives the same answer for both argument orders."""
        for alleles in [ordered_alleles, reversed(ordered_alleles)]:
            self.assertEqual(variantutils.is_transition(*alleles), expected)

    def test_is_transition_raises_with_bad_args(self):
        """is_transition rejects identical or multi-base alleles."""
        with self.assertRaises(ValueError):
            variantutils.is_transition('A', 'A')
        with self.assertRaises(ValueError):
            variantutils.is_transition('A', 'AA')
        with self.assertRaises(ValueError):
            variantutils.is_transition('AA', 'A')

    @parameterized.parameters(
        # alleles followed by is_insertion and is_deletion expectation
        (['A', 'C'], False, False),
        (['A', 'AT'], True, False),
        (['A', 'ATT'], True, False),
        (['AT', 'A'], False, True),
        (['ATT', 'A'], False, True),
        (['CAT', 'TCA'], False, False),

        # These are examples where ref is not simplified, such as could occur
        # in a multi-allelic record, such as the following:
        # alleles = AT, A, ATT, CT (1 deletion, 1 insertion, 1 SNP)
        (['AT', 'A'], False, True),
        (['AT', 'ATT'], True, False),
        (['AT', 'CT'], False, False),
    )
    def test_is_insertion_deletion(self, alleles, is_insertion, is_deletion):
        """is_insertion / is_deletion classify a (ref, alt) pair."""
        self.assertEqual(variantutils.is_insertion(*alleles), is_insertion)
        self.assertEqual(variantutils.is_deletion(*alleles), is_deletion)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C']), False, False),
        (test_utils.make_variant(alleles=['A', 'C', 'T']), False, False),
        (test_utils.make_variant(alleles=['A', 'AT']), True, False),
        (test_utils.make_variant(alleles=['AT', 'A']), False, True),
        (test_utils.make_variant(alleles=['AT', 'A', 'ATT']), True, True),
        (test_utils.make_variant(alleles=['AT', 'A', 'CT']), False, True),
        (test_utils.make_variant(alleles=['A', 'C', 'AT']), True, False),
        (test_utils.make_variant(alleles=['A']), False, False),
        (test_utils.make_variant(alleles=['A', '.']), False, False),
    )
    def test_has_insertion_deletion(self, variant, has_insertion,
                                    has_deletion):
        """has_insertion / has_deletion look across all alts of a variant."""
        self.assertEqual(variantutils.has_insertion(variant), has_insertion)
        self.assertEqual(variantutils.has_deletion(variant), has_deletion)

    @parameterized.parameters(
        (test_utils.make_variant(gt=None), False),
        (test_utils.make_variant(gt=[0, 0]), True),
        (test_utils.make_variant(gt=[0, 1]), True),
        (test_utils.make_variant(gt=[1, 1]), True),
        (test_utils.make_variant(gt=[-1, -1]), True),
    )
    def test_has_genotypes(self, variant, expected):
        """has_genotypes is False only when no genotype was provided."""
        self.assertEqual(variantutils.has_genotypes(variant), expected)

    def test_has_genotypes_raises_with_bad_inputs(self):
        """has_genotypes(None) must raise."""
        with self.assertRaises(Exception):
            variantutils.has_genotypes(None)

    @parameterized.parameters(
        (test_utils.make_variant(gt=None), variantutils.GenotypeType.no_call),
        (test_utils.make_variant(gt=[-1, -1]),
         variantutils.GenotypeType.no_call),
        (test_utils.make_variant(gt=[0, 0]),
         variantutils.GenotypeType.hom_ref),
        (test_utils.make_variant(gt=[0, 1]), variantutils.GenotypeType.het),
        (test_utils.make_variant(gt=[1, 0]), variantutils.GenotypeType.het),
        (test_utils.make_variant(gt=[0, 2]), variantutils.GenotypeType.het),
        (test_utils.make_variant(gt=[2, 0]), variantutils.GenotypeType.het),
        (test_utils.make_variant(gt=[1, 1]),
         variantutils.GenotypeType.hom_var),
        (test_utils.make_variant(gt=[1, 2]), variantutils.GenotypeType.het),
    )
    def test_genotype_type(self, variant, expected):
        """genotype_type maps genotypes to no_call/hom_ref/het/hom_var."""
        self.assertEqual(variantutils.genotype_type(variant), expected)

    def test_genotype_type_raises_with_bad_args(self):
        """genotype_type(None) must raise."""
        with self.assertRaises(Exception):
            variantutils.genotype_type(None)

    @parameterized.parameters(
        (test_utils.make_variant(alleles=['A', 'C'], gt=[0, 0]), ['A', 'A']),
        (test_utils.make_variant(alleles=['A', 'C'], gt=[0, 1]), ['A', 'C']),
        (test_utils.make_variant(alleles=['A', 'C'], gt=[1, 0]), ['C', 'A']),
        (test_utils.make_variant(alleles=['A', 'C'], gt=[1, 1]), ['C', 'C']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[0, 0]),
         ['A', 'A']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[0, 1]),
         ['A', 'C']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[0, 2]),
         ['A', 'T']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[1, 2]),
         ['C', 'T']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[2, 1]),
         ['T', 'C']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[1, 1]),
         ['C', 'C']),
        (test_utils.make_variant(alleles=['A', 'C', 'T'], gt=[2, 2]),
         ['T', 'T']),
        (test_utils.make_variant(alleles=['A', 'C'], gt=[-1, -1]), ['.', '.']),
    )
    def test_genotype_as_alleles(self, variant, expected):
        """genotype_as_alleles maps indices to bases; -1 becomes '.'."""
        self.assertEqual(variantutils.genotype_as_alleles(variant), expected)

    def test_genotype_as_alleles_raises_with_bad_inputs(self):
        """Bad inputs to genotype_as_alleles / genotype_type must raise."""
        with self.assertRaises(Exception):
            variantutils.genotype_as_alleles(None)
        with self.assertRaises(Exception):
            variantutils.genotype_as_alleles(test_utils.make_variant(gt=None))
        with self.assertRaises(Exception):
            variantutils.genotype_type(None)

    @parameterized.parameters(
        (test_utils.make_variant(gt=None), None),
        (test_utils.make_variant(gt=[0, 1], gq=10), 10),
        (test_utils.make_variant(gt=[0, 1], gq=20), 20),
        (test_utils.make_variant(gt=[0, 1], gq=30), 30),
        (test_utils.make_variant(gt=[0, 1], gq=35), 35),
    )
    def test_variant_gq(self, variant, expected):
        """genotype_quality returns the GQ, or `default` without a call."""
        self.assertEqual(variantutils.genotype_quality(variant, default=None),
                         expected)

    def test_variant_gq_raises_with_none(self):
        """genotype_quality(None) must raise."""
        with self.assertRaises(Exception):
            variantutils.genotype_quality(None)

    @parameterized.parameters(
        # Ref without an alt isn't gVCF.
        (test_utils.make_variant(alleles=['A']), False),
        # SNPs and indels aren't gVCF records.
        (test_utils.make_variant(alleles=['A', 'T']), False),
        (test_utils.make_variant(alleles=['A', 'AT']), False),
        (test_utils.make_variant(alleles=['AT', 'T']), False),
        # These are gVCF records.
        (test_utils.make_variant(alleles=['A', '<*>']), True),
        (test_utils.make_variant(alleles=['A', '<*>'], filters='PASS'), True),
        (test_utils.make_variant(alleles=['A', '<*>'], filters='FAIL'), True),
        # These are close but not exactly gVCFs.
        (test_utils.make_variant(alleles=['A', '<*>', 'C']), False),
        (test_utils.make_variant(alleles=['A', '<*F>']), False),
        (test_utils.make_variant(alleles=['A', '<CNV>']), False),
    )
    def test_is_gvcf(self, variant, expected):
        """is_gvcf is True only when the sole alt allele is '<*>'."""
        self.assertEqual(variantutils.is_gvcf(variant), expected)

    @parameterized.parameters(
        # Variants with one ref and one alt allele.
        (test_utils.make_variant(alleles=['A', 'C']), [(0, 0, 'A', 'A'),
                                                       (0, 1, 'A', 'C'),
                                                       (1, 1, 'C', 'C')]),
        # Variants with one ref and two alt alleles.
        (test_utils.make_variant(alleles=['A', 'C', 'G']), [(0, 0, 'A', 'A'),
                                                            (0, 1, 'A', 'C'),
                                                            (1, 1, 'C', 'C'),
                                                            (0, 2, 'A', 'G'),
                                                            (1, 2, 'C', 'G'),
                                                            (2, 2, 'G', 'G')]),
        # Variants with one ref and three alt alleles.
        (test_utils.make_variant(alleles=['A', 'C', 'G', 'T']), [
            (0, 0, 'A', 'A'), (0, 1, 'A', 'C'), (1, 1, 'C', 'C'),
            (0, 2, 'A', 'G'), (1, 2, 'C', 'G'), (2, 2, 'G', 'G'),
            (0, 3, 'A', 'T'), (1, 3, 'C', 'T'), (2, 3, 'G', 'T'),
            (3, 3, 'T', 'T')
        ]),
    )
    def test_genotype_ordering_in_likelihoods(self, variant, expected):
        """genotype_ordering_in_likelihoods yields (i, j, allele_i, allele_j)."""
        self.assertEqual(
            list(variantutils.genotype_ordering_in_likelihoods(variant)),
            expected)

    @parameterized.parameters(
        # Haploid.
        dict(gls=[0.], allele_indices=[0], expected=0.),
        dict(gls=[-1, -2], allele_indices=[1], expected=-2),
        dict(gls=[-1, -2, -3], allele_indices=[2], expected=-3),
        # Diploid.
        dict(gls=[0.], allele_indices=[0, 0], expected=0.),
        dict(gls=[-1, -2, -3], allele_indices=[0, 0], expected=-1),
        dict(gls=[-1, -2, -3], allele_indices=[0, 1], expected=-2),
        dict(gls=[-1, -2, -3], allele_indices=[1, 0], expected=-2),
        dict(gls=[-1, -2, -3], allele_indices=[1, 1], expected=-3),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[0, 0], expected=-1),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[0, 1], expected=-2),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[1, 0], expected=-2),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[1, 1], expected=-3),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[0, 2], expected=-4),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[2, 0], expected=-4),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[1, 2], expected=-5),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[2, 1], expected=-5),
        dict(gls=[-1, -2, -3, -4, -5, -6], allele_indices=[2, 2], expected=-6),
        dict(gls=range(10), allele_indices=[0, 3], expected=6),
        dict(gls=range(10), allele_indices=[1, 3], expected=7),
        dict(gls=range(10), allele_indices=[2, 3], expected=8),
        dict(gls=range(10), allele_indices=[3, 3], expected=9),
    )
    def test_genotype_likelihood(self, gls, allele_indices, expected):
        """genotype_likelihood picks the GL entry for the allele indices."""
        variantcall = variants_pb2.VariantCall(genotype_likelihood=gls)
        actual = variantutils.genotype_likelihood(variantcall, allele_indices)
        self.assertEqual(actual, expected)

    def test_unsupported_genotype_likelihood(self):
        """genotype_likelihood rejects three allele indices."""
        variantcall = variants_pb2.VariantCall(
            genotype_likelihood=[-1, -2, -3])
        with self.assertRaisesRegexp(NotImplementedError,
                                     'only supports haploid and diploid'):
            variantutils.genotype_likelihood(variantcall, [0, 1, 1])

    def test_haploid_allele_indices_for_genotype_likelihood_index(self):
        """Haploid index -> allele round trip returns the allele index."""
        # range instead of xrange: xrange does not exist on Python 3.
        for aix in range(20):
            allele_indices = (aix, )
            ix = variantutils.genotype_likelihood_index(allele_indices)
            actual = variantutils.allele_indices_for_genotype_likelihood_index(
                ix, ploidy=1)
            self.assertEqual(actual, aix)

    def test_diploid_allele_indices_for_genotype_likelihood_index(self):
        """Diploid index -> alleles round trip returns the sorted pair."""
        # range instead of xrange: xrange does not exist on Python 3.
        for aix in range(20):
            for bix in range(20):
                allele_indices = (aix, bix)
                expected = tuple(sorted(allele_indices))
                ix = variantutils.genotype_likelihood_index(allele_indices)
                actual = variantutils.allele_indices_for_genotype_likelihood_index(
                    ix, ploidy=2)
                self.assertEqual(actual, expected)

    @parameterized.parameters(
        dict(ploidy=-1),
        dict(ploidy=0),
        dict(ploidy=3),
    )
    def test_unsupported_allele_indices_for_genotype_likelihood_index(
            self, ploidy):
        """Ploidies other than 1 and 2 raise NotImplementedError."""
        with self.assertRaisesRegexp(NotImplementedError,
                                     'only supported for haploid and diploid'):
            variantutils.allele_indices_for_genotype_likelihood_index(
                0, ploidy)

    @parameterized.parameters(
        dict(alt_bases=[], num_alts=0, expected=[(0, 0)]),
        dict(alt_bases=['A'], num_alts=0, expected=[(0, 0)]),
        dict(alt_bases=['A'], num_alts=1, expected=[(0, 1)]),
        dict(alt_bases=['A'], num_alts=2, expected=[(1, 1)]),
        dict(alt_bases=['A', 'C'], num_alts=0, expected=[(0, 0)]),
        dict(alt_bases=['A', 'C'], num_alts=1, expected=[(0, 1), (0, 2)]),
        dict(alt_bases=['A', 'C'],
             num_alts=2,
             expected=[(1, 1), (1, 2), (2, 2)]),
    )
    def test_allele_indices_with_num_alts(self, alt_bases, num_alts, expected):
        """Diploid genotypes containing exactly num_alts alt alleles."""
        variant = variants_pb2.Variant(alternate_bases=alt_bases)
        actual = variantutils.allele_indices_with_num_alts(variant,
                                                           num_alts,
                                                           ploidy=2)
        self.assertEqual(actual, expected)

    @parameterized.parameters(
        dict(alt_bases=['A'], num_alts=0, ploidy=1),
        dict(alt_bases=['A'], num_alts=0, ploidy=3),
        dict(alt_bases=['A'], num_alts=-1, ploidy=2),
        dict(alt_bases=['A'], num_alts=3, ploidy=2),
    )
    def test_invalid_allele_indices_with_num_alts(self, alt_bases, num_alts,
                                                  ploidy):
        """Non-diploid ploidy or out-of-range num_alts must raise."""
        variant = variants_pb2.Variant(alternate_bases=alt_bases)
        with self.assertRaises((NotImplementedError, ValueError)):
            variantutils.allele_indices_with_num_alts(variant, num_alts,
                                                      ploidy)
コード例 #9
0
 def test_allele_mismatch(self, a1, a2, expected):
     """allele_mismatches on variants built from a1 and a2 gives expected."""
     first, second = [
         test_utils.make_variant(alleles=alleles) for alleles in (a1, a2)
     ]
     mismatches = variantutils.allele_mismatches(first, second)
     self.assertEqual(mismatches, expected)