Example #1
0
 def test_partitions_bad_interval_size_raises(self):
     """Checks that partition() raises ValueError for sizes -10 and 0.

     For each size the raised ValueError's message must match 'max_size'.
     """
     for bad_size in (-10, 0):
         rangeset = ranges.RangeSet([ranges.make_range('chrM', 0, 100)])
         # assertRaisesRegex is the supported spelling; the assertRaisesRegexp
         # alias is deprecated and was removed in Python 3.12.
         with self.assertRaisesRegex(ValueError, 'max_size'):
             # list() is necessary to force the generator to execute.
             list(rangeset.partition(bad_size))
Example #2
0
    def test_envelops(self):
        """Checks RangeSet.envelops() against a set holding two chr1 ranges."""
        first_start, first_end = 5, 10
        second_start = first_end + 1
        second_end = first_end + 5
        first_range = ranges.make_range('chr1', first_start, first_end)
        second_range = ranges.make_range('chr1', second_start, second_end)
        range_set = ranges.RangeSet([first_range, second_range])

        # A query starting before the first range begins is never enveloped.
        for query_start in range(first_start):
            enveloped = range_set.envelops(
                'chr1', query_start, first_start + 1)
            self.assertFalse(enveloped)

        # Every sub-interval of the first range is enveloped.
        for query_start in range(first_start, first_end):
            for query_end in range(query_start, first_end + 1):
                enveloped = range_set.envelops(
                    'chr1', query_start, query_end)
                failure_msg = 'chr1 {} {} not enveloped'.format(
                    query_start, query_end)
                self.assertTrue(enveloped, failure_msg)

        # A query that starts in the first range and ends in the second one
        # is not enveloped.
        for query_start in range(first_start, first_end):
            for query_end in range(second_start, second_end + 1):
                enveloped = range_set.envelops(
                    'chr1', query_start, query_end)
                self.assertFalse(enveloped)

        # A query on a different chromosome is not enveloped.
        self.assertFalse(
            range_set.envelops('chr2', first_start, first_start + 1))
Example #3
0
  def test_label_variants(self,
                          candidate,
                          expected_confident,
                          expected_truth,
                          expected_label=None,
                          variant_alt_alleles_indices=None):
    """Checks _match() and label_variants() for a single candidate.

    Args:
      candidate: Variant handed to the labeler.
      expected_confident: Expected confidence flag for the candidate.
      expected_truth: Truth variant that _match() should return, or None.
      expected_label: Expected label. When None and expected_truth is given,
        it is derived from the truth variant's class info field.
      variant_alt_alleles_indices: Indices passed to label_for_alt_alleles();
        defaults to [0].
    """
    alt_indices = (
        [0] if variant_alt_alleles_indices is None
        else variant_alt_alleles_indices)
    confident_range = ranges.make_range(
        self.snp_class1.reference_name, 10, 100)
    labeler = self._make_labeler(
        self.variants, ranges.RangeSet([confident_range]))

    # First compare the result of _match() directly with the expected truth.
    is_confident, truth_variant = labeler._match(candidate)
    self.assertEqual(expected_truth, truth_variant)
    self.assertEqual(is_confident, expected_confident)

    # Then go through the higher-level label_variants() API.
    label_cls = customized_classes_labeler.CustomizedClassesVariantLabel
    classes_dict = label_cls.classes_dict
    if expected_label is None and expected_truth is not None:
      class_info = expected_truth.info[label_cls.info_field_name]
      expected_label = classes_dict[class_info.values[0].string_value]

    labels = list(labeler.label_variants([candidate]))
    self.assertEqual(len(labels), 1)
    only_label = labels[0]
    self.assertEqual(candidate, only_label.variant)
    self.assertEqual(expected_confident, only_label.is_confident)
    self.assertEqual(
        expected_label, only_label.label_for_alt_alleles(alt_indices))
Example #4
0
  def test_detector_ranges(self):
    """Checks overlaps() position by position on a RangeSet of four ranges."""
    range_specs = (
        ('chr1', 0, 5),
        ('chr1', 8, 10),
        ('chr1', 12, 13),
        ('chr2', 2, 5),
    )
    range_set = ranges.RangeSet(
        [ranges.make_range(*spec) for spec in range_specs])
    self.assertEqual(bool(range_set), True)
    self.assertEqual(len(range_set), 4)

    # (contig, position, expected overlaps() result), in the order checked.
    expectations = (
        ('chr1', 0, True),
        ('chr1', 1, True),
        ('chr1', 2, True),
        ('chr1', 3, True),
        ('chr1', 4, True),
        ('chr1', 5, False),
        ('chr1', 6, False),
        ('chr1', 7, False),
        ('chr1', 8, True),
        ('chr1', 9, True),
        ('chr1', 10, False),
        ('chr1', 11, False),
        ('chr1', 12, True),
        ('chr1', 13, False),
        ('chr1', 100, False),
        ('chr1', 1000, False),
        ('chr2', 0, False),
        ('chr2', 1, False),
        ('chr2', 2, True),
        ('chr2', 3, True),
        ('chr2', 4, True),
        ('chr2', 5, False),
        ('chr2', 6, False),
        ('chr3', 3, False),
    )
    for chrom, position, expected in expectations:
      self.assertEqual(range_set.overlaps(chrom, position), expected)
Example #5
0
 def test_overlaps_variant_with_ranges(self):
   """Checks variant_overlaps() calls overlaps() with the contig and start.

   overlaps() is mocked to return True, so only the delegation and the
   pass-through of its result are checked.
   """
   range_set = ranges.RangeSet([ranges.make_range('chr1', 0, 5)])
   variant = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
   overlaps_patch = mock.patch.object(
       range_set, 'overlaps', return_value=True)
   with overlaps_patch as mock_overlaps:
     result = range_set.variant_overlaps(variant)
     self.assertEqual(result, True)
     mock_overlaps.assert_called_once_with('chr2', 10)
Example #6
0
 def test_partitions(self, interval_size, expected):
   """Checks partition(interval_size) over three whole-contig ranges.

   Args:
     interval_size: Size passed to RangeSet.partition().
     expected: Iterable of make_range() argument tuples describing the
       expected partitions; element order is not significant.
   """
   contig_spans = (('chrM', 0, 100), ('chr1', 0, 76), ('chr2', 0, 121))
   rangeset = ranges.RangeSet(
       [ranges.make_range(*span) for span in contig_spans])
   expected_ranges = [ranges.make_range(*args) for args in expected]
   actual_ranges = rangeset.partition(interval_size)
   self.assertCountEqual(expected_ranges, actual_ranges)
Example #7
0
 def test_partition_of_multiple_intervals(self, interval_size, expected):
   """Checks partition(interval_size) over several ranges on one contig.

   Args:
     interval_size: Size passed to RangeSet.partition().
     expected: Iterable of make_range() argument tuples describing the
       expected partitions; element order is not significant.
   """
   spans = (('1', 0, 10), ('1', 20, 40), ('1', 45, 50))
   rangeset = ranges.RangeSet([ranges.make_range(*span) for span in spans])
   expected_ranges = [ranges.make_range(*args) for args in expected]
   actual_ranges = rangeset.partition(interval_size)
   self.assertCountEqual(expected_ranges, actual_ranges)
Example #8
0
    def test_intersection(self, regions, expected):
        """Checks intersection() output and that no input set is modified.

        Args:
          regions: Sequence of region collections; each becomes a RangeSet
            via RangeSet.from_regions().
          expected: Regions whose RangeSet is the expected intersection.
        """
        range_sets = [ranges.RangeSet.from_regions(r) for r in regions]
        # Snapshot each input so mutation can be detected afterwards.
        snapshots = [ranges.RangeSet(rs) for rs in range_sets]

        # The first set intersected with all the others gives `expected`.
        expected_set = ranges.RangeSet.from_regions(expected)
        first, others = range_sets[0], range_sets[1:]
        self.assertCountEqual(expected_set, first.intersection(*others))

        # Every input must still match its snapshot.
        for before, after in zip(snapshots, range_sets):
            self.assertCountEqual(before, after)
 def test_match_multiple_matches(self, overlapping_variants, candidate,
                                 expected_confident, truth_variant_idx):
   """Checks which of several overlapping truth variants _match() returns.

   Args:
     overlapping_variants: Truth variants given to the labeler.
     candidate: Variant passed to _match().
     expected_confident: Expected confidence flag returned by _match().
     truth_variant_idx: Index into overlapping_variants of the expected
       match.
   """
   contig = overlapping_variants[0].reference_name
   confident_regions = ranges.RangeSet([ranges.make_range(contig, 0, 100)])
   labeler = self._make_labeler(overlapping_variants, confident_regions)

   is_confident, variant_match = labeler._match(candidate)
   wanted_variant = overlapping_variants[truth_variant_idx]
   self.assertEqual(is_confident, expected_confident)
   self.assertEqual(variant_match, wanted_variant)
Example #10
0
    def test_rangeset_iteration_order(self):
        """Checks RangeSet iteration order with and without a contigs list."""
        contig_specs = (('c', 100, 0), ('b', 121, 2), ('a', 76, 1))
        contigs = [
            reference_pb2.ContigInfo(
                name=name, n_bases=n_bases, pos_in_fasta=pos)
            for name, n_bases, pos in contig_specs
        ]
        unsorted = ranges.parse_literals(
            ['a:10', 'c:20', 'b:30', 'b:10-15', 'a:5'])

        # With a contigs list, iteration follows pos_in_fasta, then start,
        # then end.
        with_contigs = ranges.RangeSet(unsorted, contigs)
        expected_fasta_order = ranges.parse_literals(
            ['c:20', 'a:5', 'a:10', 'b:10-15', 'b:30'])
        self.assertEqual(expected_fasta_order, list(with_contigs))

        # Without a contig map, iteration follows reference_name, then start,
        # then end.
        no_contigs = ranges.RangeSet(unsorted)
        expected_name_order = ranges.parse_literals(
            ['a:5', 'a:10', 'b:10-15', 'b:30', 'c:20'])
        self.assertEqual(expected_name_order, list(no_contigs))
Example #11
0
def _make_labeler(truth_variants=None, confident_regions=None, **kwargs):
  """Builds a HaplotypeLabeler backed by in-memory truth variants.

  Args:
    truth_variants: Optional list of truth variants served through an
      InMemoryVcfReader; None or an empty list gives an empty reader.
    confident_regions: Optional RangeSet of confident regions. When None, a
      single range from 0 to 1000000000 is used, on the contig of the first
      truth variant or on 'dummy' if there are no truth variants.
    **kwargs: Extra keyword arguments forwarded to HaplotypeLabeler.

  Returns:
    The constructed haplotype_labeler.HaplotypeLabeler; its ref_reader is a
    MagicMock.
  """
  if confident_regions is None:
    # No regions supplied: fall back to one giant span on a suitable contig.
    if truth_variants:
      contig = truth_variants[0].reference_name
    else:
      contig = 'dummy'
    giant_span = ranges.make_range(contig, 0, 1000000000)
    confident_regions = ranges.RangeSet([giant_span])

  truth_reader = vcf.InMemoryVcfReader(truth_variants or [])
  return haplotype_labeler.HaplotypeLabeler(
      truth_vcf_reader=truth_reader,
      ref_reader=mock.MagicMock(),
      confident_regions=confident_regions,
      **kwargs)
    def test_get_truth_variants(self):
        """Checks which truth variants _get_truth_variants() yields per query."""
        v1 = test_utils.make_variant(chrom='1', start=10)
        v2 = test_utils.make_variant(chrom='1', start=20)
        v3_filtered = test_utils.make_variant(
            chrom='1', start=30, filters=['FAIL'])
        v4_del = test_utils.make_variant(
            chrom='1', start=40, alleles=['AAAA', 'A'])
        v5_non_confident = test_utils.make_variant(chrom='1', start=150)

        reader = vcf.InMemoryVcfReader(
            variants=[v1, v2, v3_filtered, v4_del, v5_non_confident])
        labeler = PlaceholderVariantLabeler(
            truth_vcf_reader=reader,
            confident_regions=ranges.RangeSet(
                [ranges.make_range('1', 1, 100)]))

        def truth_in(region_literal):
            # Materialize the truth variants returned for a region literal.
            region = ranges.parse_literal(region_literal)
            return list(labeler._get_truth_variants(region))

        # Queries covering only v1 or only v2 return exactly that variant.
        self.assertEqual(truth_in('1:1-15'), [v1])
        self.assertEqual(truth_in('1:15-25'), [v2])

        # Filtered variants are left out.
        self.assertEqual(truth_in('1:25-35'), [])

        # Any query overlapping the deletion returns it.
        for del_query in ['1:35-45', '1:42-43', '1:38-42', '1:42-50']:
            self.assertEqual(truth_in(del_query), [v4_del])

        # A broad query returns every non-filtered variant it covers.
        self.assertEqual(truth_in('1:1-100'), [v1, v2, v4_del])
        # Even though this query covers v5, v5 is not in a confident region,
        # so it is not returned.
        self.assertEqual(truth_in('1:1-1000'), [v1, v2, v4_del])
Example #13
0
    def test_regions_and_exclude_regions_flags(self):
        """Checks processing regions when regions and exclude_regions are set.

        The excluded span lies inside the requested region, so two regions
        are expected, one on each side of it.
        """
        FLAGS.mode = 'calling'
        FLAGS.ref = testdata.CHR20_FASTA
        FLAGS.reads = testdata.CHR20_BAM
        FLAGS.regions = 'chr20:10,000,000-11,000,000'
        FLAGS.examples = 'examples.tfrecord'
        FLAGS.exclude_regions = 'chr20:10,010,000-10,100,000'

        options = make_examples.default_options(add_flags=True)
        processing_regions = (
            make_examples_core.processing_regions_from_options(options))
        actual_regions = list(ranges.RangeSet(processing_regions))
        expected_regions = _from_literals_list([
            'chr20:10,000,000-10,009,999',
            'chr20:10,100,001-11,000,000',
        ])
        six.assertCountEqual(self, actual_regions, expected_regions)
  def test_match_selects_variant_by_start(self):
    """Checks _match() picks the truth variant with the candidate's start.

    The candidate's alleles equal those of another overlapping truth variant
    (start=20), yet the truth variant at start=21 is the expected match.
    """
    truth_at_20 = test_utils.make_variant(
        start=20, alleles=['CC', 'A'], gt=[1, 1])
    truth_at_21 = test_utils.make_variant(
        start=21, alleles=['AAA', 'A'], gt=[0, 1])
    truth_at_22 = test_utils.make_variant(
        start=22, alleles=['AA', 'A'], gt=[1, 1])
    overlapping = [truth_at_20, truth_at_21, truth_at_22]
    candidate = test_utils.make_variant(start=21, alleles=['CC', 'A'])

    confident_regions = ranges.RangeSet(
        [ranges.make_range(truth_at_20.reference_name, 0, 100)])
    labeler = self._make_labeler(overlapping, confident_regions)

    is_confident, truth_variant = labeler._match(candidate)
    self.assertEqual(is_confident, True)
    self.assertEqual(truth_variant, truth_at_21)
  def test_label_variants(self,
                          candidate,
                          expected_confident,
                          expected_truth,
                          expected_genotype=None):
    """Checks _match() and label_variants() for a single candidate.

    Args:
      candidate: Variant handed to the labeler.
      expected_confident: Expected confidence flag for the candidate.
      expected_truth: Truth variant that _match() should return, or None.
      expected_genotype: Expected genotype of the label. When None and
        expected_truth is given, the genotype of the truth variant's first
        call is used.
    """
    confident_range = ranges.make_range(self.snp.reference_name, 10, 100)
    labeler = self._make_labeler(
        self.variants, ranges.RangeSet([confident_range]))

    # First compare the result of _match() directly with the expected truth.
    is_confident, truth_variant = labeler._match(candidate)
    self.assertEqual(expected_truth, truth_variant)
    self.assertEqual(is_confident, expected_confident)

    # Then go through the higher-level label_variants() API.
    if expected_genotype is None and expected_truth is not None:
      first_call = expected_truth.calls[0]
      expected_genotype = tuple(first_call.genotype)
    labels = list(labeler.label_variants([candidate]))
    self.assertLen(labels, 1)
    only_label = labels[0]
    self.assertEqual(candidate, only_label.variant)
    self.assertEqual(expected_confident, only_label.is_confident)
    self.assertEqual(expected_genotype, only_label.genotype)
Example #16
0
 def test_detector_no_ranges(self):
   """Checks a RangeSet built with no arguments is falsy and overlaps nothing."""
   empty_set = ranges.RangeSet()
   # No ranges are present by default.
   self.assertEqual(bool(empty_set), False)
   # overlaps() must still be callable on a set without ranges.
   has_overlap = empty_set.overlaps('chr1', 10)
   self.assertFalse(has_overlap)
Example #17
0
 def test_unrecognized_contig_triggers_exception(self):
     """Checks RangeSet raises ValueError for a contig name not recognized.

     'bogus_chromosome' is presumably absent from _TEST_CONTIGS (defined
     elsewhere in the file).
     """
     with self.assertRaises(ValueError):
         bogus_range = ranges.make_range('bogus_chromosome', 1, 10)
         ranges.RangeSet([bogus_range], _TEST_CONTIGS)
Example #18
0
 def test_overlaps_variant_empty_range(self):
   """Checks variant_overlaps() on an empty set returns the given fallback."""
   empty_set = ranges.RangeSet()
   variant = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
   result = empty_set.variant_overlaps(
       variant, empty_set_return_value='foo')
   self.assertEqual(result, 'foo')