def test_partitions_bad_interval_size_raises(self):
  """partition() raises ValueError mentioning 'max_size' for sizes <= 0.

  Both a negative size and zero are exercised.
  """
  for bad_size in (-10, 0):
    rangeset = ranges.RangeSet([ranges.make_range('chrM', 0, 100)])
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, 'max_size'):
      # list() is necessary to force the generator to execute.
      list(rangeset.partition(bad_size))
def test_envelops(self):
  """Checks envelops() against a RangeSet of two separate chr1 ranges."""
  first_start = 5
  first_end = 10
  second_start = first_end + 1
  second_end = first_end + 5
  range_set = ranges.RangeSet([
      ranges.make_range('chr1', first_start, first_end),
      ranges.make_range('chr1', second_start, second_end),
  ])

  # A query that begins before the first range starts is never enveloped.
  for pos in range(first_start):
    enveloped = range_set.envelops('chr1', pos, first_start + 1)
    self.assertFalse(enveloped)

  # Any query that stays inside the first range is enveloped.
  for query_start in range(first_start, first_end):
    for query_end in range(query_start, first_end + 1):
      failure_msg = 'chr1 {} {} not enveloped'.format(query_start, query_end)
      enveloped = range_set.envelops('chr1', query_start, query_end)
      self.assertTrue(enveloped, failure_msg)

  # A query starting in the first range and ending in the second is not
  # enveloped.
  for query_start in range(first_start, first_end):
    for query_end in range(second_start, second_end + 1):
      enveloped = range_set.envelops('chr1', query_start, query_end)
      self.assertFalse(enveloped)

  # A chromosome that is absent from the set is not enveloped.
  on_other_chrom = range_set.envelops('chr2', first_start, first_start + 1)
  self.assertFalse(on_other_chrom)
def test_label_variants(self,
                        candidate,
                        expected_confident,
                        expected_truth,
                        expected_label=None,
                        variant_alt_alleles_indices=None):
  """Checks _match() and label_variants() for a single candidate.

  Args:
    candidate: Variant passed to the labeler.
    expected_confident: Expected confidence flag for the candidate.
    expected_truth: Expected truth variant returned by _match(), or None.
    expected_label: Expected label; when None and expected_truth is given, it
      is looked up in classes_dict from the truth variant's info field.
    variant_alt_alleles_indices: Alt allele indices passed to
      label_for_alt_alleles(); defaults to [0].
  """
  if variant_alt_alleles_indices is None:
    alt_indices = [0]
  else:
    alt_indices = variant_alt_alleles_indices

  confident_regions = ranges.RangeSet(
      [ranges.make_range(self.snp_class1.reference_name, 10, 100)])
  labeler = self._make_labeler(self.variants, confident_regions)

  # Exercise _match directly so the matched truth variant can be inspected.
  is_confident, truth_variant = labeler._match(candidate)
  self.assertEqual(expected_truth, truth_variant)
  self.assertEqual(is_confident, expected_confident)

  # Then go through the public label_variants API.
  label_cls = customized_classes_labeler.CustomizedClassesVariantLabel
  classes_dict = label_cls.classes_dict
  if expected_label is None and expected_truth is not None:
    info_entry = expected_truth.info[label_cls.info_field_name]
    expected_class_str = info_entry.values[0].string_value
    expected_label = classes_dict[expected_class_str]

  labels = list(labeler.label_variants([candidate]))
  self.assertEqual(len(labels), 1)
  only_label = labels[0]
  self.assertEqual(candidate, only_label.variant)
  self.assertEqual(expected_confident, only_label.is_confident)
  self.assertEqual(expected_label,
                   only_label.label_for_alt_alleles(alt_indices))
def test_detector_ranges(self):
  """Checks truthiness, length and overlaps() for a four-range RangeSet."""
  range_set = ranges.RangeSet([
      ranges.make_range('chr1', 0, 5),
      ranges.make_range('chr1', 8, 10),
      ranges.make_range('chr1', 12, 13),
      ranges.make_range('chr2', 2, 5),
  ])
  self.assertEqual(bool(range_set), True)
  self.assertEqual(len(range_set), 4)

  # (contig, position, expected overlaps() result), checked in this order.
  overlap_cases = [
      ('chr1', 0, True),
      ('chr1', 1, True),
      ('chr1', 2, True),
      ('chr1', 3, True),
      ('chr1', 4, True),
      ('chr1', 5, False),
      ('chr1', 6, False),
      ('chr1', 7, False),
      ('chr1', 8, True),
      ('chr1', 9, True),
      ('chr1', 10, False),
      ('chr1', 11, False),
      ('chr1', 12, True),
      ('chr1', 13, False),
      ('chr1', 100, False),
      ('chr1', 1000, False),
      ('chr2', 0, False),
      ('chr2', 1, False),
      ('chr2', 2, True),
      ('chr2', 3, True),
      ('chr2', 4, True),
      ('chr2', 5, False),
      ('chr2', 6, False),
      ('chr3', 3, False),
  ]
  for contig, position, should_overlap in overlap_cases:
    self.assertEqual(range_set.overlaps(contig, position), should_overlap)
def test_overlaps_variant_with_ranges(self):
  """variant_overlaps() calls overlaps() with the variant's contig and start."""
  range_set = ranges.RangeSet([ranges.make_range('chr1', 0, 5)])
  variant = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
  # overlaps() is stubbed to return True, so the result seen here is the
  # stubbed value rather than a real overlap computation.
  with mock.patch.object(
      range_set, 'overlaps', return_value=True) as mock_overlaps:
    result = range_set.variant_overlaps(variant)
    self.assertEqual(result, True)
    mock_overlaps.assert_called_once_with('chr2', 10)
def test_partitions(self, interval_size, expected):
  """partition(interval_size) yields the expected ranges for three contigs.

  Args:
    interval_size: Size passed to RangeSet.partition().
    expected: Iterable of argument tuples for ranges.make_range().
  """
  contig_spans = [('chrM', 0, 100), ('chr1', 0, 76), ('chr2', 0, 121)]
  rangeset = ranges.RangeSet(
      [ranges.make_range(name, start, end) for name, start, end in contig_spans])
  expected_ranges = [ranges.make_range(*args) for args in expected]
  self.assertCountEqual(expected_ranges, rangeset.partition(interval_size))
def test_partition_of_multiple_intervals(self, interval_size, expected):
  """partition() handles several separate intervals on the same contig.

  Args:
    interval_size: Size passed to RangeSet.partition().
    expected: Iterable of argument tuples for ranges.make_range().
  """
  intervals = [(0, 10), (20, 40), (45, 50)]
  rangeset = ranges.RangeSet(
      [ranges.make_range('1', start, end) for start, end in intervals])
  expected_ranges = [ranges.make_range(*args) for args in expected]
  self.assertCountEqual(expected_ranges, rangeset.partition(interval_size))
def test_intersection(self, regions, expected):
  """intersection() returns the expected ranges and leaves its inputs intact.

  Args:
    regions: Sequence of region specs, each fed to RangeSet.from_regions().
    expected: Region spec describing the expected intersection.
  """
  range_sets = [ranges.RangeSet.from_regions(spec) for spec in regions]
  snapshots = [ranges.RangeSet(range_set) for range_set in range_sets]

  # The first set is intersected with all of the remaining ones.
  head, others = range_sets[0], range_sets[1:]
  self.assertCountEqual(
      ranges.RangeSet.from_regions(expected), head.intersection(*others))

  # None of the operands should have been modified by the call.
  for before, after in zip(snapshots, range_sets):
    self.assertCountEqual(before, after)
def test_match_multiple_matches(self, overlapping_variants, candidate,
                                expected_confident, truth_variant_idx):
  """_match() picks the expected truth variant among several overlapping ones.

  Args:
    overlapping_variants: Truth variants handed to the labeler.
    candidate: Variant to match.
    expected_confident: Expected confidence flag from _match().
    truth_variant_idx: Index into overlapping_variants of the expected match.
  """
  contig = overlapping_variants[0].reference_name
  confident_regions = ranges.RangeSet([ranges.make_range(contig, 0, 100)])
  labeler = self._make_labeler(overlapping_variants, confident_regions)

  is_confident, variant_match = labeler._match(candidate)

  self.assertEqual(is_confident, expected_confident)
  self.assertEqual(variant_match, overlapping_variants[truth_variant_idx])
def test_rangeset_iteration_order(self):
  """Iteration order depends on whether a contigs list was supplied."""
  contigs = [
      reference_pb2.ContigInfo(name='c', n_bases=100, pos_in_fasta=0),
      reference_pb2.ContigInfo(name='b', n_bases=121, pos_in_fasta=2),
      reference_pb2.ContigInfo(name='a', n_bases=76, pos_in_fasta=1),
  ]
  unsorted = ranges.parse_literals(['a:10', 'c:20', 'b:30', 'b:10-15', 'a:5'])

  # With a contigs list, ranges come out ordered by pos_in_fasta, start, end.
  expected_with_contigs = ranges.parse_literals(
      ['c:20', 'a:5', 'a:10', 'b:10-15', 'b:30'])
  self.assertEqual(expected_with_contigs,
                   list(ranges.RangeSet(unsorted, contigs)))

  # Without a contig map, ranges come out ordered by reference_name, start,
  # end.
  expected_no_contigs = ranges.parse_literals(
      ['a:5', 'a:10', 'b:10-15', 'b:30', 'c:20'])
  self.assertEqual(expected_no_contigs, list(ranges.RangeSet(unsorted)))
def _make_labeler(truth_variants=None, confident_regions=None, **kwargs):
  """Builds a HaplotypeLabeler over in-memory truth variants.

  Args:
    truth_variants: Optional list of truth variants; treated as empty when
      falsy.
    confident_regions: Optional RangeSet of confident regions. When None, one
      very large range is created on the contig of the first truth variant,
      or on the placeholder contig 'dummy' if there are no truth variants.
    **kwargs: Forwarded unchanged to HaplotypeLabeler.

  Returns:
    A haplotype_labeler.HaplotypeLabeler whose ref_reader is a MagicMock.
  """
  if confident_regions is None:
    if truth_variants:
      contig = truth_variants[0].reference_name
    else:
      contig = 'dummy'
    whole_contig = ranges.make_range(contig, 0, 1000000000)
    confident_regions = ranges.RangeSet([whole_contig])

  truth_reader = vcf.InMemoryVcfReader(truth_variants or [])
  return haplotype_labeler.HaplotypeLabeler(
      truth_vcf_reader=truth_reader,
      ref_reader=mock.MagicMock(),
      confident_regions=confident_regions,
      **kwargs)
def test_get_truth_variants(self):
  """_get_truth_variants() returns the expected variants for each query."""
  v1 = test_utils.make_variant(chrom='1', start=10)
  v2 = test_utils.make_variant(chrom='1', start=20)
  v3_filtered = test_utils.make_variant(chrom='1', start=30, filters=['FAIL'])
  v4_del = test_utils.make_variant(chrom='1', start=40, alleles=['AAAA', 'A'])
  v5_non_confident = test_utils.make_variant(chrom='1', start=150)

  labeler = PlaceholderVariantLabeler(
      truth_vcf_reader=vcf.InMemoryVcfReader(
          variants=[v1, v2, v3_filtered, v4_del, v5_non_confident]),
      confident_regions=ranges.RangeSet([ranges.make_range('1', 1, 100)]))

  def truth_in(region_literal):
    """Returns the truth variants for a region literal, as a list."""
    region = ranges.parse_literal(region_literal)
    return list(labeler._get_truth_variants(region))

  # Narrow queries around v1 and v2 return exactly that variant.
  self.assertEqual(truth_in('1:1-15'), [v1])
  self.assertEqual(truth_in('1:15-25'), [v2])

  # The filtered variant is never returned.
  self.assertEqual(truth_in('1:25-35'), [])

  # Each of these queries is expected to return the deletion.
  for del_query in ['1:35-45', '1:42-43', '1:38-42', '1:42-50']:
    self.assertEqual(truth_in(del_query), [v4_del])

  # A query over the confident region returns every non-filtered variant.
  self.assertEqual(truth_in('1:1-100'), [v1, v2, v4_del])

  # Even though this query covers v5, v5 lies outside the confident region
  # and is not returned.
  self.assertEqual(truth_in('1:1-1000'), [v1, v2, v4_del])
def test_regions_and_exclude_regions_flags(self):
  """The exclude_regions flag is subtracted from the regions flag."""
  FLAGS.mode = 'calling'
  FLAGS.ref = testdata.CHR20_FASTA
  FLAGS.reads = testdata.CHR20_BAM
  FLAGS.regions = 'chr20:10,000,000-11,000,000'
  FLAGS.examples = 'examples.tfrecord'
  FLAGS.exclude_regions = 'chr20:10,010,000-10,100,000'

  options = make_examples.default_options(add_flags=True)
  processing_regions = make_examples_core.processing_regions_from_options(
      options)
  actual = list(ranges.RangeSet(processing_regions))
  # The expected result is the requested region with the excluded span cut
  # out of it.
  expected = _from_literals_list(
      ['chr20:10,000,000-10,009,999', 'chr20:10,100,001-11,000,000'])
  six.assertCountEqual(self, actual, expected)
def test_match_selects_variant_by_start(self):
  """_match() prefers the truth variant that shares the candidate's start.

  The truth variant at the candidate's start has different alleles, while an
  overlapping truth variant at another start has the same alleles; the one at
  the same start is still expected to be chosen.
  """
  same_alleles_other_start = test_utils.make_variant(
      start=20, alleles=['CC', 'A'], gt=[1, 1])
  same_start_other_alleles = test_utils.make_variant(
      start=21, alleles=['AAA', 'A'], gt=[0, 1])
  later_overlap = test_utils.make_variant(
      start=22, alleles=['AA', 'A'], gt=[1, 1])
  overlapping = [
      same_alleles_other_start, same_start_other_alleles, later_overlap
  ]
  candidate = test_utils.make_variant(start=21, alleles=['CC', 'A'])

  confident_regions = ranges.RangeSet(
      [ranges.make_range(overlapping[0].reference_name, 0, 100)])
  labeler = self._make_labeler(overlapping, confident_regions)

  is_confident, truth_variant = labeler._match(candidate)
  self.assertEqual(is_confident, True)
  self.assertEqual(truth_variant, overlapping[1])
def test_label_variants(self,
                        candidate,
                        expected_confident,
                        expected_truth,
                        expected_genotype=None):
  """Checks _match() and label_variants() for a single candidate.

  Args:
    candidate: Variant passed to the labeler.
    expected_confident: Expected confidence flag for the candidate.
    expected_truth: Expected truth variant returned by _match(), or None.
    expected_genotype: Expected genotype on the label; when None and
      expected_truth is given, the genotype of its first call is used.
  """
  confident_regions = ranges.RangeSet(
      [ranges.make_range(self.snp.reference_name, 10, 100)])
  labeler = self._make_labeler(self.variants, confident_regions)

  # Exercise _match directly so the matched truth variant can be inspected.
  is_confident, truth_variant = labeler._match(candidate)
  self.assertEqual(expected_truth, truth_variant)
  self.assertEqual(is_confident, expected_confident)

  # Then go through the public label_variants API.
  if expected_genotype is None and expected_truth is not None:
    first_call = expected_truth.calls[0]
    expected_genotype = tuple(first_call.genotype)

  labels = list(labeler.label_variants([candidate]))
  self.assertLen(labels, 1)
  only_label = labels[0]
  self.assertEqual(candidate, only_label.variant)
  self.assertEqual(expected_confident, only_label.is_confident)
  self.assertEqual(expected_genotype, only_label.genotype)
def test_detector_no_ranges(self):
  """A default-constructed RangeSet is falsy and overlaps nothing."""
  empty_set = ranges.RangeSet()
  # No ranges are present by default.
  is_non_empty = bool(empty_set)
  self.assertEqual(is_non_empty, False)
  # overlaps() must still be callable when there are no ranges.
  hit = empty_set.overlaps('chr1', 10)
  self.assertFalse(hit)
def test_unrecognized_contig_triggers_exception(self):
  """A range on a contig missing from _TEST_CONTIGS raises ValueError."""
  with self.assertRaises(ValueError):
    ranges.RangeSet(
        [ranges.make_range('bogus_chromosome', 1, 10)], _TEST_CONTIGS)
def test_overlaps_variant_empty_range(self):
  """An empty RangeSet returns empty_set_return_value from variant_overlaps."""
  empty_set = ranges.RangeSet()
  variant = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
  result = empty_set.variant_overlaps(variant, empty_set_return_value='foo')
  self.assertEqual(result, 'foo')