def _label_grouped_variants(self, variants):
    """Yield one `VariantLabel` per variant in a group of candidates.

    Truth variants are fetched via `self._get_truth_variants` over the span
    covering every variant in `variants`, widened by
    `_TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP`. When more truth variants
    come back than `self.max_group_size`, labeling is skipped and every
    candidate is emitted as not confident with genotype `(-1, -1)`.
    Otherwise the candidates are labeled with `label_variants` against the
    reference built by `self.make_labeler_ref`.

    This is a generator: nothing below runs (including the `ValueError`)
    until the result is iterated.

    Args:
      variants: The candidate variants of one group. They are traversed more
        than once, and `len()` is taken when the size warning is logged.

    Yields:
      `variant_labeler.VariantLabel` objects.

    Raises:
      ValueError: If `label_variants` returns an empty/falsy result.
    """
    # NOTE(review): an earlier, partially redacted TODO says these "should be
    # computed in the grouping" -- presumably the span / truth variants
    # queried here; confirm with the original author.
    variant_ranges = [variant_utils.variant_range(v) for v in variants]
    query_region = ranges.expand(
        ranges.span(variant_ranges),
        _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
    truth_variants = list(self._get_truth_variants(query_region))

    n_truth = len(truth_variants)
    if n_truth > self.max_group_size:
        logging.warning(
            ('Found a large number of variants to label (n_candidates=%d, '
             'n_truth=%d) relative to candidate cap of %d. This may make the '
             'algorithm very slow.'),
            len(variants), n_truth, self.max_group_size)
        logging.warning('Returning all variants with not-confident markers.')
        # Too many truth variants: give up on this group and mark every
        # candidate as not confident with an unknown genotype.
        for candidate in variants:
            yield variant_labeler.VariantLabel(
                is_confident=False, genotype=(-1, -1), variant=candidate)
        return

    labeler_ref = self.make_labeler_ref(variants, truth_variants)
    labeled_variants = label_variants(variants, truth_variants, labeler_ref)
    if not labeled_variants:
        raise ValueError('Failed to assign labels for variants', variants)

    for labeled in labeled_variants:
        # Confidence is currently decided only by overlap with the confident
        # regions. The original (partially redacted) note suggests rethinking
        # this: a variant seems like it would be confident if it has a
        # non-ref genotype (only confident truth variants are considered) or
        # if it overlaps the confident regions.
        overlaps_confident = self._confident_regions.variant_overlaps(labeled)
        assigned_genotype = tuple(labeled.calls[0].genotype)
        yield variant_labeler.VariantLabel(
            is_confident=overlaps_confident,
            genotype=assigned_genotype,
            variant=labeled)
def _label_grouped_variants(self, variants):
    """Label a single group of candidate variants, yielding `VariantLabel`s.

    The region spanning all of `variants` is expanded by
    `_TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP` and used to look up truth
    variants through `self._get_truth_variants`. If that lookup returns more
    than `self.max_group_size` truth variants, two warnings are logged and
    each candidate is yielded with `is_confident=False` and genotype
    `(-1, -1)`. In the normal case the candidates go through
    `label_variants`, using the reference from `self.make_labeler_ref`.

    Because this function is a generator, its body (and any `ValueError`) is
    only executed once the caller starts iterating.

    Args:
      variants: Candidate variants belonging to one group. The collection is
        iterated several times and `len(variants)` is used for logging.

    Yields:
      A `variant_labeler.VariantLabel` for each emitted variant.

    Raises:
      ValueError: If `label_variants` produces a falsy (e.g. empty) result.
    """
    # NOTE(review): a partially redacted TODO here states that "they should be
    # computed in the grouping" -- presumably the values derived below;
    # confirm before acting on it.
    group_span = ranges.span(
        [variant_utils.variant_range(candidate) for candidate in variants])
    expanded_span = ranges.expand(
        group_span, _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
    truths = list(self._get_truth_variants(expanded_span))

    if len(truths) > self.max_group_size:
        oversize_message = (
            'Found a large number of variants to label (n_candidates=%d, '
            'n_truth=%d) relative to candidate cap of %d. This may make the '
            'algorithm very slow.')
        logging.warning(
            oversize_message, len(variants), len(truths), self.max_group_size)
        logging.warning('Returning all variants with not-confident markers.')
        # Skip labeling entirely for oversized groups.
        for unlabeled in variants:
            yield variant_labeler.VariantLabel(
                is_confident=False, genotype=(-1, -1), variant=unlabeled)
        return

    reference = self.make_labeler_ref(variants, truths)
    labeling_result = label_variants(variants, truths, reference)
    if not labeling_result:
        raise ValueError('Failed to assign labels for variants', variants)

    for labeled_variant in labeling_result:
        # For now confidence is just "overlaps the confident regions". The
        # original (partially redacted) comment proposes rethinking this:
        # a variant seems confident if it has a non-ref genotype (since only
        # confident truth variants are considered) or if it overlaps the
        # confident regions.
        confident = self._confident_regions.variant_overlaps(labeled_variant)
        genotype = tuple(labeled_variant.calls[0].genotype)
        yield variant_labeler.VariantLabel(
            is_confident=confident, genotype=genotype, variant=labeled_variant)
def test_span_raises_on_bad_input(self, regions, regexp):
    """Check that `ranges.span` rejects invalid input with a `ValueError`.

    Args:
      regions: The input passed to `ranges.span` that is expected to be
        rejected.
      regexp: Regular expression the `ValueError` message must match.
    """
    # `assertRaisesRegexp` is a deprecated alias that was removed from
    # `unittest.TestCase` in Python 3.12. Prefer the current name and fall
    # back to the legacy one only on runtimes that do not provide it.
    if hasattr(self, 'assertRaisesRegex'):
        assert_raises_regex = self.assertRaisesRegex
    else:
        assert_raises_regex = self.assertRaisesRegexp

    with assert_raises_regex(ValueError, regexp):
        ranges.span(regions)
def test_span_computes_span_correctly(self, regions, expected_span):
    """Check that `ranges.span` gives `expected_span` for any input order.

    Args:
      regions: The regions to span; every ordering of them is tried.
      expected_span: The value `ranges.span` must return for each ordering.
    """
    n_regions = len(regions)
    # The span must not depend on the order in which regions are supplied,
    # so exercise every full-length permutation of the input.
    for ordering in itertools.permutations(regions, n_regions):
        actual_span = ranges.span(ordering)
        self.assertEqual(expected_span, actual_span)