def test_expand_raises_with_missing_contig_in_map(self):
  """Checks expand raises KeyError when contig '1' is absent from the map."""
  # Neither map has an entry for '1', the contig of the range being expanded:
  # the first is empty, the second only knows about contig '2'.
  contig_maps_without_1 = [
      {},
      {'2': reference_pb2.ContigInfo(name='2', n_bases=50)},
  ]
  for contig_map in contig_maps_without_1:
    with self.assertRaises(KeyError):
      ranges.expand(ranges.make_range('1', 10, 20), 1, contig_map=contig_map)
def _candidates_from_reads(config, ref_reader, reads, region):
  """Selects candidate positions by counting alleles in the given reads.

  The region is expanded by config.region_expansion_in_bp (using the contigs
  from the reference header), every read is added to an AlleleCounter built
  over the expanded region, and the counter is then handed to the selector
  chosen by config.window_selector_model.model_type.

  Args:
    config: learning.genomics.deepvariant.realigner.WindowSelectorOptions
      options determining the behavior of this window selector.
    ref_reader: GenomeReference. Indexed reference genome to query bases.
    reads: list[nucleus.protos.Read]. The reads we are processing into
      candidate positions.
    region: nucleus.protos.Range. The region we are processing.

  Returns:
    A list. The elements are reference positions within region.

  Raises:
    ValueError: if config.window_selector_model.model_type isn't a valid enum
      name in realigner_pb2.WindowSelectorModel.ModelType.
  """
  # Read filtering thresholds come straight from the selector config.
  read_requirements = reads_pb2.ReadRequirements(
      min_mapping_quality=config.min_mapq,
      min_base_quality=config.min_base_quality)
  counter_options = deepvariant_pb2.AlleleCounterOptions(
      read_requirements=read_requirements,
      keep_legacy_behavior=config.keep_legacy_behavior)

  # Count over the expanded region, not the bare input region.
  contig_map = ranges.contigs_dict(ref_reader.header.contigs)
  search_region = ranges.expand(
      region, config.region_expansion_in_bp, contig_map=contig_map)

  counter = allelecounter.AlleleCounter(
      ref_reader.c_reader, search_region, [], counter_options)
  for read in reads:
    counter.add(read, 'placeholder_sample_id')

  # Dispatch on the configured model; each branch reads only its own
  # sub-model field.
  selector_model = config.window_selector_model
  model_type = selector_model.model_type
  model_types = realigner_pb2.WindowSelectorModel
  if model_type == model_types.VARIANT_READS:
    return _variant_reads_threshold_selector(
        counter, selector_model.variant_reads_model, search_region)
  if model_type == model_types.ALLELE_COUNT_LINEAR:
    return _allele_count_linear_selector(
        counter, selector_model.allele_count_linear_model, search_region)
  raise ValueError('Unknown enum option "{}" for '
                   'WindowSelectorModel.model_type'.format(model_type))
def _label_grouped_variants(self, variants):
  """Yields a VariantLabel for each variant in a group of candidates.

  Truth variants are queried over the span of `variants`, expanded by
  _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP. When more truth variants than
  self.max_group_size come back, labeling is skipped and every input variant
  is yielded as not confident with genotype (-1, -1). Otherwise the group is
  labeled with label_variants and one VariantLabel is yielded per result.

  Args:
    variants: the candidate variants to label. len() is taken of it and it is
      iterated, so it must be a sized collection.

  Yields:
    variant_labeler.VariantLabel objects.

  Raises:
    ValueError: if label_variants returns a falsy (e.g. empty) result.
  """
  # redacted
  # redacted
  # they should be computed in the grouping.
  variant_ranges = [variant_utils.variant_range(v) for v in variants]
  span = ranges.span(variant_ranges)
  query_region = ranges.expand(
      span, _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
  truths = list(self._get_truth_variants(query_region))

  n_truth = len(truths)
  if n_truth > self.max_group_size:
    logging.warning(
        ('Found a large number of variants to label (n_candidates=%d, '
         'n_truth=%d) relative to candidate cap of %d. This may make the '
         'algorithm very slow.'), len(variants), n_truth, self.max_group_size)
    # redacted
    logging.warning('Returning all variants with not-confident markers.')
    for candidate in variants:
      yield variant_labeler.VariantLabel(
          is_confident=False, genotype=(-1, -1), variant=candidate)
    return

  labeler_ref = self.make_labeler_ref(variants, truths)
  labeled_variants = label_variants(variants, truths, labeler_ref)
  if not labeled_variants:
    raise ValueError('Failed to assign labels for variants', variants)

  for labeled in labeled_variants:
    # redacted
    # NOTE: confidence is currently just "overlaps the confident regions".
    # Rethink how we establish a variant is confident: it seems like it'd be
    # confident if it has a non-ref genotype (as we only consider confident
    # truth variants) or if it overlaps the confident regions.
    overlaps_confident = self._confident_regions.variant_overlaps(labeled)
    yield variant_labeler.VariantLabel(
        is_confident=overlaps_confident,
        genotype=tuple(labeled.calls[0].genotype),
        variant=labeled)
def _label_grouped_variants(self, variants):
  """Labels one group of candidate variants, yielding VariantLabel objects.

  The truth variants are fetched for the region spanning all of `variants`,
  widened by _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP. If the number of
  truth variants exceeds self.max_group_size, two warnings are logged and
  each input variant is yielded unlabeled (is_confident=False, genotype
  (-1, -1)). Otherwise label_variants assigns the labels and each labeled
  variant is yielded with its first call's genotype.

  Args:
    variants: the candidate variants in this group. Must support len() and
      repeated iteration.

  Yields:
    variant_labeler.VariantLabel objects.

  Raises:
    ValueError: if label_variants produced nothing truthy for the group.
  """
  # redacted
  # redacted
  # they should be computed in the grouping.
  group_span = ranges.span(
      [variant_utils.variant_range(variant) for variant in variants])
  truth_query = ranges.expand(
      group_span, _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
  truths = list(self._get_truth_variants(truth_query))

  too_many_truths = len(truths) > self.max_group_size
  if too_many_truths:
    oversize_message = (
        'Found a large number of variants to label (n_candidates=%d, '
        'n_truth=%d) relative to candidate cap of %d. This may make the '
        'algorithm very slow.')
    logging.warning(
        oversize_message, len(variants), len(truths), self.max_group_size)
    # redacted
    logging.warning('Returning all variants with not-confident markers.')
    for unlabeled in variants:
      yield variant_labeler.VariantLabel(
          is_confident=False, genotype=(-1, -1), variant=unlabeled)
    return

  ref = self.make_labeler_ref(variants, truths)
  labeled_variants = label_variants(variants, truths, ref)
  if not labeled_variants:
    raise ValueError('Failed to assign labels for variants', variants)

  for labeled in labeled_variants:
    # redacted
    # For now a variant counts as confident only when it overlaps the
    # confident regions. Rethink this: it seems like it'd also be confident
    # if it has a non-ref genotype (as we only consider confident truth
    # variants).
    is_confident = self._confident_regions.variant_overlaps(labeled)
    genotype = tuple(labeled.calls[0].genotype)
    yield variant_labeler.VariantLabel(
        is_confident=is_confident, genotype=genotype, variant=labeled)
def test_expand_raises_on_negative_n_bp(self):
  """Checks that expanding by a negative n_bp raises ValueError."""
  # The pattern pins the offending value (-10) in the error message.
  expected_message = 'n_bp must be >= 0 but got -10'
  with self.assertRaisesRegexp(ValueError, expected_message):
    ranges.expand(ranges.make_range('1', 10, 20), -10)
def test_expand_handles_boundaries(self, region, n_bp, contig_map, expected):
  """Checks ranges.expand(region, n_bp, contig_map) equals `expected`.

  NOTE(review): the arguments are presumably supplied by a parameterized
  decorator that is not visible here -- confirm against the enclosing class.
  """
  actual = ranges.expand(region, n_bp, contig_map)
  self.assertEqual(expected, actual)
def test_expand_is_correct(self, region, n_bp, contig_map, expected):
  """Checks that expanding `region` by `n_bp` yields `expected`.

  NOTE(review): the test cases presumably come from a parameterized decorator
  outside this view -- confirm against the enclosing class.
  """
  expanded = ranges.expand(region, n_bp, contig_map)
  self.assertEqual(expected, expanded)