def process(self, region):
    """Finds candidates and creates corresponding examples in a region.

    Args:
      region: A nucleus.genomics.v1.Range proto. Specifies the region on the
        genome we should process.

    Returns:
      Three values. First is a list of the found candidates, which are
      deepvariant.DeepVariantCall objects. The second value is a list of filled
      in tf.Example protos. For example, these will include the candidate
      variant, the pileup image, and, if in training mode, the truth variants
      and labels needed for training. The third value is a list of
      nucleus.genomics.v1.Variant protos containing gVCF information for all
      reference sites, if gvcf generation is enabled, otherwise returns [].
    """
    region_timer = timer.TimerStart()

    # Initialization is deferred until the first region is processed.
    if not self.initialized:
        self._initialize()

    # Point the in-memory reader at this region's reads before calling.
    self.in_memory_sam_reader.replace_reads(self.region_reads(region))
    candidates, gvcfs = self.candidates_in_region(region)

    if in_training_mode(self.options):
        # Only candidates yielded by label_candidates() produce examples, and
        # every example of a candidate receives that candidate's label.
        examples = [
            self.add_label_to_example(example, label)
            for candidate, label in self.label_candidates(candidates, region)
            for example in self.create_pileup_examples(candidate)
        ]
    else:
        examples = [
            example
            for candidate in candidates
            for example in self.create_pileup_examples(candidate)
        ]

    # The number of examples need not equal the number of candidates (the
    # example list is built by a nested loop over create_pileup_examples),
    # so report both counts rather than labelling the example count as
    # "candidates".
    logging.info(
        'Found %s candidates (%s examples) in %s [%d bp] [%0.2fs elapsed]',
        len(candidates), len(examples), ranges.to_literal(region),
        ranges.length(region), region_timer.Stop())
    return candidates, examples, gvcfs
def assertCandidatesFromReadsEquals(self,
                                    reads,
                                    expected,
                                    start=None,
                                    end=None,
                                    ref=None):
    """Checks the result of window_selector._candidates_from_reads on reads.

    Args:
      reads: Non-empty sequence of reads; the chromosome name is taken from the
        alignment position of the first read.
      expected: Either the value the call is expected to return, or an
        Exception subclass the call is expected to raise.
      start: Start of the region to examine. Defaults to 0 when None.
      end: End of the region to examine. Defaults to 20 when None.
      ref: Reference bases to serve for the chromosome, starting at offset 0.
        When None, a run of 'A' bases 512 longer than the region is used.
    """
    contig = reads[0].alignment.position.reference_name
    if start is None:
        start = 0
    if end is None:
        end = 20
    region = ranges.make_range(contig, start, end)

    if ref is None:
        ref = 'A' * (ranges.length(region) + 512)
    ref_reader = fasta.InMemoryFastaReader([(contig, 0, ref)])

    expects_exception = (
        isinstance(expected, type) and issubclass(expected, Exception))
    if not expects_exception:
        # Ordinary case: compare the returned candidates with the expectation.
        actual = window_selector._candidates_from_reads(
            self.config, ref_reader, reads, region)
        self.assertEqual(actual, expected)
        return

    # `expected` names an exception type, so the call itself must raise it.
    with self.assertRaises(expected):
        window_selector._candidates_from_reads(
            self.config, ref_reader, reads, region)
def test_length_is_correct(self, region, expected_length):
    """Asserts that ranges.length(region) equals expected_length."""
    actual_length = ranges.length(region)
    self.assertEqual(expected_length, actual_length)