Exemple #1
0
    def process(self, region):
        """Finds candidates and creates corresponding examples in a region.

        Initialization is deferred: `self._initialize()` is invoked the first
        time this method runs with `self.initialized` unset. The reads returned
        by `self.region_reads(region)` are then handed to
        `self.in_memory_sam_reader`, candidates are computed for the region,
        and every candidate is expanded into its pileup examples. In training
        mode the candidates are labeled first and each example is annotated
        with the label of the candidate it came from.

        Args:
          region: A nucleus.genomics.v1.Range proto. Specifies the region on
            the genome we should process.

        Returns:
          Three values. First is a list of the found candidates, which are
          deepvariant.DeepVariantCall objects. The second value is a list of
          filled in tf.Example protos. For example, these will include the
          candidate variant, the pileup image, and, if in training mode, the
          truth variants and labels needed for training. The third value is a
          list of nucleus.genomics.v1.Variant protos containing gVCF
          information for all reference sites, if gvcf generation is enabled,
          otherwise returns [].
        """
        region_timer = timer.TimerStart()

        # Make sure one-time setup has happened before touching any readers.
        if not self.initialized:
            self._initialize()

        # Replace the in-memory reader's reads with the ones for this region
        # before looking for candidates.
        self.in_memory_sam_reader.replace_reads(self.region_reads(region))
        candidates, gvcfs = self.candidates_in_region(region)

        if in_training_mode(self.options):
            # Every example generated from a candidate carries that
            # candidate's label.
            examples = [
                self.add_label_to_example(example, label)
                for candidate, label in self.label_candidates(
                    candidates, region)
                for example in self.create_pileup_examples(candidate)
            ]
        else:
            examples = [
                example for candidate in candidates
                for example in self.create_pileup_examples(candidate)
            ]

        # The message reports candidates, so log the candidate count; a
        # candidate may produce more than one example, which would make
        # len(examples) misleading under this label.
        logging.info('Found %s candidates in %s [%d bp] [%0.2fs elapsed]',
                     len(candidates), ranges.to_literal(region),
                     ranges.length(region), region_timer.Stop())
        return candidates, examples, gvcfs
    def assertCandidatesFromReadsEquals(self,
                                        reads,
                                        expected,
                                        start=None,
                                        end=None,
                                        ref=None):
        """Asserts the outcome of `_candidates_from_reads` for the given reads.

        The region under test is built on the reference name of the first read
        and spans `[start, end)` as passed to `ranges.make_range`.

        Args:
          reads: Non-empty sequence of reads; the first one supplies the
            reference name used for the region and the in-memory reference.
          expected: Either an Exception subclass, in which case the call must
            raise it, or the value the call is expected to return.
          start: Region start; defaults to 0 when None.
          end: Region end; defaults to 20 when None.
          ref: Reference bases string. When None, a run of 'A' that is 512
            bases longer than the region is used.
        """
        contig = reads[0].alignment.position.reference_name
        if start is None:
            start = 0
        if end is None:
            end = 20
        region = ranges.make_range(contig, start, end)

        if ref is None:
            padded_length = ranges.length(region) + 512
            ref = 'A' * padded_length

        ref_reader = fasta.InMemoryFastaReader([(contig, 0, ref)])

        def compute_candidates():
            # Single place for the call under test, shared by both branches.
            return window_selector._candidates_from_reads(
                self.config, ref_reader, reads, region)

        expects_exception = (isinstance(expected, type) and
                             issubclass(expected, Exception))
        if expects_exception:
            with self.assertRaises(expected):
                compute_candidates()
            return

        self.assertEqual(compute_candidates(), expected)
Exemple #3
0
 def test_length_is_correct(self, region, expected_length):
     """Checks that `ranges.length(region)` equals `expected_length`."""
     actual_length = ranges.length(region)
     self.assertEqual(expected_length, actual_length)