コード例 #1
0
    def realign_reads(self, reads, region):
        """Realign the reads of a region against locally assembled haplotypes.

        The method drives the whole realignment pipeline:
          1. Select candidate windows for local assembly (WindowSelector (ws)
             module). Windows larger than max_window_size are skipped.
          2. Build a pruned De-Bruijn graph for each candidate window
             (DeBruijnGraph (dbg) module). Graphs with more than max_num_paths
             candidate haplotypes, or with the reference sequence as the only
             candidate, are skipped.
          3. Align reads against the candidate haplotypes (Aligner (aln)
             module).
          4. Emit every input read, whether or not it needed realignment.

        Args:
          reads: [`third_party.nucleus.protos.Read` protos]. The input reads
            to realign.
          region: A `third_party.nucleus.protos.Range` proto. The region of the
            genome to process.

        Returns:
          A `(candidate_haplotypes, realigned_reads)` pair:
            candidate_haplotypes: [realigner_pb2.CandidateHaplotypes].
              Information on the list of candidate haplotypes.
            realigned_reads: [`third_party.nucleus.protos.Read` protos]. The
              realigned reads for the region. NOTE: THESE READS MAY NO LONGER
              BE IN THE SAME ORDER AS THE INPUT.

        Raises:
          ValueError: If an assembled region has to be aligned while
            `--use_fast_pass_aligner` is false; the older aligner has been
            removed.
        """
        # Step 1: find the windows inside `region` that need local assembly.
        windows = window_selector.select_windows(
            self.config.ws_config, self.ref_reader, reads, region)

        # Step 2: assemble candidate haplotypes for each of those windows.
        candidate_haplotypes = self.call_debruijn_graph(windows, reads)

        # One lightweight container per haplotype set, used to collect the
        # reads that map to it.
        regions_to_align = [
            AssemblyRegion(haplotypes) for haplotypes in candidate_haplotypes
        ]

        # The output list is seeded with the reads that were not assigned to
        # any assembled region.
        realigned_reads = assign_reads_to_assembled_regions(
            regions_to_align, reads)

        # Step 3: align the reads of every assembled region and append them.
        for current_region in regions_to_align:
            # The flag is checked per region, so a run without any assembled
            # regions never raises.
            if not flags.FLAGS.use_fast_pass_aligner:
                raise ValueError(
                    '--use_fast_pass_aligner is always true. '
                    'The older implementation is deprecated and removed.')

            region_reads = self.call_fast_pass_aligner(current_region)
            realigned_reads.extend(region_reads)

        self.diagnostic_logger.log_realigned_reads(
            region, realigned_reads, self.shared_header)

        return candidate_haplotypes, realigned_reads
コード例 #2
0
 def test_select_windows_returns_empty_list_when_no_reads(self):
   """select_windows returns an empty list when no reads are supplied."""
   # A 500-base all-'A' reference on chr1 starting at offset 0.
   reference = fasta.InMemoryFastaReader([('chr1', 0, 'A' * 500)])
   query_region = ranges.make_range('chr1', 1, 100)

   windows = window_selector.select_windows(
       self.config, ref_reader=reference, reads=[], region=query_region)

   self.assertEqual([], windows)
コード例 #3
0
  def test_select_windows(self):
    """End-to-end check of the high-level select_windows function.

    A handful of reads carrying a single candidate at position 100 should
    produce one window centered at 100.
    """
    # Three identical 'AGA' reads at start=99 that differ only in base quality.
    reads = [
        test_utils.make_read('AGA', start=99, cigar='3M', quals=[qual] * 3)
        for qual in (64, 63, 62)
    ]
    chrom = reads[0].alignment.position.reference_name
    ref_reader = fasta.InMemoryFastaReader([(chrom, 0, 'A' * 300)])
    region = ranges.make_range(chrom, 0, 200)

    actual_windows = window_selector.select_windows(
        self.config, ref_reader, reads, region)
    expected_windows = [ranges.make_range(chrom, 96, 104)]

    self.assertEqual(actual_windows, expected_windows)