예제 #1
0
    def call_debruijn_graph(self, windows, reads):
        """Run the debruijn_graph module on each window and gather haplotypes.

        A window is skipped entirely (nothing is built or logged for it) when
        it is wider than ``self.config.ws_config.max_window_size`` or when
        ``self.ref_reader.is_valid_interval`` rejects it.

        Args:
          windows: iterable of ranges exposing ``start`` and ``end``.
          reads: iterable of reads. It is scanned once per processed window,
            and only the reads whose range overlaps that window are handed
            to the graph builder.

        Returns:
          A list of realigner_pb2.CandidateHaplotypes, one per processed
          window whose candidate haplotypes are non-empty and differ from the
          reference-only list ``[ref]``.
        """
        haplotypes_per_window = []
        for region in windows:
            region_length = region.end - region.start
            too_wide = region_length > self.config.ws_config.max_window_size
            if too_wide or not self.ref_reader.is_valid_interval(region):
                continue

            ref_bases = self.ref_reader.bases(region)

            # Keep only the reads that overlap the current window.
            overlapping_reads = []
            for candidate in reads:
                if ranges.ranges_overlap(region, utils.read_range(candidate)):
                    overlapping_reads.append(candidate)

            # Only the graph construction itself is timed.
            with timer.Timer() as stopwatch:
                dbg = debruijn_graph.build(
                    ref_bases, overlapping_reads, self.config.dbg_config)
            build_duration = stopwatch.GetDuration()

            # A falsy graph falls back to the reference sequence alone.
            haplotypes = dbg.candidate_haplotypes() if dbg else [ref_bases]

            # Windows that yield nothing beyond the reference are not reported.
            if haplotypes and haplotypes != [ref_bases]:
                haplotypes_per_window.append(
                    realigner_pb2.CandidateHaplotypes(
                        span=region, haplotypes=haplotypes))

            # Metrics are logged for every processed window, reported or not.
            self.diagnostic_logger.log_graph_metrics(
                region, dbg, haplotypes, build_duration)

        return haplotypes_per_window
예제 #2
0
 def read_span(self):
     """Return the range covering all of ``self.reads``, computing it lazily.

     The result is cached in ``self._read_span``. When nothing is cached and
     ``self.reads`` is empty, the cached value (``None``) is returned as is.
     The reference name is taken from the first read's range.
     """
     needs_compute = self._read_span is None and self.reads
     if not needs_compute:
         return self._read_span

     read_ranges = [utils.read_range(read) for read in self.reads]
     reference_name = read_ranges[0].reference_name
     leftmost = min(rng.start for rng in read_ranges)
     rightmost = max(rng.end for rng in read_ranges)
     self._read_span = ranges.make_range(reference_name, leftmost, rightmost)
     return self._read_span
예제 #3
0
 def test_read_range(self):
     """Tests reads have their ranges calculated correctly.

     Two CIGAR shapes are checked against ``utils.read_range``: one with an
     insertion and one with a deletion.
     """
     start = 10000001

     # 2M1I3M: the expected range is 5 bases long (2M + 3M); the inserted
     # base does not extend the expected end.
     read = test_utils.make_read('AAACAG',
                                 chrom='chrX',
                                 start=start,
                                 cigar='2M1I3M',
                                 quals=range(10, 16),
                                 name='read1')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(ranges.make_range('chrX', start, start + 5),
                      utils.read_range(read))

     # 2M16D3M: the 16 deleted bases are added to the expected end on top of
     # the 5 matched bases.
     read = test_utils.make_read('AAACAG',
                                 chrom='chrX',
                                 start=start,
                                 cigar='2M16D3M',
                                 quals=range(10, 16),
                                 name='read1')
     self.assertEqual(ranges.make_range('chrX', start, start + 5 + 16),
                      utils.read_range(read))
예제 #4
0
def assign_reads_to_assembled_regions(assembled_regions, reads):
    """Assign each read to the maximally overlapped assembled region.

    For every read, ``ranges.find_max_overlapping`` picks an index into the
    regions of ``assembled_regions``. When an index is returned, the read is
    added to that AssemblyRegion in place via ``add_read``. When ``None`` is
    returned, the read is collected as unassigned.

    Args:
      assembled_regions: indexable sequence of AssemblyRegion to assign reads
        to; each must expose ``region`` and ``add_read``. Does not assume the
        AssemblyRegion are sorted.
      reads: iterable of reads to be processed. Does not assume the reads are
        sorted.

    Returns:
      The list of reads that were not assigned to any region, in input order.
      The assignments themselves are recorded on ``assembled_regions``.
    """
    candidate_regions = [assembled.region for assembled in assembled_regions]
    leftovers = []
    for current_read in reads:
        best_index = ranges.find_max_overlapping(
            utils.read_range(current_read), candidate_regions)
        if best_index is None:
            leftovers.append(current_read)
            continue
        assembled_regions[best_index].add_read(current_read)
    return leftovers