Ejemplo n.º 1
0
 def test_read_range(self, update_cached_read_end_first):
     """Checks utils.read_range() on reads with an insertion and a deletion.

     Args:
       update_cached_read_end_first: when truthy, read.cached_end is
         populated from utils.read_end(..., use_cached_read_end=False)
         before the range is computed.
     """
     start = 10000001
     # Each case is (cigar, expected distance from start to the range end).
     # The insertion case is expected to span 5 reference bases; the 16-base
     # deletion is expected to add 16 more.
     cases = (
         ('2M1I3M', 5),
         ('2M16D3M', 5 + 16),
     )
     for cigar, expected_length in cases:
         read = test_utils.make_read(
             'AAACAG',
             chrom='chrX',
             start=start,
             cigar=cigar,
             quals=range(10, 16),
             name='read1',
         )
         if update_cached_read_end_first:
             # Explicitly refresh cached_end, bypassing any cached value.
             read.cached_end = utils.read_end(
                 read, use_cached_read_end=False)
         expected = ranges.make_range('chrX', start, start + expected_length)
         self.assertEqual(expected, utils.read_range(read))
Ejemplo n.º 2
0
  def call_debruijn_graph(self, windows, reads):
    """Runs the debruijn_graph module on every usable window.

    A window is skipped when it is longer than
    self.config.ws_config.max_window_size or when self.ref_reader reports it
    as invalid. For every remaining window a graph is built from the reads
    overlapping that window, and graph metrics are logged.

    Args:
      windows: iterable of ranges (objects with start and end) to process.
      reads: iterable of reads; it is scanned once per processed window.

    Returns:
      A list of realigner_pb2.CandidateHaplotypes, one per window whose
      candidate haplotypes are non-empty and differ from the reference alone.
    """
    haplotypes_per_window = []
    for window in windows:
      window_size = window.end - window.start
      if (window_size > self.config.ws_config.max_window_size or
          not self.ref_reader.is_valid(window)):
        continue

      ref = self.ref_reader.query(window)
      # Only the reads that overlap this window take part in the graph.
      overlapping_reads = []
      for read in reads:
        if ranges.ranges_overlap(window, utils.read_range(read)):
          overlapping_reads.append(read)

      with timer.Timer() as build_timer:
        graph = debruijn_graph.build(
            ref, overlapping_reads, self.config.dbg_config)
      elapsed = build_timer.GetDuration()

      # A falsy graph means only the reference sequence is available.
      candidate_haplotypes = graph.candidate_haplotypes() if graph else [ref]
      if candidate_haplotypes and candidate_haplotypes != [ref]:
        haplotypes_per_window.append(
            realigner_pb2.CandidateHaplotypes(
                span=window, haplotypes=candidate_haplotypes))

      self.diagnostic_logger.log_graph_metrics(
          window, graph, candidate_haplotypes, elapsed)

    return haplotypes_per_window
Ejemplo n.º 3
0
 def read_span(self):
   """Returns the range covering every read, computing it on first use.

   The span runs from the smallest read start to the largest read end and
   takes its reference name from the first read. The result is stored in
   self._read_span. When there are no reads, the stored value (None unless
   set elsewhere) is returned unchanged.
   """
   if self._read_span is not None or not self.reads:
     return self._read_span

   read_ranges = [utils.read_range(read) for read in self.reads]
   reference_name = read_ranges[0].reference_name
   span_start = min(read_range.start for read_range in read_ranges)
   span_end = max(read_range.end for read_range in read_ranges)
   self._read_span = ranges.make_range(reference_name, span_start, span_end)
   return self._read_span
Ejemplo n.º 4
0
 def test_read_range(self):
     """Tests reads have their ranges calculated correctly.

     Two reads of the same six bases are checked: one whose cigar contains a
     1-base insertion and one whose cigar contains a 16-base deletion.
     """
     start = 10000001
     # Insertion case: the range is expected to end 5 bases past start.
     read = test_utils.make_read('AAACAG',
                                 chrom='chrX',
                                 start=start,
                                 cigar='2M1I3M',
                                 quals=range(10, 16),
                                 name='read1')
     # assertEqual is used because the assertEquals alias is deprecated and
     # no longer exists in recent Python versions.
     self.assertEqual(ranges.make_range('chrX', start, start + 5),
                      utils.read_range(read))
     # Deletion case: the 16 deleted bases are expected to extend the range.
     read = test_utils.make_read('AAACAG',
                                 chrom='chrX',
                                 start=start,
                                 cigar='2M16D3M',
                                 quals=range(10, 16),
                                 name='read1')
     self.assertEqual(ranges.make_range('chrX', start, start + 5 + 16),
                      utils.read_range(read))
Ejemplo n.º 5
0
    def query(self, region):
        """Lazily yields the reads whose range overlaps the query region.

        Args:
          region: nucleus.genomics.v1.Range. The query region.

        Returns:
          A generator over the nucleus.genomics.v1.Read protos in self.reads
          that overlap region.
        """

        def overlaps_region(read):
            # A read matches when its reference range intersects the region.
            return ranges.ranges_overlap(region, utils.read_range(read))

        return (read for read in self.reads if overlaps_region(read))
Ejemplo n.º 6
0
    def query(self, region):
        """Returns the stored reads that overlap the given region.

        Filtering happens lazily, as the returned generator is consumed.

        Args:
          region: third_party.nucleus.protos.Range, query region.

        Returns:
          A generator over third_party.nucleus.protos.Read.
        """
        return (
            record
            for record in self.reads
            if ranges.ranges_overlap(region, utils.read_range(record))
        )
Ejemplo n.º 7
0
  def query(self, region):
    """Selects the records in self.reads that overlap a query region.

    Args:
      region: third_party.nucleus.protos.Range, query region.

    Returns:
      A generator over the matching third_party.nucleus.protos.Read records;
      the overlap check runs as the generator is consumed.
    """

    def is_hit(candidate):
      # Compare the candidate's reference range against the query region.
      return ranges.ranges_overlap(region, utils.read_range(candidate))

    return (candidate for candidate in self.reads if is_hit(candidate))
Ejemplo n.º 8
0
 def check_overlaps(chr1, start1, end1, chr2, start2, end2, expected):
     """Asserts whether a read on chr1 overlaps a region on chr2.

     Builds an all-match read covering [start1, end1) and a region covering
     [start2, end2), then checks both overlap helpers against expected.
     Relies on `self` being available from the enclosing scope.
     """
     read_length = end1 - start1
     bases = 'A' * read_length
     cigar = '{}M'.format(read_length)
     read = test_utils.make_read(
         bases, chrom=chr1, start=start1, cigar=cigar)
     region = ranges.make_range(chr2, start2, end2)

     overlaps_via_utils = utils.read_overlaps_region(read, region)
     self.assertEqual(overlaps_via_utils, expected)

     # ranges.ranges_overlap on the read's range must agree with the above.
     overlaps_via_ranges = ranges.ranges_overlap(
         region, utils.read_range(read))
     self.assertEqual(overlaps_via_ranges, expected)
Ejemplo n.º 9
0
 def test_read_range(self):
   """Tests reads have their ranges calculated correctly.

   Two reads of the same six bases are checked: one whose cigar contains a
   1-base insertion and one whose cigar contains a 16-base deletion.
   """
   start = 10000001
   # Insertion case: the range is expected to end 5 bases past start.
   read = test_utils.make_read(
       'AAACAG',
       chrom='chrX',
       start=start,
       cigar='2M1I3M',
       quals=range(10, 16),
       name='read1')
   # assertEqual is used because the assertEquals alias is deprecated and no
   # longer exists in recent Python versions.
   self.assertEqual(
       ranges.make_range('chrX', start, start + 5), utils.read_range(read))
   # Deletion case: the 16 deleted bases are expected to extend the range.
   read = test_utils.make_read(
       'AAACAG',
       chrom='chrX',
       start=start,
       cigar='2M16D3M',
       quals=range(10, 16),
       name='read1')
   self.assertEqual(
       ranges.make_range('chrX', start, start + 5 + 16),
       utils.read_range(read))
Ejemplo n.º 10
0
def assign_reads_to_assembled_regions(assembled_regions, reads):
  """Adds each read to the assembled region it overlaps the most.

  The best region for a read is chosen by ranges.find_max_overlapping over
  the regions of assembled_regions. Matching reads are attached in place via
  AssemblyRegion.add_read.

  Args:
    assembled_regions: list[AssemblyRegion], list of AssemblyRegion to assign
      reads to. Does not assume AssemblyRegion are sorted.
    reads: iterable[learning.genomics.genomics.Read], to be processed. Does
      not assume the reads are sorted.

  Returns:
    list[learning.genomics.genomics.Read], the reads for which no overlapping
    region was found, in input order. The assigned reads are not returned;
    they are recorded on the entries of assembled_regions.
  """
  candidate_regions = [assembled.region for assembled in assembled_regions]
  leftovers = []
  for read in reads:
    best_index = ranges.find_max_overlapping(
        utils.read_range(read), candidate_regions)
    if best_index is None:
      leftovers.append(read)
      continue
    assembled_regions[best_index].add_read(read)
  return leftovers