def test_read_range(self, update_cached_read_end_first):
    """Checks utils.read_range against the expected reference span.

    Two reads with the same bases and start but different CIGARs are
    checked: '2M1I3M' is expected to end at start + 5 and '2M16D3M' at
    start + 5 + 16.

    Args:
      update_cached_read_end_first: if true, read.cached_end is populated
        from utils.read_end(..., use_cached_read_end=False) before
        read_range is called, so the cached path is exercised as well.
    """
    start = 10000001
    # (cigar, expected length of the reference span covered by the read)
    cases = (
        ('2M1I3M', 5),
        ('2M16D3M', 5 + 16),
    )
    for cigar, span_length in cases:
        read = test_utils.make_read(
            'AAACAG',
            chrom='chrX',
            start=start,
            cigar=cigar,
            quals=range(10, 16),
            name='read1')
        if update_cached_read_end_first:
            # Explicitly update cached_end.
            read.cached_end = utils.read_end(read, use_cached_read_end=False)
        expected = ranges.make_range('chrX', start, start + span_length)
        self.assertEqual(expected, utils.read_range(read))
def call_debruijn_graph(self, windows, reads):
    """Builds a de-Bruijn graph per window and collects its haplotypes.

    A window is skipped when it is longer than
    self.config.ws_config.max_window_size or when self.ref_reader reports
    it as invalid. For every remaining window the reference sequence is
    queried, the reads overlapping the window are selected, and a graph is
    built (the build is timed). Graph metrics are logged for every window
    that was not skipped.

    Args:
      windows: iterable of ranges to process; each needs `start` and `end`.
      reads: reads to draw from. It is iterated once per processed window,
        so it must support repeated iteration (e.g. a list).

    Returns:
      A list of realigner_pb2.CandidateHaplotypes, one for each window
      whose candidate haplotypes are non-empty and differ from [ref].
    """
    haplotypes_per_window = []
    for window in windows:
        window_length = window.end - window.start
        if window_length > self.config.ws_config.max_window_size:
            continue
        if not self.ref_reader.is_valid(window):
            continue

        ref = self.ref_reader.query(window)
        # redacted
        overlapping_reads = []
        for read in reads:
            if ranges.ranges_overlap(window, utils.read_range(read)):
                overlapping_reads.append(read)

        with timer.Timer() as build_timer:
            graph = debruijn_graph.build(
                ref, overlapping_reads, self.config.dbg_config)
        graph_building_time = build_timer.GetDuration()

        # A falsy graph means there is nothing beyond the reference itself.
        candidate_haplotypes = (
            graph.candidate_haplotypes() if graph else [ref])

        if candidate_haplotypes and candidate_haplotypes != [ref]:
            haplotypes_per_window.append(
                realigner_pb2.CandidateHaplotypes(
                    span=window, haplotypes=candidate_haplotypes))

        self.diagnostic_logger.log_graph_metrics(
            window, graph, candidate_haplotypes, graph_building_time)

    return haplotypes_per_window
def read_span(self):
    """Returns the range spanned by all reads, computing it on first use.

    The result is stored in self._read_span and reused on later calls.
    While there are no reads nothing is computed and the currently stored
    value (None until a span has been computed) is returned.

    The span uses the reference name of the first read's range, the
    smallest start and the largest end over all read ranges.
    """
    if self._read_span is not None or not self.reads:
        return self._read_span

    read_ranges = [utils.read_range(read) for read in self.reads]
    first_range = read_ranges[0]
    self._read_span = ranges.make_range(
        first_range.reference_name,
        min(read_range.start for read_range in read_ranges),
        max(read_range.end for read_range in read_ranges))
    return self._read_span
def test_read_range(self):
    """Tests reads have their ranges calculated correctly.

    Two reads with the same bases and start but different CIGARs are
    checked: '2M1I3M' is expected to end at start + 5 and '2M16D3M' at
    start + 5 + 16.
    """
    start = 10000001

    read = test_utils.make_read(
        'AAACAG',
        chrom='chrX',
        start=start,
        cigar='2M1I3M',
        quals=range(10, 16),
        name='read1')
    # assertEqual, not assertEquals: the latter is a deprecated alias that
    # was removed from unittest in Python 3.12.
    self.assertEqual(
        ranges.make_range('chrX', start, start + 5), utils.read_range(read))

    read = test_utils.make_read(
        'AAACAG',
        chrom='chrX',
        start=start,
        cigar='2M16D3M',
        quals=range(10, 16),
        name='read1')
    self.assertEqual(
        ranges.make_range('chrX', start, start + 5 + 16),
        utils.read_range(read))
def query(self, region):
    """Lazily yields the reads whose range overlaps the query region.

    Args:
      region: nucleus.genomics.v1.Range. The query region.

    Returns:
      A generator over the nucleus.genomics.v1.Read protos in self.reads
      that overlap region. Overlap is evaluated as the generator is
      consumed, not when this method is called.
    """

    def overlaps_region(read):
        return ranges.ranges_overlap(region, utils.read_range(read))

    # redacted
    return (read for read in self.reads if overlaps_region(read))
def query(self, region):
    """Selects the records in self.reads that overlap a query region.

    Args:
      region: third_party.nucleus.protos.Range, query region.

    Returns:
      A generator over the third_party.nucleus.protos.Read records whose
      range overlaps region. Records are tested one at a time as the
      generator is consumed.
    """
    # redacted
    candidates = self.reads
    return (
        candidate
        for candidate in candidates
        if ranges.ranges_overlap(region, utils.read_range(candidate))
    )
def check_overlaps(chr1, start1, end1, chr2, start2, end2, expected):
    """Asserts whether a read on chr1 overlaps a region on chr2.

    Builds an all-'A' read of length end1 - start1 with a single match
    CIGAR starting at start1 on chr1, and a region [start2, end2) on chr2
    via ranges.make_range. Both utils.read_overlaps_region and
    ranges.ranges_overlap must agree with `expected`.

    Note: uses `self` from the enclosing scope for the assertions.
    """
    read_length = end1 - start1
    bases = 'A' * read_length
    cigar = '{}M'.format(read_length)
    read = test_utils.make_read(bases, chrom=chr1, start=start1, cigar=cigar)
    query_region = ranges.make_range(chr2, start2, end2)

    self.assertEqual(utils.read_overlaps_region(read, query_region), expected)

    # Cross-check: ranges.ranges_overlap on the read's range must give the
    # same answer.
    overlaps_via_ranges = ranges.ranges_overlap(
        query_region, utils.read_range(read))
    self.assertEqual(overlaps_via_ranges, expected)
def test_read_range(self):
    """Tests reads have their ranges calculated correctly.

    The same bases and start are aligned with two CIGARs: '2M1I3M' is
    expected to give a range ending at start + 5, and '2M16D3M' a range
    ending at start + 5 + 16.
    """
    start = 10000001

    read_with_insertion = test_utils.make_read(
        'AAACAG',
        chrom='chrX',
        start=start,
        cigar='2M1I3M',
        quals=range(10, 16),
        name='read1')
    # assertEquals is a deprecated alias removed in Python 3.12; use the
    # supported assertEqual.
    self.assertEqual(
        ranges.make_range('chrX', start, start + 5),
        utils.read_range(read_with_insertion))

    read_with_deletion = test_utils.make_read(
        'AAACAG',
        chrom='chrX',
        start=start,
        cigar='2M16D3M',
        quals=range(10, 16),
        name='read1')
    self.assertEqual(
        ranges.make_range('chrX', start, start + 5 + 16),
        utils.read_range(read_with_deletion))
def assign_reads_to_assembled_regions(assembled_regions, reads):
    """Assigns each read to the region it overlaps the most.

    For every read, ranges.find_max_overlapping picks the index of the
    best-matching region; the read is then added to that AssemblyRegion
    via add_read. Reads for which no index is found are collected and
    returned.

    Args:
      assembled_regions: list[AssemblyRegion], list of AssemblyRegion to
        assign reads to. Modified in place through add_read. Does not
        assume AssemblyRegion are sorted.
      reads: iterable[learning.genomics.genomics.Read], to be processed.
        Does not assume the reads are sorted.

    Returns:
      list[learning.genomics.genomics.Read], the reads that were not
      assigned to any region, in input order. Assigned reads are not
      returned; they are recorded on the AssemblyRegion objects.
    """
    candidate_windows = [assembled.region for assembled in assembled_regions]
    leftover_reads = []
    for read in reads:
        best_index = ranges.find_max_overlapping(
            utils.read_range(read), candidate_windows)
        if best_index is None:
            leftover_reads.append(read)
            continue
        assembled_regions[best_index].add_read(read)
    return leftover_reads