def test_windows(self):
    """Verify the ranges WindowSelector.windows() yields for a small input."""
    selector = WindowSelector(self.test_ws_config())
    # Maps reference position -> number of supporting reads.
    support_by_position = {0: 2, 2: 4, 3: 11, 8: 3}

    observed = list(selector.windows(support_by_position, 'ref', 0))

    # Expected values depend on the thresholds in test_ws_config().
    expected = [
        range_pb2.Range(reference_name='ref', start=-4, end=6),
        range_pb2.Range(reference_name='ref', start=4, end=12),
    ]
    self.assertEqual(observed, expected)
def windows(self, candidate_pos, ref_name, ref_offset):
    """Process candidate positions to determine windows for local assembly.

    Following filters are applied:
      - Candidate position with low number of supporting reads is ignored.
      - Candidate position with too many of supporting reads is ignored.

    Surviving positions are visited in ascending order. A position that lies
    more than self.config.min_windows_distance past the end of the current
    window closes that window and opens a new one; otherwise it extends the
    current window.

    Windows are within range of
      [min(pos) - self.config.min_windows_distance,
       max(pos) + self.config.min_windows_distance)

    Args:
      candidate_pos: A dictionary with ref_pos as key and number of supporting
        reads as its value.
      ref_name: Reference name, used in setting the output
        genomics.range.reference_name value.
      ref_offset: Start offset for reference position; added to both ends of
        every emitted window.

    Yields:
      A range_pb2.Range record with reference_name, start and end populated.
    """

    def padded_window(first, last):
        # Shift into reference coordinates and pad both ends by the
        # configured distance.
        margin = self.config.min_windows_distance
        return range_pb2.Range(
            reference_name=ref_name,
            start=first + ref_offset - margin,
            end=last + ref_offset + margin)

    # None (rather than a numeric sentinel) marks "no window open", so every
    # integer position, including -1, is a valid window boundary.
    start_pos = end_pos = None
    for pos in sorted(candidate_pos):
        support = candidate_pos[pos]
        if support < self.config.min_num_supporting_reads:
            continue
        if support > self.config.max_num_supporting_reads:
            continue
        if start_pos is None:
            start_pos = end_pos = pos
        elif pos > end_pos + self.config.min_windows_distance:
            # Gap is too large: emit the finished window and start a new one.
            yield padded_window(start_pos, end_pos)
            start_pos = end_pos = pos
        else:
            end_pos = pos
    if start_pos is not None:
        # Flush the window that was still open when the input ran out.
        yield padded_window(start_pos, end_pos)
def make_range(chrom, start, end):
    """Builds a range_pb2.Range covering chrom:start-end.

    Args:
      chrom: str. Chromosome name; stored as the range's reference_name.
      start: int. Start position of the range (0-based, inclusive).
      end: int. End position of the range (0-based, exclusive).

    Returns:
      The newly constructed range_pb2.Range.
    """
    region = range_pb2.Range(reference_name=chrom, start=start, end=end)
    return region
def make_range(chrom, start, end):
    """Constructs a Range proto describing chrom:start-end.

    Args:
      chrom: The chromosome name as a string; becomes reference_name.
      start: Integer start position of the range (0-based, inclusive).
      end: Integer end position of the range (0-based, exclusive).

    Returns:
      A range_pb2.Range with reference_name, start and end set.
    """
    fields = dict(reference_name=chrom, start=start, end=end)
    return range_pb2.Range(**fields)
def read_range(read):
    """Returns the Range proto spanned by the alignment of a Read.

    Args:
      read: nucleus.genomics.v1.Read. The read whose range is wanted.

    Returns:
      A range_pb2.Range for read.
    """
    # The C++ helper receives the empty proto as an output argument; its own
    # return value is not used here.
    aligned_span = range_pb2.Range()
    utils_cpp.read_range(read, aligned_span)
    return aligned_span
def read_range(read, use_cached_read_end=True):
    """Returns the Range proto spanned by the alignment of a Read.

    Args:
      read: nucleus.genomics.v1.Read. The read whose range is wanted.
      use_cached_read_end: If True, use the `cached_end` stored in Read
        instead of recalculating. Forwarded unchanged to the C++ helper.

    Returns:
      A range_pb2.Range for read.
    """
    # The C++ helper receives the empty proto as an output argument; its own
    # return value is not used here.
    aligned_span = range_pb2.Range()
    utils_cpp.read_range(
        read, aligned_span, use_cached_read_end=use_cached_read_end)
    return aligned_span