def test_windows(self):
        """Check the ranges produced by WindowSelector.windows()."""
        selector = WindowSelector(self.test_ws_config())
        # Keys are reference positions, values are supporting-read counts.
        read_counts = {0: 2, 2: 4, 3: 11, 8: 3}

        actual = list(selector.windows(read_counts, 'ref', 0))
        expected = [
            range_pb2.Range(reference_name='ref', start=-4, end=6),
            range_pb2.Range(reference_name='ref', start=4, end=12),
        ]
        self.assertEqual(actual, expected)
Example #2
0
    def windows(self, candidate_pos, ref_name, ref_offset):
        """Process candidate positions to determine windows for local assembly.

        Following filters are applied:
          - Candidate position with too few supporting reads
            (< self.config.min_num_supporting_reads) is ignored.
          - Candidate position with too many supporting reads
            (> self.config.max_num_supporting_reads) is ignored.

        Surviving positions are visited in ascending order and grouped: a
        position joins the current group unless it lies more than
        self.config.min_windows_distance past the group's last position, in
        which case the current group is emitted and a new group is started.

        Each emitted window covers
          [min(pos) + ref_offset - self.config.min_windows_distance,
           max(pos) + ref_offset + self.config.min_windows_distance)
        where min(pos) / max(pos) are the first / last positions of the group.

        Args:
          candidate_pos: A dictionary with ref_pos as key and number of
            supporting reads as its value.
          ref_name: Reference name, used in setting the output
            reference_name value.
          ref_offset: Start offset for reference position; added to both
            window bounds.

        Yields:
          A range_pb2.Range record with reference_name, start and end
          populated.
        """
        config = self.config

        def make_window(first, last):
            # Shift the group into reference coordinates and pad both sides.
            return range_pb2.Range(
                reference_name=ref_name,
                start=first + ref_offset - config.min_windows_distance,
                end=last + ref_offset + config.min_windows_distance)

        # None (rather than -1) marks "no open group", so any integer
        # position, including -1, is treated as a real candidate.
        start_pos = end_pos = None
        for pos in sorted(candidate_pos):
            num_reads = candidate_pos[pos]
            if (num_reads < config.min_num_supporting_reads or
                    num_reads > config.max_num_supporting_reads):
                continue
            if start_pos is None:
                start_pos = pos
            elif pos > end_pos + config.min_windows_distance:
                # Gap is too large: flush the current group, start a new one.
                yield make_window(start_pos, end_pos)
                start_pos = pos
            # In every case the accepted position becomes the group's end.
            end_pos = pos
        if start_pos is not None:
            # Flush the final group.
            yield make_window(start_pos, end_pos)
Example #3
0
def make_range(chrom, start, end):
    """Builds a range_pb2.Range spanning chrom:start-end.

    Args:
      chrom: str. Chromosome name, stored as the range's reference_name.
      start: int. Start position of the range (0-based, inclusive).
      end: int. End position of the range (0-based, exclusive).

    Returns:
      The newly constructed range_pb2.Range.
    """
    fields = {'reference_name': chrom, 'start': start, 'end': end}
    return range_pb2.Range(**fields)
Example #4
0
def make_range(chrom, start, end):
    """Constructs a Range proto describing the interval chrom:start-end.

    Args:
      chrom: Name of the chromosome, as a string; becomes reference_name.
      start: Integer start of the interval (0-based, inclusive).
      end: Integer end of the interval (0-based, exclusive).

    Returns:
      A range_pb2.Range with reference_name, start and end set.
    """
    interval = range_pb2.Range(
        reference_name=chrom,
        start=start,
        end=end,
    )
    return interval
Example #5
0
def read_range(read):
    """Returns a Range proto computed from a Read's alignment.

    Args:
      read: nucleus.genomics.v1.Read. The read whose range is wanted.

    Returns:
      A freshly created range_pb2.Range, filled in for `read` by
      utils_cpp.read_range.
    """
    # The C++ helper writes its answer into the proto handed to it.
    result = range_pb2.Range()
    utils_cpp.read_range(read, result)
    return result
Example #6
0
def read_range(read, use_cached_read_end=True):
    """Returns a Range proto computed from a Read's alignment.

    Args:
      read: nucleus.genomics.v1.Read. The read whose range is wanted.
      use_cached_read_end: If True, use the `cached_end` stored in Read
        instead of recalculating. Forwarded unchanged to
        utils_cpp.read_range.

    Returns:
      A freshly created range_pb2.Range, filled in for `read` by
      utils_cpp.read_range.
    """
    # The C++ helper writes its answer into the proto handed to it.
    result = range_pb2.Range()
    utils_cpp.read_range(
        read, result, use_cached_read_end=use_cached_read_end)
    return result