Example #1
0
def copy_read(read, part):
    """Builds a new read part from `read`, keeping only minimal alignment info.

    The result is a full copy of `read` in which the alignment, the aligned
    sequence and the aligned qualities have been emptied. Only the reference
    name, strand and mapping quality of the original alignment are carried
    over, and `_p<part>` is appended to the fragment name.

    Args:
      read: The read proto to copy; it is not modified.
      part: Value appended (after `_p`) to the fragment name of the copy.

    Returns:
      The newly created read proto.
    """
    source_alignment = read.alignment
    source_position = source_alignment.position

    part_read = reads_pb2.Read()
    part_read.CopyFrom(read)

    # Drop the per-base data and the whole alignment of the copy.
    # Note: If long reads will be used here, convert
    # to approach used for read trimming.
    part_read.aligned_sequence = ''
    del part_read.aligned_quality[:]
    part_read.alignment.Clear()

    # Put back the few alignment properties that still apply to the part.
    part_read.alignment.mapping_quality = source_alignment.mapping_quality
    part_read.alignment.position.reference_name = source_position.reference_name
    part_read.alignment.position.reverse_strand = source_position.reverse_strand

    part_read.fragment_name += f'_p{part}'
    return part_read
Example #2
0
def make_read(bases, start, quals=None, cigar=None, mapq=50, chrom='chr1',
              name=None):
  """Builds a Read proto for use in tests.

  Each call increments `make_read.counter`; when no `name` is given, the
  counter value before the increment is used to form the fragment name
  `read_<counter>`.

  Args:
    bases: Value stored as the read's aligned_sequence.
    start: Value stored as the alignment position.
    quals: Optional base qualities; if given and non-empty, must have the same
      length as `bases`.
    cigar: Optional cigar, converted with `_cigar.to_cigar_units`; a falsy value
      yields an empty cigar.
    mapq: Mapping quality of the alignment.
    chrom: Reference name of the alignment position.
    name: Optional fragment name.

  Returns:
    The constructed `reads_pb2.Read`.

  Raises:
    ValueError: If `quals` is non-empty and its length differs from `bases`.
  """
  if quals and len(quals) != len(bases):
    raise ValueError('Incompatable bases and quals', bases, quals)

  fragment_name = name or 'read_' + str(make_read.counter)
  position = position_pb2.Position(reference_name=chrom, position=start)
  cigar_units = _cigar.to_cigar_units(cigar) if cigar else []
  alignment = reads_pb2.LinearAlignment(
      position=position, mapping_quality=mapq, cigar=cigar_units)

  new_read = reads_pb2.Read(
      fragment_name=fragment_name,
      proper_placement=True,
      read_number=1,
      number_reads=2,
      aligned_sequence=bases,
      aligned_quality=quals,
      alignment=alignment)
  make_read.counter += 1
  return new_read
Example #3
0
def make_read(bases, quals=None, cigar=None, mapq=50, chrom='chr1', start=1,
              name='read'):
    """Builds a Read proto for use in tests.

    Args:
      bases: Value stored as the read's aligned_sequence.
      quals: Optional base qualities; if given and non-empty, must have the
        same length as `bases`.
      cigar: Optional cigar, converted with `_cigar.to_cigar_units`; a falsy
        value yields an empty cigar.
      mapq: Mapping quality of the alignment.
      chrom: Reference name of the alignment position.
      start: Value stored as the alignment position.
      name: Fragment name of the read.

    Returns:
      The constructed `reads_pb2.Read`.

    Raises:
      ValueError: If `quals` is non-empty and its length differs from `bases`.
    """
    if quals and len(quals) != len(bases):
        raise ValueError('Incompatable bases and quals', bases, quals)

    position = position_pb2.Position(reference_name=chrom, position=start)
    cigar_units = _cigar.to_cigar_units(cigar) if cigar else []
    alignment = reads_pb2.LinearAlignment(
        position=position, mapping_quality=mapq, cigar=cigar_units)

    return reads_pb2.Read(
        fragment_name=name,
        proper_placement=True,
        read_number=1,
        number_reads=2,
        aligned_sequence=bases,
        aligned_quality=quals,
        alignment=alignment)
Example #4
0
def trim_read(read, region):
    """Trim a read down to the part that aligns within a given region.

    The following properties of the read are updated, trimming on both sides as
    necessary to save only the parts of the read that fit fully within the
    region, potentially starting and ending at the region's boundaries:
    - The alignment position (read.alignment.position.position).
    - The read sequence (read.aligned_sequence).
    - Base qualities (read.aligned_quality).
    - The cigar string of the alignment (read.alignment.cigar)

    Args:
      read: A `nucleus.protos.Read` that is aligned to the region.
      region: A `nucleus.protos.Range` region.

    Returns:
      a new `nucleus.protos.Read` trimmed to the region.

    Raises:
      ValueError: if `read` has no alignment set.
    """
    # Reading a message-typed field never yields a falsy value (an unset field
    # reads back as a default instance), so presence has to be tested with
    # HasField rather than by truthiness.
    if not read.HasField('alignment'):
        raise ValueError('Read must already be aligned.')

    read_start = read.alignment.position.position

    # Number of reference bases between the read start and the region start;
    # zero when the read starts inside the region.
    trim_left = max(region.start - read_start, 0)

    # Reference span from the later of the two starts up to the region end.
    ref_length = region.end - max(region.start, read_start)
    # trim_cigar is defined elsewhere; its results are used below as the new
    # cigar units, the offset into the read's bases, and the trimmed length.
    new_cigar, read_trim, new_read_length = trim_cigar(read.alignment.cigar,
                                                       trim_left, ref_length)

    # Copy everything but aligned_sequence and aligned_quality fields of the read
    # to get all recursive properties and prevent mutating the original.
    new_read = reads_pb2.Read()
    new_read.fragment_name = read.fragment_name
    new_read.id = read.id
    new_read.read_group_id = read.read_group_id
    new_read.read_group_set_id = read.read_group_set_id
    new_read.read_number = read.read_number
    new_read.fragment_length = read.fragment_length
    new_read.number_reads = read.number_reads
    for each_info_key in read.info:
        new_read.info[each_info_key].CopyFrom(read.info[each_info_key])
    new_read.alignment.position.position = read.alignment.position.position
    new_read.alignment.position.reference_name = read.alignment.position.reference_name
    new_read.alignment.position.reverse_strand = read.alignment.position.reverse_strand
    new_read.alignment.mapping_quality = read.alignment.mapping_quality
    # Following fields are not needed but we copy them for consistency:
    new_read.next_mate_position.CopyFrom(read.next_mate_position)
    new_read.proper_placement = read.proper_placement
    new_read.duplicate_fragment = read.duplicate_fragment
    new_read.failed_vendor_quality_checks = read.failed_vendor_quality_checks
    new_read.secondary_alignment = read.secondary_alignment
    new_read.supplementary_alignment = read.supplementary_alignment

    # When bases were trimmed on the left, the read now starts at the region
    # start instead of its original position.
    if trim_left != 0:
        new_read.alignment.position.position = region.start

    trimmed_bases = slice(read_trim, read_trim + new_read_length)
    # Set aligned_sequence, a string:
    new_read.aligned_sequence = read.aligned_sequence[trimmed_bases]
    # Set aligned_quality, a repeated integer:
    new_read.aligned_quality[:] = read.aligned_quality[trimmed_bases]

    # Direct assignment on a repeated message field is not allowed, so setting
    # the cigar by using 'extend'.
    new_read.alignment.cigar.extend(new_cigar)

    return new_read
 def _raw_next(self):
   """Fetches one record from the wrapped iterable.

   A fresh, empty Read proto is handed to `self._cc_iterable.PythonNext`.

   Returns:
     A `(read, flag)` tuple: the Read proto that was passed to `PythonNext`
     and the value `PythonNext` returned (presumably truthy while a record
     was produced -- confirm against the wrapped iterable).
   """
   read_proto = reads_pb2.Read()
   has_more = self._cc_iterable.PythonNext(read_proto)
   return read_proto, has_more