def copy_read(read, part):
  """Creates a new read part by copying `read` and resetting its alignment.

  The copy keeps every field of `read` except the aligned sequence, the
  aligned qualities and the alignment message. The alignment is cleared and
  then repopulated with only the reference name, the strand flag and the
  mapping quality of the original read.

  Args:
    read: The read proto to copy. It is not modified.
    part: Value appended to the fragment name as a `_p{part}` suffix.

  Returns:
    A new `reads_pb2.Read` representing the requested part.
  """
  source_alignment = read.alignment

  new_read = reads_pb2.Read()
  new_read.CopyFrom(read)
  new_read.fragment_name = f'{new_read.fragment_name}_p{part}'

  # Drop the per-base payload; the caller fills it in for the new part.
  new_read.aligned_sequence = ''
  del new_read.aligned_quality[:]

  # Reset alignment information, then restore only the fields that still
  # apply to the new part.
  # Note: If long reads will be used here, convert to the approach used for
  # read trimming.
  part_alignment = new_read.alignment
  part_alignment.Clear()
  part_alignment.mapping_quality = source_alignment.mapping_quality
  part_alignment.position.reference_name = (
      source_alignment.position.reference_name)
  part_alignment.position.reverse_strand = (
      source_alignment.position.reverse_strand)
  return new_read
def make_read(bases,
              start,
              quals=None,
              cigar=None,
              mapq=50,
              chrom='chr1',
              name=None):
  """Builds a nucleus.genomics.v1.Read proto for use in tests.

  Args:
    bases: Sequence stored in `aligned_sequence`.
    start: Position stored in the alignment's `position.position`.
    quals: Optional base qualities; when given (and non-empty) there must be
      exactly one per base.
    cigar: Optional cigar description passed to `_cigar.to_cigar_units`; a
      falsy value yields an empty cigar.
    mapq: Mapping quality stored on the alignment.
    chrom: Reference name stored on the alignment position.
    name: Fragment name. When falsy, a name is derived from
      `make_read.counter`.

  Returns:
    The constructed `reads_pb2.Read`.

  Raises:
    ValueError: If `quals` is non-empty and its length differs from `bases`.
  """
  if quals and len(bases) != len(quals):
    raise ValueError('Incompatable bases and quals', bases, quals)

  # Fall back to a counter-based name so unnamed reads stay distinguishable.
  fragment_name = name or 'read_' + str(make_read.counter)

  position = position_pb2.Position(reference_name=chrom, position=start)
  cigar_units = _cigar.to_cigar_units(cigar) if cigar else []
  alignment = reads_pb2.LinearAlignment(
      position=position, mapping_quality=mapq, cigar=cigar_units)

  read = reads_pb2.Read(
      fragment_name=fragment_name,
      proper_placement=True,
      read_number=1,
      number_reads=2,
      aligned_sequence=bases,
      aligned_quality=quals,
      alignment=alignment)

  # The counter advances on every successful call, named or not.
  make_read.counter += 1
  return read
def make_read(bases,
              quals=None,
              cigar=None,
              mapq=50,
              chrom='chr1',
              start=1,
              name='read'):
  """Builds a third_party.nucleus.protos.Read proto for use in tests.

  Args:
    bases: Sequence stored in `aligned_sequence`.
    quals: Optional base qualities; when given (and non-empty) there must be
      exactly one per base.
    cigar: Optional cigar description passed to `_cigar.to_cigar_units`; a
      falsy value yields an empty cigar.
    mapq: Mapping quality stored on the alignment.
    chrom: Reference name stored on the alignment position.
    start: Position stored in the alignment's `position.position`.
    name: Fragment name of the read.

  Returns:
    The constructed `reads_pb2.Read`.

  Raises:
    ValueError: If `quals` is non-empty and its length differs from `bases`.
  """
  if quals and len(bases) != len(quals):
    raise ValueError('Incompatable bases and quals', bases, quals)

  position = position_pb2.Position(reference_name=chrom, position=start)
  cigar_units = _cigar.to_cigar_units(cigar) if cigar else []
  alignment = reads_pb2.LinearAlignment(
      position=position, mapping_quality=mapq, cigar=cigar_units)

  return reads_pb2.Read(
      fragment_name=name,
      proper_placement=True,
      read_number=1,
      number_reads=2,
      aligned_sequence=bases,
      aligned_quality=quals,
      alignment=alignment)
def trim_read(read, region):
  """Trim a read down to the part that aligns within a given region.

  The following properties of the read are updated, trimming on both sides as
  necessary to save only the parts of the read that fit fully within the
  region, potentially starting and ending at the region's boundaries:

  - The alignment position (read.alignment.position.position).
  - The read sequence (read.aligned_sequence).
  - Base qualities (read.aligned_quality).
  - The cigar string of the alignment (read.alignment.cigar)

  Args:
    read: A `nucleus.protos.Read` that is aligned to the region.
    region: A `nucleus.protos.Range` region.

  Returns:
    a new `nucleus.protos.Read` trimmed to the region. The input read is not
    modified.

  Raises:
    ValueError: If `read` has no alignment set.
  """
  # Accessing a singular sub-message attribute always yields a message object,
  # so its truthiness cannot signal absence; presence must be checked
  # explicitly with HasField.
  if not read.HasField('alignment'):
    raise ValueError('Read must already be aligned.')

  read_start = read.alignment.position.position
  # Number of reference bases to drop from the left end of the read.
  trim_left = max(region.start - read_start, 0)
  # Reference span from the trimmed read start up to the end of the region.
  ref_length = region.end - max(region.start, read_start)
  new_cigar, read_trim, new_read_length = trim_cigar(read.alignment.cigar,
                                                     trim_left, ref_length)
  read_end = read_trim + new_read_length

  # Copy everything but aligned_sequence and aligned_quality fields of the read
  # to get all recursive properties and prevent mutating the original.
  new_read = reads_pb2.Read()
  new_read.fragment_name = read.fragment_name
  new_read.id = read.id
  new_read.read_group_id = read.read_group_id
  new_read.read_group_set_id = read.read_group_set_id
  new_read.read_number = read.read_number
  new_read.fragment_length = read.fragment_length
  new_read.number_reads = read.number_reads
  for each_info_key in read.info:
    new_read.info[each_info_key].CopyFrom(read.info[each_info_key])
  new_read.alignment.position.position = read_start
  new_read.alignment.position.reference_name = (
      read.alignment.position.reference_name)
  new_read.alignment.position.reverse_strand = (
      read.alignment.position.reverse_strand)
  new_read.alignment.mapping_quality = read.alignment.mapping_quality

  # Following fields are not needed but we copy them for consistency:
  new_read.next_mate_position.CopyFrom(read.next_mate_position)
  new_read.proper_placement = read.proper_placement
  new_read.duplicate_fragment = read.duplicate_fragment
  new_read.failed_vendor_quality_checks = read.failed_vendor_quality_checks
  new_read.secondary_alignment = read.secondary_alignment
  new_read.supplementary_alignment = read.supplementary_alignment

  # When bases were trimmed on the left, the read now starts at the region
  # boundary instead of its original position.
  if trim_left != 0:
    new_read.alignment.position.position = region.start

  # Set aligned_sequence, a string:
  new_read.aligned_sequence = read.aligned_sequence[read_trim:read_end]
  # Set aligned_quality, a repeated integer:
  new_read.aligned_quality[:] = read.aligned_quality[read_trim:read_end]

  # Direct assignment on a repeated message field is not allowed, so setting
  # the cigar by using 'extend'.
  new_read.alignment.cigar.extend(new_cigar)
  return new_read
def _raw_next(self):
  """Requests the next record from the wrapped iterable.

  A fresh `reads_pb2.Read` is handed to `self._cc_iterable.PythonNext`, which
  is expected to fill it in.

  Returns:
    A `(record, not_done)` tuple holding the proto passed to `PythonNext` and
    the value that call returned (presumably falsy once the iterable is
    exhausted -- confirm against the wrapped iterable).
  """
  next_read = reads_pb2.Read()
  has_more = self._cc_iterable.PythonNext(next_read)
  return next_read, has_more