def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Remove mapping information from a read, modifying it in place.

    If the read is flagged as reverse-strand, its bases are reverse
    complemented and its qualities reversed so that both are stored in the
    original sequencing orientation, and the reverse flag is cleared.  The
    reference id, start position, CIGAR, mapping quality and template length
    are then reset, the duplicate / secondary / supplementary / proper-pair
    flags are cleared, and the read is flagged as unmapped.

    Reads without base qualities (``query_qualities`` is ``None``) or without
    a stored sequence (``query_sequence`` is ``None``) are handled: the
    missing field is simply left untouched.

    Args:
        rec: the read to update (presumably a pysam ``AlignedSegment``).
    """
    if rec.is_reverse:
        # Grab the qualities BEFORE touching the sequence: per the pysam
        # documentation, assigning ``query_sequence`` erases any qualities
        # previously stored on the record, so they must be restored afterwards.
        quals = rec.query_qualities
        bases = rec.query_sequence
        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)
        if quals is not None:
            quals.reverse()
            rec.query_qualities = quals
        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True
def _collect_anchored_cigar(cigar, max_event_size: int, min_anchor_size: int):
    """Scan CIGAR tuples from the anchored end and keep the well-aligned prefix.

    Tuples are kept unconditionally until ``min_anchor_size`` exact-match
    (``CIGAR.EQ``) bases have been accumulated.  After that, consecutive
    ``EVENT_STATES`` tuples are summed; the scan stops as soon as that running
    total exceeds ``max_event_size``.  Any non-event tuple resets the total.

    Args:
        cigar: iterable of ``(state, count)`` tuples, ordered starting from
            the end of the alignment that acts as the anchor.
        max_event_size: largest run of consecutive event bases tolerated.
        min_anchor_size: exact-match bases required before events are counted.

    Returns:
        ``(kept, exceeded)`` where ``kept`` is the list of retained tuples (in
        scan order) and ``exceeded`` tells whether the event threshold was
        passed.  When ``exceeded`` is true, trailing event tuples have already
        been stripped from ``kept``; ``kept`` may then be empty.

    Raises:
        NotImplementedError: if a ``CIGAR.M`` tuple is met during the scan.
    """
    kept = []
    anchor = 0
    event_size = 0
    for state, count in cigar:
        if state == CIGAR.M:
            raise NotImplementedError('match v mismatch must be specified')
        if anchor < min_anchor_size:
            # still building the anchor: only exact matches count towards it
            if state == CIGAR.EQ:
                anchor += count
        elif state in EVENT_STATES:
            event_size += count
            if event_size > max_event_size:
                break
        else:
            event_size = 0
        kept.append((state, count))

    exceeded = event_size > max_event_size
    if exceeded:
        # Drop the events leading up to the oversized one.  The emptiness
        # check avoids an IndexError when nothing but events was kept
        # (possible when min_anchor_size <= 0).
        while kept and kept[-1][0] in EVENT_STATES:
            kept.pop()
    return kept, exceeded


def convert_events_to_softclipping(
    read: pysam.AlignedSegment,
    orientation: str,
    max_event_size: int,
    min_anchor_size: Optional[int] = None,
) -> pysam.AlignedSegment:
    """
    given an alignment, simplifies the alignment by grouping everything past the first anchor and including the
    first event considered too large and unaligning them turning them into softclipping

    Args:
        read: the alignment to simplify; it is never modified, a shallow copy
            is altered and returned instead when a change is required
        orientation: ``ORIENT.LEFT`` to anchor on the start of the CIGAR and
            soft-clip its tail, ``ORIENT.RIGHT`` to anchor on the end of the
            CIGAR and soft-clip its head
        max_event_size: a run of consecutive event bases larger than this
            triggers the soft-clipping
        min_anchor_size: number of exact-match bases that must be seen before
            events start being counted; defaults to ``max_event_size``

    Returns:
        the input read when no oversized event follows the anchor, otherwise
        a copy with the adjusted CIGAR (and, for ``ORIENT.RIGHT``, a
        ``reference_start`` shifted by the reference bases that were unaligned)

    Raises:
        NotImplementedError: if the CIGAR uses ``CIGAR.M`` instead of
            distinguishing matches from mismatches
        ValueError: if ``orientation`` is neither ``ORIENT.LEFT`` nor ``ORIENT.RIGHT``
    """
    if min_anchor_size is None:
        min_anchor_size = max_event_size

    if orientation == ORIENT.LEFT:
        kept, exceeded = _collect_anchored_cigar(read.cigar, max_event_size, min_anchor_size)
        if exceeded:
            aligned = sum(count for state, count in kept if state in QUERY_ALIGNED_STATES)
            # everything in the query not covered by the kept tuples becomes softclipping
            kept.append((CIGAR.S, len(read.query_sequence) - aligned))
            read = copy(read)
            read.cigar = kept
    elif orientation == ORIENT.RIGHT:
        # more complicated than left b/c need to also adjust the start position
        kept, exceeded = _collect_anchored_cigar(
            read.cigar[::-1], max_event_size, min_anchor_size  # first event from the right
        )
        if exceeded:
            originally_refaligned = sum(
                count for state, count in read.cigar if state in REFERENCE_ALIGNED_STATES
            )
            refaligned = sum(count for state, count in kept if state in REFERENCE_ALIGNED_STATES)
            aligned = sum(count for state, count in kept if state in QUERY_ALIGNED_STATES)
            softclipped = len(read.query_sequence) - aligned
            # kept is in right-to-left scan order; flip it back behind the new softclip
            new_cigar = [(CIGAR.S, softclipped)] + kept[::-1]
            read = copy(read)
            read.cigar = new_cigar
            # the alignment now starts later by the reference bases that were unaligned
            read.reference_start += originally_refaligned - refaligned
    else:
        raise ValueError('orientation must be specified', orientation)
    return read