Beispiel #1
0
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read, modifying the record in place.

    If the record is flagged as reverse-strand, its bases are passed through
    ``dnautils.reverse_complement`` and its qualities are reversed before the
    reverse flag is cleared. Afterwards the reference id/position, CIGAR,
    mapping quality, template length and the duplicate / secondary /
    supplementary / proper-pair flags are reset and the unmapped flag is set.

    Records without stored qualities (``query_qualities is None``) or without
    a stored sequence are handled without raising.

    Args:
        rec: the alignment record to update in place.
    """
    if rec.is_reverse:
        # Grab the qualities *before* assigning the sequence: the original code
        # does the same, presumably because pysam resets qualities when the
        # sequence is replaced.
        quals = rec.query_qualities
        bases = rec.query_sequence
        if bases is not None:
            if quals is not None:
                quals.reverse()
            rec.query_sequence = dnautils.reverse_complement(bases)
            rec.query_qualities = quals
        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True
Beispiel #2
0
def _cigar_before_large_event(cigar, max_event_size: int, min_anchor_size: int):
    """Scan ``cigar`` in the given order and cut it at the first oversized event.

    Operations are kept unconditionally until at least ``min_anchor_size``
    ``CIGAR.EQ`` bases have been seen. After that, consecutive operations in
    ``EVENT_STATES`` are accumulated; any other operation resets the running
    event size.

    Args:
        cigar: iterable of ``(state, count)`` tuples, ordered starting from the
            anchored end of the read.
        max_event_size: largest run of consecutive event bases that is tolerated.
        min_anchor_size: number of ``CIGAR.EQ`` bases required before events
            start being counted.

    Returns:
        The kept ``(state, count)`` tuples with trailing event operations
        removed when the running event size exceeds ``max_event_size``, or
        ``None`` when it never does (the alignment needs no change).

    Raises:
        NotImplementedError: if the cigar contains ``CIGAR.M``.
    """
    kept = []
    event_size = 0
    anchor = 0
    for state, count in cigar:
        if state == CIGAR.M:
            raise NotImplementedError('match v mismatch must be specified')
        elif anchor < min_anchor_size:
            # still building the anchor; only exact matches contribute to it
            if state == CIGAR.EQ:
                anchor += count
        elif state in EVENT_STATES:
            event_size += count
            if event_size > max_event_size:
                break
        else:
            event_size = 0
        kept.append((state, count))

    if event_size <= max_event_size:
        return None

    # Drop the smaller events leading directly into the oversized one. The
    # emptiness check avoids an IndexError when nothing but events was kept.
    while kept and kept[-1][0] in EVENT_STATES:
        kept.pop()
    return kept


def convert_events_to_softclipping(
    read: pysam.AlignedSegment,
    orientation: str,
    max_event_size: int,
    min_anchor_size: Optional[int] = None,
) -> pysam.AlignedSegment:
    """
    given an alignment, simplifies the alignment by grouping everything past the first anchor and including the
    first event considered too large and unaligning them turning them into softclipping

    Args:
        read: the alignment to simplify; it is never modified, a copy is returned when a change is required
        orientation: ``ORIENT.LEFT`` to scan the cigar from its start (softclipping the end) or
            ``ORIENT.RIGHT`` to scan from its end (softclipping the start and shifting ``reference_start``)
        max_event_size: maximum size of consecutive events tolerated before the remainder is unaligned
        min_anchor_size: exact-match bases required before events are considered; defaults to ``max_event_size``

    Returns:
        the input read when no event exceeds ``max_event_size``, otherwise a copy with the adjusted cigar
        (and, for ``ORIENT.RIGHT``, the adjusted reference start)

    Raises:
        NotImplementedError: if the cigar contains ``CIGAR.M`` (match v mismatch must be specified)
        ValueError: if ``orientation`` is neither ``ORIENT.LEFT`` nor ``ORIENT.RIGHT``
    """
    if min_anchor_size is None:
        min_anchor_size = max_event_size

    if orientation == ORIENT.LEFT:
        scan_order = read.cigar
    elif orientation == ORIENT.RIGHT:
        scan_order = read.cigar[::-1]  # first event from the right
    else:
        raise ValueError('orientation must be specified', orientation)

    kept = _cigar_before_large_event(scan_order, max_event_size, min_anchor_size)
    if kept is None:
        return read

    query_aligned = sum(count for state, count in kept if state in QUERY_ALIGNED_STATES)
    softclip = (CIGAR.S, len(read.query_sequence) - query_aligned)

    if orientation == ORIENT.LEFT:
        result = copy(read)
        result.cigar = kept + [softclip]
        return result

    # more complicated than left b/c need to also adjust the start position
    originally_refaligned = sum(
        count for state, count in read.cigar if state in REFERENCE_ALIGNED_STATES
    )
    refaligned = sum(count for state, count in kept if state in REFERENCE_ALIGNED_STATES)
    result = copy(read)
    # `kept` was collected right-to-left, so flip it back into cigar order
    result.cigar = [softclip] + kept[::-1]
    result.reference_start += originally_refaligned - refaligned
    return result