Example #1
0
def set_pair_info(r1: AlignedSegment,
                  r2: AlignedSegment,
                  proper_pair: bool = True) -> None:
    """Marks two mapped records as mates, overwriting any existing pairing information.

    Works on records that already carry pairing flags as well as on independent R1 and R2
    records that are currently flagged as single-end.  Both records are modified in place.

    Args:
        r1: the record to flag as read 1; must be mapped
        r2: the record to flag as read 2; must be mapped and have the same query name as r1
        proper_pair: the value written to the proper-pair flag of both records

    Raises:
        AssertionError: if either record is unmapped or the two query names differ
    """
    assert not r1.is_unmapped, f"Cannot process unmapped mate {r1.query_name}/1"
    assert not r2.is_unmapped, f"Cannot process unmapped mate {r2.query_name}/2"
    assert r1.query_name == r2.query_name, f"Attempting to pair reads with different qnames."

    # Pairing flags: both records are paired; r1 is first-of-pair, r2 is second-of-pair.
    for read, is_first in ((r1, True), (r2, False)):
        read.is_paired = True
        read.is_proper_pair = proper_pair
        read.is_read1 = is_first
        read.is_read2 = not is_first

    # Each record describes where its mate aligned, and stores the mate's cigar in MC.
    for read, mate in ((r2, r1), (r1, r2)):
        read.next_reference_id = mate.reference_id
        read.next_reference_start = mate.reference_start
        read.mate_is_reverse = mate.is_reverse
        read.mate_is_unmapped = False
        read.set_tag("MC", mate.cigarstring)

    # r1 receives the insert size as computed by isize(); r2 receives its negation.
    tlen = isize(r1, r2)
    r1.template_length, r2.template_length = tlen, -tlen
Example #2
0
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read, modifying the record in place.

    If the record is flagged as reverse-strand, its bases are reverse-complemented and its
    qualities reversed before the reverse-strand flag is cleared.  Records that carry no
    sequence and/or no qualities (``None``) are handled without error.

    The reference id/start, cigar, mapping quality and template length are then reset, the
    duplicate/secondary/supplementary/proper-pair flags are cleared and the unmapped flag is
    set.  Mate-related fields are not touched by this function.

    Args:
        rec: the record to convert into an unmapped record
    """
    if rec.is_reverse:
        # Grab the qualities *before* touching the sequence so that they can be restored
        # after the sequence has been replaced.
        quals = rec.query_qualities
        if quals is not None:
            quals.reverse()

        bases = rec.query_sequence
        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)

        if quals is not None:
            rec.query_qualities = quals

        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True
Example #3
0
    def _set_mate_info(self, r1: pysam.AlignedSegment,
                       r2: pysam.AlignedSegment) -> None:
        """Populates the mate-related fields on both records of a pair, in place.

        Three situations are handled: both reads unmapped, exactly one read unmapped, and
        both reads mapped.  Template length and the proper-pair flag are always reset first
        and are only re-populated when both reads are mapped to the same reference.

        Args:
            r1: the first read in the pair
            r2: the second read in the pair
        """
        pairings = ((r1, r2), (r2, r1))

        # Start from a clean slate; the both-mapped case below fills these back in.
        for read, _ in pairings:
            read.template_length = 0
            read.is_proper_pair = False

        # Case 1: neither read is mapped, so just blank out all positional information.
        if r1.is_unmapped and r2.is_unmapped:
            for read, mate in pairings:
                read.reference_id = read.next_reference_id = sam.NO_REF_INDEX
                read.reference_start = read.next_reference_start = sam.NO_REF_POS
                read.is_unmapped = True
                read.mate_is_unmapped = True
                read.is_proper_pair = False
                read.mate_is_reverse = mate.is_reverse
            return

        # Case 2: exactly one read is mapped; the unmapped read is placed at its mate's
        # position and the mapped read is told that its mate is unmapped.
        if r1.is_unmapped or r2.is_unmapped:
            mapped, unmapped = (r1, r2) if r2.is_unmapped else (r2, r1)

            unmapped.reference_id = unmapped.next_reference_id = mapped.reference_id
            unmapped.reference_start = unmapped.next_reference_start = mapped.reference_start
            unmapped.mate_is_reverse = mapped.is_reverse
            unmapped.mate_is_unmapped = False
            unmapped.set_tag("MC", mapped.cigarstring)

            mapped.next_reference_id = unmapped.reference_id
            mapped.next_reference_start = unmapped.reference_start
            mapped.mate_is_reverse = unmapped.is_reverse
            mapped.mate_is_unmapped = True
            return

        # Case 3: both reads are mapped, so cross-reference them.
        for read, mate in pairings:
            read.next_reference_id = mate.reference_id
            read.next_reference_start = mate.reference_start
            read.mate_is_reverse = mate.is_reverse
            read.mate_is_unmapped = False
            read.set_tag("MC", mate.cigarstring)

        # Template length stays 0 when the reads are on different references.
        if r1.reference_id != r2.reference_id:
            return

        # Strand-aware anchor: reference_end for a reverse-strand read, else reference_start.
        r1_anchor = r1.reference_end if r1.is_reverse else r1.reference_start
        r2_anchor = r2.reference_end if r2.is_reverse else r2.reference_start
        span = r2_anchor - r1_anchor
        r1.template_length = span
        r2.template_length = -span

        # Arbitrarily flag the pair as proper when the reads are on opposite strands, the
        # template length is at most 1000, and the forward read's anchor lies strictly
        # before the reverse read's anchor.
        opposite_strands = r1.is_reverse != r2.is_reverse
        if opposite_strands and abs(r1.template_length) <= 1000:
            if r1.is_reverse:
                fwd_anchor, rev_anchor = r2_anchor, r1_anchor
            else:
                fwd_anchor, rev_anchor = r1_anchor, r2_anchor

            if fwd_anchor < rev_anchor:
                r1.is_proper_pair = r2.is_proper_pair = True