Esempio n. 1
0
def merge_annotated_clusters(
        biggest: pysam.AlignedSegment,
        other: pysam.AlignedSegment) -> pysam.AlignedSegment:
    """Fold one annotated cluster into another.

    Both arguments are annotated aligned segments, each standing for a cluster.
    The smaller cluster is absorbed by the larger one: the larger cluster's ID
    gets a trailing "+" (added only if it is not already there) and its read
    count becomes the sum of both clusters' read counts.

    Args:
        biggest: The larger of the 2 clusters, with a higher read number.
            It is modified in place.
        other: The smaller of the 2 clusters, with a lower read number.
            Only its read count is consulted.

    Returns:
        ``biggest``, now carrying the merged cluster ID and combined read count.
    """
    # Flag the surviving cluster's ID as "merged" exactly once.
    cluster_id = biggest.get_tag(CLUSTER_ID_TAG)
    flagged_id = cluster_id if cluster_id.endswith("+") else cluster_id + "+"
    biggest.set_tag(CLUSTER_ID_TAG, flagged_id, "Z")

    # The merged cluster accounts for the reads of both inputs.
    biggest.set_tag(
        NUM_READS_TAG,
        biggest.get_tag(NUM_READS_TAG) + other.get_tag(NUM_READS_TAG),
        "i")

    return biggest
Esempio n. 2
0
def set_qc_fail(rec: pysam.AlignedSegment, tool: Callable[..., Any],
                reason: str) -> None:
    """Marks a record as QC-failed and records which tool failed it and why.

    The record's QC fail flag is switched on, then two tags are written: one
    holding ``tool.__name__`` and one holding ``reason``.

    Args:
        rec: the record to fail
        tool: the tool (as a callable) that failed this record
        reason: the reason for failing; must not contain tab characters
    """
    assert '\t' not in reason, f"Reason may not contain tabs: {reason}"
    rec.is_qcfail = True
    # Tool tag first, then reason tag.
    annotations = (
        (QcFailToolTag, tool.__name__),
        (QcFailReasonTag, reason),
    )
    for tag_name, tag_value in annotations:
        rec.set_tag(tag_name, tag_value)
Esempio n. 3
0
def _cleanup(rec: AlignedSegment, tags_to_invalidate: Iterable[str]) -> None:
    """Drops extended tags that clipping may have left stale.

    Each tag named in ``tags_to_invalidate`` is set to ``None`` on ``rec``,
    in iteration order.
    """
    for stale_tag in tags_to_invalidate:
        # A value of None is how the tag gets removed from the record.
        rec.set_tag(stale_tag, None)
Esempio n. 4
0
def add_tags(alignedSegment: pysam.AlignedSegment) -> pysam.AlignedSegment:
    """ Takes an AlignedSegment and adds percent identity, alignment length and
    query coverage as tags.

    Tags written:
        al (int):   alignment length = MID (matched + inserted + deleted bases)
        id (float): percent identity = (MID - NM) / MID
        qc (float): query coverage   = MI / inferred read length

    The percent identity is normally a value between 0.0 and 1.0.
    If the segment already carries an 'id' tag it is returned untouched.
    If the segment is unmapped, or is mapped but its CIGAR contains no M, I or
    D bases (e.g. only clipping), it is returned with a percent identity of 0,
    an alignment length of 0 and a query coverage of 0 instead of failing with
    a division by zero.
    A segment with aligned bases must carry an 'NM' tag.

    :param alignedSegment: The pysam AlignedSegment object
    :return: alignedSegment: The same pysam AlignedSegment object, updated in place
    """

    # Assuming that if the id tag is present that the other tags are also there.
    if alignedSegment.has_tag('id'):
        return alignedSegment

    alnlength = 0
    query_covered_bases = 0
    if not alignedSegment.is_unmapped:
        # Fetch the per-operation base counts once and reuse them.
        base_counts = alignedSegment.get_cigar_stats()[0]
        alnlength = sum(base_counts[0:3])
        query_covered_bases = sum(base_counts[0:2])

    if alnlength == 0:
        # Nothing aligned: report zeros rather than dividing by zero (and do
        # not require an NM tag, which such records may not have).
        alignedSegment.set_tag('id', 0.0, 'f')
        alignedSegment.set_tag('al', 0, 'i')
        alignedSegment.set_tag('qc', 0.0, 'f')
        return alignedSegment

    query_length = alignedSegment.infer_read_length()
    mismatches = alignedSegment.get_tag('NM')
    percid = (alnlength - mismatches) / alnlength
    # The read length can be missing/zero (e.g. a deletion-only CIGAR); treat
    # that as no coverage.
    qcov = query_covered_bases / query_length if query_length else 0.0
    alignedSegment.set_tag('id', percid, 'f')
    alignedSegment.set_tag('qc', qcov, 'f')
    alignedSegment.set_tag('al', alnlength, 'i')
    return alignedSegment