def merge_annotated_clusters(
    biggest: pysam.AlignedSegment, other: pysam.AlignedSegment
) -> pysam.AlignedSegment:
    """Folds a smaller annotated cluster into a larger one.

    Both arguments are annotated aligned segments, each standing for one
    cluster. The larger segment is updated in place: its cluster ID is given a
    trailing "+" (unless it already has one) and its read count becomes the sum
    of both clusters' read counts. The smaller segment is only read from.

    Args:
        biggest: The larger of the 2 clusters, with a higher read number.
        other: The smaller of the 2 clusters, with a lower read number.

    Returns:
        ``biggest``, now representing the merged cluster.
    """
    # Append the "+" suffix at most once, then write the ID back.
    current_id = biggest.get_tag(CLUSTER_ID_TAG)
    suffixed_id = current_id if current_id.endswith("+") else current_id + "+"
    biggest.set_tag(CLUSTER_ID_TAG, suffixed_id, "Z")

    # The merged cluster accounts for the reads of both inputs.
    reads_in_biggest = biggest.get_tag(NUM_READS_TAG)
    reads_in_other = other.get_tag(NUM_READS_TAG)
    biggest.set_tag(NUM_READS_TAG, reads_in_biggest + reads_in_other, "i")

    return biggest
def set_qc_fail(rec: pysam.AlignedSegment, tool: Callable[..., Any], reason: str) -> None:
    """Sets the QC fail flag, and adds tags containing the tool name and reason for failing.

    The reason is validated before the record is touched, so a rejected reason
    leaves the record unmodified.

    Args:
        rec: the record to fail
        tool: the tool (as a callable) that failed this record; its ``__name__``
            is what gets stored in the tool tag
        reason: the reason for failing; must not contain tab characters

    Raises:
        AssertionError: if ``reason`` contains a tab character
    """
    # Raised explicitly instead of using an ``assert`` statement so the check
    # still runs under ``python -O``. The exception type and message are kept
    # so existing callers observe the same failure.
    if '\t' in reason:
        raise AssertionError(f"Reason may not contain tabs: {reason}")

    rec.is_qcfail = True
    rec.set_tag(QcFailToolTag, tool.__name__)
    rec.set_tag(QcFailReasonTag, reason)
def _cleanup(rec: AlignedSegment, tags_to_invalidate: Iterable[str]) -> None:
    """Removes extended tags from a record that may have become invalid after clipping.

    Every tag in ``tags_to_invalidate`` is passed to ``rec.set_tag`` with a
    value of ``None``, in iteration order.
    """
    for stale_tag in tags_to_invalidate:
        rec.set_tag(stale_tag, None)
def add_tags(alignedSegment: pysam.AlignedSegment) -> pysam.AlignedSegment:
    """
    Takes an AlignedSegment and adds percent identity, query coverage and
    alignment length as tags.

    Tags written:

    * ``al`` (int):   alignment length = M + I + D CIGAR operations
    * ``id`` (float): percent identity = (al - NM) / al
    * ``qc`` (float): query coverage   = (M + I) / inferred read length

    ``id`` and ``qc`` are fractions (1.0 means 100%), not percentages.

    If the segment is unmapped then it is returned with a percent identity of
    0, a query coverage of 0 and an alignment length of 0. A mapped segment
    whose CIGAR yields no M/I/D operations (e.g. CIGAR unavailable) gets a
    percent identity of 0 instead of raising ``ZeroDivisionError``; likewise
    the query coverage is 0 when the read length cannot be inferred.

    If the segment already carries an ``id`` tag it is returned unchanged.

    :param alignedSegment: The pysam AlignedSegment object
    :return: alignedSegment: The updated pysam AlignedSegment object
    :raises KeyError: if a mapped segment with a non-zero alignment length has
        no ``NM`` tag
    """
    # Assuming that if the id tag is present that the other tags are also there.
    if alignedSegment.has_tag('id'):
        return alignedSegment

    if alignedSegment.is_unmapped:
        alignedSegment.set_tag('id', 0.0, 'f')
        alignedSegment.set_tag('al', 0, 'i')
        alignedSegment.set_tag('qc', 0.0, 'f')
        return alignedSegment

    # Per-operation counts; the first three slots are M, I and D.
    cigar_op_counts = alignedSegment.get_cigar_stats()[0]
    alnlength = sum(cigar_op_counts[0:3])
    query_covered_bases = sum(cigar_op_counts[0:2])
    # May be None when the record has no CIGAR to infer a length from.
    query_length = alignedSegment.infer_read_length()

    if alnlength:
        mismatches = alignedSegment.get_tag('NM')
        percid = (alnlength - mismatches) / alnlength
    else:
        # Nothing aligned: identity is undefined, report it as 0.
        percid = 0.0

    qcov = query_covered_bases / query_length if query_length else 0.0

    alignedSegment.set_tag('id', percid, 'f')
    alignedSegment.set_tag('qc', qcov, 'f')
    alignedSegment.set_tag('al', alnlength, 'i')
    return alignedSegment