def add_tags(alignedSegment: pysam.AlignedSegment) -> pysam.AlignedSegment:
    """
    Takes an AlignedSegment and adds percent identity, alignment length and
    query coverage as tags.

    alignment length ('al') = M + I + D, where M also counts '=' and 'X'
    mismatches              = NM
    percent identity ('id') = (alignment length - NM) / alignment length
    query coverage ('qc')   = (M + I) / inferred read length

    The percent identity is a value between 0.0 and 1.0.

    If the segment is unmapped it is returned with a percent identity of 0,
    an alignment length of 0 and a query coverage of 0. A mapped segment
    whose CIGAR holds no aligned operations (or that has no CIGAR at all)
    gets the same zero values instead of triggering a division by zero.

    The segment is modified in place and also returned.

    :param alignedSegment: The pysam AlignedSegment object
    :return: alignedSegment: The updated pysam AlignedSegment object
    :raises KeyError: if a mapped segment with aligned bases has no NM tag
    """
    # Assuming that if the id tag is present that the other tags are also there.
    if alignedSegment.has_tag('id'):
        return alignedSegment

    if alignedSegment.is_unmapped:
        alignedSegment.set_tag('id', 0.0, 'f')
        alignedSegment.set_tag('al', 0, 'i')
        alignedSegment.set_tag('qc', 0.0, 'f')
        return alignedSegment

    # Per-operation base counts, ordered "MIDNSHP=X" (then B and NM).
    cigar_counts = alignedSegment.get_cigar_stats()[0]
    # '=' (index 7) and 'X' (index 8) are the extended-CIGAR spelling of 'M',
    # so they must be counted as aligned columns as well.
    aligned_columns = cigar_counts[0] + cigar_counts[7] + cigar_counts[8]
    query_covered_bases = aligned_columns + cigar_counts[1]
    alnlength = query_covered_bases + cigar_counts[2]
    # None when the segment carries no CIGAR string.
    query_length = alignedSegment.infer_read_length()

    if alnlength > 0:
        mismatches = alignedSegment.get_tag('NM')
        percid = (alnlength - mismatches) / float(alnlength)
    else:
        # Nothing aligned: identity is undefined, report it as 0.
        percid = 0.0

    if query_length:
        qcov = query_covered_bases / float(query_length)
    else:
        qcov = 0.0

    alignedSegment.set_tag('id', percid, 'f')
    alignedSegment.set_tag('qc', qcov, 'f')
    alignedSegment.set_tag('al', alnlength, 'i')
    return alignedSegment
def from_aligned_segment(cls, align: pysam.AlignedSegment) -> "AlignmentRecord":
    """Extract information from a pysam aligned segment.

    The query name must have the form ``<read_name>:<read_idx>:<align_idx>``.
    It is split from the right, so ``read_name`` may itself contain colons.

    Unmapped segments are reported with ``align_type="unmapped"``,
    ``chrom="NULL"`` and zeroed coordinates and alignment score. Mapped
    segments are classified as "secondary", "supplementary" or "primary",
    checked in that order, and take their score from the ``AS`` tag.

    The base quality score is the ``mean_qscore`` of the whole query for
    unmapped segments and of the aligned part of the query for mapped ones,
    rounded with ``np.rint``; it is 0 when the segment has no qualities.

    The ``HP``, ``PS`` and ``PC`` tags, when present, are converted to int
    and passed as ``haplotype``, ``phase_set`` and ``phase_qual``.

    Args:
        align: the segment to convert.

    Returns:
        The object built by calling ``cls`` with the extracted fields.

    Raises:
        ValueError: if the query name does not end in two integer fields.
    """
    # rsplit keeps any colons that belong to the read name itself.
    read_name, read_idx, align_idx = align.query_name.rsplit(":", 2)
    read_idx, align_idx = int(read_idx), int(align_idx)

    if align.is_unmapped:
        align_cat = "unmapped"
        chrom, start, end, align_score = "NULL", 0, 0, 0
        read_length = align.query_length
        quals = align.query_qualities
    else:
        chrom, start, end = (
            align.reference_name,
            align.reference_start,
            align.reference_end,
        )
        read_length = align.infer_read_length()
        align_score = align.get_tag("AS")
        quals = align.query_alignment_qualities
        if align.is_secondary:
            align_cat = "secondary"
        elif align.is_supplementary:
            align_cat = "supplementary"
        else:
            align_cat = "primary"

    # Segments without stored base qualities (mapped or not) score 0.
    if quals is None:
        align_base_qscore = 0
    else:
        align_base_qscore = mean_qscore(np.array(quals))

    optional = {
        key: int(align.get_tag(tag))
        for key, tag in (
            ("haplotype", "HP"),
            ("phase_set", "PS"),
            ("phase_qual", "PC"),
        )
        if align.has_tag(tag)
    }

    return cls(
        read_idx=read_idx,
        align_idx=align_idx,
        align_type=align_cat,
        chrom=chrom,
        start=start,
        end=end,
        strand=not align.is_reverse,
        read_name=read_name,
        read_length=read_length,
        read_start=align.query_alignment_start,
        read_end=align.query_alignment_end,
        mapping_quality=align.mapq,
        align_score=align_score,
        align_base_qscore=np.rint(align_base_qscore),
        **optional,
    )
def get_qc_fail(rec: pysam.AlignedSegment) -> Optional[Tuple[str, str]]:
    """Gets the tool and reason for why the QC fail flag is set, otherwise None.

    None is returned when the QC fail flag is not set. None is also returned
    when the flag is set but the tool tag or the reason tag is missing. Use the
    ``pysam.AlignedSegment.is_qcfail`` attribute to check if the record is
    simply QC failed.

    Args:
        rec: the record to inspect

    Returns:
        A ``(tool, reason)`` tuple holding the values of the ``QcFailToolTag``
        and ``QcFailReasonTag`` tags, or None as described above.
    """
    if not rec.is_qcfail:
        return None

    # Require both tags so a record carrying only the tool tag yields None
    # rather than an exception from the reason lookup.
    if not (rec.has_tag(QcFailToolTag) and rec.has_tag(QcFailReasonTag)):
        return None

    return (rec.get_tag(QcFailToolTag), rec.get_tag(QcFailReasonTag))