def test_wholeProbeMaps_justAlleleMapsNothingElseMaps(self):
    """Check `_whole_query_probe_maps()` is truthy when only one base aligns.

    The CIGAR `5S1M50S` aligns just query offset 5, which is exactly the
    `[5,6)` interval carried in the query name; everything else is
    soft-clipped.
    """
    # Read-side fields of the synthetic SAM line.
    query_name, sequence = (
        "IV=[5,6);",
        "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
    )
    # Alignment-side fields.
    ref_name, pos, mapq, flag = "reference", 1, 60, 0
    cigar, nm, md = "5S1M50S", "NM:i:0", "MD:Z:1"

    sam_header = create_sam_header(ref_name, 55)
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"
    alignment = pysam.AlignedSegment.fromstring(sam_string, sam_header)

    assert Classification(record=alignment)._whole_query_probe_maps()
def test_wholeProbeMaps_doesNotMapInTheForwardStrand(self):
    """Check `_whole_query_probe_maps()` is falsy when the interval is clipped.

    The forward-strand record (flag 0) soft-clips the first 29 query bases
    (`29S27M`), so the `[11,21)` interval in the query name lies entirely
    inside the clipped prefix.
    """
    # Read-side fields of the synthetic SAM line.
    query_name, sequence = (
        "IV=[11,21);",
        "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
    )
    # Alignment-side fields.
    ref_name, pos, mapq, flag = "reference", 1, 60, 0
    cigar, nm, md = "29S27M", "NM:i:0", "MD:Z:27"

    sam_header = create_sam_header(ref_name, 55)
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"
    alignment = pysam.AlignedSegment.fromstring(sam_string, sam_header)

    whole_probe_maps = Classification(record=alignment)._whole_query_probe_maps()
    assert not whole_probe_maps
def test_wholeProbeMaps_probeStartsAtFirstAlignmentPositionMapsReturnsTrue(self):
    """Check `_whole_query_probe_maps()` is truthy at the left alignment edge.

    With CIGAR `11S45M` the first aligned query offset is 11, which is also
    the start of the `[11,21)` interval in the query name.
    """
    # Read-side fields of the synthetic SAM line.
    query_name, sequence = (
        "IV=[11,21);",
        "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
    )
    # Alignment-side fields.
    ref_name, pos, mapq, flag = "reference", 1, 60, 0
    cigar, nm, md = "11S45M", "NM:i:0", "MD:Z:45"

    sam_header = create_sam_header(ref_name, 55)
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"
    alignment = pysam.AlignedSegment.fromstring(sam_string, sam_header)

    assert Classification(record=alignment)._whole_query_probe_maps()
def test_wholeProbeMaps_unmappedRecordReturnsFalse(self):
    """Check `_whole_query_probe_maps()` is falsy for an unmapped record.

    The record carries flag 4 with `*` for both reference name and CIGAR,
    position 0 and mapping quality 0.
    """
    # The header still declares a reference; only the record is unmapped.
    header = create_sam_header("reference", 55)

    flag = 4  # SAM flag 0x4: segment unmapped
    ref_name = "*"
    pos = 0
    mapq = 0
    cigar = "*"
    query_name = "IV=[11,21);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    # No NM/MD tags are written for this record, so none are defined here.
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\tAS:i:0\tXS:i:0"

    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = Classification(record=record)

    assert not classification._whole_query_probe_maps()
def test_isCorrect_deletionInRefOfLastProbeCoreBaseReturnsFalse(self):
    """Check `RecallClassification.is_correct()` is falsy for an inserted base.

    CIGAR `20M1I35M` makes query offset 20 an insertion relative to the
    reference; offset 20 is the last position of the `[11,21)` interval
    in the query name.
    """
    ref_name = "reference"
    ref_length = 63
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "20M1I35M"
    nm = "NM:i:1"
    md = "MD:Z:55"
    mapq = 60
    pos = 1
    query_name = "IV=[11,21);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = RecallClassification(record=record)

    assert not classification.is_correct()
def test_isCorrect_mismatchInBaseAfterCoreProbeEndsReturnsTrue(self):
    """Check `RecallClassification.is_correct()` is truthy for a flank mismatch.

    The MD tag `21C34` puts the single mismatch at query offset 21, the
    first position past the `[11,21)` interval in the query name.
    """
    ref_name = "reference"
    ref_length = 64
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "56M"
    nm = "NM:i:1"
    md = "MD:Z:21C34"
    mapq = 60
    pos = 1
    query_name = "IV=[11,21);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = RecallClassification(record=record)

    assert classification.is_correct()
def test_isCorrect_probeIsDeletionBaseToRightIsMismatchReturnsFalse(self):
    """Check `RecallClassification.is_correct()` is falsy for a deletion probe
    whose right-hand neighbour mismatches.

    The query name carries the empty interval `[12,12)`, and the MD tag
    `12T43` puts the single mismatch at query offset 12.
    """
    ref_name = "reference"
    ref_length = 64
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "56M"
    nm = "NM:i:1"
    md = "MD:Z:12T43"
    mapq = 60
    pos = 6
    query_name = "IV=[12,12);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = RecallClassification(record=record)

    assert not classification.is_correct()
def test_wholeProbeMaps_probeEndsOnLastBaseOfAlignmentReturnsTrue(self):
    """Check `_whole_query_probe_maps()` is truthy at the right alignment edge.

    With CIGAR `33M35S` the aligned query offsets are 0..32, so the
    `[23,33)` interval in the query name ends on the last aligned base.
    """
    # Read-side fields of the synthetic SAM line.
    query_name = "IV=[23,33);"
    sequence = "AAAAAAAAAAAAAAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    # Alignment-side fields.
    ref_name, pos, mapq, flag = "reference", 6, 60, 0
    cigar, nm, md = "33M35S", "NM:i:0", "MD:Z:33"

    sam_header = create_sam_header(ref_name, 59)
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"
    alignment = pysam.AlignedSegment.fromstring(sam_string, sam_header)

    assert Classification(record=alignment)._whole_query_probe_maps()
def test_assessment_mismatchInLastCoreProbeBase(self):
    """Check `PrecisionClassification.assessment()` returns 0.9 for one
    mismatch in the interval.

    The MD tag `20C35` puts the single mismatch at query offset 20, the
    last position of the 10-base `[11,21)` interval in the query name.
    """
    ref_name = "reference"
    ref_length = 64
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "56M"
    nm = "NM:i:1"
    md = "MD:Z:20C35"
    mapq = 60
    pos = 1
    query_name = "IV=[11,21);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = PrecisionClassification(record=record)

    expected = 0.9
    actual = classification.assessment()

    assert actual == expected
def test_assessment_probeIsDeletionBasesEitherSideMatch(self):
    """Check `PrecisionClassification.assessment()` returns 1.0 for a
    deletion probe in a perfect alignment.

    The query name carries the empty interval `[12,12)`, and the record
    aligns end to end with no mismatches (`56M`, `NM:i:0`, `MD:Z:56`).
    """
    ref_name = "reference"
    ref_length = 64
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "56M"
    nm = "NM:i:0"
    md = "MD:Z:56"
    mapq = 60
    pos = 6
    query_name = "IV=[12,12);"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = PrecisionClassification(record=record)

    expected = 1.0
    actual = classification.assessment()

    assert actual == expected
def test_isCorrect_probesMatchPerfectlyReturnsTrue(self):
    """Check `RecallClassification.is_correct()` is truthy for a perfect match.

    CIGAR `33M35S` aligns query offsets 0..32 with no mismatches
    (`NM:i:0`, `MD:Z:33`), which covers the `[23,33)` interval in the
    query name.
    """
    ref_name = "reference"
    ref_length = 59
    header = create_sam_header(ref_name, ref_length)

    flag = 0
    cigar = "33M35S"
    nm = "NM:i:0"
    md = "MD:Z:33"
    mapq = 60
    pos = 6
    query_name = "IV=[23,33);"
    sequence = (
        "AAAAAAAAAAAAAAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    )
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = RecallClassification(record=record)

    assert classification.is_correct()
def test_assessment_probesMatchPerfectlyInRCWithTotallyWrongFlanks(self):
    """Check `PrecisionClassification.assessment()` returns 1.0 for a
    reverse-strand record where only five bases align.

    The record has flag 16 (reverse strand) and CIGAR `53S5M10S`, so both
    flanks are soft-clipped. The query name carries the interval `[10,15)`.
    """
    ref_name = "reference"
    ref_length = 59
    header = create_sam_header(ref_name, ref_length)

    flag = 16  # SAM flag 0x10: read reverse-complemented
    # NOTE(review): the aligned window (query offsets 53..57) is the mirror
    # image of [10,15) on a 68-base read; presumably the interval is given
    # in original read orientation. Confirm against the classifier.
    cigar = "53S5M10S"
    nm = "NM:i:0"
    # NOTE(review): MD:Z:4 describes 4 reference bases but the CIGAR aligns
    # 5 (5M). Left unchanged to preserve the fixture; confirm whether
    # MD:Z:5 was intended.
    md = "MD:Z:4"
    mapq = 60
    pos = 6
    query_name = "IV=[10,15);"
    sequence = (
        "AAAAAAAAAAAAAAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"
    )
    sam_string = f"{query_name}\t{flag}\t{ref_name}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:0\tXS:i:0"

    # The classification is built from the record alone, so no separate
    # Probe object is constructed here.
    record = pysam.AlignedSegment.fromstring(sam_string, header)
    classification = PrecisionClassification(record=record)

    expected = 1.0
    actual = classification.assessment()

    assert actual == expected