def test_hardclipping(self): read = SamRead(reference_name='15') read.reference_start = 71491944 read.cigar = _cigar.convert_string_to_cigar('12=1D25=113H') read.query_sequence = 'GTGTGTGGTGTGGGGTGTGTGGTGTGTGTGGTGTGTG' read.is_reverse = True expected_bpp = BreakpointPair( Breakpoint('15', 71491956, orient='L', strand='-'), Breakpoint('15', 71491958, orient='R', strand='-'), untemplated_seq='') events = align.call_read_events(read, is_stranded=True) self.assertEqual(1, len(events)) self.assertEqual(expected_bpp.break1, events[0].break1) self.assertEqual(expected_bpp.break2, events[0].break2)
def test_bwa_mem(self): # SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG # std SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG # > BPP(Breakpoint(1:224646893L-), Breakpoint(1:224646906R-), opposing=False, seq='') read = SamRead(reference_name='1') read.query_sequence = 'TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG' read.reference_start = 224646710 read.reference_id = 0 print(_cigar.convert_string_to_cigar('183=12D19=')) read.cigar = _cigar.join(_cigar.convert_string_to_cigar('183=12D19=')) read.query_name = 'name' read.mapping_quality = NA_MAPPING_QUALITY std_read = Evidence.standardize_read(self.mock_evidence, read) print(SamRead.__repr__(read)) print(SamRead.__repr__(std_read)) self.assertEqual(_cigar.convert_string_to_cigar('186=12D16='), std_read.cigar) self.assertEqual(read.reference_start, std_read.reference_start)
def test_bwa_mem(self): mock_evidence = MockObject( reference_genome={ '1': MockObject( seq=MockLongString( 'TGGGTATCAGACACACTGGGTAGCTGAGTGCTCAGAGGAAGATGCGAGGTATTCAGGGAAAGTGTCAGTGGGGTCTCCCAGTGCCTGTTTGGTCCACAGTTAGGAGA' 'GGCCCTGCTTGCACTTCTAATACAGTCCCGGAAAGACGGGGCCAGAACTTAGGAGGGGAGCGCTTTGCAGCAACTTTTCAAGAAAAGGGGAAAATTTAAGCACCATA' 'CTGTTATGTGGTCCTTGTACCCAGAGGCCCTGTTCAGCTCCAGTGATCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGT' 'GTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCT' 'TTCTTCCTTCTACTGCTTAGATCAAGTCTTCAGCAGACATCATGTGACCTTGAGGATGGATGTCACATGCTGGAGGAAACAGAAGGCCGAAACCCTGATGACTTCAC' 'AGAGCTGCCAAAACAGTTCCTGACTGTTTATTCCGGGTCTTTAACAAAGTGATGAAAAGAAATCCTTGCAGTATGAAAACAACTTTTCTATTCCATGGAGCCAAACC' 'TCATTATAACAGATAACGTGACCCTCAGCGATATCCCAAGTATTTTCCTGTTCTCATCTATACTATGGCAAAGGGGCAAATACCTCTCAGTAAAGAAAGAAATAACA' 'ACTTCTATCTTGGGCGAGGCATTTCTTCTGTTAGAACTTTGTACACGGAATAAAATAGATCTGTTTGTGCTTATCTTTCTCCTTAGAATTATTGAATTTGAAGTCTT' 'TCCCAGGGTGGGGGTGGAGTGAAGCTGGGGTTTCATAAGCACATAGATAGTAGTG', offset=224646450, ) ) }, bam_cache=MockObject(get_read_reference_name=lambda x: x.reference_name), config={ 'validate.contig_aln_merge_inner_anchor': 10, 'validate.contig_aln_merge_outer_anchor': 20, **DEFAULTS, }, ) # SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG # std SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG # > BPP(Breakpoint(1:224646893L-), Breakpoint(1:224646906R-), opposing=False, seq='') read = SamRead(reference_name='1') read.query_sequence = 'TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG' read.reference_start = 224646710 read.reference_id = 0 print(_cigar.convert_string_to_cigar('183=12D19=')) read.cigar = _cigar.join(_cigar.convert_string_to_cigar('183=12D19=')) read.query_name = 'name' read.mapping_quality = NA_MAPPING_QUALITY std_read = Evidence.standardize_read(mock_evidence, read) assert std_read.cigar == _cigar.convert_string_to_cigar('186=12D16=') assert std_read.reference_start == read.reference_start