def test_trim_read(self, window, cigar, start, read_length, expected_cigar,
                   expected_position, expected_read_length, comment):
    """Trim a synthetic all-'A' read to `window` and verify the result.

    A read of `read_length` bases (every base quality set to 30) is built
    with the given `start` and `cigar`, then trimmed to the region parsed
    from `window`. The trimmed read's cigar string, alignment start and the
    lengths of its sequence and quality arrays are compared against the
    expected values. `comment` labels the case in failure messages.
    """
    bases = 'A' * read_length
    qualities = [30] * read_length
    untrimmed = test_utils.make_read(
        bases, start=start, cigar=cigar, quals=qualities)
    trimmed = realigner.trim_read(untrimmed, ranges.parse_literal(window))
    alignment = trimmed.alignment

    # Cigar of the trimmed alignment, compared in its string form.
    self.assertEqual(
        expected_cigar,
        cigar_utils.format_cigar_units(alignment.cigar),
        msg='Wrong cigar for case: {}'.format(comment))

    # Where the trimmed alignment starts.
    self.assertEqual(
        alignment.position.position,
        expected_position,
        msg='Wrong position for case: {}'.format(comment))

    # The bases kept after trimming.
    self.assertLen(
        trimmed.aligned_sequence,
        expected_read_length,
        msg='Wrong length of aligned_sequence for case: {}'.format(comment))

    # The base qualities kept after trimming.
    self.assertLen(
        trimmed.aligned_quality,
        expected_read_length,
        msg='Wrong length of aligned_quality for case: {}'.format(comment))
def test_trim_cigar(self, cigar, ref_trim, ref_length, expected_cigar,
                    expected_read_trim, expected_read_length, comment):
    """Check the three values returned by `realigner.trim_cigar`.

    A fixed 21-base read starting at 100 is created with the given `cigar`;
    its cigar is passed to `trim_cigar` together with `ref_trim` and
    `ref_length`. The returned cigar (in string form), read trim and read
    length are compared with the expected values, and the read's own cigar
    is re-checked afterwards to confirm it still formats to `cigar`.
    `comment` labels the case in failure messages.
    """
    read = test_utils.make_read(
        'AAAATAAAATAAAATAAAATA', start=100, cigar=cigar)
    trimmed = realigner.trim_cigar(read.alignment.cigar, ref_trim, ref_length)
    trimmed_units, read_trim, trimmed_read_length = trimmed

    self.assertEqual(
        cigar_utils.format_cigar_units(trimmed_units),
        expected_cigar,
        msg='Wrong cigar for: {}'.format(comment))
    self.assertEqual(
        read_trim,
        expected_read_trim,
        msg='Wrong read trim for: {}'.format(comment))
    self.assertEqual(
        trimmed_read_length,
        expected_read_length,
        msg='Wrong read length for: {}'.format(comment))

    # Re-read the cigar from the read itself: the input must be untouched.
    self.assertEqual(
        cigar_utils.format_cigar_units(read.alignment.cigar),
        cigar,
        msg='Cigar in original read was mutated.')
def test_to_cigar_units(self, to_convert, expected):
    """Check `cigar.to_cigar_units` on both the raw and the string form.

    `to_convert` and `expected` are copied into lists before use. The raw
    input is converted first; the converted units are then formatted to a
    string and converted again, and both results must equal `expected`.
    """
    raw_units = list(to_convert)
    wanted = list(expected)

    # Direct conversion of the raw form.
    converted = cigar.to_cigar_units(raw_units)
    self.assertEqual(converted, wanted)

    # Round trip: format the converted units and convert that string back.
    as_string = cigar.format_cigar_units(converted)
    self.assertEqual(cigar.to_cigar_units(as_string), wanted)
def test_align_to_haplotype(self, read_seq, prefix, suffix, haplotypes,
                            expected_cigars):
    """Align one read to every haplotype and compare the resulting cigars.

    A single read built from `read_seq` (start=1) is passed to
    `self.reads_realigner.align_to_haplotype` once per entry in
    `haplotypes`. Each call must return as many reads as were given, and
    the first returned read's cigar string must equal the entry of
    `expected_cigars` at the same index.
    """
    reads = [test_utils.make_read(read_seq, start=1)]

    for index, target in enumerate(haplotypes):
        realigned = self.reads_realigner.align_to_haplotype(
            target, haplotypes, prefix, suffix, reads, 'test', 1)
        self.assertEqual(len(reads), len(realigned))

        first_cigar = cigar_utils.format_cigar_units(
            realigned[0].alignment.cigar)
        self.assertEqual(first_cigar, expected_cigars[index])
def test_split_reads(self, read_seq, cigar, expected_cigars,
                     expected_sequences, expected_positions):
    """Split one read and check every piece that comes back.

    A read built from `read_seq` and `cigar` (start=1) is passed to
    `realigner.split_reads`. The number of returned reads must equal
    `len(expected_sequences)`, and each returned read's sequence, cigar
    string and alignment start must equal the entries at the same index in
    `expected_sequences`, `expected_cigars` and `expected_positions`.
    """
    source_read = test_utils.make_read(read_seq, cigar=cigar, start=1)
    pieces = realigner.split_reads([source_read])

    # Verify the count before indexing into the expectation lists, so that
    # a surplus read is reported as a length mismatch rather than surfacing
    # as an IndexError from inside the loop.
    self.assertLen(pieces, len(expected_sequences))

    for index, piece in enumerate(pieces):
        # Check sequences
        self.assertEqual(piece.aligned_sequence, expected_sequences[index])
        # Check cigars
        self.assertEqual(
            cigar_utils.format_cigar_units(piece.alignment.cigar),
            expected_cigars[index])
        # Check reference positions
        self.assertEqual(
            piece.alignment.position.position, expected_positions[index])
def test_align_to_haplotype_stress_tests(self, alt_allele, ref_buffer_length,
                                         read_buffer_length):
    """Align an alt-allele read while shrinking the flanking sequence.

    The reference flanks are the last/first `ref_buffer_length` bases of two
    fixed sequences, and the read flanks are the last/first
    `read_buffer_length` bases of those reference flanks. A read carrying
    `alt_allele` between its flanks is aligned against an empty reference
    allele (expected: an insertion) and against `alt_allele` itself
    (expected: all matching).
    """
    # Long flanks to cut down from; only the parts adjacent to the allele
    # are kept.
    whole_prefix = 'AGTGATCTAGTCCTTTTTGTTGTGCAAAAGGAAGTGCTAAAATCAGAATGAGAACCATGGTCACCTGACATAGAC'
    whole_suffix = 'ATCCATGTTCAAGTACTAATTCTGGGCAAGACACTGTTCTAAGTGCTATGAATATATTACCTCATTTAATCATCT'

    # NOTE: a buffer length of 0 would make the negative slices below return
    # the whole string (s[-0:] == s) while the matching suffix slice is empty.
    ref_prefix = whole_prefix[-ref_buffer_length:]
    ref_suffix = whole_suffix[:ref_buffer_length]

    # Two haplotypes: the (empty) reference allele and the alt allele.
    ref_allele = ''
    haplotypes = [ref_allele, alt_allele]

    # The read is drawn from the alt haplotype with shorter flanks.
    read_prefix = ref_prefix[-read_buffer_length:]
    read_suffix = ref_suffix[:read_buffer_length]
    prefix_len = len(read_prefix)
    allele_len = len(alt_allele)
    suffix_len = len(read_suffix)

    insertion_cigar = '{}M{}I{}M'.format(prefix_len, allele_len, suffix_len)
    all_match_cigar = '{}M'.format(prefix_len + allele_len + suffix_len)
    # Same order as `haplotypes`: against ref, then against alt.
    expected_cigars = [insertion_cigar, all_match_cigar]

    reads = [
        test_utils.make_read(read_prefix + alt_allele + read_suffix, start=1)
    ]

    for index, target in enumerate(haplotypes):
        realigned = self.reads_realigner.align_to_haplotype(
            target, haplotypes, ref_prefix, ref_suffix, reads, 'test', 1)
        self.assertEqual(len(reads), len(realigned))

        first_cigar = cigar_utils.format_cigar_units(
            realigned[0].alignment.cigar)
        self.assertEqual(first_cigar, expected_cigars[index])
def test_format_cigar_units(self, cigar_units, expected):
    """Check that `cigar.format_cigar_units(cigar_units)` equals `expected`."""
    formatted = cigar.format_cigar_units(cigar_units)
    self.assertEqual(formatted, expected)