def test_aligner_should_provide_Gotoh_algorithm(self):
    """Check the 'GL' and 'GG' methods against fixed scores and alignments.

    Each case is (first, second, method, penalties, expected score,
    accepted alignments). Cases run in order and the test stops at the
    first failing assertion.
    """
    # NOTE(review): 'GL' / 'GG' presumably select local / global variants
    # of the algorithm -- confirm against the aligner module.
    cases = [
        ('ACACACTA', 'AGCACACA', 'GL',
         {'match': 2, 'mismatch': -1, 'indel': -1},
         12,
         [('A-CACACTA', 'AGCACAC-A')]),
        ('CGTGAATTCAT', 'GACTTAC', 'GL',
         {'match': 5, 'mismatch': -3, 'indel': -4},
         18,
         [('GAATTCA', 'GACTT-A'), ('GAATT-C', 'GACTTAC')]),
        ('CGGTCATAC', 'CGGAT', 'GG',
         {'match': 1, 'mismatch': -1, 'indel': -1, 'gap_opening': -5},
         -5,
         [('CGGTCATAC', 'CGG----AT')]),
    ]
    for first, second, method, penalties, expected_score, accepted in cases:
        score, aligned_first, aligned_second = aligner.align(
            first, second, method, penalties)
        self.assertEqual(score, expected_score)
        self.assertIn((aligned_first, aligned_second), accepted)
def test_aligner_should_provide_Smith_Waterman_algorithm(self):
    """Check the 'SW' method against two fixed sequence pairs.

    Each case is (first, second, penalties, expected score, accepted
    alignments). Any of the accepted alignment pairs passes.
    """
    cases = [
        ('ACACACTA', 'AGCACACA',
         {'match': 2, 'mismatch': -1, 'indel': -1},
         12,
         [('A-CACACTA', 'AGCACAC-A')]),
        ('CGTGAATTCAT', 'GACTTAC',
         {'match': 5, 'mismatch': -3, 'indel': -4},
         18,
         [('GAATTCA', 'GACTT-A'), ('GAATT-C', 'GACTTAC')]),
    ]
    for first, second, penalties, expected_score, accepted in cases:
        score, aligned_first, aligned_second = aligner.align(
            first, second, 'SW', penalties)
        self.assertEqual(score, expected_score)
        self.assertIn((aligned_first, aligned_second), accepted)
def test_aligner_should_accept_custom_penalties(self):
    """Check that caller-supplied penalties determine the returned score.

    The method argument is None in every call. Only the score is checked;
    the returned alignments are ignored.
    """
    cases = [
        # 7 matches
        ('GCATGCU', 'GCATGCU', {'match': 10, 'mismatch': -1, 'indel': -1}, 70),
        # 3 mutations
        ('GCATGCU', 'GTATGAG', {'match': 0, 'mismatch': -3, 'indel': -10}, -9),
        # 2 deletions
        ('GCTGCU', 'GCATGC', {'match': 0, 'mismatch': -10, 'indel': -4}, -8),
    ]
    for first, second, penalties, expected_score in cases:
        score, _, _ = aligner.align(first, second, None, penalties)
        self.assertEqual(score, expected_score)
def test_aligner_should_provide_Gotoh_algorithm(self):
    """Verify fixed expected results for the 'GL' and 'GG' methods."""

    def expect(first, second, method, penalties, expected_score, accepted):
        # Run one alignment and check its score, then its alignment pair.
        score, top, bottom = aligner.align(first, second, method, penalties)
        self.assertEqual(score, expected_score)
        self.assertIn((top, bottom), accepted)

    expect(
        'ACACACTA', 'AGCACACA', 'GL',
        {'match': 2, 'mismatch': -1, 'indel': -1},
        12,
        [('A-CACACTA', 'AGCACAC-A')],
    )
    expect(
        'CGTGAATTCAT', 'GACTTAC', 'GL',
        {'match': 5, 'mismatch': -3, 'indel': -4},
        18,
        [('GAATTCA', 'GACTT-A'), ('GAATT-C', 'GACTTAC')],
    )
    # This is the only case that passes an explicit 'gap_opening' penalty.
    expect(
        'CGGTCATAC', 'CGGAT', 'GG',
        {'match': 1, 'mismatch': -1, 'indel': -1, 'gap_opening': -5},
        -5,
        [('CGGTCATAC', 'CGG----AT')],
    )
def test_aligner_should_work_on_Wikipedia_examples(self):
    """Align 'GCATGCU' with 'GATTACA' using the default method and penalties.

    The score must be 0 and the returned pair must be one of the listed
    alignments.
    """
    # Accept one of possible variants
    accepted_alignments = [
        ('GCATG-CU', 'G-ATTACA'),
        ('GCA-TGCU', 'G-ATTACA'),
        ('GCAT-GCU', 'G-ATTACA'),
    ]
    result = aligner.align('GCATGCU', 'GATTACA', None)
    score, aligned_first, aligned_second = result
    self.assertEqual(score, 0)
    self.assertIn((aligned_first, aligned_second), accepted_alignments)
def test_aligner_should_provide_Smith_Waterman_algorithm(self):
    """Verify fixed expected scores and alignments for the 'SW' method."""

    def expect(first, second, penalties, expected_score, accepted):
        # Score is checked before the alignment pair, once per case.
        score, top, bottom = aligner.align(first, second, 'SW', penalties)
        self.assertEqual(score, expected_score)
        self.assertIn((top, bottom), accepted)

    expect(
        'ACACACTA', 'AGCACACA',
        {'match': 2, 'mismatch': -1, 'indel': -1},
        12,
        [('A-CACACTA', 'AGCACAC-A')],
    )
    # Two alignments reach the expected score here; either is accepted.
    expect(
        'CGTGAATTCAT', 'GACTTAC',
        {'match': 5, 'mismatch': -3, 'indel': -4},
        18,
        [('GAATTCA', 'GACTT-A'), ('GAATT-C', 'GACTTAC')],
    )
def test_aligner_should_detect_deletions(self, sequence_length=100):
    """Check the score after randomly dropping symbols from a sequence.

    Every symbol of the generated sequence is dropped with probability 0.2.
    With match=1 and indel=-1 each dropped symbol is expected to lower the
    score by 2 (one lost match plus one indel).
    """
    alphabet = ['A', 'C', 'G', 'T']
    original = self._generate_random_sequence(sequence_length, alphabet)

    # One random draw per symbol, in sequence order; a draw below 0.2
    # removes the symbol.
    survivors = [
        symbol for symbol in original if not (np.random.random() < 0.2)
    ]
    deletion_count = len(original) - len(survivors)

    reference = ''.join(original)
    shortened = ''.join(survivors)

    # Sanity check: a sequence aligned with itself under default penalties.
    identity_score, _, _ = aligner.align(reference, reference, None)
    self.assertEqual(identity_score, sequence_length)

    penalties = {'match': 1, 'mismatch': -100, 'indel': -1}
    score, _, _ = aligner.align(reference, shortened, None, penalties)
    self.assertEqual(score, sequence_length - 2 * deletion_count)
def test_aligner_should_detect_insertions(self, sequence_length=100):
    """Check the score after randomly inserting symbols into a sequence.

    After each symbol of the generated sequence, a random alphabet symbol
    is inserted with probability 0.2. The modified sequence is aligned
    against the original with match=1 and indel=-1.
    """
    alphabet = ['A', 'C', 'G', 'T']
    original = self._generate_random_sequence(sequence_length, alphabet)

    extended = []
    insertion_count = 0
    for symbol in original:
        extended.append(symbol)
        if not (np.random.random() < 0.2):
            continue
        insertion_count += 1
        extended.append(alphabet[np.random.randint(len(alphabet))])

    reference = ''.join(original)

    # Sanity check: a sequence aligned with itself under default penalties.
    identity_score, _, _ = aligner.align(reference, reference, None)
    self.assertEqual(identity_score, sequence_length)

    penalties = {'match': 1, 'mismatch': -100, 'indel': -1}
    score, _, _ = aligner.align(reference, ''.join(extended), None, penalties)
    # Insertions also increase total sequence length
    extended_length = sequence_length + insertion_count
    self.assertEqual(score, extended_length - 2 * insertion_count)
def test_aligner_should_detect_mutations(self, sequence_length=100):
    """Check the score after randomly substituting symbols in a sequence.

    Each position is overwritten with a random alphabet symbol with
    probability 0.2. Only overwrites that change the symbol are counted.
    With match=1 and mismatch=-1 each counted mutation is expected to
    lower the score by 2.
    """
    alphabet = ['A', 'C', 'G', 'T']
    original = self._generate_random_sequence(sequence_length, alphabet)

    mutated = []
    mutation_count = 0
    for symbol in original:
        replacement = symbol
        if np.random.random() < 0.2:
            replacement = alphabet[np.random.randint(len(alphabet))]
            # The random pick may equal the current symbol; that is not
            # counted as a mutation.
            if symbol != replacement:
                mutation_count += 1
        mutated.append(replacement)

    reference = ''.join(original)

    # Sanity check: a sequence aligned with itself under default penalties.
    identity_score, _, _ = aligner.align(reference, reference, None)
    self.assertEqual(identity_score, sequence_length)

    penalties = {'match': 1, 'mismatch': -1, 'indel': -10}
    score, _, _ = aligner.align(reference, ''.join(mutated), None, penalties)
    self.assertEqual(score, sequence_length - 2 * mutation_count)
def test_aligner_should_accept_custom_penalties(self):
    """Verify that the score reflects the penalties passed by the caller."""

    def score_of(first, second, penalties):
        # Only the score matters here; the aligned strings are discarded.
        score, _, _ = aligner.align(first, second, None, penalties)
        return score

    # 7 matches
    all_matching = score_of(
        'GCATGCU', 'GCATGCU', {'match': 10, 'mismatch': -1, 'indel': -1})
    self.assertEqual(all_matching, 70)

    # 3 mutations
    with_mutations = score_of(
        'GCATGCU', 'GTATGAG', {'match': 0, 'mismatch': -3, 'indel': -10})
    self.assertEqual(with_mutations, -9)

    # 2 deletions
    with_deletions = score_of(
        'GCTGCU', 'GCATGC', {'match': 0, 'mismatch': -10, 'indel': -4})
    self.assertEqual(with_deletions, -8)
def test_aligner_should_detect_deletions(self, sequence_length=100):
    """Verify the alignment score when symbols are randomly deleted.

    A copy of the generated sequence is built in which each symbol is
    skipped with probability 0.2. The copy is aligned against the
    original with match=1 and indel=-1.
    """
    alphabet = ['A', 'C', 'G', 'T']
    source = self._generate_random_sequence(sequence_length, alphabet)

    remaining = []
    deletion_count = 0
    for symbol in source:
        if np.random.random() < 0.2:
            deletion_count += 1
            continue
        remaining.append(symbol)

    source_text = ''.join(source)

    # Baseline: aligning the sequence with itself under default penalties.
    baseline, _, _ = aligner.align(source_text, source_text, None)
    self.assertEqual(baseline, sequence_length)

    score, _, _ = aligner.align(
        source_text,
        ''.join(remaining),
        None,
        {'match': 1, 'mismatch': -100, 'indel': -1},
    )
    # Each deletion is expected to cost one lost match plus one indel.
    expected_score = sequence_length - deletion_count - deletion_count
    self.assertEqual(score, expected_score)
def test_aligner_should_detect_mutations(self, sequence_length=100):
    """Verify the alignment score when symbols are randomly substituted.

    Positions are overwritten in place with probability 0.2. An overwrite
    counts as a mutation only when it changes the symbol. The result is
    aligned against the original with match=1, mismatch=-1, indel=-10.
    """
    alphabet = ['A', 'C', 'G', 'T']
    source = self._generate_random_sequence(sequence_length, alphabet)
    altered = list(source)

    mutation_count = 0
    for position, current in enumerate(source):
        if np.random.random() >= 0.2:
            continue
        candidate = alphabet[np.random.randint(len(alphabet))]
        if current != candidate:
            mutation_count += 1
        altered[position] = candidate

    source_text = ''.join(source)

    # Baseline: aligning the sequence with itself under default penalties.
    baseline, _, _ = aligner.align(source_text, source_text, None)
    self.assertEqual(baseline, sequence_length)

    score, _, _ = aligner.align(
        source_text,
        ''.join(altered),
        None,
        {'match': 1, 'mismatch': -1, 'indel': -10},
    )
    # Each mutation is expected to turn a +1 match into a -1 mismatch.
    expected_score = sequence_length - mutation_count - mutation_count
    self.assertEqual(score, expected_score)
def test_aligner_should_detect_insertions(self, sequence_length=100):
    """Verify the alignment score when symbols are randomly inserted.

    Each original symbol is copied, and with probability 0.2 it is
    followed by a randomly chosen alphabet symbol. The longer sequence is
    aligned against the original with match=1 and indel=-1.
    """
    alphabet = ['A', 'C', 'G', 'T']
    source = self._generate_random_sequence(sequence_length, alphabet)

    grown = []
    insertion_count = 0
    for symbol in source:
        pieces = [symbol]
        if np.random.random() < 0.2:
            insertion_count += 1
            pieces.append(alphabet[np.random.randint(len(alphabet))])
        grown.extend(pieces)

    source_text = ''.join(source)

    # Baseline: aligning the sequence with itself under default penalties.
    baseline, _, _ = aligner.align(source_text, source_text, None)
    self.assertEqual(baseline, sequence_length)

    score, _, _ = aligner.align(
        source_text,
        ''.join(grown),
        None,
        {'match': 1, 'mismatch': -100, 'indel': -1},
    )
    # Insertions also increase total sequence length
    expected_score = (sequence_length + insertion_count) - 2 * insertion_count
    self.assertEqual(score, expected_score)
def test_aligner_should_raise_UnknownAlgorithmError_on_call_with_unknown_method(self):
    """Check that method='XY' makes align raise UnknownAlgorithmError."""
    # Callable form of assertRaises: it invokes aligner.align with the
    # given arguments and fails unless the expected exception is raised.
    self.assertRaises(
        aligner.UnknownAlgorithmError,
        aligner.align,
        'GCATGCU',
        'GATTACA',
        method='XY',
    )
def test_aligner_should_work_on_Wikipedia_examples(self):
    """Verify the 'GCATGCU' / 'GATTACA' example with default settings.

    The method argument is None and no penalties are passed.
    """
    first, second = 'GCATGCU', 'GATTACA'
    # Accept one of possible variants
    variants = [
        (aligned_first, 'G-ATTACA')
        for aligned_first in ('GCATG-CU', 'GCA-TGCU', 'GCAT-GCU')
    ]

    score, top, bottom = aligner.align(first, second, None)

    self.assertEqual(score, 0)
    self.assertIn((top, bottom), variants)
def test_aligner_should_raise_UnknownAlgorithmError_on_call_with_unknown_method(self):
    """Verify that an unrecognised method name is rejected with an error."""
    expected_error = aligner.UnknownAlgorithmError
    unknown_method = 'XY'
    with self.assertRaises(expected_error):
        aligner.align('GCATGCU', 'GATTACA', method=unknown_method)