def _check_argument_with_inequality_on_optimal_align_score(
        self,
        query_sequences=None,
        target_sequences=None,
        arg=None,
        default=None,
        i_range=None,
        compare_lt=None,
        compare_gt=None):
    """Compare optimal scores while sweeping one keyword argument.

    For every query/target pair, an alignment built with ``arg`` set to
    each value of ``i_range`` is compared against a baseline alignment
    built with ``arg`` set to ``default``.

    Parameters
    ----------
    query_sequences : iterable
        Sequences passed to ``StripedSmithWaterman`` as the query.
    target_sequences : iterable
        Sequences each query object is called with.
    arg : str
        Name of the ``StripedSmithWaterman`` keyword argument to vary.
    default : comparable
        Value of ``arg`` used for the baseline alignment.
    i_range : iterable
        Values of ``arg`` to test against the baseline.
    compare_lt : callable
        Called as ``compare_lt(score, baseline_score)`` when the tested
        value is less than ``default``.
    compare_gt : callable
        Called as ``compare_gt(score, baseline_score)`` when the tested
        value is greater than ``default``.
    """
    default_kwarg = {arg: default}
    for query_sequence in query_sequences:
        for target_sequence in target_sequences:
            # The baseline does not depend on the swept value, so it is
            # computed once per query/target pair instead of once per value.
            baseline_query = StripedSmithWaterman(query_sequence,
                                                  **default_kwarg)
            baseline_score = baseline_query(
                target_sequence).optimal_alignment_score

            for i in i_range:
                query = StripedSmithWaterman(query_sequence, **{arg: i})
                score = query(target_sequence).optimal_alignment_score

                if i == default:
                    self.assertEqual(score, baseline_score)
                elif i < default:
                    compare_lt(score, baseline_score)
                elif i > default:
                    compare_gt(score, baseline_score)
def test_arg_matrix_overrides_match_and_mismatch(self):
    """A substitution matrix yields a different score than the defaults.

    Every sequence is aligned against every sequence (itself included),
    once with default scoring and once with ``substitution_matrix`` set;
    the two optimal scores must differ for each pair.
    """
    sequences = [
        "TTATAATTAATTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT",
        "AGTCGAAGGGTAAGGGGTATAGGCGTGTCACCTA",
        "AGTCGAAGGGTAATA",
        "CTGCCTCAGGGGCGAGGAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC",
        "AGGGTAATTAGCGCGTGTTCACCTA",
    ]
    # This is a biologically meaningless matrix
    scoring = {
        "A": {"A": 4, "T": -1, "C": -2, "G": -3, "N": 4},
        "T": {"A": -1, "T": 1, "C": -1, "G": -4, "N": 1},
        "C": {"A": -2, "T": -1, "C": 10, "G": 1, "N": 1},
        "G": {"A": -3, "T": -4, "C": 1, "G": 3, "N": 1},
        "N": {"A": 4, "T": 1, "C": 1, "G": 1, "N": 0},
    }

    for query_sequence in sequences:
        for target_sequence in sequences:
            plain_query = StripedSmithWaterman(query_sequence)
            plain_alignment = plain_query(target_sequence)

            matrix_query = StripedSmithWaterman(
                query_sequence, substitution_matrix=scoring)
            matrix_alignment = matrix_query(target_sequence)

            self.assertNotEqual(
                plain_alignment.optimal_alignment_score,
                matrix_alignment.optimal_alignment_score)
def test_regression_on_instantiation_arguments(self):
    """Passing every constructor keyword explicitly gives a known result."""
    query_sequence = 'AAACGATAAATCCGCGTA'
    target_sequence = 'AAACGACTACTAAATCCGCGTGATAGGGGA'
    expected = {
        'optimal_alignment_score': 23,
        'suboptimal_alignment_score': 10,
        'query_begin': 0,
        'query_end': 16,
        'target_begin': 0,
        'target_end_optimal': 20,
        'target_end_suboptimal': 4,
        'cigar': '6M4D11M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    # Every keyword is spelled out so a change in how any one of them is
    # accepted shows up here.
    instantiation_kwargs = {
        'gap_open': 5,
        'gap_extend': 2,
        'score_size': 2,
        'mask_length': 15,
        'mask_auto': True,
        'score_only': False,
        'score_filter': None,
        'distance_filter': None,
        'override_skip_babp': False,
        'protein': False,
        'match': 2,
        'mismatch': -3,
        'substitution_matrix': None,
        'suppress_sequences': False,
        'zero_index': True,
    }
    query = StripedSmithWaterman(query_sequence, **instantiation_kwargs)
    self._check_alignment(query(target_sequence), expected)
def test_protein_sequence_is_usable(self):
    """Alignment with ``protein=True`` and ``self.blosum50`` as matrix."""
    query_sequence = ('VHLTGEEKSAVAALWGKVNVDEVGGEALGRXLLVVYPWTQRFFESF'
                      'SDLSTPDABVMSNPKVKAHGK')
    target_sequence = ('VHLTPEEKSAVTALWBGKVNVDEVGGEALGRLLVVYPWTQRFFES'
                       'FGDLSTPD*')
    expected = {
        'optimal_alignment_score': 316,
        'suboptimal_alignment_score': 95,
        'query_begin': 0,
        'query_end': 52,
        'target_begin': 0,
        'target_end_optimal': 52,
        'target_end_suboptimal': 18,
        'cigar': '15M1D15M1I22M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    aligner = StripedSmithWaterman(query_sequence,
                                   protein=True,
                                   substitution_matrix=self.blosum50)
    self._check_alignment(aligner(target_sequence), expected)
def test_arg_gap_extend(self):
    """Sweep ``gap_extend`` around its default, then pin one alignment."""
    sequences = [
        "TTATAATTTTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT",
        "AGTCGAAGGGTAATACTAGGCGTGTCACCTA",
        "AGTCGAAGGGTAATA",
        "CTGCCTCAGGGGGAGGCAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC",
        "AGGGTAATTAGGCGTGTTCACCTA",
    ]
    self._check_argument_with_inequality_on_optimal_align_score(
        query_sequences=sequences,
        target_sequences=sequences,
        arg='gap_extend',
        default=2,
        i_range=range(1, 10),
        # These are intentionally inverted
        compare_lt=self.assertGreaterEqual,
        compare_gt=self.assertLessEqual)

    # The bound above is not strict, so it would also hold if every
    # alignment scored exactly like the default. Pin one concrete
    # alignment to close that gap.
    query_sequence = 'TCTATAAGATTCCGCATGCGTTACTTATAAGATGTCTCAACGG'
    target_sequence = 'GCCCAGTAGCTTCCCAATATGAGAGCATCAATTGTAGATCGGGCC'
    expected = {
        'optimal_alignment_score': 9,
        'suboptimal_alignment_score': 8,
        'query_begin': 6,
        'query_end': 12,
        'target_begin': 7,
        'target_end_optimal': 13,
        'target_end_suboptimal': 38,
        'cigar': '7M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    aligner = StripedSmithWaterman(query_sequence, gap_extend=10)
    self._check_alignment(aligner(target_sequence), expected)
def _check_bit_flag_sets_properties_falsy_or_negative(
        self, query_sequences=None, target_sequences=None,
        arg_settings=None, properties_to_null=None):
    """Check which alignment properties a set of arguments nulls out.

    Each query/target pair is aligned with the keyword arguments given
    in ``arg_settings``. Every property in ``properties_to_null`` must
    then be "falsy or negative", and every other name listed in
    ``self.align_attributes`` must not be.

    Parameters
    ----------
    query_sequences : iterable
        Sequences passed to ``StripedSmithWaterman`` as the query.
    target_sequences : iterable
        Sequences each query object is called with.
    arg_settings : iterable of (str, object) pairs, optional
        Keyword arguments for ``StripedSmithWaterman``. Defaults to none.
    properties_to_null : container of str, optional
        Property names expected to be nulled. Defaults to none.
    """
    # ``None`` defaults replace the former mutable ``[]`` defaults.
    if arg_settings is None:
        arg_settings = ()
    if properties_to_null is None:
        properties_to_null = ()

    # Both of these are loop-invariant, so build them once.
    kwarg = {arg: setting for arg, setting in arg_settings}
    properties_to_keep = [p for p in self.align_attributes
                          if p not in properties_to_null]

    def falsy_or_negative(alignment, prop):
        value = alignment[prop]
        # Exact type check: only plain ints use the sign test; everything
        # else (bool included) uses the truthiness test.
        if type(value) is int:
            return value < 0
        return not value

    for query_sequence in query_sequences:
        for target_sequence in target_sequences:
            query = StripedSmithWaterman(query_sequence, **kwarg)
            alignment = query(target_sequence)
            for prop in properties_to_null:
                self.assertTrue(falsy_or_negative(alignment, prop))
            # Every property not in our null list
            for prop in properties_to_keep:
                self.assertFalse(falsy_or_negative(alignment, prop))
def test_get_aligned_query_target_sequence(self):
    """Aligned target and query strings match the expected gapped text."""
    aligner = StripedSmithWaterman("AGGGTAATTAGGCGTGTTCACCTA")
    result = aligner("AGTCGAAGGGTAATATAGGCGTGTCACCTA")

    expected_target = "AGGGTAATATAGGCGT-GTCACCTA"
    self.assertEqual(expected_target,
                     result.get_aligned_target_sequence())

    expected_query = "AGGGTAAT-TAGGCGTGTTCACCTA"
    self.assertEqual(expected_query,
                     result.get_aligned_query_sequence())
def test_works_for_dot_and_square_bracket_access(self):
    """Attribute access and item access return equal values."""
    aligner = StripedSmithWaterman("AGGGTAATTAGGCGTGTTCACCTA")
    result = aligner("TACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTG")
    for name in self.align_attributes:
        via_attribute = getattr(result, name)
        via_item = result[name]
        self.assertEqual(via_attribute, via_item)
def test_same_as_using_StripedSmithWaterman_object(self):
    """The function form gives the same alignment as the object form."""
    query_sequence = 'ATGGAAGCTATAAGCGCGGGTGAG'
    target_sequence = 'AACTTATATAATAAAAATTATATATTCGTTGGGTTCTTTTGATATAAATC'

    from_object = StripedSmithWaterman(query_sequence)(target_sequence)
    from_function = align_striped_smith_waterman(query_sequence,
                                                 target_sequence)

    self._check_alignment(from_function, from_object)
def test_kwargs_are_usable(self):
    """Keyword arguments are honored by both the object and the function."""
    options = {'zero_index': False, 'match': 5}
    query_sequence = 'AGGGTAATTAGGCGTGTTCACCTA'
    target_sequence = 'TACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTG'

    aligner = StripedSmithWaterman(query_sequence, **options)
    from_object = aligner(target_sequence)
    from_function = align_striped_smith_waterman(
        query_sequence, target_sequence, **options)

    self._check_alignment(from_function, from_object)
def test_arg_zero_index_changes_base_of_index_to_0_or_1(self):
    """``zero_index`` shifts every reported coordinate by one.

    The same pair of sequences is aligned with ``zero_index=True`` and
    ``zero_index=False``; scores and CIGAR are identical while all begin
    and end positions differ by exactly one.
    """
    query_sequence = ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG'
                      'CCCCGGGCGGGGC')
    target_sequence = ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC'
                       'GGGCGGGGC')
    # Fields that do not depend on the index base.
    shared_fields = {
        'optimal_alignment_score': 100,
        'suboptimal_alignment_score': 44,
        'cigar': '50M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    coordinates_by_base = [
        (True, {
            'query_begin': 5,
            'query_end': 54,
            'target_begin': 0,
            'target_end_optimal': 49,
            'target_end_suboptimal': 21,
        }),
        (False, {
            'query_begin': 6,
            'query_end': 55,
            'target_begin': 1,
            'target_end_optimal': 50,
            'target_end_suboptimal': 22,
        }),
    ]

    for zero_based, coordinates in coordinates_by_base:
        expected = dict(shared_fields)
        expected.update(coordinates)
        aligner = StripedSmithWaterman(expected['query_sequence'],
                                       zero_index=zero_based)
        result = aligner(expected['target_sequence'])
        self._check_alignment(result, expected)
def test_align_with_N_in_nucleotide_sequence(self):
    """Sequences containing ``N`` produce the expected alignment."""
    query_sequence = 'ACTCANNATCGANCTAGC'
    target_sequence = 'ACTCGAAAATGTNNGCA'
    expected = {
        'optimal_alignment_score': 9,
        'suboptimal_alignment_score': 0,
        'query_begin': 0,
        'query_end': 8,
        'target_begin': 0,
        'target_end_optimal': 9,
        'target_end_suboptimal': 0,
        'cigar': '4M1D5M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    aligner = StripedSmithWaterman(query_sequence)
    self._check_alignment(aligner(target_sequence), expected)
def test_lowercase_is_valid_sequence(self):
    """Lowercase sequences align and are reported back unchanged."""
    query_sequence = 'aaacgataaatccgcgta'
    target_sequence = 'aaacgactactaaatccgcgtgatagggga'
    expected = {
        'optimal_alignment_score': 23,
        'suboptimal_alignment_score': 10,
        'query_begin': 0,
        'query_end': 16,
        'target_begin': 0,
        'target_end_optimal': 20,
        'target_end_suboptimal': 4,
        'cigar': '6M4D11M',
        'query_sequence': query_sequence,
        'target_sequence': target_sequence,
    }
    aligner = StripedSmithWaterman(query_sequence)
    self._check_alignment(aligner(target_sequence), expected)
def test_arg_suppress_sequences(self):
    """``suppress_sequences=True`` empties both reported sequences."""
    query_sequence = (
        "AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCCGGGCGGGGC")
    target_sequence = "CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCCGGGCGGGGC"
    expected = {
        'optimal_alignment_score': 100,
        'suboptimal_alignment_score': 44,
        'query_begin': 5,
        'query_end': 54,
        'target_begin': 0,
        'target_end_optimal': 49,
        'target_end_suboptimal': 21,
        'cigar': '50M',
        # Both sequences are expected back as empty strings.
        'query_sequence': '',
        'target_sequence': '',
    }
    aligner = StripedSmithWaterman(query_sequence,
                                   suppress_sequences=True)
    self._check_alignment(aligner(target_sequence), expected)
def test_is_zero_based_returns_true_if_index_base_is_zero(self):
    """``is_zero_based()`` echoes the ``zero_index`` constructor flag."""
    query_sequence = ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG'
                      'CCCCGGGCGGGGC')
    target_sequence = ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC'
                       'GGGCGGGGC')
    # The same sequence pair is used for both settings of the flag.
    for zero_based in (True, False):
        aligner = StripedSmithWaterman(query_sequence,
                                       zero_index=zero_based)
        result = aligner(target_sequence)
        self.assertEqual(zero_based, result.is_zero_based())
def test_get_aligned_query_target_sequence_with_suppressed_sequences(self):
    """Aligned-sequence getters return ``None`` when sequences are
    suppressed.
    """
    query = StripedSmithWaterman("AGGGTAATTAGGCGTGTTCACCTA",
                                 suppress_sequences=True)
    alignment = query("AGTCGAAGGGTAATATAGGCGTGTCACCTA")
    # assertIsNone checks identity, so a value that merely compares equal
    # to None does not pass, and the failure message names the value.
    self.assertIsNone(alignment.get_aligned_target_sequence())
    self.assertIsNone(alignment.get_aligned_query_sequence())
def test_object_is_reusable(self):
    """One query object can be called on several targets in a row.

    All alignments are produced first and checked afterwards, so each
    result is verified after the query object has been called again.
    """
    q_seq = "AGGGTAATTAGGCGTGTTCACCTA"
    expected_alignments = [
        {
            'optimal_alignment_score': 10,
            'suboptimal_alignment_score': 10,
            'query_begin': 4,
            'query_end': 8,
            'target_begin': 3,
            'target_end_optimal': 7,
            'target_end_suboptimal': 34,
            'cigar': '5M',
            'query_sequence': q_seq,
            'target_sequence': ('TTATAATTTTCTTATTATTATCAATATTTATAATTTGATTT'
                                'TGTTGTAAT'),
        },
        {
            'optimal_alignment_score': 36,
            'suboptimal_alignment_score': 16,
            'query_begin': 0,
            'query_end': 23,
            'target_begin': 6,
            'target_end_optimal': 29,
            'target_end_suboptimal': 13,
            'cigar': '8M1D8M1I7M',
            'query_sequence': q_seq,
            'target_sequence': 'AGTCGAAGGGTAATATAGGCGTGTCACCTA',
        },
        {
            'optimal_alignment_score': 16,
            'suboptimal_alignment_score': 0,
            'query_begin': 0,
            'query_end': 7,
            'target_begin': 6,
            'target_end_optimal': 13,
            'target_end_suboptimal': 0,
            'cigar': '8M',
            'query_sequence': q_seq,
            'target_sequence': 'AGTCGAAGGGTAATA',
        },
        {
            'optimal_alignment_score': 8,
            'suboptimal_alignment_score': 8,
            'query_begin': 0,
            'query_end': 3,
            'target_begin': 7,
            'target_end_optimal': 10,
            'target_end_suboptimal': 42,
            'cigar': '4M',
            'query_sequence': q_seq,
            'target_sequence': ('CTGCCTCAGGGGGAGGAAAGCGTCAGCGCGGCTGCCGTCGG'
                                'CGCAGGGGC'),
        },
        {
            'optimal_alignment_score': 48,
            'suboptimal_alignment_score': 16,
            'query_begin': 0,
            'query_end': 23,
            'target_begin': 0,
            'target_end_optimal': 23,
            'target_end_suboptimal': 7,
            'cigar': '24M',
            'query_sequence': q_seq,
            'target_sequence': q_seq,
        },
    ]

    aligner = StripedSmithWaterman(q_seq)
    # Phase 1: reuse the single query object for every target.
    produced = [aligner(case['target_sequence'])
                for case in expected_alignments]
    # Phase 2: verify only after all calls have been made.
    for actual, case in zip(produced, expected_alignments):
        self._check_alignment(actual, case)