def test_should_remove_gap_case_c(self):
    """Remove gaps one at a time from a single sequence and check each result.

    Starting from 'AB----CD-E-', ``remove_gap_from_sequence`` is called on
    sequence 0 with positions 3, 3 and 8; the decoded alignment is compared
    with the expected string after every call.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1']
    problem.number_of_variables = 1
    msa = MSASolution(problem, msa=[('seq1', 'AB----CD-E-')])

    # Each step: (position passed to remove_gap_from_sequence, expected decoding).
    steps = [
        (3, ['AB---CD-E-']),
        (3, ['AB--CD-E-']),
        (8, ['AB--CD-E']),
    ]

    # run + check, step by step
    for position, expected in steps:
        msa.remove_gap_from_sequence(0, position)
        self.assertEqual(expected, msa.decode_alignment_as_list_of_sequences())
def test_should_fill_sequences_with_gaps_to_reach_the_max_sequence_length(self):
    """Check SPXMSA.fill_sequences_with_gaps_to_reach_the_max_sequence_length.

    Three solutions are built, the fill method is applied to each of them
    with its own arguments, and only afterwards the decoded alignments are
    compared with the expected sequences.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['a', 'b']
    problem.number_of_variables = 2

    # Each case: (solution, arguments passed after the solution, expected decoding).
    cases = [
        (
            MSASolution(problem, msa=[('a', '-----GE'), ('b', 'KWPFFQEAQK')]),
            (10, [-1, -1]),
            ["-----G---E", "KWPFFQEAQK"],
        ),
        (
            MSASolution(problem, msa=[('a', '-----GE'), ('b', 'KWPFFQEAQK')]),
            (10, [-1, 5]),
            ["-----G---E", "KWPFFQEAQK"],
        ),
        (
            MSASolution(problem, msa=[('a', '-'), ('b', 'ABC')]),
            (5, [-1, 1]),
            ["-----", "AB--C"],
        ),
    ]
    crossover = SPXMSA(probability=1.0, remove_gap_columns=False)

    # run
    for solution, fill_args, _ in cases:
        crossover.fill_sequences_with_gaps_to_reach_the_max_sequence_length(solution, *fill_args)

    # check
    for solution, _, expected in cases:
        self.assertEqual(expected, solution.decode_alignment_as_list_of_sequences())
def evaluate(self, solution: MSASolution) -> MSASolution:
    """Fill in the objectives of ``solution`` and return the same object.

    Columns made only of gaps are removed from the solution first, then
    the alignment is decoded once and handed to every score in
    ``self.score_list``. The i-th score's value is stored in
    ``solution.objectives[i]``; the value is stored with its sign flipped
    when the score reports that it is not a minimization score.

    :param solution: Solution to evaluate; it is modified in place.
    :return: The ``solution`` object that was passed in.
    """
    solution.remove_full_of_gaps_columns()
    decoded = solution.decode_alignment_as_list_of_sequences()

    for index, scorer in enumerate(self.score_list):
        value = scorer.compute(decoded)
        # Non-minimization scores are negated before being stored.
        solution.objectives[index] = value if scorer.is_minimization() else -value

    return solution
def test_should_return_original_sequences(self):
    """Decoding a freshly built solution yields the sequences it was built from."""
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1', 'seq2', 'seq3']
    problem.number_of_variables = 3

    sequences = ['AC---TGAC', 'AT--CT--C', 'AAC---TGC']
    # Pair every identifier with its aligned sequence: [('seq1', ...), ...].
    aligned = list(zip(problem.identifiers, sequences))
    msa = MSASolution(problem, msa=aligned)

    # check
    decoded = msa.decode_alignment_as_list_of_sequences()
    self.assertEqual(['AC---TGAC', 'AT--CT--C', 'AAC---TGC'], decoded)
def test_should_remove_all_gap_columns_case_d(self):
    """Two sequences sharing two all-gap columns lose exactly those columns.

    'AB--CDE-' / 'AB--CD-E' is expected to decode to 'ABCDE-' / 'ABCD-E'
    after ``remove_full_of_gaps_columns``.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1', 'seq2']
    problem.number_of_variables = 2
    aligned = [
        ('seq1', 'AB--CDE-'),
        ('seq2', 'AB--CD-E'),
    ]
    msa = MSASolution(problem, msa=aligned)

    # run
    msa.remove_full_of_gaps_columns()

    # check
    expected = ['ABCDE-', 'ABCD-E']
    self.assertEqual(expected, msa.decode_alignment_as_list_of_sequences())
def test_should_remove_all_gap_columns_case_b(self):
    """Three sequences with two runs of all-gap columns lose those columns.

    Columns where only some sequences hold a gap must survive: the expected
    decoding is 'ACTGC' / 'AC-AC' / 'A-CAC'.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1', 'seq2', 'seq3']
    problem.number_of_variables = 3
    aligned = [
        ('seq1', 'AC--T--GC'),
        ('seq2', 'AC-----AC'),
        ('seq3', 'A---C--AC'),
    ]
    msa = MSASolution(problem, msa=aligned)

    # run
    msa.remove_full_of_gaps_columns()

    # check
    expected = ['ACTGC', 'AC-AC', 'A-CAC']
    self.assertEqual(expected, msa.decode_alignment_as_list_of_sequences())
def test_should_remove_gap(self):
    """Removing one gap at position 2 from each of three identical sequences.

    Every sequence starts as 'AC---TGAC' and is expected to decode to
    'AC--TGAC' afterwards.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1', 'seq2', 'seq3']
    problem.number_of_variables = 3
    aligned = [(identifier, 'AC---TGAC') for identifier in problem.identifiers]
    msa = MSASolution(problem, msa=aligned)

    # run: same position, once per sequence index, in ascending order
    for sequence_index in range(3):
        msa.remove_gap_from_sequence(sequence_index, 2)

    # check
    expected = ['AC--TGAC', 'AC--TGAC', 'AC--TGAC']
    self.assertEqual(expected, msa.decode_alignment_as_list_of_sequences())
def test_should_merge_gaps_groups_case_b(self):
    """Merging the gap groups [2, 4, 4, 8, 8, 10] of one sequence gives [2, 10].

    The gap groups are overwritten directly on the solution, the decoded
    alignment is verified before merging, and the merged list is verified
    afterwards.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1']
    problem.number_of_variables = 1
    msa = MSASolution(problem, msa=[('seq1', 'ACTGAC')])
    msa.gaps_groups[0] = [2, 4, 4, 8, 8, 10]

    # Sanity check on the decoding produced by the hand-written gap groups.
    decoded_before_merge = msa.decode_alignment_as_list_of_sequences()
    self.assertEqual(["AC-----------TGAC"], decoded_before_merge)

    # run
    msa.merge_gaps_groups()

    # check
    merged = msa.gaps_groups[0]
    self.assertEqual([2, 10], merged)
def test_should_merge_gaps_groups(self):
    """Merging gap groups on two sequences.

    The gap groups are overwritten with [2, 4, 4, 5] and [2, 4, 5, 8]; the
    decoded alignment is verified first, then ``merge_gaps_groups`` is
    expected to leave [2, 5] and [2, 8] respectively.
    """
    # setup
    problem = MSA(score_list=[])
    problem.identifiers = ['seq1', 'seq2']
    problem.number_of_variables = 2
    msa = MSASolution(problem, msa=[('seq1', 'ACTGAC'), ('seq2', 'ATCTC')])

    initial_groups = [
        [2, 4, 4, 5],
        [2, 4, 5, 8],
    ]
    for sequence_index, groups in enumerate(initial_groups):
        msa.gaps_groups[sequence_index] = groups

    # Sanity check on the decoding produced by the hand-written gap groups.
    decoded_before_merge = msa.decode_alignment_as_list_of_sequences()
    self.assertEqual(["AC-----TGAC", "AT-------CTC"], decoded_before_merge)

    # run
    msa.merge_gaps_groups()

    # check
    expected_groups = [
        [2, 5],
        [2, 8],
    ]
    for sequence_index, expected in enumerate(expected_groups):
        self.assertEqual(expected, msa.gaps_groups[sequence_index])