def test_should_return_original_alignment_size(self):
    # Arrange: a three-sequence problem whose aligned rows are all 9 columns wide.
    aligned_rows = [
        ('seq1', 'AC---TGAC'),
        ('seq2', 'AT--CT--C'),
        ('seq3', 'AAC---TGC'),
    ]
    msa_problem = MSA(score_list=[])
    msa_problem.identifiers = ['seq1', 'seq2', 'seq3']
    msa_problem.number_of_variables = 3
    solution = MSASolution(msa_problem, msa=aligned_rows)

    # Assert: the reported alignment length matches the row width (gaps included).
    self.assertEqual(9, solution.get_length_of_alignment())
def __find_symbol_position_in_original_sequence(self, solution: MSASolution, seq_index: int, position: int):
    """Map an alignment column to a symbol position in the original sequence.

    Given a column ``position`` in the aligned sequence ``seq_index``, return
    the position of the corresponding symbol in the original sequence, i.e.
    with gaps not taken into account.

    If the column holds a gap, the lookup is redirected to the position
    returned by ``solution.get_next_char_position_after_gap``; when that
    position is negative (presumably meaning no symbol follows the gap --
    confirm against ``MSASolution``), -1 is returned.

    :param solution: Alignment to inspect.
    :param seq_index: Index of the sequence inside the alignment.
    :param position: Column index inside the aligned sequence.
    :return: Position of the symbol in the original sequence, or -1.
    :raises Exception: If ``position`` is not smaller than the alignment length.
    """
    alignment_length = solution.get_length_of_alignment()

    # Columns are treated as 0-based, so ``position == alignment_length`` is
    # already one past the last valid column and must be rejected as well.
    if position >= alignment_length:
        raise Exception(
            'Position {0} is out of range for an alignment of length {1}'.format(
                position, alignment_length))

    if solution.is_gap_char_at_sequence(seq_index, position):
        # Skip over the gap run to the next real symbol, if there is one.
        position = solution.get_next_char_position_after_gap(seq_index, position)
        if position < 0:
            return -1

    return solution.get_char_position_in_original_sequence(seq_index, position)
def do_mutation(self, solution: MSASolution) -> MSASolution:
    """Insert one random gap into every sequence, with probability ``self.probability``.

    When the mutation fires, each sequence of ``solution`` receives a gap at
    an independently drawn column in ``[0, length - 1]``, where ``length`` is
    the alignment length measured before any gap is inserted. If
    ``self.remove_full_of_gap_columns`` is set, ``remove_full_of_gaps_columns``
    is then called on the solution. The solution is modified in place and
    returned.

    :param solution: Alignment to mutate.
    :return: The same ``solution`` object.
    :raises Exception: If the mutated alignment fails ``is_valid_msa``.
    """
    # Guard clause: no mutation this time, hand the solution back untouched.
    if random.random() > self.probability:
        return solution

    # Measured once up front so every sequence draws from the same column range.
    last_column = solution.get_length_of_alignment() - 1
    for row in range(solution.number_of_variables):
        gap_column = random.randint(0, last_column)
        solution.add_gap_to_sequence_at_index(row, gap_column)

    if self.remove_full_of_gap_columns:
        solution.remove_full_of_gaps_columns()

    # Sanity check: alignment is valid (same length for all sequences)
    if solution.is_valid_msa():
        return solution

    raise Exception("Mutated solution is not valid! {0}".format(
        solution.decode_alignment_as_list_of_pairs()))