Ejemplo n.º 1
0
    def test_should_single_point_crossover_work_properly_case_g(self, random_call):
        """ Single-point crossover on the MSA example from Ortuño's paper ('|' marks the cut):
            GKGD---PK|KP, GKGD-PK|KP   => GKGD---PK-KP, GKGD-PK--KP
            M------QD|RV, --M--QD|RV   => M------QD-RV, --M--QD--RV
            MKKLKKHPD|FP, MKKLKKHPD|FP => MKKLKKHPD-FP, MKKLKKHPDFP
            M--------|HI, ---M--H|I-   => M--------HI-, ---M--H---I """
        # setup
        names = ['seq1', 'seq2', 'seq3', 'seq4']
        first_rows = ['GKGD---PKKP', 'M------QDRV', 'MKKLKKHPDFP', 'M--------HI']
        second_rows = ['GKGD-PKKP', '--M--QDRV', 'MKKLKKHPDFP', '---M--HI-']

        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 4
        parent_a = MSASolution(problem, msa=list(zip(names, first_rows)))
        parent_b = MSASolution(problem, msa=list(zip(names, second_rows)))

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        # random_call is presumably the patched random source (decorator not visible here);
        # its return value is pinned to 8.
        random_call.return_value = 8
        offspring = operator.execute([parent_a, parent_b])

        # check
        expected_first_child = ["GKGD---PK-KP", "M------QD-RV", "MKKLKKHPD-FP", "M--------HI-"]
        expected_second_child = ["GKGD-PK--KP", "--M--QD--RV", "MKKLKKHPDFP", "---M--H---I"]
        self.assertEqual(expected_first_child,
                         offspring[0].decode_alignment_as_list_of_sequences())
        self.assertEqual(expected_second_child,
                         offspring[1].decode_alignment_as_list_of_sequences())
Ejemplo n.º 2
0
    def test_should_single_point_crossover_work_properly_real_case(self):
        """Both children produced by cross_parents on a four-sequence case must be valid MSAs."""
        # setup
        names = ['a', 'b', 'c', 'd']
        first_rows = [
            '----GKGDPKKPRGKMSSYAFFVQTSREEHKKKHPDASVNFSEFSKKCSERWKTMSAKEKGKFEDMAKADKARYEREMKTYIPPK----------GE',
            '-------MQDRVKRPMNAFIVWSRDQRRKMALENPRMRN--SEISKQLGYQWKMLTEAEKWPFFQEAQKLQAMHREKYPNYKYRP---RRKAKMLPK',
            'MKKLK---KHPDFPKKPLTPYFRFFMEKRAKYAKLHPEMSNLDLTKILSKKYKELPEKKKMKYIQDFQREKQEFERNLARFREDH---PDLIQNAKK',
            '---------MHIKKPLNAFMLYMKEMRANVVAESTLKES--AAINQILGRRWHALSREEQAKYYELARKERQLHMQLYPGWSARDNYGKKKKRKREK',
        ]
        second_rows = [
            '----GKGDPKKPRGKMSSYAFFVQTSREEHKKKHPDASVNFSEFSKKCSERWKTMSAKEKGKFEDMAKADKARYEREMKTYIPPK---GE-------',
            '----M---QDRVKRPMNAFIVWSRDQRRKMALENPRMRN--SEISKQLGYQWKMLTEAEKWPFFQEAQKLQAMHREKYPNYKYRP---RRKAKMLPK',
            'MKKLK-KHPDFPKKPLTPYFRFFMEKRAKYAKLHPEMSN--LDLTKILSKKYKELPEKKKMKYIQDFQREKQEFERNLARFREDH---PDLIQNAKK',
            '-------MH--IKKPLNAFMLYMKEMRANVVAESTLKES--AAINQILGRRWHALSREEQAKYYELARKERQLHMQLYPGWSARDNYGKKKKRKREK',
        ]

        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 4
        parent_a = MSASolution(problem, msa=list(zip(names, first_rows)))
        parent_b = MSASolution(problem, msa=list(zip(names, second_rows)))

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        cut_column = 10
        first_parent_points = [10, 10, 10, 10]
        second_parent_points = [10, 10, 8, 8]
        offspring = operator.cross_parents(cut_column, [parent_a, parent_b],
                                           first_parent_points, second_parent_points)

        # check
        for child_index in (0, 1):
            self.assertTrue(offspring[child_index].is_valid_msa())
Ejemplo n.º 3
0
    def find_length_of_the_largest_sequence(self, solution: MSASolution):
        """Return the greatest value reported by ``solution.get_length_of_sequence``.

        Sequence 0 is always queried first; the remaining indices up to
        ``solution.number_of_variables - 1`` are queried in ascending order.
        """
        first_length = solution.get_length_of_sequence(0)
        other_lengths = (solution.get_length_of_sequence(index)
                         for index in range(1, solution.number_of_variables))
        return max([first_length, *other_lengths])
Ejemplo n.º 4
0
    def test_should_return_number_of_gaps_of_one_sequences(self):
        """The sequence at index 0 ('AC---TGAC') must report three gaps."""
        # setup
        rows = [('seq1', 'AC---TGAC'), ('seq2', 'AT--CT--C'), ('seq3', 'AAC---TGC')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # check
        gaps_in_first_row = alignment.get_number_of_gaps_of_sequence_at_index(0)
        self.assertEqual(3, gaps_in_first_row)
Ejemplo n.º 5
0
    def test_should_return_original_alignment_size(self):
        """An alignment built from three 9-character rows must report length 9."""
        # setup
        rows = [('seq1', 'AC---TGAC'), ('seq2', 'AT--CT--C'), ('seq3', 'AAC---TGC')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # check
        alignment_length = alignment.get_length_of_alignment()
        self.assertEqual(9, alignment_length)
Ejemplo n.º 6
0
    def evaluate(self, solution: MSASolution) -> MSASolution:
        """Fill ``solution.objectives`` with one value per entry of ``self.score_list``.

        The solution first has its all-gap columns removed and is then decoded
        to a list of sequences, which every score receives. A score that is
        not a minimization score has its objective stored negated.

        :param solution: solution to evaluate; it is modified in place.
        :return: the same solution object.
        """
        solution.remove_full_of_gaps_columns()
        decoded = solution.decode_alignment_as_list_of_sequences()

        for index, scorer in enumerate(self.score_list):
            solution.objectives[index] = scorer.compute(decoded)

            if scorer.is_minimization():
                continue

            # Flip the sign of scores that are not minimization scores.
            solution.objectives[index] = -solution.objectives[index]

        return solution
Ejemplo n.º 7
0
    def test_should_return_gap_columns(self):
        """Three identical '--AA-' rows must yield gap columns 0, 1 and 4."""
        # setup
        rows = [('seq1', '--AA-'), ('seq2', '--AA-'), ('seq3', '--AA-')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # check
        expected_columns = [0, 1, 4]
        self.assertEqual(expected_columns, alignment.get_gap_columns_from_alignment())
Ejemplo n.º 8
0
    def test_should_return_original_sequences(self):
        """Decoding an untouched solution must give back the rows it was built from."""
        # setup
        names = ['seq1', 'seq2', 'seq3']
        sequences = ['AC---TGAC', 'AT--CT--C', 'AAC---TGC']
        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=list(zip(names, sequences)))

        # check
        decoded = alignment.decode_alignment_as_list_of_sequences()
        self.assertEqual(['AC---TGAC', 'AT--CT--C', 'AAC---TGC'], decoded)
Ejemplo n.º 9
0
    def test_should_return_is_gap_column(self):
        """Column 3 holds a gap in every row; column 4 holds a 'C' in the second row."""
        # setup
        rows = [('seq1', 'AC---TGAC'), ('seq2', 'AT--CT--C'), ('seq3', 'AAC---TGC')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # check
        all_gap_column, mixed_column = 3, 4
        self.assertTrue(alignment.is_gap_column(all_gap_column))
        self.assertFalse(alignment.is_gap_column(mixed_column))
Ejemplo n.º 10
0
    def test_should_single_point_crossover_work_properly_real_case(self, random_call):
        """Crossover of two eight-sequence parents must produce two valid MSAs.

        The rows given to ``msa_1`` and ``msa_2`` appear to be identical, so the
        test only checks that both children pass ``is_valid_msa``; it does not
        compare their contents.
        """
        # setup
        problem = MSA(score_list=[])
        problem.identifiers = ['1bbt_ac', '1al2_ad', '1b35_C', '1bbt_ab', '1mec_aa', '1bbt_aa', '1al2_ab',
                                   '1al2_ac']
        problem.number_of_variables = 8
        # First parent: one (identifier, aligned sequence) pair per identifier.
        msa_1 = MSASolution(problem, msa=[
            ('1bbt_ac',
             '------GIFPVACSDGYGGLVTTDPKTAD---PVYGKVFNPPRNQLPGRFTNLLDVAEACP--------TFLRFEGGVPYVTTKTDSDRVLAQFDMSL----AAKHMSNTFLAG---------------------LAQYYTQYSGT-----INLHFMFTGPTDAKA-------RYMVAY----APPGMEPPKTPEAAAH---------------CIHAEWDTGLNSKF---------TFSIPYLSAADYT----YTASDVAETTNV--------QGWVCLFQ--------ITHGKADG-------DALVVLASAGKDF-----------------------ELRLPVDARAE----'),
            ('1al2_ad',
             '-------GLPVMNTPGSNQYLTADNFQSP---CALPEFDVTPPIDIPGEVKNMMELAEIDTMIPFDL--SATKKNTMEMYRVRLSDKPHTDDPILCLSLSPASDPRLSHTMLGE---------------------ILNYYTHWAGS-----LKFTFLFCGSMMATG-------KLLVSY----APPGADPPKKRKEAML---------------GTHVIWDIGLQSSC---------TMVVPWISNTT------YRQTIDDSFTE---------GGYISVFYQTRIV---VPLSTPRE-------MDILGFVSACNDF-----------------------SVRLLRDTTHIEQKA'),
            ('1b35_C',
             'SKPTVQGKIGECKLRGQGRMANFDGMDMSHKMALSSTNEIETNEGLAGTSLDVMDLSRVLSIPNYWDRFTWKTSDVINTVLWDNYVSPFKVKPYSATI-----TDRFRCTHMGK---------------------VANAFTYWRGS-----MVYTFKFVKTQYHSG---RLRISFIPYYYNTTISTGTPDVSRTQKI---------------------VVDLRTSTAV---------SFTVPYIGSRPWLYCIRPESSWLSKDNTDGALMYNCVSGIVRVEVLNQLVAAQNVFSEIDVICEVNGGPDLEFAGPTCPRY----------VPYAGDFTLADTRKIEAERTQEYSNNED'),
            ('1bbt_ab',
             '-------LLEDRILTTRNGHTTSTTQSS----VGVTYGYATAEDFVSGPNTSGLETRVV----------QAERFFKTHLFDWVTSDSFGRCHLLELPT---------DHKGVYGS--------------------LTDSYAYMRNG-----WDVEVTAVGNQFNGG-------CLLVAM----VPELCSIQKRELYQLT--------------LFPHQFINPRTNMTA---------HITVPFVGVNR------YDQYKVHKP-----------WTLVVMVVAPLTV---NTEGAPQI-------KVYANIAPTNVHV-----------------------AGEFPSKE-------'),
            ('1mec_aa',
             '------------------GVENAEKGVTEN--TDATADFVAQPVYLPENQTKVAFFYDRSSPIGRFAVKSGSLESGFAPFSNKACPNSVILTPGPQFDPAYDQLRPQRLTEIWGNGNEETSEVFPLKTKQDYSFCLFSPFVYYKCD-----LEVTLSPHTSGAHGL---------LVRW----CPTGTPTKPTTQVLHEVSSLSEGRT------PQVYSAGPGTSNQI---------SFVVPYNSPLSVLPAVWYNGHKRFDNTGD--------LGIAPNSDFGTLF---FAGTKPDI-------KFTVYLRYKNMRVFCPRP--TVFFPWPT----SGDKIDMTPRAGVL-----'),
            ('1bbt_aa',
             '---------------------TTSAGESADPVTTTVENYGGETQIQRRQHTDVSFI--------------------MDRFVKVTPQNQINILDLMQVP---------SHTLVGG---------------------LLRASTYYFSD-----LEIAVK------HEG---------DLTW----VPNGAPEK---------------------------ALDNTTNPTAYHKAPLT--RLALPYTAPHRVLATV-YNGECRTLPTSFN-------YGAIKATRVTELL---YRMKRAETYCP----RPLLAIHPTEARH---------------------KQKIVAP----------'),
            ('1al2_ab',
             '------AATSRDALPNTEASGPTHSKEIP---ALTAVETGATNPLVPSDTVQTRHVVQH----------RSRSESSIESFFARGACVTIMTVDNPAST-----TNKDKLFAVWKITYKDTVQLRR----------KLEFFTYSRFD-----MELTFVVTANFTETNNGHALNQVYQIMY----IPPGAPVP----EKWD-----------------DYTWQTSSNPSIFYTYGTAPARISVPYVGISN-AYSHFYDGFSKVPLKDQSAALGDSLYGAASLNDFGILAVRVVNDHNPTKVT----SKIRVYLKPKHIRVWCPRPPRAVAYYGPGVDYKDGTLTPLSTKDLTTY----'),
            ('1al2_ac',
             '----EACGYSDRVLQLTLGNSTITTQEA----ANSVVAYGRWPEYLRDSEANPVDQPTEPDV-------AACRFYTLDTVSWTKESRGWWWKLPDALRDMGLFGQNMYYHYLGRSGYTVHVQCNASKFHQGALGVFAVPEMCLAGDSNTTTMHTSYQNANPGEKGG-------TFTGTF----TPDNNQTSPARRFCPVDYLLGNGTLLGNAFVFPHQIINLRTNNCA---------TLVLPYVNSLS------IDSMVKHNN-----------WGIAILPLAPLNF---ASESSPEI-------PITLTIAPMCCEF-------------------NGLRNITLPRLQ-------'),
        ])
        # Second parent, built from the same identifiers.
        msa_2 = MSASolution(problem, msa=[
            ('1bbt_ac',
             '------GIFPVACSDGYGGLVTTDPKTAD---PVYGKVFNPPRNQLPGRFTNLLDVAEACP--------TFLRFEGGVPYVTTKTDSDRVLAQFDMSL----AAKHMSNTFLAG---------------------LAQYYTQYSGT-----INLHFMFTGPTDAKA-------RYMVAY----APPGMEPPKTPEAAAH---------------CIHAEWDTGLNSKF---------TFSIPYLSAADYT----YTASDVAETTNV--------QGWVCLFQ--------ITHGKADG-------DALVVLASAGKDF-----------------------ELRLPVDARAE----'),
            ('1al2_ad',
             '-------GLPVMNTPGSNQYLTADNFQSP---CALPEFDVTPPIDIPGEVKNMMELAEIDTMIPFDL--SATKKNTMEMYRVRLSDKPHTDDPILCLSLSPASDPRLSHTMLGE---------------------ILNYYTHWAGS-----LKFTFLFCGSMMATG-------KLLVSY----APPGADPPKKRKEAML---------------GTHVIWDIGLQSSC---------TMVVPWISNTT------YRQTIDDSFTE---------GGYISVFYQTRIV---VPLSTPRE-------MDILGFVSACNDF-----------------------SVRLLRDTTHIEQKA'),
            ('1b35_C',
             'SKPTVQGKIGECKLRGQGRMANFDGMDMSHKMALSSTNEIETNEGLAGTSLDVMDLSRVLSIPNYWDRFTWKTSDVINTVLWDNYVSPFKVKPYSATI-----TDRFRCTHMGK---------------------VANAFTYWRGS-----MVYTFKFVKTQYHSG---RLRISFIPYYYNTTISTGTPDVSRTQKI---------------------VVDLRTSTAV---------SFTVPYIGSRPWLYCIRPESSWLSKDNTDGALMYNCVSGIVRVEVLNQLVAAQNVFSEIDVICEVNGGPDLEFAGPTCPRY----------VPYAGDFTLADTRKIEAERTQEYSNNED'),
            ('1bbt_ab',
             '-------LLEDRILTTRNGHTTSTTQSS----VGVTYGYATAEDFVSGPNTSGLETRVV----------QAERFFKTHLFDWVTSDSFGRCHLLELPT---------DHKGVYGS--------------------LTDSYAYMRNG-----WDVEVTAVGNQFNGG-------CLLVAM----VPELCSIQKRELYQLT--------------LFPHQFINPRTNMTA---------HITVPFVGVNR------YDQYKVHKP-----------WTLVVMVVAPLTV---NTEGAPQI-------KVYANIAPTNVHV-----------------------AGEFPSKE-------'),
            ('1mec_aa',
             '------------------GVENAEKGVTEN--TDATADFVAQPVYLPENQTKVAFFYDRSSPIGRFAVKSGSLESGFAPFSNKACPNSVILTPGPQFDPAYDQLRPQRLTEIWGNGNEETSEVFPLKTKQDYSFCLFSPFVYYKCD-----LEVTLSPHTSGAHGL---------LVRW----CPTGTPTKPTTQVLHEVSSLSEGRT------PQVYSAGPGTSNQI---------SFVVPYNSPLSVLPAVWYNGHKRFDNTGD--------LGIAPNSDFGTLF---FAGTKPDI-------KFTVYLRYKNMRVFCPRP--TVFFPWPT----SGDKIDMTPRAGVL-----'),
            ('1bbt_aa',
             '---------------------TTSAGESADPVTTTVENYGGETQIQRRQHTDVSFI--------------------MDRFVKVTPQNQINILDLMQVP---------SHTLVGG---------------------LLRASTYYFSD-----LEIAVK------HEG---------DLTW----VPNGAPEK---------------------------ALDNTTNPTAYHKAPLT--RLALPYTAPHRVLATV-YNGECRTLPTSFN-------YGAIKATRVTELL---YRMKRAETYCP----RPLLAIHPTEARH---------------------KQKIVAP----------'),
            ('1al2_ab',
             '------AATSRDALPNTEASGPTHSKEIP---ALTAVETGATNPLVPSDTVQTRHVVQH----------RSRSESSIESFFARGACVTIMTVDNPAST-----TNKDKLFAVWKITYKDTVQLRR----------KLEFFTYSRFD-----MELTFVVTANFTETNNGHALNQVYQIMY----IPPGAPVP----EKWD-----------------DYTWQTSSNPSIFYTYGTAPARISVPYVGISN-AYSHFYDGFSKVPLKDQSAALGDSLYGAASLNDFGILAVRVVNDHNPTKVT----SKIRVYLKPKHIRVWCPRPPRAVAYYGPGVDYKDGTLTPLSTKDLTTY----'),
            ('1al2_ac',
             '----EACGYSDRVLQLTLGNSTITTQEA----ANSVVAYGRWPEYLRDSEANPVDQPTEPDV-------AACRFYTLDTVSWTKESRGWWWKLPDALRDMGLFGQNMYYHYLGRSGYTVHVQCNASKFHQGALGVFAVPEMCLAGDSNTTTMHTSYQNANPGEKGG-------TFTGTF----TPDNNQTSPARRFCPVDYLLGNGTLLGNAFVFPHQIINLRTNNCA---------TLVLPYVNSLS------IDSMVKHNN-----------WGIAILPLAPLNF---ASESSPEI-------PITLTIAPMCCEF-------------------NGLRNITLPRLQ-------'),
        ])

        # Crossover is always applied (probability 1.0) and gap columns are kept.
        crossover = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        # NOTE(review): random_call is presumably a mock injected by a patch decorator
        # that is not visible in this snippet; its return value is pinned to 176.
        random_call.return_value = 176
        children = crossover.execute([msa_1, msa_2])

        # check
        self.assertTrue(children[0].is_valid_msa())
        self.assertTrue(children[1].is_valid_msa())
Ejemplo n.º 11
0
 def fill_sequences_with_gaps_to_reach_the_max_sequence_length(
         self, solution: MSASolution, max_length: int,
         cutting_points: list):
     """Add gaps to every sequence shorter than ``max_length`` until it reaches it.

     For sequence ``i`` one gap is added per missing position. The gap goes at
     ``cutting_points[i] + 1``, or at the sequence's initial length minus one
     when ``cutting_points[i]`` is -1. Sequences that are not shorter than
     ``max_length`` are left alone.

     :param solution: solution to pad; it is modified in place.
     :param max_length: target length for every sequence.
     :param cutting_points: one entry per sequence; -1 selects the fallback position.
     """
     for row in range(solution.number_of_variables):
         initial_length = solution.get_length_of_sequence(row)
         missing = max_length - initial_length
         if missing <= 0:
             continue

         for _ in range(missing):
             cut = cutting_points[row]
             # The fallback position is based on the length measured before padding.
             where = initial_length - 1 if cut == -1 else cut + 1
             solution.add_gap_to_sequence_at_index(seq_index=row,
                                                   gap_position=where)
Ejemplo n.º 12
0
    def test_should_remove_gap_column(self):
        """Removing column 3 must leave the expected ``gaps_groups`` for each row."""
        # setup
        rows = [('seq1', 'AC---TGAC'), ('seq2', 'AT--CT--C'), ('seq3', 'AAC---TGC')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # run
        alignment.remove_gap_column(3)

        # check
        expected_groups = [[2, 2, 4, 4], [2, 2, 6, 7], [4, 5]]
        self.assertEqual(expected_groups, alignment.gaps_groups)
Ejemplo n.º 13
0
    def test_should_remove_all_gap_columns_case_d(self):
        """Only the shared gap columns 2-3 are dropped; the trailing mixed columns stay."""
        # setup
        rows = [('seq1', 'AB--CDE-'), ('seq2', 'AB--CD-E')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 2
        alignment = MSASolution(problem, msa=rows)

        # run
        alignment.remove_full_of_gaps_columns()

        # check
        decoded = alignment.decode_alignment_as_list_of_sequences()
        self.assertEqual(['ABCDE-', 'ABCD-E'], decoded)
Ejemplo n.º 14
0
    def test_should_remove_all_gap_columns_case_b(self):
        """Columns that hold a gap in all three rows must disappear from the decoded rows."""
        # setup
        rows = [('seq1', 'AC--T--GC'), ('seq2', 'AC-----AC'), ('seq3', 'A---C--AC')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # run
        alignment.remove_full_of_gaps_columns()

        # check
        decoded = alignment.decode_alignment_as_list_of_sequences()
        self.assertEqual(['ACTGC', 'AC-AC', 'A-CAC'], decoded)
Ejemplo n.º 15
0
    def find_cutting_points_in_first_parent(self, solution: MSASolution,
                                            position: int) -> list:
        """ Compute the real cutting point of every sequence for a given column.

        A sequence holding a non-gap symbol at ``position`` is cut at ``position``
        itself; otherwise its cutting point is whatever
        ``solution.get_next_char_position_after_gap`` returns for that column.

        :param solution: solution whose sequences are inspected.
        :param position: column chosen as the cutting point.
        :return: list with one cutting point per sequence. """

        def cutting_point_of(row: int):
            if solution.is_gap_char_at_sequence(row, position):
                return solution.get_next_char_position_after_gap(row, position)
            return position

        return [cutting_point_of(row)
                for row in range(solution.number_of_variables)]
Ejemplo n.º 16
0
    def test_should_return_length_of_gaps_groups(self):
        """``get_length_of_gaps`` must return the expected value for every row of two alignments."""
        # setup
        small_rows = [('seq1', 'AC---TGAC'), ('seq2', 'AT--CT--C'), ('seq3', 'AAC---TGC')]
        small_problem = MSA(score_list=[])
        small_problem.identifiers = ['seq1', 'seq2', 'seq3']
        small_problem.number_of_variables = 3
        small = MSASolution(small_problem, msa=small_rows)

        large_rows = [('seq1', 'GKGD---PKKP'),
                      ('seq2', 'M------QDRV'),
                      ('seq3', 'MKKLKKHPDFP'),
                      ('seq4', 'M--------HI-')]
        large_problem = MSA(score_list=[])
        large_problem.identifiers = ['seq1', 'seq2', 'seq3', 'seq4']
        large_problem.number_of_variables = 4
        large = MSASolution(large_problem, msa=large_rows)

        # check
        expectations = [
            (3, small, 0), (4, small, 1), (3, small, 2),
            (3, large, 0), (6, large, 1), (0, large, 2), (9, large, 3),
        ]
        for expected, alignment, row in expectations:
            self.assertEqual(expected, alignment.get_length_of_gaps(row))
Ejemplo n.º 17
0
    def do_mutation(self, solution: MSASolution) -> MSASolution:
        """Mutate ``solution`` in place with probability ``self.probability``.

        For every sequence whose ``gaps_groups`` entry has at least four
        elements, an even index ``k`` is drawn and ``diff`` is computed as
        ``(g[k+3] - g[k+2]) - (g[k+1] - g[k])``. Elements ``k+1`` and ``k+2``
        are then both shifted by ``diff``. Each entry looks like a flat
        ``[start, end, start, end, ...]`` list (TODO confirm against
        MSASolution), in which case the two neighbouring gap groups exchange
        their lengths while the outer bounds ``g[k]`` and ``g[k+3]`` stay put.

        :param solution: solution to mutate.
        :return: the same solution object.
        :raises Exception: if the mutated solution fails ``is_valid_msa``.
        """
        if not random.random() <= self.probability:
            return solution

        for row in range(solution.number_of_variables):
            groups = solution.gaps_groups[row]

            if len(groups) < 4:
                continue

            k = random.randrange(0, len(groups) - 2, 2)

            diff = (groups[k + 3] - groups[k + 2]) - (groups[k + 1] - groups[k])

            # The former diff < 0 / diff > 0 branches both reduce to these two
            # signed additions, and diff == 0 is a no-op.
            groups[k + 1] += diff
            groups[k + 2] += diff

        if self.remove_full_of_gap_columns:
            solution.remove_full_of_gaps_columns()

        # Sanity check: alignment is valid (same length for all sequences)
        if not solution.is_valid_msa():
            raise Exception("Mutated solution is not valid! {0}".format(
                solution.decode_alignment_as_list_of_pairs()))

        return solution
Ejemplo n.º 18
0
    def test_should_single_point_crossover_work_properly_case_j(self, random_call):
        """Two-sequence crossover with ``remove_gap_columns=True`` and the random value pinned to 2."""
        # setup
        names = ['seq1', 'seq2']
        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 2
        parent_a = MSASolution(problem, msa=list(zip(names, ['MIKMIM-IK', 'A-B-CDEF-'])))
        parent_b = MSASolution(problem, msa=list(zip(names, ['--MIKMIMIK', 'ABC-D-E-F-'])))

        operator = SPXMSA(probability=1.0, remove_gap_columns=True)

        # run
        random_call.return_value = 2
        offspring = operator.execute([parent_a, parent_b])

        # check
        first_child = offspring[0].decode_alignment_as_list_of_sequences()
        self.assertEqual(["MIK--MIMIK", "A-BCD-E-F-"], first_child)
        second_child = offspring[1].decode_alignment_as_list_of_sequences()
        self.assertEqual(["--MIKMIM-IK", "AB----CDEF-"], second_child)
Ejemplo n.º 19
0
    def test_should_the_solution_remain_unchanged_if_the_probability_is_zero(self):
        """With probability 0.0 the offspring must decode to the same pairs as the parents."""
        # setup
        first_pairs = [('seq1', 'ACTC'), ('seq2', 'A-TC'), ('seq3', 'A--C')]
        second_pairs = [('seq1', 'CT-G'), ('seq2', '-T-G'), ('seq3', '-ATG')]

        problem = MSA(score_list=[])
        problem.identifiers = ['seq1', 'seq2', 'seq3']
        problem.number_of_variables = 3
        parent_a = MSASolution(problem, msa=list(first_pairs))
        parent_b = MSASolution(problem, msa=list(second_pairs))

        operator = SPXMSA(probability=0.0, remove_gap_columns=False)

        # run
        result = operator.execute([parent_a, parent_b])

        # check
        self.assertEqual([('seq1', 'ACTC'), ('seq2', 'A-TC'), ('seq3', 'A--C')],
                         result[0].decode_alignment_as_list_of_pairs())
        self.assertEqual([('seq1', 'CT-G'), ('seq2', '-T-G'), ('seq3', '-ATG')],
                         result[1].decode_alignment_as_list_of_pairs())
Ejemplo n.º 20
0
    def test_should_single_point_crossover_work_properly_case_c(self, random_call):
        """ Single-sequence crossover with the random value pinned to 0:
            A|B-CD-EF, ---A|BCD-EF => ABCD-EF, ---AB-CD-EF """
        # setup
        problem = MSA(score_list=[])
        problem.identifiers = ['seq1']
        problem.number_of_variables = 1
        parent_a = MSASolution(problem, msa=[('seq1', 'AB-CD-EF')])
        parent_b = MSASolution(problem, msa=[('seq1', '---ABCD-EF')])

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        random_call.return_value = 0
        offspring = operator.execute([parent_a, parent_b])

        # check
        first_child = offspring[0].decode_alignment_as_list_of_sequences()
        self.assertEqual(["ABCD-EF"], first_child)
        second_child = offspring[1].decode_alignment_as_list_of_sequences()
        self.assertEqual(["---AB-CD-EF"], second_child)
Ejemplo n.º 21
0
    def test_should_single_point_crossover_work_properly_case_h(self, random_call):
        """ MSA with no crossover in the first sequence (random value pinned to 10):
            -----------|-M, --M|------  =>  ------------M------, --M """
        # setup
        problem = MSA(score_list=[])
        problem.identifiers = ['seq1']
        problem.number_of_variables = 1
        parent_a = MSASolution(problem, msa=[('seq1', '------------M')])
        parent_b = MSASolution(problem, msa=[('seq1', '--M------')])
        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        random_call.return_value = 10
        offspring = operator.execute([parent_a, parent_b])

        # check
        first_child = offspring[0].decode_alignment_as_list_of_sequences()
        self.assertEqual(["------------M------"], first_child)
        second_child = offspring[1].decode_alignment_as_list_of_sequences()
        self.assertEqual(["--M"], second_child)
Ejemplo n.º 22
0
    def test_should_get_original_char_position_in_aligned_sequence(self):
        """Check ``get_original_char_position_in_aligned_sequence`` row by row.

        Each case below is (expected result, seq_index, position).
        """
        # setup
        rows = [('seq1', '-ABC'), ('seq2', 'ABCD'), ('seq3', '--AB')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 3
        alignment = MSASolution(problem, msa=rows)

        # check
        cases = [
            # seq1: '-ABC'
            (1, 0, 0), (2, 0, 1), (3, 0, 2),
            # seq2: 'ABCD'
            (0, 1, 0), (1, 1, 1), (2, 1, 2),
            # seq3: '--AB'
            (2, 2, 0), (3, 2, 1),
        ]
        for expected, row, char_position in cases:
            actual = alignment.get_original_char_position_in_aligned_sequence(
                seq_index=row, position=char_position)
            self.assertEqual(expected, actual)
Ejemplo n.º 23
0
    def test_should_single_point_crossover_work_properly_case_f(self, random_call):
        """ Two-sequence crossover with the random value pinned to 7:
            GKGD---P|KK, GKGD-P|KK   => GKGD---PKK, GKGD-P-KK
            M------Q|DR-, --M--Q|DR  => M------QDR, --M--QDR- """
        # setup
        names = ['seq1', 'seq2']
        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 2
        parent_a = MSASolution(problem, msa=list(zip(names, ['GKGD---PKK', 'M------QDR-'])))
        parent_b = MSASolution(problem, msa=list(zip(names, ['GKGD-PKK', '--M--QDR'])))

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        random_call.return_value = 7
        offspring = operator.execute([parent_a, parent_b])

        # check
        first_child = offspring[0].decode_alignment_as_list_of_sequences()
        self.assertEqual(["GKGD---PKK", "M------QDR"], first_child)
        second_child = offspring[1].decode_alignment_as_list_of_sequences()
        self.assertEqual(["GKGD-P-KK", "--M--QDR-"], second_child)
Ejemplo n.º 24
0
    def test_should_single_point_crossover_work_properly_case_a_with_remove_gap_columns(self, random_call):
        """Run the same crossover with and without ``remove_gap_columns`` and compare the children."""
        # setup
        problem = MSA(score_list=[])
        problem.identifiers = ['seq1']
        problem.number_of_variables = 1
        parent_a = MSASolution(problem, msa=[('seq1', 'AB--CD-E')])
        parent_b = MSASolution(problem, msa=[('seq1', 'AB--CDE-')])

        keep_gaps = SPXMSA(probability=1.0, remove_gap_columns=False)
        drop_gaps = SPXMSA(probability=1.0, remove_gap_columns=True)

        # run
        random_call.return_value = 4
        kept = keep_gaps.execute([parent_a, parent_b])
        dropped = drop_gaps.execute([parent_a, parent_b])

        # check
        expectations = [
            (["AB--CDE-"], kept, 0),
            (["AB--CD-E"], kept, 1),
            (["ABCDE"], dropped, 0),
            (["ABCDE"], dropped, 1),
        ]
        for expected, offspring, child_index in expectations:
            self.assertEqual(expected,
                             offspring[child_index].decode_alignment_as_list_of_sequences())
Ejemplo n.º 25
0
    def __find_original_positions_in_aligned_sequences(
            self, solution: MSASolution,
            column_positions_in_first_parent: list):
        """Map one position per sequence through ``solution``.

        For every sequence index ``i`` the result holds
        ``solution.get_original_char_position_in_aligned_sequence(i, p)`` where
        ``p`` is ``column_positions_in_first_parent[i]``.

        :param solution: solution that performs the lookup.
        :param column_positions_in_first_parent: one position per sequence.
        :return: list with one looked-up position per sequence.
        """
        return [
            solution.get_original_char_position_in_aligned_sequence(
                row, column_positions_in_first_parent[row])
            for row in range(solution.number_of_variables)
        ]
Ejemplo n.º 26
0
    def do_mutation(self, solution: MSASolution) -> MSASolution:
        """Mutate one randomly chosen sequence of ``solution`` in place.

        Runs with probability ``self.probability``. If the chosen sequence's
        ``gaps_groups`` entry has at least four elements, an even index ``k``
        is drawn, ``g[k+1]`` grows by ``g[k+3] - g[k+2] + 1`` and elements
        ``k+2`` and ``k+3`` are deleted. Each entry looks like a flat
        ``[start, end, start, end, ...]`` list (TODO confirm against
        MSASolution), in which case the following gap group is absorbed into
        the chosen one. ``solution.merge_gaps_groups()`` is called afterwards.

        :param solution: solution to mutate.
        :return: the same solution object.
        :raises Exception: if the mutated solution fails ``is_valid_msa``.
        """
        if not random.random() <= self.probability:
            return solution

        variables = solution.number_of_variables
        row = random.randint(0, variables - 1) if variables >= 1 else 0
        groups = solution.gaps_groups[row]

        if len(groups) >= 4:
            k = random.randrange(0, len(groups) - 2, 2)

            # Number of positions spanned by the following group (inclusive bounds).
            absorbed = groups[k + 3] - groups[k + 2] + 1
            groups[k + 1] += absorbed

            # Drop the absorbed group's two entries.
            del groups[k + 2:k + 4]

        solution.merge_gaps_groups()

        if self.remove_full_of_gap_columns:
            solution.remove_full_of_gaps_columns()

        # Sanity check: alignment is valid (same length for all sequences)
        if not solution.is_valid_msa():
            raise Exception("Mutated solution is not valid! {0}".format(
                solution.decode_alignment_as_list_of_pairs()))

        return solution
Ejemplo n.º 27
0
    def do_mutation(self, solution: MSASolution) -> MSASolution:
        """Mutate every sequence of ``solution`` in place.

        Runs with probability ``self.probability``. For each sequence whose
        ``gaps_groups`` entry has at least four elements, an even index ``k``
        is drawn and elements ``k`` and ``k + 1`` are both moved by -1 or +1,
        chosen at random. ``solution.merge_gaps_groups()`` is called afterwards.

        :param solution: solution to mutate.
        :return: the same solution object.
        :raises Exception: if the mutated solution fails ``is_valid_msa``.
        """
        if not random.random() <= self.probability:
            return solution

        # Every sequence is visited, not just one.
        for row in range(solution.number_of_variables):
            groups = solution.gaps_groups[row]

            if len(groups) < 4:
                continue

            k = random.randrange(0, len(groups) - 2, 2)
            step = 1 if random.randint(0, 1) != 0 else -1

            for offset in (0, 1):
                groups[k + offset] += step

        solution.merge_gaps_groups()

        if self.remove_full_of_gap_columns:
            solution.remove_full_of_gaps_columns()

        # Sanity check: alignment is valid (same length for all sequences)
        if not solution.is_valid_msa():
            raise Exception("Mutated solution is not valid! {0}".format(
                solution.decode_alignment_as_list_of_pairs()))

        return solution
Ejemplo n.º 28
0
    def test_should_find_max_sequence_length(self):
        """The longest of the rows 'AAC', 'AAAAAAAC' and 'C' has eight characters."""
        # setup
        problem = MSA(score_list=[])
        problem.identifiers = ['a', 'b', 'c']
        problem.number_of_variables = 3
        msa = MSASolution(problem, msa=[('a', 'AAC'), ('b', 'AAAAAAAC'), ('c', 'C')])

        crossover = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        # Named so that it does not shadow the builtin ``max``.
        largest_length = crossover.find_length_of_the_largest_sequence(msa)

        # check
        self.assertEqual(8, largest_length)
Ejemplo n.º 29
0
    def test_should_fill_sequences_with_gaps_to_reach_the_max_sequence_length(self):
        """Pad three alignments with different cutting points and compare the decoded rows."""
        # setup
        names = ['a', 'b']
        problem = MSA(score_list=[])
        problem.identifiers = list(names)
        problem.number_of_variables = 2
        no_cuts = MSASolution(problem, msa=list(zip(names, ['-----GE', 'KWPFFQEAQK'])))
        one_cut = MSASolution(problem, msa=list(zip(names, ['-----GE', 'KWPFFQEAQK'])))
        tiny = MSASolution(problem, msa=list(zip(names, ['-', 'ABC'])))

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        operator.fill_sequences_with_gaps_to_reach_the_max_sequence_length(no_cuts, 10, [-1, -1])
        operator.fill_sequences_with_gaps_to_reach_the_max_sequence_length(one_cut, 10, [-1, 5])
        operator.fill_sequences_with_gaps_to_reach_the_max_sequence_length(tiny, 5, [-1, 1])

        # check
        expectations = [
            (["-----G---E", "KWPFFQEAQK"], no_cuts),
            (["-----G---E", "KWPFFQEAQK"], one_cut),
            (["-----", "AB--C"], tiny),
        ]
        for expected, alignment in expectations:
            self.assertEqual(expected, alignment.decode_alignment_as_list_of_sequences())
Ejemplo n.º 30
0
    def test_should_find_original_positions_in_solution_with_gaps(self):
        """Column 5 must map to position 3 in 'BC-D-E---' and position 1 in '--C--E---'."""
        # setup
        rows = [('seq1', 'BC-D-E---'), ('seq2', '--C--E---')]
        problem = MSA(score_list=[])
        problem.identifiers = [name for name, _ in rows]
        problem.number_of_variables = 2
        alignment = MSASolution(problem, msa=rows)

        operator = SPXMSA(probability=1.0, remove_gap_columns=False)

        # run
        positions = operator.find_original_positions_in_original_sequences(alignment, 5)

        # check
        for expected, row in ((3, 0), (1, 1)):
            self.assertEqual(expected, positions[row])