Ejemplo n.º 1
0
class PartitonsTest(ExpectingTestCase):
    """Tests for the ``partitions`` object attached to an ``AlignmentList``.

    Every test starts from an ``AlignmentList`` built from ``dna_data_fas``
    whose partitions have been reset (see ``setUp``).

    NOTE(review): the class name is misspelled ("Partitons"); it is kept
    as-is so existing test IDs keep working.
    """

    # Partition names the tests expect after loading the seven-gene
    # partition fixtures.
    BASE_PARTITIONS = [
        "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
        "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
    ]

    def setUp(self):
        """Create the temp dir and a fresh AlignmentList with reset partitions."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        """Clear the alignments, close the connection and drop the temp dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # ------------------------------------------------------------------
    # Private helpers (leading underscore keeps them out of test discovery)
    # ------------------------------------------------------------------

    def _load(self, part_file):
        """Read ``part_file[0]`` with ``no_aln_check=True``; return the result."""
        return self.aln_obj.partitions.read_from_file(part_file[0],
                                                      no_aln_check=True)

    def _reload_alignments(self, alignment_files):
        """Dispose of the current AlignmentList and build one from the files."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(alignment_files, sql_db=sql_db)

    def _key_data(self, view=list):
        """Return the keys of partitions, partitions_alignments and models.

        :param view: callable applied to each key view (``list`` keeps the
            stored order, ``sorted`` orders the names alphabetically).
        """
        parts = self.aln_obj.partitions
        return [
            view(parts.partitions.keys()),
            view(parts.partitions_alignments.keys()),
            view(parts.models.keys())
        ]

    def _names_and_counter(self):
        """Return ``[partition names, counter]`` for the current partitions."""
        parts = self.aln_obj.partitions
        # list() so the comparison against a list also holds on Python 3,
        # where keys() is a view object.
        return [list(parts.partitions.keys()), parts.counter]

    def _sort_partitions_by_range(self):
        """Re-order ``partitions.partitions`` by the first element of each value."""
        parts = self.aln_obj.partitions
        # items() instead of the Python 2-only iteritems()
        parts.partitions = OrderedDict(
            sorted(parts.partitions.items(), key=lambda x: x[1][0]))

    def _ranges_are_continuous(self):
        """Return True if every range starts right after the previous one.

        Walks ``partitions.partitions`` in its current order, expecting the
        first range to start at 0 and each next one at ``previous end + 1``.
        """
        continuous = True
        expected_start = 0
        for rng in self.aln_obj.partitions.partitions.values():
            if rng[0][0][0] == expected_start:
                expected_start = rng[0][0][1] + 1
            else:
                continuous = False
        return continuous

    def _check_concat_custom_fileset(self, part_file):
        """Load ``part_file``, activate two alignments, concatenate, check keys."""
        self._reload_alignments(dna_data_fas)
        self.aln_obj.partitions.read_from_file(part_file[0])

        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

        self.aln_obj.concatenate()

        self.expect_equal(self._key_data(sorted),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    # ------------------------------------------------------------------
    # Reading partition files
    # ------------------------------------------------------------------

    def test_read_from_nexus(self):
        self._load(concatenated_small_parNex)

        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self._load(concatenated_small_par)

        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        e = self._load(partition_bad_phy)

        self.assertIsInstance(e, InvalidPartitionFile)

    def test_unsorted_part_phylip(self):
        self._load(partition_unsorted_phy)

        self.assertEqual(self._names_and_counter(),
                         [self.BASE_PARTITIONS, 595])

    def test_phylip_dot_notation(self):
        self._load(partition_dot_not)

        self.assertEqual(self._names_and_counter(),
                         [self.BASE_PARTITIONS, 595])

    def test_nexus_dot_notation(self):
        self._load(dot_notation_nex)

        self.assertEqual(self._names_and_counter(),
                         [self.BASE_PARTITIONS, 595])

    def test_bad_dot_notation(self):
        e = self._load(bad_dot_notation_nex)

        self.assertIsInstance(e, InvalidPartitionFile)

    def test_import_new_partscheme(self):
        self._reload_alignments(concatenated_medium_nexus)

        self.aln_obj.partitions.reset()

        self._load(concatenated_small_par)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, self.BASE_PARTITIONS)

    def test_add_duplicate_name(self):
        self._load(concatenated_small_par)

        # The call must happen *inside* assertRaises; calling add_partition
        # eagerly and passing its result made this assertion a no-op.
        # NOTE(review): the original used "BaseCond1.fas", which is not a
        # duplicate of any loaded partition; "BaseConc1.fas" matches the
        # intent stated by the test name - confirm against add_partition.
        with self.assertRaises(PartitionException):
            self.aln_obj.partitions.add_partition("BaseConc1.fas", length=100)

    def test_get_partition_names(self):
        self._load(concatenated_small_par)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, self.BASE_PARTITIONS)

    def test_get_partition_names_withCodon(self):
        self._load(concatenated_smallCodon_parNex)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, [
            "BaseConc1.fas_1_1", "BaseConc1.fas_1_2", "BaseConc1.fas_1_3",
            "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas", "BaseConc5.fas",
            "BaseConc6.fas", "BaseConc7.fas"
        ])

    def test_single_partition(self):
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    # ------------------------------------------------------------------
    # Removing / renaming
    # ------------------------------------------------------------------

    def test_remove_partition_from_file_original(self):
        self._reload_alignments(dna_data_fas)

        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_name(self):
        self._load(concatenated_small_parNex)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")

        remaining = [n for n in self.BASE_PARTITIONS if n != "BaseConc3.fas"]

        # Check keys from _partitions, partitions_alignment and models
        self.expect_equal(self._key_data(), [remaining] * 3)

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_file(self):
        self._load(concatenated_small_parNex)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")

        remaining = [n for n in self.BASE_PARTITIONS if n != "BaseConc3.fas"]

        # Check keys from _partitions, partitions_alignment and models
        self.expect_equal(self._key_data(), [remaining] * 3)

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_change_name(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")

        self.expect_equal(
            self._key_data(),
            [self.BASE_PARTITIONS[1:] + ["OtherName"]] * 3)

        # Check if remaining partition ranges are continuous
        self._sort_partitions_by_range()
        self.expect_equal(self._ranges_are_continuous(), True)

    # ------------------------------------------------------------------
    # Merging / splitting
    # ------------------------------------------------------------------

    def test_merge_partitions(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            list(self.BASE_PARTITIONS), "New_part")

        self.assertEqual(self._key_data(), [["New_part"]] * 3)

    def test_split_partition(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])

        self.expect_equal(
            self._key_data(),
            [["part1", "part2"] + self.BASE_PARTITIONS[1:]] * 3)

        # Check if remaining partition ranges are continuous
        self._sort_partitions_by_range()
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_merge_and_split(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part")

        self.expect_equal(self._key_data(sorted), [self.BASE_PARTITIONS] * 3)

    def test_merge_and_custom_split1(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])

        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]

        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas', 'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])

        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]

        self.assertEqual(
            key_data, [['BaseConc1.fas'], ['BaseConc3.fas', 'BaseConc2.fas']])

    # ------------------------------------------------------------------
    # Concatenation with a custom active file set
    # ------------------------------------------------------------------

    def test_concat_custom_fileset_from_phy_partfile(self):
        self._check_concat_custom_fileset(concatenated_small_par)

    # Previously this method reused the "..._from_phy_partfile" name, which
    # silently replaced the phylip variant above so it never ran.
    def test_concat_custom_fileset_from_nex_partfile(self):
        self._check_concat_custom_fileset(concatenated_small_parNex)

    def test_merge_with_custom_fileset(self):
        self._reload_alignments(dna_data_fas)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc5.fas")
        ])

        self.aln_obj.concatenate()

        self.expect_equal(self._key_data(sorted),
                          [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    # ------------------------------------------------------------------
    # Substitution models
    # ------------------------------------------------------------------

    def test_model_detection(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        names = ('Teste1.fas', 'Teste2.fas', 'Teste3.fas', 'Teste4.fas',
                 'Teste5.fas', 'Teste6.fas', 'Teste7.fas')
        expected = OrderedDict(
            (name, [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']], [None], []])
            for name in names)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)

        names = ('Teste1.fas_1', 'Teste2.fas_86', 'Teste3.fas_171',
                 'Teste4.fas_256', 'Teste5.fas_341', 'Teste6.fas_426',
                 'Teste7.fas_511')
        expected = OrderedDict(
            (name, [[['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                     ['nst=6', 'statefreqpr=fixed(equal)'],
                     ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                    [None, None, None], []])
            for name in names)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])

        # Only the targeted partition is expected to carry the model.
        expected = OrderedDict(
            (name, [[[]], [None], []]) for name in self.BASE_PARTITIONS)
        expected['BaseConc1.fas'] = [[[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)

        expected = OrderedDict(
            (name, [[[]], ['GTR'], []]) for name in self.BASE_PARTITIONS)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        self._load(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"],
                                          apply_all=True)

        expected = OrderedDict(
            (name, [[[]], ['GTR', 'SYM'], ['12', '3']])
            for name in self.BASE_PARTITIONS)

        self.assertEqual(self.aln_obj.partitions.models, expected)
Ejemplo n.º 2
0
class SeconaryOpsTest(unittest.TestCase):
    """Tests for summary statistics and plot-data methods of ``AlignmentList``.

    Each test starts from an ``AlignmentList`` built from ``dna_data_fas``.

    NOTE(review): the class name is misspelled ("Seconary"); it is kept
    as-is so existing test IDs keep working.
    """

    # Header row expected as the first row of the summary table data.
    SUMMARY_HEADER = [
        'Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
        'Missing data', 'Missing data per gene', 'Variable sites',
        'Variable sites per gene', 'Informative sites',
        'Informative sites per gene'
    ]

    def setUp(self):
        """Create the temp dir and load the DNA FASTA fixtures."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection and drop the temp dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _check_single_alignment(self, method_name):
        """Activate only the first alignment and check the method's response.

        :param method_name: name of the zero-argument ``AlignmentList``
            method expected to return the ``single_alignment`` exception dict.
        """
        self.aln_obj.update_active_alignments([dna_data_fas[0]])

        result = getattr(self.aln_obj, method_name)()

        self.assertEqual(result, {"exception": "single_alignment"})

    # ------------------------------------------------------------------
    # Summary statistics
    # ------------------------------------------------------------------

    def test_summary_stats_all(self):
        sum_table, table_data = self.aln_obj.get_summary_stats()

        expected_summary = {
            'missing': '5 (0.04%)',
            'taxa': 24,
            'genes': 7,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '7 (1.18%)',
            'seq_len': 595,
            'avg_var': 1.0,
            'avg_inf': 0.0
        }
        expected_row = [
            7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0, '7 (1.18%)',
            1.0, '0 (0.0%)', 0.0
        ]

        self.assertEqual(
            [sum_table, table_data],
            [expected_summary, [self.SUMMARY_HEADER, expected_row]])

    def test_summary_stats_one_active(self):
        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])

        expected_summary = {
            'missing': '1 (0.05%)',
            'taxa': 24,
            'genes': 1,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '1 (1.18%)',
            'seq_len': 85,
            'avg_var': 1.0,
            'avg_inf': 0.0
        }
        expected_row = [
            1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0, '1 (1.18%)',
            1.0, '0 (0.0%)', 0.0
        ]

        self.assertEqual(
            [sum_table, table_data],
            [expected_summary, [self.SUMMARY_HEADER, expected_row]])

    # ------------------------------------------------------------------
    # Methods that must report "single_alignment" with one active alignment
    # ------------------------------------------------------------------

    def test_single_aln_outlier_mdata(self):
        self._check_single_alignment("outlier_missing_data")

    def test_single_aln_outlier_mdata_sp(self):
        self._check_single_alignment("outlier_missing_data_sp")

    def test_single_aln_outlier_seg(self):
        self._check_single_alignment("outlier_segregating")

    def test_single_aln_outlier_seg_sp(self):
        self._check_single_alignment("outlier_segregating_sp")

    def test_single_aln_outlier_seqsize(self):
        self._check_single_alignment("outlier_sequence_size")

    def test_single_aln_outlier_seqsize_sp(self):
        self._check_single_alignment("outlier_sequence_size_sp")

    def test_single_aln_average_seqsize_per_species(self):
        self._check_single_alignment("average_seqsize_per_species")

    def test_single_aln_average_seqsize(self):
        self._check_single_alignment("average_seqsize")

    def test_single_aln_sequence_similarity(self):
        self._check_single_alignment("sequence_similarity")

    def test_single_aln_sequence_segregation(self):
        self._check_single_alignment("sequence_segregation")

    def test_single_aln_length_polymorphism_correlation(self):
        self._check_single_alignment("length_polymorphism_correlation")

    def test_single_aln_taxa_distribution(self):
        self._check_single_alignment("taxa_distribution")

    def test_single_aln_cumulative_missing_genes(self):
        self._check_single_alignment("cumulative_missing_genes")

    def test_single_aln_gene_occupancy(self):
        self._check_single_alignment("gene_occupancy")

    def test_single_aln_missing_data_distribution(self):
        self._check_single_alignment("missing_data_distribution")

    def test_single_aln_missing_genes_average(self):
        self._check_single_alignment("missing_genes_average")

    def test_no_data(self):
        # Dispose of the fixture built in setUp before replacing it;
        # otherwise its connection is never closed (tearDown only sees
        # the replacement object).
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList([], sql_db=sql_db)

        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    # ------------------------------------------------------------------
    # Smoke tests: each method returns a truthy value on the full data set
    # ------------------------------------------------------------------

    def test_gene_occupancy(self):
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        self.assertTrue(
            self.aln_obj.characters_proportion_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_similarity(self):
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_similarity_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        self.assertTrue(
            self.aln_obj.sequence_conservation_gnp(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_segregation(self):
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_segregation_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        self.assertTrue(
            self.aln_obj.allele_frequency_spectrum_gene(
                join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
Ejemplo n.º 3
0
class AlignmentManipulationTest(unittest.TestCase):
    """Tests for operations that change the contents of an ``AlignmentList``.

    Every test starts from a fresh ``AlignmentList`` built from
    ``dna_data_fas`` and backed by the ``sql_db`` database file; the file is
    removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the ``AlignmentList`` fixture used by each test."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Close the fixture's database connection and delete ``sql_db``."""
        # Clearing is best-effort (a test may already have cleared the
        # fixture), but only ordinary errors are ignored so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            self.aln_obj.clear_alignments()
        except Exception:
            pass
        self.aln_obj.con.close()
        os.remove(sql_db)

    def test_clear_alns(self):
        """A cleared list should compare equal to an empty ``AlignmentList``.

        The attributes named in the ignore list passed to ``compare_inst``
        are excluded from the comparison.
        """
        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)

        # The reference instance opens its own connection; close it once the
        # comparison is done so it is not leaked past the test.
        try:
            self.assertTrue(
                compare_inst(self.aln_obj, aln, [
                    "log_progression", "locus_length", "partitions", "cur",
                    "con"
                ]))
        finally:
            aln.con.close()

    def test_update_act_anls(self):
        """Only the alignments passed to ``update_active_alignments`` should
        remain in ``alignments``.
        """
        active = [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ]

        self.aln_obj.update_active_alignments(active)

        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

    def test_update_act_alns_err(self):
        """A name that matches no alignment should not appear among the
        active alignments.
        """
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas"),
            join(data_path, "Wrong_name")
        ])

        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

    def test_update_aln_shelve(self):
        """Shelving ``BaseConc1.fas`` should leave the other six alignments
        active.
        """
        self.aln_obj.update_active_alignment(join(data_path, "BaseConc1.fas"),
                                             "shelve")

        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc2.fas"),
            join(data_path, "BaseConc3.fas"),
            join(data_path, "BaseConc4.fas"),
            join(data_path, "BaseConc5.fas"),
            join(data_path, "BaseConc6.fas"),
            join(data_path, "BaseConc7.fas")
        ])

    def test_update_aln_act(self):
        """Activating one alignment after deactivating all should leave only
        that alignment active.
        """
        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(join(data_path, "BaseConc1.fas"),
                                             "active")

        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        """An ``Alignment`` passed to ``add_alignments`` should be appended
        after the existing alignments.
        """
        # Materialise the keys as a list: the value is concatenated with a
        # list below, which a dict view does not support, and a snapshot is
        # needed before the list is modified.
        fl = list(self.aln_obj.alignments.keys())

        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur)

        self.aln_obj.add_alignments([aln])

        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         fl + [join(data_path, "c97d5m4p2.loci")])

    def test_remove_taxa_from_list(self):
        """Removing taxa given as a list should drop exactly those taxa from
        ``taxa_names``.
        """
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]

        expected_taxa = [
            tx for tx in self.aln_obj.taxa_names if tx not in taxa_list
        ]

        self.aln_obj.remove_taxa(taxa_list)

        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_file(self):
        """Removing taxa via ``taxa_to_remove`` should drop the same five
        taxa as the list-based test.
        """
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]

        expected_taxa = [
            tx for tx in self.aln_obj.taxa_names if tx not in taxa_list
        ]

        self.aln_obj.remove_taxa(taxa_to_remove)

        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_list_inverse(self):
        """With ``mode="inverse"`` only the listed taxa should remain in
        ``taxa_names``.
        """
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]

        self.aln_obj.remove_taxa(taxa_list, mode="inverse")

        self.assertEqual(self.aln_obj.taxa_names, taxa_list)

    # Disabled test, kept for reference:
    #
    # def test_retrieve_alignment(self):
    #
    #     aln = self.aln_obj.retrieve_alignment("BaseConc1.fas")
    #
    #     aln2 = Alignment(dna_data_fas[0], dest="new_one")
    #
    #     self.assertTrue(compare_inst(aln, aln2,
    #                                  ["log_progression", "locus_length",
    #                                   "partitions"]))

    def test_concatenation(self):
        """The concatenated alignment written as FASTA should match the
        reference file ``BaseConcatenation.fas``.
        """
        aln = self.aln_obj.concatenate(alignment_name="test")
        aln.write_to_file(["fasta"], "test")

        # Remove the generated file even when the comparison fails, so a
        # failing run does not leave ``test.fas`` in the working directory.
        try:
            with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                    open("test.fas") as fh2:
                self.assertEqual(fh1.read(), fh2.read())
        finally:
            if os.path.exists("test.fas"):
                os.remove("test.fas")
Ejemplo n.º 4
0
class SeconaryOpsTest(unittest.TestCase):
    """Tests for the statistics-producing methods of ``AlignmentList``.

    Each test runs against a fresh ``AlignmentList`` built from
    ``dna_data_fas``. ``temp_dir`` is created on demand in ``setUp`` and
    removed in ``tearDown``.
    """

    # Header row expected as the first element of the summary table.
    SUMMARY_HEADERS = ['Genes', 'Taxa', 'Alignment length', 'Gaps',
                       'Gaps per gene', 'Missing data',
                       'Missing data per gene', 'Variable sites',
                       'Variable sites per gene', 'Informative sites',
                       'Informative sites per gene']

    def setUp(self):
        """Ensure ``temp_dir`` exists and build the ``AlignmentList``."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, remove ``temp_dir``."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _activate_single_alignment(self):
        """Restrict the active alignments to the first input file."""
        self.aln_obj.update_active_alignments([dna_data_fas[0]])

    def _assert_single_aln_exception(self, result):
        """Assert that ``result`` is the single-alignment exception dict."""
        self.assertEqual(result, {"exception": "single_alignment"})

    def test_summary_stats_all(self):
        """``get_summary_stats`` without arguments should report the values
        expected for the full data set.
        """
        sum_table, table_data = self.aln_obj.get_summary_stats()

        expected_summary = {
            'missing': '5 (0.04%)', 'taxa': 24, 'genes': 7,
            'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
            'avg_gaps': 0.0, 'avg_missing': 1.0, 'variable': '7 (1.18%)',
            'seq_len': 595, 'avg_var': 1.0, 'avg_inf': 0.0}
        expected_table = [
            list(self.SUMMARY_HEADERS),
            [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0,
             '7 (1.18%)', 1.0, '0 (0.0%)', 0.0]]

        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    def test_summary_stats_one_active(self):
        """``get_summary_stats`` restricted to ``BaseConc1.fas`` should
        report the values expected for that single alignment.
        """
        sum_table, table_data = self.aln_obj.get_summary_stats([
            join(data_path, "BaseConc1.fas")])

        expected_summary = {
            'missing': '1 (0.05%)', 'taxa': 24, 'genes': 1,
            'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
            'avg_gaps': 0.0, 'avg_missing': 1.0, 'variable': '1 (1.18%)',
            'seq_len': 85, 'avg_var': 1.0, 'avg_inf': 0.0}
        expected_table = [
            list(self.SUMMARY_HEADERS),
            [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0,
             '1 (1.18%)', 1.0, '0 (0.0%)', 0.0]]

        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    def test_single_aln_outlier_mdata(self):
        """``outlier_missing_data`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_missing_data())

    def test_single_aln_outlier_mdata_sp(self):
        """``outlier_missing_data_sp`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_missing_data_sp())

    def test_single_aln_outlier_seg(self):
        """``outlier_segregating`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_segregating())

    def test_single_aln_outlier_seg_sp(self):
        """``outlier_segregating_sp`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_segregating_sp())

    def test_single_aln_outlier_seqsize(self):
        """``outlier_sequence_size`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_sequence_size())

    def test_single_aln_outlier_seqsize_sp(self):
        """``outlier_sequence_size_sp`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.outlier_sequence_size_sp())

    def test_single_aln_average_seqsize_per_species(self):
        """``average_seqsize_per_species`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.average_seqsize_per_species())

    def test_single_aln_average_seqsize(self):
        """``average_seqsize`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(self.aln_obj.average_seqsize())

    def test_single_aln_sequence_similarity(self):
        """``sequence_similarity`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.sequence_similarity())

    def test_single_aln_sequence_segregation(self):
        """``sequence_segregation`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.sequence_segregation())

    def test_single_aln_length_polymorphism_correlation(self):
        """``length_polymorphism_correlation`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.length_polymorphism_correlation())

    def test_single_aln_taxa_distribution(self):
        """``taxa_distribution`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(self.aln_obj.taxa_distribution())

    def test_single_aln_cumulative_missing_genes(self):
        """``cumulative_missing_genes`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.cumulative_missing_genes())

    def test_single_aln_gene_occupancy(self):
        """``gene_occupancy`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(self.aln_obj.gene_occupancy())

    def test_single_aln_missing_data_distribution(self):
        """``missing_data_distribution`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.missing_data_distribution())

    def test_single_aln_missing_genes_average(self):
        """``missing_genes_average`` with one active alignment."""
        self._activate_single_alignment()
        self._assert_single_aln_exception(
            self.aln_obj.missing_genes_average())

    def test_no_data(self):
        """``gene_occupancy`` on an empty ``AlignmentList`` should return the
        empty-data exception dict.
        """
        # Release the fixture created in setUp before replacing it;
        # tearDown only closes whatever ``self.aln_obj`` refers to at the
        # end of the test, so the original connection would otherwise leak.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList([], sql_db=sql_db)

        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    def test_gene_occupancy(self):
        """``gene_occupancy`` should return a truthy value."""
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        """``missing_data_distribution`` should return a truthy value."""
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        """``missing_data_per_species`` should return a truthy value."""
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        """``missing_genes_per_species`` should return a truthy value."""
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        """``missing_genes_average`` should return a truthy value."""
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        """``average_seqsize_per_species`` should return a truthy value."""
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        """``average_seqsize`` should return a truthy value."""
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        """``characters_proportion`` should return a truthy value."""
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        """``characters_proportion_per_species`` should return a truthy
        value.
        """
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        """``characters_proportion_gene`` for ``BaseConc1.fas`` should
        return a truthy value.
        """
        self.assertTrue(self.aln_obj.characters_proportion_gene(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_similarity(self):
        """``sequence_similarity`` should return a truthy value."""
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        """``sequence_similarity_per_species`` should return a truthy
        value.
        """
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        """``sequence_similarity_gene`` for ``BaseConc1.fas`` should return
        a truthy value.
        """
        self.assertTrue(self.aln_obj.sequence_similarity_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        """``sequence_conservation_gnp`` for ``BaseConc1.fas`` should return
        a truthy value.
        """
        self.assertTrue(self.aln_obj.sequence_conservation_gnp(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_segregation(self):
        """``sequence_segregation`` should return a truthy value."""
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        """``sequence_segregation_per_species`` should return a truthy
        value.
        """
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        """``sequence_segregation_gene`` for ``BaseConc1.fas`` should return
        a truthy value.
        """
        self.assertTrue(self.aln_obj.sequence_segregation_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        """``length_polymorphism_correlation`` should return a truthy
        value.
        """
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        """``allele_frequency_spectrum`` should return a truthy value."""
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        """``allele_frequency_spectrum_gene`` for ``BaseConc1.fas`` should
        return a truthy value.
        """
        self.assertTrue(self.aln_obj.allele_frequency_spectrum_gene(
            join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        """``taxa_distribution`` should return a truthy value."""
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        """``cumulative_missing_genes`` should return a truthy value."""
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        """``outlier_missing_data`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        """``outlier_missing_data_sp`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        """``outlier_segregating`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        """``outlier_segregating_sp`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        """``outlier_sequence_size`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        """``outlier_sequence_size_sp`` should return a truthy value."""
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
Ejemplo n.º 5
0
class PartitonsTest(ExpectingTestCase):

    def setUp(self):
        """Prepare ``temp_dir`` and an ``AlignmentList`` with reset partitions.

        The partitions object is reset with the alignment list's own
        database cursor before each test.
        """
        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        fixture = self.aln_obj
        fixture.partitions.reset(cur=fixture.cur)

    def tearDown(self):
        """Clear the alignments, close the connection and remove
        ``temp_dir``.
        """
        fixture = self.aln_obj
        fixture.clear_alignments()
        fixture.con.close()

        shutil.rmtree(temp_dir)

    def test_read_from_nexus(self):
        """Reading the file in ``concatenated_small_parNex`` should yield
        seven partitions.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)

        partition_count = len(parts.partitions)
        self.assertEqual(partition_count, 7)

    def test_read_from_phylip(self):
        """Reading the file in ``concatenated_small_par`` should yield seven
        partitions.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_par[0], no_aln_check=True)

        partition_count = len(parts.partitions)
        self.assertEqual(partition_count, 7)

    def test_bad_partitions_phy(self):
        """Reading a malformed partition file should return an
        ``InvalidPartitionFile`` instance.

        The test checks the *return value* of ``read_from_file``; it does
        not expect the call to raise.
        """
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)

        # assertIsInstance reports the actual object on failure instead of
        # the uninformative "False is not true".
        self.assertIsInstance(e, InvalidPartitionFile)

    def test_unsorted_part_phylip(self):
        """Reading the file in ``partition_unsorted_phy`` should produce the
        seven partitions in order and a counter of 595.
        """
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)

        # The keys are materialised as a list: a dict view never compares
        # equal to a list, so the raw ``keys()`` result cannot be used in
        # the comparison below on interpreters that return views.
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]

        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"],
                                595])

    def test_phylip_dot_notation(self):
        """Reading the file in ``partition_dot_not`` should produce the
        seven partitions in order and a counter of 595.
        """
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)

        # The keys are materialised as a list: a dict view never compares
        # equal to a list, so the raw ``keys()`` result cannot be used in
        # the comparison below on interpreters that return views.
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]

        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"],
                                595])

    def test_nexus_dot_notation(self):
        """Reading the file in ``dot_notation_nex`` should produce the seven
        partitions in order and a counter of 595.
        """
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)

        # The keys are materialised as a list: a dict view never compares
        # equal to a list, so the raw ``keys()`` result cannot be used in
        # the comparison below on interpreters that return views.
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]

        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"],
                                595])

    def test_import_new_partscheme(self):
        """A partition scheme read from file should replace the one of a
        freshly loaded ``concatenated_medium_nexus`` alignment list.
        """
        # Dispose of the fixture created in setUp before building a new one.
        old_fixture = self.aln_obj
        old_fixture.clear_alignments()
        old_fixture.con.close()

        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)

        parts = self.aln_obj.partitions
        parts.reset()
        parts.read_from_file(concatenated_small_par[0], no_aln_check=True)

        expected_names = ["BaseConc1.fas", "BaseConc2.fas",
                          "BaseConc3.fas", "BaseConc4.fas",
                          "BaseConc5.fas", "BaseConc6.fas",
                          "BaseConc7.fas"]
        self.assertEqual(parts.get_partition_names(), expected_names)

    def test_add_duplicate_name(self):
        """Adding a partition under an already used name should raise
        ``PartitionException``.

        ``assertRaises`` must wrap the call itself. Passing it the *result*
        of ``add_partition(...)`` evaluates the call before the assertion
        is set up, so the check can never observe the exception.
        """
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)

        # "BaseConc1.fas" is one of the names loaded from the partition
        # file; the previous "BaseCond1.fas" was not a duplicate of anything.
        # NOTE(review): assumes add_partition raises PartitionException for
        # a name that is already present - confirm against Partitions.
        with self.assertRaises(PartitionException):
            self.aln_obj.partitions.add_partition("BaseConc1.fas",
                                                  length=100)

    def test_get_partition_names(self):
        """``get_partition_names`` should list the seven names read from
        the file in ``concatenated_small_par``.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_par[0], no_aln_check=True)

        expected_names = ["BaseConc1.fas", "BaseConc2.fas",
                          "BaseConc3.fas", "BaseConc4.fas",
                          "BaseConc5.fas", "BaseConc6.fas",
                          "BaseConc7.fas"]
        self.assertEqual(parts.get_partition_names(), expected_names)

    def test_get_partition_names_withCodon(self):
        """``get_partition_names`` should expand the first partition into
        three ``_1_N`` entries for the file in
        ``concatenated_smallCodon_parNex``.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_smallCodon_parNex[0],
                             no_aln_check=True)

        expected_names = ["BaseConc1.fas_1_1", "BaseConc1.fas_1_2",
                          "BaseConc1.fas_1_3", "BaseConc2.fas",
                          "BaseConc3.fas", "BaseConc4.fas",
                          "BaseConc5.fas", "BaseConc6.fas",
                          "BaseConc7.fas"]
        self.assertEqual(parts.get_partition_names(), expected_names)

    def test_single_partition(self):
        """An ``AlignmentList`` built from one file should report a single
        partition.

        The new list reuses the connection and cursor of the fixture
        created in ``setUp``.
        """
        previous = self.aln_obj
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=previous.con,
                                     db_cur=previous.cur,
                                     sql_db=sql_db)

        single = self.aln_obj.partitions.is_single()
        self.assertTrue(single)

    def test_multiple_partitions(self):
        """The default fixture should not report a single partition."""
        single = self.aln_obj.partitions.is_single()

        self.assertFalse(single)

    def test_remove_partition_from_file_original(self):
        """Removing a partition by file name should leave contiguous
        partition ranges.

        Uses a fresh ``AlignmentList`` whose partitions come directly from
        the input files rather than from a partition file.
        """
        old_fixture = self.aln_obj
        old_fixture.clear_alignments()
        old_fixture.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Each range must start where the previous one ended plus one; the
        # scan continues after a mismatch without updating the expectation.
        expected_start = 0
        contiguous = True
        for part_range in self.aln_obj.partitions.partitions.values():
            bounds = part_range[0][0]
            if bounds[0] != expected_start:
                contiguous = False
            else:
                expected_start = bounds[1] + 1

        self.expect_equal(contiguous, True)

    def test_remove_partition_from_name(self):
        """Removing ``BaseConc3.fas`` by partition name should drop it from
        all three key sets and keep the remaining ranges contiguous.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)
        parts.remove_partition("BaseConc3.fas")

        # Keys of partitions, partitions_alignments and models, in that order
        key_data = [list(mapping.keys())
                    for mapping in (parts.partitions,
                                    parts.partitions_alignments,
                                    parts.models)]

        remaining = ["BaseConc1.fas", "BaseConc2.fas",
                     "BaseConc4.fas",
                     "BaseConc5.fas", "BaseConc6.fas",
                     "BaseConc7.fas"]
        self.expect_equal(key_data, [remaining] * 3)

        # Each range must start where the previous one ended plus one; the
        # scan continues after a mismatch without updating the expectation.
        expected_start = 0
        contiguous = True
        for part_range in parts.partitions.values():
            bounds = part_range[0][0]
            if bounds[0] != expected_start:
                contiguous = False
            else:
                expected_start = bounds[1] + 1

        self.expect_equal(contiguous, True)

    def test_remove_partition_from_file(self):
        """Removing ``BaseConc3.fas`` via ``file_name`` should drop it from
        all three key sets and keep the remaining ranges contiguous.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)
        parts.remove_partition(file_name="BaseConc3.fas")

        # Keys of partitions, partitions_alignments and models, in that order
        key_data = [list(mapping.keys())
                    for mapping in (parts.partitions,
                                    parts.partitions_alignments,
                                    parts.models)]

        remaining = ["BaseConc1.fas", "BaseConc2.fas",
                     "BaseConc4.fas",
                     "BaseConc5.fas", "BaseConc6.fas",
                     "BaseConc7.fas"]
        self.expect_equal(key_data, [remaining] * 3)

        # Each range must start where the previous one ended plus one; the
        # scan continues after a mismatch without updating the expectation.
        expected_start = 0
        contiguous = True
        for part_range in parts.partitions.values():
            bounds = part_range[0][0]
            if bounds[0] != expected_start:
                contiguous = False
            else:
                expected_start = bounds[1] + 1

        self.expect_equal(contiguous, True)

    def test_change_name(self):
        """Renaming ``BaseConc1.fas`` should move the new name to the end of
        all three key sets while the ranges stay contiguous.
        """
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)

        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")

        key_data = [list(self.aln_obj.partitions.partitions.keys()),
                    list(self.aln_obj.partitions.partitions_alignments.keys()),
                    list(self.aln_obj.partitions.models.keys())]

        self.expect_equal(key_data,
                          [["BaseConc2.fas",
                           "BaseConc3.fas", "BaseConc4.fas",
                           "BaseConc5.fas", "BaseConc6.fas",
                           "BaseConc7.fas", "OtherName"]] * 3)

        # Check if remaining partition ranges are continuous
        cont = True
        prev = 0

        # The renamed partition is stored last, so re-order the partitions
        # by their range before walking them. ``items()`` is used instead
        # of ``iteritems()`` because the latter only exists on Python 2.
        self.aln_obj.partitions.partitions = OrderedDict(sorted(
            self.aln_obj.partitions.partitions.items(),
            key=lambda x: x[1][0]
        ))

        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False

        self.expect_equal(cont, True)

    def test_merge_partitions(self):
        """Merging all seven partitions should leave ``New_part`` as the
        only key in partitions, partitions_alignments and models.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)

        to_merge = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas",
                    "BaseConc4.fas", "BaseConc5.fas", "BaseConc6.fas",
                    "BaseConc7.fas"]
        parts.merge_partitions(to_merge, "New_part")

        key_data = [list(mapping.keys())
                    for mapping in (parts.partitions,
                                    parts.partitions_alignments,
                                    parts.models)]

        self.assertEqual(key_data, [["New_part"]] * 3)

    def test_split_partition(self):
        """Splitting ``BaseConc1.fas`` into ``part1``/``part2`` should
        update all three key sets and keep the ranges contiguous.
        """
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)

        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])

        key_data = [list(self.aln_obj.partitions.partitions.keys()),
                    list(self.aln_obj.partitions.partitions_alignments.keys()),
                    list(self.aln_obj.partitions.models.keys())]

        self.expect_equal(key_data,
                          [["part1", "part2", "BaseConc2.fas",
                            "BaseConc3.fas", "BaseConc4.fas",
                            "BaseConc5.fas", "BaseConc6.fas",
                            "BaseConc7.fas"]] * 3)

        # Check if remaining partition ranges are continuous
        cont = True
        prev = 0

        # Re-order the partitions by their range before walking them.
        # ``items()`` is used instead of ``iteritems()`` because the latter
        # only exists on Python 2.
        self.aln_obj.partitions.partitions = OrderedDict(sorted(
            self.aln_obj.partitions.partitions.items(),
            key=lambda x: x[1][0]
        ))

        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False

        self.expect_equal(cont, True)

    def test_merge_and_split(self):
        """Merging three partitions and splitting the result with default
        arguments should restore the original seven names.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        parts.merge_partitions(merged, "new_part")
        parts.split_partition("new_part")

        # Sorted keys of partitions, partitions_alignments and models
        key_data = [sorted(mapping.keys())
                    for mapping in (parts.partitions,
                                    parts.partitions_alignments,
                                    parts.models)]

        expected_names = ["BaseConc1.fas", "BaseConc2.fas",
                          "BaseConc3.fas", "BaseConc4.fas",
                          "BaseConc5.fas", "BaseConc6.fas",
                          "BaseConc7.fas"]
        self.expect_equal(key_data, [expected_names] * 3)

    def test_merge_and_custom_split1(self):
        """Splitting a merged partition at position 50/51 should assign the
        expected alignment files to ``one`` and ``two``.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        parts.merge_partitions(merged, "new_part")

        parts.split_partition("new_part",
                              [(0, 50), (51, 254)],
                              ["one", "two"])

        key_data = [parts.partitions_alignments[name]
                    for name in ("one", "two")]

        expected = [['BaseConc1.fas'],
                    ['BaseConc1.fas', 'BaseConc3.fas', 'BaseConc2.fas']]
        self.assertEqual(key_data, expected)

    def test_merge_and_custom_split2(self):
        """Splitting a merged partition at position 84/85 should assign the
        expected alignment files to ``one`` and ``two``.
        """
        parts = self.aln_obj.partitions
        parts.read_from_file(concatenated_small_parNex[0], no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        parts.merge_partitions(merged, "new_part")

        parts.split_partition("new_part",
                              [(0, 84), (85, 254)],
                              ["one", "two"])

        key_data = [parts.partitions_alignments[name]
                    for name in ("one", "two")]

        expected = [['BaseConc1.fas'],
                    ['BaseConc3.fas', 'BaseConc2.fas']]
        self.assertEqual(key_data, expected)

    def test_concat_custom_fileset_from_phy_partfile(self):
        """Concatenating two active alignments after reading the partition
        file in ``concatenated_small_par`` should leave only their two
        names in partitions, partitions_alignments and models.
        """
        old_fixture = self.aln_obj
        old_fixture.clear_alignments()
        old_fixture.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])

        active = [join(data_path, "BaseConc1.fas"),
                  join(data_path, "BaseConc2.fas")]
        self.aln_obj.update_active_alignments(active)

        self.aln_obj.concatenate()

        # Look the partitions object up again after concatenation.
        parts = self.aln_obj.partitions
        key_data = [sorted(parts.partitions.keys()),
                    sorted(parts.partitions_alignments.keys()),
                    sorted(parts.models.keys())]

        expected_names = ["BaseConc1.fas", "BaseConc2.fas"]
        self.expect_equal(key_data, [expected_names] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        """Concatenating two active alignments after reading the partition
        file in ``concatenated_small_parNex`` should leave only their two
        names in partitions, partitions_alignments and models.

        This method previously shared its name with the test that reads
        ``concatenated_small_par``. In a class body the later definition
        silently replaces the earlier one, so only one of the two tests was
        ever collected. It is named after the file it actually reads.
        """
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])

        self.aln_obj.concatenate()

        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())]

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_merge_with_custom_fileset(self):
        """Concatenating ``BaseConc1.fas`` and ``BaseConc5.fas`` after a
        merge of three partitions should leave only those two names in
        partitions, partitions_alignments and models.
        """
        old_fixture = self.aln_obj
        old_fixture.clear_alignments()
        old_fixture.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        self.aln_obj.partitions.merge_partitions(merged, "new_part")

        active = [join(data_path, "BaseConc1.fas"),
                  join(data_path, "BaseConc5.fas")]
        self.aln_obj.update_active_alignments(active)

        self.aln_obj.concatenate()

        # Look the partitions object up again after concatenation.
        parts = self.aln_obj.partitions
        key_data = [sorted(parts.partitions.keys()),
                    sorted(parts.partitions_alignments.keys()),
                    sorted(parts.models.keys())]

        expected_names = ["BaseConc1.fas", "BaseConc5.fas"]
        self.expect_equal(key_data, [expected_names] * 3)

    def test_model_detection(self):
        """Loading ``models_nexus_data`` should populate
        ``partitions.models`` with the same model entry for each of the
        seven ``TesteN.fas`` partitions.

        The new list reuses the connection and cursor of the fixture
        created in ``setUp``.
        """
        self.aln_obj.clear_alignments()

        previous = self.aln_obj
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=previous.con,
                                     db_cur=previous.cur,
                                     sql_db=sql_db)

        partition_names = ('Teste1.fas', 'Teste2.fas', 'Teste3.fas',
                           'Teste4.fas', 'Teste5.fas', 'Teste6.fas',
                           'Teste7.fas')

        # Every partition is expected to carry an identical model entry.
        expected_models = OrderedDict()
        for name in partition_names:
            expected_models[name] = [
                [['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None], []]

        self.assertEqual(self.aln_obj.partitions.models, expected_models)

    def test_model_detection_codons(self):
        """Check ``partitions.models`` after loading
        ``models_codon_nexus_data``.

        The current alignments are cleared and the alignment list is rebuilt
        from ``models_codon_nexus_data`` on the existing database cursor and
        connection. Each of the seven expected partitions must carry three
        parameter lists, three ``None`` model entries and an empty third
        element.
        """

        self.aln_obj.clear_alignments()

        # Hand the already-open database handles to the replacement instance
        cur = self.aln_obj.cur
        con = self.aln_obj.con
        self.aln_obj = AlignmentList(models_codon_nexus_data, db_cur=cur,
                                     db_con=con, sql_db=sql_db)

        partition_names = (
            'Teste1.fas_1', 'Teste2.fas_86', 'Teste3.fas_171',
            'Teste4.fas_256', 'Teste5.fas_341', 'Teste6.fas_426',
            'Teste7.fas_511',
        )

        # Every partition is expected to hold the very same model entry
        expected = OrderedDict()
        for name in partition_names:
            expected[name] = [
                [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                 ['nst=6', 'statefreqpr=fixed(equal)'],
                 ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None, None, None],
                [],
            ]

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        """Set a model on one partition and check ``partitions.models``.

        After reading ``concatenated_small_parNex[0]``, ``"GTR"`` is set on
        ``BaseConc1.fas`` only; the remaining six partitions are expected to
        keep a ``None`` model entry.
        """

        self.aln_obj.partitions.read_from_file(
            concatenated_small_parNex[0], no_aln_check=True)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])

        partition_names = (
            'BaseConc1.fas', 'BaseConc2.fas', 'BaseConc3.fas',
            'BaseConc4.fas', 'BaseConc5.fas', 'BaseConc6.fas',
            'BaseConc7.fas',
        )

        # Start from the untouched entry for everyone, then overwrite the
        # single partition that received a model (its position is retained)
        expected = OrderedDict(
            (name, [[[]], [None], []]) for name in partition_names)
        expected['BaseConc1.fas'] = [[[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        """Set a model with ``apply_all=True`` and check
        ``partitions.models``.

        After reading ``concatenated_small_parNex[0]``, ``"GTR"`` is set via
        ``BaseConc1.fas`` with ``apply_all=True``; all seven partitions are
        expected to end up with the ``'GTR'`` model entry.
        """

        self.aln_obj.partitions.read_from_file(
            concatenated_small_parNex[0], no_aln_check=True)

        self.aln_obj.partitions.set_model(
            "BaseConc1.fas", ["GTR"], apply_all=True)

        partition_names = (
            'BaseConc1.fas', 'BaseConc2.fas', 'BaseConc3.fas',
            'BaseConc4.fas', 'BaseConc5.fas', 'BaseConc6.fas',
            'BaseConc7.fas',
        )

        expected = OrderedDict()
        for name in partition_names:
            expected[name] = [[[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        """Set two linked models with ``apply_all=True`` and check
        ``partitions.models``.

        After reading ``concatenated_small_parNex[0]``, the models
        ``["GTR", "SYM"]`` with links ``["12", "3"]`` are set via
        ``BaseConc1.fas`` with ``apply_all=True``; all seven partitions are
        expected to carry both the models and the links.
        """

        self.aln_obj.partitions.read_from_file(
            concatenated_small_parNex[0], no_aln_check=True)

        self.aln_obj.partitions.set_model(
            "BaseConc1.fas", ["GTR", "SYM"], links=["12", "3"],
            apply_all=True)

        partition_names = (
            'BaseConc1.fas', 'BaseConc2.fas', 'BaseConc3.fas',
            'BaseConc4.fas', 'BaseConc5.fas', 'BaseConc6.fas',
            'BaseConc7.fas',
        )

        expected = OrderedDict()
        for name in partition_names:
            expected[name] = [[[]], ['GTR', 'SYM'], ['12', '3']]

        self.assertEqual(self.aln_obj.partitions.models, expected)
class AlignmentManipulationTest(unittest.TestCase):

    def setUp(self):
        """Create ``temp_dir`` when absent and load the fixture alignments.

        ``self.aln_obj`` is built from ``dna_data_fas`` with ``sql_db`` as
        its database argument.
        """

        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the database and remove ``temp_dir``.

        Clearing the alignments stays best-effort: a failure there is
        ignored so the remaining cleanup still runs. Only ``Exception``
        subclasses are suppressed, so ``KeyboardInterrupt`` and
        ``SystemExit`` still propagate. ``temp_dir`` is removed even when
        closing the connection raises, so a failed close cannot leave stale
        files behind for the next test.
        """

        try:
            self.aln_obj.clear_alignments()
        except Exception:
            # Best-effort on purpose: individual tests may already have
            # cleared or otherwise altered the alignment data
            pass

        try:
            self.aln_obj.con.close()
        finally:
            shutil.rmtree(temp_dir)

    def test_clear_alns(self):
        """A cleared alignment list must match a freshly created empty one.

        ``compare_inst`` is asked to ignore the ``log_progression``,
        ``locus_length``, ``partitions``, ``cur`` and ``con`` attributes.
        The reference instance opens its own database connection, which is
        closed once the comparison is done so it does not outlive the test.
        """

        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)

        try:
            self.assertTrue(compare_inst(self.aln_obj, aln,
                                         ["log_progression",
                                          "locus_length",
                                          "partitions",
                                          "cur",
                                          "con"]))
        finally:
            # Release the reference instance's connection even when the
            # assertion fails; ``tearDown`` only closes ``self.aln_obj.con``
            aln.con.close()

    def test_update_act_anls(self):
        """Activate two alignments and check they are the only keys left.

        ``update_active_alignments`` receives the paths of ``BaseConc1.fas``
        and ``BaseConc2.fas``; ``alignments`` is then expected to list
        exactly those two paths, in that order.
        """

        wanted = [join(data_path, fname)
                  for fname in ("BaseConc1.fas", "BaseConc2.fas")]

        # Pass a copy so the expectation is independent of the argument
        self.aln_obj.update_active_alignments(list(wanted))

        active = list(self.aln_obj.alignments.keys())
        self.assertEqual(active, wanted)

    def test_update_act_alns_err(self):
        """Activate alignments with one extra ``Wrong_name`` path.

        ``update_active_alignments`` receives the paths of ``BaseConc1.fas``
        and ``BaseConc2.fas`` plus a ``Wrong_name`` path; ``alignments`` is
        expected to list only the first two.
        """

        known = [join(data_path, fname)
                 for fname in ("BaseConc1.fas", "BaseConc2.fas")]
        requested = known + [join(data_path, "Wrong_name")]

        self.aln_obj.update_active_alignments(requested)

        active = list(self.aln_obj.alignments.keys())
        self.assertEqual(active, known)

    def test_update_aln_shelve(self):
        """Shelve ``BaseConc1.fas`` and check the remaining alignment keys.

        After ``update_active_alignment`` is called with ``"shelve"`` for
        ``BaseConc1.fas``, ``alignments`` is expected to list the paths of
        ``BaseConc2.fas`` through ``BaseConc7.fas``, in that order.
        """

        shelved = join(data_path, "BaseConc1.fas")
        self.aln_obj.update_active_alignment(shelved, "shelve")

        remaining = []
        for fname in ("BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
                      "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"):
            remaining.append(join(data_path, fname))

        self.assertEqual(list(self.aln_obj.alignments.keys()), remaining)

    def test_update_aln_act(self):
        """Re-activate one alignment after passing an empty active list.

        ``update_active_alignments`` is first called with an empty list;
        ``update_active_alignment`` is then called with ``"active"`` for
        ``BaseConc1.fas``, which is expected to be the only key in
        ``alignments``.
        """

        target = join(data_path, "BaseConc1.fas")

        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(target, "active")

        active = list(self.aln_obj.alignments.keys())
        self.assertEqual(active, [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        """Add an ``Alignment`` object and check it is appended to the keys.

        An ``Alignment`` is built from ``dna_data_loci[0]`` on the list's
        own cursor and connection and added through ``add_alignments``. The
        keys of ``alignments`` are then expected to be the previous keys
        followed by the path of ``c97d5m4p2.loci``.
        """

        # Snapshot the keys as a real list: on Python 3 ``keys()`` is a live
        # view that would follow the addition and cannot be concatenated
        # with a list
        previous = list(self.aln_obj.alignments.keys())

        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                        sql_con=self.aln_obj.con,
                        db_idx=self.aln_obj._idx + 1, temp_dir=temp_dir)

        self.aln_obj.add_alignments([aln])

        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         previous + [join(data_path, "c97d5m4p2.loci")])

    def test_remove_taxa_from_list(self):
        """Remove taxa given as a list and check the remaining taxa names.

        The expectation is computed before the removal: the current
        ``taxa_names`` minus the five names passed to ``remove_taxa``, with
        the original order kept.
        """

        to_drop = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]

        # Must be collected before ``remove_taxa`` is called
        survivors = []
        for taxon in self.aln_obj.taxa_names:
            if taxon in to_drop:
                continue
            survivors.append(taxon)

        self.aln_obj.remove_taxa(to_drop)

        self.assertEqual(self.aln_obj.taxa_names, survivors)

    def test_remove_taxa_from_file(self):
        """Remove taxa given via ``taxa_to_remove`` and check the remainder.

        ``remove_taxa`` receives the module-level ``taxa_to_remove`` value
        rather than the list below. The expectation is the current
        ``taxa_names`` minus the five listed names, computed before the
        removal and keeping the original order.
        """

        listed = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]

        # Must be collected before ``remove_taxa`` is called
        survivors = []
        for taxon in self.aln_obj.taxa_names:
            if taxon in listed:
                continue
            survivors.append(taxon)

        self.aln_obj.remove_taxa(taxa_to_remove)

        self.assertEqual(self.aln_obj.taxa_names, survivors)

    def test_remove_taxa_from_list_inverse(self):
        """Call ``remove_taxa`` with ``mode="inverse"`` and check the result.

        After the call, ``taxa_names`` is expected to equal the five names
        that were passed in, in the same order.
        """

        taxa_list = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]

        self.aln_obj.remove_taxa(taxa_list, mode="inverse")

        self.assertEqual(self.aln_obj.taxa_names, taxa_list)

    #
    # def test_retrieve_alignment(self):
    #
    #     aln = self.aln_obj.retrieve_alignment("BaseConc1.fas")
    #
    #     aln2 = Alignment(dna_data_fas[0], dest="new_one")
    #
    #     self.assertTrue(compare_inst(aln, aln2,
    #                                  ["log_progression", "locus_length",
    #                                   "_partitions"]))

    def test_concatenation(self):
        """Concatenate, write a FASTA file and compare it with the reference.

        The output is written with ``output_file="test"`` and read back from
        ``test.fas``. Lines of both files are sorted before comparison, so
        the check does not depend on line order. ``test.fas`` is removed
        whether or not the comparison succeeds, so a failing run does not
        leave the file behind.
        """

        self.aln_obj.concatenate()
        self.aln_obj.write_to_file(["fasta"], output_file="test")

        try:
            with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                    open("test.fas") as fh2:
                self.assertEqual(sorted(fh1.readlines()),
                                 sorted(fh2.readlines()))
        finally:
            # Guard the removal so a missing output file surfaces as the
            # original open() error instead of a cleanup error
            if os.path.exists("test.fas"):
                os.remove("test.fas")

    def test_concatention_after_removal(self):
        """Concatenate after removing files and check the alignment count.

        Every item from the fourth position onward when iterating
        ``alignments`` is passed to ``remove_file``; after ``concatenate``
        the test expects ``alignments`` to hold exactly one entry.
        """

        # Iterating ``alignments`` gives the items handed to ``remove_file``;
        # the ``[3:]`` slice leaves the first three untouched
        fl = [x for x in self.aln_obj.alignments][3:]
        self.aln_obj.remove_file(fl)

        self.aln_obj.concatenate()

        self.assertEqual(len(self.aln_obj.alignments), 1)