class PartitonsTest(ExpectingTestCase):
    """Tests for reading, editing and querying partition schemes.

    Each test works on an ``AlignmentList`` built from the fasta test
    data, with its partitions object reset in ``setUp``.
    """

    def setUp(self):
        # Ensure the scratch directory used by the sqlite backend exists.
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _ranges_are_continuous(self, sort_first=False):
        """Return True if partition ranges form a gapless run from 0.

        When *sort_first* is True the partitions mapping is first sorted
        in place by range start; renames and splits may leave the
        OrderedDict out of positional order.
        """
        if sort_first:
            self.aln_obj.partitions.partitions = OrderedDict(
                sorted(self.aln_obj.partitions.partitions.items(),
                       key=lambda x: x[1][0]))
        prev = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] != prev:
                return False
            prev = r[0][0][1] + 1
        return True

    def test_read_from_nexus(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_unsorted_part_phylip(self):
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)
        # list() so the comparison works with py3 key views as well.
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"], 595])

    def test_phylip_dot_notation(self):
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"], 595])

    def test_nexus_dot_notation(self):
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [["BaseConc1.fas", "BaseConc2.fas",
                                 "BaseConc3.fas", "BaseConc4.fas",
                                 "BaseConc5.fas", "BaseConc6.fas",
                                 "BaseConc7.fas"], 595])

    def test_bad_dot_notation(self):
        e = self.aln_obj.partitions.read_from_file(bad_dot_notation_nex[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_import_new_partscheme(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.aln_obj.partitions.reset()
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, ["BaseConc1.fas", "BaseConc2.fas",
                               "BaseConc3.fas", "BaseConc4.fas",
                               "BaseConc5.fas", "BaseConc6.fas",
                               "BaseConc7.fas"])

    def test_add_duplicate_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        # Pass the callable and its arguments separately so assertRaises
        # can trap the exception (the original invoked the method eagerly)
        # and use an actually-duplicated name ("BaseCond1.fas" was a typo
        # that would never collide with an existing partition).
        self.assertRaises(PartitionException,
                          self.aln_obj.partitions.add_partition,
                          "BaseConc1.fas", length=100)

    def test_get_partition_names(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, ["BaseConc1.fas", "BaseConc2.fas",
                               "BaseConc3.fas", "BaseConc4.fas",
                               "BaseConc5.fas", "BaseConc6.fas",
                               "BaseConc7.fas"])

    def test_get_partition_names_withCodon(self):
        self.aln_obj.partitions.read_from_file(
            concatenated_smallCodon_parNex[0], no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, ["BaseConc1.fas_1_1", "BaseConc1.fas_1_2",
                               "BaseConc1.fas_1_3", "BaseConc2.fas",
                               "BaseConc3.fas", "BaseConc4.fas",
                               "BaseConc5.fas", "BaseConc6.fas",
                               "BaseConc7.fas"])

    def test_single_partition(self):
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    def test_remove_partition_from_file_original(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")
        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        # Check keys from _partitions, partitions_alignment and models
        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas",
                                      "BaseConc4.fas", "BaseConc5.fas",
                                      "BaseConc6.fas",
                                      "BaseConc7.fas"]] * 3)
        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_file(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")
        # Check keys from _partitions, partitions_alignment and models
        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas",
                                      "BaseConc4.fas", "BaseConc5.fas",
                                      "BaseConc6.fas",
                                      "BaseConc7.fas"]] * 3)
        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_change_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")
        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [["BaseConc2.fas", "BaseConc3.fas",
                                      "BaseConc4.fas", "BaseConc5.fas",
                                      "BaseConc6.fas", "BaseConc7.fas",
                                      "OtherName"]] * 3)
        # The rename moves the partition to the end of the mapping, so
        # sort by range start before checking continuity.
        self.expect_equal(self._ranges_are_continuous(sort_first=True),
                          True)

    def test_merge_partitions(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions([
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas",
            "BaseConc4.fas", "BaseConc5.fas", "BaseConc6.fas",
            "BaseConc7.fas"
        ], "New_part")
        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]
        self.assertEqual(key_data, [["New_part"]] * 3)

    def test_split_partition(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])
        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [["part1", "part2", "BaseConc2.fas",
                                      "BaseConc3.fas", "BaseConc4.fas",
                                      "BaseConc5.fas", "BaseConc6.fas",
                                      "BaseConc7.fas"]] * 3)
        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(sort_first=True),
                          True)

    def test_merge_and_split(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"],
            "new_part")
        self.aln_obj.partitions.split_partition("new_part")
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas",
                                      "BaseConc3.fas", "BaseConc4.fas",
                                      "BaseConc5.fas", "BaseConc6.fas",
                                      "BaseConc7.fas"]] * 3)

    def test_merge_and_custom_split1(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"],
            "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])
        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]
        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas',
                           'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"],
            "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])
        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]
        self.assertEqual(
            key_data,
            [['BaseConc1.fas'], ['BaseConc3.fas', 'BaseConc2.fas']])

    def test_concat_custom_fileset_from_phy_partfile(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data,
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        # Renamed: this nexus variant previously reused the name of the
        # phylip test above, silently shadowing it so it never ran.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data,
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_merge_with_custom_fileset(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"],
            "new_part")
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc5.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data,
                          [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    def test_model_detection(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        self.assertEqual(
            self.aln_obj.partitions.models,
            OrderedDict([
                ('Teste1.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste2.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste3.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste4.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste5.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste6.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []]),
                ('Teste7.fas',
                 [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                  [None], []])
            ]))

    def test_model_detection_codons(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)
        # Every codon partition carries the same three per-position models.
        codon_models = [[['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                         ['nst=6', 'statefreqpr=fixed(equal)'],
                         ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                        [None, None, None], []]
        self.assertEqual(
            self.aln_obj.partitions.models,
            OrderedDict([('Teste1.fas_1', codon_models),
                         ('Teste2.fas_86', codon_models),
                         ('Teste3.fas_171', codon_models),
                         ('Teste4.fas_256', codon_models),
                         ('Teste5.fas_341', codon_models),
                         ('Teste6.fas_426', codon_models),
                         ('Teste7.fas_511', codon_models)]))

    def test_set_model(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])
        self.assertEqual(
            self.aln_obj.partitions.models,
            OrderedDict([('BaseConc1.fas', [[[]], ['GTR'], []]),
                         ('BaseConc2.fas', [[[]], [None], []]),
                         ('BaseConc3.fas', [[[]], [None], []]),
                         ('BaseConc4.fas', [[[]], [None], []]),
                         ('BaseConc5.fas', [[[]], [None], []]),
                         ('BaseConc6.fas', [[[]], [None], []]),
                         ('BaseConc7.fas', [[[]], [None], []])]))

    def test_set_model_all(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)
        self.assertEqual(
            self.aln_obj.partitions.models,
            OrderedDict([('BaseConc1.fas', [[[]], ['GTR'], []]),
                         ('BaseConc2.fas', [[[]], ['GTR'], []]),
                         ('BaseConc3.fas', [[[]], ['GTR'], []]),
                         ('BaseConc4.fas', [[[]], ['GTR'], []]),
                         ('BaseConc5.fas', [[[]], ['GTR'], []]),
                         ('BaseConc6.fas', [[[]], ['GTR'], []]),
                         ('BaseConc7.fas', [[[]], ['GTR'], []])]))

    def test_set_model_codon(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"],
                                          apply_all=True)
        self.assertEqual(
            self.aln_obj.partitions.models,
            OrderedDict([
                ('BaseConc1.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc2.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc3.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc4.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc5.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc6.fas', [[[]], ['GTR', 'SYM'], ['12', '3']]),
                ('BaseConc7.fas', [[[]], ['GTR', 'SYM'], ['12', '3']])
            ]))
class SeconaryOpsTest(unittest.TestCase):
    """Exercises summary statistics and plot-data secondary operations."""

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _activate_single(self):
        """Restrict the active set to the first fasta alignment."""
        self.aln_obj.update_active_alignments([dna_data_fas[0]])

    def test_summary_stats_all(self):
        sum_table, table_data = self.aln_obj.get_summary_stats()
        expected_summary = {
            'missing': '5 (0.04%)',
            'taxa': 24,
            'genes': 7,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '7 (1.18%)',
            'seq_len': 595,
            'avg_var': 1.0,
            'avg_inf': 0.0,
        }
        expected_table = [
            ['Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
             'Missing data', 'Missing data per gene', 'Variable sites',
             'Variable sites per gene', 'Informative sites',
             'Informative sites per gene'],
            [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0, '7 (1.18%)',
             1.0, '0 (0.0%)', 0.0],
        ]
        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    def test_summary_stats_one_active(self):
        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])
        expected_summary = {
            'missing': '1 (0.05%)',
            'taxa': 24,
            'genes': 1,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '1 (1.18%)',
            'seq_len': 85,
            'avg_var': 1.0,
            'avg_inf': 0.0,
        }
        expected_table = [
            ['Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
             'Missing data', 'Missing data per gene', 'Variable sites',
             'Variable sites per gene', 'Informative sites',
             'Informative sites per gene'],
            [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0, '1 (1.18%)',
             1.0, '0 (0.0%)', 0.0],
        ]
        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    def test_single_aln_outlier_mdata(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.outlier_missing_data(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_mdata_sp(self):
        self._activate_single()
        print(self.aln_obj.alignments)
        self.assertEqual(self.aln_obj.outlier_missing_data_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.outlier_segregating(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg_sp(self):
        self._activate_single()
        print(self.aln_obj.alignments)
        self.assertEqual(self.aln_obj.outlier_segregating_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.outlier_sequence_size(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize_sp(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.outlier_sequence_size_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_average_seqsize_per_species(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.average_seqsize_per_species(),
                         {"exception": "single_alignment"})

    def test_single_aln_average_seqsize(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.average_seqsize(),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_similarity(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.sequence_similarity(),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_segregation(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.sequence_segregation(),
                         {"exception": "single_alignment"})

    def test_single_aln_length_polymorphism_correlation(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.length_polymorphism_correlation(),
                         {"exception": "single_alignment"})

    def test_single_aln_taxa_distribution(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.taxa_distribution(),
                         {"exception": "single_alignment"})

    def test_single_aln_cumulative_missing_genes(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.cumulative_missing_genes(),
                         {"exception": "single_alignment"})

    def test_single_aln_gene_occupancy(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {"exception": "single_alignment"})

    def test_single_aln_missing_data_distribution(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.missing_data_distribution(),
                         {"exception": "single_alignment"})

    def test_single_aln_missing_genes_average(self):
        self._activate_single()
        self.assertEqual(self.aln_obj.missing_genes_average(),
                         {"exception": "single_alignment"})

    def test_no_data(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    def test_gene_occupancy(self):
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        self.assertTrue(
            self.aln_obj.characters_proportion_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_similarity(self):
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_similarity_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        self.assertTrue(
            self.aln_obj.sequence_conservation_gnp(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_segregation(self):
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_segregation_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        self.assertTrue(
            self.aln_obj.allele_frequency_spectrum_gene(
                join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
class AlignmentManipulationTest(unittest.TestCase):
    """Tests for activating, adding, removing and concatenating alignments."""

    def setUp(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        try:
            self.aln_obj.clear_alignments()
        except Exception:
            # Best effort: some tests already leave the object cleared,
            # in which case a second clear may fail.
            pass
        self.aln_obj.con.close()
        os.remove(sql_db)

    def test_clear_alns(self):
        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)
        self.assertTrue(
            compare_inst(self.aln_obj, aln, [
                "log_progression", "locus_length", "partitions", "cur",
                "con"
            ]))

    def test_update_act_anls(self):
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])
        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

    def test_update_act_alns_err(self):
        # Unknown file names are silently ignored by the update.
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas"),
            join(data_path, "Wrong_name")
        ])
        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

    def test_update_aln_shelve(self):
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "shelve")
        self.assertEqual(list(self.aln_obj.alignments.keys()), [
            join(data_path, "BaseConc2.fas"),
            join(data_path, "BaseConc3.fas"),
            join(data_path, "BaseConc4.fas"),
            join(data_path, "BaseConc5.fas"),
            join(data_path, "BaseConc6.fas"),
            join(data_path, "BaseConc7.fas")
        ])

    def test_update_aln_act(self):
        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "active")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        # Materialize the key view up front: on Python 3 a dict_keys view
        # cannot be concatenated with a list and would also mutate along
        # with the underlying dict when the alignment is added.
        fl = list(self.aln_obj.alignments.keys())
        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur)
        self.aln_obj.add_alignments([aln])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         fl + [join(data_path, "c97d5m4p2.loci")])

    def test_remove_taxa_from_list(self):
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]
        expected_taxa = [
            tx for tx in self.aln_obj.taxa_names if tx not in taxa_list
        ]
        self.aln_obj.remove_taxa(taxa_list)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_file(self):
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]
        expected_taxa = [
            tx for tx in self.aln_obj.taxa_names if tx not in taxa_list
        ]
        # taxa_to_remove is a file listing the same taxa as taxa_list.
        self.aln_obj.remove_taxa(taxa_to_remove)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_list_inverse(self):
        taxa_list = [
            "1285_RAD_original", "130a_RAD_original", "137a_RAD_original",
            "1427_RAD_original", "167a_RAD_original"
        ]
        self.aln_obj.remove_taxa(taxa_list, mode="inverse")
        # In inverse mode only the listed taxa are kept.
        self.assertEqual(self.aln_obj.taxa_names, taxa_list)

    def test_concatenation(self):
        aln = self.aln_obj.concatenate(alignment_name="test")
        aln.write_to_file(["fasta"], "test")
        with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                open("test.fas") as fh2:
            self.assertEqual(fh1.read(), fh2.read())
        os.remove("test.fas")
class SeconaryOpsTest(unittest.TestCase):
    """Exercises summary statistics and plot-data secondary operations.

    NOTE(review): this class repeats an earlier ``SeconaryOpsTest``
    definition in the same module; being defined later, it shadows the
    first one at import time — confirm whether both are intended.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _only_first_alignment(self):
        """Restrict the active set to the first fasta alignment."""
        self.aln_obj.update_active_alignments([dna_data_fas[0]])

    def test_summary_stats_all(self):
        sum_table, table_data = self.aln_obj.get_summary_stats()
        summary = {
            'missing': '5 (0.04%)',
            'taxa': 24,
            'genes': 7,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '7 (1.18%)',
            'seq_len': 595,
            'avg_var': 1.0,
            'avg_inf': 0.0,
        }
        table = [
            ['Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
             'Missing data', 'Missing data per gene', 'Variable sites',
             'Variable sites per gene', 'Informative sites',
             'Informative sites per gene'],
            [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0, '7 (1.18%)',
             1.0, '0 (0.0%)', 0.0],
        ]
        self.assertEqual([sum_table, table_data], [summary, table])

    def test_summary_stats_one_active(self):
        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])
        summary = {
            'missing': '1 (0.05%)',
            'taxa': 24,
            'genes': 1,
            'informative': '0 (0.0%)',
            'gaps': '0 (0.0%)',
            'avg_gaps': 0.0,
            'avg_missing': 1.0,
            'variable': '1 (1.18%)',
            'seq_len': 85,
            'avg_var': 1.0,
            'avg_inf': 0.0,
        }
        table = [
            ['Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
             'Missing data', 'Missing data per gene', 'Variable sites',
             'Variable sites per gene', 'Informative sites',
             'Informative sites per gene'],
            [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0, '1 (1.18%)',
             1.0, '0 (0.0%)', 0.0],
        ]
        self.assertEqual([sum_table, table_data], [summary, table])

    def test_single_aln_outlier_mdata(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.outlier_missing_data(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_mdata_sp(self):
        self._only_first_alignment()
        print(self.aln_obj.alignments)
        self.assertEqual(self.aln_obj.outlier_missing_data_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.outlier_segregating(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg_sp(self):
        self._only_first_alignment()
        print(self.aln_obj.alignments)
        self.assertEqual(self.aln_obj.outlier_segregating_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.outlier_sequence_size(),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize_sp(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.outlier_sequence_size_sp(),
                         {"exception": "single_alignment"})

    def test_single_aln_average_seqsize_per_species(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.average_seqsize_per_species(),
                         {"exception": "single_alignment"})

    def test_single_aln_average_seqsize(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.average_seqsize(),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_similarity(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.sequence_similarity(),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_segregation(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.sequence_segregation(),
                         {"exception": "single_alignment"})

    def test_single_aln_length_polymorphism_correlation(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.length_polymorphism_correlation(),
                         {"exception": "single_alignment"})

    def test_single_aln_taxa_distribution(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.taxa_distribution(),
                         {"exception": "single_alignment"})

    def test_single_aln_cumulative_missing_genes(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.cumulative_missing_genes(),
                         {"exception": "single_alignment"})

    def test_single_aln_gene_occupancy(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {"exception": "single_alignment"})

    def test_single_aln_missing_data_distribution(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.missing_data_distribution(),
                         {"exception": "single_alignment"})

    def test_single_aln_missing_genes_average(self):
        self._only_first_alignment()
        self.assertEqual(self.aln_obj.missing_genes_average(),
                         {"exception": "single_alignment"})

    def test_no_data(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    def test_gene_occupancy(self):
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        self.assertTrue(
            self.aln_obj.characters_proportion_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_similarity(self):
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_similarity_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        self.assertTrue(
            self.aln_obj.sequence_conservation_gnp(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_segregation(self):
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_segregation_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        self.assertTrue(
            self.aln_obj.allele_frequency_spectrum_gene(
                join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
class PartitonsTest(ExpectingTestCase):
    """Exercise partition parsing and manipulation on an AlignmentList.

    NOTE(review): the class name keeps the historical 'Partitons' typo so
    any external selection of the test class by name keeps resolving.
    """

    # Partition names shared by most of the small concatenated fixtures.
    BASE_NAMES = ["BaseConc%s.fas" % i for i in range(1, 8)]

    def setUp(self):
        # Fresh scratch dir and a clean partition table for every test.
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # -- helpers -------------------------------------------------------

    def _partition_keys(self, sort=False):
        """Keys of partitions, partitions_alignments and models, in sync."""
        part = self.aln_obj.partitions
        op = sorted if sort else list
        return [op(part.partitions.keys()),
                op(part.partitions_alignments.keys()),
                op(part.models.keys())]

    def _assert_continuous_ranges(self, sort_first=False):
        """Expect partition ranges to tile the alignment without gaps."""
        if sort_first:
            # Partition order is not guaranteed after this operation, so
            # sort by range start before walking the ranges.
            self.aln_obj.partitions.partitions = OrderedDict(sorted(
                self.aln_obj.partitions.partitions.iteritems(),
                key=lambda x: x[1][0]))
        cont = True
        prev = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False
        self.expect_equal(cont, True)

    # -- parsing -------------------------------------------------------

    def test_read_from_nexus(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        # read_from_file returns (not raises) the error object on failure.
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_unsorted_part_phylip(self):
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)
        data = [self.aln_obj.partitions.partitions.keys(),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.BASE_NAMES, 595])

    def test_phylip_dot_notation(self):
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)
        data = [self.aln_obj.partitions.partitions.keys(),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.BASE_NAMES, 595])

    def test_nexus_dot_notation(self):
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)
        data = [self.aln_obj.partitions.partitions.keys(),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.BASE_NAMES, 595])

    def test_import_new_partscheme(self):
        # Replace the alignment list with the pre-concatenated fixture and
        # load a new scheme on top of it.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.aln_obj.partitions.reset()
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, self.BASE_NAMES)

    def test_add_duplicate_name(self):
        # BUG FIX: assertRaises must receive the callable and its arguments
        # separately; the original invoked add_partition() eagerly, so the
        # exception escaped before assertRaises could check it.  Also use an
        # existing partition name ("BaseConc1.fas" -- the original
        # "BaseCond1.fas" was a typo) so a duplicate is actually added.
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertRaises(PartitionException,
                          self.aln_obj.partitions.add_partition,
                          "BaseConc1.fas", length=100)

    def test_get_partition_names(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, self.BASE_NAMES)

    def test_get_partition_names_withCodon(self):
        self.aln_obj.partitions.read_from_file(
            concatenated_smallCodon_parNex[0], no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        # First gene is split into its three codon positions.
        self.assertEqual(res, ["BaseConc1.fas_1_1", "BaseConc1.fas_1_2",
                               "BaseConc1.fas_1_3"] + self.BASE_NAMES[1:])

    def test_single_partition(self):
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    # -- removal / renaming --------------------------------------------

    def test_remove_partition_from_file_original(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")
        self._assert_continuous_ranges()

    def test_remove_partition_from_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        expected = [x for x in self.BASE_NAMES if x != "BaseConc3.fas"]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges()

    def test_remove_partition_from_file(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")
        expected = [x for x in self.BASE_NAMES if x != "BaseConc3.fas"]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges()

    def test_change_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")
        expected = self.BASE_NAMES[1:] + ["OtherName"]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges(sort_first=True)

    # -- merging / splitting -------------------------------------------

    def test_merge_partitions(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.BASE_NAMES, "New_part")
        self.assertEqual(self._partition_keys(), [["New_part"]] * 3)

    def test_split_partition(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])
        expected = ["part1", "part2"] + self.BASE_NAMES[1:]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        # (A leftover debug print of each range was removed here.)
        self._assert_continuous_ranges(sort_first=True)

    def test_merge_and_split(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.partitions.split_partition("new_part")
        self.expect_equal(self._partition_keys(sort=True),
                          [self.BASE_NAMES] * 3)

    def test_merge_and_custom_split1(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])
        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]
        self.assertEqual(key_data, [['BaseConc1.fas'],
                                    ['BaseConc1.fas', 'BaseConc3.fas',
                                     'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])
        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]
        self.assertEqual(key_data, [['BaseConc1.fas'],
                                    ['BaseConc3.fas', 'BaseConc2.fas']])

    # -- concatenation with partial file sets --------------------------

    def test_concat_custom_fileset_from_phy_partfile(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])
        self.aln_obj.concatenate()
        self.expect_equal(self._partition_keys(sort=True),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        # BUG FIX: renamed from test_concat_custom_fileset_from_phy_partfile;
        # the duplicated name silently shadowed the phylip variant above, so
        # it never ran.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])
        self.aln_obj.concatenate()
        self.expect_equal(self._partition_keys(sort=True),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_merge_with_custom_fileset(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.merge_partitions(self.BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc5.fas")])
        self.aln_obj.concatenate()
        self.expect_equal(self._partition_keys(sort=True),
                          [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    # -- substitution models -------------------------------------------

    def test_model_detection(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        # Every gene in the fixture carries the same single model.
        expected = OrderedDict(
            [("Teste%s.fas" % i,
              [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']], [None], []])
             for i in range(1, 8)])
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)
        # Same three-codon model per gene; gene starts are 85 columns apart
        # (1, 86, 171, ...), encoded in the partition key suffix.
        codon_model = [[['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                        ['nst=6', 'statefreqpr=fixed(equal)'],
                        ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                       [None, None, None], []]
        expected = OrderedDict(
            [("Teste%s.fas_%s" % (i, (i - 1) * 85 + 1), codon_model)
             for i in range(1, 8)])
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])
        expected = OrderedDict(
            [(name,
              [[[]], ['GTR'] if name == "BaseConc1.fas" else [None], []])
             for name in self.BASE_NAMES])
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)
        expected = OrderedDict(
            [(name, [[[]], ['GTR'], []]) for name in self.BASE_NAMES])
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"], apply_all=True)
        expected = OrderedDict(
            [(name, [[[]], ['GTR', 'SYM'], ['12', '3']])
             for name in self.BASE_NAMES])
        self.assertEqual(self.aln_obj.partitions.models, expected)
class AlignmentManipulationTest(unittest.TestCase):
    """Tests for activating, adding and removing alignments and taxa."""

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        try:
            self.aln_obj.clear_alignments()
        except Exception:
            # Best effort: a test may have left the object half torn down.
            # (Was a bare except; narrowed so SystemExit et al. propagate.)
            pass
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def test_clear_alns(self):
        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)
        self.assertTrue(compare_inst(self.aln_obj, aln,
                                     ["log_progression", "locus_length",
                                      "partitions", "cur", "con"]))

    def test_update_act_anls(self):
        active = [join(data_path, "BaseConc1.fas"),
                  join(data_path, "BaseConc2.fas")]
        self.aln_obj.update_active_alignments(active)
        self.assertEqual(list(self.aln_obj.alignments.keys()), active)

    def test_update_act_alns_err(self):
        # Unknown file names must be dropped from the active set.
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas"),
             join(data_path, "Wrong_name")])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas"),
                          join(data_path, "BaseConc2.fas")])

    def test_update_aln_shelve(self):
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "shelve")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc%s.fas" % i)
                          for i in range(2, 8)])

    def test_update_aln_act(self):
        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "active")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        fl = self.aln_obj.alignments.keys()
        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                        sql_con=self.aln_obj.con,
                        db_idx=self.aln_obj._idx + 1,
                        temp_dir=temp_dir)
        self.aln_obj.add_alignments([aln])
        self.assertEqual(self.aln_obj.alignments.keys(),
                         fl + [join(data_path, "c97d5m4p2.loci")])

    def test_remove_taxa_from_list(self):
        taxa_list = ["1285_RAD_original", "130a_RAD_original",
                     "137a_RAD_original", "1427_RAD_original",
                     "167a_RAD_original"]
        expected_taxa = [tx for tx in self.aln_obj.taxa_names
                         if tx not in taxa_list]
        self.aln_obj.remove_taxa(taxa_list)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_file(self):
        # assumes the taxa_to_remove fixture names these same five taxa --
        # confirm against the fixture file.
        taxa_list = ["1285_RAD_original", "130a_RAD_original",
                     "137a_RAD_original", "1427_RAD_original",
                     "167a_RAD_original"]
        expected_taxa = [tx for tx in self.aln_obj.taxa_names
                         if tx not in taxa_list]
        self.aln_obj.remove_taxa(taxa_to_remove)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_list_inverse(self):
        taxa_list = ["1285_RAD_original", "130a_RAD_original",
                     "137a_RAD_original", "1427_RAD_original",
                     "167a_RAD_original"]
        # Inverse mode keeps exactly the listed taxa.  (An unused
        # expected_taxa computation was removed from the original.)
        self.aln_obj.remove_taxa(taxa_list, mode="inverse")
        self.assertEqual(self.aln_obj.taxa_names, taxa_list)

    # TODO: re-enable the retrieve_alignment test (previously commented
    # out) once retrieve_alignment and compare_inst agree on which
    # attributes to ignore.

    def test_concatenation(self):
        self.aln_obj.concatenate()
        self.aln_obj.write_to_file(["fasta"], output_file="test")
        try:
            with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                    open("test.fas") as fh2:
                self.assertEqual(sorted(fh1.readlines()),
                                 sorted(fh2.readlines()))
        finally:
            # Remove the output even when the comparison fails, so a failed
            # run does not leave test.fas behind for the next one.
            os.remove("test.fas")

    def test_concatention_after_removal(self):
        # NOTE(review): method name keeps the historical 'concatention'
        # typo so recorded test ids stay stable.
        fl = [x for x in self.aln_obj.alignments][3:]
        self.aln_obj.remove_file(fl)
        self.aln_obj.concatenate()
        self.assertEqual(len(self.aln_obj.alignments), 1)