class AlignmentCodonFilters(unittest.TestCase):
    """Tests for filtering alignment columns by codon position.

    The ``codon_filter`` fixture holds alignments whose first, second and
    third codon positions are all-"a", all-"t" and all-"g" respectively,
    so the expected output of each filter is a trivially predictable
    repetition of those characters.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _filter_and_collect(self, positions, table=None):
        """Apply a codon-position filter and return the resulting sequences.

        `positions` is a three-boolean list selecting codon positions 1-3.
        When `table` is given, the filtered data is written to that table
        and read back from it; otherwise the default table is used.
        """
        self.aln_obj.add_alignment_files(codon_filter)
        if table is not None:
            self.aln_obj.filter_codon_positions(positions, table_out=table)
            return [seq for _, seq, _ in self.aln_obj.iter_alignments(table)]
        self.aln_obj.filter_codon_positions(positions)
        return [seq for _, seq, _ in self.aln_obj.iter_alignments()]

    def test_codon_filter_pos1(self):
        s = self._filter_and_collect([True, False, False], "master_out")
        self.assertEqual(s, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        s = self._filter_and_collect([False, True, False], "master_out")
        self.assertEqual(s, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        s = self._filter_and_collect([False, False, True], "master_out")
        self.assertEqual(s, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        s = self._filter_and_collect([True, True, False], "master_out")
        self.assertEqual(s, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        s = self._filter_and_collect([True, False, True], "master_out")
        self.assertEqual(s, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        # Keeping all three positions is a no-op filter on the data.
        s = self._filter_and_collect([True, True, True])
        self.assertEqual(s, ["atg" * 16] * 10)
class PartitonsTest(ExpectingTestCase):
    """Tests for partition handling in AlignmentList.

    NOTE(review): the class name misspells "Partitions"; it is kept
    unchanged so existing test selectors keep working.
    """

    def setUp(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset()

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        os.remove(sql_db)

    def _partition_keys(self):
        """Return the key lists of the three partition mappings."""
        return [list(self.aln_obj.partitions.partitions.keys()),
                list(self.aln_obj.partitions.partitions_alignments.keys()),
                list(self.aln_obj.partitions.models.keys())]

    def _expect_continuous_ranges(self, sort_ranges=False):
        """Expect that partition ranges are contiguous from position 0.

        When `sort_ranges` is True the partitions are first sorted by
        their starting position (needed after operations such as rename
        or split, which may leave the mapping out of positional order).
        """
        if sort_ranges:
            # .items() instead of the Python 2-only .iteritems(), which
            # raises AttributeError on Python 3.
            self.aln_obj.partitions.partitions = OrderedDict(sorted(
                self.aln_obj.partitions.partitions.items(),
                key=lambda x: x[1][0]))
        cont = True
        prev = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0] == prev:
                prev = r[0][1] + 1
            else:
                cont = False
        self.expect_equal(cont, True)

    def test_read_from_nexus(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_get_partition_names(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, ["BaseConc%s.fas" % i for i in range(1, 8)])

    def test_get_partition_names_withCodon(self):
        self.aln_obj.partitions.read_from_file(
            concatenated_smallCodon_parNex[0])
        res = self.aln_obj.partitions.get_partition_names()
        # The first gene is split into its three codon positions.
        self.assertEqual(res, ["BaseConc1.fas_1_1", "BaseConc1.fas_1_2",
                               "BaseConc1.fas_1_3"] +
                         ["BaseConc%s.fas" % i for i in range(2, 8)])

    def test_single_partition(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList([dna_data_fas[0]], sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    def test_remove_partition_from_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        # Check keys from partitions, partitions_alignments and models.
        expected = ["BaseConc%s.fas" % i for i in (1, 2, 4, 5, 6, 7)]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._expect_continuous_ranges()

    def test_remove_partition_from_file(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        expected = ["BaseConc%s.fas" % i for i in (1, 2, 4, 5, 6, 7)]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._expect_continuous_ranges()

    def test_change_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")
        # The renamed partition is reported last by the key listing.
        expected = (["BaseConc%s.fas" % i for i in range(2, 8)] +
                    ["OtherName"])
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._expect_continuous_ranges(sort_ranges=True)

    def test_merge_partitions(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.merge_partitions(
            ["BaseConc%s.fas" % i for i in range(1, 8)], "New_part")
        self.assertEqual(self._partition_keys(), [["New_part"]] * 3)

    def test_split_partition(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.split_partition(
            "BaseConc1.fas", [(0, 50), (51, 84)], ["part1", "part2"])
        expected = (["BaseConc%s.fas" % i for i in range(2, 8)] +
                    ["part1", "part2"])
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._expect_continuous_ranges(sort_ranges=True)

    def test_model_detection(self):
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur)
        expected = OrderedDict(
            ('Teste%s.fas' % i,
             [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']], [None], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con)
        per_codon = [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                     ['nst=6', 'statefreqpr=fixed(equal)'],
                     ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']]
        # Key suffixes 1, 86, 171, ... presumably encode the 1-based
        # start column of each 85 bp gene — TODO confirm in Partitions.
        expected = OrderedDict(
            ('Teste%s.fas_%s' % (i, 85 * (i - 1) + 1),
             [per_codon, [None, None, None], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])
        # Only the targeted partition gets the model; others stay None.
        expected = OrderedDict(
            ('BaseConc%s.fas' % i,
             [[[]], ['GTR'] if i == 1 else [None], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)
        expected = OrderedDict(
            ('BaseConc%s.fas' % i, [[[]], ['GTR'], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"], apply_all=True)
        expected = OrderedDict(
            ('BaseConc%s.fas' % i, [[[]], ['GTR', 'SYM'], ['12', '3']])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)
class AlignmentTaxaFilters(unittest.TestCase):
    """Tests for filtering alignments by taxa membership."""

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _count_after_min_taxa(self, threshold):
        """Load the fasta data, apply the minimum-taxa filter and return
        the number of alignments that remain active."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_min_taxa(threshold)
        return len(self.aln_obj.alignments)

    def _count_after_taxa_filter(self, taxa, mode):
        """Load the fasta data, filter by the given taxa/mode and return
        the number of alignments that remain active."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_by_taxa(taxa, mode)
        return len(self.aln_obj.alignments)

    def test_filter_min_taxa(self):
        self.assertEqual(self._count_after_min_taxa(50), 5)

    def test_filter_min_taxa_max(self):
        self.assertEqual(self._count_after_min_taxa(100), 1)

    def test_filter_min_taxa_min(self):
        # A 0% threshold keeps every alignment.
        self.assertEqual(self._count_after_min_taxa(0), 7)

    def test_filter_by_taxa_include(self):
        count = self._count_after_taxa_filter(
            ["spa", "spb", "spc", "spd"], "Contain")
        self.assertEqual(count, 2)

    def test_filter_by_taxa_exclude(self):
        count = self._count_after_taxa_filter(
            ["spa", "spb", "spc", "spd"], "Exclude")
        self.assertEqual(count, 5)

    def test_filter_by_taxa_all(self):
        # A taxon present nowhere removes every alignment.
        count = self._count_after_taxa_filter(["no_taxa"], "Contain")
        self.assertEqual(count, 0)

    def test_filter_by_taxa_from_file(self):
        # The taxa argument may also be a path to a taxa list file.
        count = self._count_after_taxa_filter(
            "trifusion/tests/data/filter_taxa.txt", "Contain")
        self.assertEqual(count, 2)
class AlignmentManipulationTest(unittest.TestCase):
    """Tests for manipulating the active alignment set of AlignmentList."""

    # Taxa subset shared by the remove_taxa tests.
    _RAD_TAXA = ["1285_RAD_original", "130a_RAD_original",
                 "137a_RAD_original", "1427_RAD_original",
                 "167a_RAD_original"]

    def setUp(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        # Best effort: some tests have already cleared the alignments.
        # Narrowed from a bare except, which would also swallow
        # KeyboardInterrupt/SystemExit.
        try:
            self.aln_obj.clear_alignments()
        except Exception:
            pass
        self.aln_obj.con.close()
        os.remove(sql_db)

    def test_clear_alns(self):
        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)
        self.assertTrue(
            compare_inst(self.aln_obj, aln, [
                "log_progression", "locus_length", "partitions", "cur",
                "con"
            ]))

    def test_update_act_anls(self):
        active = [join(data_path, "BaseConc1.fas"),
                  join(data_path, "BaseConc2.fas")]
        self.aln_obj.update_active_alignments(active)
        self.assertEqual(list(self.aln_obj.alignments.keys()), active)

    def test_update_act_alns_err(self):
        valid = [join(data_path, "BaseConc1.fas"),
                 join(data_path, "BaseConc2.fas")]
        # Unknown file names must be ignored without error.
        self.aln_obj.update_active_alignments(
            valid + [join(data_path, "Wrong_name")])
        self.assertEqual(list(self.aln_obj.alignments.keys()), valid)

    def test_update_aln_shelve(self):
        self.aln_obj.update_active_alignment(join(data_path, "BaseConc1.fas"),
                                             "shelve")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc%s.fas" % i)
                          for i in range(2, 8)])

    def test_update_aln_act(self):
        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(join(data_path, "BaseConc1.fas"),
                                             "active")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        # list() the view: on Python 3 dict views cannot be concatenated
        # with a list, and assertEqual(view, list) would fail.
        fl = list(self.aln_obj.alignments.keys())
        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur)
        self.aln_obj.add_alignments([aln])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         fl + [join(data_path, "c97d5m4p2.loci")])

    def _expected_without(self, taxa_list):
        """Return the current taxa_names minus the given taxa."""
        return [tx for tx in self.aln_obj.taxa_names if tx not in taxa_list]

    def test_remove_taxa_from_list(self):
        expected_taxa = self._expected_without(self._RAD_TAXA)
        # Pass a copy in case remove_taxa mutates its argument.
        self.aln_obj.remove_taxa(list(self._RAD_TAXA))
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_file(self):
        # taxa_to_remove is a file listing the same taxa as _RAD_TAXA.
        expected_taxa = self._expected_without(self._RAD_TAXA)
        self.aln_obj.remove_taxa(taxa_to_remove)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_list_inverse(self):
        # Inverse mode keeps only the listed taxa.
        self.aln_obj.remove_taxa(list(self._RAD_TAXA), mode="inverse")
        self.assertEqual(self.aln_obj.taxa_names, self._RAD_TAXA)

    def test_concatenation(self):
        aln = self.aln_obj.concatenate(alignment_name="test")
        aln.write_to_file(["fasta"], "test")
        try:
            with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                    open("test.fas") as fh2:
                self.assertEqual(fh1.read(), fh2.read())
        finally:
            # Remove the output even when the comparison fails, so a
            # failed run does not leave test.fas behind.
            os.remove("test.fas")
class LoadAlignmentsTest(unittest.TestCase):
    """Smoke tests: loading data in the supported formats must not raise.

    NOTE(review): a later class in this module re-uses this class name
    and therefore shadows these tests at import time — confirm which
    definition is intended to run.
    """

    def setUp(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        os.remove(sql_db)

    def test_class_instance(self):
        """The alignments container must be an OrderedDict."""
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def test_load_single_fas(self):
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        self.aln_obj = AlignmentList(dna_data_phy, sql_db=sql_db)

    def test_load_single_phy(self):
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        self.aln_obj = AlignmentList(dna_data_nex, sql_db=sql_db)

    def test_load_single_nex(self):
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        self.aln_obj = AlignmentList(dna_data_stc, sql_db=sql_db)

    def test_load_single_stc(self):
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur)

    def test_load_loci(self):
        self.aln_obj = AlignmentList(dna_data_loci, sql_db=sql_db)

    def test_load_single_loci(self):
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur)

    def test_load_nexus_par(self):
        """Nexus charset blocks must populate the partitions object."""
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        """Unparseable files end up in bad_alignments."""
        self.aln_obj = AlignmentList(bad_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        """Loading the same file twice records it as a duplicate."""
        self.aln_obj = AlignmentList(dna_data_loci + dna_data_loci,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        """Sequences of unequal length end up in non_alignments."""
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_load_no_data(self):
        self.aln_obj = AlignmentList(no_data, sql_db=sql_db)
class SeconaryOpsTest(unittest.TestCase):
    """Tests for summary statistics and plot-data generating methods.

    NOTE(review): the class name misspells "Secondary"; it is kept
    unchanged so existing test selectors keep working.
    """

    # Header row shared by the summary statistics table tests.
    _SUMMARY_HEADER = [
        'Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
        'Missing data', 'Missing data per gene', 'Variable sites',
        'Variable sites per gene', 'Informative sites',
        'Informative sites per gene']

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _single_aln_call(self, method_name):
        """Activate a single alignment and call the named method.

        Methods that need more than one alignment are expected to return
        the ``single_alignment`` exception marker in this situation.
        """
        self.aln_obj.update_active_alignments([dna_data_fas[0]])
        return getattr(self.aln_obj, method_name)()

    def test_summary_stats_all(self):
        sum_table, table_data = self.aln_obj.get_summary_stats()
        self.assertEqual(
            [sum_table, table_data],
            [{'missing': '5 (0.04%)', 'taxa': 24, 'genes': 7,
              'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
              'avg_gaps': 0.0, 'avg_missing': 1.0,
              'variable': '7 (1.18%)', 'seq_len': 595,
              'avg_var': 1.0, 'avg_inf': 0.0},
             [self._SUMMARY_HEADER,
              [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0,
               '7 (1.18%)', 1.0, '0 (0.0%)', 0.0]]])

    def test_summary_stats_one_active(self):
        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])
        self.assertEqual(
            [sum_table, table_data],
            [{'missing': '1 (0.05%)', 'taxa': 24, 'genes': 1,
              'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
              'avg_gaps': 0.0, 'avg_missing': 1.0,
              'variable': '1 (1.18%)', 'seq_len': 85,
              'avg_var': 1.0, 'avg_inf': 0.0},
             [self._SUMMARY_HEADER,
              [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0,
               '1 (1.18%)', 1.0, '0 (0.0%)', 0.0]]])

    # --- Methods that require more than one alignment must report the
    # --- single_alignment exception when only one is active.

    def test_single_aln_outlier_mdata(self):
        self.assertEqual(self._single_aln_call("outlier_missing_data"),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_mdata_sp(self):
        self.assertEqual(self._single_aln_call("outlier_missing_data_sp"),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg(self):
        self.assertEqual(self._single_aln_call("outlier_segregating"),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seg_sp(self):
        self.assertEqual(self._single_aln_call("outlier_segregating_sp"),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize(self):
        self.assertEqual(self._single_aln_call("outlier_sequence_size"),
                         {"exception": "single_alignment"})

    def test_single_aln_outlier_seqsize_sp(self):
        self.assertEqual(self._single_aln_call("outlier_sequence_size_sp"),
                         {"exception": "single_alignment"})

    def test_single_aln_average_seqsize_per_species(self):
        self.assertEqual(
            self._single_aln_call("average_seqsize_per_species"),
            {"exception": "single_alignment"})

    def test_single_aln_average_seqsize(self):
        self.assertEqual(self._single_aln_call("average_seqsize"),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_similarity(self):
        self.assertEqual(self._single_aln_call("sequence_similarity"),
                         {"exception": "single_alignment"})

    def test_single_aln_sequence_segregation(self):
        self.assertEqual(self._single_aln_call("sequence_segregation"),
                         {"exception": "single_alignment"})

    def test_single_aln_length_polymorphism_correlation(self):
        self.assertEqual(
            self._single_aln_call("length_polymorphism_correlation"),
            {"exception": "single_alignment"})

    def test_single_aln_taxa_distribution(self):
        self.assertEqual(self._single_aln_call("taxa_distribution"),
                         {"exception": "single_alignment"})

    def test_single_aln_cumulative_missing_genes(self):
        self.assertEqual(self._single_aln_call("cumulative_missing_genes"),
                         {"exception": "single_alignment"})

    def test_single_aln_gene_occupancy(self):
        self.assertEqual(self._single_aln_call("gene_occupancy"),
                         {"exception": "single_alignment"})

    def test_single_aln_missing_data_distribution(self):
        self.assertEqual(
            self._single_aln_call("missing_data_distribution"),
            {"exception": "single_alignment"})

    def test_single_aln_missing_genes_average(self):
        self.assertEqual(self._single_aln_call("missing_genes_average"),
                         {"exception": "single_alignment"})

    def test_no_data(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    # --- With the full data set loaded, each method must return data.

    def test_gene_occupancy(self):
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        self.assertTrue(
            self.aln_obj.characters_proportion_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_similarity(self):
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_similarity_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        self.assertTrue(
            self.aln_obj.sequence_conservation_gnp(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_segregation(self):
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        self.assertTrue(
            self.aln_obj.sequence_segregation_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        self.assertTrue(
            self.aln_obj.allele_frequency_spectrum_gene(
                join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
class LoadBadAlignmentsTest(unittest.TestCase):
    """Tests loading of malformed nexus files.

    Some defects must be tolerated (the file still loads and its
    dimensions are recovered), others must flag the file as a bad
    alignment or a non-alignment.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = None

    def tearDown(self):
        # Guard: if a test failed before building an AlignmentList,
        # self.aln_obj is still None and the unguarded calls would raise
        # AttributeError here, masking the real failure.
        if self.aln_obj is not None:
            self.aln_obj.clear_alignments()
            self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _load_first(self, files):
        """Load `files` and return [name, locus_length, taxa count] of
        the first resulting alignment.

        Wraps the values view in list() because dict views are not
        indexable on Python 3.
        """
        self.aln_obj = AlignmentList(files, sql_db=sql_db)
        aln = list(self.aln_obj.alignments.values())[0]
        return [aln.name, aln.locus_length, len(aln.taxa_idx)]

    def test_bad_extra_space_nexus_interleave(self):
        self.aln_obj = AlignmentList(bad_extraspace_interleave,
                                     sql_db=sql_db)
        self.assertEqual(self.aln_obj.non_alignments,
                         bad_extraspace_interleave)

    def test_no_final_colon_interleave(self):
        self.assertEqual(self._load_first(bad_no_colon_interleave),
                         ["bad_no_colon_interleave.nex", 898, 12])

    def test_no_end_colon_interleave(self):
        self.assertEqual(self._load_first(bad_no_end_interleave),
                         ["bad_no_end_interleave.nex", 898, 12])

    def test_bad_no_colon_nexus(self):
        self.assertEqual(self._load_first(bad_no_colon),
                         ["bad_no_colon.nex", 898, 12])

    def test_bad_no_end_nexus(self):
        self.assertEqual(self._load_first(bad_no_end),
                         ["bad_no_end.nex", 898, 12])

    def test_bad_no_header_nexus(self):
        self.aln_obj = AlignmentList(bad_no_header, sql_db=sql_db)
        self.assertEqual(self.aln_obj.bad_alignments, bad_no_header)

    def test_bad_no_matrix_nexus(self):
        self.aln_obj = AlignmentList(bad_no_matrix, sql_db=sql_db)
        self.assertEqual(self.aln_obj.bad_alignments, bad_no_matrix)

    def test_bad_no_format_line_nexus(self):
        self.aln_obj = AlignmentList(bad_no_format_line, sql_db=sql_db)
        self.assertEqual(self.aln_obj.bad_alignments, bad_no_format_line)

    def test_bad_space_in_middle_nexus(self):
        self.assertEqual(self._load_first(bad_space_in_middle),
                         ["bad_space_in_middle.nex", 898, 12])

    def test_bad_wrong_dimensions_nexus(self):
        self.aln_obj = AlignmentList(bad_wrong_dimensions, sql_db=sql_db)
        self.assertEqual(self.aln_obj.bad_alignments, bad_wrong_dimensions)

    def test_bad_wrong_size_nexus(self):
        self.assertEqual(self._load_first(bad_wrong_size),
                         ["bad_wrong_size.nex", 898, 12])
class LoadAlignmentsTest(unittest.TestCase):
    """Tests loading alignments of several formats and sequence types,
    including missing-character detection and non-ascii taxon names."""

    # Taxon name containing non-ascii bytes, shared by several tests.
    _NON_ASCII_TX = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _first_alignment(self):
        """Return the first Alignment of the current AlignmentList.

        Wraps the values view in list() because dict views are not
        indexable on Python 3.
        """
        return list(self.aln_obj.alignments.values())[0]

    def test_dna_load(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)
        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        self.aln_obj = AlignmentList(mixed_seq_type, sql_db=sql_db)
        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def test_load_single_fas(self):
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        self.aln_obj = AlignmentList(dna_data_phy, sql_db=sql_db)

    def test_load_single_phy(self):
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        self.aln_obj = AlignmentList(dna_data_nex, sql_db=sql_db)

    def test_load_single_nex(self):
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        Alignment(concatenated_interleave_nexus[0],
                  sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        self.aln_obj = AlignmentList(dna_data_stc, sql_db=sql_db)

    def test_load_single_stc(self):
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        self.aln_obj = AlignmentList(dna_data_loci, sql_db=sql_db)

    def test_load_single_loci(self):
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        self.aln_obj = AlignmentList(bad_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        self.aln_obj = AlignmentList(dna_data_loci + dna_data_loci,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        # Data from a rejected file must not linger in the database:
        # "Seq1" belongs only to the rejected unequal_file.
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)
        s = 0
        for tx, _, _ in self.aln_obj.iter_alignments():
            if tx == "Seq1":
                s += 1
        self.assertEqual(s, 0)

    def test_load_no_data(self):
        self.aln_obj = AlignmentList(no_data, sql_db=sql_db)

    def test_alternative_missing(self):
        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)
        self.assertEqual(self._first_alignment().sequence_code[1], "?")

    def test_dna_missing_default(self):
        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)
        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_default(self):
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)
        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_dna_missing_eval(self):
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_eval(self):
        self.aln_obj = AlignmentList(protein_normal_missing, sql_db=sql_db)
        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_non_ascii_taxon_names(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        non_ascii_tx = [x for x in self.aln_obj.taxa_names
                        if x == self._NON_ASCII_TX]
        self.assertEqual(len(non_ascii_tx), 1)

    def test_non_ascii_iteration(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        non_ascii_tx = []
        for tx, _, _ in self.aln_obj.iter_alignments():
            if tx == self._NON_ASCII_TX:
                non_ascii_tx.append(tx)
        self.assertEqual(len(non_ascii_tx), 1)

    def test_non_ascii_get_taxaidx(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        aln = self._first_alignment()
        non_ascii_tx = [x for x in aln.taxa_idx
                        if x == self._NON_ASCII_TX]
        self.assertEqual(len(non_ascii_tx), 1)

    def test_non_ascii_iter_columns(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))
        non_ascii_tx = [x for x in tx_list if x == self._NON_ASCII_TX]
        self.assertEqual(len(non_ascii_tx), 1)
class AlignmentVariationFilters(unittest.TestCase):
    """Tests for variation-based alignment filters on AlignmentList.

    Checks that `filter_segregating_sites` and `filter_informative_sites`
    keep/drop whole alignments according to the (min, max) thresholds.
    """

    def setUp(self):
        # Each test gets an empty AlignmentList backed by the shared sqlite db.
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def test_variation_filter_min(self):
        # No thresholds: all three fixture alignments are kept.
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(None, None)
        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_var_sites(self):
        # 1-2 segregating sites: no fixture alignment qualifies.
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(1, 2)
        self.assertEqual(len(self.aln_obj.alignments), 0)

    def test_variation_var_sites2(self):
        # Widening the range to 1-3 admits exactly one alignment.
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(1, 3)
        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_min(self):
        # No thresholds: informative-site filter keeps everything.
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(None, None)
        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_inf_sites(self):
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(1, 4)
        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_sites2(self):
        # Fixed: removed a leftover debug print of self.aln_obj.alignments.
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(1, 1)
        self.assertEqual(len(self.aln_obj.alignments), 1)
class AlignmentManipulationTest(unittest.TestCase):
    """Tests for AlignmentList manipulation operations.

    Covers clearing, (de)activating alignments, adding Alignment objects,
    taxa removal (direct, from file, inverse) and concatenation.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        try:
            self.aln_obj.clear_alignments()
        # Fixed: narrowed a bare `except:` (which also swallows
        # KeyboardInterrupt/SystemExit). Some tests clear the object
        # themselves, so a second clear here may legitimately fail.
        except Exception:
            pass
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def test_clear_alns(self):
        # A cleared list should compare equal to a freshly built empty one,
        # ignoring attributes that are instance-specific.
        self.aln_obj.clear_alignments()
        aln = AlignmentList([], sql_db=sql_db)
        self.assertTrue(compare_inst(self.aln_obj, aln,
                                     ["log_progression", "locus_length",
                                      "partitions", "cur", "con"]))

    def test_update_act_anls(self):
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas"),
                          join(data_path, "BaseConc2.fas")])

    def test_update_act_alns_err(self):
        # Unknown file names are silently ignored by the update.
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas"),
             join(data_path, "Wrong_name")])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas"),
                          join(data_path, "BaseConc2.fas")])

    def test_update_aln_shelve(self):
        # Shelving one alignment leaves the remaining six active.
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "shelve")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc2.fas"),
                          join(data_path, "BaseConc3.fas"),
                          join(data_path, "BaseConc4.fas"),
                          join(data_path, "BaseConc5.fas"),
                          join(data_path, "BaseConc6.fas"),
                          join(data_path, "BaseConc7.fas")])

    def test_update_aln_act(self):
        self.aln_obj.update_active_alignments([])
        self.aln_obj.update_active_alignment(
            join(data_path, "BaseConc1.fas"), "active")
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         [join(data_path, "BaseConc1.fas")])

    def test_add_aln_obj(self):
        # Fixed: wrapped .keys() in list() — the original relied on the
        # Python 2 behaviour of dict.keys() returning a list, which breaks
        # the `+` concatenation below on Python 3.
        fl = list(self.aln_obj.alignments.keys())
        aln = Alignment(dna_data_loci[0],
                        sql_cursor=self.aln_obj.cur,
                        sql_con=self.aln_obj.con,
                        db_idx=self.aln_obj._idx + 1,
                        temp_dir=temp_dir)
        self.aln_obj.add_alignments([aln])
        self.assertEqual(list(self.aln_obj.alignments.keys()),
                         fl + [join(data_path, "c97d5m4p2.loci")])

    def test_remove_taxa_from_list(self):
        taxa_list = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]
        expected_taxa = [tx for tx in self.aln_obj.taxa_names
                         if tx not in taxa_list]
        self.aln_obj.remove_taxa(taxa_list)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_file(self):
        # taxa_to_remove is a fixture file containing the same five names.
        taxa_list = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]
        expected_taxa = [tx for tx in self.aln_obj.taxa_names
                         if tx not in taxa_list]
        self.aln_obj.remove_taxa(taxa_to_remove)
        self.assertEqual(self.aln_obj.taxa_names, expected_taxa)

    def test_remove_taxa_from_list_inverse(self):
        # Inverse mode keeps only the listed taxa.
        taxa_list = [
            "1285_RAD_original",
            "130a_RAD_original",
            "137a_RAD_original",
            "1427_RAD_original",
            "167a_RAD_original"
        ]
        self.aln_obj.remove_taxa(taxa_list, mode="inverse")
        self.assertEqual(self.aln_obj.taxa_names, taxa_list)

    def test_concatenation(self):
        # Concatenated output must match the reference file line-for-line
        # (sorted, since taxa order is not guaranteed).
        self.aln_obj.concatenate()
        self.aln_obj.write_to_file(["fasta"], output_file="test")
        with open("trifusion/tests/data/BaseConcatenation.fas") as fh1, \
                open("test.fas") as fh2:
            self.assertEqual(sorted(fh1.readlines()),
                             sorted(fh2.readlines()))
        os.remove("test.fas")

    def test_concatention_after_removal(self):
        # NOTE: the historical "concatention" typo is kept so the test id
        # stays stable for external test selection/CI filters.
        fl = [x for x in self.aln_obj.alignments][3:]
        self.aln_obj.remove_file(fl)
        self.aln_obj.concatenate()
        self.assertEqual(len(self.aln_obj.alignments), 1)
class AlignmentMissingFiltersTest(unittest.TestCase):
    """Tests for `AlignmentList.filter_missing_data`.

    Each test loads missing-data fixtures, applies column/row thresholds
    and checks the resulting locus lengths (and, for one case, the size of
    the concatenated alignment).
    """

    # Fixture alignments containing missing data.
    TWO_FILES = ["trifusion/tests/data/missing_data.phy",
                 "trifusion/tests/data/missing_data2.phy"]
    ONE_FILE = ["trifusion/tests/data/missing_data3.phy"]

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _locus_lengths(self):
        """Return the locus_length of every active alignment, in order."""
        return [aln.locus_length for aln in self.aln_obj]

    def test_filter_default(self):
        self.aln_obj.add_alignment_files(self.TWO_FILES)
        self.aln_obj.filter_missing_data(25, 50)
        self.assertEqual(self._locus_lengths(), [42, 43])

    def test_filter_and_concat(self):
        self.aln_obj.add_alignment_files(self.TWO_FILES)
        self.aln_obj.filter_missing_data(25, 50, table_out="master_out")
        self.aln_obj.concatenate(table_in="master_out")
        self.assertEqual(self.aln_obj.size, 85)

    def test_no_filters(self):
        # 100/100 thresholds are a no-op: original lengths are preserved.
        self.aln_obj.add_alignment_files(self.TWO_FILES)
        self.aln_obj.filter_missing_data(100, 100)
        self.assertEqual(self._locus_lengths(), [50, 50])

    def test_no_missing(self):
        # 0/0 thresholds strip every column with any missing data.
        self.aln_obj.add_alignment_files(self.TWO_FILES)
        self.aln_obj.filter_missing_data(0, 0)
        self.assertEqual(self._locus_lengths(), [0, 19])

    def test_no_data_aln_default_filters(self):
        self.aln_obj.add_alignment_files(self.ONE_FILE)
        self.aln_obj.filter_missing_data(25, 50)
        lengths = self._locus_lengths()
        self.assertEqual(lengths[-1] if lengths else None, 0)

    def test_no_data_aln_no_filters(self):
        self.aln_obj.add_alignment_files(self.ONE_FILE)
        self.aln_obj.filter_missing_data(100, 100)
        lengths = self._locus_lengths()
        self.assertEqual(lengths[-1] if lengths else None, 50)
class AlignmentCodonFilters(unittest.TestCase):
    """Tests for `AlignmentList.filter_codon_positions`.

    The codon fixture encodes position 1 as 'a', position 2 as 't' and
    position 3 as 'g', so each boolean mask maps directly to an expected
    sequence pattern.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _filter_and_collect(self, mask, table=None):
        """Load the codon fixture, apply *mask*, and return all sequences."""
        self.aln_obj.add_alignment_files(codon_filter)
        if table is None:
            self.aln_obj.filter_codon_positions(mask)
            return [seq for _, seq, _ in self.aln_obj.iter_alignments()]
        self.aln_obj.filter_codon_positions(mask, table_out=table)
        return [seq for _, seq, _ in self.aln_obj.iter_alignments(table)]

    def test_codon_filter_pos1(self):
        seqs = self._filter_and_collect([True, False, False], "master_out")
        self.assertEqual(seqs, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        seqs = self._filter_and_collect([False, True, False], "master_out")
        self.assertEqual(seqs, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        seqs = self._filter_and_collect([False, False, True], "master_out")
        self.assertEqual(seqs, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        seqs = self._filter_and_collect([True, True, False], "master_out")
        self.assertEqual(seqs, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        seqs = self._filter_and_collect([True, False, True], "master_out")
        self.assertEqual(seqs, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        # All positions kept; written to the default table.
        seqs = self._filter_and_collect([True, True, True])
        self.assertEqual(seqs, ["atg" * 16] * 10)
class AlignmentMissingFiltersTest(unittest.TestCase):
    # NOTE(review): a class with this exact name is defined earlier in this
    # module; this later definition shadows it, so only this copy is
    # collected by the test runner. The two look like a duplication that
    # should be consolidated -- confirm before removing either.
    """Tests for `AlignmentList.filter_missing_data` locus-length results."""

    def setUp(self):
        # Fresh empty AlignmentList backed by the shared sqlite database.
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def test_filter_default(self):
        # Default 25/50 thresholds trim missing-data columns from both files.
        self.aln_obj.add_alignment_files([
            "trifusion/tests/data/missing_data.phy",
            "trifusion/tests/data/missing_data2.phy"
        ])
        self.aln_obj.filter_missing_data(25, 50)
        s = []
        for aln in self.aln_obj:
            s.append(aln.locus_length)
        self.assertEqual(s, [42, 43])

    def test_filter_and_concat(self):
        # Filtering into a named table and concatenating from it should
        # yield the combined filtered length (42 + 43 = 85).
        self.aln_obj.add_alignment_files([
            "trifusion/tests/data/missing_data.phy",
            "trifusion/tests/data/missing_data2.phy"
        ])
        self.aln_obj.filter_missing_data(25, 50, table_out="master_out")
        self.aln_obj.concatenate(table_in="master_out")
        self.assertEqual(self.aln_obj.size, 85)

    def test_no_filters(self):
        # 100/100 thresholds are effectively a no-op filter.
        self.aln_obj.add_alignment_files([
            "trifusion/tests/data/missing_data.phy",
            "trifusion/tests/data/missing_data2.phy"
        ])
        self.aln_obj.filter_missing_data(100, 100)
        s = []
        for aln in self.aln_obj:
            s.append(aln.locus_length)
        self.assertEqual(s, [50, 50])

    def test_no_missing(self):
        # 0/0 thresholds drop every column containing any missing data.
        self.aln_obj.add_alignment_files([
            "trifusion/tests/data/missing_data.phy",
            "trifusion/tests/data/missing_data2.phy"
        ])
        self.aln_obj.filter_missing_data(0, 0)
        s = []
        for aln in self.aln_obj:
            s.append(aln.locus_length)
        self.assertEqual(s, [0, 19])

    def test_no_data_aln_default_filters(self):
        # missing_data3.phy is entirely missing data, so the default filter
        # reduces it to zero length.
        self.aln_obj.add_alignment_files(
            ["trifusion/tests/data/missing_data3.phy"])
        self.aln_obj.filter_missing_data(25, 50)
        s = None
        for aln in self.aln_obj:
            s = aln.locus_length
        self.assertEqual(s, 0)

    def test_no_data_aln_no_filters(self):
        # With no-op thresholds the all-missing alignment keeps its length.
        self.aln_obj.add_alignment_files(
            ["trifusion/tests/data/missing_data3.phy"])
        self.aln_obj.filter_missing_data(100, 100)
        s = None
        for aln in self.aln_obj:
            s = aln.locus_length
        self.assertEqual(s, 50)
class PartitonsTest(ExpectingTestCase):
    """Tests for the Partitions object attached to an AlignmentList.

    Covers reading partition schemes from phylip/Nexus files, partition
    manipulation (remove, merge, split, rename), concatenation with custom
    active file sets, and substitution-model bookkeeping.
    """

    # The seven single-gene partition names used by most fixtures, in order.
    PART_NAMES = ["BaseConc%s.fas" % i for i in range(1, 8)]

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # -- helpers ----------------------------------------------------------

    def _key_data(self):
        """Return [partition keys, partition-alignment keys, model keys]."""
        parts = self.aln_obj.partitions
        return [list(parts.partitions.keys()),
                list(parts.partitions_alignments.keys()),
                list(parts.models.keys())]

    def _sorted_key_data(self):
        """Same as _key_data but with each key list sorted."""
        return [sorted(keys) for keys in self._key_data()]

    def _sort_partitions(self):
        """Reorder the partitions OrderedDict by partition range."""
        parts = self.aln_obj.partitions
        parts.partitions = OrderedDict(
            sorted(parts.partitions.items(), key=lambda x: x[1][0]))

    def _ranges_contiguous(self):
        """True if partition ranges tile the alignment without gaps."""
        prev = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] != prev:
                return False
            prev = r[0][0][1] + 1
        return True

    def _reload(self, files):
        """Replace self.aln_obj with a fresh AlignmentList over *files*."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(files, sql_db=sql_db)

    # -- reading partition schemes ----------------------------------------

    def test_read_from_nexus(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        # A malformed phylip partition file is reported, not raised.
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_unsorted_part_phylip(self):
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.PART_NAMES, 595])

    def test_phylip_dot_notation(self):
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.PART_NAMES, 595])

    def test_nexus_dot_notation(self):
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)
        data = [list(self.aln_obj.partitions.partitions.keys()),
                self.aln_obj.partitions.counter]
        self.assertEqual(data, [self.PART_NAMES, 595])

    def test_import_new_partscheme(self):
        self._reload(concatenated_medium_nexus)
        self.aln_obj.partitions.reset()
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(self.aln_obj.partitions.get_partition_names(),
                         self.PART_NAMES)

    def test_add_duplicate_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        # Fixed: the callable and its arguments must be passed separately so
        # assertRaises performs the call itself; the original invoked
        # add_partition() eagerly, raising before assertRaises could catch it.
        self.assertRaises(PartitionException,
                          self.aln_obj.partitions.add_partition,
                          "BaseCond1.fas", length=100)

    def test_get_partition_names(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(self.aln_obj.partitions.get_partition_names(),
                         self.PART_NAMES)

    def test_get_partition_names_withCodon(self):
        self.aln_obj.partitions.read_from_file(
            concatenated_smallCodon_parNex[0], no_aln_check=True)
        # BaseConc1 is split into three codon-position partitions.
        expected = (["BaseConc1.fas_1_%s" % i for i in range(1, 4)] +
                    self.PART_NAMES[1:])
        self.assertEqual(self.aln_obj.partitions.get_partition_names(),
                         expected)

    def test_single_partition(self):
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    # -- partition manipulation -------------------------------------------

    def test_remove_partition_from_file_original(self):
        self._reload(dna_data_fas)
        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")
        # Remaining partition ranges must stay contiguous.
        self.expect_equal(self._ranges_contiguous(), True)

    def test_remove_partition_from_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        # Keys must be removed from partitions, partitions_alignments and
        # models alike, and ranges must remain contiguous.
        expected = [n for n in self.PART_NAMES if n != "BaseConc3.fas"]
        self.expect_equal(self._key_data(), [expected] * 3)
        self.expect_equal(self._ranges_contiguous(), True)

    def test_remove_partition_from_file(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")
        expected = [n for n in self.PART_NAMES if n != "BaseConc3.fas"]
        self.expect_equal(self._key_data(), [expected] * 3)
        self.expect_equal(self._ranges_contiguous(), True)

    def test_change_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")
        # The renamed partition is re-appended at the end of the mapping.
        expected = self.PART_NAMES[1:] + ["OtherName"]
        self.expect_equal(self._key_data(), [expected] * 3)
        # Modernized from Py2-only iteritems(): sort by range before the
        # contiguity check, since renaming breaks insertion order.
        self._sort_partitions()
        self.expect_equal(self._ranges_contiguous(), True)

    def test_merge_partitions(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(list(self.PART_NAMES),
                                                 "New_part")
        self.assertEqual(self._key_data(), [["New_part"]] * 3)

    def test_split_partition(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])
        expected = ["part1", "part2"] + self.PART_NAMES[1:]
        self.expect_equal(self._key_data(), [expected] * 3)
        # Fixed: removed a leftover debug print from the range loop.
        self._sort_partitions()
        self.expect_equal(self._ranges_contiguous(), True)

    def test_merge_and_split(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.PART_NAMES[:3],
                                                 "new_part")
        # Splitting without explicit ranges restores the original members.
        self.aln_obj.partitions.split_partition("new_part")
        self.expect_equal(self._sorted_key_data(), [self.PART_NAMES] * 3)

    def test_merge_and_custom_split1(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.PART_NAMES[:3],
                                                 "new_part")
        # The split at column 50 leaves BaseConc1 straddling both halves.
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])
        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]
        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas',
                           'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self.PART_NAMES[:3],
                                                 "new_part")
        # Splitting exactly on the BaseConc1 boundary keeps files separate.
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])
        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]
        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc3.fas', 'BaseConc2.fas']])

    # -- concatenation with custom file sets ------------------------------

    def test_concat_custom_fileset_from_phy_partfile(self):
        self._reload(dna_data_fas)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])
        self.aln_obj.concatenate()
        self.expect_equal(self._sorted_key_data(),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        # Fixed: renamed from a second test_concat_custom_fileset_from_phy_partfile
        # definition, which silently shadowed (and thus disabled) the phylip
        # variant above. This one exercises the Nexus partition file.
        self._reload(dna_data_fas)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])
        self.aln_obj.concatenate()
        self.expect_equal(self._sorted_key_data(),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_merge_with_custom_fileset(self):
        self._reload(dna_data_fas)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.merge_partitions(self.PART_NAMES[:3],
                                                 "new_part")
        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc5.fas")])
        self.aln_obj.concatenate()
        # The merged partition is discarded when its members are inactive.
        self.expect_equal(self._sorted_key_data(),
                          [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    # -- substitution models ----------------------------------------------

    def test_model_detection(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        # Every fixture gene carries the same single MrBayes model string.
        model = ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']
        expected = OrderedDict(
            ('Teste%s.fas' % i, [[model], [None], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)
        # Each gene defines one model per codon position; partition names
        # carry the gene's starting column in the concatenated alignment.
        codon_models = [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                        ['nst=6', 'statefreqpr=fixed(equal)'],
                        ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']]
        starts = [1, 86, 171, 256, 341, 426, 511]
        expected = OrderedDict(
            ('Teste%s.fas_%s' % (i, start),
             [codon_models, [None, None, None], []])
            for i, start in enumerate(starts, 1))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])
        expected = OrderedDict((name, [[[]], [None], []])
                               for name in self.PART_NAMES)
        expected["BaseConc1.fas"] = [[[]], ['GTR'], []]
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)
        expected = OrderedDict((name, [[[]], ['GTR'], []])
                               for name in self.PART_NAMES)
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"], apply_all=True)
        expected = OrderedDict(
            (name, [[[]], ['GTR', 'SYM'], ['12', '3']])
            for name in self.PART_NAMES)
        self.assertEqual(self.aln_obj.partitions.models, expected)
class SeconaryOpsTest(unittest.TestCase):
    """Tests for AlignmentList statistics / secondary operations.

    Most statistics methods return a plot-data dict; when only a single
    alignment is active they instead return
    ``{"exception": "single_alignment"}``, and with no data at all
    ``{"exception": "empty_data"}``.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _check_single_aln(self, func):
        """Activate one alignment and assert *func* flags the single-aln case.

        Also removes two leftover debug prints the original tests carried.
        """
        self.aln_obj.update_active_alignments([dna_data_fas[0]])
        self.assertEqual(func(), {"exception": "single_alignment"})

    def test_summary_stats_all(self):
        sum_table, table_data = self.aln_obj.get_summary_stats()
        self.assertEqual(
            [sum_table, table_data],
            [{'missing': '5 (0.04%)', 'taxa': 24, 'genes': 7,
              'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
              'avg_gaps': 0.0, 'avg_missing': 1.0,
              'variable': '7 (1.18%)', 'seq_len': 595,
              'avg_var': 1.0, 'avg_inf': 0.0},
             [['Genes', 'Taxa', 'Alignment length', 'Gaps',
               'Gaps per gene', 'Missing data', 'Missing data per gene',
               'Variable sites', 'Variable sites per gene',
               'Informative sites', 'Informative sites per gene'],
              [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0,
               '7 (1.18%)', 1.0, '0 (0.0%)', 0.0]]])

    def test_summary_stats_one_active(self):
        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])
        self.assertEqual(
            [sum_table, table_data],
            [{'missing': '1 (0.05%)', 'taxa': 24, 'genes': 1,
              'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
              'avg_gaps': 0.0, 'avg_missing': 1.0,
              'variable': '1 (1.18%)', 'seq_len': 85,
              'avg_var': 1.0, 'avg_inf': 0.0},
             [['Genes', 'Taxa', 'Alignment length', 'Gaps',
               'Gaps per gene', 'Missing data', 'Missing data per gene',
               'Variable sites', 'Variable sites per gene',
               'Informative sites', 'Informative sites per gene'],
              [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0,
               '1 (1.18%)', 1.0, '0 (0.0%)', 0.0]]])

    # -- single-alignment guard behaviour ---------------------------------

    def test_single_aln_outlier_mdata(self):
        self._check_single_aln(self.aln_obj.outlier_missing_data)

    def test_single_aln_outlier_mdata_sp(self):
        self._check_single_aln(self.aln_obj.outlier_missing_data_sp)

    def test_single_aln_outlier_seg(self):
        self._check_single_aln(self.aln_obj.outlier_segregating)

    def test_single_aln_outlier_seg_sp(self):
        self._check_single_aln(self.aln_obj.outlier_segregating_sp)

    def test_single_aln_outlier_seqsize(self):
        self._check_single_aln(self.aln_obj.outlier_sequence_size)

    def test_single_aln_outlier_seqsize_sp(self):
        self._check_single_aln(self.aln_obj.outlier_sequence_size_sp)

    def test_single_aln_average_seqsize_per_species(self):
        self._check_single_aln(self.aln_obj.average_seqsize_per_species)

    def test_single_aln_average_seqsize(self):
        self._check_single_aln(self.aln_obj.average_seqsize)

    def test_single_aln_sequence_similarity(self):
        self._check_single_aln(self.aln_obj.sequence_similarity)

    def test_single_aln_sequence_segregation(self):
        self._check_single_aln(self.aln_obj.sequence_segregation)

    def test_single_aln_length_polymorphism_correlation(self):
        self._check_single_aln(
            self.aln_obj.length_polymorphism_correlation)

    def test_single_aln_taxa_distribution(self):
        self._check_single_aln(self.aln_obj.taxa_distribution)

    def test_single_aln_cumulative_missing_genes(self):
        self._check_single_aln(self.aln_obj.cumulative_missing_genes)

    def test_single_aln_gene_occupancy(self):
        self._check_single_aln(self.aln_obj.gene_occupancy)

    def test_single_aln_missing_data_distribution(self):
        self._check_single_aln(self.aln_obj.missing_data_distribution)

    def test_single_aln_missing_genes_average(self):
        self._check_single_aln(self.aln_obj.missing_genes_average)

    def test_no_data(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    # -- smoke tests: each statistic returns truthy plot data -------------

    def test_gene_occupancy(self):
        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):
        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):
        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):
        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):
        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):
        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):
        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):
        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):
        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):
        self.assertTrue(self.aln_obj.characters_proportion_gene(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_similarity(self):
        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):
        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):
        self.assertTrue(self.aln_obj.sequence_similarity_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):
        self.assertTrue(self.aln_obj.sequence_conservation_gnp(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_segregation(self):
        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):
        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):
        self.assertTrue(self.aln_obj.sequence_segregation_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):
        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):
        self.assertTrue(self.aln_obj.allele_frequency_spectrum_gene(
            join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):
        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):
        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):
        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):
        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):
        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):
        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):
        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
class PartitonsTest(ExpectingTestCase):
    """Tests for partition-scheme handling on an AlignmentList.

    Covers parsing partition files (Nexus and phylip/RAxML formats,
    including dot notation and malformed input), partition editing
    (remove, rename, merge, split), interaction with concatenation of a
    custom file set, and substitution-model detection/assignment.
    """

    # Partition names defined by the standard seven-gene fixtures
    # ("BaseConc1.fas" ... "BaseConc7.fas").
    _BASE_NAMES = ["BaseConc{}.fas".format(i) for i in range(1, 8)]

    def setUp(self):
        # Fresh AlignmentList backed by a scratch sqlite db in temp_dir.
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # -- helpers ---------------------------------------------------------

    def _partition_keys(self):
        """Return the partition names as stored in the three parallel
        mappings (partitions, partitions_alignments, models)."""
        return [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]

    def _assert_continuous_ranges(self, sort_first=False):
        """Assert that partition ranges are contiguous from position 0.

        When *sort_first* is True the partitions are first re-ordered by
        starting position (needed after renames/splits, which may append
        entries out of positional order).
        """
        if sort_first:
            self.aln_obj.partitions.partitions = OrderedDict(
                sorted(self.aln_obj.partitions.partitions.items(),
                       key=lambda x: x[1][0]))
        cont = True
        prev = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False
        self.expect_equal(cont, True)

    # -- partition file parsing ------------------------------------------

    def test_read_from_nexus(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        # A malformed phylip partition file is reported via the return
        # value, not raised.
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_unsorted_part_phylip(self):
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]
        self.assertEqual(data, [self._BASE_NAMES, 595])

    def test_phylip_dot_notation(self):
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]
        self.assertEqual(data, [self._BASE_NAMES, 595])

    def test_nexus_dot_notation(self):
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]
        self.assertEqual(data, [self._BASE_NAMES, 595])

    def test_bad_dot_notation(self):
        e = self.aln_obj.partitions.read_from_file(bad_dot_notation_nex[0],
                                                   no_aln_check=True)
        self.assertTrue(isinstance(e, InvalidPartitionFile))

    def test_import_new_partscheme(self):
        # A partition file can replace the scheme of an already
        # partitioned (concatenated nexus) data set.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.aln_obj.partitions.reset()
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, self._BASE_NAMES)

    # -- partition editing -----------------------------------------------

    def test_add_duplicate_name(self):
        """Adding a partition with an existing name raises
        PartitionException.

        Fixed: the callable and its arguments must be passed separately
        to assertRaises (the original called add_partition eagerly), and
        the name must be an actual duplicate ("BaseConc1.fas", not the
        typo "BaseCond1.fas").
        """
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        self.assertRaises(
            PartitionException,
            self.aln_obj.partitions.add_partition,
            "BaseConc1.fas", length=100)

    def test_get_partition_names(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        self.assertEqual(res, self._BASE_NAMES)

    def test_get_partition_names_withCodon(self):
        self.aln_obj.partitions.read_from_file(
            concatenated_smallCodon_parNex[0], no_aln_check=True)
        res = self.aln_obj.partitions.get_partition_names()
        # BaseConc1.fas is split into its three codon positions.
        self.assertEqual(res, [
            "BaseConc1.fas_1_1", "BaseConc1.fas_1_2", "BaseConc1.fas_1_3"
        ] + self._BASE_NAMES[1:])

    def test_single_partition(self):
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        self.assertFalse(self.aln_obj.partitions.is_single())

    def test_remove_partition_from_file_original(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        # Build the path the same way the fixtures do, so the name
        # matches the stored partition key regardless of the cwd the
        # tests are launched from (was a hard-coded relative path).
        self.aln_obj.partitions.remove_partition(
            file_name=join(data_path, "BaseConc3.fas"))
        # Remaining partition ranges must still be continuous.
        self._assert_continuous_ranges()

    def test_remove_partition_from_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")
        # The key must be gone from partitions, partitions_alignments
        # and models alike.
        expected = self._BASE_NAMES[:2] + self._BASE_NAMES[3:]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges()

    def test_remove_partition_from_file(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")
        expected = self._BASE_NAMES[:2] + self._BASE_NAMES[3:]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges()

    def test_change_name(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")
        # The renamed partition is appended at the end of the mappings.
        expected = self._BASE_NAMES[1:] + ["OtherName"]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges(sort_first=True)

    def test_merge_partitions(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(list(self._BASE_NAMES),
                                                 "New_part")
        self.assertEqual(self._partition_keys(), [["New_part"]] * 3)

    def test_split_partition(self):
        # Fixed: removed a stray debug print() from the range check loop.
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])
        expected = ["part1", "part2"] + self._BASE_NAMES[1:]
        self.expect_equal(self._partition_keys(), [expected] * 3)
        self._assert_continuous_ranges(sort_first=True)

    def test_merge_and_split(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self._BASE_NAMES[:3],
                                                 "new_part")
        # Splitting without explicit ranges restores the original files.
        self.aln_obj.partitions.split_partition("new_part")
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [self._BASE_NAMES] * 3)

    def test_merge_and_custom_split1(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self._BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])
        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]
        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas',
                           'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.merge_partitions(self._BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])
        key_data = [
            self.aln_obj.partitions.partitions_alignments["one"],
            self.aln_obj.partitions.partitions_alignments["two"]
        ]
        self.assertEqual(
            key_data,
            [['BaseConc1.fas'], ['BaseConc3.fas', 'BaseConc2.fas']])

    # -- concatenation with a custom active file set ---------------------

    def test_concat_custom_fileset_from_phy_partfile(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [self._BASE_NAMES[:2]] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        # Fixed: this test duplicated the method name of the phylip
        # variant above, silently shadowing it so only one of the two
        # ever ran. Renamed to reflect the Nexus partition file it uses.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(key_data, [self._BASE_NAMES[:2]] * 3)

    def test_merge_with_custom_fileset(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])
        self.aln_obj.partitions.merge_partitions(self._BASE_NAMES[:3],
                                                 "new_part")
        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc5.fas")
        ])
        self.aln_obj.concatenate()
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]
        self.expect_equal(
            key_data, [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    # -- substitution models ---------------------------------------------

    def test_model_detection(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)
        # Every fixture carries the same MrBayes lset/prset parameters.
        expected = OrderedDict(
            ("Teste{}.fas".format(i),
             [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']], [None], []])
            for i in range(1, 8))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)
        # One parameter set per codon position, shared by all fixtures;
        # partition names carry the starting position of each gene.
        codon_params = [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                        ['nst=6', 'statefreqpr=fixed(equal)'],
                        ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']]
        starts = [1, 86, 171, 256, 341, 426, 511]
        expected = OrderedDict(
            ("Teste{}.fas_{}".format(i, start),
             [codon_params, [None, None, None], []])
            for i, start in enumerate(starts, 1))
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])
        expected = OrderedDict(
            (name,
             [[[]], ['GTR'] if name == "BaseConc1.fas" else [None], []])
            for name in self._BASE_NAMES)
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)
        expected = OrderedDict(
            (name, [[[]], ['GTR'], []]) for name in self._BASE_NAMES)
        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)
        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"],
                                          apply_all=True)
        expected = OrderedDict(
            (name, [[[]], ['GTR', 'SYM'], ['12', '3']])
            for name in self._BASE_NAMES)
        self.assertEqual(self.aln_obj.partitions.models, expected)
class LoadAlignmentsTest(unittest.TestCase):
    """Exercises alignment loading for every supported input format.

    Most format tests are smoke tests: constructing the Alignment or
    AlignmentList must simply not raise. The remaining tests cover
    sequence-type detection, bad-input bookkeeping, missing-symbol
    evaluation and non-ASCII taxon names.
    """

    def setUp(self):
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # -- sequence type detection -----------------------------------------

    def test_dna_load(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)
        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        self.aln_obj = AlignmentList(mixed_seq_type, sql_db=sql_db)
        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    # -- per-format smoke tests ------------------------------------------

    def test_load_fas(self):
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def test_load_single_fas(self):
        aln = Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        self.aln_obj = AlignmentList(dna_data_phy, sql_db=sql_db)

    def test_load_single_phy(self):
        aln = Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        aln = Alignment(phylip_interleave[0],
                        sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        self.aln_obj = AlignmentList(dna_data_nex, sql_db=sql_db)

    def test_load_single_nex(self):
        aln = Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        aln = Alignment(concatenated_interleave_nexus[0],
                        sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        self.aln_obj = AlignmentList(dna_data_stc, sql_db=sql_db)

    def test_load_single_stc(self):
        aln = Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                        db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        self.aln_obj = AlignmentList(dna_data_loci, sql_db=sql_db)

    def test_load_single_loci(self):
        aln = Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                        db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.partitions)

    # -- bad input bookkeeping -------------------------------------------

    def test_load_wrong_type(self):
        self.aln_obj = AlignmentList(bad_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        self.aln_obj = AlignmentList(dna_data_loci + dna_data_loci,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        # Once the unequal-length file is rejected, none of its taxa
        # ("Seq1") may linger in the database.
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)
        leftovers = sum(1 for tx, _, _ in self.aln_obj.iter_alignments()
                        if tx == "Seq1")
        self.assertEqual(leftovers, 0)

    def test_load_no_data(self):
        self.aln_obj = AlignmentList(no_data, sql_db=sql_db)

    # -- missing symbol evaluation ---------------------------------------

    def test_alternative_missing(self):
        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        self.assertEqual(aln.sequence_code[1], "?")

    def test_dna_missing_default(self):
        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        self.assertEqual(aln.sequence_code[1], "n")

    def test_protein_missing_default(self):
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        self.assertEqual(aln.sequence_code[1], "x")

    def test_dna_missing_eval(self):
        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        self.assertEqual(aln.sequence_code[1], "n")

    def test_protein_missing_eval(self):
        self.aln_obj = AlignmentList(protein_normal_missing,
                                     sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        self.assertEqual(aln.sequence_code[1], "x")

    # -- non-ASCII taxon names -------------------------------------------

    def test_non_ascii_taxon_names(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        target = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        matches = [x for x in self.aln_obj.taxa_names if x == target]
        self.assertEqual(len(matches), 1)

    def test_non_ascii_iteration(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        target = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        hits = [tx for tx, _, _ in self.aln_obj.iter_alignments()
                if tx == target]
        self.assertEqual(len(hits), 1)

    def test_non_ascii_get_taxaidx(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        aln = self.aln_obj.alignments.values()[0]
        target = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        matches = [x for x in aln.taxa_idx if x == target]
        self.assertEqual(len(matches), 1)

    def test_non_ascii_iter_columns(self):
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)
        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))
        target = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        matches = [x for x in tx_list if x == target]
        self.assertEqual(len(matches), 1)