def test_no_has_labels(self):
    """Assert ``has_taxa_labels`` is False when queried labels are absent.

    Checked both for a list made only of absent labels and for that list
    extended with every fixture label.
    """
    namespace = TaxonNamespace(self.str_labels)
    absent = ["u", "x", "y",]
    # Sanity check on the fixture: none of the probes may be a real label.
    assert all(label not in self.str_labels for label in absent)
    only_absent = namespace.has_taxa_labels(absent)
    self.assertFalse(only_absent)
    mixed = namespace.has_taxa_labels(absent + self.str_labels)
    self.assertFalse(mixed)
def test_get_nonexistant_taxon_by_label(self):
    """Assert ``get_taxon`` returns ``None`` for each label not in the namespace."""
    tns = TaxonNamespace(self.str_labels)
    check = ["u", "x", "y",]
    for label in check:
        # Sanity check on the fixture: the probe must really be absent.
        assert label not in self.str_labels
        # Query the individual label; the earlier version passed the whole
        # ``check`` list, so no single-label lookup was ever exercised.
        t = tns.get_taxon(label)
        self.assertIs(t, None)
def test_add_taxon(self):
    """Add each fixture taxon via ``add_taxon`` and verify it is stored."""
    namespace = TaxonNamespace()
    for taxon in self.taxa:
        namespace.add_taxon(taxon)
    self.validate_taxon_concepts(namespace, self.taxa_labels)
    # Every original object must be present in the backing collection.
    for taxon in self.taxa:
        self.assertIn(taxon, namespace._taxa)
def test_new_taxon_to_immutable(self):
    """Assert ``new_taxon`` raises ``TypeError`` when ``is_mutable`` is False.

    The namespace must remain empty after each rejected call.
    """
    frozen = TaxonNamespace()
    frozen.is_mutable = False
    for label in self.str_labels:
        with self.assertRaises(TypeError):
            frozen.new_taxon(label)
        self.assertEqual(len(frozen), 0)
def test_add_taxa_duplicate(self):
    """Assert re-adding already-present taxa via ``add_taxa`` changes nothing."""
    namespace = TaxonNamespace(self.taxa)
    self.validate_taxon_concepts(namespace, self.taxa_labels)
    # Second insertion of the very same objects.
    namespace.add_taxa(self.taxa)
    self.assertEqual(len(namespace), len(self.taxa))
    for stored, original in zip(namespace, self.taxa):
        self.assertIs(stored, original)
def test_basic_adding(self):
    """Add freshly built taxa one by one and check the length after each."""
    namespace = TaxonNamespace()
    self.assertEqual(len(namespace), 0)
    for expected_size, label in enumerate(self.str_labels, 1):
        namespace.add_taxon(Taxon(label=label))
        self.assertEqual(len(namespace), expected_size)
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_require_taxon_by_label_noadd(self):
    """Assert ``require_taxon`` adds nothing when the label already exists.

    Each fixture label is requested through ``require_taxon``; the returned
    taxon must carry that label and the namespace size must stay unchanged.
    """
    tns = TaxonNamespace(self.str_labels)
    for label in self.str_labels:
        # Exercise ``require_taxon`` (the method this test is named for);
        # the earlier version called ``get_taxon``, which cannot add taxa
        # and therefore made the size check below vacuous.
        t = tns.require_taxon(label)
        self.assertEqual(t.label, label)
    self.assertEqual(len(tns), len(self.str_labels))
    self.validate_taxon_concepts(tns, self.str_labels)
def test_get_taxa_by_label(self):
    """Assert ``get_taxa`` returns only the taxa whose labels exist.

    The query mixes all fixture labels with three extra labels; the result
    is expected to match the fixture labels in length and order.
    """
    namespace = TaxonNamespace(self.str_labels)
    query = self.str_labels + ["u", "x", "y"]
    found = namespace.get_taxa(query)
    self.assertEqual(len(found), len(self.str_labels))
    found_labels = [taxon.label for taxon in found]
    self.assertEqual(found_labels, self.str_labels)
def test_has_labels_case_sensitivity(self):
    """Assert ``has_taxa_labels`` honours the ``is_case_sensitive`` flag."""
    namespace = TaxonNamespace(self.str_labels)
    # Keep only labels whose upper-cased form differs from the original.
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    assert uppercased
    namespace.is_case_sensitive = True
    self.assertFalse(namespace.has_taxa_labels(uppercased))
    namespace.is_case_sensitive = False
    self.assertTrue(namespace.has_taxa_labels(uppercased))
def test_new_taxon(self):
    """Create taxa with ``new_taxon`` and check type, label and running size."""
    namespace = TaxonNamespace()
    for expected_size, label in enumerate(self.str_labels, 1):
        created = namespace.new_taxon(label)
        self.assertTrue(isinstance(created, Taxon))
        self.assertEqual(created.label, label)
        self.assertEqual(len(namespace), expected_size)
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_basic_adding_to_immutable(self):
    """Assert ``add_taxon`` raises ``TypeError`` when ``is_mutable`` is False.

    The namespace must remain empty after each rejected call.
    """
    frozen = TaxonNamespace()
    self.assertEqual(len(frozen), 0)
    frozen.is_mutable = False
    for label in self.str_labels:
        with self.assertRaises(TypeError):
            frozen.add_taxon(Taxon(label=label))
        self.assertEqual(len(frozen), 0)
def test_require_taxon_by_label_add_to_immutable(self):
    """Assert ``require_taxon`` raises ``TypeError`` for an absent label
    when ``is_mutable`` is False.
    """
    frozen = TaxonNamespace(self.str_labels)
    frozen.is_mutable = False
    for absent in ["u", "x", "y",]:
        # Sanity check on the fixture: the probe must really be absent.
        assert absent not in self.str_labels
        with self.assertRaises(TypeError):
            frozen.require_taxon(absent)
def test_clear(self):
    """Assert ``clear`` empties the namespace for both ``len`` and iteration."""
    namespace = TaxonNamespace(self.str_labels)
    self.assertEqual(len(namespace), len(self.str_labels))
    namespace.clear()
    self.assertEqual(len(namespace), 0)
    # Iterating must also yield nothing.
    leftovers = [taxon for taxon in namespace]
    self.assertEqual(len(leftovers), 0)
def test_findall_multiple(self):
    """Assert ``findall(label=...)`` returns every taxon carrying the label.

    With case-sensitive matching enabled, the result must be iterable,
    contain as many taxa as the label occurs in the fixture, and every
    returned taxon must carry exactly that label.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. The fallback keeps very old interpreters working.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    tns = TaxonNamespace(self.str_labels)
    # Loop-invariant setting, applied once before the queries.
    tns.is_case_sensitive = True
    multilabels = ["_", "z"]
    for label in multilabels:
        taxa = tns.findall(label=label)
        self.assertIsInstance(taxa, Iterable)
        self.assertEqual(len(taxa), self.str_labels.count(label))
        for t in taxa:
            self.assertEqual(t.label, label)
def test_discard_taxon_label_error(self):
    """Assert ``discard_taxon_label`` does not raise ``LookupError`` for an
    absent label and leaves the namespace contents as they were.
    """
    namespace = TaxonNamespace(self.str_labels)
    absent = "zzz"
    assert absent not in self.str_labels
    try:
        namespace.discard_taxon_label(absent)
    except LookupError:
        self.fail()
    # Only reached when no LookupError occurred (``fail`` raises).
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_reverse(self):
    """Assert in-place ``reverse`` flips the iteration order of the taxa."""
    ascending = sorted(self.str_labels)
    namespace = TaxonNamespace(ascending)
    namespace.reverse()
    descending = ascending[::-1]
    # Guard against a palindromic fixture that would make the test vacuous.
    assert ascending != descending
    for position, taxon in enumerate(namespace):
        self.assertEqual(taxon.label, descending[position])
def test_case_insensitive_require_taxon_by_label2(self):
    """With ``is_case_sensitive`` True, requiring an upper-cased label
    yields a taxon carrying exactly that label.

    The final validation expects the namespace to hold the fixture labels
    followed by the distinct upper-cased labels.
    """
    namespace = TaxonNamespace(self.str_labels)
    # Distinct upper-cased variants of labels that actually change case.
    uppercased = list({
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    })
    assert uppercased
    for wanted in uppercased:
        namespace.is_case_sensitive = True
        taxon = namespace.require_taxon(wanted)
        self.assertEqual(taxon.label, wanted)
    self.validate_taxon_concepts(namespace, self.str_labels + uppercased)
def test_case_insensitive_require_taxon_by_label1(self):
    """With ``is_case_sensitive`` False, requiring an upper-cased label
    returns a taxon matching it case-insensitively and adds nothing.
    """
    namespace = TaxonNamespace(self.str_labels)
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    assert uppercased
    for wanted in uppercased:
        namespace.is_case_sensitive = False
        taxon = namespace.require_taxon(wanted)
        self.assertEqual(taxon.label.lower(), wanted.lower())
    # The namespace must not have grown.
    self.assertEqual(len(namespace), len(self.str_labels))
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_initialize_other_taxon_namespace(self):
    """Build a namespace from another namespace and check that both hold
    the very same taxon objects while being distinct containers.
    """
    source = TaxonNamespace(self.taxa)
    derived = TaxonNamespace(source)
    self.assertIsNot(source, derived)
    for namespace in (source, derived):
        self.validate_taxon_concepts(namespace, self.taxa_labels)
    for taxon in self.taxa:
        self.assertIn(taxon, source._taxa)
        self.assertIn(taxon, derived._taxa)
    # Pairwise identity: the taxa are shared, not copied.
    for from_source, from_derived in zip(source, derived):
        self.assertIs(from_source, from_derived)
def test_construct_from_another(self):
    """Check three ways of copying ``self.tns1`` that share its taxa.

    For the copy constructor, ``clone(0)`` and ``copy.copy``, the result
    must be a distinct namespace with an equal label and equal ``_taxa``
    whose members are the identical taxon objects.
    """
    # The former leading ``tns2 = TaxonNamespace(self.tns1)`` was dead code:
    # the loop variable rebound it at once, and the same construction is
    # already the first element of the tuple below.
    for tns2 in (TaxonNamespace(self.tns1),
            self.tns1.clone(0),
            copy.copy(self.tns1)):
        self.assertIsNot(tns2, self.tns1)
        self.assertEqual(tns2.label, self.tns1.label)
        self.assertEqual(tns2._taxa, self.tns1._taxa)
        for t1, t2 in zip(self.tns1, tns2):
            self.assertIs(t1, t2)
        self.compare_distinct_annotables(tns2, self.tns1)
def test_require_taxon_by_label_add(self):
    """Assert ``require_taxon`` creates a ``Taxon`` for each absent label.

    Afterwards the namespace is expected to hold the fixture labels
    followed by the newly required ones.
    """
    namespace = TaxonNamespace(self.str_labels)
    absent = ["u", "x", "y",]
    for wanted in absent:
        # Sanity check on the fixture: the probe must really be absent.
        assert wanted not in self.str_labels
        created = namespace.require_taxon(wanted)
        self.assertTrue(isinstance(created, Taxon))
        self.assertEqual(created.label, wanted)
    all_labels = self.str_labels + absent
    self.assertEqual(len(namespace), len(all_labels))
    self.validate_taxon_concepts(namespace, all_labels)
def test_discard_taxon_label(self):
    """Discard each distinct label in turn and compare against a mirror list.

    After every discard the namespace must match, in size and object
    identity, a local list from which the same-labelled taxa were removed.
    """
    originals = [Taxon(s) for s in self.str_labels]
    namespace = TaxonNamespace(originals)
    remaining = originals[:]
    for label in set(self.str_labels):
        namespace.discard_taxon_label(label)
        # Mirror the removal in the reference list.
        for candidate in originals:
            if candidate.label == label and candidate in remaining:
                remaining.remove(candidate)
        self.assertEqual(len(namespace), len(remaining))
        for stored, wanted in zip(namespace, remaining):
            self.assertIs(stored, wanted)
def test_remove_taxon(self):
    """Remove taxa one at a time and verify membership of every taxon.

    After each ``remove_taxon`` call, taxa still in the mirror list must be
    in the namespace and already-removed taxa must not be.
    """
    taxa = [Taxon(s) for s in self.str_labels]
    tns = TaxonNamespace(taxa)
    expected = taxa[:]
    for taxon in taxa:
        tns.remove_taxon(taxon)
        expected.remove(taxon)
        self.assertEqual(len(tns), len(expected))
        # Walk *all* original taxa, not just ``expected``: iterating over
        # ``expected`` made the not-in branch unreachable, so removed taxa
        # were never checked for absence.
        for taxon2 in taxa:
            if taxon2 in expected:
                self.assertIn(taxon2, tns)
            else:
                self.assertNotIn(taxon2, tns)
def test_case_insensitive_get_taxa_by_label(self):
    """Assert ``get_taxa`` matches upper-cased labels by default and finds
    nothing for them once ``is_case_sensitive`` is True.
    """
    namespace = TaxonNamespace(self.str_labels)
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    assert uppercased

    # Default setting: every upper-cased label resolves to a taxon.
    insensitive_hits = namespace.get_taxa(uppercased)
    self.assertEqual(len(insensitive_hits), len(uppercased))
    for taxon, wanted in zip(insensitive_hits, uppercased):
        self.assertEqual(taxon.label.lower(), wanted.lower())

    # Case-sensitive setting: none of them resolve.
    namespace.is_case_sensitive = True
    sensitive_hits = namespace.get_taxa(uppercased)
    self.assertEqual(len(sensitive_hits), 0)
def test_case_insensitive_get_taxon_by_label(self):
    """Assert ``get_taxon`` matches upper-cased labels by default and
    returns ``None`` for them once ``is_case_sensitive`` is True.
    """
    namespace = TaxonNamespace(self.str_labels)
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    assert uppercased

    # Default setting: lookups succeed regardless of case.
    for wanted in uppercased:
        hit = namespace.get_taxon(wanted)
        self.assertIsNot(hit, None)
        self.assertEqual(hit.label.lower(), wanted.lower())

    # Case-sensitive setting: the same lookups miss.
    namespace.is_case_sensitive = True
    for wanted in uppercased:
        miss = namespace.get_taxon(wanted)
        self.assertIs(miss, None)
def test_reverse_iter(self):
    """Assert ``reversed(namespace)`` yields taxa in reverse insertion order."""
    namespace = TaxonNamespace(self.str_labels)
    backwards = self.str_labels[::-1]
    # Guard against a palindromic fixture that would make the test vacuous.
    assert backwards != self.str_labels
    for position, taxon in enumerate(reversed(namespace)):
        self.assertEqual(taxon.label, backwards[position])
def run_tree_regression(arg, taxa):
    """Load the tree at ``arg.tree``, prepare it, and return ``regression(tree)``.

    :param arg: object whose ``tree`` attribute is the path of the tree file
    :param taxa: mapping with a ``'taxa'`` list; each entry provides ``'id'``
        and ``['attributes']['date']``
    :return: whatever ``regression`` returns for the prepared tree

    The file is read as NEXUS when its first line starts with ``#NEXUS``
    (case-insensitive) and as Newick otherwise.
    """
    taxon_namespace = TaxonNamespace([taxon['id'] for taxon in taxa['taxa']])

    # Sniff the format from the first line. The default keeps an empty file
    # from leaking a bare StopIteration; it is then handed to the Newick
    # reader like any other non-NEXUS input.
    with open(arg.tree) as fp:
        first_line = next(fp, '')
    tree_format = 'nexus' if first_line.upper().startswith('#NEXUS') else 'newick'

    # A single call serves both formats; only the schema differs.
    tree = Tree.get(
        path=arg.tree,
        schema=tree_format,
        tree_offset=0,
        preserve_underscores=True,
        taxon_namespace=taxon_namespace,
    )
    tree.resolve_polytomies(update_bipartitions=True)
    setup_indexes(tree, False)
    taxa2 = [{'date': taxon['attributes']['date']} for taxon in taxa['taxa']]
    initialize_dates_from_taxa(tree, taxa2)
    return regression(tree)
def simulate_pangenome(ngenes, nisolates, effective_pop_size, gain_rate,
                       loss_rate, mutation_rate, max_core):
    """Simulate a tree and per-isolate gene/mutation content.

    A tree is drawn with ``treesim.pure_kingman_tree`` over taxa labelled
    ``"1"`` .. ``str(nisolates)``, deep-copied, and then passed to
    ``simulate_img_with_mutation``.

    :return: tuple ``(gene_mutations, basic_tree)`` where ``gene_mutations``
        holds, per leaf, a list of ``[gene, leaf.gene_mutations[gene]]``
        pairs for the genes in ``leaf.acc_genes``, and ``basic_tree`` is the
        copy taken before the gene simulation
    """
    # Simulate the phylogeny.
    isolate_labels = [str(i) for i in range(1, 1 + nisolates)]
    sim_tree = treesim.pure_kingman_tree(
        taxon_namespace=TaxonNamespace(isolate_labels),
        pop_size=effective_pop_size)
    basic_tree = copy.deepcopy(sim_tree)

    # Simulate gene presence/absence and mutation on the tree.
    sim_tree = simulate_img_with_mutation(
        sim_tree,
        gain_rate=gain_rate,
        loss_rate=loss_rate,
        mutation_rate=mutation_rate,
        ngenes=ngenes,
        max_ncore=max_core)

    # Collect genes and their mutations for each isolate (leaf).
    gene_mutations = [
        [[gene, leaf.gene_mutations[gene]] for gene in leaf.acc_genes]
        for leaf in sim_tree.leaf_node_iter()
    ]
    return (gene_mutations, basic_tree)
def read_forest(filenames, true_tree_file):
    """
    Read files with Newick-formatted trees into dendropy tree objects.

    :param filenames: array of paths to files to compare
    :param true_tree_file: path to file of "true" tree
    :return: tuple of (dict of dendropy trees keyed by file basename with
        ``".msl"`` removed, dendropy tree for the "true" tree)
    """
    def _read_tree_from_path(path, taxon_namespace):
        """Read one Newick file into a tree using ``taxon_namespace``."""
        # Called on the class directly; building a throwaway ``Tree()``
        # instance just to reach ``get_from_path`` is unnecessary.
        return Tree.get_from_path(path, "newick", taxon_namespace=taxon_namespace)

    # One namespace object is passed to every read, so all trees share it
    # (marked "needed" by the original author; presumably for comparing trees).
    taxon_ns = TaxonNamespace()
    true_tree = _read_tree_from_path(true_tree_file, taxon_ns)
    trees = {
        basename(tree_path).replace(".msl", ""): _read_tree_from_path(tree_path, taxon_ns)
        for tree_path in filenames
    }
    return trees, true_tree
def test_discard_taxon_label_case_insensitive(self):
    """Discard by upper-cased label under the default case setting.

    After each discard the namespace must match, in size and object
    identity, a mirror list from which taxa whose labels match
    case-insensitively were removed.
    """
    uppercased = [s.upper() for s in self.str_labels]
    assert uppercased
    assert uppercased != self.str_labels
    originals = [Taxon(s) for s in self.str_labels]
    namespace = TaxonNamespace(originals)
    remaining = originals[:]
    # Default setting is left untouched (case-insensitive matching).
    for label in set(uppercased):
        namespace.discard_taxon_label(label)
        for candidate in originals:
            same_label = candidate.label.upper() == label.upper()
            if same_label and candidate in remaining:
                remaining.remove(candidate)
        self.assertEqual(len(namespace), len(remaining))
        for stored, wanted in zip(namespace, remaining):
            self.assertIs(stored, wanted)
def test_discard_taxon_label_case_sensitive(self):
    """With ``is_case_sensitive`` True, discarding an upper-cased label must
    neither raise ``LookupError`` nor change the namespace size.
    """
    uppercased = [s.upper() for s in self.str_labels]
    assert uppercased
    assert uppercased != self.str_labels
    namespace = TaxonNamespace([Taxon(s) for s in self.str_labels])
    namespace.is_case_sensitive = True
    for label in set(uppercased):
        # Labels without case (identical upper and lower forms) are skipped.
        if label == label.lower():
            continue
        size_before = len(namespace)
        try:
            namespace.discard_taxon_label(label)
        except LookupError:
            self.fail()
        # Only reached when no LookupError occurred (``fail`` raises).
        self.assertEqual(len(namespace), size_before)
def test_remove_taxon_label_case_insensitive(self):
    """Remove by upper-cased label under both case settings.

    For labels that have case, a case-sensitive removal must raise
    ``LookupError``; the case-insensitive removal that follows must leave
    the namespace matching a mirror list in size and object identity.
    """
    uppercased = [s.upper() for s in self.str_labels]
    assert uppercased
    assert uppercased != self.str_labels
    originals = [Taxon(s) for s in self.str_labels]
    namespace = TaxonNamespace(originals)
    remaining = originals[:]
    for label in set(uppercased):
        has_case = label != label.lower()
        if has_case:
            with self.assertRaises(LookupError):
                namespace.is_case_sensitive = True
                namespace.remove_taxon_label(label)
        namespace.is_case_sensitive = False
        namespace.remove_taxon_label(label)
        # Mirror the removal in the reference list.
        for candidate in originals:
            same_label = candidate.label.upper() == label.upper()
            if same_label and candidate in remaining:
                remaining.remove(candidate)
        self.assertEqual(len(namespace), len(remaining))
        for stored, wanted in zip(namespace, remaining):
            self.assertIs(stored, wanted)
def test_remove_taxon_error(self):
    """Assert ``remove_taxon`` raises ``ValueError`` for a taxon object that
    was never added (the namespace here is built from label strings).
    """
    namespace = TaxonNamespace(self.str_labels)
    outsider = self.taxa[0]
    with self.assertRaises(ValueError):
        namespace.remove_taxon(outsider)
def setUp(self):
    """Create the fixture: labels (with repeats), one ``Taxon`` per label,
    and a namespace ``tns1`` labelled ``"T1"`` holding those taxa.
    """
    self.str_labels = [
        "a", "a",
        "b", "c", "d", "e",
        "_", "_", "_",
        "z", "z", "z",
    ]
    self.taxa = [Taxon(label) for label in self.str_labels]
    self.tns1 = TaxonNamespace(self.taxa, label="T1")
class TaxonNamespaceCloning(compare_and_validate.Comparator, unittest.TestCase):
    """Tests for copying a ``TaxonNamespace``.

    Covers the copy constructor, ``clone`` at depths 0, 1 and 2,
    ``copy.copy``, ``copy.deepcopy`` and ``taxon_namespace_scoped_copy``.
    """

    def setUp(self):
        """Create labels (with repeats), their taxa, and namespace ``tns1``."""
        self.str_labels = ["a", "a", "b", "c", "d", "e", "_", "_", "_", "z", "z", "z"]
        self.taxa = [ Taxon(label) for label in self.str_labels ]
        self.tns1 = TaxonNamespace(self.taxa, label="T1")

    def test_taxon_namespace_scoped_copy(self):
        """``clone(1)`` and the scoped copy must return the namespace itself."""
        for tns2 in (self.tns1.clone(1),
                self.tns1.taxon_namespace_scoped_copy()):
            self.assertIs(tns2, self.tns1)

    def test_construct_from_another(self):
        """Copies sharing taxa: distinct object, equal label, same taxa."""
        # The former leading ``tns2 = TaxonNamespace(self.tns1)`` was dead
        # code: the loop variable rebound it at once, and the same
        # construction is already the first element of the tuple below.
        for tns2 in (TaxonNamespace(self.tns1),
                self.tns1.clone(0),
                copy.copy(self.tns1)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2.label, self.tns1.label)
            self.assertEqual(tns2._taxa, self.tns1._taxa)
            for t1, t2 in zip(self.tns1, tns2):
                self.assertIs(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_construct_from_another_different_label(self):
        """The copy constructor accepts a new label without touching the source."""
        tns2 = TaxonNamespace(self.tns1, label="T2")
        self.assertIsNot(tns2, self.tns1)
        self.assertNotEqual(tns2.label, self.tns1.label)
        self.assertEqual(self.tns1.label, "T1")
        self.assertEqual(tns2.label, "T2")
        self.assertEqual(tns2._taxa, self.tns1._taxa)
        for t1, t2 in zip(self.tns1, tns2):
            self.assertIs(t1, t2)
        self.compare_distinct_annotables(tns2, self.tns1)

    def test_construct_from_changed_label(self):
        """Relabelling a copy must not change the source namespace's label."""
        for tns2 in (TaxonNamespace(self.tns1),
                self.tns1.clone(0),
                copy.copy(self.tns1)):
            tns2.label = "T2"
            self.assertNotEqual(tns2.label, self.tns1.label)
            self.assertEqual(self.tns1.label, "T1")
            self.assertEqual(tns2.label, "T2")
            self.assertEqual(tns2._taxa, self.tns1._taxa)
            for t1, t2 in zip(self.tns1, tns2):
                self.assertIs(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_construct_from_another_with_simple_annotations(self):
        """Copies sharing taxa, taken after plain annotations were added."""
        self.tns1.annotations.add_new("A", 1)
        self.tns1.annotations.add_new("B", 2)
        self.tns1.annotations.add_new("C", 3)
        for tns2 in (TaxonNamespace(self.tns1),
                self.tns1.clone(0),
                copy.copy(self.tns1)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2._taxa, self.tns1._taxa)
            for t1, t2 in zip(tns2, self.tns1):
                self.assertIs(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_construct_from_another_with_complex_annotations(self):
        """Copies sharing taxa, taken after an attribute-bound and a nested
        annotation were added.
        """
        self.tns1.annotations.add_new("a", 0)
        # Annotation bound to the namespace's ``label`` attribute, which
        # itself receives a nested annotation.
        b = self.tns1.annotations.add_new("b", (self.tns1, "label"), is_attribute=True)
        b.annotations.add_new("c", 3)
        self.tns1.annotations.add_new("A", 1)
        self.tns1.annotations.add_new("B", 2)
        self.tns1.annotations.add_new("C", 3)
        for tns2 in (TaxonNamespace(self.tns1),
                self.tns1.clone(0),
                copy.copy(self.tns1)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2._taxa, self.tns1._taxa)
            for t1, t2 in zip(tns2, self.tns1):
                self.assertIs(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_deepcopy_from_another(self):
        """Deep copies: equal label and size, but distinct taxon objects."""
        for tns2 in (copy.deepcopy(self.tns1),
                self.tns1.clone(2)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2.label, self.tns1.label)
            self.assertEqual(len(tns2), len(self.tns1))
            for t1, t2 in zip(self.tns1, tns2):
                self.assertIsNot(t1, t2)
                self.assertEqual(t1.label, t2.label)
                self.compare_distinct_annotables(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_deepcopy_from_another_with_simple_annotations(self):
        """Deep copies taken after plain annotations were added."""
        self.tns1.annotations.add_new("a", 0)
        self.tns1.annotations.add_new("b", 1)
        self.tns1.annotations.add_new("c", 3)
        for tns2 in (copy.deepcopy(self.tns1),
                self.tns1.clone(2)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2.label, self.tns1.label)
            self.assertEqual(len(tns2), len(self.tns1))
            for t1, t2 in zip(self.tns1, tns2):
                self.assertIsNot(t1, t2)
                self.assertEqual(t1.label, t2.label)
                self.compare_distinct_annotables(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)

    def test_deepcopy_from_another_with_complex_annotations(self):
        """Deep copies taken after an attribute-bound annotation was added.

        Also checks that the annotation at index 1 reports each namespace's
        own ``label`` after the two labels are set to different values.
        """
        self.tns1.annotations.add_new("a", 0)
        b = self.tns1.annotations.add_new("b", (self.tns1, "label"), is_attribute=True)
        b.annotations.add_new("c", 3)
        for tns2 in (copy.deepcopy(self.tns1),
                self.tns1.clone(2)):
            self.assertIsNot(tns2, self.tns1)
            self.assertEqual(tns2.label, self.tns1.label)
            self.assertEqual(len(tns2), len(self.tns1))
            for t1, t2 in zip(self.tns1, tns2):
                self.assertIsNot(t1, t2)
                self.assertEqual(t1.label, t2.label)
                self.compare_distinct_annotables(t1, t2)
            self.compare_distinct_annotables(tns2, self.tns1)
            self.tns1.label = "x"
            tns2.label = "y"
            self.assertEqual(self.tns1.annotations[1].value, "x")
            self.assertEqual(tns2.annotations[1].value, "y")
            # Restore the label so the next iteration's label-equality
            # check compares against "T1" again.
            self.tns1.label = "T1"
def test_remove_taxon_label_error(self):
    """Assert ``remove_taxon_label`` raises ``LookupError`` for an absent label."""
    namespace = TaxonNamespace(self.str_labels)
    absent = "zzz"
    # Sanity check on the fixture: the probe must really be absent.
    assert absent not in self.str_labels
    with self.assertRaises(LookupError):
        namespace.remove_taxon_label(absent)