def setUp(self):
    """Create three matrices labelled "a" for use by the tests.

    ``t1`` and ``t2`` are constructed with the same ``TaxonNamespace``
    instance (also stored as ``self.tns``); ``t3`` is constructed without
    an explicit ``taxon_namespace`` argument.
    """
    shared_namespace = dendropy.TaxonNamespace()
    self.tns = shared_namespace
    self.t1 = charmatrixmodel.CharacterMatrix(
        label="a",
        taxon_namespace=shared_namespace,
    )
    self.t2 = charmatrixmodel.CharacterMatrix(
        label="a",
        taxon_namespace=shared_namespace,
    )
    self.t3 = charmatrixmodel.CharacterMatrix(label="a")
def test_setitem_by_taxon_idx(self):
    # Sequences are assigned through integer keys and then read back
    # through the taxon objects yielded by the namespace.
    namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))
    self.assertEqual(len(matrix), 0)

    # One string and two lists, so several input types are assigned.
    payloads = [
        "abcd",
        [1, 2, 3, 4],
        ["a", "b", "c", "d"],
    ]
    assert len(payloads) == len(namespace)

    for position, taxon in enumerate(namespace):
        self.assertFalse(taxon in matrix)
        self.assertNotIn(taxon, matrix)
        matrix[position] = payloads[position]

    self.assertEqual(len(matrix._taxon_sequence_map), len(namespace))
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))

    for position, taxon in enumerate(namespace):
        expected = payloads[position]
        self.assertTrue(taxon in matrix)
        self.assertIn(taxon, matrix)
        self.assertIsInstance(
            matrix[taxon], charmatrixmodel.CharacterDataSequence)
        self.assertEqual(len(matrix[taxon]), len(expected))
        for observed_char, expected_char in zip(matrix[taxon], expected):
            self.assertEqual(observed_char, expected_char)
def get_char_matrix(self):
    """Return a matrix of fifteen ``[1, 1, 1]`` sequences with a fresh namespace.

    Each taxon is first added to the matrix's original namespace and given
    a sequence; afterwards the matrix's ``taxon_namespace`` attribute is
    replaced with a new, empty ``TaxonNamespace``.

    The returned matrix carries three extra attributes for the tests:
    ``expected_taxa`` (set of the created taxa), ``expected_labels`` (their
    ``label`` values, in creation order) and ``nseqs`` (``len`` of the
    matrix).
    """
    labels = [
        "z01", "<NONE>", "z03", "z04", "z05",
        "z06", None, None, "z09", "z10",
        "z11", "<NONE>", None, "z14", "z15",
    ]
    matrix = charmatrixmodel.CharacterMatrix()
    matrix.expected_labels = []
    matrix.expected_taxa = set()
    random.shuffle(labels)
    # NOTE(review): the shuffled label values are not passed to Taxon();
    # every taxon is created with label=None, so `labels` only determines
    # how many taxa are made. Confirm this is intentional.
    for _label in labels:
        taxon = dendropy.Taxon(label=None)
        matrix.taxon_namespace.add_taxon(taxon)
        matrix[taxon] = [1, 1, 1]
        matrix.expected_taxa.add(taxon)
        matrix.expected_labels.append(taxon.label)
    # Swap in an empty namespace after the sequences have been stored.
    matrix.taxon_namespace = dendropy.TaxonNamespace()
    assert len(matrix) == len(labels)
    assert len(matrix) == len(matrix._taxon_sequence_map)
    matrix.nseqs = len(matrix)
    return matrix
def test_delitem(self):
    # Populate the matrix by taxon key, then delete each entry again and
    # check membership and length before and after.
    namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))
    self.assertEqual(len(matrix), 0)

    payloads = [
        "abcd",
        [1, 2, 3, 4],
        ["a", "b", "c", "d"],
    ]
    assert len(payloads) == len(namespace)

    for position, taxon in enumerate(namespace):
        self.assertFalse(taxon in matrix)
        self.assertNotIn(taxon, matrix)
        matrix[taxon] = payloads[position]

    self.assertEqual(len(matrix._taxon_sequence_map), len(namespace))
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))

    # Iteration is over the namespace, not the matrix being mutated.
    for taxon in namespace:
        self.assertTrue(taxon in matrix)
        self.assertIn(taxon, matrix)
        del matrix[taxon]
        self.assertFalse(taxon in matrix)
        self.assertNotIn(taxon, matrix)

    self.assertEqual(len(matrix._taxon_sequence_map), 0)
    self.assertEqual(len(matrix), 0)
def get_char_matrix(self, labels=None):
    """Return a matrix with one random 4-element sequence per label.

    Parameters
    ----------
    labels : list of str, optional
        Labels for the taxa to create. When ``None``, the strings
        ``"0"`` .. ``"999"`` are used. The list is shuffled in place with
        ``self.rng`` before use, so a caller-supplied list is reordered.

    Returns
    -------
    The new matrix, decorated with ``expected_labels``, ``original_taxa``,
    ``original_seqs`` and ``nseqs``. Each stored sequence also gets
    ``original_seq``, ``original_taxon`` and ``label`` attributes.
    """
    matrix = charmatrixmodel.CharacterMatrix()
    if labels is None:
        labels = [str(number) for number in range(1000)]
    matrix.expected_labels = []
    matrix.original_taxa = []
    matrix.original_seqs = []
    self.rng.shuffle(labels)
    for label in labels:
        taxon = dendropy.Taxon(label=label)
        matrix.taxon_namespace.add_taxon(taxon)
        matrix.original_taxa.append(taxon)
        # NOTE(review): this lookup happens before any sequence has been
        # assigned for the taxon, and the same attribute is set again after
        # the assignment below. Kept as-is to preserve behaviour.
        matrix[taxon].original_taxon = taxon
        matrix.expected_labels.append(label)
        values = [self.rng.randint(0, 100) for _ in range(4)]
        matrix[taxon] = values
        # Annotate whatever the matrix hands back for this taxon.
        matrix[taxon].original_seq = matrix[taxon]
        matrix.original_seqs.append(matrix[taxon])
        matrix[taxon].original_taxon = taxon
        matrix[taxon].label = label
    assert len(matrix.taxon_namespace) == len(matrix.original_taxa)
    assert len(matrix) == len(matrix.original_taxa)
    assert len(matrix) == len(labels)
    matrix.nseqs = len(matrix)
    return matrix
def test_multi_setitem(self):
    # Assign three different sequences to the same taxon in turn and check
    # that only the last assignment is retained.
    namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))
    self.assertEqual(len(matrix), 0)

    payloads = [
        "abcd",
        [1, 2, 3, 4],
        ["a", "b", "c", "d"],
    ]
    target = namespace[0]
    for payload in payloads:
        matrix[target] = payload

    # Only the target taxon should have an entry.
    for taxon in namespace:
        if taxon is not target:
            self.assertNotIn(taxon, matrix)
        else:
            self.assertIn(taxon, matrix)

    last_payload = payloads[-1]
    self.assertEqual(len(matrix), 1)
    self.assertEqual(len(matrix), len(matrix._taxon_sequence_map))
    self.assertEqual(len(matrix[0]), len(last_payload))
    self.assertIsInstance(matrix[0], charmatrixmodel.CharacterDataSequence)
    for observed_char, expected_char in zip(matrix[0], last_payload):
        self.assertEqual(observed_char, expected_char)
    # The stored values must differ from the earlier, overwritten payload.
    for observed_char, stale_char in zip(matrix[0], payloads[1]):
        self.assertNotEqual(observed_char, stale_char)
def get_char_matrices(self):
    """Return two partially overlapping matrices and their shared namespace.

    Both matrices use the same three-taxon namespace. The first holds
    sequences for taxa 0 and 1, the second for taxa 1 and 2, so taxon 1 is
    the only one present in both.

    Returns
    -------
    tuple
        ``(c1, c2, tns)``.
    """
    tns = get_taxon_namespace(3)
    first = charmatrixmodel.CharacterMatrix(taxon_namespace=tns)
    second = charmatrixmodel.CharacterMatrix(taxon_namespace=tns)

    first[tns[0]] = [1, 1, 1]
    first[tns[1]] = [2, 2, 2]
    second[tns[1]] = [3, 3, 3]
    second[tns[2]] = [4, 4, 4]

    # Sanity-check the fixture before handing it to a test.
    assert len(first) == 2
    assert tns[0] in first
    assert tns[1] in first
    assert tns[2] not in first

    assert len(second) == 2
    assert tns[0] not in second
    assert tns[1] in second
    assert tns[2] in second

    return first, second, tns
def test_fill_taxa(self):
    # After pack(), every taxon in the namespace is expected to be present
    # in the matrix with a sequence of length 10 (the longest one assigned).
    #
    # NOTE(review): this method calls pack(), not fill_taxa(), and another
    # method named test_fill_taxa exists in this file. If both live in the
    # same class, only the later definition will run -- confirm.
    namespace = get_taxon_namespace(5)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    longest = ["z"] * 10
    matrix[namespace[0]] = longest
    for taxon in namespace[1:3]:
        matrix[taxon] = ["x"]
    matrix.pack()
    self.assertEqual(len(matrix), len(namespace))
    for taxon in namespace:
        self.assertIn(taxon, matrix)
        self.assertEqual(len(matrix[taxon]), 10)
def test_fill_taxa(self):
    # Give sequences to the first three of five taxa, then check that
    # fill_taxa() leaves every taxon of the namespace present in the matrix.
    namespace = get_taxon_namespace(5)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    for taxon in namespace[:3]:
        matrix[taxon] = "z"

    for taxon in namespace[:3]:
        self.assertIn(taxon, matrix)
    for taxon in namespace[3:]:
        self.assertNotIn(taxon, matrix)

    matrix.fill_taxa()

    for taxon in namespace:
        self.assertIn(taxon, matrix)
def test_sequence_sizes(self):
    # sequence_size is expected to equal the length of the first sequence
    # assigned here, and max_sequence_size the longest; both are expected
    # to be 0 for an empty matrix.
    seq_sizes = [2, 10, 20, 0, 1]
    namespace = get_taxon_namespace(len(seq_sizes))
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)

    self.assertEqual(len(matrix), 0)
    self.assertEqual(matrix.sequence_size, 0)
    self.assertEqual(matrix.max_sequence_size, 0)

    for taxon, length in zip(namespace, seq_sizes):
        matrix[taxon] = ["x"] * length

    self.assertEqual(len(matrix), len(seq_sizes))
    self.assertEqual(matrix.sequence_size, seq_sizes[0])
    self.assertEqual(matrix.max_sequence_size, max(seq_sizes))
def setUp(self):
    """Build ``self.char_matrix`` with six taxa labelled "a" through "f".

    Each taxon is added to the matrix's own namespace, recorded in
    ``self.expected_taxa`` and given the sequence ``[0, 1, 2, 3]``.
    """
    self.char_matrix = charmatrixmodel.CharacterMatrix()
    self.expected_taxa = set()
    for label in ("a", "b", "c", "d", "e", "f"):
        taxon = dendropy.Taxon(label=label)
        self.char_matrix.taxon_namespace.add_taxon(taxon)
        self.expected_taxa.add(taxon)
        self.char_matrix[taxon] = list(range(4))
def test_setitem_by_taxon_not_in_namespace(self):
    # Assigning by a taxon that is not in the matrix's namespace must raise
    # ValueError; once the taxon is added, the same assignment succeeds.
    foreign_namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix()
    taxon = foreign_namespace[0]
    payload = ["a", "b"]

    with self.assertRaises(ValueError):
        matrix[taxon] = payload

    matrix.taxon_namespace.add_taxon(taxon)
    matrix[taxon] = payload

    self.assertEqual(len(matrix), 1)
    self.assertIn(taxon, matrix)
    self.assertEqual(len(matrix[taxon]), len(payload))
    self.assertIsInstance(
        matrix[taxon], charmatrixmodel.CharacterDataSequence)
    for observed_char, expected_char in zip(matrix[taxon], payload):
        self.assertEqual(observed_char, expected_char)
def test_fill(self):
    # Call fill("0", ...) with every combination of `size` (omitted, 50, 1,
    # 0, 8) and `append` (False, True, omitted). For each combination the
    # expected result is computed locally: sequences shorter than the target
    # size are padded with "0" (after the data when `append` is true or
    # omitted, before it otherwise) and longer ones are left unchanged.
    seq_sizes = [2, 10, 20, 0, 1]
    tns = get_taxon_namespace(len(seq_sizes))
    original_sequences = [["1"] * seq_size for seq_size in seq_sizes]

    for size in (None, 50, 1, 0, 8):
        for append in (False, True, None):
            # Only arguments that are not None are forwarded to fill().
            kwargs = {}
            if size is not None:
                kwargs["size"] = size
                expected_sizes = [max(size, s) for s in seq_sizes]
            else:
                expected_sizes = [max(seq_sizes)] * len(seq_sizes)
            assert len(expected_sizes) == len(original_sequences)

            if append is not None:
                kwargs["append"] = append
            else:
                # Omitting `append` is expected to behave like append=True.
                append = True

            expected_sequences = []
            for idx, seq in enumerate(original_sequences):
                data = list(seq)
                shortfall = expected_sizes[idx] - len(data)
                if shortfall <= 0:
                    expected = data
                else:
                    padding = ["0"] * shortfall
                    if append:
                        expected = data + padding
                    else:
                        expected = padding + data
                expected_sequences.append(expected)
                assert len(expected_sequences[idx]) == expected_sizes[idx], \
                    "{}: {}/{}: {}: {} ({})".format(
                        idx,
                        size,
                        append,
                        expected_sequences[idx],
                        len(expected_sequences[idx]),
                        expected_sizes[idx])

            # A fresh matrix for every combination, on the same namespace.
            char_matrix = charmatrixmodel.CharacterMatrix(
                taxon_namespace=tns)
            for taxon, seq in zip(tns, original_sequences):
                char_matrix[taxon] = seq
            assert len(char_matrix) == len(seq_sizes)

            char_matrix.fill("0", **kwargs)

            for taxon, expected_size, expected_seq in zip(
                    char_matrix, expected_sizes, expected_sequences):
                obs_seq = char_matrix[taxon]
                self.assertEqual(len(obs_seq), expected_size)
                for observed_char, expected_char in zip(obs_seq, expected_seq):
                    self.assertEqual(observed_char, expected_char)
def test_standard_iterator(self):
    # Insert a random subset of taxa in shuffled order, then check that
    # iterating the matrix yields exactly those taxa in namespace order.
    namespace = get_taxon_namespace(100)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    shuffled = list(namespace)
    self.rng.shuffle(shuffled)

    included = set()
    excluded = set()
    for taxon in shuffled:
        if self.rng.uniform(0, 1) < 0.5:
            included.add(taxon)
            matrix[taxon] = [0]
        else:
            excluded.add(taxon)

    # Expected order follows the namespace, not the insertion order.
    expected = [taxon for taxon in namespace if taxon in included]
    self.assertEqual(len(matrix), len(expected))
    observed = [taxon for taxon in matrix]
    self.assertEqual(observed, expected)
def test_assign_taxon_namespace(self):
    # The namespace passed to the constructor must be the very same object
    # exposed afterwards as `taxon_namespace`.
    namespace = get_taxon_namespace(5)
    matrix = charmatrixmodel.CharacterMatrix(taxon_namespace=namespace)
    self.assertIs(matrix.taxon_namespace, namespace)
def test_extend_matrix_fail(self):
    # extend_matrix() between two matrices that were each constructed
    # without a shared namespace must raise TaxonNamespaceIdentityError.
    target = charmatrixmodel.CharacterMatrix()
    other = charmatrixmodel.CharacterMatrix()
    with self.assertRaises(error.TaxonNamespaceIdentityError):
        target.extend_matrix(other)
def test_update_sequences_fail(self):
    # update_sequences() between two matrices that were each constructed
    # without a shared namespace must raise TaxonNamespaceIdentityError.
    target = charmatrixmodel.CharacterMatrix()
    other = charmatrixmodel.CharacterMatrix()
    with self.assertRaises(error.TaxonNamespaceIdentityError):
        target.update_sequences(other)
def test_setitem_by_idx_not_in_namespace(self):
    # Assigning with a taxon *label* that the matrix's namespace does not
    # contain must raise KeyError.
    #
    # NOTE(review): despite the "by_idx" name, the key used here is a label
    # string, and another method with this exact name (asserting IndexError
    # for an integer key) exists in this file. If both live in the same
    # class, only the later definition will run -- confirm.
    foreign_namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix()
    with self.assertRaises(KeyError):
        matrix[foreign_namespace[0].label] = []
def test_setitem_by_idx_not_in_namespace(self):
    # Assigning with an integer key (here 3, the size of an unrelated
    # namespace) on a matrix built without that namespace must raise
    # IndexError.
    foreign_namespace = get_taxon_namespace(3)
    matrix = charmatrixmodel.CharacterMatrix()
    with self.assertRaises(IndexError):
        matrix[len(foreign_namespace)] = []