def test_GivenInconsistentClusterNumbers_RaisesError(self):
    """
    Cluster numbers that are not consecutive integers from 0 must raise ValueError.

    `gapped_clusters` can occur when the number of distinct data points is
    smaller than the requested number of clusters in kmeans, leading to the
    production of empty clusters and then messing up prg construction.
    `large_clusters` should never occur: cluster assignment should be
    consecutive integers from 0.
    """
    inconsistent_assignments = {
        "gapped_clusters": [0, 2, 2],
        "large_clusters": [5, 6, 6],
    }
    for label, cluster_assignment in inconsistent_assignments.items():
        # subTest keeps the two scenarios independent: a failure in the first
        # no longer prevents the second from running, and the failing case is
        # named in the report.
        with self.subTest(case=label, cluster_assignment=cluster_assignment):
            with self.assertRaises(ValueError):
                extract_clusters(self.seqdict_ids, cluster_assignment)
def test_GivenGroupedClusters_ExtractCorrectSequenceClusters(self):
    """Check the sequence clusters extracted from the gapped-sequence fixture for assignment [1, 1, 0]."""
    grouping = [1, 1, 0]
    obtained = extract_clusters(self.seqdict_gapped_seqs, grouping)
    self.assertEqual(
        obtained,
        [["GGG"], ["A-T", "AT-", "TTT"]],
    )
def test_GivenGroupedClusters_ExtractCorrectIDClusters(self):
    """Check the ID clusters extracted from the ID fixture for assignment [0, 0, 1]."""
    grouping = [0, 0, 1]
    obtained = extract_clusters(self.seqdict_ids, grouping)
    self.assertEqual(
        obtained,
        [["s1", "s2", "s3"], ["s4"]],
    )
def test_GivenTooFewClusterAssignments_Fails(self):
    """
    A two-element cluster assignment must raise ValueError for both fixtures.

    The same assignment is checked against `self.seqdict_ids` and
    `self.seqdict_gapped_seqs`; per the test name it holds too few entries
    for either of them.
    """
    cluster_assignment = [0, 1]
    fixtures = (
        ("seqdict_ids", self.seqdict_ids),
        ("seqdict_gapped_seqs", self.seqdict_gapped_seqs),
    )
    for fixture_name, seqdict in fixtures:
        # Label each fixture so a failure report says which one did not
        # raise, and so both fixtures are always exercised.
        with self.subTest(fixture=fixture_name):
            with self.assertRaises(ValueError):
                extract_clusters(seqdict, cluster_assignment)