def test_options(self):
    """Check the number of distinct labels produced with non-default options."""
    tree = self.dendrogram

    # Straight cut with cluster sorting switched off.
    straight_labels = cut_straight(tree, sort_clusters=False)
    n_straight = len(set(straight_labels))
    self.assertEqual(n_straight, 2)

    # Balanced cut with cluster sorting switched off.
    balanced_labels = cut_balanced(tree, sort_clusters=False)
    n_balanced = len(set(balanced_labels))
    self.assertEqual(n_balanced, 21)

    # Balanced cut with an explicit max_cluster_size.
    capped_labels = cut_balanced(tree, max_cluster_size=10)
    n_capped = len(set(capped_labels))
    self.assertEqual(n_capped, 5)
def test_cuts(self):
    """Check label counts for default cuts and the shape of the returned dendrogram."""
    tree = self.dendrogram

    # Straight cut with default arguments.
    default_straight = cut_straight(tree)
    self.assertEqual(len(set(default_straight)), 2)

    # Straight cut asking for an explicit number of clusters.
    five_way = cut_straight(tree, n_clusters=5)
    self.assertEqual(len(set(five_way)), 5)

    # Balanced cut with default arguments.
    default_balanced = cut_balanced(tree)
    self.assertEqual(len(set(default_balanced)), 21)

    # Balanced cut that also hands back a second array alongside the labels.
    result = cut_balanced(tree, return_dendrogram=True)
    balanced_labels, new_dendrogram = result
    self.assertEqual(len(set(balanced_labels)), 21)
    self.assertTupleEqual(new_dendrogram.shape, (20, 4))
def cluster(self, method, n_clust=None, threshold=None):
    """Cut the dendrogram, store the cluster IDs and return them.

    Straight cuts can either set a defined number of clusters, or
    alternatively set a distance threshold. Cluster sizes can vary widely.

    Balanced cuts respect a maximum cluster size. The number of clusters
    is determined on the fly.

    Args:
        method: Either ``'straight'`` or ``'balanced'``.
        n_clust: For a straight cut, the number of clusters; for a balanced
            cut, the maximum cluster size (required in that case).
        threshold: Distance threshold for a straight cut. Mutually
            exclusive with ``n_clust``.

    Returns:
        The result of the selected cut function, which is also stored in
        ``self.clusters``.

    Raises:
        ValueError: If ``method`` is not recognised, if both ``n_clust``
            and ``threshold`` are given for a straight cut, or if
            ``n_clust`` is missing for a balanced cut.
    """
    # Validate everything up front so that a rejected call neither prints
    # a misleading progress line nor leaves a stale ``self.clusters``.
    if method not in ('straight', 'balanced'):
        raise ValueError('Choose "straight" or "balanced"')
    if method == 'straight' and n_clust is not None and threshold is not None:
        raise ValueError('Straight cut takes only one of n_clust or threshold, not both.')
    if method == 'balanced' and n_clust is None:
        raise ValueError('Must set maximum cluster size (n_clust) for balanced_cut')

    if self.verbose:
        print(f'clustering with a {method} cut')

    if method == 'straight':
        self.clusters = cut_straight(self.dendrogram, n_clust, threshold)
    else:
        # For the balanced cut, n_clust is passed as the second positional
        # argument, i.e. the maximum cluster size.
        self.clusters = cut_balanced(self.dendrogram, n_clust)
    return self.clusters