예제 #1
0
 def test_options(self):
     """Check the number of distinct labels when cut options are overridden."""
     # Straight cut with sort_clusters turned off.
     straight_labels = cut_straight(self.dendrogram, sort_clusters=False)
     n_distinct = len(set(straight_labels))
     self.assertEqual(n_distinct, 2)

     # Balanced cut with sort_clusters turned off.
     balanced_labels = cut_balanced(self.dendrogram, sort_clusters=False)
     n_distinct = len(set(balanced_labels))
     self.assertEqual(n_distinct, 21)

     # Balanced cut with an explicit max_cluster_size of 10.
     capped_labels = cut_balanced(self.dendrogram, max_cluster_size=10)
     n_distinct = len(set(capped_labels))
     self.assertEqual(n_distinct, 5)
예제 #2
0
 def test_cuts(self):
     """Check label counts for default cuts and the returned dendrogram shape."""
     # Straight cut with default arguments.
     default_straight = cut_straight(self.dendrogram)
     n_distinct = len(set(default_straight))
     self.assertEqual(n_distinct, 2)

     # Straight cut asking for five clusters.
     five_way = cut_straight(self.dendrogram, n_clusters=5)
     n_distinct = len(set(five_way))
     self.assertEqual(n_distinct, 5)

     # Balanced cut with default arguments.
     default_balanced = cut_balanced(self.dendrogram)
     n_distinct = len(set(default_balanced))
     self.assertEqual(n_distinct, 21)

     # Balanced cut that also hands back a dendrogram alongside the labels.
     balanced_labels, aggregated = cut_balanced(
         self.dendrogram, return_dendrogram=True
     )
     n_distinct = len(set(balanced_labels))
     self.assertEqual(n_distinct, 21)
     self.assertTupleEqual(aggregated.shape, (20, 4))
예제 #3
0
    def cluster(self, method, n_clust=None, threshold=None):
        """Cut the dendrogram and store the resulting cluster IDs in ``self.clusters``.

        Nothing is returned; read ``self.clusters`` after the call.

        Straight cuts can either set a defined number of clusters, or
        alternatively set a distance threshold. Cluster sizes can vary widely.

        Balanced cuts respect a maximum cluster size. The number of clusters is
        determined on the fly.

        Parameters
        ----------
        method : str
            Either ``'straight'`` or ``'balanced'``.
        n_clust : int, optional
            Straight cut: the number of clusters. Balanced cut: the maximum
            cluster size (required).
        threshold : float, optional
            Distance threshold for a straight cut. Not used by a balanced cut.

        Raises
        ------
        ValueError
            If a straight cut is given both ``n_clust`` and ``threshold``, if a
            balanced cut is requested without ``n_clust``, or if ``method`` is
            not one of the two supported values.
        """

        if self.verbose:
            print(f'clustering with a {method} cut')

        if method == 'straight':
            if n_clust is not None and threshold is not None:
                raise ValueError('Straight cut takes only one of n_clust or threshold, not both.')
            self.clusters = cut_straight(self.dendrogram, n_clust, threshold)
        elif method == 'balanced':
            if n_clust is None:
                raise ValueError('Must set maximum cluster size (n_clust) for balanced_cut')
            self.clusters = cut_balanced(self.dendrogram, n_clust)
        else:
            # Fail loudly: merely printing a hint would leave self.clusters
            # unset (or stale from an earlier call) without the caller noticing.
            raise ValueError(f'Unknown method {method!r}. Choose "straight" or "balanced"')