def test_gromos_seeding(self):
    # Checks the medoids proposed by 'gromos_seeding' for several
    # (k, cutoff) combinations over this dataset (numbers are indices):
    #
    #  1       5         8
    #  |       |         |
    #  0 - 3   4     6 - 7
    #  |                 |
    #  2                 9
    #
    # Maximum distance inside a connected component is 1; with a cutoff of
    # 1.5 it may discover 3 clusters, with 3.2 it may discover only 2.
    coordinates = [
        (0, 0), (0, 1), (0, -1), (1, 0),
        (3, 0), (3, 1),
        (6, 0), (7, 0), (7, 1), (7, -1),
    ]
    distances = CondensedMatrix(pdist(coordinates))
    algorithm = KMedoidsAlgorithm(distances, rand_seed=10)

    # Each entry is ((k, cutoff), expected medoids). The order of the calls
    # is kept on purpose: the last case relies on the seeded random state.
    cases = [
        # With a small cutoff we get all 3 connected components (with 1.5
        # we would have 6 instead of 7 as medoid, as it is the first one
        # to have 3 neighbours).
        ((3, 1.4), [0, 7, 4]),
        # With a bigger cutoff it has to try to find the minimum cutoff
        # for 3 clusters, then 6 is returned instead of 7.
        ((3, 3.2), [3, 6, 5]),
        # Regression check.
        ((2, 3.2), [4, 7]),
        # Should return a random sequence, so it is only testable because
        # of the rand_seed.
        ((2, 0), [5, 3]),
    ]
    for (k, cutoff), expected_medoids in cases:
        numpy.testing.assert_array_equal(
            algorithm.gromos_seeding(k, cutoff), expected_medoids)
def test_update_medoids(self):
    # Two groups of four points each: indices 0-3 around x = 0..1 and
    # indices 4-7 around x = 6..7.
    coordinates = [
        (0, 0), (0, 1), (0, -1), (1, 0),
        (6, 0), (7, 0), (7, 1), (7, -1),
    ]
    algorithm = KMedoidsAlgorithm(CondensedMatrix(pdist(coordinates)))

    # Assign the first four elements to class 0 and the rest to class 1,
    # then check which element of each class is picked as its medoid.
    algorithm.class_list = [0] * 4 + [1] * 4
    expected_medoids = [0, 5]
    numpy.testing.assert_array_equal(algorithm.update_medoids(),
                                     expected_medoids)
def test_naive_case(self):
    # Dataset layout (numbers are element indices):
    #
    #  1       5         8
    #  |       |         |
    #  0 - 3   4     6 - 7
    #  |                 |
    #  2                 9
    coordinates = [
        (0, 0), (0, 1), (0, -1), (1, 0),
        (3, 0), (3, 1),
        (6, 0), (7, 0), (7, 1), (7, -1),
    ]
    distances = CondensedMatrix(pdist(coordinates))
    # NOTE(review): the positional 10 is presumably the random seed
    # (another test passes rand_seed=10 by keyword) -- confirm.
    algorithm = KMedoidsAlgorithm(distances, 10)
    clustering = algorithm.perform_clustering({
        'k': 3,
        'seeding_type': 'RANDOM'
    })

    # Every resulting cluster must be one of the three groups drawn above,
    # represented by one of the accepted prototypes.
    accepted_prototypes = [0, 4, 6]
    accepted_memberships = [[0, 1, 2, 3], [6, 7, 8, 9], [4, 5]]
    for cluster in clustering.clusters:
        self.assertIn(cluster.prototype, accepted_prototypes)
        self.assertIn(cluster.all_elements, accepted_memberships)
def __kmedoids_compression(self, clustering, matrix_handler):
    """
    Chooses representative elements for every cluster by running a k-medoids
    clustering inside it and keeping the resulting medoids.

    The number of medoids requested for a cluster is its share of
    self.parameters["final_number_of_frames"], proportional to the cluster
    size over the total number of elements, rounded up.

    @param clustering: Clustering whose clusters are going to be compressed.
    @param matrix_handler: Object holding the 'distance_matrix' of the
    whole dataset.

    @return: A list with the representatives gathered from all clusters.
    """
    chosen_elements = []
    for cluster in clustering.clusters:
        # Share of the final frames that corresponds to this cluster
        size = cluster.get_size()
        kept_fraction = (float(self.parameters["final_number_of_frames"]) /
                         clustering.total_number_of_elements)
        k = int(math.ceil(size * kept_fraction))

        # Distances restricted to the elements of this cluster
        # (presumably re-indexed locally; see the mapping step below)
        cluster_matrix = get_submatrix(matrix_handler.distance_matrix,
                                       cluster.all_elements)

        # k-medoids over the cluster alone
        inner_clustering = KMedoidsAlgorithm(cluster_matrix).perform_clustering({
            "k": k,
            "seeding_type": "EQUIDISTANT"
        })

        # Medoids are expressed in the submatrix numbering; wrap them in a
        # throwaway cluster so they can be mapped back through the original.
        local_medoids = inner_clustering.get_medoids(cluster_matrix)
        medoid_holder = Cluster(None, local_medoids)
        mapped_back = Refiner.redefine_cluster_with_map(cluster, medoid_holder)
        chosen_elements.extend(mapped_back.all_elements)

    return chosen_elements
def perform_clustering(self, kwargs):
    """
    Does the actual clustering over the first k columns of the eigenvector
    matrix, using either k-medoids (if self.use_k_medoids is set) or scipy's
    kmeans2.

    @param kwargs: Dictionary with this mandatory keys:
        - 'k': Number of clusters to generate. Must be <= than max_clusters

    @return: a Clustering instance with the clustered data, whose details
    describe the k and sigma squared that were used.
    """
    # Mandatory parameter
    k = int(kwargs["k"])

    if k > self.max_clusters:
        # Reported but not fatal: execution goes on as it always did.
        print("[ERROR SpectralClusteringAlgorithm::perform_clustering] this algorithm was defined to generate at most %d clusters." % self.max_clusters)

    algorithm_details = "Spectral algorithm with k = %d and sigma squared = %.3f" % (
        k, self.sigma_sq)

    if self.use_k_medoids:
        # The row vectors we have are in R^k (so k length)
        eigen_distances = CondensedMatrix(pdist(self.eigenvectors[:, :k]))
        k_medoids_args = {
            "k": k,
            "seeding_max_cutoff": -1,
            "seeding_type": "RANDOM"
        }
        k_medoids_alg = KMedoidsAlgorithm(eigen_distances)
        # Cluster once and return that very object: a second run would
        # cost twice as much and come back without the details set.
        clustering = k_medoids_alg.perform_clustering(k_medoids_args)
        clustering.details = algorithm_details
        return clustering

    # Only the labels are needed; the centroids are discarded.
    _, labels = scipy.cluster.vq.kmeans2(
        self.eigenvectors[:, :k], k, iter=1000, minit='random')
    clusters = gen_clusters_from_class_list(labels)
    return Clustering(clusters, details=algorithm_details)
def setUpClass(cls):
    # Fixture stored on the class: a condensed matrix and a k-medoids
    # algorithm built on top of it.
    # The 10 values match the number of pairs among 5 elements
    # (presumably a 5 element dataset -- confirm).
    condensed_values = [1.0, 4.5, 7.2, 6.7,
                        8.5, 4.5, 3.6,
                        7.8, 2.2,
                        2.0]
    cls.condensed_matrix = CondensedMatrix(condensed_values)
    cls.kmed_alg = KMedoidsAlgorithm(cls.condensed_matrix)