def __kmedoids_compression(self, clustering, matrix_handler):
        """
        Compresses a clustering by choosing, for each cluster, a set of medoid
        representatives whose size is proportional to the cluster's share of
        the total number of elements.

        @param clustering: The Clustering object to compress.
        @param matrix_handler: Handler exposing the global distance matrix.

        @return: A list with the global ids of the chosen representatives.
        """
        representatives = []
        # Fraction of the total elements we want to keep overall.
        frames_ratio = float(self.parameters["final_number_of_frames"]) / clustering.total_number_of_elements

        for cluster in clustering.clusters:
            # Proportional number of representatives for this cluster (rounded up).
            local_k = int(math.ceil(cluster.get_size() * frames_ratio))

            # Restrict the distance matrix to this cluster; element ids become local.
            submatrix = get_submatrix(matrix_handler.distance_matrix, cluster.all_elements)

            # Cluster the cluster's own elements with k-medoids.
            local_clustering = KMedoidsAlgorithm(submatrix).perform_clustering({
                "k": local_k,
                "seeding_type": "EQUIDISTANT"
            })

            # Medoids are local indices; map them back to the global element ids
            # and accumulate them as representatives.
            local_medoids = local_clustering.get_medoids(submatrix)
            fake_cluster = Cluster(None, local_medoids)
            representatives.extend(
                Refiner.redefine_cluster_with_map(cluster, fake_cluster).all_elements)

        return representatives
# Example #2
    def test_gromos_seeding(self):
        """Checks gromos_seeding on a 3-component point set for several cutoffs."""
        coords = [(0, 0), (0, 1), (0, -1), (1, 0),
                  (3, 0), (3, 1),
                  (6, 0), (7, 0), (7, 1), (7, -1)]
        # Maximum distance inside each connected component is 1; with 1.5 it
        # may discover 3 clusters, with 3.2 it may discover only 2.
        #
        #         1       5         8
        #         |       |         |
        #         0 - 3   4     6 - 7
        #         |                 |
        #         2                 9
        distances = CondensedMatrix(pdist(coords))
        algorithm = KMedoidsAlgorithm(distances, rand_seed = 10)

        # A small cutoff recovers all 3 connected components.
        # (with 1.5 the medoid would be 6 instead of 7, as it is the first
        # element to have 3 neighbours)
        numpy.testing.assert_array_equal(algorithm.gromos_seeding(3, 1.4), [0, 7, 4])

        # A bigger cutoff forces a search for the minimum cutoff that yields
        # 3 clusters, so 6 is returned instead of 7.
        numpy.testing.assert_array_equal(algorithm.gromos_seeding(3, 3.2), [3, 6, 5])

        # Regression check.
        numpy.testing.assert_array_equal(algorithm.gromos_seeding(2, 3.2), [4, 7])

        # Random seeding path; only deterministic thanks to the fixed rand_seed.
        numpy.testing.assert_array_equal(algorithm.gromos_seeding(2, 0), [5, 3])
# Example #3
    def test_update_medoids(self):
        """Medoids of two well-separated 4-point groups must be elements 0 and 5."""
        coords = [(0, 0), (0, 1), (0, -1), (1, 0),
                  (6, 0), (7, 0), (7, 1), (7, -1)]
        algorithm = KMedoidsAlgorithm(CondensedMatrix(pdist(coords)))
        # First four points form class 0, last four form class 1.
        algorithm.class_list = [0] * 4 + [1] * 4
        numpy.testing.assert_array_equal(algorithm.update_medoids(), [0, 5])
# Example #4
 def test_update_medoids(self):
     """Medoids of two well-separated 4-point groups must be elements 0 and 5."""
     coords = [(0, 0), (0, 1), (0, -1), (1, 0),
               (6, 0), (7, 0), (7, 1), (7, -1)]
     algorithm = KMedoidsAlgorithm(CondensedMatrix(pdist(coords)))
     # First four points form class 0, last four form class 1.
     algorithm.class_list = [0] * 4 + [1] * 4
     numpy.testing.assert_array_equal(algorithm.update_medoids(), [0, 5])
# Example #5
    def test_naive_case(self):
        """k-medoids with random seeding must recover the 3 natural groups."""
        #         1       5         8
        #         |       |         |
        #         0 - 3   4     6 - 7
        #         |                 |
        #         2                 9
        coords = [(0, 0), (0, 1), (0, -1), (1, 0),
                  (3, 0), (3, 1),
                  (6, 0), (7, 0), (7, 1), (7, -1)]
        algorithm = KMedoidsAlgorithm(CondensedMatrix(pdist(coords)), 10)
        result = algorithm.perform_clustering({'k': 3, 'seeding_type': 'RANDOM'})

        expected_prototypes = [0, 4, 6]
        expected_groups = [[0, 1, 2, 3], [6, 7, 8, 9], [4, 5]]
        for cluster in result.clusters:
            self.assertIn(cluster.prototype, expected_prototypes)
            self.assertIn(cluster.all_elements, expected_groups)
# Example #6
    def test_naive_case(self):
        """Random-seeded k-medoids must split the points into the 3 obvious groups."""
        #         1       5         8
        #         |       |         |
        #         0 - 3   4     6 - 7
        #         |                 |
        #         2                 9
        sample = [(0, 0), (0, 1), (0, -1), (1, 0),
                  (3, 0), (3, 1),
                  (6, 0), (7, 0), (7, 1), (7, -1)]
        condensed = CondensedMatrix(pdist(sample))
        clustering = KMedoidsAlgorithm(condensed, 10).perform_clustering({
            'k': 3,
            'seeding_type': 'RANDOM'
        })

        for found in clustering.clusters:
            # Each recovered cluster must match one of the expected groups.
            self.assertIn(found.prototype, [0, 4, 6])
            self.assertIn(found.all_elements, [[0, 1, 2, 3], [6, 7, 8, 9], [4, 5]])
# Example #7
    def test_gromos_seeding(self):
        """Exercises gromos_seeding with small, large and degenerate cutoffs."""
        sample = [(0, 0), (0, 1), (0, -1), (1, 0),
                  (3, 0), (3, 1),
                  (6, 0), (7, 0), (7, 1), (7, -1)]
        # Maximum distance of connected components is 1; for 1.5 it may
        # discover 3 clusters, for 3.2 it may discover only 2.
        #
        #         1       5         8
        #         |       |         |
        #         0 - 3   4     6 - 7
        #         |                 |
        #         2                 9
        condensed = CondensedMatrix(pdist(sample))
        seeder = KMedoidsAlgorithm(condensed, rand_seed=10)

        # Small cutoff: all 3 connected components are found. With 1.5 the
        # medoid would be 6 instead of 7 (first element with 3 neighbours).
        numpy.testing.assert_array_equal(seeder.gromos_seeding(3, 1.4),
                                         [0, 7, 4])

        # Bigger cutoff: it must search the minimum cutoff giving 3 clusters,
        # so 6 is returned instead of 7.
        numpy.testing.assert_array_equal(seeder.gromos_seeding(3, 3.2),
                                         [3, 6, 5])

        # Regression check.
        numpy.testing.assert_array_equal(seeder.gromos_seeding(2, 3.2),
                                         [4, 7])

        # Degenerate cutoff falls back to a random sequence; deterministic
        # only because of the fixed rand_seed.
        numpy.testing.assert_array_equal(seeder.gromos_seeding(2, 0), [5, 3])
    def __kmedoids_compression(self, clustering, matrix_handler):
        """
        Compresses a clustering: for each cluster it picks a number of medoid
        representatives proportional to the cluster's share of the total
        number of elements (target total is parameters["final_number_of_frames"]).

        @param clustering: The Clustering object to compress.
        @param matrix_handler: Handler exposing the global distance matrix.

        @return: A list with the global ids of the chosen representatives.
        """
        representatives = []
        for cluster in clustering.clusters:
            # Guess 'correct' number of elements for this cluster
            # (proportional share, rounded up with ceil).
            cluster_size = cluster.get_size()
            expected_cluster_elements = cluster_size * (
                float(self.parameters["final_number_of_frames"]) /
                clustering.total_number_of_elements)
            expected_cluster_elements = int(
                math.ceil(expected_cluster_elements))

            # Restrict the distance matrix to this cluster's elements;
            # indices in the submatrix are local to the cluster.
            remapped_matrix = get_submatrix(matrix_handler.distance_matrix,
                                            cluster.all_elements)

            # Prepare and run kmedoids algorithm on the local submatrix.
            kmedoids = KMedoidsAlgorithm(remapped_matrix)
            new_clustering = kmedoids.perform_clustering({
                "k":
                expected_cluster_elements,
                "seeding_type":
                "EQUIDISTANT"
            })

            # reverse the remapping and add it to representatives: medoids
            # are local indices, so map them back to global element ids.
            remapped_representatives = new_clustering.get_medoids(
                remapped_matrix)
            fake_cluster = Cluster(None, remapped_representatives)

            representatives.extend(
                Refiner.redefine_cluster_with_map(cluster,
                                                  fake_cluster).all_elements)

        return representatives
    def perform_clustering(self, kwargs):
        """
        Does the actual clustering by doing a k-medoids clustering of the first k eigenvector rows.

        @param kwargs: Dictionary with this mandatory keys:
            - 'k': Number of clusters to generate. Must be <= than max_clusters

        @return: a Clustering instance with the clustered data.
        """
        # Mandatory parameter
        k = int(kwargs["k"])

        # Best-effort warning only (behavior kept): clustering still proceeds
        # with the requested k even when it exceeds max_clusters.
        if k > self.max_clusters:
            print "[ERROR SpectralClusteringAlgorithm::perform_clustering] this algorithm was defined to generate at most %d clusters."%self.max_clusters,

        algorithm_details = "Spectral algorithm with k = %d and sigma squared = %.3f" %(int(k), self.sigma_sq)

        if self.use_k_medoids:
            # The row vectors we have are in R^k (so k length)
            eigen_distances = CondensedMatrix(pdist(self.eigenvectors[:,:k]))
            k_medoids_args = {
                              "k":k,
                              "seeding_max_cutoff":-1,
                              "seeding_type": "RANDOM"
                              }

            k_medoids_alg = KMedoidsAlgorithm(eigen_distances)
            clustering = k_medoids_alg.perform_clustering(k_medoids_args)
            clustering.details = algorithm_details
            # BUGFIX: return the clustering we just annotated. The original
            # code ran perform_clustering a second time and returned that
            # result, discarding the details assignment, doubling the work
            # and (with RANDOM seeding) possibly returning a different
            # clustering than the one annotated.
            return clustering
        else:
            # kmeans2 on the eigenvector rows; centroids are not needed,
            # only the per-element labels.
            centroid, labels = scipy.cluster.vq.kmeans2(self.eigenvectors[:,:k],
                                                        k,
                                                        iter = 1000,
                                                        minit = 'random')
            del centroid
            clusters = gen_clusters_from_class_list(labels)
            return Clustering(clusters,details = algorithm_details)
# Example #10
    def perform_clustering(self, kwargs):
        """
        Does the actual clustering by doing a k-medoids clustering of the first k eigenvector rows.

        @param kwargs: Dictionary with this mandatory keys:
            - 'k': Number of clusters to generate. Must be <= than max_clusters

        @return: a Clustering instance with the clustered data.
        """
        # Mandatory parameter
        k = int(kwargs["k"])

        # Best-effort warning only (behavior kept): clustering still proceeds
        # with the requested k even when it exceeds max_clusters.
        if k > self.max_clusters:
            print "[ERROR SpectralClusteringAlgorithm::perform_clustering] this algorithm was defined to generate at most %d clusters." % self.max_clusters,

        algorithm_details = "Spectral algorithm with k = %d and sigma squared = %.3f" % (
            int(k), self.sigma_sq)

        if self.use_k_medoids:
            # The row vectors we have are in R^k (so k length)
            eigen_distances = CondensedMatrix(pdist(self.eigenvectors[:, :k]))
            k_medoids_args = {
                "k": k,
                "seeding_max_cutoff": -1,
                "seeding_type": "RANDOM"
            }

            k_medoids_alg = KMedoidsAlgorithm(eigen_distances)
            clustering = k_medoids_alg.perform_clustering(k_medoids_args)
            clustering.details = algorithm_details
            # BUGFIX: return the annotated clustering. The original re-ran
            # perform_clustering and returned that second result, discarding
            # the details assignment, doubling the work and (with RANDOM
            # seeding) possibly returning a different clustering.
            return clustering
        else:
            # kmeans2 on the eigenvector rows; only the labels are used.
            centroid, labels = scipy.cluster.vq.kmeans2(
                self.eigenvectors[:, :k], k, iter=1000, minit='random')
            del centroid
            clusters = gen_clusters_from_class_list(labels)
            return Clustering(clusters, details=algorithm_details)
# Example #11
 def setUpClass(cls):
     """Builds one condensed matrix and k-medoids instance shared by all tests."""
     distances = [1.0, 4.5, 7.2, 6.7, 8.5,
                  4.5, 3.6, 7.8, 2.2, 2.0]
     cls.condensed_matrix = CondensedMatrix(distances)
     cls.kmed_alg = KMedoidsAlgorithm(cls.condensed_matrix)