Exemple #1
0
    def testMultipleRuns(self):
        """testing 20 runs with 10000 points, 20 clusters and 20 features

        One random data set is generated, then ``ccluster.kmeans`` is run
        ``nruns`` times from the same initial clusters and partition. Every
        run after the first must end with exactly the partition and clusters
        of the first run.
        """
        nruns = 20
        npoints = 10000
        nclusters = 20
        nfeatures = 20
        # The kmeans settings are the same for every run, so set them once.
        minkowski_p = 2
        swap_threshold = 0

        points, clusters, partition = self.createEmptyParameterArrays(npoints, nclusters, nfeatures)

        # Label every point at random, then draw the points of each label from
        # a Gaussian with its own random mean and diagonal covariance.
        partition[:] = np.random.randint(0, nclusters, npoints)
        for label in range(nclusters):
            mask = partition == label
            mean = np.random.uniform(-1.0, 1.0, nfeatures)
            sigma = np.diag(np.random.uniform(0.1, 0.5, size=nfeatures))
            points[mask] = np.random.multivariate_normal(mean, sigma, int(np.sum(mask)))

        # Initial state: randomly picked data points as clusters and a fresh
        # random partition. Backups are kept so each run can restart from it.
        seed_indices = np.random.randint(0, npoints, nclusters)
        clusters = points[seed_indices].copy()
        partition[:] = np.random.randint(0, nclusters, npoints)
        backup_partition = partition.copy()
        backup_clusters = clusters.copy()
        first_partition = None
        first_clusters = None

        for run in range(nruns):
            # The results are read back from `clusters` and `partition`, so
            # the test relies on kmeans updating both arrays in place.
            ccluster.kmeans(points, clusters, partition, minkowski_p, swap_threshold)
            if first_partition is None:
                # First run: remember its outcome as the reference.
                first_partition = partition.copy()
                first_clusters = clusters.copy()
            else:
                self.assertTrue((partition == first_partition).all(),
                                "run %d: partition differs from the first run" % run)
                self.assertTrue((clusters == first_clusters).all(),
                                "run %d: clusters differ from the first run" % run)
            # Restore the initial state for the next run.
            partition = backup_partition.copy()
            clusters = backup_clusters.copy()
Exemple #2
0
    def testSimpleRun(self):
        """testing a single run with 10 points, 2 clusters and 1 feature

        Smoke test only: nothing is asserted, so the test passes as long as
        the ``ccluster.kmeans`` call does not raise.
        """
        points, clusters, partition = self.createEmptyParameterArrays(10, 2, 1)

        # Two groups of five samples: the first drawn around -1.0, the second
        # around +1.0, both with standard deviation 0.5.
        half_shape = (5, 1)
        points[:5] = np.random.normal(-1.0, 0.5, half_shape)
        points[5:] = np.random.normal(1.0, 0.5, half_shape)

        # Both starting cluster positions lie below the means of both groups.
        for row, start in enumerate((-10.0, -5.0)):
            clusters[row, 0] = start
        # Every point starts out assigned to cluster 0.
        partition[:] = 0

        ccluster.kmeans(
            points,
            clusters,
            partition,
            2,  # minkowski_p
            0,  # swap_threshold
        )
Exemple #3
0
    def testSimpleRun(self):
        """testing a single run with 10 points, 2 clusters and 1 feature

        The test makes no assertions; it only checks that a small
        ``ccluster.kmeans`` call completes without raising.
        """
        points, clusters, partition = self.createEmptyParameterArrays(10, 2, 1)

        # Fill the first five rows from N(-1.0, 0.5) and the remaining rows
        # from N(1.0, 0.5), in that order.
        group_rows = (slice(None, 5), slice(5, None))
        group_means = (-1.0, 1.0)
        for rows, centre in zip(group_rows, group_means):
            points[rows] = np.random.normal(centre, 0.5, (5, 1))

        # Initial cluster positions and an all-zero initial partition.
        clusters[0, 0], clusters[1, 0] = -10.0, -5.0
        partition[:] = 0

        minkowski_p, swap_threshold = 2, 0
        kmeans_args = (points, clusters, partition, minkowski_p,
                       swap_threshold)
        ccluster.kmeans(*kmeans_args)
Exemple #4
0
    def testComplexRun(self):
        """testing a single run with 50000 points, 50 clusters and 50 features

        Smoke test on a larger random data set: nothing is asserted, so the
        test passes as long as the ``ccluster.kmeans`` call does not raise.
        """
        npoints = 50000
        nclusters = 50
        nfeatures = 50
        points, clusters, partition = self.createEmptyParameterArrays(npoints, nclusters, nfeatures)

        # Label every point at random, then draw the points of each label from
        # a Gaussian with its own random mean and diagonal covariance.
        partition[:] = np.random.randint(0, nclusters, npoints)
        for label in range(nclusters):
            mask = partition == label
            mean = np.random.uniform(-1.0, 1.0, nfeatures)
            sigma = np.diag(np.random.uniform(0.1, 0.5, nfeatures))
            # Count the members of this label once, as a plain Python int.
            count = int(np.sum(mask))
            points[mask, :] = np.random.multivariate_normal(mean, sigma, count)

        # Initial state: randomly picked data points as clusters and a fresh
        # random partition.
        seed_indices = np.random.randint(0, npoints, nclusters)
        clusters = points[seed_indices].copy()
        partition[:] = np.random.randint(0, nclusters, npoints)

        minkowski_p = 2
        swap_threshold = 0
        ccluster.kmeans(points, clusters, partition, minkowski_p, swap_threshold)
Exemple #5
0
    def testMultipleRuns(self):
        """testing 20 runs with 10000 points, 20 clusters and 20 features

        One random data set is generated, then ``ccluster.kmeans`` is run
        ``nruns`` times from the same initial clusters and partition. Every
        run after the first must end with exactly the partition and clusters
        of the first run.
        """
        nruns = 20
        npoints = 10000
        nclusters = 20
        nfeatures = 20
        # The kmeans settings are the same for every run, so set them once.
        minkowski_p = 2
        swap_threshold = 0

        points, clusters, partition = self.createEmptyParameterArrays(
            npoints, nclusters, nfeatures)

        # Label every point at random, then draw the points of each label from
        # a Gaussian with its own random mean and diagonal covariance.
        partition[:] = np.random.randint(0, nclusters, npoints)
        for label in range(nclusters):
            mask = partition == label
            mean = np.random.uniform(-1.0, 1.0, nfeatures)
            sigma = np.diag(np.random.uniform(0.1, 0.5, size=nfeatures))
            points[mask] = np.random.multivariate_normal(
                mean, sigma, int(np.sum(mask)))

        # Initial state: randomly picked data points as clusters and a fresh
        # random partition. Backups are kept so each run can restart from it.
        seed_indices = np.random.randint(0, npoints, nclusters)
        clusters = points[seed_indices].copy()
        partition[:] = np.random.randint(0, nclusters, npoints)
        backup_partition = partition.copy()
        backup_clusters = clusters.copy()
        first_partition = None
        first_clusters = None

        for run in range(nruns):
            # The results are read back from `clusters` and `partition`, so
            # the test relies on kmeans updating both arrays in place.
            ccluster.kmeans(points, clusters, partition, minkowski_p,
                            swap_threshold)
            if first_partition is None:
                # First run: remember its outcome as the reference.
                first_partition = partition.copy()
                first_clusters = clusters.copy()
            else:
                self.assertTrue(
                    (partition == first_partition).all(),
                    "run %d: partition differs from the first run" % run)
                self.assertTrue(
                    (clusters == first_clusters).all(),
                    "run %d: clusters differ from the first run" % run)
            # Restore the initial state for the next run.
            partition = backup_partition.copy()
            clusters = backup_clusters.copy()
Exemple #6
0
    def testComplexRun(self):
        """testing a single run with 50000 points, 50 clusters and 50 features

        Smoke test on a larger random data set: nothing is asserted, so the
        test passes as long as the ``ccluster.kmeans`` call does not raise.
        """
        npoints = 50000
        nclusters = 50
        nfeatures = 50
        points, clusters, partition = self.createEmptyParameterArrays(
            npoints, nclusters, nfeatures)

        # Label every point at random, then draw the points of each label from
        # a Gaussian with its own random mean and diagonal covariance.
        partition[:] = np.random.randint(0, nclusters, npoints)
        for label in range(nclusters):
            mask = partition == label
            mean = np.random.uniform(-1.0, 1.0, nfeatures)
            sigma = np.diag(np.random.uniform(0.1, 0.5, nfeatures))
            # Count the members of this label once, as a plain Python int.
            count = int(np.sum(mask))
            points[mask, :] = np.random.multivariate_normal(
                mean, sigma, count)

        # Initial state: randomly picked data points as clusters and a fresh
        # random partition.
        seed_indices = np.random.randint(0, npoints, nclusters)
        clusters = points[seed_indices].copy()
        partition[:] = np.random.randint(0, nclusters, npoints)

        minkowski_p = 2
        swap_threshold = 0
        ccluster.kmeans(points, clusters, partition, minkowski_p,
                        swap_threshold)