Example #1
0
def score_one_clustering(X, truelabels, num_components, num_iterations):
    """Cluster a dataset with k-means and score it by conditional entropy.

    Args:
        X: Data to cluster; passed unchanged to ``_mpi_kmeans.kmeans``.
        truelabels: Reference labels the clustering is compared against.
        num_components: Number of clusters requested from k-means.
        num_iterations: Forwarded as the fourth argument of
            ``_mpi_kmeans.kmeans``.

    Returns:
        The result of ``condentropy(truelabels, labels - 1)``.
    """
    # scipy's built-in k-means (scipy.cluster.vq) is very slow, so the
    # MPI implementation is used instead.
    # NOTE(review): the literal 200 is presumably the per-run iteration cap
    # and num_iterations the number of restarts -- confirm against the
    # _mpi_kmeans wrapper signature.
    clst, dist, labels = _mpi_kmeans.kmeans(X, num_components, 200,
                                            num_iterations)

    # The labels are shifted down by one before scoring; presumably
    # _mpi_kmeans returns 1-based cluster ids -- TODO confirm.
    shifted_labels = labels - 1

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(truelabels)
    print(shifted_labels)
    return condentropy(truelabels, shifted_labels)
Example #2
0
def score_one_clustering(X, truelabels, num_components, num_iterations):
    """Run k-means on ``X`` and evaluate the result via conditional entropy.

    Args:
        X: Data to cluster; handed as-is to ``_mpi_kmeans.kmeans``.
        truelabels: Ground-truth labels used as the reference for scoring.
        num_components: Number of clusters to fit.
        num_iterations: Passed through as the fourth argument of
            ``_mpi_kmeans.kmeans``.

    Returns:
        Whatever ``condentropy(truelabels, labels - 1)`` returns.
    """
    # The MPI k-means is used because scipy's built-in implementation
    # (scipy.cluster.vq.kmeans / vq) is very slow.
    # NOTE(review): 200 looks like the maximum iteration count per run and
    # num_iterations like the number of runs -- verify against the
    # _mpi_kmeans wrapper.
    clst, dist, labels = _mpi_kmeans.kmeans(
        X, num_components, 200, num_iterations)

    # Scoring uses labels shifted by -1 (presumably converting 1-based
    # cluster ids to 0-based -- TODO confirm).
    zero_based = labels - 1

    # print(...) with one argument behaves identically on Python 2 and 3,
    # unlike the Python 2-only print statement.
    print(truelabels)
    print(zero_based)
    return condentropy(truelabels, zero_based)
Example #3
0
    def process(self, data):
        """Build a k-means codebook from a random subsample of ``data``.

        A fraction ``self.sampleFromData`` of the vectors in ``data`` is
        drawn at random without replacement and clustered into
        ``self.numClusters`` clusters by ``_mpi_kmeans.kmeans``. The three
        values returned by that call are stored on the instance as
        ``self.codebook``, ``self.dist`` and ``self.labels``.

        Args:
            data: Sequence of feature vectors; converted to a ``c_double``
                numpy array and indexed as a 2-D array (one vector per row).

        Returns:
            ``self.codebook`` as produced by ``_mpi_kmeans.kmeans``.
        """
        if pynopticon.verbosity > 0:
            # Single-argument print(...) is valid on Python 2 and Python 3.
            print("Performing kmeans clustering with k=%i..."
                  % self.numClusters)

        # Convert once; the row selection below already yields a fresh
        # c_double array, so no second conversion is needed before kmeans.
        data = numpy.array(data, dtype=c_double)

        # Randomly keep a fraction of the vectors. sampleFromData is used
        # as a fraction (it multiplies len(data) directly), not a percent.
        num_samples = int(round(len(data) * self.sampleFromData))
        sample_points = numpy.random.permutation(len(data))[:num_samples]
        data = data[sample_points, :]

        self.codebook, self.dist, self.labels = _mpi_kmeans.kmeans(
            data, self.numClusters, self.maxiter, self.numruns)

        return self.codebook