def score_one_clustering(X, truelabels, num_components, num_iterations):
    """Cluster a dataset with k-means and evaluate it using conditional entropy.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to cluster.
    truelabels : array-like
        Ground-truth label for each sample (0-based).
    num_components : int
        Number of clusters (k).
    num_iterations : int
        Number of k-means restarts handed to the MPI implementation.

    Returns
    -------
    float
        condentropy(truelabels, predicted_labels); lower is better.
    """
    # scipy's builtin K-means is very slow, use mpi-version instead.
    # from scipy.cluster.vq import kmeans, vq
    # clst, dist = kmeans(X, num_components, NUM_ITERATIONS)
    # labels, dist = vq(X, clst)
    # NOTE(review): 200 is presumably the max iterations per run — confirm
    # against the mpi_kmeans API before changing it.
    clst, dist, labels = _mpi_kmeans.kmeans(X, num_components, 200, num_iterations)
    # mpi_kmeans apparently returns 1-based labels; shift to 0-based so they
    # line up with truelabels (inferred from the "labels - 1" shift — verify).
    # Parenthesized print so these debug dumps run on Python 2 and 3 alike.
    print(truelabels)
    print(labels - 1)
    return condentropy(truelabels, labels - 1)
def score_one_clustering(X, truelabels, num_components, num_iterations):
    """Cluster a dataset with k-means and score it by conditional entropy.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples to cluster.
    truelabels : array-like
        Ground-truth label per sample (0-based).
    num_components : int
        Number of clusters (k).
    num_iterations : int
        Number of k-means restarts for the MPI implementation.

    Returns
    -------
    float
        Conditional entropy of the true labels given the predicted ones;
        lower is better.
    """
    # scipy's builtin K-means is very slow, use mpi-version instead.
    # from scipy.cluster.vq import kmeans, vq
    # clst, dist = kmeans(X, num_components, NUM_ITERATIONS)
    # labels, dist = vq(X, clst)
    # NOTE(review): the hard-coded 200 looks like a max-iterations cap —
    # confirm against the mpi_kmeans API.
    clst, dist, labels = _mpi_kmeans.kmeans(X, num_components, 200, num_iterations)
    # The "labels - 1" shift suggests mpi_kmeans labels are 1-based — verify.
    # print() form keeps these debug dumps working on Python 2 and 3.
    print(truelabels)
    print(labels - 1)
    return condentropy(truelabels, labels - 1)
def process(self, data):
    """Run k-means clustering on a random subsample of the feature vectors.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Feature vectors to cluster.

    Returns
    -------
    ndarray
        The learned codebook (cluster centroids). Side effects: stores the
        codebook, the per-sample distances, and the per-sample labels on
        self.codebook, self.dist and self.labels respectively.
    """
    if pynopticon.verbosity > 0:
        # Parenthesized print so the message works on Python 2 and 3 alike.
        print("Performing kmeans clustering with k=%i..." % self.numClusters)
    # Randomly keep self.sampleFromData (presumably a fraction in [0, 1] —
    # confirm against callers) of the input vectors.
    num_kept = int(round(len(data) * self.sampleFromData))
    sample_points = numpy.random.permutation(len(data))[:num_kept]
    # Convert to C doubles exactly once; the original converted the already
    # converted array a second time, copying the whole matrix for no benefit.
    data = numpy.array(data, dtype=c_double)[sample_points, :]
    self.codebook, self.dist, self.labels = _mpi_kmeans.kmeans(
        data, self.numClusters, self.maxiter, self.numruns)
    return self.codebook