def func(alpha, nClusters=nClusters):
    """Objective function: negated scaled relative score gap of two k-means runs.

    ``alpha`` is a flat array of length ``2 * self.nVars``. Its two halves
    become the columns of an ``(nVars, 2)`` matrix, ``self.Z`` is multiplied
    by that matrix, and the product is clustered twice with ``nClusters``
    centroids.

    Returns the negated value of
    ``(self.totalSize - nClusters - 1) * (score1 - score2) / score2``.
    """
    n_vars = self.nVars

    # Turn each half of the flat vector into a column and put the two
    # columns side by side.
    first_col = alpha[:n_vars].reshape((n_vars, 1))
    second_col = alpha[n_vars:].reshape(n_vars, 1)
    projection = numpy.append(first_col, second_col, axis=1)

    projected = orange.ExampleTable(numpy.dot(self.Z, projection))

    # NOTE(review): both runs use the same number of centroids, so the score
    # gap comes only from differences between the two runs. The formula
    # resembles an index that compares k with k + 1 clusters -- confirm
    # whether the second run was meant to use nClusters + 1.
    run_a = orngClustering.KMeans(projected, centroids=nClusters)
    run_b = orngClustering.KMeans(projected, centroids=nClusters)

    scale = self.totalSize - nClusters - 1
    return -(scale * (run_a.score - run_b.score) / (run_b.score))
def runOptimization(self):
    """Run k-means for every k in the configured range and keep the best run.

    Reports an error and returns early when ``self.optimizationTo`` exceeds
    the number of unique instances in ``self.data``. Otherwise one
    ``orngClustering.KMeans`` is built for each k in
    ``[self.optimizationFrom, self.optimizationTo]``, the ``(k, run)`` pairs
    are stored in ``self.optimizationRun``, and ``self.bestRun`` is set to
    the pair with the lowest score when the scoring function has a truthy
    ``minimize`` attribute, or the highest score otherwise. Finally the
    results are shown and the data is sent.

    Any exception raised while optimizing is reported through
    ``self.error`` and then re-raised.
    """
    unique_count = len(set(self.data))
    if self.optimizationTo > unique_count:
        self.error("Not enough unique data instances (%d) for given number of clusters (%d)." %
                   (unique_count, self.optimizationTo))
        return

    random.seed(0)
    try:
        self.progressBarInit()
        Ks = list(range(self.optimizationFrom, self.optimizationTo + 1))
        scoring = self.scoringMethods[self.scoring][1]

        runs = []
        for index, k in enumerate(Ks):
            # The bar is split into len(Ks) equal slices. The offset must be
            # the position of this run in Ks, not k itself: k starts at
            # optimizationFrom, which would start the bar above 0% and
            # saturate it before the last runs.
            def inner_progress(val, index=index):
                offset = index * 100.0 / len(Ks)
                self.progressBarSet(
                    min(self.progressEstimate(val) / len(Ks) + offset, 100.0))

            runs.append((k, orngClustering.KMeans(
                self.data,
                centroids=k,
                minscorechange=0,
                nstart=self.restarts,
                initialization=self.initializations[self.initializationType][1],
                distance=self.distanceMeasures[self.distanceMeasure][1],
                scoring=scoring,
                inner_callback=inner_progress)))

        self.optimizationRun = runs
        self.progressBarFinished()

        pick = min if getattr(scoring, "minimize", False) else max
        self.bestRun = pick(self.optimizationRun,
                            key=lambda k_run: k_run[1].score)
        self.showResults()
        self.sendData()
    except Exception as ex:
        self.error(0, "An error occured while running optimization. Reason: " + str(ex))
        raise
def runOptimization(self):
    """Run k-means for every k in the configured range and keep the best run.

    Reports an error and returns early when ``self.optimizationTo`` exceeds
    the number of unique instances in ``self.data``. Otherwise one
    ``orngClustering.KMeans`` is built for each k in
    ``[self.optimizationFrom, self.optimizationTo]`` with ``self.restarts``
    restarts, the ``(k, run)`` pairs are stored in ``self.optimizationRun``,
    and ``self.bestRun`` is set to the pair with the lowest score when the
    scoring function has a truthy ``minimize`` attribute, or the highest
    score otherwise. Finally the results are shown and the data is sent.

    Any exception raised while optimizing is reported through
    ``self.error`` and then re-raised.
    """
    unique_count = len(set(self.data))
    if self.optimizationTo > unique_count:
        self.error("Not enough unique data instances (%d) for given number of clusters (%d)." %
                   (unique_count, self.optimizationTo))
        return

    random.seed(0)
    try:
        self.progressBarInit()
        Ks = range(self.optimizationFrom, self.optimizationTo + 1)
        # Progress is measured in finished restarts across all values of k.
        outer_callback_count = len(Ks) * self.restarts
        outer_callback_state = {"restart": 0}

        def outer_progress(km):
            # One more restart is complete; move the bar to that fraction.
            outer_callback_state["restart"] += 1
            self.progressBarSet(
                100.0 * outer_callback_state["restart"] / outer_callback_count)

        def inner_progress(km):
            # Progress inside the current restart, added on top of the
            # restarts that have already finished.
            estimate = self.progressEstimate(km)
            self.progressBarSet(min(
                estimate / outer_callback_count +
                outer_callback_state["restart"] * 100.0 / outer_callback_count,
                100.0))

        scoring = self.scoringMethods[self.scoring][1]
        optimizationRun = []
        for k in Ks:
            kmeans = orngClustering.KMeans(
                self.data,
                centroids=k,
                minscorechange=0,
                nstart=self.restarts,
                initialization=self.initializations[self.initializationType][1],
                distance=self.distanceMeasures[self.distanceMeasure][1],
                scoring=scoring,
                outer_callback=outer_progress,
                inner_callback=inner_progress)
            optimizationRun.append((k, kmeans))
            if self.restarts == 1:
                # Presumably the outer callback is not invoked for a single
                # restart, so the restart is counted by hand -- confirm
                # against KMeans.
                outer_progress(None)

        self.optimizationRun = optimizationRun
        self.progressBarFinished()

        pick = min if getattr(scoring, "minimize", False) else max
        # Index into the (k, run) pair instead of unpacking it in the lambda
        # signature; tuple parameters are not valid syntax on Python 3.
        self.bestRun = pick(self.optimizationRun,
                            key=lambda k_run: k_run[1].score)
        self.showResults()
        self.sendData()
    except Exception as ex:
        self.error(
            0, "An error occured while running optimization. Reason: " + str(ex))
        raise
def func(alpha, nClusters=nClusters):
    """Objective function: negated silhouette score of a k-means clustering.

    ``alpha`` is a flat array of length ``2 * self.nVars``. Its two halves
    become the columns of an ``(nVars, 2)`` matrix, ``self.Z`` is multiplied
    by that matrix, and the product is clustered with ``nClusters``
    centroids.

    Returns the negated value of ``orngClustering.score_silhouette`` for
    that clustering.
    """
    n_vars = self.nVars

    # Turn each half of the flat vector into a column and put the two
    # columns side by side.
    first_col = alpha[:n_vars].reshape((n_vars, 1))
    second_col = alpha[n_vars:].reshape(n_vars, 1)
    projection = numpy.append(first_col, second_col, axis=1)

    projected = orange.ExampleTable(numpy.dot(self.Z, projection))
    clustering = orngClustering.KMeans(projected, centroids=nClusters)
    return -orngClustering.score_silhouette(clustering)
def cluster(self):
    """Cluster ``self.data`` into ``self.K`` clusters and send the result.

    Reports an error and returns early when ``self.K`` exceeds the number
    of unique instances in ``self.data``. Otherwise a fresh
    ``orngClustering.KMeans`` is stored in ``self.km`` and applied to the
    data between ``progressBarInit`` and ``progressBarFinished``, with
    ``sendData`` called once it has run.
    """
    unique_count = len(set(self.data))
    if self.K > unique_count:
        self.error("Not enough unique data instances (%d) for given number of clusters (%d)." %
                   (unique_count, self.K))
        return

    random.seed(0)

    # Gather the widget settings first; the clusterer is built without data
    # and applied to it further down.
    settings = dict(
        centroids=self.K,
        minscorechange=0,
        nstart=self.restarts,
        initialization=self.initializations[self.initializationType][1],
        distance=self.distanceMeasures[self.distanceMeasure][1],
        scoring=self.scoringMethods[self.scoring][1],
        inner_callback=self.clusterCallback,
    )
    self.km = orngClustering.KMeans(**settings)

    self.progressBarInit()
    self.km(self.data)
    self.sendData()
    self.progressBarFinished()
import orange
import orngClustering
import random

# For each data set, run k-means (3 centroids) with three initialisations
# and print how many iterations each run took. Columns: "Rnd" is the default
# initialisation, "Div" is kmeans_init_diversity, "HC" is
# KMeans_init_hierarchicalClustering(n=100).
data_names = ["iris.tab", "housing.tab", "vehicle.tab"]
data_sets = [orange.ExampleTable(name) for name in data_names]

# print is written as a single parenthesised argument so the same line is
# valid on Python 2 and Python 3 and prints identical text on both.
print("%10s %3s %3s %3s" % ("", "Rnd", "Div", "HC"))
for data, name in zip(data_sets, data_names):
    # Re-seed per data set so every row starts from the same random state.
    random.seed(42)
    km_random = orngClustering.KMeans(data, centroids=3)
    km_diversity = orngClustering.KMeans(
        data, centroids=3,
        initialization=orngClustering.kmeans_init_diversity)
    km_hc = orngClustering.KMeans(
        data, centroids=3,
        initialization=orngClustering.KMeans_init_hierarchicalClustering(n=100))
    print("%10s %3d %3d %3d" % (name, km_random.iteration,
                                km_diversity.iteration, km_hc.iteration))
import orange
import orngClustering
import random

# Cluster the iris data into 3 clusters with a fixed seed and print the
# cluster assignments of the last ten instances.
random.seed(42)
data = orange.ExampleTable("iris")
km = orngClustering.KMeans(data, 3)

# A single parenthesised argument keeps this valid on Python 2 and 3 with
# identical output.
print(km.clusters[-10:])
import orange
import orngClustering
import random

# Try k = 2..9 on the data set, print the silhouette score of each
# clustering, and plot the silhouette of the best-scoring one to tmp.png.
data = orange.ExampleTable("iris")
# Alternative data set:
# data = orange.ExampleTable("lung-cancer")

bestscore = 0
# Start with no winner. The first clustering is always accepted, so best_km
# is bound even when no silhouette score is above the initial 0.
best_km = None
for k in range(2, 10):
    random.seed(42)
    km = orngClustering.KMeans(
        data, k,
        initialization=orngClustering.KMeans_init_hierarchicalClustering(n=50),
        nstart=10)
    score = orngClustering.score_silhouette(km)
    # A single parenthesised argument keeps print valid on Python 2 and 3.
    print("%d: %.3f" % (k, score))
    if best_km is None or score > bestscore:
        best_km = km
        bestscore = score

orngClustering.plot_silhouette(best_km, filename='tmp.png')
import orange
import orngClustering

# Print the silhouette score for k = 2..4 on the data set, then plot the
# silhouette of a 3-cluster solution to kmeans-silhouette.png.
data = orange.ExampleTable("voting")
# Alternative data set:
# data = orange.ExampleTable("iris")

for k in range(2, 5):
    km = orngClustering.KMeans(
        data, k, initialization=orngClustering.kmeans_init_diversity)
    score = orngClustering.score_silhouette(km)
    # Same text as the Python 2 statement `print k, score` (str() of each
    # value, separated by one space), in a form that also parses on Python 3.
    print("%s %s" % (k, score))

km = orngClustering.KMeans(
    data, 3, initialization=orngClustering.kmeans_init_diversity)
orngClustering.plot_silhouette(km, "kmeans-silhouette.png")
import orange
import orngClustering
import random

random.seed(42)


def callback(km):
    """Print the iteration number, change count and score of ``km``."""
    # A single parenthesised argument keeps print valid on Python 2 and 3
    # with identical output.
    print("Iteration: %d, changes: %d, score: %.4f" %
          (km.iteration, km.nchanges, km.score))


# Cluster iris into 3 clusters, passing ``callback`` as the inner callback.
data = orange.ExampleTable("iris")
km = orngClustering.KMeans(data, 3, minscorechange=0, inner_callback=callback)
xc = [float(d[attx]) for d in km.centroids] yc = [float(d[atty]) for d in km.centroids] pylab.scatter(xc, yc, marker="x", c="k", s=200) pylab.xlabel(attx) pylab.ylabel(atty) if title: pylab.title(title) pylab.savefig("%s-%03d.png" % (filename, km.iteration)) pylab.close() def in_callback(km): print "Iteration: %d, changes: %d, score: %8.6f" % (km.iteration, km.nchanges, km.score) plot_scatter(data, km, "petal width", "petal length", title="Iteration %d" % km.iteration) data = orange.ExampleTable("iris") random.seed(42) km = orngClustering.KMeans(data, 3, minscorechange=0, maxiters=10, inner_callback=in_callback)