class Picker(object):
    """Choose hidden-layer centre points with the help of K-Means.

    Attributes:
        kmeans: ``KMeans`` object used for random picking and clustering.
        samples: list of ``Sample`` objects the centres are chosen from.
    """

    def __init__(self):
        # Created per instance. As class-level attributes the KMeans object
        # and the samples list were shared by every Picker instance.
        self.kmeans = KMeans()
        self.samples = []  # <Sample Object>

    def shuffing(self, k):
        """Pick ``k`` centres at random.

        Hands ``self.samples`` to the K-Means centre maker and returns the
        result of its ``shuffing(k)`` call.
        """
        maker = self.kmeans.center_maker
        maker.samples = self.samples
        return maker.shuffing(k)

    def clustering(self, k):
        """Pick ``k`` centres with K-Means++ and refine them by clustering.

        The original author notes that Euclidean distance is the default.

        Returns:
            A list of ``Sample`` objects, one per K-Means group, each built
            from that group's centre.
        """
        kmeans = self.kmeans

        # Cluster on the feature vectors taken from the Sample objects.
        # NOTE(review): every call adds self.samples to the same KMeans
        # object again -- confirm that repeated calls are not expected.
        for sample in self.samples:
            kmeans.add_sample(sample.features)

        kmeans.center_choice = Choice.Plus
        kmeans.make_centers(k)
        kmeans.max_iteration = 100
        kmeans.convergence = 0.001
        kmeans.setup()
        kmeans.run()

        # Wrap the refined centres back into Sample objects for the RBFNN;
        # these Sample objects do not need targets.
        return [Sample(group.center) for group in kmeans.groups]
class Test(unittest.TestCase):
    """Tests that run the image ``KMeans`` class on JPEG files on disk."""

    def setUp(self):
        """Store a ``KMeans(iterations=10, useLUT=True)`` on ``self.km``."""
        self.km = KMeans(iterations=10, useLUT=True)

    def testIsRGBA(self):
        """``testRGBA`` must report False for ``Mona_Lisa.jpg``."""
        # Binary mode: JPEG data must not go through text-mode translation.
        # The context manager closes the file even if testRGBA raises.
        with open("Mona_Lisa.jpg", "rb") as fi:
            isRGBA = self.km.testRGBA(Image.open(fi))
        self.assertFalse(isRGBA)

    def testProcessFile(self):
        """Process ``rusty door.jpg`` and check pixels, clusters and means."""
        with open("rusty door.jpg", "rb") as fi, \
                open("result.jpg", "wb") as fo:
            self.km.process(fi, fo, centroids=10)

        totalnumberofpixels = self.km.im.size[0] * self.km.im.size[1]
        self.assertEqual(len(self.km.pixels), totalnumberofpixels)

        # Every pixel is (r, g, b, centroid) with channel values in 0..255
        # and a centroid index below numbercentroids.
        for r, g, b, centroid in self.km.pixels:
            self.assertIn(r, range(0, 256))
            self.assertIn(g, range(0, 256))
            self.assertIn(b, range(0, 256))
            self.assertIn(centroid, range(0, self.km.numbercentroids))

        # Clusters must be non-empty and together hold every pixel once.
        totpixels = 0
        self.assertEqual(len(self.km.clusters.keys()),
                         self.km.numbercentroids)
        for label in self.km.clusters.keys():
            members = self.km.clusters[label]
            self.assertGreater(len(members), 0)
            totpixels += len(members)
            print("Cluster %d has %d elements" % (label, len(members)))
        self.assertEqual(totpixels, totalnumberofpixels)

        # Each cluster mean is an (r, g, b) triple with values in 0..255.
        for index in range(0, len(self.km.means)):
            r, g, b = self.km.means[index]
            self.assertIn(r, range(0, 256))
            self.assertIn(g, range(0, 256))
            self.assertIn(b, range(0, 256))
            print("Cluster %d is centered at (%d,%d,%d)" % (index, r, g, b))
def construct_kmeans_obj(collection, level):
    """Rebuild a KMeans object from the record stored for ``level``.

    Looks up one document with ``collection.find_one({'level': level})``.
    The fields read from it are::

        {
            'class_name': "doesn't matter",
            'k': 3,
            'labels': ['achiever', 'collector', 'killer'],
            'centroids': [[], [], []],
            'clusters': [[], [], []],
        }

    Returns the re-created KMeans object, or None when no document is found.
    """
    record = collection.find_one({'level': level})
    if record is None:
        return None

    model = KMeans(k=record['k'], class_name=record['class_name'])
    model.centroids = record['centroids']
    model.clusters = record['clusters']
    model.labels = record['labels']
    return model
def initialise(self, init_FG='random', init_S='random'):
    ''' Initialise F, S, G, tau, and lambdaFk, lambdaGl (if ARD).

    init_FG must be in OPTIONS_INIT_FG and init_S in OPTIONS_INIT_S.
    - 'kmeans' (F and G only): mu_F / mu_G become the clustering_results of
      a KMeans object built from (R, M, K) and (R.T, M.T, L) respectively.
    - 'random': each entry is exponential_draw(lambda) for its lambda.
    - any other accepted option: each entry is 1 / lambda.
    Every tau_F, tau_G and tau_S entry starts at 1. Afterwards the
    expectations are refreshed through the update_exp_* methods, followed
    by update_tau() and update_exp_tau().
    '''
    assert init_FG in OPTIONS_INIT_FG, \
        "Unknown initialisation option for F and G: %s. Should be in %s." % (
            init_FG, OPTIONS_INIT_FG)
    assert init_S in OPTIONS_INIT_S, \
        "Unknown initialisation option for S: %s. Should be in %s." % (
            init_S, OPTIONS_INIT_S)

    def start_value(rate, option):
        # 'random' draws from exponential_draw; otherwise use 1 / rate.
        if option == 'random':
            return exponential_draw(rate)
        return 1.0 / rate

    # ARD: set every alpha/beta to the prior values and update expectations.
    if self.ARD:
        self.alphaFk_s = numpy.zeros(self.K)
        self.betaFk_s = numpy.zeros(self.K)
        self.alphaGl_s = numpy.zeros(self.L)
        self.betaGl_s = numpy.zeros(self.L)
        self.exp_lambdaFk = numpy.zeros(self.K)
        self.exp_loglambdaFk = numpy.zeros(self.K)
        self.exp_lambdaGl = numpy.zeros(self.L)
        self.exp_loglambdaGl = numpy.zeros(self.L)
        for k in range(self.K):
            self.alphaFk_s[k], self.betaFk_s[k] = self.alpha0, self.beta0
            self.update_exp_lambdaFk(k)
        for l in range(self.L):
            self.alphaGl_s[l], self.betaGl_s[l] = self.alpha0, self.beta0
            self.update_exp_lambdaGl(l)

    # Parameters of F, G and S. Every tau entry is 1 whichever option is
    # chosen, so the tau arrays are created as ones directly.
    self.mu_F = numpy.zeros((self.I, self.K))
    self.tau_F = numpy.ones((self.I, self.K))
    self.mu_G = numpy.zeros((self.J, self.L))
    self.tau_G = numpy.ones((self.J, self.L))
    self.mu_S = numpy.zeros((self.K, self.L))
    self.tau_S = numpy.ones((self.K, self.L))

    if init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        row_clusterer = KMeans(self.R, self.M, self.K)
        row_clusterer.initialise()
        row_clusterer.cluster()
        self.mu_F = row_clusterer.clustering_results

        print("Initialising G using KMeans.")
        column_clusterer = KMeans(self.R.T, self.M.T, self.L)
        column_clusterer.initialise()
        column_clusterer.cluster()
        self.mu_G = column_clusterer.clustering_results
    else:
        # 'random' or 'exp'; F is filled before G, both row by row.
        for i, k in itertools.product(range(self.I), range(self.K)):
            if self.ARD:
                rate = self.exp_lambdaFk[k]
            else:
                rate = self.lambdaF[i, k]
            self.mu_F[i, k] = start_value(rate, init_FG)
        for j, l in itertools.product(range(self.J), range(self.L)):
            if self.ARD:
                rate = self.exp_lambdaGl[l]
            else:
                rate = self.lambdaG[j, l]
            self.mu_G[j, l] = start_value(rate, init_FG)

    # S never uses ARD or KMeans.
    for k, l in itertools.product(range(self.K), range(self.L)):
        self.mu_S[k, l] = start_value(self.lambdaS[k, l], init_S)

    # Expectations and variances of F, G, S.
    self.exp_F = numpy.zeros((self.I, self.K))
    self.var_F = numpy.zeros((self.I, self.K))
    self.exp_G = numpy.zeros((self.J, self.L))
    self.var_G = numpy.zeros((self.J, self.L))
    self.exp_S = numpy.zeros((self.K, self.L))
    self.var_S = numpy.zeros((self.K, self.L))
    for k in range(self.K):
        self.update_exp_F(k)
    for l in range(self.L):
        self.update_exp_G(l)
    for k, l in itertools.product(range(self.K), range(self.L)):
        self.update_exp_S(k, l)

    # Initialise tau and compute its expectation.
    self.update_tau()
    self.update_exp_tau()
def initialise(self, init_S='random', init_FG='random', tauFSG=None):
    """Initialise the parameters of F, S, G and tau.

    Args:
        init_S: 'exp' sets muS to 1 / lambdaS; 'random' overwrites every
            entry with exponential_draw(lambdaS[k, l]).
        init_FG: 'exp' and 'random' work as for S, using lambdaF and
            lambdaG; 'kmeans' takes the clustering_results of KMeans
            objects built from (R, M, K) and (R.T, M.T, L).
        tauFSG: optional dict that may hold 'tauF', 'tauS' and 'tauG';
            any missing entry defaults to an array of ones.

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    # Validate both options before touching any state or drawing numbers.
    assert init_S in ['exp', 'random'], \
        "Unrecognised init option for S: %s." % init_S
    assert init_FG in ['exp', 'random', 'kmeans'], \
        "Unrecognised init option for F,G: %s." % init_FG

    # None instead of a mutable {} default, which is shared between calls.
    if tauFSG is None:
        tauFSG = {}
    self.tauF = tauFSG['tauF'] if 'tauF' in tauFSG else numpy.ones((self.I, self.K))
    self.tauS = tauFSG['tauS'] if 'tauS' in tauFSG else numpy.ones((self.K, self.L))
    self.tauG = tauFSG['tauG'] if 'tauG' in tauFSG else numpy.ones((self.J, self.L))

    # range() rather than xrange() so the code runs on Python 2 and 3.
    self.muS = 1. / self.lambdaS
    if init_S == 'random':
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.muS[k, l] = exponential_draw(self.lambdaS[k, l])

    self.muF, self.muG = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i, k in itertools.product(range(self.I), range(self.K)):
            self.muF[i, k] = exponential_draw(self.lambdaF[i, k])
        for j, l in itertools.product(range(self.J), range(self.L)):
            self.muG[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        self.muF = kmeans_F.clustering_results

        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.muG = kmeans_G.clustering_results

    # Initialise the expectations and variances.
    self.expF, self.varF = numpy.zeros((self.I, self.K)), numpy.zeros((self.I, self.K))
    self.expS, self.varS = numpy.zeros((self.K, self.L)), numpy.zeros((self.K, self.L))
    self.expG, self.varG = numpy.zeros((self.J, self.L)), numpy.zeros((self.J, self.L))

    for k in range(self.K):
        self.update_exp_F(k)
    for k, l in itertools.product(range(self.K), range(self.L)):
        self.update_exp_S(k, l)
    for l in range(self.L):
        self.update_exp_G(l)

    # Initialise tau using the updates.
    self.update_tau()
    self.update_exp_tau()
def initialise(self, init_S='random', init_FG='random'):
    """Initialise F, S, G and tau.

    Args:
        init_S: 'exp' sets S to 1 / lambdaS; 'random' overwrites every
            entry with exponential_draw(lambdaS[k, l]).
        init_FG: 'exp' and 'random' work as for S, using lambdaF and
            lambdaG; 'kmeans' takes the clustering_results of KMeans
            objects built from (R, M, K) and (R.T, M.T, L), plus 0.2.

    tau is set to alpha_s() / beta_s().

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    assert init_S in ['random', 'exp'], \
        "Unknown initialisation option for S: %s. Should be 'random' or 'exp'." % init_S
    # The message used to say "option for S" and had an unbalanced quote.
    assert init_FG in ['random', 'exp', 'kmeans'], \
        "Unknown initialisation option for F and G: %s. Should be 'random', 'exp', or 'kmeans'." % init_FG

    # range() rather than xrange() so the code runs on Python 2 and 3.
    self.S = 1. / self.lambdaS
    if init_S == 'random':
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.S[k, l] = exponential_draw(self.lambdaS[k, l])

    self.F, self.G = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i, k in itertools.product(range(self.I), range(self.K)):
            self.F[i, k] = exponential_draw(self.lambdaF[i, k])
        for j, l in itertools.product(range(self.J), range(self.L)):
            self.G[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        self.F = kmeans_F.clustering_results + 0.2

        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.G = kmeans_G.clustering_results + 0.2

    self.tau = self.alpha_s() / self.beta_s()
def initialise(self, init_FG='random', init_S='random'):
    ''' Initialise F, S, G, tau, and lambdaFk, lambdaGl (if ARD).

    init_FG must be in OPTIONS_INIT_FG and init_S in OPTIONS_INIT_S.
    - 'kmeans' (F and G only): F / G become the clustering_results of a
      KMeans object built from (R, M, K) and (R.T, M.T, L), plus 0.2.
    - 'random': each entry is exponential_draw(lambda) for its lambda.
    - any other accepted option: each entry is 1 / lambda.
    With ARD the lambda for F[i, k] is lambdaFk[k] (set to alpha0 / beta0),
    and likewise lambdaGl[l] for G. Finally tau is drawn with
    gamma_draw(alpha_s(), beta_s()).
    '''
    assert init_FG in OPTIONS_INIT_FG, \
        "Unknown initialisation option for F and G: %s. Should be in %s." % (
            init_FG, OPTIONS_INIT_FG)
    assert init_S in OPTIONS_INIT_S, \
        "Unknown initialisation option for S: %s. Should be in %s." % (
            init_S, OPTIONS_INIT_S)

    def start_value(rate, option):
        # 'random' draws from exponential_draw; otherwise use 1 / rate.
        if option == 'random':
            return exponential_draw(rate)
        return 1.0 / rate

    self.F = numpy.zeros((self.I, self.K))
    self.S = numpy.zeros((self.K, self.L))
    self.G = numpy.zeros((self.J, self.L))
    self.lambdaFk = numpy.zeros(self.K)
    self.lambdaGl = numpy.zeros(self.L)

    # lambdaFk and lambdaGl stay at zero unless ARD is switched on.
    if self.ARD:
        for k in range(self.K):
            self.lambdaFk[k] = self.alpha0 / self.beta0
        for l in range(self.L):
            self.lambdaGl[l] = self.alpha0 / self.beta0

    if init_FG != 'kmeans':
        # 'random' or 'exp'; F is filled before G, both row by row.
        for i, k in itertools.product(range(self.I), range(self.K)):
            if self.ARD:
                rate = self.lambdaFk[k]
            else:
                rate = self.lambdaF[i, k]
            self.F[i, k] = start_value(rate, init_FG)
        for j, l in itertools.product(range(self.J), range(self.L)):
            if self.ARD:
                rate = self.lambdaGl[l]
            else:
                rate = self.lambdaG[j, l]
            self.G[j, l] = start_value(rate, init_FG)
    else:
        print("Initialising F using KMeans.")
        row_clusterer = KMeans(self.R, self.M, self.K)
        row_clusterer.initialise()
        row_clusterer.cluster()
        self.F = row_clusterer.clustering_results + 0.2

        print("Initialising G using KMeans.")
        column_clusterer = KMeans(self.R.T, self.M.T, self.L)
        column_clusterer.initialise()
        column_clusterer.cluster()
        self.G = column_clusterer.clustering_results + 0.2

    # S never uses ARD or KMeans.
    for k, l in itertools.product(range(self.K), range(self.L)):
        self.S[k, l] = start_value(self.lambdaS[k, l], init_S)

    # Initialise tau last, after every draw for F, G and S.
    self.tau = gamma_draw(self.alpha_s(), self.beta_s())
def initialise(self, init_FG='random', init_S='random', expo_prior=1.):
    ''' Initialise F, S and G.

    init_FG must be in OPTIONS_INIT_FG and init_S in OPTIONS_INIT_S.
    - 'ones': all entries are 1.
    - 'random': entries come from numpy.random.rand.
    - 'exponential': each entry is exponential_draw(expo_prior).
    - 'kmeans' (F and G only): clustering_results of a KMeans object built
      from (R, M, K) and (R.T, M.T, L) respectively, plus 0.2.
    An accepted option that none of the branches handles leaves the
    corresponding attribute untouched.
    '''
    assert init_FG in OPTIONS_INIT_FG, \
        "Unrecognised init option for F,G: %s. Should be one in %s." % (
            init_FG, OPTIONS_INIT_FG)
    assert init_S in OPTIONS_INIT_S, \
        "Unrecognised init option for S: %s. Should be one in %s." % (
            init_S, OPTIONS_INIT_S)

    # range() rather than xrange() so the code runs on Python 2 and 3 and
    # matches the sibling initialise() implementations.
    if init_S == 'ones':
        self.S = numpy.ones((self.K, self.L))
    elif init_S == 'random':
        self.S = numpy.random.rand(self.K, self.L)
    elif init_S == 'exponential':
        self.S = numpy.empty((self.K, self.L))
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.S[k, l] = exponential_draw(expo_prior)

    if init_FG == 'ones':
        self.F = numpy.ones((self.I, self.K))
        self.G = numpy.ones((self.J, self.L))
    elif init_FG == 'random':
        self.F = numpy.random.rand(self.I, self.K)
        self.G = numpy.random.rand(self.J, self.L)
    elif init_FG == 'exponential':
        self.F = numpy.empty((self.I, self.K))
        self.G = numpy.empty((self.J, self.L))
        # F is filled before G so the order of the draws is unchanged.
        for i, k in itertools.product(range(self.I), range(self.K)):
            self.F[i, k] = exponential_draw(expo_prior)
        for j, l in itertools.product(range(self.J), range(self.L)):
            self.G[j, l] = exponential_draw(expo_prior)
    elif init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        self.F = kmeans_F.clustering_results + 0.2

        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.G = kmeans_G.clustering_results + 0.2
def setUp(self):
    """Store a ``KMeans(iterations=10, useLUT=True)`` instance on ``self.km``."""
    clusterer = KMeans(iterations=10, useLUT=True)
    self.km = clusterer
from typing import List

from kmeans.data_point import DataPoint
from kmeans.kmeans import KMeans

if __name__ == "__main__":
    # Demo: cluster three 3-dimensional points into two clusters and print
    # the points held by each resulting cluster.
    coordinates: List[List[float]] = [
        [2.0, 1.0, 1.0],
        [2.0, 2.0, 5.0],
        [3.0, 1.5, 2.5],
    ]
    points: List[DataPoint] = [DataPoint(values) for values in coordinates]

    kmeans_test: KMeans[DataPoint] = KMeans(2, points)
    test_clusters: List[KMeans.Cluster] = kmeans_test.run()

    for index, cluster in enumerate(test_clusters):
        print(f"Cluster {index}: {cluster.points}")
parser.add_argument('-c', type=int, default=300,
                    help='Max iterations before convergence')
parser.add_argument('-t', type=int, default=1, help="Number of trials")
parser.add_argument('-k', type=int, default=2, required=True,
                    help="Value of 'k' (centroids)")
parser.add_argument('-n', type=int, default=1000, required=True,
                    help='data size')

# Get/parse arguments
args = parser.parse_args()

# Number of data points
no_points = args.n

# One CSV per (data size, k, number of trials) combination.
# NOTE(review): 'wb' with str writes, xrange and time.clock() are
# Python-2-only; time.clock() was removed in Python 3.8. Confirm the target
# interpreter before porting.
csv_path = './trials/time.{0}.{1}.{2}.csv'.format(args.n, args.k, args.t)
with open(csv_path, 'wb') as f:
    f.write('data size,k,trial,kmeans time (s)\n')

    for trial in xrange(1, args.t + 1):
        # A fresh KMeans object for every trial
        k = KMeans(class_name="test", k=args.k, log=args.log,
                   max_iterations=args.c)

        # Generate data: one [time, coins, kills] row per point
        data = []
        for _ in range(no_points):
            data.append([
                random.uniform(0.0, 1.0),  # time
                random.uniform(0.0, 1.5),  # coins, can be up to 1.5 the amount of coins
                random.uniform(0.0, 1.0),  # kills
            ])

        # Insert data (this forces the KMeans calculation) and time it
        start = time.clock()
        k.put(data)
        elapsed = time.clock() - start
        f.write('{0},{1},{2},{3}\n'.format(args.n, args.k, trial, elapsed))
from kmeans.data import Data
from kmeans.settings import Settings
from kmeans.kmeans import KMeans


def main():
    """Read and print the settings, load the data set and run K-Means."""
    settings = Settings()
    settings.read_parameters()
    settings.print_parameters()

    dataset = Data(directory='../data/', data='data.txt')
    KMeans(dataset, settings).run()


if __name__ == '__main__':
    main()