Beispiel #1
0
class Picker(object):
    """Picks hidden-layer center points with the help of K-Means.

    Attributes:
        kmeans: the KMeans object used for center selection and clustering.
        samples: list of Sample objects to pick centers from.
    """

    def __init__(self):
        # State lives on the instance. As class attributes, the KMeans object
        # and the sample list would be shared (and mutated) by every Picker.
        self.kmeans = KMeans()
        self.samples = []  # <Sample Object>

    def shuffing(self, k):
        """Random picking: delegate to the K-Means center maker.

        Hands the current samples to ``self.kmeans.center_maker`` and returns
        whatever its ``shuffing(k)`` returns.
        """
        maker = self.kmeans.center_maker
        maker.samples = self.samples
        return maker.shuffing(k)

    def clustering(self, k):
        """Pick k centers (Choice.Plus), cluster, and return them as Samples.

        Per the original author's note this is K-Means++ selection followed by
        clustering that updates the centers (Euclidean distance by default).

        Returns:
            A list with one Sample per resulting group, built from the
            group's center. These Samples carry no targets.
        """
        kmeans = self.kmeans
        # Feed the features of every Sample object into the clusterer.
        # NOTE(review): samples are added on every call, so calling this twice
        # on the same Picker presumably feeds them twice - confirm with KMeans.
        for sample in self.samples:
            kmeans.add_sample(sample.features)
        kmeans.center_choice = Choice.Plus
        kmeans.make_centers(k)
        kmeans.max_iteration = 100
        kmeans.convergence = 0.001
        kmeans.setup()
        kmeans.run()
        # Wrap the clustered centers back into Sample objects so the RBFNN can
        # use them; these Samples do not need targets.
        return [Sample(group.center) for group in kmeans.groups]
class Test(unittest.TestCase):
    """Tests for the image-oriented KMeans, driven by sample JPEG files.

    The tests expect "Mona_Lisa.jpg" and "rusty door.jpg" in the working
    directory; testProcessFile writes "result.jpg" there.
    """

    def setUp(self):
        """Create the KMeans instance (10 iterations, LUT on) for each test."""
        self.km = KMeans(iterations=10, useLUT=True)

    def testIsRGBA(self):
        """testRGBA must report False for the Mona Lisa JPEG."""
        # JPEG data is binary: text mode can corrupt it (newline translation
        # on Windows, decoding on Python 3). `with` closes the file even if
        # the call raises.
        with open("Mona_Lisa.jpg", "rb") as fi:
            isRGBA = self.km.testRGBA(Image.open(fi))
        self.assertFalse(isRGBA)

    def testProcessFile(self):
        """process() must label every pixel and produce consistent clusters."""
        with open("rusty door.jpg", "rb") as fi, open("result.jpg", "wb") as fo:
            self.km.process(fi, fo, centroids=10)

        channel_values = range(0, 256)

        # Every pixel is (r, g, b, centroid index) with all parts in range.
        totalnumberofpixels = self.km.im.size[0] * self.km.im.size[1]
        self.assertEqual(len(self.km.pixels), totalnumberofpixels)
        for pixel in self.km.pixels:
            r, g, b, centroid = pixel
            self.assertIn(r, channel_values)
            self.assertIn(g, channel_values)
            self.assertIn(b, channel_values)
            self.assertIn(centroid, range(0, self.km.numbercentroids))

        # One non-empty cluster per centroid, together covering all pixels.
        totpixels = 0
        self.assertEqual(len(self.km.clusters.keys()), self.km.numbercentroids)
        for x in self.km.clusters.keys():
            members = len(self.km.clusters[x])
            self.assertTrue(members > 0)
            totpixels += members
            print("Cluster %d has %d elements" % (x, members))
        self.assertEqual(totpixels, totalnumberofpixels)

        # Each cluster mean is a valid RGB triple.
        for x in range(0, len(self.km.means)):
            r, g, b = self.km.means[x]
            self.assertIn(r, channel_values)
            self.assertIn(g, channel_values)
            self.assertIn(b, channel_values)
            print("Cluster %d is centered at (%d,%d,%d)" % (x, r, g, b))
def construct_kmeans_obj(collection, level):
  """
  Rebuild a KMeans object from the record stored in the db for `level`.

  Fetches one document matching {'level': level} from `collection` and
  copies its fields onto a freshly constructed KMeans.
  Expected document layout:
  {
    'class_name': 'any string',
    'k': 3,
    'labels': ['achiever', 'collector', 'killer'],
    'centroids': [ [], [], [] ],
    'clusters': [ [], [], [] ],
  }

  Returns the rebuilt KMeans, or None when no document matches.
  """
  record = collection.find_one({'level': level})
  if record is None:
    # Nothing stored for this level.
    return None

  restored = KMeans(k=record['k'], class_name=record['class_name'])
  restored.centroids = record['centroids']
  restored.clusters = record['clusters']
  restored.labels = record['labels']
  return restored
Beispiel #4
0
    def initialise(self, init_FG='random', init_S='random'):
        """Initialise F, S, G, tau, and lambdaFk, lambdaGl (if ARD).

        Args:
            init_FG: initialisation scheme for F and G; must be in
                OPTIONS_INIT_FG. 'kmeans' takes the means from the KMeans
                clustering results, 'random' draws each mean with
                exponential_draw, and any other accepted value uses
                1 / hyperparameter.
            init_S: initialisation scheme for S; must be in OPTIONS_INIT_S.
                'random' draws each mean with exponential_draw, any other
                accepted value uses 1 / lambdaS.

        Raises:
            AssertionError: if either option is not an accepted value.
        """
        assert init_FG in OPTIONS_INIT_FG, "Unknown initialisation option for F and G: %s. Should be in %s." % (
            init_FG, OPTIONS_INIT_FG)
        assert init_S in OPTIONS_INIT_S, "Unknown initialisation option for S: %s. Should be in %s." % (
            init_S, OPTIONS_INIT_S)

        # Initialise lambdaFk, lambdaGl, and compute expectations
        if self.ARD:
            self.alphaFk_s, self.betaFk_s = numpy.zeros(self.K), numpy.zeros(self.K)
            self.alphaGl_s, self.betaGl_s = numpy.zeros(self.L), numpy.zeros(self.L)
            self.exp_lambdaFk, self.exp_loglambdaFk = numpy.zeros(self.K), numpy.zeros(self.K)
            self.exp_lambdaGl, self.exp_loglambdaGl = numpy.zeros(self.L), numpy.zeros(self.L)
            # Set each entry and refresh its expectation immediately, so the
            # update always sees the values for its own index.
            for k in range(self.K):
                self.alphaFk_s[k] = self.alpha0
                self.betaFk_s[k] = self.beta0
                self.update_exp_lambdaFk(k)
            for l in range(self.L):
                self.alphaGl_s[l] = self.alpha0
                self.betaGl_s[l] = self.beta0
                self.update_exp_lambdaGl(l)

        # Initialise parameters F, G, S: every precision (tau_*) starts at 1
        # whatever the scheme; the means (mu_*) are filled in below.
        self.mu_F, self.tau_F = numpy.zeros((self.I, self.K)), numpy.ones((self.I, self.K))
        self.mu_G, self.tau_G = numpy.zeros((self.J, self.L)), numpy.ones((self.J, self.L))
        self.mu_S, self.tau_S = numpy.zeros((self.K, self.L)), numpy.ones((self.K, self.L))

        if init_FG == 'kmeans':
            # print(...) with one argument behaves the same on Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.mu_F = kmeans_F.clustering_results

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.mu_G = kmeans_G.clustering_results
        else:
            # 'random' or 'exp'. Draw order (all of F, then all of G) is kept
            # so seeded runs stay reproducible.
            draw_FG = init_FG == 'random'
            for i, k in itertools.product(range(self.I), range(self.K)):
                hyperparam = self.exp_lambdaFk[k] if self.ARD else self.lambdaF[i, k]
                self.mu_F[i, k] = exponential_draw(hyperparam) if draw_FG else 1.0 / hyperparam
            for j, l in itertools.product(range(self.J), range(self.L)):
                hyperparam = self.exp_lambdaGl[l] if self.ARD else self.lambdaG[j, l]
                self.mu_G[j, l] = exponential_draw(hyperparam) if draw_FG else 1.0 / hyperparam

        # Initialise the means of S
        draw_S = init_S == 'random'
        for k, l in itertools.product(range(self.K), range(self.L)):
            hyperparam = self.lambdaS[k, l]
            self.mu_S[k, l] = exponential_draw(hyperparam) if draw_S else 1.0 / hyperparam

        # Compute expectations and variances F, G, S
        self.exp_F, self.var_F = numpy.zeros((self.I, self.K)), numpy.zeros((self.I, self.K))
        self.exp_G, self.var_G = numpy.zeros((self.J, self.L)), numpy.zeros((self.J, self.L))
        self.exp_S, self.var_S = numpy.zeros((self.K, self.L)), numpy.zeros((self.K, self.L))

        for k in range(self.K):
            self.update_exp_F(k)
        for l in range(self.L):
            self.update_exp_G(l)
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.update_exp_S(k, l)

        # Initialise tau and compute expectation
        self.update_tau()
        self.update_exp_tau()
Beispiel #5
0
 def initialise(self,init_S='random',init_FG='random',tauFSG=None):
     """Initialise the variational parameters for F, S, G and tau.

     Args:
         init_S: 'exp' sets muS to 1/lambdaS; 'random' draws every entry
             with exponential_draw.
         init_FG: 'exp' sets muF, muG to 1/lambdaF, 1/lambdaG; 'random'
             draws every entry with exponential_draw; 'kmeans' takes them
             from the KMeans clustering results.
         tauFSG: optional mapping that may supply initial 'tauF', 'tauS'
             and 'tauG'; any missing entry defaults to an array of ones.

     Raises:
         AssertionError: if init_S or init_FG is not a recognised option.
     """
     # None instead of a shared mutable {} default; behaviour is the same.
     tauFSG = {} if tauFSG is None else tauFSG
     self.tauF = tauFSG['tauF'] if 'tauF' in tauFSG else numpy.ones((self.I,self.K))
     self.tauS = tauFSG['tauS'] if 'tauS' in tauFSG else numpy.ones((self.K,self.L))
     self.tauG = tauFSG['tauG'] if 'tauG' in tauFSG else numpy.ones((self.J,self.L))

     assert init_S in ['exp','random'], "Unrecognised init option for S: %s." % init_S
     self.muS = 1./self.lambdaS
     if init_S == 'random':
         for k,l in itertools.product(range(0,self.K),range(0,self.L)):
             self.muS[k,l] = exponential_draw(self.lambdaS[k,l])

     assert init_FG in ['exp','random','kmeans'], "Unrecognised init option for F,G: %s." % init_FG
     self.muF, self.muG = 1./self.lambdaF, 1./self.lambdaG
     if init_FG == 'random':
         for i,k in itertools.product(range(0,self.I),range(0,self.K)):
             self.muF[i,k] = exponential_draw(self.lambdaF[i,k])
         for j,l in itertools.product(range(0,self.J),range(0,self.L)):
             self.muG[j,l] = exponential_draw(self.lambdaG[j,l])
     elif init_FG == 'kmeans':
         # print(...) with one argument behaves the same on Python 2 and 3.
         print("Initialising F using KMeans.")
         kmeans_F = KMeans(self.R,self.M,self.K)
         kmeans_F.initialise()
         kmeans_F.cluster()
         self.muF = kmeans_F.clustering_results

         print("Initialising G using KMeans.")
         kmeans_G = KMeans(self.R.T,self.M.T,self.L)
         kmeans_G.initialise()
         kmeans_G.cluster()
         self.muG = kmeans_G.clustering_results

     # Initialise the expectations and variances
     self.expF, self.varF = numpy.zeros((self.I,self.K)), numpy.zeros((self.I,self.K))
     self.expS, self.varS = numpy.zeros((self.K,self.L)), numpy.zeros((self.K,self.L))
     self.expG, self.varG = numpy.zeros((self.J,self.L)), numpy.zeros((self.J,self.L))

     for k in range(0,self.K):
         self.update_exp_F(k)
     for k,l in itertools.product(range(0,self.K),range(0,self.L)):
         self.update_exp_S(k,l)
     for l in range(0,self.L):
         self.update_exp_G(l)

     # Initialise tau using the updates
     self.update_tau()
     self.update_exp_tau()
    def initialise(self, init_S='random', init_FG='random'):
        """Initialise F, S, G and tau.

        Args:
            init_S: 'exp' sets S to 1/lambdaS; 'random' draws every entry
                with exponential_draw.
            init_FG: 'exp' sets F, G to 1/lambdaF, 1/lambdaG; 'random' draws
                every entry with exponential_draw; 'kmeans' uses the KMeans
                clustering results plus 0.2.

        Raises:
            AssertionError: if init_S or init_FG is not a recognised option.
        """
        assert init_S in [
            'random', 'exp'
        ], "Unknown initialisation option for S: %s. Should be 'random' or 'exp'." % init_S
        # The message previously said "for S" and had an unbalanced quote.
        assert init_FG in [
            'random', 'exp', 'kmeans'
        ], "Unknown initialisation option for F,G: %s. Should be 'random', 'exp', or 'kmeans'." % init_FG

        self.S = 1. / self.lambdaS
        if init_S == 'random':
            for k, l in itertools.product(range(0, self.K), range(0, self.L)):
                self.S[k, l] = exponential_draw(self.lambdaS[k, l])

        self.F, self.G = 1. / self.lambdaF, 1. / self.lambdaG
        if init_FG == 'random':
            for i, k in itertools.product(range(0, self.I), range(0, self.K)):
                self.F[i, k] = exponential_draw(self.lambdaF[i, k])
            for j, l in itertools.product(range(0, self.J), range(0, self.L)):
                self.G[j, l] = exponential_draw(self.lambdaG[j, l])
        elif init_FG == 'kmeans':
            # print(...) with one argument behaves the same on Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            # NOTE(review): the 0.2 offset presumably keeps entries away from
            # zero - confirm against the model's requirements.
            self.F = kmeans_F.clustering_results + 0.2

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2

        # tau starts at alpha_s() / beta_s().
        self.tau = self.alpha_s() / self.beta_s()
Beispiel #7
0
    def initialise(self, init_FG='random', init_S='random'):
        """Initialise F, S, G, tau, and lambdaFk, lambdaGl (if ARD).

        Args:
            init_FG: initialisation scheme for F and G; must be in
                OPTIONS_INIT_FG. 'kmeans' uses the KMeans clustering results
                plus 0.2, 'random' draws each entry with exponential_draw,
                and any other accepted value uses 1 / hyperparameter.
            init_S: initialisation scheme for S; must be in OPTIONS_INIT_S.
                'random' draws each entry with exponential_draw, any other
                accepted value uses 1 / lambdaS.

        Raises:
            AssertionError: if either option is not an accepted value.
        """
        assert init_FG in OPTIONS_INIT_FG, "Unknown initialisation option for F and G: %s. Should be in %s." % (
            init_FG, OPTIONS_INIT_FG)
        assert init_S in OPTIONS_INIT_S, "Unknown initialisation option for S: %s. Should be in %s." % (
            init_S, OPTIONS_INIT_S)

        self.F = numpy.zeros((self.I, self.K))
        self.S = numpy.zeros((self.K, self.L))
        self.G = numpy.zeros((self.J, self.L))
        self.lambdaFk = numpy.zeros(self.K)
        self.lambdaGl = numpy.zeros(self.L)

        # Initialise lambdaFk, lambdaGl (left at zero when ARD is off)
        if self.ARD:
            for k in range(self.K):
                self.lambdaFk[k] = self.alpha0 / self.beta0
            for l in range(self.L):
                self.lambdaGl[l] = self.alpha0 / self.beta0

        # Initialise F, G
        if init_FG == 'kmeans':
            # print(...) with one argument behaves the same on Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            # NOTE(review): the 0.2 offset presumably keeps entries away from
            # zero - confirm against the model's requirements.
            self.F = kmeans_F.clustering_results + 0.2

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2
        else:
            # 'random' or 'exp'. Draw order (all of F, then all of G) is kept
            # so seeded runs stay reproducible.
            draw_FG = init_FG == 'random'
            for i, k in itertools.product(range(self.I), range(self.K)):
                hyperparam = self.lambdaFk[k] if self.ARD else self.lambdaF[i, k]
                self.F[i, k] = exponential_draw(hyperparam) if draw_FG else 1.0 / hyperparam
            for j, l in itertools.product(range(self.J), range(self.L)):
                hyperparam = self.lambdaGl[l] if self.ARD else self.lambdaG[j, l]
                self.G[j, l] = exponential_draw(hyperparam) if draw_FG else 1.0 / hyperparam

        # Initialise S
        draw_S = init_S == 'random'
        for k, l in itertools.product(range(self.K), range(self.L)):
            hyperparam = self.lambdaS[k, l]
            self.S[k, l] = exponential_draw(hyperparam) if draw_S else 1.0 / hyperparam

        # Initialise tau with a gamma_draw on alpha_s(), beta_s()
        self.tau = gamma_draw(self.alpha_s(), self.beta_s())
Beispiel #8
0
    def initialise(self, init_FG='random', init_S='random', expo_prior=1.):
        """Initialise F, S and G.

        Args:
            init_FG: scheme for F and G; must be in OPTIONS_INIT_FG. Handled
                here: 'ones', 'random' (numpy.random.rand), 'exponential'
                (exponential_draw(expo_prior) per entry) and 'kmeans'
                (KMeans clustering results plus 0.2).
            init_S: scheme for S; must be in OPTIONS_INIT_S. Handled here:
                'ones', 'random' and 'exponential'.
            expo_prior: parameter passed to exponential_draw for the
                'exponential' schemes.

        Raises:
            AssertionError: if either option is not an accepted value.
        """
        assert init_FG in OPTIONS_INIT_FG, "Unrecognised init option for F,G: %s. Should be one in %s." % (
            init_FG, OPTIONS_INIT_FG)
        assert init_S in OPTIONS_INIT_S, "Unrecognised init option for S: %s. Should be one in %s." % (
            init_S, OPTIONS_INIT_S)

        # S is initialised before F and G so the order of random draws is
        # unchanged for seeded runs.
        if init_S == 'ones':
            self.S = numpy.ones((self.K, self.L))
        elif init_S == 'random':
            self.S = numpy.random.rand(self.K, self.L)
        elif init_S == 'exponential':
            self.S = numpy.empty((self.K, self.L))
            for k, l in itertools.product(range(0, self.K), range(0, self.L)):
                self.S[k, l] = exponential_draw(expo_prior)

        if init_FG == 'ones':
            self.F = numpy.ones((self.I, self.K))
            self.G = numpy.ones((self.J, self.L))
        elif init_FG == 'random':
            self.F = numpy.random.rand(self.I, self.K)
            self.G = numpy.random.rand(self.J, self.L)
        elif init_FG == 'exponential':
            self.F = numpy.empty((self.I, self.K))
            self.G = numpy.empty((self.J, self.L))
            for i, k in itertools.product(range(0, self.I), range(0, self.K)):
                self.F[i, k] = exponential_draw(expo_prior)
            for j, l in itertools.product(range(0, self.J), range(0, self.L)):
                self.G[j, l] = exponential_draw(expo_prior)
        elif init_FG == 'kmeans':
            # print(...) with one argument behaves the same on Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            # NOTE(review): the 0.2 offset presumably keeps entries away from
            # zero - confirm against the model's requirements.
            self.F = kmeans_F.clustering_results + 0.2

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2
 def setUp(self):
     """Build the KMeans instance under test and store it on self.km."""
     options = dict(iterations=10, useLUT=True)
     self.km = KMeans(**options)
from typing import List

from kmeans.data_point import DataPoint
from kmeans.kmeans import KMeans

if __name__ == "__main__":
    # Demo: split three 3-dimensional points into two clusters and print
    # the points that ended up in each cluster.
    demo_points: List[DataPoint] = [
        DataPoint([2.0, 1.0, 1.0]),
        DataPoint([2.0, 2.0, 5.0]),
        DataPoint([3.0, 1.5, 2.5]),
    ]
    clusterer: KMeans[DataPoint] = KMeans(2, demo_points)
    found: List[KMeans.Cluster] = clusterer.run()
    for index, cluster in enumerate(found):
        print(f"Cluster {index}: {cluster.points}")
  # Command-line options for the timing benchmark.
  # NOTE(review): '-k' and '-n' are required=True, so their defaults can
  # never take effect.
  parser.add_argument('-c', help='Max iterations before convergence', type=int, default=300)
  parser.add_argument('-t', help="Number of trials", type=int, default=1)
  parser.add_argument('-k', help='Value of \'k\' (centroids)', required=True, type=int, default=2)
  parser.add_argument('-n', help='data size', required=True, type=int, default=1000)

  # Get/parse arguments
  args = parser.parse_args()


  # Number of data points
  no_points = args.n

  # Write one CSV row per trial: data size, k, trial number, elapsed seconds.
  # NOTE(review): the file is opened in 'wb' but str is written to it; that
  # works on Python 2 only (this fragment also uses xrange).
  with open('./trials/time.{0}.{1}.{2}.csv'.format(args.n, args.k, args.t), 'wb') as f:
    f.write('data size,k,trial,kmeans time (s)\n')
    for trial in xrange(1, args.t + 1):
      # KMeans Object
      # NOTE(review): args.log is read here, but no option defining it is
      # visible in this fragment - presumably added earlier; confirm.
      k = KMeans(class_name="test", k=args.k, log=args.log, max_iterations=args.c)

      # Generate Data: no_points random [time, coins, kills] rows
      data = [ 
        [
          random.uniform(0.0, 1.0), # time
          random.uniform(0.0, 1.5), # coins, can be up to 1.5 times the amount of coins
          random.uniform(0.0, 1.0)  # kills
        ] for i in range(no_points)
      ]

      # Insert data (per the original author, this forces the KMeans
      # calculation) and time only that call.
      # NOTE(review): time.clock() was removed in Python 3.8.
      start = time.clock()
      k.put(data)
      f.write('{0},{1},{2},{3}\n'.format(args.n, args.k, trial, (time.clock() - start) ))
Beispiel #12
0
from kmeans.data import Data
from kmeans.settings import Settings
from kmeans.kmeans import KMeans

if __name__ == '__main__':
    # Load the run parameters and echo them before doing any work.
    settings = Settings()
    settings.read_parameters()
    settings.print_parameters()

    # Read the input data set, then cluster it with the loaded settings.
    dataset = Data(directory='../data/', data='data.txt')
    KMeans(dataset, settings).run()