Esempio n. 1
0
# OR-ed into the kmeans flags word in the loop below
# (presumably the number of threads to use -- TODO confirm against the yael docs).
nt = 1
# Integer buffer handed to yael.kmeans; the loop below reads assign[i] back
# as the index of the centroid that point i was assigned to.
assign = yael.ivec(n)
  

def d_chi2(a, b):
  """Return the Chi-square distance term between two scalar components.

  The term is (a - b)^2 / (a + b).  Equal components contribute exactly
  0.0; this also covers a == b == 0, where the plain formula would be the
  undefined 0/0 and raise ZeroDivisionError.

  Distinct components that sum to zero (a == -b != 0) have no meaningful
  Chi-square term and still raise ZeroDivisionError.
  """
  if a == b:
    # Identical components: zero distance, and no 0/0 when both are zero.
    return 0.0
  return (a - b) ** 2 / (a + b)

# Banner describing the problem size.
print("clustering %d uniform %dD pts in %d centroids" % (n, d, k))

# (label, kmeans flag) pairs, one per distance variant that is benchmarked.
variants = (
  ("L2", 0),
  ("L1", yael.KMEANS_L1),
  ("Chi2", yael.KMEANS_CHI2),
)

for name, flags in variants:
  print("%s clustering" % name)

  # The timer spans all ten runs, including the Python-side error sums.
  t0 = time.time()

  # Combine the variant flag with nt and the quiet flag.
  flags = flags | nt | yael.KMEANS_QUIET

  for run in range(10):
    # A different value per run (presumably the random seed -- TODO confirm).
    seed = 12345 + run
    final_err = yael.kmeans(d, n, k, 35, pts, flags, seed, 10,
                            cents, None, assign, None)

    # yael.fmat_print(cents, d, k)

    # Pair each point component with the matching component of the
    # centroid the point was assigned to.
    pairs = [(cents[assign[i] * d + j], pts[i * d + j])
             for i in range(n) for j in range(d)]

    l1_err = sum([abs(c - p) for c, p in pairs])
    print("run %3d L1 err = %g" % (run, l1_err))

    chi2_err = sum([d_chi2(c, p) for c, p in pairs])
    print("run %3d Chi2 err = %g" % (run, chi2_err))

  print("time: %.3f s" % (time.time() - t0))
    return (a - b)**2 / (a + b)


print "clustering %d uniform %dD pts in %d centroids" % (n, d, k)

for name, flags in ("L2", 0), ("L1", yael.KMEANS_L1), ("Chi2",
                                                       yael.KMEANS_CHI2):
    print "%s clustering" % name

    t0 = time.time()

    flags |= nt | yael.KMEANS_QUIET

    for run in range(10):

        final_err = yael.kmeans(d, n, k, 35, pts, flags, 12345 + run, 10,
                                cents, None, assign, None)

        # yael.fmat_print(cents, d, k)

        print "run %3d L1 err = %g" % (run,
                                       sum([
                                           abs(cents[assign[i] * d + j] -
                                               pts[i * d + j])
                                           for i in range(n) for j in range(d)
                                       ]))

        print "run %3d Chi2 err = %g" % (run,
                                         sum([
                                             d_chi2(cents[assign[i] * d + j],
                                                    pts[i * d + j])
                                             for i in range(n)
Esempio n. 3
0
from yael import yael
import time

# Problem size and k-means settings.
k = 100        # number of clusters to create
d = 128        # dimensionality of the vectors
n = 10000      # number of vectors
nt = 10        # number of threads to use
niter = 30     # number of iterations
redo = 1       # number of redo

# Random set of d * n input values.
v = yael.fvec_new_rand(d * n)

# Left over from the original (looks like the Matlab-interface call; its
# parameter values differ from the ones used here):
#[centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

# Output buffers handed to yael.kmeans.
centroids = yael.fvec_new(d * k)    # output: centroids
dis = yael.fvec_new(n)              # point-to-cluster distance
assign = yael.ivec_new(n)           # quantization index of each point

# Output: number of vectors assigned to each centroid.  The raw buffer is
# wrapped in an IntArray, which is what gets indexed when printing below.
nassign = yael.IntArray.acquirepointer(yael.ivec_new(k))

# Time only the clustering call itself.
started = time.time()
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
elapsed = time.time() - started

cluster_sizes = [nassign[idx] for idx in range(k)]
print(cluster_sizes)
print('kmeans performed in %.3fs' % elapsed)