nt = 1  # thread count; OR-ed into the kmeans flags below

assign = yael.ivec(n)  # filled by yael.kmeans with one centroid index per point


def d_chi2(a, b):
    """Return the chi-square term (a - b)^2 / (a + b) for one coordinate pair.

    When both coordinates are zero the expression is 0/0; that pair is
    counted as a zero contribution (the usual chi-square convention)
    instead of raising ZeroDivisionError.
    """
    if a == 0 and b == 0:
        return 0.0
    return (a - b) ** 2 / (a + b)


def _assigned_pairs():
    """Yield (centroid coordinate, point coordinate) for every point and dimension.

    Points are visited in index order and, within a point, dimensions in
    index order. Both `cents` and `pts` are read as flat arrays with `d`
    consecutive values per vector.
    """
    for i in range(n):
        cent_base = assign[i] * d  # start of the centroid assigned to point i
        pt_base = i * d            # start of point i
        for j in range(d):
            yield cents[cent_base + j], pts[pt_base + j]


# Single-argument print(...) produces the same output on Python 2 and 3.
print("clustering %d uniform %dD pts in %d centroids" % (n, d, k))

for name, flags in ("L2", 0), ("L1", yael.KMEANS_L1), ("Chi2", yael.KMEANS_CHI2):
    print("%s clustering" % name)
    t0 = time.time()
    flags |= nt | yael.KMEANS_QUIET
    for run in range(10):
        # 12345 + run varies per run -- presumably the random seed; confirm
        # against the yael.kmeans signature.
        final_err = yael.kmeans(d, n, k, 35, pts, flags, 12345 + run, 10,
                                cents, None, assign, None)
        # yael.fmat_print(cents, d, k)
        # Whatever metric was used for clustering, report both the L1 and the
        # chi-square error of the resulting assignment.
        print("run %3d L1 err = %g" % (
            run, sum(abs(c - p) for c, p in _assigned_pairs())))
        print("run %3d Chi2 err = %g" % (
            run, sum(d_chi2(c, p) for c, p in _assigned_pairs())))
    # NOTE: this elapsed time covers the 10 kmeans calls *and* the
    # Python-level error sums above, not the clustering alone.
    print("time: %.3f s" % (time.time() - t0))
return (a - b)**2 / (a + b) print "clustering %d uniform %dD pts in %d centroids" % (n, d, k) for name, flags in ("L2", 0), ("L1", yael.KMEANS_L1), ("Chi2", yael.KMEANS_CHI2): print "%s clustering" % name t0 = time.time() flags |= nt | yael.KMEANS_QUIET for run in range(10): final_err = yael.kmeans(d, n, k, 35, pts, flags, 12345 + run, 10, cents, None, assign, None) # yael.fmat_print(cents, d, k) print "run %3d L1 err = %g" % (run, sum([ abs(cents[assign[i] * d + j] - pts[i * d + j]) for i in range(n) for j in range(d) ])) print "run %3d Chi2 err = %g" % (run, sum([ d_chi2(cents[assign[i] * d + j], pts[i * d + j]) for i in range(n)
import time

from yael import yael

# Problem size.
k = 100      # how many clusters to build
d = 128      # dimension of each vector
n = 10000    # how many vectors to cluster

# Solver settings.
nt = 10      # number of threads to use
niter = 30   # iterations per clustering run
redo = 1     # number of redo

# Input data: d * n random floats.
v = yael.fvec_new_rand(d * n)

# Original author's note (Matlab-style call), kept for reference:
# [centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

# Output buffers handed to yael.kmeans.
centroids = yael.fvec_new(d * k)   # centroids
dis = yael.fvec_new(n)             # point-to-cluster distance
assign = yael.ivec_new(n)          # quantization index of each point
# Per-centroid counts; wrapped in an IntArray so the buffer can be indexed
# from Python when printing below.
nassign = yael.IntArray.acquirepointer(yael.ivec_new(k))

# Time only the clustering call itself.
t1 = time.time()
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
t2 = time.time()

# Single-argument print(...) produces the same output as the print statement.
print([nassign[i] for i in range(k)])
print('kmeans performed in %.3fs' % (t2 - t1))