def kmeans(v, k, distance_type=2, nt=1, niter=30, seed=0, redo=1,
           verbose=True, normalize=False, init='random',
           output='centroids'):
    """Cluster the rows of *v* into *k* groups using Yael's k-means.

    Parameters
    ----------
    v : numpy array
        Input data; its ``shape`` is unpacked as ``(n, d)``.  It is first
        passed to ``_check_row_float32`` (presumably this enforces a
        row-major float32 matrix -- confirm against that helper).
    k : int
        Number of centroids to compute.
    distance_type : {2, 1, 3}
        ``2`` adds no flag (library default), ``1`` sets
        ``yael.KMEANS_L1`` and ``3`` sets ``yael.KMEANS_CHI2``.
    nt : int
        Used as the initial value of the flags word handed to
        ``yael.kmeans`` (presumably the thread count encoded in the low
        bits -- confirm against the Yael documentation).
    niter, seed, redo :
        Forwarded unchanged to ``yael.kmeans`` (presumably the iteration
        count, the random seed and the number of restarts -- confirm).
    verbose : bool
        When false, ``yael.KMEANS_QUIET`` is set.
    normalize : bool
        When true, ``yael.KMEANS_NORMALIZE_CENTS`` is set.
    init : {'random', 'kmeans++'}
        ``'random'`` sets ``yael.KMEANS_INIT_RANDOM`` and ``'kmeans++'``
        sets ``yael.KMEANS_INIT_BERKELEY``.
    output : str
        ``'centroids'`` returns only the centroid matrix; any other value
        returns the full result tuple.

    Returns
    -------
    centroids : float32 array of shape ``(k, d)``
        Returned alone when ``output == 'centroids'``.
    (centroids, qerr, dis, assign, nassign) : tuple
        Returned otherwise.  ``qerr`` is the value returned by
        ``yael.kmeans``; ``dis`` is a float32 array of length ``n``,
        ``assign`` an int32 array of length ``n`` and ``nassign`` an int32
        array of length ``k``, all filled in by the library.

    Raises
    ------
    ValueError
        If ``distance_type`` or ``init`` is not one of the supported
        values.
    RuntimeError
        If ``yael.kmeans`` returns a negative value.
    """
    # Reject unsupported options before doing any work.  Previously an
    # unknown value fell through the if/elif chain and the clustering ran
    # silently with the default metric / initialisation instead.
    if distance_type not in (1, 2, 3):
        raise ValueError(
            "kmeans: unsupported distance_type %r (expected 1, 2 or 3)"
            % (distance_type,))
    if init not in ('random', 'kmeans++'):
        raise ValueError(
            "kmeans: unsupported init %r (expected 'random' or 'kmeans++')"
            % (init,))

    _check_row_float32(v)
    n, d = v.shape

    # Output buffers, written in place by the native routine through the
    # *_ref wrappers below.
    centroids = numpy.zeros((k, d), dtype=numpy.float32)
    dis = numpy.empty(n, dtype=numpy.float32)
    assign = numpy.empty(n, dtype=numpy.int32)
    nassign = numpy.empty(k, dtype=numpy.int32)

    # The flags word starts from nt; option bits are OR-ed on top of it.
    flags = nt
    if not verbose:
        flags |= yael.KMEANS_QUIET

    if distance_type == 1:
        flags |= yael.KMEANS_L1
    elif distance_type == 3:
        flags |= yael.KMEANS_CHI2
    # distance_type == 2 is the library default: no flag needed.

    if init == 'random':
        flags |= yael.KMEANS_INIT_RANDOM  # also the default
    else:  # 'kmeans++', guaranteed by the validation above
        flags |= yael.KMEANS_INIT_BERKELEY

    if normalize:
        flags |= yael.KMEANS_NORMALIZE_CENTS

    qerr = yael.kmeans(d, n, k, niter,
                       yael.numpy_to_fvec_ref(v), flags, seed, redo,
                       yael.numpy_to_fvec_ref(centroids),
                       yael.numpy_to_fvec_ref(dis),
                       yael.numpy_to_ivec_ref(assign),
                       yael.numpy_to_ivec_ref(nassign))

    # A negative return value is treated as failure.
    if qerr < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output == 'centroids':
        return centroids
    return (centroids, qerr, dis, assign, nassign)