Example #1
0
def kmeans(v, k, distance_type=2, nt=1, niter=30, seed=0, redo=1,
           verbose=True, normalize=False, init='random',
           output='centroids'):
    """Cluster the rows of ``v`` into ``k`` groups using ``yael.kmeans``.

    Args:
        v: 2-D array whose shape unpacks as (number of points, dimension).
            It is first passed to ``_check_row_float32``, which presumably
            enforces a float32 row layout -- confirm against that helper.
        k: number of centroids to compute.
        distance_type: 1 adds ``yael.KMEANS_L1``, 3 adds
            ``yael.KMEANS_CHI2``; 2 and any other value add no flag.
        nt: initial value of the flag word handed to yael (presumably the
            thread count -- confirm against the yael documentation).
        niter: forwarded unchanged to ``yael.kmeans``.
        seed: forwarded unchanged to ``yael.kmeans``.
        redo: forwarded unchanged to ``yael.kmeans``.
        verbose: when false, ``yael.KMEANS_QUIET`` is added to the flags.
        normalize: when true, ``yael.KMEANS_NORMALIZE_CENTS`` is added.
        init: 'random' adds ``yael.KMEANS_INIT_RANDOM`` and 'kmeans++' adds
            ``yael.KMEANS_INIT_BERKELEY``; any other value adds no flag.
        output: 'centroids' returns only the centroid array; any other
            value returns the full result tuple.

    Returns:
        The (k, d) float32 centroid array, or the tuple
        ``(centroids, qerr, dis, assign, nassign)`` where ``qerr`` is the
        value returned by ``yael.kmeans``, ``dis`` is a float32 array of
        length n, ``assign`` an int32 array of length n and ``nassign`` an
        int32 array of length k.

    Raises:
        RuntimeError: if ``yael.kmeans`` returns a negative value.
    """
    _check_row_float32(v)
    num_points, dim = v.shape

    # Buffers handed to yael by reference and returned to the caller.
    centroids = numpy.zeros((k, dim), dtype=numpy.float32)
    point_dists = numpy.empty(num_points, dtype=numpy.float32)
    point_labels = numpy.empty(num_points, dtype=numpy.int32)
    cluster_sizes = numpy.empty(k, dtype=numpy.int32)

    # The flag word starts out as nt; option bits are OR-ed on top of it.
    options = nt
    if not verbose:
        options |= yael.KMEANS_QUIET

    # Distance selection: only 1 and 3 contribute a bit.
    if distance_type == 2:
        pass
    elif distance_type == 1:
        options |= yael.KMEANS_L1
    elif distance_type == 3:
        options |= yael.KMEANS_CHI2

    # Initialisation selection: unrecognised names contribute nothing.
    if init == 'random':
        options |= yael.KMEANS_INIT_RANDOM
    elif init == 'kmeans++':
        options |= yael.KMEANS_INIT_BERKELEY

    if normalize:
        options |= yael.KMEANS_NORMALIZE_CENTS

    # Wrap the arrays in the same order the original call evaluated them.
    points_ref = yael.numpy_to_fvec_ref(v)
    centroids_ref = yael.numpy_to_fvec_ref(centroids)
    dists_ref = yael.numpy_to_fvec_ref(point_dists)
    labels_ref = yael.numpy_to_ivec_ref(point_labels)
    sizes_ref = yael.numpy_to_ivec_ref(cluster_sizes)

    quant_error = yael.kmeans(
        dim, num_points, k, niter, points_ref, options, seed, redo,
        centroids_ref, dists_ref, labels_ref, sizes_ref)

    if quant_error < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output == 'centroids':
        return centroids
    return (centroids, quant_error, point_dists, point_labels, cluster_sizes)
Example #2
0
def kmeans(v, k, distance_type=2, nt=1, niter=30, seed=0, redo=1,
           verbose=True, normalize=False, init='random',
           output='centroids'):
    """Run ``yael.kmeans`` on the rows of ``v`` to obtain ``k`` centroids.

    Args:
        v: 2-D array; ``v.shape`` is unpacked as (n points, d dimensions).
            Checked up front by ``_check_row_float32`` (presumably a float32
            row-layout check -- confirm against that helper).
        k: number of centroids.
        distance_type: 1 selects ``yael.KMEANS_L1`` and 3 selects
            ``yael.KMEANS_CHI2``; 2 or any other value sets no flag.
        nt: starting value of the flag word passed to yael (presumably the
            number of threads -- confirm against the yael documentation).
        niter: passed through to ``yael.kmeans`` unchanged.
        seed: passed through to ``yael.kmeans`` unchanged.
        redo: passed through to ``yael.kmeans`` unchanged.
        verbose: a false value sets ``yael.KMEANS_QUIET``.
        normalize: a true value sets ``yael.KMEANS_NORMALIZE_CENTS``.
        init: 'random' sets ``yael.KMEANS_INIT_RANDOM``, 'kmeans++' sets
            ``yael.KMEANS_INIT_BERKELEY``; other values set no flag.
        output: 'centroids' yields just the centroid array; anything else
            yields the full result tuple.

    Returns:
        Either the (k, d) float32 centroid array, or the tuple
        ``(centroids, qerr, dis, assign, nassign)``: ``qerr`` is whatever
        ``yael.kmeans`` returned, ``dis`` is float32 of length n,
        ``assign`` is int32 of length n, ``nassign`` is int32 of length k.

    Raises:
        RuntimeError: when the value returned by ``yael.kmeans`` is negative.
    """
    _check_row_float32(v)
    n_rows, n_cols = v.shape

    # Result arrays; yael receives references to them and they are
    # returned to the caller afterwards.
    centroids = numpy.zeros((k, n_cols), dtype=numpy.float32)
    distances = numpy.empty(n_rows, dtype=numpy.float32)
    assignment = numpy.empty(n_rows, dtype=numpy.int32)
    counts = numpy.empty(k, dtype=numpy.int32)

    # nt seeds the flag word; every option below ORs in one more bit.
    flag_word = nt
    if not verbose:
        flag_word |= yael.KMEANS_QUIET

    # Only distance types 1 and 3 add a bit to the flag word.
    if distance_type == 2:
        pass
    elif distance_type == 1:
        flag_word |= yael.KMEANS_L1
    elif distance_type == 3:
        flag_word |= yael.KMEANS_CHI2

    # Unknown init names leave the flag word untouched.
    if init == 'random':
        flag_word |= yael.KMEANS_INIT_RANDOM
    elif init == 'kmeans++':
        flag_word |= yael.KMEANS_INIT_BERKELEY

    if normalize:
        flag_word |= yael.KMEANS_NORMALIZE_CENTS

    # References are created in the same order as in the original call.
    v_ref = yael.numpy_to_fvec_ref(v)
    centroids_ref = yael.numpy_to_fvec_ref(centroids)
    distances_ref = yael.numpy_to_fvec_ref(distances)
    assignment_ref = yael.numpy_to_ivec_ref(assignment)
    counts_ref = yael.numpy_to_ivec_ref(counts)

    error = yael.kmeans(
        n_cols, n_rows, k, niter, v_ref, flag_word, seed, redo,
        centroids_ref, distances_ref, assignment_ref, counts_ref)

    if error < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output == 'centroids':
        return centroids
    return (centroids, error, distances, assignment, counts)