Exemple #1
0
def kmeans(v,
           k,
           distance_type=2,
           nt=1,
           niter=30,
           seed=0,
           redo=1,
           verbose=True,
           normalize=False,
           init='random',
           output='centroids'):
    """Cluster the rows of ``v`` into ``k`` groups by calling ``yael.kmeans``.

    Parameters:
        v: 2-D matrix of shape (n, d); validated by ``_check_row_float32``.
        k: number of centroids; the centroid buffer has shape (k, d).
        distance_type: 1 adds ``yael.KMEANS_L1`` and 3 adds
            ``yael.KMEANS_CHI2`` to the flags; 2 (the default) and any
            other value add nothing.
        nt: starting value of the flags word that the option bits are OR-ed
            into (presumably the thread count -- confirm against yael).
        niter, seed, redo: forwarded unchanged to ``yael.kmeans``.
        verbose: when false, ``yael.KMEANS_QUIET`` is added to the flags.
        normalize: when true, ``yael.KMEANS_NORMALIZE_CENTS`` is added.
        init: 'random' adds ``yael.KMEANS_INIT_RANDOM``, 'kmeans++' adds
            ``yael.KMEANS_INIT_BERKELEY``; other values add nothing.
        output: 'centroids' returns only the centroid matrix; anything else
            returns the full result tuple.

    Returns:
        The (k, d) float32 centroid matrix, or the tuple
        ``(centroids, qerr, dis, assign, nassign)`` where ``qerr`` is the
        value returned by ``yael.kmeans``, ``dis`` is float32 of length n,
        ``assign`` is int32 of length n and ``nassign`` is int32 of length k.

    Raises:
        RuntimeError: if ``yael.kmeans`` returns a negative value.
    """
    _check_row_float32(v)
    num_points, dim = v.shape

    # Buffers handed to yael by reference; they are returned to the caller
    # afterwards, so yael is expected to have filled them.
    centroid_buf = numpy.zeros((k, dim), dtype=numpy.float32)
    dis_buf = numpy.empty(num_points, dtype=numpy.float32)
    assign_buf = numpy.empty(num_points, dtype=numpy.int32)
    nassign_buf = numpy.empty(k, dtype=numpy.int32)

    # Option bits are OR-ed on top of nt.
    flags = nt if verbose else nt | yael.KMEANS_QUIET

    # distance_type == 2 needs no flag (it is the default).
    if distance_type == 1:
        flags |= yael.KMEANS_L1
    elif distance_type == 3:
        flags |= yael.KMEANS_CHI2

    if init == 'kmeans++':
        flags |= yael.KMEANS_INIT_BERKELEY
    elif init == 'random':
        flags |= yael.KMEANS_INIT_RANDOM  # also default

    if normalize:
        flags |= yael.KMEANS_NORMALIZE_CENTS

    qerr = yael.kmeans(
        dim, num_points, k, niter,
        yael.numpy_to_fvec_ref(v),
        flags, seed, redo,
        yael.numpy_to_fvec_ref(centroid_buf),
        yael.numpy_to_fvec_ref(dis_buf),
        yael.numpy_to_ivec_ref(assign_buf),
        yael.numpy_to_ivec_ref(nassign_buf))

    # A negative return value is treated as a failed clustering.
    if qerr < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output != 'centroids':
        return (centroid_buf, qerr, dis_buf, assign_buf, nassign_buf)
    return centroid_buf
Exemple #2
0
def kmeans(v, k,
           distance_type=2,
           nt=1,
           niter=30,
           seed=0,
           redo=1,
           verbose=True,
           normalize=False,
           init='random',
           output='centroids'):
    """Run yael's k-means on the rows of ``v`` and return ``k`` centroids.

    Parameters:
        v: 2-D matrix of shape (n, d); validated by ``_check_row_float32``.
        k: number of centroids; the centroid buffer has shape (k, d).
        distance_type: 1 adds ``yael.KMEANS_L1`` and 3 adds
            ``yael.KMEANS_CHI2`` to the flags; 2 (the default) and any
            other value add nothing.
        nt: starting value of the flags word that the option bits are OR-ed
            into (presumably the thread count -- confirm against yael).
        niter, seed, redo: forwarded unchanged to ``yael.kmeans``.
        verbose: when false, ``yael.KMEANS_QUIET`` is added to the flags.
        normalize: when true, ``yael.KMEANS_NORMALIZE_CENTS`` is added.
        init: 'random' adds ``yael.KMEANS_INIT_RANDOM``, 'kmeans++' adds
            ``yael.KMEANS_INIT_BERKELEY``; other values add nothing.
        output: 'centroids' returns only the centroid matrix; anything else
            returns the full result tuple.

    Returns:
        The (k, d) float32 centroid matrix, or the tuple
        ``(centroids, qerr, dis, assign, nassign)`` where ``qerr`` is the
        value returned by ``yael.kmeans``, ``dis`` is float32 of length n,
        ``assign`` is int32 of length n and ``nassign`` is int32 of length k.

    Raises:
        RuntimeError: if ``yael.kmeans`` returns a negative value.
    """
    _check_row_float32(v)
    num_points, dim = v.shape

    # Buffers handed to yael by reference; they are returned to the caller
    # afterwards, so yael is expected to have filled them.
    centroid_buf = numpy.zeros((k, dim), dtype=numpy.float32)
    dis_buf = numpy.empty(num_points, dtype=numpy.float32)
    assign_buf = numpy.empty(num_points, dtype=numpy.int32)
    nassign_buf = numpy.empty(k, dtype=numpy.int32)

    # Option bits are OR-ed on top of nt.
    flags = nt if verbose else nt | yael.KMEANS_QUIET

    # distance_type == 2 needs no flag (it is the default).
    if distance_type == 1:
        flags |= yael.KMEANS_L1
    elif distance_type == 3:
        flags |= yael.KMEANS_CHI2

    if init == 'kmeans++':
        flags |= yael.KMEANS_INIT_BERKELEY
    elif init == 'random':
        flags |= yael.KMEANS_INIT_RANDOM  # also default

    if normalize:
        flags |= yael.KMEANS_NORMALIZE_CENTS

    qerr = yael.kmeans(
        dim, num_points, k, niter,
        yael.numpy_to_fvec_ref(v),
        flags, seed, redo,
        yael.numpy_to_fvec_ref(centroid_buf),
        yael.numpy_to_fvec_ref(dis_buf),
        yael.numpy_to_ivec_ref(assign_buf),
        yael.numpy_to_ivec_ref(nassign_buf))

    # A negative return value is treated as a failed clustering.
    if qerr < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output != 'centroids':
        return (centroid_buf, qerr, dis_buf, assign_buf, nassign_buf)
    return centroid_buf
Exemple #3
0
def extract_rows_cols(K, subset_rows, subset_cols):
    """Return ``K[numpy.ix_(subset_rows, subset_cols)]`` via yael.

    The equivalent pure-numpy indexing is slow, hence the C routine.

    Parameters:
        K: 2-D matrix; validated by ``_check_row_float32``.
        subset_rows: row indices into ``K``; validated by
            ``_check_row_int32``.
        subset_cols: column indices into ``K``; validated by
            ``_check_row_int32``.

    Returns:
        A new float32 array of shape ``(subset_rows.size, subset_cols.size)``.
        If either subset is empty the (empty) result is returned without
        calling into yael.

    Raises:
        AssertionError: if a non-empty subset holds an index outside ``K``.
    """
    _check_row_float32(K)
    _check_row_int32(subset_rows)
    _check_row_int32(subset_cols)
    nr = subset_rows.size
    nc = subset_cols.size
    # min()/max() raise on zero-size arrays, so an empty subset skips the
    # range check instead of crashing.
    assert nr == 0 or (subset_rows.min() >= 0
                       and subset_rows.max() < K.shape[0])
    assert nc == 0 or (subset_cols.min() >= 0
                       and subset_cols.max() < K.shape[1])
    Ksub = numpy.empty((nr, nc), dtype=numpy.float32)
    if nr == 0 or nc == 0:
        # Nothing to copy.
        return Ksub
    # yael receives the flat buffer plus a single leading dimension. For a
    # row-major K the stride between consecutive rows is the row length
    # K.shape[1]; K.shape[0] only matches it for square matrices.
    # NOTE(review): confirm against yael's fmat_get_rows_cols signature.
    yael.fmat_get_rows_cols(yael.numpy_to_fvec_ref(K), K.shape[1], nc,
                            yael.numpy_to_ivec_ref(subset_cols), nr,
                            yael.numpy_to_ivec_ref(subset_rows),
                            yael.numpy_to_fvec_ref(Ksub))
    return Ksub
Exemple #4
0
def extract_rows_cols(K, subset_rows, subset_cols):
    """Return ``K[numpy.ix_(subset_rows, subset_cols)]`` via yael.

    The equivalent pure-numpy indexing is slow, hence the C routine.

    Parameters:
        K: 2-D matrix; validated by ``_check_row_float32``.
        subset_rows: row indices into ``K``; validated by
            ``_check_row_int32``.
        subset_cols: column indices into ``K``; validated by
            ``_check_row_int32``.

    Returns:
        A new float32 array of shape ``(subset_rows.size, subset_cols.size)``.
        If either subset is empty the (empty) result is returned without
        calling into yael.

    Raises:
        AssertionError: if a non-empty subset holds an index outside ``K``.
    """
    _check_row_float32(K)
    _check_row_int32(subset_rows)
    _check_row_int32(subset_cols)
    nr = subset_rows.size
    nc = subset_cols.size
    # min()/max() raise on zero-size arrays, so an empty subset skips the
    # range check instead of crashing.
    assert nr == 0 or (subset_rows.min() >= 0
                       and subset_rows.max() < K.shape[0])
    assert nc == 0 or (subset_cols.min() >= 0
                       and subset_cols.max() < K.shape[1])
    Ksub = numpy.empty((nr, nc), dtype=numpy.float32)
    if nr == 0 or nc == 0:
        # Nothing to copy.
        return Ksub
    # yael receives the flat buffer plus a single leading dimension. For a
    # row-major K the stride between consecutive rows is the row length
    # K.shape[1]; K.shape[0] only matches it for square matrices.
    # NOTE(review): confirm against yael's fmat_get_rows_cols signature.
    yael.fmat_get_rows_cols(yael.numpy_to_fvec_ref(K),
                            K.shape[1],
                            nc, yael.numpy_to_ivec_ref(subset_cols),
                            nr, yael.numpy_to_ivec_ref(subset_rows),
                            yael.numpy_to_fvec_ref(Ksub))
    return Ksub
Exemple #5
0
def extract_lines(a, indices):
    """Return ``a[indices, :]`` for a matrix ``a`` via yael.

    The equivalent pure-numpy indexing is slow, hence the C routine.

    Parameters:
        a: 2-D matrix; validated by ``_check_row_float32``.
        indices: row indices into ``a``; validated by ``_check_row_int32``.

    Returns:
        A new float32 array of shape ``(indices.size, a.shape[1])``.

    Raises:
        AssertionError: if ``indices`` is non-empty and holds a value
            outside ``[0, a.shape[0])``.
    """
    _check_row_float32(a)
    _check_row_int32(indices)
    num_rows, row_len = a.shape
    num_picked = indices.size
    # An empty selection skips the range check (min/max need >= 1 element).
    assert num_picked == 0 or (
        0 <= indices.min() and indices.max() < num_rows)
    picked = numpy.empty((num_picked, row_len), dtype=numpy.float32)

    src_ref = yael.numpy_to_fvec_ref(a)
    idx_ref = yael.numpy_to_ivec_ref(indices)
    dst_ref = yael.numpy_to_fvec_ref(picked)
    # yael is asked for "columns" of length row_len, one per requested row.
    yael.fmat_get_columns(src_ref, row_len, num_picked, idx_ref, dst_ref)

    return picked
def extract_lines(a, indices):
    """Return ``a[indices, :]`` for a matrix ``a`` via yael.

    The equivalent pure-numpy indexing is slow, hence the C routine.

    Parameters:
        a: 2-D matrix; validated by ``_check_row_float32``.
        indices: row indices into ``a``; validated by ``_check_row_int32``.

    Returns:
        A new float32 array of shape ``(indices.size, a.shape[1])``.

    Raises:
        AssertionError: if ``indices`` is non-empty and holds a value
            outside ``[0, a.shape[0])``.
    """
    _check_row_float32(a)
    _check_row_int32(indices)
    num_rows, row_len = a.shape
    num_picked = indices.size
    # An empty selection skips the range check (min/max need >= 1 element).
    assert num_picked == 0 or (
        0 <= indices.min() and indices.max() < num_rows)
    picked = numpy.empty((num_picked, row_len), dtype=numpy.float32)

    src_ref = yael.numpy_to_fvec_ref(a)
    idx_ref = yael.numpy_to_ivec_ref(indices)
    dst_ref = yael.numpy_to_fvec_ref(picked)
    # yael is asked for "columns" of length row_len, one per requested row.
    yael.fmat_get_columns(src_ref, row_len, num_picked, idx_ref, dst_ref)

    return picked
Exemple #7
0
def knn(queries, base, nnn=1, distance_type=2, nt=1):
    """Find the ``nnn`` nearest ``base`` rows for every row of ``queries``.

    Parameters:
        queries: 2-D matrix of shape (nq, d); validated by
            ``_check_row_float32``.
        base: 2-D matrix of shape (n, d); validated by
            ``_check_row_float32``.
        nnn: number of neighbours returned per query.
        distance_type: forwarded unchanged to ``yael.knn_full_thread``.
        nt: forwarded unchanged to ``yael.knn_full_thread`` as its last
            argument (presumably the thread count -- confirm against yael).

    Returns:
        ``(idx, dis)``: an int32 and a float32 array, both of shape
        (nq, nnn), passed to yael by reference to be filled.

    Raises:
        AssertionError: if ``base`` and ``queries`` have a different number
            of columns.
    """
    _check_row_float32(base)
    _check_row_float32(queries)
    n, d = base.shape
    nq, d2 = queries.shape
    # d and d2 are shape[1], i.e. the number of columns of each matrix.
    assert d == d2, (
        "base and queries must have same nb of columns (got %d != %d)" % (
            d, d2))

    idx = numpy.empty((nq, nnn), dtype=numpy.int32)
    dis = numpy.empty((nq, nnn), dtype=numpy.float32)

    # NOTE(review): the meaning of the None argument is not visible from
    # this file; it is forwarded exactly as before.
    yael.knn_full_thread(distance_type, nq, n, d, nnn,
                         yael.numpy_to_fvec_ref(base),
                         yael.numpy_to_fvec_ref(queries), None,
                         yael.numpy_to_ivec_ref(idx),
                         yael.numpy_to_fvec_ref(dis), nt)
    return idx, dis
Exemple #8
0
def knn(queries, base,
        nnn=1,
        distance_type=2,
        nt=1):
    """Find the ``nnn`` nearest ``base`` rows for every row of ``queries``.

    Parameters:
        queries: 2-D matrix of shape (nq, d); validated by
            ``_check_row_float32``.
        base: 2-D matrix of shape (n, d); validated by
            ``_check_row_float32``.
        nnn: number of neighbours returned per query.
        distance_type: forwarded unchanged to ``yael.knn_full_thread``.
        nt: forwarded unchanged to ``yael.knn_full_thread`` as its last
            argument (presumably the thread count -- confirm against yael).

    Returns:
        ``(idx, dis)``: an int32 and a float32 array, both of shape
        (nq, nnn), passed to yael by reference to be filled.

    Raises:
        AssertionError: if ``base`` and ``queries`` have a different number
            of columns.
    """
    _check_row_float32(base)
    _check_row_float32(queries)
    n, d = base.shape
    nq, d2 = queries.shape
    # d and d2 are shape[1], i.e. the number of columns of each matrix.
    assert d == d2, (
        "base and queries must have same nb of columns (got %d != %d)" % (
            d, d2))

    idx = numpy.empty((nq, nnn), dtype=numpy.int32)
    dis = numpy.empty((nq, nnn), dtype=numpy.float32)

    # NOTE(review): the meaning of the None argument is not visible from
    # this file; it is forwarded exactly as before.
    yael.knn_full_thread(distance_type,
                         nq, n, d, nnn,
                         yael.numpy_to_fvec_ref(base),
                         yael.numpy_to_fvec_ref(queries),
                         None,
                         yael.numpy_to_ivec_ref(idx),
                         yael.numpy_to_fvec_ref(dis),
                         nt)
    return idx, dis