Example #1
0
def fisher(gmm_npy, v, include='mu'):
    """Run yael.gmm_fisher on the rows of v for the mixture in gmm_npy.

    gmm_npy is converted with _numpy_to_gmm, v is validated with
    _check_row_float32, and the second dimension of v must equal the
    dimension of the mixture.

    include chooses the components to compute: each of 'mu', 'sigma'
    and 'w' that is found in include (tested with the `in` operator)
    switches on the corresponding yael.GMM_FLAGS_* bit.

    Returns a 1-D float32 array whose length is reported by
    yael.gmm_fisher_sizeof for the selected flags.
    """
    _check_row_float32(v)
    npoints, dim = v.shape

    mixture = _numpy_to_gmm(gmm_npy)
    assert dim == mixture.d

    # Component name -> flag bit, tested in the order listed.
    selectable = (
        ('mu', yael.GMM_FLAGS_MU),
        ('sigma', yael.GMM_FLAGS_SIGMA),
        ('w', yael.GMM_FLAGS_W),
    )
    flags = 0
    for component, bit in selectable:
        if component in include:
            flags |= bit

    out_size = yael.gmm_fisher_sizeof(mixture, flags)

    # Zero-initialised buffer handed by reference to the yael call.
    descriptor = numpy.zeros(out_size, dtype=numpy.float32)
    points_ref = yael.numpy_to_fvec_ref(v)
    descriptor_ref = yael.numpy_to_fvec_ref(descriptor)
    yael.gmm_fisher(npoints, points_ref, mixture, flags, descriptor_ref)

    return descriptor
Example #2
0
def fisher(gmm_npy, v,
           include='mu'):
    """Compute the yael.gmm_fisher output for the vectors stored in v.

    v goes through _check_row_float32 and gmm_npy through _numpy_to_gmm;
    the number of columns of v has to match the `d` field of the
    resulting gmm.

    The flags passed to yael are the OR of GMM_FLAGS_MU, GMM_FLAGS_SIGMA
    and GMM_FLAGS_W, each one present only when 'mu', 'sigma' or 'w'
    respectively is contained in include.

    The result is a float32 vector sized by yael.gmm_fisher_sizeof.
    """
    _check_row_float32(v)
    count, width = v.shape

    gmm = _numpy_to_gmm(gmm_npy)
    assert width == gmm.d

    # One optional bit per requested component; 0 when not requested.
    mu_bit = yael.GMM_FLAGS_MU if 'mu' in include else 0
    sigma_bit = yael.GMM_FLAGS_SIGMA if 'sigma' in include else 0
    w_bit = yael.GMM_FLAGS_W if 'w' in include else 0
    flags = mu_bit | sigma_bit | w_bit

    result = numpy.zeros(yael.gmm_fisher_sizeof(gmm, flags),
                         dtype=numpy.float32)

    yael.gmm_fisher(count,
                    yael.numpy_to_fvec_ref(v),
                    gmm,
                    flags,
                    yael.numpy_to_fvec_ref(result))

    return result
def extract_lines(a, indices):
    """Return a[indices, :] for a matrix a.

    The original author notes that this operation is slow in numpy,
    hence the call into yael.  a is validated with _check_row_float32
    and indices with _check_row_int32.  An empty indices array is
    accepted; otherwise every index must lie in [0, a.shape[0]).

    The result is a new float32 array of shape (indices.size, a.shape[1]).
    """
    _check_row_float32(a)
    _check_row_int32(indices)
    nrows, ncols = a.shape
    count = indices.size
    if count != 0:
        # min()/max() are only meaningful on a non-empty selection
        assert indices.min() >= 0 and indices.max() < nrows
    picked = numpy.empty((count, ncols), dtype=numpy.float32)
    src_ref = yael.numpy_to_fvec_ref(a)
    idx_ref = yael.numpy_to_ivec_ref(indices)
    dst_ref = yael.numpy_to_fvec_ref(picked)
    yael.fmat_get_columns(src_ref, ncols, count, idx_ref, dst_ref)

    return picked
Example #4
0
def extract_lines(a, indices):
    """Select rows of a: equivalent to a[indices, :].

    Done through yael.fmat_get_columns because, per the original note,
    the pure numpy operation is slow.  a must pass _check_row_float32
    and indices must pass _check_row_int32.  When indices is non-empty
    all of its values have to be valid row numbers of a.

    Returns a freshly allocated float32 matrix with one row per index.
    """
    _check_row_float32(a)
    _check_row_int32(indices)
    n_rows, width = a.shape
    n_selected = indices.size
    assert not n_selected or (0 <= indices.min() and indices.max() < n_rows)
    rows = numpy.empty((n_selected, width), dtype=numpy.float32)
    yael.fmat_get_columns(
        yael.numpy_to_fvec_ref(a),
        width,
        n_selected,
        yael.numpy_to_ivec_ref(indices),
        yael.numpy_to_fvec_ref(rows),
    )

    return rows
Example #5
0
def kmeans(v,
           k,
           distance_type=2,
           nt=1,
           niter=30,
           seed=0,
           redo=1,
           verbose=True,
           normalize=False,
           init='random',
           output='centroids'):
    """Cluster the rows of v into k centroids through yael.kmeans.

    v must pass _check_row_float32.  The flags word given to yael starts
    from nt and is then OR-ed with:
      * KMEANS_QUIET when verbose is false,
      * KMEANS_L1 for distance_type 1 or KMEANS_CHI2 for distance_type 3
        (2, the default, and any other value add nothing),
      * KMEANS_INIT_RANDOM for init 'random' or KMEANS_INIT_BERKELEY for
        init 'kmeans++' (other values add nothing),
      * KMEANS_NORMALIZE_CENTS when normalize is true.
    niter, seed and redo are forwarded unchanged.

    Raises RuntimeError when yael.kmeans returns a negative value.

    With output == 'centroids' only the (k, d) float32 centroid matrix
    is returned; any other value returns the tuple
    (centroids, qerr, dis, assign, nassign).
    """
    _check_row_float32(v)
    npoints, dim = v.shape

    # Output buffers handed to yael by reference.
    centroids = numpy.zeros((k, dim), dtype=numpy.float32)
    nassign = numpy.empty(k, dtype=numpy.int32)
    assign = numpy.empty(npoints, dtype=numpy.int32)
    dis = numpy.empty(npoints, dtype=numpy.float32)

    flags = nt
    if not verbose:
        flags |= yael.KMEANS_QUIET

    # distance_type 2 is the default and needs no flag
    if distance_type == 1:
        flags |= yael.KMEANS_L1
    elif distance_type == 3:
        flags |= yael.KMEANS_CHI2

    if init == 'kmeans++':
        flags |= yael.KMEANS_INIT_BERKELEY
    elif init == 'random':
        flags |= yael.KMEANS_INIT_RANDOM  # also default

    if normalize:
        flags |= yael.KMEANS_NORMALIZE_CENTS

    points_ref = yael.numpy_to_fvec_ref(v)
    centroids_ref = yael.numpy_to_fvec_ref(centroids)
    dis_ref = yael.numpy_to_fvec_ref(dis)
    assign_ref = yael.numpy_to_ivec_ref(assign)
    nassign_ref = yael.numpy_to_ivec_ref(nassign)
    qerr = yael.kmeans(dim, npoints, k, niter, points_ref, flags, seed,
                       redo, centroids_ref, dis_ref, assign_ref, nassign_ref)

    if qerr < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    if output != 'centroids':
        return (centroids, qerr, dis, assign, nassign)
    return centroids
Example #6
0
def kmeans(v, k,
           distance_type=2,
           nt=1,
           niter=30,
           seed=0,
           redo=1,
           verbose=True,
           normalize=False,
           init='random',
           output='centroids'):
    """Run yael.kmeans on the row vectors of v and return the clustering.

    The options are folded into a single flags integer that is
    initialised with nt:
      verbose=False           -> KMEANS_QUIET
      distance_type 1 / 3     -> KMEANS_L1 / KMEANS_CHI2 (2 is the default;
                                 unrecognised values leave flags untouched)
      init 'random'/'kmeans++'-> KMEANS_INIT_RANDOM / KMEANS_INIT_BERKELEY
                                 (unrecognised values leave flags untouched)
      normalize=True          -> KMEANS_NORMALIZE_CENTS
    k, niter, seed and redo go to yael.kmeans as they are.

    A negative return value from yael.kmeans is reported as RuntimeError.

    Returns the centroids array alone when output is 'centroids',
    otherwise (centroids, qerr, dis, assign, nassign).
    """
    _check_row_float32(v)
    n, d = v.shape

    centroids = numpy.zeros((k, d), dtype=numpy.float32)
    dis = numpy.empty(n, dtype=numpy.float32)
    assign = numpy.empty(n, dtype=numpy.int32)
    nassign = numpy.empty(k, dtype=numpy.int32)

    flags = nt
    if not verbose:
        flags |= yael.KMEANS_QUIET

    # First matching distance code contributes its flag; 2 contributes none.
    for code, bit in ((1, yael.KMEANS_L1), (3, yael.KMEANS_CHI2)):
        if distance_type == code:
            flags |= bit
            break

    # Same scheme for the initialisation method.
    for method, bit in (('random', yael.KMEANS_INIT_RANDOM),
                        ('kmeans++', yael.KMEANS_INIT_BERKELEY)):
        if init == method:
            flags |= bit
            break

    if normalize:
        flags |= yael.KMEANS_NORMALIZE_CENTS

    qerr = yael.kmeans(
        d, n, k, niter,
        yael.numpy_to_fvec_ref(v),
        flags, seed, redo,
        yael.numpy_to_fvec_ref(centroids),
        yael.numpy_to_fvec_ref(dis),
        yael.numpy_to_ivec_ref(assign),
        yael.numpy_to_ivec_ref(nassign),
    )

    if qerr < 0:
        raise RuntimeError(
            "kmeans: clustering failed. Is dataset diverse enough?")

    full_result = (centroids, qerr, dis, assign, nassign)
    return centroids if output == 'centroids' else full_result
Example #7
0
def vlad(centroids, v):
    """Call yael.vlad_compute for the vectors v against centroids.

    Both arguments are validated with _check_row_float32 and must have
    the same number of columns.  The result is a float32 array with the
    same shape as centroids, zero-initialised before being handed to
    yael by reference.
    """
    _check_row_float32(v)
    npoints, dim = v.shape

    _check_row_float32(centroids)
    ncentroids, centroid_dim = centroids.shape
    assert centroid_dim == dim

    descriptor = numpy.zeros((ncentroids, dim), dtype=numpy.float32)
    centroids_ref = yael.numpy_to_fvec_ref(centroids)
    points_ref = yael.numpy_to_fvec_ref(v)
    descriptor_ref = yael.numpy_to_fvec_ref(descriptor)
    yael.vlad_compute(ncentroids, dim, centroids_ref,
                      npoints, points_ref,
                      descriptor_ref)

    return descriptor
Example #8
0
def vlad(centroids, v):
    """Compute the yael.vlad_compute output of v for the given centroids.

    v and centroids each go through _check_row_float32; their second
    dimensions must agree.  Returns a (k, d) float32 matrix, where
    (k, d) is the shape of centroids.
    """
    _check_row_float32(v)
    n_vectors, width = v.shape

    _check_row_float32(centroids)
    n_centers, center_width = centroids.shape
    assert center_width == width

    # Starts as zeros; yael receives it by reference.
    out = numpy.zeros((n_centers, width), dtype=numpy.float32)
    yael.vlad_compute(
        n_centers,
        width,
        yael.numpy_to_fvec_ref(centroids),
        n_vectors,
        yael.numpy_to_fvec_ref(v),
        yael.numpy_to_fvec_ref(out),
    )

    return out
Example #9
0
def extract_rows_cols(K, subset_rows, subset_cols):
    """Return K[numpy.ix_(subset_rows, subset_cols)] (also slow in pure numpy).

    K is validated with _check_row_float32, subset_rows and subset_cols
    with _check_row_int32.  Non-empty index arrays must only contain
    valid row / column numbers of K (checked with assert).

    An empty subset_rows or subset_cols is accepted and yields an empty
    result of shape (subset_rows.size, subset_cols.size), matching how
    extract_lines treats an empty selection.

    Returns a new float32 array of shape (subset_rows.size, subset_cols.size).
    """
    _check_row_float32(K)
    _check_row_int32(subset_rows)
    _check_row_int32(subset_cols)
    nr = subset_rows.size
    nc = subset_cols.size
    # min()/max() raise ValueError on zero-size arrays, so the range
    # checks only apply to non-empty selections.
    assert nr == 0 or (subset_rows.min() >= 0 and
                       subset_rows.max() < K.shape[0])
    assert nc == 0 or (subset_cols.min() >= 0 and
                       subset_cols.max() < K.shape[1])
    Ksub = numpy.empty((nr, nc), dtype=numpy.float32)
    if nr == 0 or nc == 0:
        # Nothing to copy: the result has no elements.
        return Ksub
    # NOTE(review): K.shape[0] is passed as the matrix dimension; for a
    # non-square K the row stride would be K.shape[1] -- confirm against
    # the fmat_get_rows_cols signature (identical for square K).
    yael.fmat_get_rows_cols(yael.numpy_to_fvec_ref(K), K.shape[0], nc,
                            yael.numpy_to_ivec_ref(subset_cols), nr,
                            yael.numpy_to_ivec_ref(subset_rows),
                            yael.numpy_to_fvec_ref(Ksub))
    return Ksub
Example #10
0
def extract_rows_cols(K, subset_rows, subset_cols):
    """Return K[numpy.ix_(subset_rows, subset_cols)] (also slow in pure numpy).

    K must pass _check_row_float32; subset_rows and subset_cols must pass
    _check_row_int32.  When an index array is non-empty, all its values
    are asserted to be in range for the matching axis of K.

    Empty index arrays are allowed: the result is then an empty float32
    array of shape (subset_rows.size, subset_cols.size), in line with
    extract_lines accepting an empty selection.
    """
    _check_row_float32(K)
    _check_row_int32(subset_rows)
    _check_row_int32(subset_cols)
    nr = subset_rows.size
    nc = subset_cols.size
    if nr:
        # min()/max() would raise ValueError on an empty array
        assert subset_rows.min() >= 0 and subset_rows.max() < K.shape[0]
    if nc:
        assert subset_cols.min() >= 0 and subset_cols.max() < K.shape[1]
    Ksub = numpy.empty((nr, nc), dtype=numpy.float32)
    if not nr or not nc:
        # Zero elements requested: skip the native call entirely.
        return Ksub
    # NOTE(review): the dimension argument is K.shape[0]; with a
    # non-square K one would expect K.shape[1] here -- verify against
    # yael's fmat_get_rows_cols before relying on non-square input.
    yael.fmat_get_rows_cols(yael.numpy_to_fvec_ref(K),
                            K.shape[0],
                            nc, yael.numpy_to_ivec_ref(subset_cols),
                            nr, yael.numpy_to_ivec_ref(subset_rows),
                            yael.numpy_to_fvec_ref(Ksub))
    return Ksub
Example #11
0
def cross_distances(a, b, distance_type=12):
    """Fill a (nb, na) float32 matrix via yael's cross-distance routine.

    a and b are validated with _check_row_float32 and must have the same
    number of columns.  distance_type is forwarded unchanged to
    yael.compute_cross_distances_alt_nonpacked.

    Returns the result array, whose shape is (b.shape[0], a.shape[0]).
    """
    _check_row_float32(a)
    n_a, dim = a.shape
    _check_row_float32(b)
    n_b, dim_b = b.shape

    assert dim_b == dim

    distances = numpy.empty((n_b, n_a), dtype=numpy.float32)

    a_ref = yael.numpy_to_fvec_ref(a)
    b_ref = yael.numpy_to_fvec_ref(b)
    out_ref = yael.numpy_to_fvec_ref(distances)
    # Each buffer is followed by the stride passed for it: dim for the
    # two inputs, n_a for the output.
    yael.compute_cross_distances_alt_nonpacked(distance_type, dim, n_a, n_b,
                                               a_ref, dim,
                                               b_ref, dim,
                                               out_ref, n_a)

    return distances
Example #12
0
def knn(queries, base, nnn=1, distance_type=2, nt=1):
    """Search base for the nnn nearest neighbours of each row of queries.

    queries and base are validated with _check_row_float32 and must have
    the same number of columns (asserted).  distance_type and nt are
    forwarded unchanged to yael.knn_full_thread.

    Returns (idx, dis): an int32 and a float32 array, both of shape
    (queries.shape[0], nnn), passed by reference to yael.knn_full_thread.
    """
    _check_row_float32(base)
    _check_row_float32(queries)
    n, d = base.shape
    nq, d2 = queries.shape
    # d and d2 are shape[1], i.e. column counts, so the message must say
    # columns (it previously said rows).
    assert d == d2, (
        "base and queries must have the same number of columns "
        "(got %d != %d)" % (d, d2))

    idx = numpy.empty((nq, nnn), dtype=numpy.int32)
    dis = numpy.empty((nq, nnn), dtype=numpy.float32)

    yael.knn_full_thread(distance_type, nq, n, d, nnn,
                         yael.numpy_to_fvec_ref(base),
                         yael.numpy_to_fvec_ref(queries), None,
                         yael.numpy_to_ivec_ref(idx),
                         yael.numpy_to_fvec_ref(dis), nt)
    return idx, dis
Example #13
0
def _numpy_to_gmm(gmm_npy):
    """Build a yael.gmm_t whose fields reference three numpy arrays.

    gmm_npy is a (w, mu, sigma) triple.  mu and sigma are validated with
    _check_row_float32 and must share the same (k, d) shape; w must have
    shape (k,).

    This produces a fake gmm from 3 numpy matrices: they should not be
    deallocated while the gmm is in use.

    The triple is unpacked in the body rather than in the signature,
    because tuple parameters are a syntax error on Python 3; callers
    still pass a single positional argument.
    """
    w, mu, sigma = gmm_npy
    _check_row_float32(mu)
    _check_row_float32(sigma)

    k, d = mu.shape
    assert sigma.shape == mu.shape
    assert w.shape == (k,)

    gmm = yael.gmm_t()
    gmm.d = d
    gmm.k = k
    gmm.w = yael.numpy_to_fvec_ref(w)
    gmm.mu = yael.numpy_to_fvec_ref(mu)
    gmm.sigma = yael.numpy_to_fvec_ref(sigma)
    # NOTE(review): presumably overrides the default destructor so the
    # borrowed numpy buffers are not freed by yael -- confirm _gmm_del.
    gmm.__del__ = _gmm_del
    return gmm
def partial_pca(mat, nev=6, nt=1):
    """Compute nev principal components of the rows of mat with yael.

    mat is validated with _check_row_float32, then centred by subtracting
    its per-column mean (the caller's array is not modified).  nt is
    accepted for interface compatibility but is not used here.

    Returns (avg, singvals, pcamat):
      avg      -- the per-column mean that was subtracted,
      singvals -- float32 array of length nev passed by reference to
                  yael.fmat_new_pca_part,
      pcamat   -- the yael result converted to a numpy array of shape
                  (nev, d).

    Raises AssertionError if yael.fmat_new_pca_part returns None.
    """
    _check_row_float32(mat)
    n, d = mat.shape

    avg = mat.mean(axis=0)
    mat = mat - avg[numpy.newaxis, :]

    singvals = numpy.empty(nev, dtype=numpy.float32)

    pcamat = yael.fmat_new_pca_part(d, n, nev, yael.numpy_to_fvec_ref(mat), yael.numpy_to_fvec_ref(singvals))
    # Identity test: None is a singleton and must not be compared with !=.
    assert pcamat is not None

    # Wrap the returned pointer before converting it to numpy
    # (presumably transfers ownership of the buffer -- TODO confirm).
    pcamat = yael.fvec.acquirepointer(pcamat)
    pcamat = yael.fvec_to_numpy(pcamat, (nev, d))

    return avg, singvals, pcamat
Example #15
0
def partial_pca(mat, nev=6, nt=1):
    """Run yael.fmat_new_pca_part on the mean-centred rows of mat.

    mat must pass _check_row_float32.  A centred copy is built by
    subtracting the column means; the input array itself is left
    untouched.  nev is the number of components requested.  nt is part
    of the signature but is not used by this function.

    Returns a tuple (avg, singvals, pcamat) where avg is the column mean
    of mat, singvals is a float32 vector of length nev and pcamat is a
    numpy array of shape (nev, d).

    Raises AssertionError when the yael call returns None.
    """
    _check_row_float32(mat)
    n, d = mat.shape

    avg = mat.mean(axis=0)
    mat = mat - avg[numpy.newaxis, :]

    singvals = numpy.empty(nev, dtype=numpy.float32)

    pcamat = yael.fmat_new_pca_part(d, n, nev, yael.numpy_to_fvec_ref(mat),
                                    yael.numpy_to_fvec_ref(singvals))
    # Compare against the None singleton by identity, not with !=.
    assert pcamat is not None

    # NOTE(review): acquirepointer presumably makes the wrapper own the
    # returned buffer -- confirm against the yael bindings.
    pcamat = yael.fvec.acquirepointer(pcamat)
    pcamat = yael.fvec_to_numpy(pcamat, (nev, d))

    return avg, singvals, pcamat
Example #16
0
def knn(queries, base,
        nnn=1,
        distance_type=2,
        nt=1):
    """Find, for every row of queries, its nnn nearest rows in base.

    Both matrices go through _check_row_float32 and are asserted to have
    the same number of columns.  distance_type and nt are handed to
    yael.knn_full_thread unchanged.

    Returns (idx, dis), an int32 array and a float32 array of shape
    (queries.shape[0], nnn), both passed by reference to the yael call.
    """
    _check_row_float32(base)
    _check_row_float32(queries)
    n, d = base.shape
    nq, d2 = queries.shape
    # The compared values are column counts (shape[1]); the message used
    # to say "rows", which is misleading for numpy users.
    assert d == d2, (
        "base and queries must have the same number of columns "
        "(got %d != %d)" % (d, d2))

    idx = numpy.empty((nq, nnn), dtype=numpy.int32)
    dis = numpy.empty((nq, nnn), dtype=numpy.float32)

    yael.knn_full_thread(distance_type,
                         nq, n, d, nnn,
                         yael.numpy_to_fvec_ref(base),
                         yael.numpy_to_fvec_ref(queries),
                         None,
                         yael.numpy_to_ivec_ref(idx),
                         yael.numpy_to_fvec_ref(dis),
                         nt)
    return idx, dis
Example #17
0
def gmm_learn_sw(v, sw, k,
                 nt=1,
                 niter=30,
                 seed=0,
                 redo=1,
                 use_weights=True):
    """Learn a k-component GMM from v and sw with yael.gmm_learn_sw.

    v is validated with _check_row_float32; sw is passed to yael as a
    float vector reference without validation (presumably one weight per
    row of v -- TODO confirm).  nt, niter, seed and redo are forwarded
    unchanged.  use_weights sets yael.GMM_FLAGS_W in the flags.

    Returns whatever _gmm_to_numpy produces for the learned gmm.  The
    native gmm object is always released with yael.gmm_delete, even if
    the conversion raises.
    """
    _check_row_float32(v)
    n, d = v.shape

    flags = 0
    if use_weights:
        flags |= yael.GMM_FLAGS_W

    gmm = yael.gmm_learn_sw(d, n, k, niter,
                            yael.numpy_to_fvec_ref(v),
                            yael.numpy_to_fvec_ref(sw),
                            nt, seed, redo, flags)

    try:
        return _gmm_to_numpy(gmm)
    finally:
        # Release the native object on success and on failure alike.
        yael.gmm_delete(gmm)
Example #18
0
def fvecs_write(filename, matrix):
    """Write the rows of matrix to filename with yael.fvecs_write.

    matrix is validated with _check_row_float32.

    Raises IOError when the value returned by yael.fvecs_write differs
    from the number of rows of matrix.
    """
    _check_row_float32(matrix)
    n, d = matrix.shape
    ret = yael.fvecs_write(filename, d, n, yael.numpy_to_fvec_ref(matrix))
    if ret != n:
        # Separator added: the message used to run straight into the name.
        raise IOError("write error: " + filename)