Exemple #1
0
def readXVecsFromOpenedFile(file, dim, count, extension):
    """Read `count` vectors of dimension `dim` from an already opened file.

    Inputs
    ------
    file: opened file handle
        Passed unchanged to the yael reader selected by `extension`.

    dim: int
        Number of components per vector.

    count: int
        Number of vectors to read.

    extension: str
        Either 'bvecs' (read with `yael.b2fvecs_fread`) or 'fvecs' (read
        with `yael.fvecs_fread`).

    Output
    ------
    Array obtained from `yael.fvec_to_numpy` with shape (count, dim).

    Raises
    ------
    Exception
        If `extension` is neither 'bvecs' nor 'fvecs'.

    """
    # Reject unknown formats up front, before any buffer is allocated.
    if extension != 'bvecs' and extension != 'fvecs':
        raise Exception('Bad file extension!')

    buf = yael.fvec_new(dim * count)
    if extension == 'bvecs':
        yael.b2fvecs_fread(file, buf, count)
    else:
        yael.fvecs_fread(file, buf, count, dim)

    # Convert to numpy first, then release the yael buffer.
    vectors = yael.fvec_to_numpy(buf, (count, dim))
    yael.free(buf)
    return vectors
def readXVecsFromOpenedFile(file, dim, count, extension):
    """Load `count` `dim`-dimensional vectors from an opened *vecs file.

    Inputs
    ------
    file: opened file handle
        Given as-is to the yael reading routine.

    dim, count: int
        Vector dimensionality and number of vectors; a yael float buffer of
        `dim * count` elements is allocated to receive them.

    extension: str
        'bvecs' selects `yael.b2fvecs_fread`, 'fvecs' selects
        `yael.fvecs_fread`.

    Output
    ------
    Result of `yael.fvec_to_numpy` for shape (count, dim).

    Raises
    ------
    Exception
        For any other `extension`.

    """
    # Pick the reader and its trailing arguments; the two readers differ
    # only in that the fvecs one also takes the dimension.
    if extension == 'bvecs':
        reader, trailing = yael.b2fvecs_fread, ()
    elif extension == 'fvecs':
        reader, trailing = yael.fvecs_fread, (dim,)
    else:
        raise Exception('Bad file extension!')

    storage = yael.fvec_new(dim * count)
    reader(file, storage, count, *trailing)
    as_array = yael.fvec_to_numpy(storage, (count, dim))
    yael.free(storage)
    return as_array
Exemple #3
0
    def descs_to_sstats(xx, gmm):
        """ Turns a set of descriptors into a sufficient-statistics vector.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Descriptors, one per row. The input goes through
            `np.atleast_2d`, so a single 1-D descriptor is accepted too.

        gmm: yael.gmm instance
            Gaussian mixture; its `k` and `d` attributes give the number of
            clusters and the dimensionality used to size the output.

        Output
        ------
        sstats: float32 array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
            Three consecutive segments, each divided by the number of
            descriptors: the summed posteriors, the posterior-weighted sum
            of the descriptors, and the posterior-weighted sum of the
            squared descriptors.

        """
        xx = np.atleast_2d(xx)
        nr_descs = xx.shape[0]
        nr_clusters, nr_dims = gmm.k, gmm.d
        nr_posteriors = nr_descs * nr_clusters

        # Let yael fill a raw buffer with the posteriors, copy it into a
        # numpy array and release the buffer.
        raw_posteriors = fvec_new(nr_posteriors)
        gmm_compute_p(
            nr_descs, numpy_to_fvec_ref(xx), gmm, raw_posteriors, GMM_FLAGS_W)
        posteriors = fvec_to_numpy(raw_posteriors, nr_posteriors).reshape(
            nr_descs, nr_clusters)  # nr_descs x nr_clusters
        yael.free(raw_posteriors)

        # Offsets of the three segments inside the output vector.
        first_start = nr_clusters
        second_start = first_start + nr_clusters * nr_dims
        second_stop = nr_clusters + 2 * nr_clusters * nr_dims

        sstats = np.zeros(second_stop, dtype=np.float32)
        sstats[:first_start] = np.sum(posteriors, 0) / nr_descs
        first_moment = dot(posteriors.T, xx).flatten() / nr_descs
        sstats[first_start:second_start] = first_moment
        second_moment = dot(posteriors.T, xx ** 2).flatten() / nr_descs
        sstats[second_start:second_stop] = second_moment
        return sstats
Exemple #4
0
    def descs_to_sstats(xx, gmm):
        """ Averages the posterior probabilities of the descriptors.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors, one per row.

        gmm: yael.gmm instance
            Gaussian mixture; `gmm.k` gives the number of clusters.

        Output
        ------
        Q_sum: float32 array [nr_clusters, ]
            Posterior probabilities summed over the descriptors and divided
            by the number of descriptors.

        """
        nr_clusters = gmm.k
        nr_descs = xx.shape[0]
        nr_posteriors = nr_descs * nr_clusters

        # Posteriors are computed by yael into a raw buffer, which is
        # copied to numpy and then freed.
        raw_posteriors = fvec_new(nr_posteriors)
        gmm_compute_p(
            nr_descs, numpy_to_fvec_ref(xx), gmm, raw_posteriors, GMM_FLAGS_W)
        posteriors = fvec_to_numpy(raw_posteriors, nr_posteriors)
        posteriors = posteriors.reshape(nr_descs, nr_clusters)
        yael.free(raw_posteriors)

        # Sum along the descriptors and normalise by their number.
        return np.array(sum(posteriors, 0) / nr_descs, dtype=np.float32)
Exemple #5
0
    def descs_to_sstats(xx, gmm):
        """ Builds the sufficient-statistics vector of a set of descriptors.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Descriptors, one per row; passed through `np.atleast_2d` first.

        gmm: yael.gmm instance
            Gaussian mixture; `gmm.k` clusters of dimension `gmm.d`.

        Output
        ------
        sstats: float32 array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
            Concatenation of the averaged posterior probabilities, the
            averaged posterior-weighted descriptors (first moment) and the
            averaged posterior-weighted squared descriptors (second-order
            moment).

        """
        xx = np.atleast_2d(xx)
        n_rows = xx.shape[0]
        n_comp = gmm.k
        n_dim = gmm.d
        block = n_comp * n_dim  # length of each moment segment

        # Compute the posteriors with yael and bring them over to numpy.
        q_buffer = fvec_new(n_rows * n_comp)
        gmm_compute_p(
            n_rows, numpy_to_fvec_ref(xx), gmm, q_buffer, GMM_FLAGS_W)
        q = fvec_to_numpy(q_buffer, n_rows * n_comp).reshape(n_rows, n_comp)
        yael.free(q_buffer)

        # Fill the output segment by segment.
        sstats = np.zeros(n_comp + 2 * n_comp * n_dim, dtype=np.float32)
        sstats[:n_comp] = np.sum(q, 0) / n_rows
        sstats[n_comp:n_comp + block] = dot(q.T, xx).flatten() / n_rows
        sstats[n_comp + block:n_comp + 2 * block] = (
            dot(q.T, xx ** 2).flatten() / n_rows)
        return sstats
    def descs_to_sstats(xx, gmm):
        """ Returns the averaged posterior probabilities of the descriptors.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors.

        gmm: yael.gmm instance
            Gaussian mixture object; `gmm.k` is the number of clusters.

        Output
        ------
        Q_sum: float32 array [nr_clusters, ]
            Sum of the posteriors over all descriptors, divided by the
            number of descriptors.

        """
        n_comp = gmm.k
        n_rows = xx.shape[0]

        # yael writes the posteriors into a buffer it allocated; the values
        # are copied into a numpy array before the buffer is released.
        q_buffer = fvec_new(n_rows * n_comp)
        gmm_compute_p(
            n_rows, numpy_to_fvec_ref(xx), gmm, q_buffer, GMM_FLAGS_W)
        q = fvec_to_numpy(q_buffer, n_rows * n_comp).reshape(n_rows, n_comp)
        yael.free(q_buffer)

        averaged = sum(q, 0) / n_rows  # one value per cluster
        return np.array(averaged, dtype=np.float32)
def gmm_predict_proba(xx, gmm):
    """Computes posterior probabilities using yael.

    Inputs
    ------
    xx: array whose first axis indexes the points
        Handed to yael through `yael.numpy_to_fvec_ref`.

    gmm: yael gmm object
        Mixture model; `gmm.k` is the number of components.

    Output
    ------
    Array of shape (xx.shape[0], gmm.k) holding the values yael computed.

    """
    nr_points, nr_components = xx.shape[0], gmm.k
    nr_values = nr_points * nr_components

    # Raw yael buffer that receives the posteriors.
    raw = yael.fvec_new(nr_values)
    yael.gmm_compute_p(
        nr_points, yael.numpy_to_fvec_ref(xx), gmm, raw, yael.GMM_FLAGS_W)

    # Move the values to numpy before giving the buffer back.
    posteriors = yael.fvec_to_numpy(raw, nr_values)
    posteriors = posteriors.reshape(nr_points, nr_components)
    yael.free(raw)
    return posteriors
Exemple #8
0
def readXvecs(filename, dim, count, tonumpy=True):
    """Read `count` vectors of dimension `dim` from a *vecs file.

    The format is selected by the text after the last '.' of `filename`
    (surrounding whitespace is stripped before the split):

    * 'bvecs'  -- read with `yael.b2fvecs_read` into a float buffer;
    * 'fvecs'  -- read with `yael.fvecs_read`;
    * 'i8vecs' -- parsed here with numpy: every record is 4 bytes that are
      skipped, followed by `dim` signed 8-bit values;
    * 'ivecs'  -- read with `yael.ivecs_fread`.

    Inputs
    ------
    filename: str
        Path of the file to read.

    dim, count: int
        Vector dimensionality and number of vectors to read.

    tonumpy: bool, optional
        When true (default) the yael buffer is converted to a numpy array of
        shape (count, dim) and freed. When false the raw yael buffer is
        returned and the caller is responsible for freeing it. Ignored for
        'i8vecs', which always yields a float32 numpy array.

    Raises
    ------
    Exception
        If the extension is not one of the four above.
    ValueError
        If an 'i8vecs' file holds fewer than `count` complete records.

    """
    extension = filename.strip().split('.')[-1]
    if extension == 'bvecs':
        points = yael.fvec_new(dim * count)
        yael.b2fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    elif extension == 'fvecs':
        points = yael.fvec_new(dim * count)
        yael.fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    elif extension == 'i8vecs':
        # The data is binary, so open in 'rb', and use `with` so the handle
        # is always closed. All records are read in one call instead of
        # alternating file.read() and np.fromfile() on the same handle.
        record = dim + 4  # 4 skipped bytes + `dim` int8 components
        with open(filename, 'rb') as handle:
            raw = np.fromfile(handle, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError(
                'Expected %d vectors of dimension %d in %s'
                % (count, dim, filename))
        # Drop the 4 leading bytes of every record and convert to float32.
        return np.ascontiguousarray(
            raw.reshape(count, record)[:, 4:], dtype='float32')
    elif extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Binary mode, and close the handle once yael has finished reading.
        with open(filename, 'rb') as handle:
            yael.ivecs_fread(handle, points, count, dim)
        if tonumpy:
            a = yael.ivec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    else:
        raise Exception('Bad file extension!')
def readXvecs(filename, dim, count, tonumpy=True):
    """Load `count` `dim`-dimensional vectors from a *vecs file.

    The reader is chosen from the extension of `filename` (the part after
    its last '.', once surrounding whitespace is stripped):

    * 'bvecs'  -- `yael.b2fvecs_read`, values end up in a float buffer;
    * 'fvecs'  -- `yael.fvecs_read`;
    * 'i8vecs' -- decoded with numpy; each record consists of 4 skipped
      bytes followed by `dim` signed bytes;
    * 'ivecs'  -- `yael.ivecs_fread`.

    Inputs
    ------
    filename: str
        Path of the file to read.

    dim, count: int
        Number of components per vector and number of vectors.

    tonumpy: bool, optional
        True (default): return a numpy array of shape (count, dim) and free
        the yael buffer. False: return the yael buffer itself, which the
        caller then has to free. 'i8vecs' ignores this flag and always
        returns a float32 numpy array.

    Raises
    ------
    Exception
        For an unsupported extension.
    ValueError
        When an 'i8vecs' file is too short for `count` records.

    """
    extension = filename.strip().split('.')[-1]
    if extension == 'bvecs':
        points = yael.fvec_new(dim * count)
        yael.b2fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    elif extension == 'fvecs':
        points = yael.fvec_new(dim * count)
        yael.fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    elif extension == 'i8vecs':
        # Binary data: open with 'rb' inside `with` so the handle cannot
        # leak, and read every record in a single np.fromfile call rather
        # than interleaving file.read() with np.fromfile().
        record = dim + 4  # 4 skipped bytes + `dim` int8 components
        with open(filename, 'rb') as handle:
            raw = np.fromfile(handle, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError(
                'Expected %d vectors of dimension %d in %s'
                % (count, dim, filename))
        # Strip the 4 leading bytes of each record, then convert.
        return np.ascontiguousarray(
            raw.reshape(count, record)[:, 4:], dtype='float32')
    elif extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Keep the handle open only for the duration of the yael read.
        with open(filename, 'rb') as handle:
            yael.ivecs_fread(handle, points, count, dim)
        if tonumpy:
            a = yael.ivec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points
    else:
        raise Exception('Bad file extension!')
Exemple #10
0
    def descs_to_spatial_sstats(xx, ll, gmm):
        """ Computes spatial statistics from descriptors and their positions.

        Inputs
        ------
        xx: array [N, D], required
            N D-dimensional descriptors from a video (usually, after they
            are processed with PCA).

        ll: array [N, 3], required
            Descriptor locations; each row holds the triplet (x, y, t). The
            number of descriptors N is taken from this array.

        gmm: instance of yael object gmm
            Gaussian mixture object; `gmm.k` is the number of clusters K.

        Output
        ------
        ss: float32 array [K + 2 * 3 * K, ]
            Concatenation of (i) the posteriors summed over the descriptors,
            (ii) the posterior-weighted sum of the locations `ll` and (iii)
            the posterior-weighted sum of the squared locations, each
            divided by N.

        """
        nr_descs = ll.shape[0]
        nr_clusters = gmm.k
        nr_posteriors = nr_descs * nr_clusters

        # Posteriors come from yael: fill a raw buffer, copy to numpy, free.
        raw_posteriors = fvec_new(nr_posteriors)
        gmm_compute_p(
            nr_descs, numpy_to_fvec_ref(xx), gmm, raw_posteriors, GMM_FLAGS_W)
        posteriors = fvec_to_numpy(raw_posteriors, nr_posteriors).reshape(
            nr_descs, nr_clusters)
        yael.free(raw_posteriors)

        # The three parts of the statistics, in output order.
        mean_posteriors = sum(posteriors, 0) / nr_descs
        first_moment = dot(posteriors.T, ll).flatten() / nr_descs
        second_moment = dot(posteriors.T, ll ** 2).flatten() / nr_descs

        stacked = hstack((mean_posteriors, first_moment, second_moment))
        return np.array(stacked, dtype=np.float32)
Exemple #11
0
    def descs_to_spatial_sstats(xx, ll, gmm):
        """ Derives spatial sufficient statistics from located descriptors.

        Inputs
        ------
        xx: array [N, D], required
            N D-dimensional descriptors from a video (usually, after they
            are processed with PCA).

        ll: array [N, 3], required
            Location of every descriptor as a row (x, y, t); N is read from
            the first axis of this array.

        gmm: instance of yael object gmm
            Gaussian mixture object with K = `gmm.k` clusters.

        Output
        ------
        ss: float32 array [K + 2 * 3 * K, ]
            Vector that concatenates (i) the sum of posteriors, (ii) the
            locations `ll` weighted by the posteriors Q and (iii) the
            squared locations weighted by Q; every part is divided by N.

        """
        n_rows = ll.shape[0]
        n_comp = gmm.k

        # Ask yael for the posteriors and copy them out of its buffer.
        q_buffer = fvec_new(n_rows * n_comp)
        gmm_compute_p(
            n_rows, numpy_to_fvec_ref(xx), gmm, q_buffer, GMM_FLAGS_W)
        q = fvec_to_numpy(q_buffer, n_rows * n_comp).reshape(n_rows, n_comp)
        yael.free(q_buffer)

        # Assemble the output parts in order and cast once at the end.
        parts = (
            sum(q, 0) / n_rows,                     # K values
            dot(q.T, ll).flatten() / n_rows,        # first moment
            dot(q.T, ll ** 2).flatten() / n_rows,   # second-order moment
        )
        return np.array(hstack(parts), dtype=np.float32)
Exemple #12
0
#!/usr/bin/env python
from yael import yael
import time

# Demo: time a multi-threaded nearest-neighbour search on random vectors.

n = 20000  # number of vectors searched (size of `v`)
nq = 1000  # number of vectors in `q`
d = 128  # dimensionality of the vectors
nt = 2  # number of threads to use
k = 1  # number of nn returned

v = yael.fvec_new_rand(d * n)  # random set of vectors
q = yael.fvec_new_rand(d * nq)  # second random set, sized for nq vectors

idx = yael.ivec_new(nq * k)  # receives the neighbour indices
# NOTE(review): `dis` is allocated but never passed to knn_thread below;
# kept so the script's globals are unchanged.
dis = yael.fvec_new(nq * k)

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# `range` instead of `xrange` so this also runs on Python 3.
print([idx[i] for i in range(nq * k)])

# The timed call is the nearest-neighbour search, not k-means.
print('knn performed in %.3fs' % (t2 - t1))
Exemple #13
0
from yael import yael
import time

# Demo: time a multi-threaded nearest-neighbour search on random vectors.

n = 20000                         # number of vectors searched (size of v)
nq = 1000                         # number of vectors in q
d = 128                           # dimensionality of the vectors
nt = 2                            # number of threads to use
k = 1                             # number of nn returned

v = yael.fvec_new_rand(d * n)     # random set of vectors
q = yael.fvec_new_rand(d * nq)    # second random set, sized for nq vectors

idx = yael.ivec_new(nq * k)       # receives the neighbour indices
# NOTE(review): `dis` is allocated but never passed to knn_thread below;
# kept so the script's globals are unchanged.
dis = yael.fvec_new(nq * k)

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# Single-argument print(...) and range() give the same output on Python 2
# and also run on Python 3.
print([idx[i] for i in range(nq * k)])

# The timed call is the nearest-neighbour search, not k-means.
print('knn performed in %.3fs' % (t2 - t1))
Exemple #14
0
from yael import yael
import time

# Demo: run yael's k-means on random vectors and report the cluster sizes.

k = 100                           # number of clusters to create
d = 128                           # dimensionality of the vectors
n = 10000                         # number of vectors
nt = 10                           # number of threads to use
v = yael.fvec_new_rand(d * n)     # random set of vectors
niter = 30                        # number of iterations
redo = 1                          # number of redo

# Matlab equivalent, for reference:
#   [centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

centroids = yael.fvec_new(d * k)  # output: centroids
dis = yael.fvec_new(n)            # point-to-cluster distance
assign = yael.ivec_new(n)         # quantization index of each point
nassign = yael.ivec_new(k)        # output: number of vectors per centroid

# Wrap the counter buffer before the call so that it can be indexed from
# Python afterwards; the wrapped object is what gets passed to kmeans.
nassign = yael.IntArray.acquirepointer(nassign)

t1 = time.time()
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
t2 = time.time()

# Single-argument print(...) and range() give the same output on Python 2
# and also run on Python 3.
print([nassign[i] for i in range(k)])
print('kmeans performed in %.3fs' % (t2 - t1))