Example #1
0
def readXvecs(filename, dim, count, tonumpy=True):
    """Read `count` vectors of dimension `dim` from an *vecs file.

    The reader is chosen from the text after the last '.' in `filename`:

    * ``bvecs``  - read with ``yael.b2fvecs_read`` into a yael float buffer.
    * ``fvecs``  - read with ``yael.fvecs_read`` into a yael float buffer.
    * ``ivecs``  - read with ``yael.ivecs_fread`` into a yael int buffer.
    * ``i8vecs`` - read directly with numpy: every vector is stored as 4
      bytes that are skipped (presumably the dimension header - not
      validated here) followed by `dim` signed 8-bit values.

    Args:
        filename: path of the file to read.
        dim: number of components per vector.
        count: number of vectors to read.
        tonumpy: for the yael-backed formats, convert the buffer to a
            ``(count, dim)`` numpy array and free the yael buffer. When
            false the raw yael buffer is returned (the caller is then
            presumably responsible for releasing it). Ignored for
            ``i8vecs``, which always yields a numpy array.

    Returns:
        A ``(count, dim)`` numpy array, or the raw yael buffer when
        `tonumpy` is false. ``i8vecs`` data is returned as float32.

    Raises:
        ValueError: if the extension is not supported, or an ``i8vecs``
            file holds fewer than `count` complete vectors.
    """
    extension = filename.strip().split('.')[-1]

    if extension == 'i8vecs':
        # Each record is a 4-byte header plus `dim` int8 values. Read all
        # records in a single call and drop the header columns instead of
        # looping over the vectors one by one.
        record = 4 + dim
        # Binary mode: the payload is raw bytes, and the context manager
        # guarantees the handle is closed.
        with open(filename, 'rb') as fh:
            raw = np.fromfile(fh, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError('File too short: expected %d vectors of '
                             'dimension %d' % (count, dim))
        return raw.reshape(count, record)[:, 4:].astype('float32')

    if extension in ('bvecs', 'fvecs'):
        # Both formats end up in a float buffer; only the reader differs.
        if extension == 'bvecs':
            reader = yael.b2fvecs_read
        else:
            reader = yael.fvecs_read
        points = yael.fvec_new(dim * count)
        reader(filename, dim, count, points)
        if not tonumpy:
            return points
        a = yael.fvec_to_numpy(points, (count, dim))
        yael.free(points)
        return a

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Close the handle once yael has finished reading from it.
        with open(filename, 'rb') as fh:
            yael.ivecs_fread(fh, points, count, dim)
        if not tonumpy:
            return points
        a = yael.ivec_to_numpy(points, (count, dim))
        yael.free(points)
        return a

    # ValueError is a subclass of Exception, so existing handlers still work.
    raise ValueError('Bad file extension!')
Example #2
0
def readXvecs(filename, dim, count, tonumpy=True):
    """Read `count` vectors of dimension `dim` from an *vecs file.

    The reader is chosen from the text after the last '.' in `filename`:

    * ``bvecs``  - read with ``yael.b2fvecs_read`` into a yael float buffer.
    * ``fvecs``  - read with ``yael.fvecs_read`` into a yael float buffer.
    * ``ivecs``  - read with ``yael.ivecs_fread`` into a yael int buffer.
    * ``i8vecs`` - read directly with numpy: every vector is stored as 4
      bytes that are skipped (presumably the dimension header - not
      validated here) followed by `dim` signed 8-bit values.

    Args:
        filename: path of the file to read.
        dim: number of components per vector.
        count: number of vectors to read.
        tonumpy: for the yael-backed formats, convert the buffer to a
            ``(count, dim)`` numpy array and free the yael buffer. When
            false the raw yael buffer is returned (the caller is then
            presumably responsible for releasing it). Ignored for
            ``i8vecs``, which always yields a numpy array.

    Returns:
        A ``(count, dim)`` numpy array, or the raw yael buffer when
        `tonumpy` is false. ``i8vecs`` data is returned as float32.

    Raises:
        ValueError: if the extension is not supported, or an ``i8vecs``
            file holds fewer than `count` complete vectors.
    """
    extension = filename.strip().split('.')[-1]

    if extension == 'i8vecs':
        # Each record is a 4-byte header plus `dim` int8 values. Read all
        # records in a single call and drop the header columns instead of
        # looping over the vectors one by one.
        record = 4 + dim
        # Binary mode: the payload is raw bytes, and the context manager
        # guarantees the handle is closed.
        with open(filename, 'rb') as fh:
            raw = np.fromfile(fh, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError('File too short: expected %d vectors of '
                             'dimension %d' % (count, dim))
        return raw.reshape(count, record)[:, 4:].astype('float32')

    if extension in ('bvecs', 'fvecs'):
        # Both formats end up in a float buffer; only the reader differs.
        if extension == 'bvecs':
            reader = yael.b2fvecs_read
        else:
            reader = yael.fvecs_read
        points = yael.fvec_new(dim * count)
        reader(filename, dim, count, points)
        if not tonumpy:
            return points
        a = yael.fvec_to_numpy(points, (count, dim))
        yael.free(points)
        return a

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Close the handle once yael has finished reading from it.
        with open(filename, 'rb') as fh:
            yael.ivecs_fread(fh, points, count, dim)
        if not tonumpy:
            return points
        a = yael.ivec_to_numpy(points, (count, dim))
        yael.free(points)
        return a

    # ValueError is a subclass of Exception, so existing handlers still work.
    raise ValueError('Bad file extension!')
Example #3
0
#!/usr/bin/env python
from yael import yael
import time

# Times a multi-threaded nearest-neighbour search on random vectors and
# prints the resulting neighbour indices.

n = 20000  # number of vectors in the set v
nq = 1000  # number of vectors in the set q
d = 128  # dimensionality of the vectors
nt = 2  # number of threads to use
k = 1  # number of nn returned

v = yael.fvec_new_rand(d * n)  # random set of vectors
q = yael.fvec_new_rand(d * nq)  # second random set, nq vectors

idx = yael.ivec_new(nq * k)  # output buffer filled by knn_thread
# NOTE(review): dis is allocated but never passed to knn_thread below.
dis = yael.fvec_new(nq * k)

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw int buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# range() instead of the Python-2-only xrange(): works on both major versions.
print([idx[i] for i in range(nq * k)])

# The timed call is knn_thread, so label the measurement accordingly.
print('knn performed in %.3fs' % (t2 - t1))
Example #4
0
from yael import yael
import time

# Times a multi-threaded nearest-neighbour search on random vectors and
# prints the resulting neighbour indices.

n = 20000                         # number of vectors in the set v
nq = 1000                         # number of vectors in the set q
d = 128                           # dimensionality of the vectors
nt = 2                            # number of threads to use
k = 1                             # number of nn returned

v = yael.fvec_new_rand(d * n)     # random set of vectors
q = yael.fvec_new_rand(d * nq)    # second random set, nq vectors

idx = yael.ivec_new(nq * k)       # output buffer filled by knn_thread
# NOTE(review): dis is allocated but never passed to knn_thread below.
dis = yael.fvec_new(nq * k)


t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw int buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# Single-argument print(...) and range() behave the same on Python 2 and 3,
# unlike the print statement and xrange().
print([idx[i] for i in range(nq * k)])

# The timed call is knn_thread, so label the measurement accordingly.
print('knn performed in %.3fs' % (t2 - t1))
Example #5
0
from yael import yael
import time

# Times a multi-threaded k-means run on random vectors and prints how many
# vectors were assigned to each centroid.

k = 100                           # number of clusters to create
d = 128                           # dimensionality of the vectors
n = 10000                         # number of vectors
nt = 10                           # number of threads to use
v = yael.fvec_new_rand(d * n)     # random set of vectors
niter = 30                        # number of iterations
redo = 1                          # number of redo

# Left over from the original author (looks like the call in another
# yael interface - TODO confirm):
#[centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

centroids = yael.fvec_new(d * k)  # output: centroids
dis = yael.fvec_new(n)            # point-to-cluster distance
assign = yael.ivec_new(n)         # quantization index of each point
nassign = yael.ivec_new(k)        # output: number of vectors assigned to each centroid

# Wrap the raw int buffer so the counts can be indexed from Python afterwards.
nassign = yael.IntArray.acquirepointer(nassign)

t1 = time.time()
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
t2 = time.time()

# Single-argument print(...) and range() behave the same on Python 2 and 3,
# unlike the print statement and xrange().
print([nassign[i] for i in range(k)])
print('kmeans performed in %.3fs' % (t2 - t1))