Ejemplo n.º 1
0
def _gmm_to_numpy(gmm):
    """Copy the parameters of a yael GMM object into numpy arrays.

    Reads ``gmm.d`` and ``gmm.k`` and converts the ``w``, ``mu`` and
    ``sigma`` buffers with ``yael.fvec_to_numpy``.

    Returns:
        Tuple ``(w, mu, sigma)``: ``w`` holds ``k`` values, ``mu`` and
        ``sigma`` are each reshaped to ``(k, d)``.
    """
    n_dims = gmm.d
    n_components = gmm.k
    flat_size = n_dims * n_components
    matrix_shape = (n_components, n_dims)

    weights = yael.fvec_to_numpy(gmm.w, n_components)
    means = yael.fvec_to_numpy(gmm.mu, flat_size).reshape(matrix_shape)
    variances = yael.fvec_to_numpy(gmm.sigma, flat_size).reshape(matrix_shape)
    return weights, means, variances
Ejemplo n.º 2
0
def siftgeo_read(filename):
    """Read a .siftgeo file through yael and return numpy arrays.

    Returns:
        Tuple ``(v, meta)``.  ``v`` is the descriptor array reshaped to
        ``(n, d)`` and ``meta`` the metadata array reshaped to ``(n, 9)``,
        where ``n`` and ``d`` are reported by yael.  When yael reports zero
        descriptors, both arrays are built from ``[[]]`` and therefore have
        shape ``(1, 0)`` (uint8 and float32 respectively).

    Raises:
        IOError: if yael returns a negative descriptor count.
    """
    # Output arguments of the C call: one-element pointer arrays receive the
    # descriptor and metadata buffers, a two-int vector receives the sizes.
    desc_slot = yael.BytePtrArray(1)
    meta_slot = yael.FloatPtrArray(1)
    sizes = yael.ivec(2)

    n = yael.bvecs_new_from_siftgeo(
        filename, sizes, desc_slot.cast(), sizes.plus(1), meta_slot.cast())

    if n < 0:
        raise IOError("cannot read " + filename)

    if n == 0:
        empty_v = numpy.array([[]], dtype=numpy.uint8)
        # The original spelled the inner list as []*9, which is still [].
        empty_meta = numpy.array([[]], dtype=numpy.float32)
        return empty_v, empty_meta

    # Wrap the buffers returned by the C call (acquirepointer presumably
    # hands their ownership to the Python wrapper -- confirm in yael docs).
    desc_buf = yael.bvec.acquirepointer(desc_slot[0])
    meta_buf = yael.fvec.acquirepointer(meta_slot[0])

    d, d_meta = sizes[0], sizes[1]
    assert d_meta == 9

    v = yael.bvec_to_numpy(desc_buf, n * d).reshape((n, d))
    meta = yael.fvec_to_numpy(meta_buf, n * d_meta).reshape((n, d_meta))
    return v, meta
Ejemplo n.º 3
0
def flushBuffer(x, y, t):
    """Run the descriptors buffered for cell (x, y, t) through yael.gmm_fisher.

    ``c = cnt[x, y, t]`` descriptors are taken from the module-level
    ``buffer``.  For every ``(cutFrom, cutTo, fvSize, gmm, partName)`` entry
    of the module-level ``parts``, columns ``cutFrom..cutTo`` (inclusive) of
    those descriptors are passed to ``yael.gmm_fisher`` together with the
    part's ``gmm`` and the module-level ``flags``; the ``fvSize`` outputs of
    all parts are concatenated.

    Side effects:
        Adds ``c`` to ``ndescr[x, y, t]`` and resets ``cnt[x, y, t]`` to 0.

    Returns:
        1-D numpy array: the concatenated per-part vectors times ``sqrt(c)``.
    """
    c = int(cnt[x, y, t])
    fvs = []
    for cutFrom, cutTo, fvSize, gmm, partName in parts:
        desc = np.ascontiguousarray(buffer[x, y, t, :c, cutFrom:(1 + cutTo)])
        desc_c = yael.FloatArray.acquirepointer(yael.numpy_to_fvec(desc))
        # The output buffer was previously a bare fvec_new_0() result that
        # nothing ever released (one allocation leaked per part and call).
        # Wrap it like the input buffer so it is freed with its wrapper.
        # NOTE(review): relies on yael's acquirepointer ownership semantics.
        fv = yael.FloatArray.acquirepointer(yael.fvec_new_0(fvSize))
        yael.gmm_fisher(c, desc_c, gmm, flags, fv)
        # flatten() returns a copy, so the result does not alias fv.
        fvs.append(yael.fvec_to_numpy(fv, fvSize).flatten())

    ndescr[x, y, t] += c
    cnt[x, y, t] = 0
    return np.sqrt(c) * np.hstack(tuple(fvs))
Ejemplo n.º 4
0
def fvecs_read(filename, nmax = -1):
    """Read a .fvecs file through yael and return it as a 2-D numpy array.

    Args:
        filename: path of the file to read.
        nmax: when negative (the default) the whole file is read with
            ``yael.fvecs_new_read``; otherwise an open file and ``nmax`` are
            passed to ``yael.fvecs_new_fread_max``.

    Returns:
        Array of shape ``(n, d)`` as reported by yael; ``(0, 0)`` when yael
        reports zero vectors.

    Raises:
        IOError: if yael reports ``n == -1``.
    """
    if nmax < 0:
        (fvecs, n, d) = yael.fvecs_new_read(filename)
    else:
        # The file holds binary data, so open it in binary mode, and close
        # the handle as soon as yael is done (it used to be left open).
        with open(filename, "rb") as f:
            (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)
    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        d = 0
    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
Ejemplo n.º 5
0
def flushBuffer(x, y, t):
    """Run the descriptors buffered for cell (x, y, t) through yael.gmm_fisher.

    ``c = cnt[x, y, t]`` descriptors are taken from the module-level
    ``buffer``.  For every ``(cutFrom, cutTo, fvSize, gmm, partName)`` entry
    of the module-level ``parts``, columns ``cutFrom..cutTo`` (inclusive) of
    those descriptors are passed to ``yael.gmm_fisher`` together with the
    part's ``gmm`` and the module-level ``flags``; the ``fvSize`` outputs of
    all parts are concatenated.

    Side effects:
        Adds ``c`` to ``ndescr[x, y, t]`` and resets ``cnt[x, y, t]`` to 0.

    Returns:
        1-D numpy array: the concatenated per-part vectors times ``sqrt(c)``.
    """
    c = int(cnt[x, y, t])
    fvs = []
    for cutFrom, cutTo, fvSize, gmm, partName in parts:
        desc = np.ascontiguousarray(buffer[x, y, t, :c, cutFrom:(1 + cutTo)])
        desc_c = yael.FloatArray.acquirepointer(yael.numpy_to_fvec(desc))
        # The output buffer was previously a bare fvec_new_0() result that
        # nothing ever released (one allocation leaked per part and call).
        # Wrap it like the input buffer so it is freed with its wrapper.
        # NOTE(review): relies on yael's acquirepointer ownership semantics.
        fv = yael.FloatArray.acquirepointer(yael.fvec_new_0(fvSize))
        yael.gmm_fisher(c, desc_c, gmm, flags, fv)
        # flatten() returns a copy, so the result does not alias fv.
        fvs.append(yael.fvec_to_numpy(fv, fvSize).flatten())

    ndescr[x, y, t] += c
    cnt[x, y, t] = 0
    return np.sqrt(c) * np.hstack(tuple(fvs))
Ejemplo n.º 6
0
def load_features(filename, file_format, total_nuse, dimension, lsh, index_folder, offset = 0, run_index = 'n'):
    """Load feature vectors chunk by chunk, optionally indexing each chunk.

    Vectors ``offset .. offset + total_nuse`` are requested from
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000 vectors.

    Args:
        filename: file passed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: number of vectors requested in total.
        dimension: number of components per vector.
        lsh: index object handed to ``index`` / ``save_index``.
        index_folder: when not None (and ``run_index == 'y'``), passed to
            ``save_index`` after every chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` passes every chunk, flat (not reshaped), to
            ``index`` and keeps nothing in memory; any other value
            accumulates the chunks instead.

    Returns:
        Array of shape ``(loaded, dimension)`` when ``run_index != 'y'``
        and at least one chunk was read; otherwise None.

    Raises:
        ValueError: if a chunk is read with an unsupported ``file_format``.
    """
    chunk_size = 10000000
    end = total_nuse + offset

    np_feature_vecs = None
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        # Call form prints the same text under Python 2 and Python 3.
        print("loading from " + str(feature_idx_begin))

        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(filename, file_format, dimension, nuse, feature_idx_begin, verbose = True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(feature_vecs, actual_nuse * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(feature_vecs, actual_nuse * dimension)
        else:
            # Previously fell through with None and failed later, obscurely.
            raise ValueError("unsupported file format: " + str(file_format))

        if run_index != 'y':
            part_np_feature_vecs = part_np_feature_vecs.reshape((actual_nuse, dimension))

            # "!= None" compares element-wise on an ndarray; use identity.
            if np_feature_vecs is not None:
                np_feature_vecs = numpy.concatenate((np_feature_vecs, part_np_feature_vecs))
            else:
                np_feature_vecs = part_np_feature_vecs
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += actual_nuse

    # Nothing is accumulated when no chunk was read (total_nuse == 0).
    if run_index != 'y' and np_feature_vecs is not None:
        print(np_feature_vecs.shape)

    return np_feature_vecs
Ejemplo n.º 7
0
def partial_pca(mat, nev=6, nt=1):
    """Run yael's ``fmat_new_pca_part`` on the mean-centred rows of ``mat``.

    Args:
        mat: 2-D array; validated with ``_check_row_float32``.
        nev: number of singular values / output rows requested from yael.
        nt: accepted but not used by this function.

    Returns:
        Tuple ``(avg, singvals, pcamat)``: the column means of ``mat``, the
        ``nev``-element float32 array handed to yael to fill, and yael's
        result converted to numpy with shape ``(nev, d)``.
    """
    _check_row_float32(mat)
    n_rows, n_cols = mat.shape

    avg = mat.mean(axis=0)
    centered = mat - avg[numpy.newaxis, :]

    singvals = numpy.empty(nev, dtype=numpy.float32)

    centered_ref = yael.numpy_to_fvec_ref(centered)
    singvals_ref = yael.numpy_to_fvec_ref(singvals)
    raw_pca = yael.fmat_new_pca_part(n_cols, n_rows, nev,
                                     centered_ref, singvals_ref)
    assert raw_pca != None

    wrapped_pca = yael.fvec.acquirepointer(raw_pca)
    pcamat = yael.fvec_to_numpy(wrapped_pca, (nev, n_cols))

    return avg, singvals, pcamat
Ejemplo n.º 8
0
def partial_pca(mat, nev=6, nt=1):
    """Centre ``mat`` on its column means and call yael's partial PCA.

    ``mat`` is first validated with ``_check_row_float32``; ``nt`` is
    accepted but never read.

    Returns:
        ``(avg, singvals, pcamat)`` where ``avg`` is ``mat.mean(axis=0)``,
        ``singvals`` is the float32 array of length ``nev`` passed to
        ``yael.fmat_new_pca_part`` as output, and ``pcamat`` is that call's
        result converted with ``yael.fvec_to_numpy`` to shape ``(nev, d)``.
    """
    _check_row_float32(mat)
    n, d = mat.shape

    avg = mat.mean(axis=0)
    shifted = mat - avg[numpy.newaxis, :]
    singvals = numpy.empty(nev, dtype=numpy.float32)

    pca_ptr = yael.fmat_new_pca_part(
        d, n, nev,
        yael.numpy_to_fvec_ref(shifted),
        yael.numpy_to_fvec_ref(singvals))
    assert pca_ptr != None

    pca_vec = yael.fvec.acquirepointer(pca_ptr)
    return avg, singvals, yael.fvec_to_numpy(pca_vec, (nev, d))
Ejemplo n.º 9
0
def fvecs_read(filename, nmax = -1, c_contiguous = True):
    """Read a .fvecs file into a 2-D float32 numpy array.

    With ``nmax < 0`` the file is parsed with numpy: it is read as a flat
    float32 buffer, the first 4 bytes are taken as the int32 dimension
    ``dim``, and the buffer is viewed as rows of ``1 + dim`` items whose
    first item must equal ``dim``.

    Args:
        filename: path of the file to read.
        nmax: negative (default) reads the whole file with numpy; otherwise
            an open file and ``nmax`` are passed to
            ``yael.fvecs_new_fread_max``.
        c_contiguous: numpy path only.  When true the result is a compact
            copy; when false it is a strided view into the raw file buffer.

    Returns:
        float32 array of shape ``(n, dim)``; ``(0, 0)`` for an empty file.

    Raises:
        IOError: if the leading dimension is not positive, if rows carry
            different dimensions, or if yael reports a read failure.
    """
    if nmax < 0:
        fv = numpy.fromfile(filename, dtype = numpy.float32)
        if fv.size == 0:
            # Keep the dtype consistent with the non-empty case.
            return numpy.zeros((0, 0), dtype = numpy.float32)
        dim = fv.view(numpy.int32)[0]
        # Explicit check: an assert would vanish under "python -O".
        if dim <= 0:
            raise IOError("invalid vector dimension in " + filename)
        fv = fv.reshape(-1, 1 + dim)
        # ndarray.all() stays in C; builtin all() iterates element by element.
        if not (fv.view(numpy.int32)[:, 0] == dim).all():
            raise IOError("non-uniform vector sizes in " + filename)
        fv = fv[:, 1:]
        if c_contiguous:
            fv = fv.copy()
        return fv
    # Binary data: open in binary mode and close the handle once yael is done.
    with open(filename, "rb") as f:
        (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)
    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        d = 0
    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
Ejemplo n.º 10
0
def fvecs_read(filename, nmax=-1, c_contiguous=True):
    """Read a .fvecs file into a 2-D float32 numpy array.

    With ``nmax < 0`` the file is parsed with numpy: it is read as a flat
    float32 buffer, the first 4 bytes are taken as the int32 dimension
    ``dim``, and the buffer is viewed as rows of ``1 + dim`` items whose
    first item must equal ``dim``.

    Args:
        filename: path of the file to read.
        nmax: negative (default) reads the whole file with numpy; otherwise
            an open file and ``nmax`` are passed to
            ``yael.fvecs_new_fread_max``.
        c_contiguous: numpy path only.  When true the result is a compact
            copy; when false it is a strided view into the raw file buffer.

    Returns:
        float32 array of shape ``(n, dim)``; ``(0, 0)`` for an empty file.

    Raises:
        IOError: if the leading dimension is not positive, if rows carry
            different dimensions, or if yael reports a read failure.
    """
    if nmax < 0:
        fv = numpy.fromfile(filename, dtype=numpy.float32)
        if fv.size == 0:
            # Keep the dtype consistent with the non-empty case.
            return numpy.zeros((0, 0), dtype=numpy.float32)
        dim = fv.view(numpy.int32)[0]
        # Explicit check: an assert would vanish under "python -O".
        if dim <= 0:
            raise IOError("invalid vector dimension in " + filename)
        fv = fv.reshape(-1, 1 + dim)
        # ndarray.all() stays in C; builtin all() iterates element by element.
        if not (fv.view(numpy.int32)[:, 0] == dim).all():
            raise IOError("non-uniform vector sizes in " + filename)
        fv = fv[:, 1:]
        if c_contiguous:
            fv = fv.copy()
        return fv
    # Binary data: open in binary mode and close the handle once yael is done.
    with open(filename, "rb") as f:
        (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)
    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        d = 0
    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
Ejemplo n.º 11
0
def load_features(filename, file_format, total_nuse, dimension, offset = 0):
    """Load feature vectors chunk by chunk into one numpy array.

    Vectors ``offset .. offset + total_nuse`` are requested from
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000 vectors;
    each chunk is converted with yael, reshaped to ``(count, dimension)``
    and appended to the result.

    Args:
        filename: file passed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: number of vectors requested in total.
        dimension: number of components per vector.
        offset: position of the first vector to load.

    Returns:
        Array of shape ``(loaded, dimension)``, or None when no chunk was
        read (``total_nuse == 0``).

    Raises:
        ValueError: if a chunk is read with an unsupported ``file_format``.
    """
    chunk_size = 10000000
    end = total_nuse + offset

    np_feature_vecs = None
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        # Call form prints the same text under Python 2 and Python 3.
        print("loading from " + str(feature_idx_begin))

        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(filename, file_format, dimension, nuse, feature_idx_begin, verbose = True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(feature_vecs, actual_nuse * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(feature_vecs, actual_nuse * dimension)
        else:
            # Previously fell through with None and failed on .reshape().
            raise ValueError("unsupported file format: " + str(file_format))

        part_np_feature_vecs = part_np_feature_vecs.reshape((actual_nuse, dimension))

        # "!= None" compares element-wise on an ndarray; use identity.
        if np_feature_vecs is not None:
            np_feature_vecs = numpy.concatenate((np_feature_vecs, part_np_feature_vecs))
        else:
            np_feature_vecs = part_np_feature_vecs

        actual_total_nuse += actual_nuse

    # Nothing is accumulated when no chunk was read (total_nuse == 0).
    if np_feature_vecs is not None:
        print(np_feature_vecs.shape)

    return np_feature_vecs
Ejemplo n.º 12
0
def load_features(filename,
                  file_format,
                  total_nuse,
                  dimension,
                  lsh,
                  index_folder,
                  offset=0,
                  run_index='n'):
    """Load feature vectors chunk by chunk, optionally indexing each chunk.

    Vectors ``offset .. offset + total_nuse`` are requested from
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000 vectors.

    Args:
        filename: file passed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: number of vectors requested in total.
        dimension: number of components per vector.
        lsh: index object handed to ``index`` / ``save_index``.
        index_folder: when not None (and ``run_index == 'y'``), passed to
            ``save_index`` after every chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` passes every chunk, flat (not reshaped), to
            ``index`` and keeps nothing in memory; any other value
            accumulates the chunks instead.

    Returns:
        Array of shape ``(loaded, dimension)`` when ``run_index != 'y'``
        and at least one chunk was read; otherwise None.

    Raises:
        ValueError: if a chunk is read with an unsupported ``file_format``.
    """
    chunk_size = 10000000
    end = total_nuse + offset

    np_feature_vecs = None
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        # Call form prints the same text under Python 2 and Python 3.
        print("loading from " + str(feature_idx_begin))

        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs,
         actual_nuse) = yutils.load_vectors_fmt(filename,
                                                file_format,
                                                dimension,
                                                nuse,
                                                feature_idx_begin,
                                                verbose=True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs,
                actual_nuse * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs,
                actual_nuse * dimension)
        else:
            # Previously fell through with None and failed later, obscurely.
            raise ValueError("unsupported file format: " + str(file_format))

        if run_index != 'y':
            part_np_feature_vecs = part_np_feature_vecs.reshape(
                (actual_nuse, dimension))

            # "!= None" compares element-wise on an ndarray; use identity.
            if np_feature_vecs is not None:
                np_feature_vecs = numpy.concatenate(
                    (np_feature_vecs, part_np_feature_vecs))
            else:
                np_feature_vecs = part_np_feature_vecs
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += actual_nuse

    # Nothing is accumulated when no chunk was read (total_nuse == 0).
    if run_index != 'y' and np_feature_vecs is not None:
        print(np_feature_vecs.shape)

    return np_feature_vecs