Beispiel #1
0
def siftgeo_read(filename):
    """Read the descriptors and their metadata from a siftgeo file.

    Args:
        filename: path passed to ``yael.bvecs_new_from_siftgeo``.

    Returns:
        A ``(v, meta)`` tuple of numpy arrays.  ``v`` has one row per
        descriptor and ``d`` columns, ``meta`` has one row per descriptor
        and 9 columns.  When the file holds no descriptors both arrays have
        zero rows (``v`` is uint8 with zero columns, ``meta`` is float32
        with 9 columns).

    Raises:
        IOError: if yael reports a negative count (file could not be read)
            or if the metadata width reported by yael is not 9.
    """
    # I/O via double pointers (too lazy to make proper swig interface)
    v_ptr = yael.BytePtrArray(1)
    meta_ptr = yael.FloatPtrArray(1)
    dims = yael.ivec(2)  # dims[0]: descriptor width, dims[1]: meta width

    n = yael.bvecs_new_from_siftgeo(filename, dims, v_ptr.cast(),
                                    dims.plus(1), meta_ptr.cast())

    if n < 0:
        raise IOError("cannot read " + filename)
    if n == 0:
        # Zero rows, so that len() of both results matches the descriptor
        # count.  The descriptor width is not known to be set by yael in
        # this case (TODO confirm), hence zero columns for v.
        v = numpy.zeros((0, 0), dtype=numpy.uint8)
        meta = numpy.zeros((0, 9), dtype=numpy.float32)
        return v, meta

    # Wrap the raw buffers before any validation below.
    # NOTE(review): acquirepointer presumably hands ownership of the C
    # buffer to the Python wrapper - confirm against the yael bindings.
    v_buf = yael.bvec.acquirepointer(v_ptr[0])
    meta_buf = yael.fvec.acquirepointer(meta_ptr[0])

    d = dims[0]
    d_meta = dims[1]
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if d_meta != 9:
        raise IOError("unexpected meta dimension %d in %s"
                      % (d_meta, filename))

    v = yael.bvec_to_numpy(v_buf, n * d).reshape((n, d))
    meta = yael.fvec_to_numpy(meta_buf, n * d_meta).reshape((n, d_meta))

    return v, meta
def load_features(filename, file_format, total_nuse, dimension, lsh, index_folder, offset = 0, run_index = 'n'):
    """Load feature vectors chunk by chunk, collecting or indexing them.

    The range ``[offset, offset + total_nuse)`` is read in chunks of at most
    10,000,000 vectors through ``yutils.load_vectors_fmt``.

    Args:
        filename: file handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        lsh: index object forwarded to ``index`` / ``save_index``.
        index_folder: when not ``None`` (and ``run_index == 'y'``), the
            index is saved there after every chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` feeds every chunk to ``index`` instead of
            accumulating it; any other value accumulates the chunks.

    Returns:
        A numpy array with ``dimension`` columns holding all loaded vectors,
        or ``None`` when ``run_index == 'y'`` or no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    # Fail early: an unknown format would otherwise leave the chunk as None.
    if file_format not in ('fvecs', 'bvecs'):
        raise ValueError("unsupported file_format: %r" % (file_format,))

    chunk_size = 10000000
    end = total_nuse + offset
    collecting = run_index != 'y'
    parts = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part = yael.fvec_to_numpy(feature_vecs, actual_nuse * dimension)
        else:
            part = yael.bvec_to_numpy(feature_vecs, actual_nuse * dimension)

        if collecting:
            parts.append(part.reshape((actual_nuse, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part, actual_total_nuse)
            del part
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += actual_nuse

    if not collecting or not parts:
        return None

    # Join once at the end instead of re-copying the growing array per chunk.
    if len(parts) == 1:
        np_feature_vecs = parts[0]
    else:
        np_feature_vecs = numpy.concatenate(parts)

    print(np_feature_vecs.shape)

    return np_feature_vecs
Beispiel #3
0
def load_features(filename, file_format, total_nuse, dimension, offset = 0):
    """Load feature vectors chunk by chunk into one numpy array.

    The range ``[offset, offset + total_nuse)`` is read in chunks of at most
    10,000,000 vectors through ``yutils.load_vectors_fmt``.

    Args:
        filename: file handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        offset: position of the first vector to load.

    Returns:
        A numpy array with ``dimension`` columns holding all loaded vectors,
        or ``None`` when no chunk was loaded (``total_nuse <= 0``).

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    # Fail early: an unknown format would otherwise leave the chunk as None.
    if file_format not in ('fvecs', 'bvecs'):
        raise ValueError("unsupported file_format: %r" % (file_format,))

    chunk_size = 10000000
    end = total_nuse + offset
    parts = []

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part = yael.fvec_to_numpy(feature_vecs, actual_nuse * dimension)
        else:
            part = yael.bvec_to_numpy(feature_vecs, actual_nuse * dimension)

        parts.append(part.reshape((actual_nuse, dimension)))

    if not parts:
        return None

    # Join once at the end instead of re-copying the growing array per chunk.
    if len(parts) == 1:
        np_feature_vecs = parts[0]
    else:
        np_feature_vecs = numpy.concatenate(parts)

    print(np_feature_vecs.shape)

    return np_feature_vecs
Beispiel #4
0
def load_features(filename,
                  file_format,
                  total_nuse,
                  dimension,
                  lsh,
                  index_folder,
                  offset=0,
                  run_index='n'):
    """Load feature vectors chunk by chunk, collecting or indexing them.

    The range ``[offset, offset + total_nuse)`` is read in chunks of at most
    10,000,000 vectors through ``yutils.load_vectors_fmt``.

    Args:
        filename: file handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        lsh: index object forwarded to ``index`` / ``save_index``.
        index_folder: when not ``None`` (and ``run_index == 'y'``), the
            index is saved there after every chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` feeds every chunk to ``index`` instead of
            accumulating it; any other value accumulates the chunks.

    Returns:
        A numpy array with ``dimension`` columns holding all loaded vectors,
        or ``None`` when ``run_index == 'y'`` or no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    # Fail early: an unknown format would otherwise leave the chunk as None.
    if file_format not in ('fvecs', 'bvecs'):
        raise ValueError("unsupported file_format: %r" % (file_format,))

    chunk_size = 10000000
    end = total_nuse + offset
    collecting = run_index != 'y'
    parts = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs,
         actual_nuse) = yutils.load_vectors_fmt(filename,
                                                file_format,
                                                dimension,
                                                nuse,
                                                feature_idx_begin,
                                                verbose=True)
        actual_nuse = int(actual_nuse)

        if file_format == 'fvecs':
            part = yael.fvec_to_numpy(feature_vecs,
                                      actual_nuse * dimension)
        else:
            part = yael.bvec_to_numpy(feature_vecs,
                                      actual_nuse * dimension)

        if collecting:
            parts.append(part.reshape((actual_nuse, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part, actual_total_nuse)
            del part
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += actual_nuse

    if not collecting or not parts:
        return None

    # Join once at the end instead of re-copying the growing array per chunk.
    if len(parts) == 1:
        np_feature_vecs = parts[0]
    else:
        np_feature_vecs = numpy.concatenate(parts)

    print(np_feature_vecs.shape)

    return np_feature_vecs