Esempio n. 1
0
def load_ground_truth(filename, file_format, dimension = 1000, nuse = 10000, offset = 0):
    """Load ground-truth vectors from `filename` as a 2-D numpy array.

    The file is read through yutils.load_vectors_fmt (verbose mode on), the
    raw buffer is converted with yael.ivec_to_numpy -- always the integer
    converter, whatever `file_format` is -- and the flat result is reshaped
    to (int(actual_nuse), dimension).

    Returns a tuple (array, actual_nuse), where `actual_nuse` is handed back
    exactly as yutils.load_vectors_fmt returned it (presumably the number of
    vectors actually read -- confirm against yutils).
    """
    loaded = yutils.load_vectors_fmt(filename, file_format, dimension, nuse, offset, verbose = True)
    raw_vecs, actual_nuse = loaded

    n_rows = int(actual_nuse)
    matrix = yael.ivec_to_numpy(raw_vecs, n_rows * dimension).reshape((n_rows, dimension))

    return (matrix, actual_nuse)
def load_features(filename, file_format, total_nuse, dimension, lsh, index_folder, offset = 0, run_index = 'n'):
    """Load feature vectors in chunks and either collect or index them.

    Vectors [offset, offset + total_nuse) are requested from `filename` in
    chunks of at most 10,000,000 through yutils.load_vectors_fmt and
    converted to numpy with yael.fvec_to_numpy ('fvecs') or
    yael.bvec_to_numpy ('bvecs').

    * run_index != 'y': every chunk is reshaped to (count, dimension) and
      the chunks are joined into one array, whose shape is printed and
      which is returned. None is returned when the requested range is empty.
    * run_index == 'y': every chunk is passed, still flat (the reshape is
      skipped for CUDA-based batch indexing), to
      index(lsh, chunk, vectors_loaded_so_far); if `index_folder` is not
      None, save_index(lsh, index_folder, chunk_start) is called after each
      chunk. None is returned.

    Raises ValueError if `file_format` is neither 'fvecs' nor 'bvecs'.
    """
    # Reject unknown formats up front instead of failing later on a None chunk.
    if file_format == 'fvecs':
        to_numpy = yael.fvec_to_numpy
    elif file_format == 'bvecs':
        to_numpy = yael.bvec_to_numpy
    else:
        raise ValueError("unsupported file_format: %r (expected 'fvecs' or 'bvecs')" % (file_format,))

    chunk_size = 10000000
    end = total_nuse + offset
    indexing = (run_index == 'y')

    chunks = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(filename, file_format, dimension, nuse, feature_idx_begin, verbose = True)
        actual_nuse = int(actual_nuse)

        part_np_feature_vecs = to_numpy(feature_vecs, actual_nuse * dimension)

        if indexing:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)
        else:
            chunks.append(part_np_feature_vecs.reshape((actual_nuse, dimension)))

        actual_total_nuse += actual_nuse

    if indexing or not chunks:
        return None

    # Join once at the end rather than re-copying the accumulated array for
    # every chunk; a single chunk is returned as-is, without a copy.
    if len(chunks) == 1:
        np_feature_vecs = chunks[0]
    else:
        np_feature_vecs = numpy.concatenate(chunks)

    print(np_feature_vecs.shape)

    return np_feature_vecs
Esempio n. 3
0
def load_ground_truth(filename,
                      file_format,
                      dimension=1000,
                      nuse=10000,
                      offset=0):
    """Read ground-truth vectors and return them as a (rows, dimension) array.

    Loading is delegated to yutils.load_vectors_fmt (called with
    verbose=True). The loaded buffer is converted with yael.ivec_to_numpy,
    independently of `file_format`, and reshaped so that each row holds
    `dimension` values.

    Returns:
        (ground_truth, actual_nuse): the reshaped numpy array and the second
        value returned by yutils.load_vectors_fmt, passed through unchanged
        (presumably the count of vectors actually loaded -- TODO confirm).
    """
    raw, actual_nuse = yutils.load_vectors_fmt(
        filename, file_format, dimension, nuse, offset, verbose=True)

    row_count = int(actual_nuse)
    flat = yael.ivec_to_numpy(raw, row_count * dimension)
    ground_truth = flat.reshape((row_count, dimension))

    return (ground_truth, actual_nuse)
Esempio n. 4
0
def load_features(filename, file_format, total_nuse, dimension, offset = 0):
    """Load feature vectors in chunks and return them as one 2-D numpy array.

    Vectors [offset, offset + total_nuse) are requested from `filename` in
    chunks of at most 10,000,000 through yutils.load_vectors_fmt, converted
    with yael.fvec_to_numpy ('fvecs') or yael.bvec_to_numpy ('bvecs'), and
    reshaped to (count, dimension). The shape of the joined array is printed
    before it is returned.

    Returns None when the requested range is empty (total_nuse <= 0).
    Raises ValueError if `file_format` is neither 'fvecs' nor 'bvecs'.
    """
    # Reject unknown formats up front instead of failing later on None.reshape.
    if file_format == 'fvecs':
        to_numpy = yael.fvec_to_numpy
    elif file_format == 'bvecs':
        to_numpy = yael.bvec_to_numpy
    else:
        raise ValueError("unsupported file_format: %r (expected 'fvecs' or 'bvecs')" % (file_format,))

    chunk_size = 10000000
    end = total_nuse + offset
    chunks = []

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(filename, file_format, dimension, nuse, feature_idx_begin, verbose = True)
        actual_nuse = int(actual_nuse)

        flat = to_numpy(feature_vecs, actual_nuse * dimension)
        chunks.append(flat.reshape((actual_nuse, dimension)))

    if not chunks:
        return None

    # Join once at the end rather than re-copying the accumulated array for
    # every chunk; a single chunk is returned as-is, without a copy.
    if len(chunks) == 1:
        np_feature_vecs = chunks[0]
    else:
        np_feature_vecs = numpy.concatenate(chunks)

    print(np_feature_vecs.shape)

    return np_feature_vecs
Esempio n. 5
0
def load_features(filename,
                  file_format,
                  total_nuse,
                  dimension,
                  lsh,
                  index_folder,
                  offset=0,
                  run_index='n'):
    """Load feature vectors chunk by chunk, collecting or indexing them.

    The range [offset, offset + total_nuse) is read from `filename` through
    yutils.load_vectors_fmt in chunks of at most 10,000,000 vectors. Each
    chunk is converted to numpy with yael.fvec_to_numpy for 'fvecs' files or
    yael.bvec_to_numpy for 'bvecs' files.

    Collect mode (run_index != 'y'):
        Chunks are reshaped to (count, dimension) and joined into a single
        array. Its shape is printed and the array is returned; None is
        returned if the requested range is empty.

    Index mode (run_index == 'y'):
        Each flat chunk (the reshape is skipped for CUDA-based batch
        indexing) is passed to index(lsh, chunk, vectors_loaded_so_far).
        When `index_folder` is not None, save_index(lsh, index_folder,
        chunk_start) is called after every chunk. Returns None.

    Raises:
        ValueError: if `file_format` is neither 'fvecs' nor 'bvecs'.
    """
    # Reject unknown formats up front instead of failing later on a None chunk.
    if file_format == 'fvecs':
        to_numpy = yael.fvec_to_numpy
    elif file_format == 'bvecs':
        to_numpy = yael.bvec_to_numpy
    else:
        raise ValueError(
            "unsupported file_format: %r (expected 'fvecs' or 'bvecs')" %
            (file_format, ))

    chunk_size = 10000000
    end = total_nuse + offset
    indexing = (run_index == 'y')

    chunks = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):

        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)

        (feature_vecs,
         actual_nuse) = yutils.load_vectors_fmt(filename,
                                                file_format,
                                                dimension,
                                                nuse,
                                                feature_idx_begin,
                                                verbose=True)
        actual_nuse = int(actual_nuse)

        part_np_feature_vecs = to_numpy(feature_vecs, actual_nuse * dimension)

        if indexing:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)
        else:
            chunks.append(
                part_np_feature_vecs.reshape((actual_nuse, dimension)))

        actual_total_nuse += actual_nuse

    if indexing or not chunks:
        return None

    # Join once at the end rather than re-copying the accumulated array for
    # every chunk; a single chunk is returned as-is, without a copy.
    if len(chunks) == 1:
        np_feature_vecs = chunks[0]
    else:
        np_feature_vecs = numpy.concatenate(chunks)

    print(np_feature_vecs.shape)

    return np_feature_vecs