def load_ground_truth(filename, file_format, dimension=1000, nuse=10000, offset=0):
    """Load ground-truth vectors from ``filename`` as a 2-D array.

    The raw vectors are read with ``yutils.load_vectors_fmt`` (verbose mode
    on), converted with ``yael.ivec_to_numpy`` and reshaped so that each row
    holds one vector of ``dimension`` entries.

    Args:
        filename: Path handed to ``yutils.load_vectors_fmt``.
        file_format: Format identifier handed to ``yutils.load_vectors_fmt``.
        dimension: Number of entries per vector (row width of the result).
        nuse: Number of vectors requested from the loader.
        offset: Index of the first vector to read.

    Returns:
        A ``(vectors, actual_nuse)`` tuple: the reshaped array and the count
        reported by the loader, passed through unchanged.
    """
    loaded = yutils.load_vectors_fmt(
        filename, file_format, dimension, nuse, offset, verbose=True)
    raw_vecs, actual_nuse = loaded

    # The loader's count is coerced to int before being used as a size.
    row_count = int(actual_nuse)
    flat = yael.ivec_to_numpy(raw_vecs, row_count * dimension)
    return (flat.reshape((row_count, dimension)), actual_nuse)
def load_features(filename, file_format, total_nuse, dimension, lsh,
                  index_folder, offset=0, run_index='n'):
    """Load feature vectors in chunks and either stack them or index them.

    Vectors ``offset`` up to ``offset + total_nuse`` are read through
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000 vectors.

    * When ``run_index`` is not ``'y'``, every chunk is reshaped to
      ``(count, dimension)`` and appended to one array, whose shape is
      printed before it is returned.
    * When ``run_index`` is ``'y'``, every chunk is passed (still flat) to
      ``index(lsh, chunk, vectors_indexed_so_far)`` and, if ``index_folder``
      is not ``None``, ``save_index(lsh, index_folder, chunk_start)`` is
      called afterwards.  Nothing is accumulated in this mode.

    Args:
        filename: Path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: Total number of vectors to read.
        dimension: Number of entries per vector.
        lsh: Index object forwarded to ``index`` / ``save_index``.
        index_folder: Folder forwarded to ``save_index``; ``None`` disables
            saving.
        offset: Index of the first vector to read.
        run_index: ``'y'`` to index chunk by chunk instead of returning data.

    Returns:
        The stacked ``(n, dimension)`` array, or ``None`` when
        ``run_index == 'y'``.

    Raises:
        ValueError: If a chunk was loaded but ``file_format`` is neither
            ``'fvecs'`` nor ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset

    np_feature_vecs = None
    actual_total_nuse = 0
    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument parenthesised print: same output on Python 2 and 3.
        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        count = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, count * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, count * dimension)
        else:
            # Previously the chunk stayed None and failed later with an
            # unrelated-looking error; report the real cause instead.
            raise ValueError(
                "unsupported file_format %r; expected 'fvecs' or 'bvecs'"
                % (file_format,))

        if run_index != 'y':
            part_np_feature_vecs = part_np_feature_vecs.reshape(
                (count, dimension))
            # Identity test: `array != None` is an elementwise comparison in
            # NumPy and cannot be used as a truth value.
            if np_feature_vecs is not None:
                np_feature_vecs = numpy.concatenate(
                    (np_feature_vecs, part_np_feature_vecs))
            else:
                np_feature_vecs = part_np_feature_vecs
        else:
            # For CUDA-based batch indexing the reshaping is skipped and the
            # flat buffer is indexed directly.
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += count

    if run_index != 'y':
        print(np_feature_vecs.shape)
    return np_feature_vecs
def load_ground_truth(filename, file_format, dimension=1000, nuse=10000, offset=0):
    """Read ground-truth vectors and return them one vector per row.

    Args:
        filename: Path passed to ``yutils.load_vectors_fmt``.
        file_format: Format identifier passed to ``yutils.load_vectors_fmt``.
        dimension: Entries per vector; becomes the column count.
        nuse: How many vectors to ask the loader for.
        offset: Position of the first vector to read.

    Returns:
        Tuple ``(array, actual_nuse)`` where ``array`` has shape
        ``(int(actual_nuse), dimension)`` and ``actual_nuse`` is the count
        returned by the loader.
    """
    raw, actual_nuse = yutils.load_vectors_fmt(
        filename, file_format, dimension, nuse, offset, verbose=True)

    n_rows = int(actual_nuse)
    n_values = n_rows * dimension

    # Convert the loader's buffer to a flat array, then give it its 2-D shape.
    ground_truth = yael.ivec_to_numpy(raw, n_values)
    ground_truth = ground_truth.reshape((n_rows, dimension))
    return (ground_truth, actual_nuse)
def load_features(filename, file_format, total_nuse, dimension, offset=0):
    """Load feature vectors in chunks and return them as one 2-D array.

    Vectors ``offset`` up to ``offset + total_nuse`` are read through
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000 vectors.
    Each chunk is converted with the yael converter matching
    ``file_format``, reshaped to ``(count, dimension)`` and appended to the
    result.  The final shape is printed before returning.

    Args:
        filename: Path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: Total number of vectors to read.
        dimension: Number of entries per vector.
        offset: Index of the first vector to read.

    Returns:
        The stacked array with ``dimension`` columns.

    Raises:
        ValueError: If a chunk was loaded but ``file_format`` is neither
            ``'fvecs'`` nor ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset

    np_feature_vecs = None
    actual_total_nuse = 0  # running count of vectors loaded so far
    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument parenthesised print: same output on Python 2 and 3.
        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        count = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, count * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, count * dimension)
        else:
            # Previously this fell through to `None.reshape(...)`.
            raise ValueError(
                "unsupported file_format %r; expected 'fvecs' or 'bvecs'"
                % (file_format,))

        part_np_feature_vecs = part_np_feature_vecs.reshape(
            (count, dimension))

        # Identity test: `array != None` is an elementwise comparison in
        # NumPy and cannot be used as a truth value.
        if np_feature_vecs is not None:
            np_feature_vecs = numpy.concatenate(
                (np_feature_vecs, part_np_feature_vecs))
        else:
            np_feature_vecs = part_np_feature_vecs

        actual_total_nuse += count

    print(np_feature_vecs.shape)
    return np_feature_vecs
def load_features(filename, file_format, total_nuse, dimension, lsh,
                  index_folder, offset=0, run_index='n'):
    """Read feature vectors chunk by chunk, then stack or index each chunk.

    The range ``offset`` .. ``offset + total_nuse`` is read through
    ``yutils.load_vectors_fmt`` at most 10,000,000 vectors at a time.

    * ``run_index != 'y'``: each chunk is reshaped to ``(count, dimension)``
      and concatenated onto the result; the result's shape is printed and
      the array is returned.
    * ``run_index == 'y'``: each flat chunk goes to
      ``index(lsh, chunk, vectors_indexed_so_far)``; if ``index_folder`` is
      not ``None``, ``save_index(lsh, index_folder, chunk_start)`` follows.
      No data is kept, so the function returns ``None``.

    Args:
        filename: Path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``; selects the yael converter.
        total_nuse: Total number of vectors to read.
        dimension: Number of entries per vector.
        lsh: Index object forwarded to ``index`` / ``save_index``.
        index_folder: Folder forwarded to ``save_index``; ``None`` disables
            saving.
        offset: Index of the first vector to read.
        run_index: ``'y'`` to index chunk by chunk instead of returning data.

    Returns:
        The stacked ``(n, dimension)`` array, or ``None`` when
        ``run_index == 'y'``.

    Raises:
        ValueError: If a chunk was loaded but ``file_format`` is neither
            ``'fvecs'`` nor ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset
    indexing = run_index == 'y'

    np_feature_vecs = None
    actual_total_nuse = 0
    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument parenthesised print: same output on Python 2 and 3.
        print("loading from " + str(feature_idx_begin))

        # The last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        count = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, count * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, count * dimension)
        else:
            # Previously the chunk stayed None and failed later with an
            # unrelated-looking error; report the real cause instead.
            raise ValueError(
                "unsupported file_format %r; expected 'fvecs' or 'bvecs'"
                % (file_format,))

        if indexing:
            # For CUDA-based batch indexing the reshaping is skipped and the
            # flat buffer is indexed directly.
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)
        else:
            part_np_feature_vecs = part_np_feature_vecs.reshape(
                (count, dimension))
            # Identity test: `array != None` is an elementwise comparison in
            # NumPy and cannot be used as a truth value.
            if np_feature_vecs is None:
                np_feature_vecs = part_np_feature_vecs
            else:
                np_feature_vecs = numpy.concatenate(
                    (np_feature_vecs, part_np_feature_vecs))

        actual_total_nuse += count

    if not indexing:
        print(np_feature_vecs.shape)
    return np_feature_vecs