def read_descs(infiles, fmt):
    """Read descriptor matrices from a list of files and concatenate them.

    Each file is loaded with the ynumpy reader selected by ``fmt``. Files
    whose matrix has ``shape[1] == 0`` are skipped. The remaining matrices
    are transposed, stacked with ``numpy.vstack`` and transposed back.

    Args:
        infiles: iterable of file names to read.
        fmt: ``'fvecs'`` (read with ``ynumpy.fvecs_read``) or ``'siftgeo'``
            (read with ``ynumpy.siftgeo_read``; the meta information is
            discarded and the descriptors are converted to float32).

    Returns:
        The concatenated matrix, i.e. ``numpy.vstack([v.T, ...]).T``.

    Raises:
        AssertionError: if ``fmt`` is not a supported format.
        ValueError: presumably raised by ``numpy.vstack`` when no non-empty
            matrix was read (empty ``infiles`` or only empty files).
    """
    vl = []
    for fname in infiles:
        # Progress indicator. Written directly to stdout so that no newline
        # follows the carriage return and the next file name overwrites
        # this one on a terminal.
        sys.stdout.write("reading %s \r" % (fname,))
        sys.stdout.flush()
        if fmt == 'fvecs':
            v = ynumpy.fvecs_read(fname)
        elif fmt == 'siftgeo':
            v, meta = ynumpy.siftgeo_read(fname)
            v = v.astype(numpy.float32)
        else:
            # Explicit raise (not ``assert False``) so the check survives
            # ``python -O``; the message reports the offending ``fmt``.
            raise AssertionError("unknown format %s" % (fmt,))
        if v.shape[1] != 0:
            vl.append(v.T)
    # Stack the transposes and transpose back, exactly as before, so the
    # result keeps the same orientation and memory layout.
    return numpy.vstack(vl).T
# Show the query vectors, then run the nearest-neighbour search on them.
print("queries=")
print(queries)

idx, dis = ynumpy.knn(base, queries, nnn, distance_type=1)

print("indices=")
print(idx)
print("distances=")
print(dis)

# Try to load a sample siftgeo file; on any failure fall back to random data.
try:
    # Alternative sample file:
    # '/Users/matthijs//Desktop/papers/lhl/trunk/data/test_query_10k.siftgeo'
    v, meta = ynumpy.siftgeo_read(
        '/scratch2/bigimbaz/dataset/holidays/siftgeo/hesaff_norm/128300.siftgeo'
    )
    v = v.astype('float32')
except Exception as e:
    print(e)
    print("generating random data")
    # 20 standard-normal float32 vectors of dimension 4 ...
    v = numpy.random.normal(0, 1, size=(20, 4)).astype(numpy.float32)
    # ... the last 10 are shifted by one shared random offset; the (1, 4)
    # offset broadcasts over the 10 rows.
    v[10:, :] += numpy.random.uniform(-10, 10, size=(1, 4))
else:
    print("vectors = ")
    print(v)
    print("meta info = ")
import os
import sys

import numpy as np

# Make the local yael build importable before importing from it.
sys.path.append('/home/sibo/Documents/Projects/yael/yael_v438')
from yael import ynumpy

# list of available images
# os.path.splitext removes only the final extension, so an image name that
# itself contains a dot still maps to the matching .siftgeo file.
image_names = [os.path.splitext(filename)[0]
               for filename in os.listdir('holidays_100')
               if filename.endswith('.jpg')]

# load the SIFTs for these images
image_descs = []
for imname in image_names:
    desc, meta = ynumpy.siftgeo_read("holidays_100/%s.siftgeo" % imname)
    if desc.size == 0:
        # Replace an empty result with a 0 x 128 uint8 matrix, presumably so
        # that np.vstack below sees a consistent column count.
        desc = np.zeros((0, 128), dtype='uint8')
    # we drop the meta-information (point coordinates, orientation, etc.)
    image_descs.append(desc)

# make a big matrix with all image descriptors
all_desc = np.vstack(image_descs)

k = 64
n_sample = k * 1000

# choose n_sample descriptors at random
# (np.random.choice draws with replacement by default)
sample_indices = np.random.choice(all_desc.shape[0], n_sample)
sample = all_desc[sample_indices]

# until now sample was in uint8. Convert to float32
sample = sample.astype('float32')
image_directory = "ukbench_jpg"
sift_directory = "ukbench_siftgeo"

# indices of the images we want to index
image_range = numpy.arange(3000, 3100)

print("Collecting a training set...")
train_set = []
# take descriptors from one image per group from the end of the set
for i in range(10000, 10200, 4):
    filename = "%s/ukbench%05d.siftgeo" % (sift_directory, i)
    # Progress line: written without a trailing newline so the carriage
    # return takes effect and the next file name overwrites this one.
    sys.stdout.write(" " + filename + "\r")
    sys.stdout.flush()
    # Only the descriptors are kept; the geometric information is unused.
    sift_descriptors, geometric_info = ynumpy.siftgeo_read(filename)
    train_set.append(sift_descriptors)

# One row per local descriptor.
train_set = numpy.vstack(train_set)
print("Training set of %d local descriptors in %d dimensions"
      % (train_set.shape[0], train_set.shape[1]))

# Cap the training set at 1000 descriptors per GMM component
# (num_gmm_components is defined elsewhere in the script).
trainset_size = num_gmm_components * 1000
if trainset_size < train_set.shape[0]:
    print("Subsampling to %d points" % trainset_size)
    # random.sample picks distinct row indices, i.e. without replacement.
    subset = numpy.array(
        random.sample(range(train_set.shape[0]), trainset_size))
    train_set = train_set[subset]
# Print the queries and the result of the nearest-neighbour search.
print("queries=")
print(queries)

idx, dis = ynumpy.knn(base, queries, nnn, distance_type=1)

print("indices=")
print(idx)
print("distances=")
print(dis)

# Read a sample siftgeo file; on any failure fall back to random vectors.
try:
    # Alternative sample file:
    # '/Users/matthijs//Desktop/papers/lhl/trunk/data/test_query_10k.siftgeo'
    v, meta = ynumpy.siftgeo_read(
        '/scratch2/bigimbaz/dataset/holidays/siftgeo/hesaff_norm/128300.siftgeo'
    )
    v = v.astype('float32')
except Exception as e:
    print(e)
    print("generating random data")
    # 20 standard-normal float32 vectors of dimension 4.
    v = numpy.random.normal(0, 1, size=(20, 4)).astype(numpy.float32)
    # Shift the last 10 vectors by one shared random offset; the (1, 4)
    # offset broadcasts over the 10 rows.
    v[10:, :] += numpy.random.uniform(-10, 10, size=(1, 4))
else:
    print("vectors = ")
    print(v)
# Print the queries and the result of the nearest-neighbour search.
print(queries)

idx, dis = ynumpy.knn(base, queries, nnn, distance_type=1)

print("indices=")
print(idx)
print("distances=")
print(dis)

# Read a sample siftgeo file; on any failure fall back to random vectors.
try:
    # Alternative sample files:
    # '/Users/matthijs//Desktop/papers/lhl/trunk/data/test_query_10k.siftgeo'
    # '/scratch2/bigimbaz/dataset/holidays/siftgeo/hesaff_norm/128300.siftgeo'
    v, meta = ynumpy.siftgeo_read('/tmp/128300.siftgeo')
    v = v.astype('float32')
except Exception as e:
    print(e)
    print("generating random data")
    # 20 standard-normal float32 vectors of dimension 4.
    v = numpy.random.normal(0, 1, size=(20, 4)).astype(numpy.float32)
    # Shift the last 10 vectors by one shared random offset; the (1, 4)
    # offset broadcasts over the 10 rows.
    v[10:, :] += numpy.random.uniform(-10, 10, size=(1, 4))
else:
    print("vectors = ")
    print(v)
    print("meta info = ")
    print(meta)