def hyrule_vocab_test():
    """
    Fit a visual vocabulary on the Oxford5k SIFT descriptors.

    Reads ``oxford_sift.uint8`` from the hard-coded SMK ICCV-2013 data
    directory, casts the descriptors to float32, fits a
    ``sklearn.cluster.MiniBatchKMeans`` model with ``2 ** 16`` clusters and
    logs the shape of the resulting cluster centers. Nothing is returned.
    """
    from yael.yutils import load_ext
    from os.path import join
    import sklearn.cluster

    oxford_root = ut.truepath('/raid/work/Oxford/')
    data_dpath = oxford_root + '/smk_data_iccv_2013/data/'

    # Descriptor file for the Oxford5k dataset. The per-image feature counts
    # live in 'oxford_nsift.uint32' but are not needed for clustering.
    sift_fpath = join(data_dpath, 'oxford_sift.uint8')
    # Chain the cast so the raw array is not kept alive by a local name.
    all_vecs = load_ext(sift_fpath, ndims=128, verbose=True).astype(np.float32)
    logger.info(ut.print_object_size(all_vecs))

    # Any exception raised while clustering goes through utool's
    # embed-on-exception context manager.
    with ut.embed_on_exception_context:
        seed_state = np.random.RandomState(13421421)
        vocab_size = int(2**16)
        # An alternative that was considered: int(config['num_words'] * 8)
        n_init_samples = vocab_size * 4

        # Original author's note: converged after 26043 iterations
        kmeans_kw = {
            'n_clusters': vocab_size,
            'init': 'k-means++',  # alternative: 'random'
            'init_size': n_init_samples,
            'n_init': 1,
            'max_iter': 100,
            'batch_size': 1000,
            'tol': 0.0,
            'max_no_improvement': 10,
            'reassignment_ratio': 0.01,
            'compute_labels': False,
            'random_state': seed_state,
            'verbose': 1,
        }
        kmeans = sklearn.cluster.MiniBatchKMeans(**kmeans_kw)
        kmeans.fit(all_vecs)

        centroids = kmeans.cluster_centers_
        logger.info(centroids.shape)
def load_oxford_2013():
    """
    Load the precomputed Oxford5k data distributed with the SMK publication.

    The data location was found in the README of the SMK publication:
        https://hal.inria.fr/hal-00864684/document
        http://people.rennes.inria.fr/Herve.Jegou/publications.html

    CommandLine:
        # Download oxford13 data
        cd ~/work/Oxford
        mkdir -p smk_data_iccv_2013
        cd smk_data_iccv_2013
        wget -nH --cut-dirs=4 -r -Pdata/ ftp://ftp.irisa.fr/local/texmex/corpus/iccv2013/

    Notes:
        This dataset has 5063 images whereas 07 has 5062.
        This dataset seems to contain an extra junk image:
            ashmolean_000214

        Remember that MATLAB is 1-indexed! The visual-word ids and the query
        indices are shifted by -1 right after loading to make them 0-indexed.

    Returns:
        dict: with the keys ``offset_list``, ``all_kpts``, ``all_vecs``,
            ``words``, ``idx_to_wx``, ``data_uri_order`` and
            ``query_uri_order``.
    """
    from yael.ynumpy import fvecs_read
    from yael.yutils import load_ext
    import scipy.io
    import vtool as vt
    from os.path import join

    root_dpath = ut.truepath('/raid/work/Oxford/')
    data_dpath = root_dpath + '/smk_data_iccv_2013/data/'

    # The Paris6k training descriptors ('paris_sift.uint8') and their visual
    # words ('clust_preprocessed/oxford_train_vw.int32') are intentionally not
    # loaded: nothing is retrained here.

    # Pre-learned quantizer used in ICCV paper (used if docluster=false)
    codebook_fpath = join(data_dpath, 'clust_preprocessed/oxford_codebook.fvecs')
    # Descriptors / geometry / per-image feature counts for Oxford5k
    sift_fpath = join(data_dpath, 'oxford_sift.uint8')
    geom_fpath = join(data_dpath, 'oxford_geom_sift.float')
    nfeat_fpath = join(data_dpath, 'oxford_nsift.uint32')
    # Visual words of the Oxford5k descriptors used in the ICCV paper
    wid_fpath = join(data_dpath, 'clust_preprocessed/oxford_vw.int32')
    # Ground-truth for the Oxford dataset
    gnd_fpath = join(data_dpath, 'gnd_oxford.mat')
    # Cache file for the keypoints after conversion to the invV form
    kpts_cache_fpath = geom_fpath + '.invV.pkl'

    vecs = load_ext(sift_fpath, ndims=128, verbose=True)
    nfeats = load_ext(nfeat_fpath, verbose=True)
    codebook = fvecs_read(codebook_fpath)
    wids = load_ext(wid_fpath, verbose=True) - 1  # to 0-indexing

    # Prefer the cached converted keypoints; otherwise convert and cache them.
    try:
        kpts = ut.load_data(kpts_cache_fpath)
        logger.info('loaded invV keypoints')
    except IOError:
        kptsZ = load_ext(geom_fpath, ndims=5, verbose=True)
        logger.info('converting to invV keypoints')
        kpts = vt.convert_kptsZ_to_kpts(kptsZ)
        ut.save_data(kpts_cache_fpath, kpts)

    gnd = scipy.io.loadmat(gnd_fpath)
    data_uri_order = []
    for entry in gnd['imlist']:
        data_uri_order.append(entry[0][0])
    query_dxs = gnd['qidx'] - 1  # to 0-indexing
    query_uri_order = ut.take(data_uri_order, query_dxs)

    # Running total of the per-image feature counts, with a leading 0.
    running_total = nfeats.cumsum()
    offsets = np.hstack(([0], running_total)).astype(np.int64)

    # NOTE: gnd['gnd'] is not parsed here. Commented-out code in the original
    # read bounding boxes and ok/junk ground-truth index lists from it.

    # Sanity checks tying the loaded arrays together.
    n_vecs = len(vecs)
    assert sum(nfeats) == n_vecs
    assert offsets[-1] == n_vecs
    assert len(wids) == n_vecs
    assert wids.max() == len(codebook) - 1

    return dict(
        offset_list=offsets,
        all_kpts=kpts,
        all_vecs=vecs,
        words=codebook,
        idx_to_wx=wids,
        data_uri_order=data_uri_order,
        query_uri_order=query_uri_order,
    )