tic_toc_print('Working on %s (%d/%d)' % (impath, qcurcount + 1, len(qimgs)))
h = hashes[qid]
# Coarse stage: Hamming distance from the query hash to all database hashes
D = scipy.spatial.distance.cdist(h[np.newaxis, :], hashes, 'hamming')
m = np.argsort(D)
top_matches = m[0, :nResort]
if args['considerRadius'] > 0:
    # Always include the temporal neighbors of the query frame in the shortlist
    top_matches = np.unique(np.concatenate((
        top_matches,
        np.arange(qid, qid - args['considerRadius'], -1),
        np.arange(qid, qid + args['considerRadius'], 1)), axis=0))
tic_toc_print('Re-ranking %d images' % len(top_matches))
actFeat = []
feat_dim = 0  # set from the first successfully read feature
for tm in top_matches.tolist():
    try:
        feat = load_feat(os.path.join(args['featdir'], imgslist[tm] + '.h5'))
        feat_dim = np.shape(feat)[0]
    except Exception:
        sys.stderr.write('Unable to read feature from %s\n' % imgslist[tm])
        feat = np.zeros((feat_dim, 1))
    actFeat.append(feat)
actFeat = np.squeeze(np.array(actFeat))
# The nearest hash is not necessarily the query itself (multiple images can be
# at Hamming distance 0), so read the query feature explicitly rather than
# taking actFeat[0].
try:
    qFeat = load_feat(os.path.join(args['featdir'], impath + '.h5')).reshape((1, -1))
except Exception:
    sys.stderr.write('Unable to read QUERY feature from %s\n' % impath)
    qFeat = np.zeros((1, feat_dim))
# Exact stage: re-rank the shortlist by cosine distance on the full features
D2 = scipy.spatial.distance.cdist(qFeat, actFeat, 'cosine')
m2 = np.argsort(D2)
final = list(zip(imgslist_np[top_matches[m2]].tolist()[0],
                 D2[0, m2].astype('float').tolist()[0]))
# always store as 1-indexed
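# A minimal, self-contained sketch of the two-stage pattern implemented above
# (cheap Hamming shortlist over binary codes, then exact cosine re-ranking on
# the full features). Everything here is illustrative toy data; load_feat and
# tic_toc_print above are project helpers whose definitions aren't shown.
import numpy as np
import scipy.spatial.distance

rng = np.random.RandomState(0)
feats = rng.randn(1000, 128)                        # toy full features
feats /= np.linalg.norm(feats, axis=1, keepdims=True)
codes = (feats @ rng.randn(128, 64)) > 0            # toy 64-bit binary codes

q, nResort = 0, 50                                  # query index, shortlist size
Dh = scipy.spatial.distance.cdist(codes[q:q + 1], codes, 'hamming')
shortlist = np.argsort(Dh)[0, :nResort]             # coarse stage: Hamming
Dc = scipy.spatial.distance.cdist(feats[q:q + 1], feats[shortlist], 'cosine')
reranked = shortlist[np.argsort(Dc)[0]]             # exact stage: cosine
print(reranked[:5])                                 # best matches, query first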
with h5py.File(args["paramfile"], "r") as f:
    # .value was removed in h5py 3.x; [()] reads the full dataset
    R = f["R"][()]
    pc = f["pc"][()]
    mean = f["mean"][()]

if os.path.exists(args["outpath"]):
    # Resume a partial run by reloading the hashes computed so far
    print("Reading the existing features")
    with h5py.File(args["outpath"], "r") as f:
        allhashes = f["hashes"][()].tolist()
else:
    allhashes = []

nDone = len(allhashes)
for i in range(nDone, len(imgslist)):
    impath = imgslist[i]
    tic_toc_print("Done %d / %d features" % (i, len(imgslist)))
    try:
        featpath = os.path.join(args["dir"], impath + ".h5")
        feat = load_feat(featpath, args["fracfeat"]).transpose()
        # Normalize this feature (that's how it's used in training)
        feat = feat / np.linalg.norm(feat)
        allhashes.append(ITQ.hash(feat, pc, mean, R))
    except Exception:
        continue

allhashes = np.squeeze(np.array(allhashes)).astype("bool")
with h5py.File(args["outpath"], "w") as f:
    f.create_dataset("hashes", data=allhashes,
                     compression="gzip", compression_opts=9)
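# ITQ.hash is a project function whose source isn't shown here. A plausible
# sketch of what it computes, following standard ITQ encoding (Gong &
# Lazebnik, "Iterative Quantization"): center the feature, project it onto
# the PCA basis, apply the learned rotation R, and threshold at zero. The
# argument order matches the call above; the shapes are assumptions.
import numpy as np

def itq_hash(feat, pc, mean, R):
    """feat: (1, d) row vector; pc: (d, nbits); mean: (d,); R: (nbits, nbits)."""
    v = (feat - mean) @ pc       # PCA projection of the centered feature
    return (v @ R) > 0           # rotate, then binarize each bit at zero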
outfpath = args['outpath']
if not outfpath.endswith('.h5'):
    outfpath += '.h5'
with open(args['list']) as f:
    imgslist = f.read().splitlines()
random.shuffle(imgslist)

nFeat = args['numfeat']
allfeats = []
for impath in imgslist:
    tic_toc_print('Read %d features' % len(allfeats))
    try:
        featpath = os.path.join(args['dir'], impath + '.h5')
        feat = load_feat(featpath, args['featfrac']).transpose()
        allfeats.append(feat)
    except Exception as e:
        tic_toc_print(str(e))
        continue
    if len(allfeats) >= nFeat:
        break

allfeats = np.squeeze(np.array(allfeats)).astype(np.float64)
allfeats[np.isnan(allfeats)] = 0
allfeats[np.isinf(allfeats)] = 0
# V.IMP to normalize each row by its L2 norm, else the mean/PCA calculations
# get screwed up by overflows
norms = np.linalg.norm(allfeats, axis=-1)
allfeats = allfeats / norms[:, np.newaxis]
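# allfeats is the training sample for the hashing parameters (mean, pc, R)
# consumed above. The project's training code isn't shown; below is a minimal
# sketch of standard ITQ training (Gong & Lazebnik): PCA down to nbits
# dimensions, then alternating minimization of the quantization loss
# ||B - V R||_F over binary codes B and an orthogonal rotation R. Shapes and
# the iteration count are assumptions, not the project's actual settings.
import numpy as np

def train_itq(X, nbits=64, n_iter=50, seed=0):
    """X: (n, d) row-normalized features. Returns (mean, pc, R)."""
    mean = X.mean(axis=0)
    Xc = X - mean
    # PCA basis: right singular vectors of the centered data
    _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
    pc = Vt[:nbits].T                      # (d, nbits)
    V = Xc @ pc                            # (n, nbits) projected data
    rng = np.random.RandomState(seed)
    R, _ = np.linalg.qr(rng.randn(nbits, nbits))   # random orthogonal init
    for _ in range(n_iter):
        B = np.sign(V @ R)                 # fix R: codes are the signs
        U, _, Wt = np.linalg.svd(B.T @ V)  # fix B: orthogonal Procrustes
        R = Wt.T @ U.T
    return mean, pc, R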