class TestEngine(unittest.TestCase):
    """Integration tests for the nearest-neighbour Engine."""

    def setUp(self):
        # 1000-dimensional feature space with default hashes and storage.
        self.engine = Engine(1000)

    def test_retrieval(self):
        """A stored dense vector must come back as its own nearest neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # BUGFIX: the engine stores unit-normalised vectors, so exact
            # equality against the raw input ((y == x).all()) is wrong.
            # Compare against x / ||x|| with a small tolerance instead.
            normalized_x = x / numpy.linalg.norm(x)
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)

    def test_retrieval_sparse(self):
        """A stored sparse vector must come back as its own nearest neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # Normalise the sparse query the same way the engine does.
            normalized_x = x / numpy.sqrt(x.multiply(x).sum())
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)
class TestEngine(unittest.TestCase):
    """Exercises Engine storage isolation and self-retrieval."""

    def setUp(self):
        # Fresh 1000-dimensional engine for every test.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        # Two engines must not share one default storage instance.
        first = Engine(100)
        second = Engine(100)
        for _ in range(1000):
            first.store_vector(numpy.random.randn(100), 'data')
        # Each engine should have its own default storage
        self.assertEqual(len(second.storage.buckets), 0)

    def test_retrieval(self):
        # A dense vector is returned, unit-normalised, as its own neighbour.
        for _ in range(100):
            self.engine.clean_all_buckets()
            vector = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vector, payload)
            hit, hit_data, hit_distance = self.engine.neighbours(vector)[0]
            expected = unitvec(vector)
            tolerance = 0.000000001
            self.assertAlmostEqual(numpy.abs((expected - hit)).max(), 0, delta=tolerance)
            self.assertEqual(hit_data, payload)
            self.assertAlmostEqual(hit_distance, 0.0, delta=tolerance)

    def test_retrieval_sparse(self):
        # Same contract must hold for scipy sparse vectors.
        for _ in range(100):
            self.engine.clean_all_buckets()
            vector = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vector, payload)
            hit, hit_data, hit_distance = self.engine.neighbours(vector)[0]
            expected = unitvec(vector)
            tolerance = 0.000000001
            self.assertAlmostEqual(numpy.abs((expected - hit)).max(), 0, delta=tolerance)
            self.assertEqual(hit_data, payload)
            self.assertAlmostEqual(hit_distance, 0.0, delta=tolerance)
class TestEngine(unittest.TestCase):
    """Integration tests for the nearest-neighbour Engine."""

    def setUp(self):
        # 1000-dimensional feature space with default hashes and storage.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        # Each engine must get its own default storage, not a shared one.
        engine1 = Engine(100)
        engine2 = Engine(100)
        for k in range(1000):
            x = numpy.random.randn(100)
            x_data = 'data'
            engine1.store_vector(x, x_data)
        # Each engine should have its own default storage
        # (assertEqual gives a clearer failure message than assertTrue(==)).
        self.assertEqual(len(engine2.storage.buckets), 0)

    def test_retrieval(self):
        """A stored dense vector is its own nearest neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # BUGFIX: the engine stores unit-normalised vectors, so exact
            # equality against the raw input ((y == x).all()) is wrong.
            # Compare against x / ||x|| with a small tolerance instead.
            normalized_x = x / numpy.linalg.norm(x)
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)

    def test_retrieval_sparse(self):
        """A stored sparse vector is its own nearest neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # Normalise the sparse query the same way the engine does.
            normalized_x = x / numpy.sqrt(x.multiply(x).sum())
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)
class TestEngine(unittest.TestCase):
    """Checks that Engine instances are isolated and retrieve what they store."""

    def setUp(self):
        # Engine over a 1000-dimensional feature space.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        # Filling one engine must leave a sibling engine's storage empty.
        engine_a = Engine(100)
        engine_b = Engine(100)
        for idx in range(1000):
            sample = numpy.random.randn(100)
            label = 'data'
            engine_a.store_vector(sample, label)
        # Each engine should have its own default storage
        self.assertTrue(len(engine_b.storage.buckets) == 0)

    def test_retrieval(self):
        # Store a dense vector; its normalised form must be the top neighbour.
        for trial in range(100):
            self.engine.clean_all_buckets()
            sample = numpy.random.randn(1000)
            label = 'data'
            self.engine.store_vector(sample, label)
            neighbours = self.engine.neighbours(sample)
            found, found_label, found_dist = neighbours[0]
            reference = unitvec(sample)
            eps = 0.000000001
            max_diff = numpy.abs((reference - found)).max()
            self.assertAlmostEqual(max_diff, 0, delta=eps)
            self.assertEqual(found_label, label)
            self.assertAlmostEqual(found_dist, 0.0, delta=eps)

    def test_retrieval_sparse(self):
        # Identical expectations for a sparse input vector.
        for trial in range(100):
            self.engine.clean_all_buckets()
            sample = scipy.sparse.rand(1000, 1, density=0.05)
            label = 'data'
            self.engine.store_vector(sample, label)
            neighbours = self.engine.neighbours(sample)
            found, found_label, found_dist = neighbours[0]
            reference = unitvec(sample)
            eps = 0.000000001
            max_diff = numpy.abs((reference - found)).max()
            self.assertAlmostEqual(max_diff, 0, delta=eps)
            self.assertEqual(found_label, label)
            self.assertAlmostEqual(found_dist, 0.0, delta=eps)
class TestEngine(unittest.TestCase):
    """Integration tests for the nearest-neighbour Engine."""

    def setUp(self):
        # Default engine over a 1000-dimensional feature space.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        # Two engines must not accidentally share one default storage.
        engine1 = Engine(100)
        engine2 = Engine(100)
        for k in range(1000):
            x = numpy.random.randn(100)
            x_data = 'data'
            engine1.store_vector(x, x_data)
        # Each engine should have its own default storage
        # (assertEqual reports the actual length on failure).
        self.assertEqual(len(engine2.storage.buckets), 0)

    def test_retrieval(self):
        """A stored dense vector must be retrieved as its own neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # BUGFIX: stored vectors are unit-normalised by the engine, so
            # exact equality with the raw input is the wrong assertion.
            # Compare against x / ||x|| within a small tolerance.
            normalized_x = x / numpy.linalg.norm(x)
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)

    def test_retrieval_sparse(self):
        """A stored sparse vector must be retrieved as its own neighbour."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # Normalise the sparse query the same way the engine does.
            normalized_x = x / numpy.sqrt(x.multiply(x).sum())
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs(normalized_x - y).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)
class GraphStateQueryIndex:
    """LSH index over graph-state vectors, persisted in Redis."""

    def __init__(self):
        connection = redis.Redis(host='localhost', port=6379, db=0)
        storage = RedisStorage(connection)
        # Reuse a previously stored hash configuration when one exists,
        # otherwise start a fresh 5-projection binary hash.
        saved_config = storage.load_hash_configuration('MyHash')
        if saved_config is None:
            self.lshash = RandomBinaryProjections('MyHash', 5)
        else:
            self.lshash = RandomBinaryProjections(None, None)
            self.lshash.apply_config(saved_config)
        # Engine over a 4-dimensional feature space using our hash; the
        # hash dimension is only set the first time, not when a stored
        # configuration is applied.  Buckets live in Redis.
        self.engine = Engine(4, lshashes=[self.lshash], storage=storage)
        storage.store_hash_configuration(self.lshash)

    def findMatch(self, v):
        """Return the stored neighbours of vector v."""
        return self.engine.neighbours(v)

    def addVector(self, v, trainingText):
        """Index vector v together with its associated training text."""
        self.engine.store_vector(v, trainingText)

    def clearIndex(self):
        """Drop every bucket in the index."""
        self.engine.clean_all_buckets()

    def clearHashInstance(self, name):
        """Drop only the buckets belonging to hash `name`."""
        self.engine.clean_buckets(name)
class Reranker():
    '''Re-rank image retrieval results using local CNN region features.

    For each query, the top-N frames of a baseline ranking are re-scored
    either by detector class scores or by the LSH nearest-neighbour distance
    between pooled region features, and the reordered ranking is written
    back to disk.
    '''

    def __init__(self, params):
        self.dataset = params['dataset']
        self.image_path = params['database_images']
        self.dimension = params['dimension']
        self.layer = params['layer']
        self.top_n = params['num_rerank']
        self.reranking_path = params['reranking_path']
        self.REG_BOXES = params['use_regressed_boxes']
        self.pooling = params['pooling']
        self.stage = params['stage']
        self.N_QE = params['N_QE']
        self.N_display = params['N_display']
        self.class_scores = params['use_class_scores']
        self.network_name = params['network_name']
        with open(params['frame_list'], 'r') as f:
            self.database_list = f.read().splitlines()
        with open(params['query_list'], 'r') as f:
            self.query_names = f.read().splitlines()
        # Distance type
        self.dist_type = params['distance']
        # Where to store the rankings
        self.rankings_dir = params['rankings_dir']
        cfg.TEST.HAS_RPN = True
        self.net = get_network(self.network_name)
        # List of queries
        self.queries = params['query_names']
        # BUGFIX: these string comparisons used 'is' (identity), which only
        # works by accident through CPython literal interning; '==' is the
        # correct equality test (fixed throughout the class).
        if self.pooling == 'sum':
            # PCA models: each dataset uses the model trained on the other.
            if self.dataset == 'paris':
                self.pca = pickle.load(open(params['pca_model'] + '_oxford.pkl', 'rb'))
            elif self.dataset == 'oxford':
                self.pca = pickle.load(open(params['pca_model'] + '_paris.pkl', 'rb'))
        # LSH engine used to pick the database region closest to the query.
        lshash = RandomBinaryProjections('re_rankHash', 1)
        self.engine = Engine(self.dimension, lshashes=[lshash])

    def extract_feat_image(self, sess, image):
        '''Run the detector on one image; return (features, boxes, scores).'''
        im = cv2.imread(image)
        layer_roi = 'pool_5'
        scores, boxes, feat = test_ops.im_detect(sess, self.net, layer_roi, im,
                                                 False, boxes=None)
        return feat, boxes, scores

    def read_ranking(self, query):
        '''Load the baseline ranking for `query` from the rankings directory.'''
        ranking_file = os.path.join(self.rankings_dir,
                                    os.path.basename(query.split('_query')[0]) + '.txt')
        with open(ranking_file, 'r') as f:
            ranking = f.read().splitlines()
        return ranking

    def query_info(self, filename):
        '''For oxford and paris, get the query frame path and bounding box.'''
        data = np.loadtxt(filename, dtype="str")
        if self.dataset == 'paris':
            query = data[0]
        elif self.dataset == 'oxford':
            query = data[0].split('oxc1_')[1]
        bbx = data[1:].astype(float).astype(int)
        if self.dataset == 'paris':
            query = os.path.join(self.image_path, query.split('_')[1], query + '.jpg')
        elif self.dataset == 'oxford':
            query = os.path.join(self.image_path, query + '.jpg')
        return query, bbx

    def get_query_local_feat(self, sess, query, box=None):
        '''Extract a pooled local feature for the query bounding box.

        When `box` is None the box comes from the dataset's query file;
        otherwise it is an explicit [xmin, ymin, xmax, ymax] location.
        '''
        if box is None:
            # For paris and oxford the box is stored with the query.
            query, bbx = self.query_info(query)
        else:
            xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        im = cv2.imread(query)
        height = np.shape(im)[0]
        width = np.shape(im)[1]
        # Forward pass
        scores, boxes, feat = test_ops.im_detect(sess, self.net, self.layer, im,
                                                 True, boxes=None)
        feat = feat.squeeze()
        # Image-to-feature-map scale factors.
        mult_h = float(np.shape(feat)[1]) / height
        mult_w = float(np.shape(feat)[2]) / width
        if box is None:
            # Adjust query bounding box to feature space (int truncation).
            bbx[0] *= mult_w
            bbx[2] *= mult_w
            bbx[1] *= mult_h
            bbx[3] *= mult_h
        else:
            bbx = [int(math.floor(xmin * mult_w)), int(math.floor(ymin * mult_h)),
                   int(math.ceil(xmax * mult_w)), int(math.ceil(ymax * mult_h))]
        # Crop local features with the bounding box, then pool over space.
        local_feat = feat[:, bbx[1]:bbx[3], bbx[0]:bbx[2]]
        if self.pooling == 'sum':
            local_feat = np.sum(np.sum(local_feat, axis=1), axis=1)
        else:
            local_feat = np.max(np.max(local_feat, axis=1), axis=1)
        return local_feat

    def rerank_one_query(self, sess, query, num_queries):
        '''Build the query descriptor, rerank its top-N list and save results.'''
        query_feats = np.zeros((self.dimension))
        for i in np.arange(num_queries) + 1:
            query_ = query
            query_name = os.path.basename(query).rsplit('_', 2)[0]
            # Generate query feature and accumulate it.
            query_feats += self.get_query_local_feat(sess, query_)
        query_feats /= num_queries
        query_feats = query_feats.reshape(-1, 1)
        if self.stage == 'rerank2nd':
            # Second stage of reranking: use the N_QE best locations of the
            # first stage as query-expansion features.
            reranking_file = os.path.join(
                self.reranking_path,
                os.path.basename(query.split('_query')[0]) + '.pkl')
            with open(reranking_file, 'rb') as f:
                distances = pickle.load(f)
                locations = pickle.load(f)
                frames = pickle.load(f)
                class_ids = pickle.load(f)
            frames_sorted = np.array(frames)[np.argsort(distances)]
            locations_sorted = np.array(locations)[np.argsort(distances)]
            # NOTE(review): query_feats is (dim, 1) here while
            # get_query_local_feat returns (dim,); confirm this in-place add
            # broadcasts as intended on the second-stage path.
            for i_qe in range(self.N_QE):
                query_feats += self.get_query_local_feat(
                    sess, frames_sorted[i_qe], locations_sorted[i_qe])
            query_feats /= (self.N_QE + 1)
        query_feats = query_feats.T
        query_feats = normalize(query_feats)
        if self.pooling == 'sum':
            # Apply PCA
            query_feats = self.pca.transform(query_feats)
            query_feats = normalize(query_feats)
        # Read baseline ranking
        ranking = self.read_ranking(query)
        # Rerank
        distances, locations, frames, class_ids = self.rerank_top_n(
            sess, query_feats, ranking, query_name)
        reranking_file = os.path.join(
            self.reranking_path,
            os.path.basename(query.split('_query')[0]) + '.pkl')
        with open(reranking_file, 'wb') as f:
            pickle.dump(distances, f)
            pickle.dump(locations, f)
            pickle.dump(frames, f)
            pickle.dump(class_ids, f)
        # Write new ranking to disk
        self.write_rankings(query, ranking, distances)

    def rerank_top_n(self, sess, query_feats, ranking, query_name):
        '''Re-score the first N_display frames of `ranking` for one query.

        Returns (distances, locations, frames, class_ids), one entry per
        re-scored frame.
        '''
        distances = []
        locations = []
        frames = []
        class_ids = []
        # Query class (+1 because class 0 is the background).
        cls_ind = np.where(np.array(self.queries) == str(query_name))[0][0] + 1
        # The query arrives as a (1, dim) row; flatten it once for the LSH
        # engine (hoisted - the original re-transposed it on every frame).
        query_vector = query_feats.T.reshape(self.dimension,)
        for im_ in ranking[0:self.N_display]:
            if self.dataset == 'paris':
                frame_to_read = os.path.join(self.image_path, im_.split('_')[1],
                                             im_ + '.jpg')
            elif self.dataset == 'oxford':
                frame_to_read = os.path.join(self.image_path, im_ + '.jpg')
            frames.append(frame_to_read)
            # Get features of current element
            feats, boxes, scores = self.extract_feat_image(sess, frame_to_read)
            if self.class_scores:
                # Rank directly on detector scores for the query class.
                scores = feats[:, cls_ind]
                # Position (region) with the highest score for that class.
                best_pos = np.argmax(scores)
                best_box_array = boxes[best_pos, :]
                # Single box with max score for the query class.
                best_box = best_box_array[4 * cls_ind:4 * (cls_ind + 1)]
                distances.append(np.max(scores))
                locations.append(best_box)
                class_ids.append(cls_ind)
            else:
                # Pool region features and pick the closest one via LSH.
                if self.pooling == 'sum':
                    feats = np.sum(np.sum(feats, axis=1), axis=1)
                    feats = normalize(feats)
                    feats = self.pca.transform(feats)
                    feats = normalize(feats)
                else:
                    feats = np.max(np.max(feats, axis=1), axis=1)
                    feats = normalize(feats)
                feats = feats.T
                for fidx in range(feats.shape[1]):
                    self.engine.store_vector(feats[:, fidx], fidx)
                N = self.engine.neighbours(query_vector)
                # Minimum distance and the region index that produced it.
                distances.append(N[0][2])
                idx = N[0][1]
                best_box_array = boxes[idx, :]
                # Discard background score, then take the best class for
                # that region.
                scores = scores[:, 1:]
                cls_ind = np.argmax(scores[idx, :])
                class_ids.append(cls_ind + 1)
                best_box = best_box_array[4 * cls_ind:4 * (cls_ind + 1)]
                locations.append(best_box)
                # Reset buckets so the next frame's regions don't mix in.
                self.engine.clean_all_buckets()
        return distances, locations, frames, class_ids

    def rerank(self, sess):
        '''Rerank every query in the query list.'''
        queries = self.query_names
        num_queries = 1
        for i, query in enumerate(queries):
            # BUGFIX: the original used a Python 2 print statement;
            # %-formatting keeps this line valid on both Python 2 and 3.
            print("Reranking for query %s out of %s ..." % (i, len(queries)))
            self.rerank_one_query(sess, query, num_queries)

    def write_rankings(self, query, ranking, distances):
        '''Reorder the top-n of `ranking` in place and write it to disk.

        Class scores are similarities (higher is better) while LSH distances
        are dissimilarities (lower is better) - hence the two sort directions.
        '''
        order = np.argsort(distances)
        if self.class_scores:
            order = order[::-1]
        new_top_r = list(np.array(ranking[0:self.top_n])[order])
        ranking[0:self.top_n] = new_top_r
        out_file = os.path.join(self.rankings_dir,
                                os.path.basename(query.split('_query')[0]) + '.txt')
        # BUGFIX: use a context manager instead of open()/close() so the
        # file is closed even if a write fails.
        with open(out_file, 'w') as savefile:
            for res in ranking:
                savefile.write(os.path.basename(res).split('.jpg')[0] + '\n')