Ejemplo n.º 1
0
class TestEngine(unittest.TestCase):
    """A stored vector must come back as its own exact nearest neighbour."""

    def setUp(self):
        # Fresh engine over a 1000-dimensional feature space for each test.
        self.engine = Engine(1000)

    def test_retrieval(self):
        """Dense vectors: 100 independent store/lookup round trips."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vector = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vector, payload)
            nearest, nearest_data, nearest_distance = self.engine.neighbours(vector)[0]
            self.assertTrue((nearest == vector).all())
            self.assertEqual(nearest_data, payload)
            self.assertEqual(nearest_distance, 0.0)

    def test_retrieval_sparse(self):
        """Sparse vectors: difference with the stored vector sums to zero."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vector = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vector, payload)
            nearest, nearest_data, nearest_distance = self.engine.neighbours(vector)[0]
            self.assertTrue((nearest - vector).sum() == 0.0)
            self.assertEqual(nearest_data, payload)
            self.assertEqual(nearest_distance, 0.0)
Ejemplo n.º 2
0
class TestEngine(unittest.TestCase):
    """Round-trip retrieval tests against a 1000-dimensional Engine."""

    def setUp(self):
        # Rebuilt before every test so buckets never leak between cases.
        self.engine = Engine(1000)

    def test_retrieval(self):
        """A freshly stored dense vector is returned unchanged at distance 0."""
        for trial in range(100):
            self.engine.clean_all_buckets()
            sample = numpy.random.randn(1000)
            label = 'data'
            self.engine.store_vector(sample, label)
            hit = self.engine.neighbours(sample)[0]
            self.assertTrue((hit[0] == sample).all())
            self.assertEqual(hit[1], label)
            self.assertEqual(hit[2], 0.0)

    def test_retrieval_sparse(self):
        """A freshly stored sparse vector is returned unchanged at distance 0."""
        for trial in range(100):
            self.engine.clean_all_buckets()
            sample = scipy.sparse.rand(1000, 1, density=0.05)
            label = 'data'
            self.engine.store_vector(sample, label)
            hit = self.engine.neighbours(sample)[0]
            self.assertTrue((hit[0] - sample).sum() == 0.0)
            self.assertEqual(hit[1], label)
            self.assertEqual(hit[2], 0.0)
Ejemplo n.º 3
0
class TestEngine(unittest.TestCase):
    """Engine tests: per-instance storage plus normalized self-retrieval."""

    def setUp(self):
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Vectors stored in one engine must not appear in another's storage."""
        first = Engine(100)
        second = Engine(100)

        for _ in range(1000):
            first.store_vector(numpy.random.randn(100), 'data')

        # Each engine should have its own default storage
        self.assertEqual(len(second.storage.buckets), 0)

    def test_retrieval(self):
        """The neighbour of a stored dense vector is its unit-normalized form."""
        tolerance = 0.000000001
        for _ in range(100):
            self.engine.clean_all_buckets()
            sample = numpy.random.randn(1000)
            self.engine.store_vector(sample, 'data')
            neighbour, payload, distance = self.engine.neighbours(sample)[0]
            expected = unitvec(sample)
            self.assertAlmostEqual(numpy.abs(expected - neighbour).max(),
                                   0,
                                   delta=tolerance)
            self.assertEqual(payload, 'data')
            self.assertAlmostEqual(distance, 0.0, delta=tolerance)

    def test_retrieval_sparse(self):
        """The neighbour of a stored sparse vector is its unit-normalized form."""
        tolerance = 0.000000001
        for _ in range(100):
            self.engine.clean_all_buckets()
            sample = scipy.sparse.rand(1000, 1, density=0.05)
            self.engine.store_vector(sample, 'data')
            neighbour, payload, distance = self.engine.neighbours(sample)[0]
            expected = unitvec(sample)
            self.assertAlmostEqual(numpy.abs(expected - neighbour).max(),
                                   0,
                                   delta=tolerance)
            self.assertEqual(payload, 'data')
            self.assertAlmostEqual(distance, 0.0, delta=tolerance)
Ejemplo n.º 4
0
class TestEngine(unittest.TestCase):
    """Integration tests: storage isolation and exact self-retrieval."""

    def setUp(self):
        # 1000-dimensional engine shared by the retrieval tests.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Vectors stored in one engine must not leak into another's storage."""
        engine1 = Engine(100)
        engine2 = Engine(100)

        for k in range(1000):
            x = numpy.random.randn(100)
            x_data = 'data'
            engine1.store_vector(x, x_data)

        # Each engine should have its own default storage.
        # assertEqual reports the actual bucket count on failure, where the
        # previous assertTrue(... == 0) only reported "False is not true".
        self.assertEqual(len(engine2.storage.buckets), 0)

    def test_retrieval(self):
        """A stored dense vector is its own nearest neighbour at distance 0."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y = n[0][0]
            y_data = n[0][1]
            y_distance = n[0][2]
            self.assertTrue((y == x).all())
            self.assertEqual(y_data, x_data)
            self.assertEqual(y_distance, 0.0)

    def test_retrieval_sparse(self):
        """A stored sparse vector is its own nearest neighbour at distance 0."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y = n[0][0]
            y_data = n[0][1]
            y_distance = n[0][2]
            self.assertTrue((y - x).sum() == 0.0)
            self.assertEqual(y_data, x_data)
            self.assertEqual(y_distance, 0.0)
Ejemplo n.º 5
0
class TestEngine(unittest.TestCase):
    """Integration tests: storage isolation and normalized retrieval."""

    def setUp(self):
        # 1000-dimensional engine shared by the retrieval tests.
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Vectors stored in one engine must not leak into another's storage."""
        engine1 = Engine(100)
        engine2 = Engine(100)

        for k in range(1000):
            x = numpy.random.randn(100)
            x_data = 'data'
            engine1.store_vector(x, x_data)

        # Each engine should have its own default storage.
        # assertEqual reports the actual bucket count on failure, where the
        # previous assertTrue(... == 0) only reported "False is not true".
        self.assertEqual(len(engine2.storage.buckets), 0)

    def test_retrieval(self):
        """The neighbour of a stored dense vector is its unit-normalized form."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # The engine stores the normalized vector, so compare against it.
            normalized_x = unitvec(x)
            delta = 0.000000001
            self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)

    def test_retrieval_sparse(self):
        """The neighbour of a stored sparse vector is its unit-normalized form."""
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            # The engine stores the normalized vector, so compare against it.
            normalized_x = unitvec(x)
            delta = 0.000000001
            self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(), 0, delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)
Ejemplo n.º 6
0
class TestEngine(unittest.TestCase):
    """Engine behaviour: isolated per-instance storage and self-retrieval."""

    def setUp(self):
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Storing into one engine leaves a sibling engine's buckets empty."""
        populated = Engine(100)
        untouched = Engine(100)

        for _ in range(1000):
            vec = numpy.random.randn(100)
            populated.store_vector(vec, 'data')

        # Each engine should have its own default storage
        self.assertTrue(len(untouched.storage.buckets) == 0)

    def test_retrieval(self):
        """A dense vector retrieved right after storing matches exactly."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            probe = numpy.random.randn(1000)
            self.engine.store_vector(probe, 'data')
            candidates = self.engine.neighbours(probe)
            match, match_data, match_distance = candidates[0]
            self.assertTrue((match == probe).all())
            self.assertEqual(match_data, 'data')
            self.assertEqual(match_distance, 0.0)

    def test_retrieval_sparse(self):
        """A sparse vector retrieved right after storing matches exactly."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            probe = scipy.sparse.rand(1000, 1, density=0.05)
            self.engine.store_vector(probe, 'data')
            candidates = self.engine.neighbours(probe)
            match, match_data, match_distance = candidates[0]
            self.assertTrue((match - probe).sum() == 0.0)
            self.assertEqual(match_data, 'data')
            self.assertEqual(match_distance, 0.0)
Ejemplo n.º 7
0
class GraphStateQueryIndex:
    """Nearest-neighbour index over graph-state vectors, persisted in Redis."""

    def __init__(self):
        connection = redis.Redis(host='localhost', port=6379, db=0)
        storage = RedisStorage(connection)

        # Look for a previously persisted configuration of this hash.
        saved = storage.load_hash_configuration('MyHash')

        if saved is None:
            # No saved config: build a fresh hash with 5 projections.
            self.lshash = RandomBinaryProjections('MyHash', 5)
        else:
            # Rebuild the hash from the persisted configuration.
            self.lshash = RandomBinaryProjections(None, None)
            self.lshash.apply_config(saved)

        # Engine over a 4-dimensional feature space using our hash; buckets
        # live in redis.  The hash dimension is set here only the first time,
        # not when the configuration was loaded above.
        self.engine = Engine(4, lshashes=[self.lshash], storage=storage)
        storage.store_hash_configuration(self.lshash)

    def findMatch(self, v):
        """Return the stored neighbours of vector *v*."""
        return self.engine.neighbours(v)

    def addVector(self, v, trainingText):
        """Index vector *v* under its associated training text."""
        self.engine.store_vector(v, trainingText)

    def clearIndex(self):
        """Drop every bucket of every hash in the engine."""
        self.engine.clean_all_buckets()

    def clearHashInstance(self, name):
        """Drop only the buckets belonging to the hash called *name*."""
        self.engine.clean_buckets(name)
Ejemplo n.º 8
0
class Reranker():
    """Rerank a baseline image-retrieval ranking using local CNN features.

    For every query a local feature is pooled from the query bounding box,
    compared against per-region features of the top-N ranked database images
    (via an LSH engine), and the ranking file is rewritten in score order.

    Fixes over the original:
    - string comparisons used ``is`` (e.g. ``self.pooling is 'sum'``), which
      relies on interning and is unreliable; all now use ``==``;
    - Python 2 ``print`` statement replaced with the ``print()`` function;
    - file handles (pca pickles, ranking output) are now closed via ``with``;
    - in-place ``int_array *= float`` (rejected by modern numpy) replaced by
      an explicit truncating multiply with identical values.
    """

    def __init__(self, params):
        """Read configuration, load frame/query lists and set up the engine.

        params -- dict of configuration values (dataset name, paths, feature
        dimension, pooling mode, reranking stage, ...).
        """
        self.dataset = params['dataset']
        self.image_path = params['database_images']
        self.dimension = params['dimension']
        self.layer = params['layer']
        self.top_n = params['num_rerank']
        self.reranking_path = params['reranking_path']
        self.REG_BOXES = params['use_regressed_boxes']
        self.pooling = params['pooling']
        self.stage = params['stage']
        self.N_QE = params['N_QE']
        self.N_display = params['N_display']
        self.class_scores = params['use_class_scores']
        self.network_name = params['network_name']

        with open(params['frame_list'], 'r') as f:
            self.database_list = f.read().splitlines()

        with open(params['query_list'], 'r') as f:
            self.query_names = f.read().splitlines()

        # Distance type
        self.dist_type = params['distance']

        # Where to store the rankings
        self.rankings_dir = params['rankings_dir']

        cfg.TEST.HAS_RPN = True
        self.net = get_network(self.network_name)
        # List of queries
        self.queries = params['query_names']

        if self.pooling == 'sum':
            # PCA models are trained on the *other* dataset (cross-dataset
            # whitening: paris queries use the oxford model and vice versa).
            if self.dataset == 'paris':
                with open(params['pca_model'] + '_oxford.pkl', 'rb') as f:
                    self.pca = pickle.load(f)
            elif self.dataset == 'oxford':
                with open(params['pca_model'] + '_paris.pkl', 'rb') as f:
                    self.pca = pickle.load(f)

        # LSH engine used to find the database region closest to the query.
        lshash = RandomBinaryProjections('re_rankHash', 1)
        self.engine = Engine(self.dimension, lshashes=[lshash])

    def extract_feat_image(self, sess, image):
        """Run detection on the image at path *image*.

        Returns (features, boxes, scores) from the network forward pass.
        """
        im = cv2.imread(image)

        layer_roi = 'pool_5'
        scores, boxes, feat = test_ops.im_detect(sess, self.net, layer_roi, im, False, boxes=None)

        return feat, boxes, scores

    def read_ranking(self, query):
        """Load the baseline ranking for *query* as a list of frame names."""
        with open(os.path.join(self.rankings_dir, os.path.basename(query.split('_query')[0]) + '.txt'), 'r') as f:
            ranking = f.read().splitlines()

        return ranking

    def query_info(self, filename):
        '''
        For oxford and paris, get query frame and box
        '''
        data = np.loadtxt(filename, dtype="str")

        if self.dataset == 'paris':
            query = data[0]
        elif self.dataset == 'oxford':
            # Oxford query names carry an 'oxc1_' prefix; strip it.
            query = data[0].split('oxc1_')[1]

        bbx = data[1:].astype(float).astype(int)

        if self.dataset == 'paris':
            query = os.path.join(self.image_path, query.split('_')[1], query + '.jpg')
        elif self.dataset == 'oxford':
            query = os.path.join(self.image_path, query + '.jpg')

        return query, bbx

    def get_query_local_feat(self, sess, query, box=None):
        '''
        Extract local query feature using bbx
        '''
        if box is None:
            # For paris and oxford the box comes from the query file.
            query, bbx = self.query_info(query)
        else:
            # Locations are provided by the caller.
            xmin = box[0]
            ymin = box[1]
            xmax = box[2]
            ymax = box[3]

        im = cv2.imread(query)

        height = np.shape(im)[0]
        width = np.shape(im)[1]

        # Forward pass
        scores, boxes, feat = test_ops.im_detect(sess, self.net, self.layer, im, True, boxes=None)

        # Drop singleton dimensions of the conv feature map.
        feat = feat.squeeze()

        # Get the image/feature ratio
        mult_h = float(np.shape(feat)[1]) / height
        mult_w = float(np.shape(feat)[2]) / width

        # Resize the bounding box to feature size
        if box is None:
            # NOTE: in-place ``bbx[0] *= mult_w`` on an int array is rejected
            # by modern numpy ('same_kind' casting); multiply and truncate
            # explicitly — identical values to the old unsafe in-place cast.
            bbx[0] = int(bbx[0] * mult_w)
            bbx[2] = int(bbx[2] * mult_w)
            bbx[1] = int(bbx[1] * mult_h)
            bbx[3] = int(bbx[3] * mult_h)
        else:
            # Round outward (floor the min corner, ceil the max corner) so
            # the feature crop fully covers the requested region.
            bbx = [int(math.floor(xmin * mult_w)), int(math.floor(ymin * mult_h)),
                   int(math.ceil(xmax * mult_w)), int(math.ceil(ymax * mult_h))]

        # Crop local features with bounding box
        local_feat = feat[:, bbx[1]:bbx[3], bbx[0]:bbx[2]]

        # Pool over the two spatial dimensions (sum or max).
        if self.pooling == 'sum':
            local_feat = np.sum(np.sum(local_feat, axis=1), axis=1)
        else:
            local_feat = np.max(np.max(local_feat, axis=1), axis=1)

        return local_feat

    def rerank_one_query(self, sess, query, num_queries):
        """Rerank the baseline ranking of one query and save the results."""
        # Average the local features of the (possibly several) query images.
        query_feats = np.zeros((self.dimension))
        for i in np.arange(num_queries) + 1:

            query_ = query
            query_name = os.path.basename(query).rsplit('_', 2)[0]

            # Generate query feature and add it to matrix
            query_feats += self.get_query_local_feat(sess, query_)

        query_feats /= num_queries
        query_feats = query_feats.reshape(-1, 1)

        if self.stage == 'rerank2nd':
            # Second stage of reranking: take the best locations found on the
            # top-N ranked frames during the first stage as expansion queries.
            with open(os.path.join(self.reranking_path, os.path.basename(query.split('_query')[0]) + '.pkl'), 'rb') as f:
                distances = pickle.load(f)
                locations = pickle.load(f)
                frames = pickle.load(f)
                class_ids = pickle.load(f)

            frames_sorted = np.array(frames)[np.argsort(distances)]
            locations_sorted = np.array(locations)[np.argsort(distances)]

            # Query expansion: average in the features of the N_QE best hits.
            for i_qe in range(self.N_QE):
                query_feats += self.get_query_local_feat(sess, frames_sorted[i_qe], locations_sorted[i_qe])

            query_feats /= (self.N_QE + 1)

        query_feats = query_feats.T

        query_feats = normalize(query_feats)

        if self.pooling == 'sum':
            # Apply PCA, then renormalize.
            query_feats = self.pca.transform(query_feats)
            query_feats = normalize(query_feats)

        # Read baseline ranking
        ranking = self.read_ranking(query)

        # Rerank
        distances, locations, frames, class_ids = self.rerank_top_n(sess, query_feats, ranking, query_name)

        # Persist intermediate results for a possible second reranking stage.
        with open(os.path.join(self.reranking_path, os.path.basename(query.split('_query')[0]) + '.pkl'), 'wb') as f:
            pickle.dump(distances, f)
            pickle.dump(locations, f)
            pickle.dump(frames, f)
            pickle.dump(class_ids, f)
        # Write new ranking to disk
        self.write_rankings(query, ranking, distances)

    def rerank_top_n(self, sess, query_feats, ranking, query_name):
        """Score the first N_display frames of *ranking* against the query.

        Returns four aligned lists: (distances, locations, frames, class_ids).
        """
        distances = []
        locations = []
        frames = []
        class_ids = []

        # query class (+1 because class 0 is the background)
        cls_ind = np.where(np.array(self.queries) == str(query_name))[0][0] + 1

        for im_ in ranking[0:self.N_display]:

            if self.dataset == 'paris':
                frame_to_read = os.path.join(self.image_path, im_.split('_')[1], im_ + '.jpg')
            elif self.dataset == 'oxford':
                frame_to_read = os.path.join(self.image_path, im_ + '.jpg')

            frames.append(frame_to_read)
            # Get features of current element
            feats, boxes, scores = self.extract_feat_image(sess, frame_to_read)

            # we rank based on class scores
            if self.class_scores:

                scores = feats[:, cls_ind]

                # position with highest score for that class
                best_pos = np.argmax(scores)

                # array of boxes with higher score for that class
                best_box_array = boxes[best_pos, :]

                # single box with max score for query class
                best_box = best_box_array[4 * cls_ind:4 * (cls_ind + 1)]

                # the actual score
                distances.append(np.max(scores))
                locations.append(best_box)
                class_ids.append(cls_ind)

            else:

                # Pool per-region features, then use the LSH engine to find
                # the region closest to the query feature.
                if self.pooling == 'sum':
                    # pca transform
                    feats = np.sum(np.sum(feats, axis=1), axis=1)
                    feats = normalize(feats)
                    feats = self.pca.transform(feats)
                    feats = normalize(feats)
                else:
                    feats = np.max(np.max(feats, axis=1), axis=1)
                    feats = normalize(feats)

                feats = feats.T
                query_feats = query_feats.T
                query_feats = query_feats.reshape(self.dimension,)

                # Index every region feature; the stored payload is its index.
                for fidx in range(feats.shape[1]):
                    self.engine.store_vector(feats[:, fidx], fidx)

                N = self.engine.neighbours(query_feats)

                # Select minimum distance
                distances.append(N[0][2])

                # Array of boxes with min distance
                idx = N[0][1]

                # Select array of locations with minimum distance
                best_box_array = boxes[idx, :]

                # Discard background score
                scores = scores[:, 1:]

                # Class ID with max score
                cls_ind = np.argmax(scores[idx, :])
                class_ids.append(cls_ind + 1)

                # Select the best box for the best class
                best_box = best_box_array[4 * cls_ind:4 * (cls_ind + 1)]

                locations.append(best_box)

                # Drop this frame's features before processing the next one.
                self.engine.clean_all_buckets()

        return distances, locations, frames, class_ids

    def rerank(self, sess):
        """Rerank every query in the query list."""
        iter_ = self.query_names
        num_queries = 1

        for i, query in enumerate(iter_):
            # print() function replaces the Python 2 print statement; the
            # comma-separated args produce the same space-separated output.
            print("Reranking for query", i, "out of", len(iter_), '...')
            self.rerank_one_query(sess, query, num_queries)

    def write_rankings(self, query, ranking, distances):
        """Reorder the top-n of *ranking* by *distances* and save it to disk."""
        if self.class_scores:
            # Class scores: higher is better -> descending order.
            new_top_r = list(np.array(ranking[0:self.top_n])[np.argsort(distances)[::-1]])
        else:
            # Distances: lower is better -> ascending order.
            new_top_r = list(np.array(ranking[0:self.top_n])[np.argsort(distances)])

        ranking[0:self.top_n] = new_top_r

        # 'with' guarantees the handle is closed even if a write fails
        # (the original leaked it on exceptions).
        with open(os.path.join(self.rankings_dir, os.path.basename(query.split('_query')[0]) + '.txt'), 'w') as savefile:
            for res in ranking:
                savefile.write(os.path.basename(res).split('.jpg')[0] + '\n')