Ejemplo n.º 1
0
class LOPQRetriever(BaseRetriever):
    """Approximate retriever that stores LOPQ codes in an in-memory ``LOPQSearcher``.

    Entries are kept in ``self.entries`` in insertion order; the position of an
    entry in that list is the id under which its codes are registered with the
    searcher, so search hits can be mapped back to entries by index.
    """

    def __init__(self, name, approximator):
        """Load the approximator's model and create an empty searcher for it.

        :param name: retriever name, also forwarded to the base class.
        :param approximator: object providing ``load()``, ``model`` and
            ``get_pca_vector(vector)``.
        """
        super(LOPQRetriever, self).__init__(name=name, approximator=approximator, algorithm="LOPQ")
        self.approximate = True
        self.name = name
        self.loaded_entries = {}
        self.entries = []
        self.support_batching = False
        self.approximator = approximator
        # The model must be loaded before the searcher is built from it.
        self.approximator.load()
        self.searcher = LOPQSearcher(model=self.approximator.model)

    def load_index(self, numpy_matrix=None, entries=None):
        """Append ``entries`` to the index and register their codes with the searcher.

        :param numpy_matrix: unused by this retriever.
        :param entries: iterable of mappings; ``entry['codes'][0]`` and
            ``entry['codes'][1]`` are converted to tuples and passed to the
            searcher as one code pair. ``None`` (the default) is a no-op.
        """
        if entries is None:
            # The signature advertises None as the default; previously this
            # crashed with TypeError when trying to iterate it.
            return
        codes = []
        ids = []
        # New ids continue after the entries that are already indexed.
        for position, entry in enumerate(entries, len(self.entries)):
            entry_codes = entry['codes']
            codes.append((tuple(entry_codes[0]), tuple(entry_codes[1])))
            ids.append(position)
            self.entries.append(entry)
        self.searcher.add_codes(codes, ids)

    def nearest(self, vector=None, n=12):
        """Return the stored entries matched by an approximate search for ``vector``.

        :param vector: query vector; converted with the approximator's
            ``get_pca_vector`` before searching.
        :param n: passed to the searcher as ``quota``.
        :returns: list of entries, in the order the searcher returned the hits.
        """
        pca_vec = self.approximator.get_pca_vector(vector)
        # The searcher returns (hits, visited); only the hits are needed.
        hits, _ = self.searcher.search(pca_vec, quota=n)
        return [self.entries[hit.id] for hit in hits]
Ejemplo n.º 2
0
class LOPQRetriever(object):
    """Deprecated and soon to be removed.

    Keeps entry descriptors in ``self.entries`` and registers their LOPQ codes
    with an in-memory ``LOPQSearcher`` under the entry's list position.
    """

    def __init__(self, name, approximator):
        """Load the approximator's model and build an empty searcher from it."""
        self.approximate = True
        self.name = name
        self.loaded_entries = set()
        self.entries = []
        self.support_batching = False
        self.approximator = approximator
        # The model has to be loaded before it is handed to the searcher.
        self.approximator.load()
        self.searcher = LOPQSearcher(model=self.approximator.model)

    def add_entries(self, entries, video_id, entry_type):
        """Index ``entries`` and record a descriptor for each of them.

        Each item is read as ``(entry_id, (codes_a, codes_b))``: ``item[0]`` is
        stored as the descriptor id, ``item[1][0]`` and ``item[1][1]`` are
        turned into tuples and handed to the searcher as one code pair.
        """
        lopq_codes, positions = [], []
        # Ids continue from the number of entries already stored.
        for position, item in enumerate(entries, len(self.entries)):
            pair = (tuple(item[1][0]), tuple(item[1][1]))
            lopq_codes.append(pair)
            positions.append(position)
            descriptor = {"id": item[0], "type": entry_type, "video": video_id}
            self.entries.append(descriptor)
        self.searcher.add_codes(lopq_codes, positions)

    def nearest(self, vector=None, n=12):
        """Return the descriptors of the hits found for ``vector``.

        ``n`` is forwarded to the searcher as ``quota``; hits are mapped back
        to descriptors through their ``id`` attribute.
        """
        query = self.approximator.get_pca_vector(vector)
        hits, _visited = self.searcher.search(query, quota=n)
        return [self.entries[hit.id] for hit in hits]
Ejemplo n.º 3
0
class ApproximateIndexer(object):
    """Fit, persist and query an LOPQ model over PCA-projected vectors.

    ``fit`` learns a two-stage projection (a ``PCA`` reduction followed by the
    ``P``/``mu`` pair returned by ``pca``) and trains ``self.model`` on the
    projected data; ``transform`` applies that same projection to new data.
    ``lmdb_path`` is stored but not used by the code in this class.
    """

    def __init__(self, index_name, model_path, lmdb_path, V=16, M=16):
        """Create an untrained model; the searcher is created by ``fit_model``."""
        self.model = LOPQModel(V, M)
        self.index_name = index_name
        self.searcher = None
        self.model_path = model_path
        self.lmdb_path = lmdb_path
        # Projection state, populated by fit().
        self.pca_reduction = None
        self.P = None
        self.mu = None

    def load(self):
        """Load the model parameters from ``self.model_path``."""
        self.model.load_proto(self.model_path)

    def fit(self, train, n_components=256):
        """Learn the projection from ``train`` and fit the LOPQ model on it.

        :param train: training matrix (prints its ``shape`` before and after
            projection).
        :param n_components: number of components kept by the first PCA
            stage; defaults to the previously hard-coded 256.
        """
        print(train.shape)
        self.pca_reduction = PCA(n_components=n_components)
        self.pca_reduction.fit(train)
        train = self.pca_reduction.transform(train)
        self.P, self.mu = pca(train)
        # Subtract mu exactly as transform() does, so the model is trained on
        # the same projection that indexed and queried vectors go through.
        train = np.dot(train - self.mu, self.P)
        print(train.shape)
        self.model.fit(train, n_init=1)

    def transform(self, test):
        """Project ``test`` with the projection learned by ``fit`` and return it."""
        print(test.shape)
        test = self.pca_reduction.transform(test)
        test = test - self.mu
        test = np.dot(test, self.P)
        print(test.shape)
        return test

    def fit_model(self, train):
        """Fit on ``train``, export the model to ``self.model_path`` and create the searcher."""
        self.fit(train)
        self.model.export_proto(self.model_path)
        self.searcher = LOPQSearcher(self.model)  # LOPQSearcherLMDB(self.model,self.lmdb_path)

    def experiment(self, data):
        """Hold out 10% of ``data``, fit on the rest and print the recall on the held-out part."""
        train, test = train_test_split(data, test_size=0.1)
        print('{} {} {}'.format(data.shape, train.shape, test.shape))
        # Reference neighbours are computed on the raw (unprojected) vectors.
        nns = compute_all_neighbors(test, train)
        self.fit_model(train)
        self.searcher.add_data(self.transform(train))
        recall, _ = get_recall(self.searcher, self.transform(test), nns)
        print('Recall (V={}, M={}, subquants={}): {}'.format(self.model.V, self.model.M, self.model.subquantizer_clusters, str(recall)))

    def add_data(self, data):
        """Add ``data`` to the searcher as-is (no projection is applied here)."""
        self.searcher.add_data(data)

    def search(self, x, quota=100):
        """Search for ``x`` and return the searcher's result unchanged.

        :param quota: forwarded to the searcher; defaults to the previously
            hard-coded 100.
        """
        return self.searcher.search(x, quota=quota)
Ejemplo n.º 4
0
class LOPQ(BaseANN):
    """ANN wrapper around an ``LOPQModel`` with ``M`` fixed to 4."""

    def __init__(self, v):
        """Create an untrained model with ``V=v``; the searcher is built in ``fit``."""
        m = 4
        self.name = 'LOPQ(v={}, m={})'.format(v, m)
        self._m = m
        self._model = LOPQModel(V=v, M=m)
        self._searcher = None

    def fit(self, X):
        """Fit the model on ``X`` (converted to float32) and index the same data."""
        # One conversion instead of array() followed by astype().
        X = numpy.array(X, dtype=numpy.float32)
        self._model.fit(X)
        self._searcher = LOPQSearcher(self._model)
        self._searcher.add_data(X)

    def query(self, v, n):
        """Search for ``v`` and return the searcher's result unchanged.

        :param v: query vector; converted to float32 before searching.
        :param n: number of neighbours wanted. The quota stays at 100 for
            ``n <= 100`` (the previous fixed value) and grows with ``n``
            beyond that, so larger requests are no longer capped.
        """
        v = v.astype(numpy.float32)
        quota = 100 if n is None else max(n, 100)
        return self._searcher.search(v, quota=quota)
Ejemplo n.º 5
0
Archivo: lopq.py Proyecto: Aguin/CBVR
 # Fragment of a larger function body (the enclosing definition is not visible here).
 # Load the annotation JSON; its top-level keys are used as query names below.
 with open(annotation_path, 'r') as annotation_file:
     json_obj = json.load(annotation_file)
 # Results are only computed when none were supplied.
 if results is None:
     query_names = json_obj.keys()
     query_names = [str(query_name) for query_name in query_names]
     query_indexs = []
     for query_name in query_names:
         # Positions in `names` equal to this query name; the first match is kept.
         tmp = np.where(names == query_name)
         if len(tmp) != 0 and len(tmp[0]) != 0:
             query_indexs.append(tmp[0][0])
         else:
             print('skip query: ', query_name)
     # NOTE(review): skipped queries stay in query_names but add no index here,
     # so the enumerate() over query_names further down can be misaligned with
     # the rows selected below -- confirm.
     query_features = np.squeeze(global_features[query_indexs])
     # similarities = calculate_similarities(query_features, global_features)
     # Run one search per query feature and print debugging output only; the
     # search results are not stored.
     for query_feature in query_features:
         nns = searcher.search(query_feature, quota=100)
         # NOTE(review): assumes the search result exposes `.shape` -- verify
         # against the searcher's actual return type.
         print(nns.shape)
         print(nns[0])
     results = dict()
     for query_idx, query_name in enumerate(query_names):
         # NOTE(review): `similarities` is not assigned anywhere in the visible
         # code (the line computing it is commented out above); unless it is
         # defined before this fragment, this raises NameError -- confirm.
         cur_sim = similarities[query_idx]
         # Each item v of cur_sim becomes names[v[0]] -> v[1].
         query_result = dict(map(lambda v: (names[v[0]], v[1]), cur_sim))
         # Drop the query itself from its own results (KeyError if it is absent).
         del query_result[query_name]
         results[query_name] = query_result
 # Evaluate the results against the ground truth and print the mean of the returned scores.
 mAPOffcial, precisions = evaluateOfficial(
     annotations=gtobj.annotations,
     results=results,
     relevant_labels=relevant_labels_mapping[task_name],
     dataset=gtobj.dataset,
     quiet=False)
 print('{} mAPOffcial is {}'.format(task_name, np.mean(mAPOffcial)))
Ejemplo n.º 6
0
def main(new=True):
    """Compare a brute-force nearest-neighbour search with LOPQ on toy data.

    :param new: when true, generate random data and fit a fresh model, saving
        both to ``data.npy`` and ``params.mat``; otherwise load them from
        those files.

    Prints the brute-force top 10, the LOPQ top 10, timings, and how many of
    the LOPQ results appear in the brute-force top 10.
    """
    top_k = 10

    # data: 3000 x 128dim
    if new:
        data = np.vstack((
            np.random.rand(1000, 128),
            np.random.rand(1000, 128) + 1,
            np.random.rand(1000, 128) - 1,
        ))
        print('make data')
        # save data
        np.save('data.npy', data)
    else:
        # load data
        data = np.load('./data.npy')

    # wanted to know this nearest neighbors
    x = np.ones(128) * 2

    print('naive implementation')
    start = time.time()
    dist = np.sum(np.power((data - x), 2), axis=1)
    res = np.argsort(dist)
    print(res[0:top_k])  # return indices; top 10
    print('{} s taken for naive NNsearch'.format(time.time() - start))

    if new:
        # Define a model and fit it to data
        model = LOPQModel(V=3, M=2, subquantizer_clusters=64)
        start = time.time()
        model.fit(data)
        print('{} s taken for model fitting'.format(time.time() - start))
        # save model
        model.export_mat('params.mat')
    else:
        # load model
        model = LOPQModel.load_mat('params.mat')

    # Compute the LOPQ codes for a vector
    # if we define SC as subquantizer_clusters,
    # input vec(128dim); output: coarse codes(V, V), fine codes(SC, SC) because M = 2
    #
    # Example: code = model.predict(np.random.rand(128))

    # Create a searcher to index data with the model
    searcher = LOPQSearcher(model)
    searcher.add_data(data)

    start = time.time()
    # Retrieve ranked nearest neighbors
    nns = searcher.search(x, quota=top_k)
    # nns[0] holds the results and each result's first field is the item id.
    # Slicing instead of indexing 0..9 avoids an IndexError when fewer than
    # top_k results come back.
    ans = [result[0] for result in nns[0][:top_k]]
    print(ans)
    print('{} s taken for prediction top 10'.format(time.time() - start))

    # Count how many LOPQ results are among the true top-k. The original used
    # a for/else whose else branch always ran (the loop has no break); a plain
    # statement after the loop says the same thing without the trap.
    exact_top = set(res[0:top_k].tolist())
    count = sum(1 for element in ans if element in exact_top)
    print('accuracy:  {} / {}'.format(count, top_k))