Code example #1
 def fit(self, X):
     # Coerce the input to a float32 numpy array before training.
     X = numpy.array(X)
     X = X.astype(numpy.float32)
     # Train the LOPQ model, then index the same data for search.
     self._model.fit(X)
     self._searcher = LOPQSearcher(self._model)
     self._searcher.add_data(X)
     print("Fit done")
Code example #2
 def __init__(self, name, approximator):
     self.approximate = True
     self.name = name
     self.loaded_entries = set()
     self.entries = []
     self.support_batching = False
     # Load the trained approximator and build an LOPQ index over its model.
     self.approximator = approximator
     self.approximator.load()
     self.searcher = LOPQSearcher(model=self.approximator.model)
Code example #3
def main(input_dir='/Users/aub3/temptest/gtin/',
         output_dir="/Users/aub3/temptest/products"):
    products = external_indexed.ProductsIndex(path=output_dir)
    # products.prepare(input_dir)
    products.build_approximate()
    data = products.data
    # data = load_oxford_data()
    print(data.shape)
    pca_reduction = PCA(n_components=32)
    pca_reduction.fit(data)
    data = pca_reduction.transform(data)
    print(data.shape)
    P, mu = pca(data)
    data = data - mu
    data = np.dot(data, P)
    train, test = train_test_split(data, test_size=0.2)
    print(train.shape, test.shape)
    nns = compute_all_neighbors(test, train)
    m = LOPQModel(V=16, M=8)
    m.fit(train, n_init=1)
    print "fitted"
    searcher = LOPQSearcher(m)
    print "adding data"
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' % (
        m.V, m.M, m.subquantizer_clusters, str(recall)))
    m2 = LOPQModel(V=16, M=16, parameters=(m.Cs, None, None, None))
    m2.fit(train, n_init=1)
    searcher = LOPQSearcher(m2)
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' % (
        m2.V, m2.M, m2.subquantizer_clusters, str(recall)))
    m3 = LOPQModel(V=16,
                   M=8,
                   subquantizer_clusters=512,
                   parameters=(m.Cs, m.Rs, m.mus, None))
    m3.fit(train, n_init=1)
    searcher = LOPQSearcher(m3)
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' % (
        m3.V, m3.M, m3.subquantizer_clusters, str(recall)))
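
The parameters keyword seen above lets a new model reuse pieces of an already trained one. Judging from its use in these examples, the tuple order is (coarse quantizers Cs, rotations Rs, rotation offsets mus, subquantizers), and passing None for a slot leaves it to be retrained. A short sketch:

# Reuse everything except the subquantizers from the trained model m,
# following the (Cs, Rs, mus, subquantizers) slot order used above.
m_reuse = LOPQModel(V=16, M=8, parameters=(m.Cs, m.Rs, m.mus, None))
m_reuse.fit(train, n_init=1)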
Code example #4
 def __init__(self, name, approximator):
     super(LOPQRetriever, self).__init__(name=name,
                                         approximator=approximator,
                                         algorithm="LOPQ")
     self.approximate = True
     self.name = name
     self.loaded_entries = {}
     self.entries = []
     self.support_batching = False
     self.approximator = approximator
     self.approximator.load()
     self.searcher = LOPQSearcher(model=self.approximator.model)
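
Neither constructor shows retrieval. A minimal hypothetical nearest() method for such a retriever might wrap LOPQSearcher.search (the method name, argument, and result unpacking are assumptions, not part of the original class):

 def nearest(self, vector, n=12):
     # Hypothetical sketch: query the LOPQ index built in __init__
     # and return the ids of the top-n approximate neighbors.
     results, _ = self.searcher.search(vector, quota=n)
     return [r[0] for r in results][:n]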
Code example #5
 def cluster(self):
     print(self.data.shape)
     pca_reduction = PCA(n_components=32)
     pca_reduction.fit(self.data)
     self.data = pca_reduction.transform(self.data)
     print(self.data.shape)
     P, mu = self.pca()
     self.data = self.data - mu
     # Rotate the data in place so the split below uses the rotated features.
     self.data = np.dot(self.data, P)
     train, test = train_test_split(self.data, test_size=0.2)
     print(train.shape, test.shape)
     nns = compute_all_neighbors(test, train)
     m = LOPQModel(V=16, M=8)
     m.fit(train, n_init=1)
     print "fitted"
     searcher = LOPQSearcher(m)
     print "adding data"
     searcher.add_data(train)
     recall, _ = get_recall(searcher, test, nns)
     print('Recall (V=%d, M=%d, subquants=%d): %s' % (
         m.V, m.M, m.subquantizer_clusters, str(recall)))
     m2 = LOPQModel(V=16, M=16, parameters=(m.Cs, None, None, None))
     m2.fit(train, n_init=1)
     searcher = LOPQSearcher(m2)
     searcher.add_data(train)
     recall, _ = get_recall(searcher, test, nns)
     print('Recall (V=%d, M=%d, subquants=%d): %s' % (
         m2.V, m2.M, m2.subquantizer_clusters, str(recall)))
     m3 = LOPQModel(V=16,
                    M=8,
                    subquantizer_clusters=512,
                    parameters=(m.Cs, m.Rs, m.mus, None))
     m3.fit(train, n_init=1)
     searcher = LOPQSearcher(m3)
     searcher.add_data(train)
     recall, _ = get_recall(searcher, test, nns)
     print('Recall (V=%d, M=%d, subquants=%d): %s' % (
         m3.V, m3.M, m3.subquantizer_clusters, str(recall)))
Code example #6
 def cluster(self):
     self.pca_reduction = PCA(n_components=self.n_components)
     self.pca_reduction.fit(self.data)
     self.data = self.pca_reduction.transform(self.data)
     self.P, self.mu = self.pca()
     self.data = self.data - self.mu
     self.data = np.dot(self.data, self.P)
     train, test = train_test_split(self.data, test_size=0.2)
     self.model = LOPQModel(V=self.v,
                            M=self.m,
                            subquantizer_clusters=self.sub)
     self.model.fit(train, n_init=1)
     self.searcher = LOPQSearcher(self.model)
     if self.test_mode:
         self.searcher.add_data(train)
         nns = compute_all_neighbors(test, train)
         recall, _ = get_recall(self.searcher, test, nns)
         print('Recall (V=%d, M=%d, subquants=%d): %s' % (
             self.model.V, self.model.M, self.model.subquantizer_clusters,
             str(recall)))
     for i, e in enumerate(self.entries):
         # Predict each entry's code once and store both halves.
         code = self.model.predict(self.data[i])
         e['coarse'] = code.coarse
         e['fine'] = code.fine
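
The loop relies on LOPQModel.predict returning a code object with coarse and fine fields. A standalone sketch of inspecting one code (model and vec are illustrative stand-ins; vec must match the model's input dimensionality):

code = model.predict(vec)
# code.coarse: one coarse cluster id per half of the vector
# code.fine:   one subquantizer cluster id per subvector (M ids in total)
print(code.coarse, code.fine)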
Code example #7
 def fit_model(self, train):
     self.fit(train)
     self.model.export_proto(self.model_path)
     # An LMDB-backed index could be used instead:
     #   self.searcher = LOPQSearcherLMDB(self.model, self.lmdb_path)
     self.searcher = LOPQSearcher(self.model)
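
The commented alternative points at lopq's LMDB-backed searcher, which keeps the index on disk rather than in memory. A sketch of the swap, assuming the LOPQSearcherLMDB(model, lmdb_path) signature from the lopq package:

 from lopq.search import LOPQSearcherLMDB
 # Disk-backed index; useful when codes should survive process restarts.
 self.searcher = LOPQSearcherLMDB(self.model, self.lmdb_path)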
Code example #8
File: example.py  Project: erobic/lopq
import numpy as np
from sklearn.model_selection import train_test_split

from lopq import LOPQModel, LOPQSearcher
from lopq.eval import compute_all_neighbors, get_recall

# pca() and load_oxford_data() are helpers defined elsewhere in example.py.


def main():
    """
    A brief demo script showing how to train various LOPQ models with brief
    discussion of trade offs.
    """

    # Get the oxford dataset
    data = load_oxford_data()

    # Compute PCA of oxford dataset. See README in data/oxford for details
    # about this dataset.
    P, mu = pca(data)

    # Mean center and rotate the data; includes dimension permutation.
    # It is worthwhile to see how this affects recall performance. On this
    # dataset, which is already PCA'd from higher dimensional features,
    # this additional step to variance-balance the dimensions typically
    # improves recall@1 by 3-5%. The benefit can be much greater depending
    # on the dataset.
    data = data - mu
    data = np.dot(data, P)

    # Create a train and test split. The test split will become
    # a set of queries for which we will compute the true nearest neighbors.
    train, test = train_test_split(data, test_size=0.2)

    # Compute distance-sorted neighbors in training set for each point in test set.
    # These will be our groundtruth for recall evaluation.
    nns = compute_all_neighbors(test, train)

    # Fit model
    m = LOPQModel(V=16, M=8)
    m.fit(train, n_init=1)

    # Note that we didn't specify a random seed for fitting the model, so different
    # runs will be different. You may also see a warning that some local projections
    # can't be estimated because too few points fall in a cluster. This is ok for the
    # purposes of this demo, but you might want to avoid this by increasing the amount
    # of training data or decreasing the number of clusters (the V hyperparameter).

    # With a model in hand, we can test its recall. We populate a LOPQSearcher
    # instance with data and get recall stats. By default, we will retrieve 1000
    # ranked results for each query vector for recall evaluation.
    searcher = LOPQSearcher(m)
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' %
          (m.V, m.M, m.subquantizer_clusters, str(recall)))

    # We can experiment with other hyperparameters without discarding all
    # parameters every time. Here we train a new model that uses the same coarse
    # quantizers but a higher number of subquantizers, i.e. we increase M.
    m2 = LOPQModel(V=16, M=16, parameters=(m.Cs, None, None, None))
    m2.fit(train, n_init=1)

    # Let's evaluate again.
    searcher = LOPQSearcher(m2)
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' %
          (m2.V, m2.M, m2.subquantizer_clusters, str(recall)))

    # The recall is probably higher: we got better recall from the finer
    # quantization at the expense of more storage per index item.

    # We can also hold both coarse quantizers and rotations fixed and see what
    # increasing the number of subquantizer clusters does to performance.
    m3 = LOPQModel(V=16,
                   M=8,
                   subquantizer_clusters=512,
                   parameters=(m.Cs, m.Rs, m.mus, None))
    m3.fit(train, n_init=1)

    searcher = LOPQSearcher(m3)
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    print('Recall (V=%d, M=%d, subquants=%d): %s' %
          (m3.V, m3.M, m3.subquantizer_clusters, str(recall)))
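
The comments above note that successive runs differ because no random seed is fixed. A minimal way to make the demo repeatable is to seed NumPy's global RNG before fitting; this is a sketch and assumes the k-means steps inside lopq draw their randomness from NumPy:

np.random.seed(42)  # illustrative seed value, set before fitting
m = LOPQModel(V=16, M=8)
m.fit(train, n_init=1)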
Code example #9
File: lopq.py  Project: Aguin/CBVR
import pickle as pk
import time

import numpy as np
from lopq import LOPQModel, LOPQSearcher

# GTOBJ and load_features are project-specific helpers defined in this project.
gtobj = GTOBJ()
relevant_labels_mapping = {
    'DSVR': ['ND', 'DS'],
    'CSVR': ['ND', 'DS', 'CS'],
    'ISVR': ['ND', 'DS', 'CS', 'IS'],
}

print('LOPQModel!')
start = time.time()
final_vids, features, vid2features = load_features(
    '/home/camp/FIVR/features/vcms_v1', is_gv=False)
# Define a model and fit it to data
model = LOPQModel(V=8, M=4)
model.fit(np.array(features).reshape(-1, 512))
# Create a searcher to index data with the model
searcher = LOPQSearcher(model)
searcher.add_data(features)
print('Read time: %.2f' % (time.time() - start))

# Load the features
vids = list(vid2features.keys())
print(vids[:10])
global_features = np.squeeze(
    np.asarray(list(vid2features.values()), np.float32))
print(np.shape(global_features))

# Load the vid2name and name2vid mappings
with open('/home/camp/FIVR/vid2name.pk', 'rb') as pk_file:
    vid2names = pk.load(pk_file)
with open('/home/camp/FIVR/vid2name.pk', 'rb') as pk_file:
    name2vids = pk.load(pk_file)
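
With the index populated, each 512-dim feature can serve as a query. A hypothetical retrieval sketch (the quota and the choice of the first feature are illustrative; mapping result ids back to video ids depends on how add_data assigned ids, which this snippet does not show):

query = global_features[0]
results, _ = searcher.search(query, quota=100)
candidate_ids = [r[0] for r in results]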
Code example #10
import time

import numpy as np
from lopq import LOPQModel, LOPQSearcher


def main(new=True):
    # data: 3000 x 128dim
    if not new:
        # load data
        data = np.load('./data.npy')
    else:
        data = np.vstack((np.random.rand(1000, 128), np.random.rand(1000, 128) + 1, np.random.rand(1000, 128) - 1))
        print('make data')
        # save data
        np.save('data.npy', data)

    # wanted to know this nearest neighbors
    x = np.ones(128) * 2

    print('naive implementation')
    start = time.time()
    dist = np.sum(np.power((data - x), 2), axis=1)
    res = np.argsort(dist)
    print(res[0:10])  # indices of the true top 10
    print(time.time() - start, 's taken for naive NN search')

    model = None
    if not new:
        # load model
        model = LOPQModel.load_mat('params.mat')
    else:
        # Define a model and fit it to data
        model = LOPQModel(V=3, M=2, subquantizer_clusters=64)
        start = time.time()
        model.fit(data)
        print(time.time() - start, 's taken for model fitting')
        # save model
        model.export_mat('params.mat')

    # Compute the LOPQ codes for a vector.
    # With SC = subquantizer_clusters, a 128-dim input yields a pair of
    # coarse codes (each in [0, V)) and M = 2 fine codes (each in [0, SC)).

    """
    for i in xrange(10):
        y = np.random.rand(128)
        code = model.predict(y)
        print 'output: ', code
    """

    # Create a searcher to index data with the model
    searcher = LOPQSearcher(model)
    searcher.add_data(data)

    start = time.time()
    # Retrieve ranked nearest neighbors
    nns = searcher.search(x, quota=10)
    ans = [nns[0][i][0] for i in range(10)]
    print(ans)
    print(time.time() - start, 's taken for top-10 retrieval')

    count = 0
    for element in ans:
        if element in res[0:10]:
            count += 1
    print('accuracy: ', count, '/', 10)
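
The closing loop measures overlap between the approximate and exact top-10 lists. The same quantity can be computed as a set intersection, which makes the recall@10 reading explicit (equivalent here because the indices are unique):

    overlap = len(set(ans) & set(res[:10].tolist()))
    print('accuracy: ', overlap, '/', 10)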