Beispiel #1
0
class ApproximateIndexer(object):
    """
    Approximate nearest-neighbour indexer built around an LOPQ model.

    Vectors are reduced to 256 dimensions with ``PCA``, mean-centred and
    rotated with the projection returned by ``pca`` and finally handed to
    the LOPQ model / searcher.
    """

    def __init__(self, index_name, model_path, lmdb_path, V=16, M=16):
        """
        :param index_name: name kept on the instance as ``index_name``.
        :param model_path: path of the model protobuf read by ``load`` and
            written by ``fit_model``.
        :param lmdb_path: kept on the instance; the in-memory searcher
            created in ``fit_model`` does not use it.
        :param V: passed through to ``LOPQModel``.
        :param M: passed through to ``LOPQModel``.
        """
        self.model = LOPQModel(V, M)
        self.index_name = index_name
        self.searcher = None
        self.model_path = model_path
        self.lmdb_path = lmdb_path
        # Populated by fit(); transform() needs all three of them.
        self.pca_reduction = None
        self.P = None
        self.mu = None

    def load(self):
        """Replace ``self.model`` with the model stored at ``model_path``."""
        # load_proto is a factory that returns a new model; calling it on the
        # existing instance and dropping the result would load nothing.
        self.model = LOPQModel.load_proto(self.model_path)

    def fit(self, train):
        """
        Fit the PCA reduction, the rotation and the LOPQ model on ``train``.

        :param train: 2-D array of training vectors, one per row.
        """
        print(train.shape)
        self.pca_reduction = PCA(n_components=256)
        self.pca_reduction.fit(train)
        train = self.pca_reduction.transform(train)
        self.P, self.mu = pca(train)
        # Centre before rotating so the model is trained on exactly the
        # representation that transform() produces for queries.
        train = np.dot(train - self.mu, self.P)
        print(train.shape)
        self.model.fit(train, n_init=1)

    def transform(self, test):
        """
        Project vectors into the space the model was trained in.

        :param test: 2-D array of vectors, one per row.
        :returns: the reduced, mean-centred and rotated vectors.
        """
        print(test.shape)
        test = self.pca_reduction.transform(test)
        test = np.dot(test - self.mu, self.P)
        print(test.shape)
        return test

    def fit_model(self, train):
        """Fit on ``train``, export the model and create a fresh searcher."""
        self.fit(train)
        self.model.export_proto(self.model_path)
        # In-memory searcher; LOPQSearcherLMDB(self.model, self.lmdb_path)
        # would be the LMDB-backed alternative.
        self.searcher = LOPQSearcher(self.model)

    def experiment(self, data):
        """
        Hold out 10% of ``data``, index the rest and print the recall.

        :param data: 2-D array of vectors, one per row.
        """
        train, test = train_test_split(data, test_size=0.1)
        print('{} {} {}'.format(data.shape, train.shape, test.shape))
        # Ground-truth neighbours are computed on the untransformed vectors.
        nns = compute_all_neighbors(test, train)
        self.fit_model(train)
        self.searcher.add_data(self.transform(train))
        recall, _ = get_recall(self.searcher, self.transform(test), nns)
        print('Recall (V={}, M={}, subquants={}): {}'.format(
            self.model.V, self.model.M, self.model.subquantizer_clusters,
            str(recall)))

    def add_data(self, data):
        """Add already transformed vectors to the searcher."""
        self.searcher.add_data(data)

    def search(self, x):
        """Search the index for ``x`` with a quota of 100 results."""
        return self.searcher.search(x, quota=100)
Beispiel #2
0
class Clustering(object):
    """
    PCA + LOPQ clustering of feature vectors with an LMDB-backed searcher.

    All artifact paths are derived from ``model_proto_filename`` by
    replacing its ``.proto`` suffix.
    """

    def __init__(self,
                 fnames,
                 n_components,
                 model_proto_filename,
                 m,
                 v,
                 sub,
                 test_mode=False,
                 dc=None):
        """
        Load feature matrices with their entries and derive artifact paths.

        TODO: simplify this by having a separate create vs. load/init path.

        :param fnames: paths of ``.npy`` feature files; the entries for each
            are read from the path obtained by replacing ``npy`` with
            ``json``. May be empty when the instance is only used with
            ``load`` / ``apply``.
        :param n_components: number of PCA components kept by ``cluster``.
        :param model_proto_filename: path of the model protobuf.
        :param m: ``M`` argument of ``LOPQModel``.
        :param v: ``V`` argument of ``LOPQModel``.
        :param sub: ``subquantizer_clusters`` argument of ``LOPQModel``.
        :param test_mode: when true ``cluster`` indexes only the training
            split and prints the recall on the held-out split.
        :param dc: kept on the instance as ``dc``; not used by this class.
        """
        self.dc = dc
        self.fnames = fnames
        self.entries = []
        # Stays None when no feature files are given (load-only usage).
        self.data = None
        matrices = []
        for fname in fnames:
            nmat = np.load(fname)
            if nmat.ndim > 2:
                nmat = nmat.squeeze()
            matrices.append(nmat)
            with open(fname.replace('npy', 'json')) as fh:
                self.entries.extend(json.load(fh))
        if matrices:
            if len(matrices) > 1:
                self.data = np.concatenate(matrices)
            else:
                self.data = matrices[0]
            logging.info(self.data.shape)
        self.test_mode = test_mode
        self.n_components = n_components
        self.m = m
        self.v = v
        self.sub = sub
        self.model = None
        self.searcher = None
        self.pca_reduction = None
        self.P = None
        self.mu = None
        self.model_proto_filename = model_proto_filename
        self.P_filename = model_proto_filename.replace('.proto', '.P.npy')
        self.mu_filename = model_proto_filename.replace('.proto', '.mu.npy')
        self.pca_filename = model_proto_filename.replace('.proto', '.pca.pkl')
        self.model_lmdb_filename = model_proto_filename.replace(
            '.proto', '_lmdb')
        self.permuted_inds_filename = model_proto_filename.replace(
            '.proto', '.permuted_inds.pkl')
        self.permuted_inds = None

    def pca(self):
        """
        A simple PCA implementation that demonstrates how eigenvalue allocation
        is used to permute dimensions in order to balance the variance across
        subvectors. There are plenty of PCA implementations elsewhere. What is
        important is that the eigenvalues can be used to compute a variance-balancing
        dimension permutation.

        :returns: ``(P, mu)``, the permuted eigenvector matrix and the mean
            of ``self.data``. Also stores the permutation in
            ``self.permuted_inds``.
        """
        count, D = self.data.shape
        mu = self.data.sum(axis=0) / float(count)
        # The sum of the per-row outer products equals X^T X; accumulate in
        # float64 like the np.zeros((D, D)) accumulator it replaces.
        rows = np.asarray(self.data, dtype=np.float64)
        summed_covar = np.dot(rows.T, rows)
        A = summed_covar / (count - 1) - np.outer(mu, mu)
        eigenvalues, P = np.linalg.eigh(A)
        self.permuted_inds = eigenvalue_allocation(2, eigenvalues)
        P = P[:, self.permuted_inds]
        return P, mu

    def cluster(self):
        """
        Fit PCA, rotation and LOPQ model, annotate entries and fill the searcher.

        ``self.data`` is replaced by its transformed representation and every
        entry gains ``coarse``, ``fine`` and ``index`` keys.

        :raises ValueError: if no feature data was loaded.
        """
        if self.data is None:
            raise ValueError('no feature data loaded; pass fnames to the '
                             'constructor before calling cluster()')
        self.pca_reduction = PCA(n_components=self.n_components)
        self.pca_reduction.fit(self.data)
        self.data = self.pca_reduction.transform(self.data)
        self.P, self.mu = self.pca()
        self.data = np.dot(self.data - self.mu, self.P)
        train, test = train_test_split(self.data, test_size=0.2)
        self.model = LOPQModel(V=self.v,
                               M=self.m,
                               subquantizer_clusters=self.sub)
        self.model.fit(train, n_init=1)
        # avoid doing this twice again in searcher
        for i, e in enumerate(self.entries):
            r = self.model.predict(self.data[i])
            e['coarse'] = r.coarse
            e['fine'] = r.fine
            e['index'] = i
        self.searcher = LOPQSearcherLMDB(self.model, self.model_lmdb_filename)
        if self.test_mode:
            self.searcher.add_data(train)
            nns = compute_all_neighbors(test, train)
            recall, _ = get_recall(self.searcher, test, nns)
            print('Recall (V=%d, M=%d, subquants=%d): %s' % (
                self.model.V, self.model.M, self.model.subquantizer_clusters,
                str(recall)))
        else:
            self.searcher.add_data(self.data)

    def find(self):
        """Print a randomly chosen entry and up to 10 results for its vector."""
        i, selected = random.choice(list(enumerate(self.entries)))
        print(selected)
        for k in self.searcher.get_result_quota(self.data[i], 10):
            print(k)

    def save(self):
        """Write the model and projection artifacts, then close the LMDB env."""
        self.model.export_proto(self.model_proto_filename)
        # Pickle and .npy payloads are binary, so the files must be opened
        # in binary mode.
        with open(self.pca_filename, 'wb') as out:
            pickle.dump(self.pca_reduction, out)
        with open(self.P_filename, 'wb') as out:
            np.save(out, self.P)
        with open(self.mu_filename, 'wb') as out:
            np.save(out, self.mu)
        with open(self.permuted_inds_filename, 'wb') as out:
            pickle.dump(self.permuted_inds, out)
        self.searcher.env.close()

    def load(self):
        """
        Restore the artifacts written by ``save`` and reopen the searcher.

        NOTE: unpickling executes code from the file; only load artifacts
        from a trusted source.
        """
        self.model = LOPQModel.load_proto(self.model_proto_filename)
        with open(self.pca_filename, 'rb') as fh:
            self.pca_reduction = pickle.load(fh)
        self.P = np.load(self.P_filename)
        self.mu = np.load(self.mu_filename)
        # Written with pickle.dump in save(), so read it back with pickle
        # instead of relying on np.load falling back to unpickling.
        with open(self.permuted_inds_filename, 'rb') as fh:
            self.permuted_inds = pickle.load(fh)
        self.searcher = LOPQSearcherLMDB(model=self.model,
                                         lmdb_path=self.model_lmdb_filename)

    def apply(self, vector, count=None):
        """
        Compute the LOPQ codes for ``vector`` and optionally search for it.

        :param vector: input accepted by ``pca_reduction.transform``
            (presumably a single row of shape (1, n_features) - TODO confirm).
        :param count: search quota; when falsy no search is performed.
        :returns: ``(coarse, fine, results)`` where ``results`` is the return
            value of ``searcher.search`` or ``None``.
        """
        reduced = self.pca_reduction.transform(vector) - self.mu
        vector = np.dot(reduced, self.P).transpose().squeeze()
        codes = self.model.predict(vector)
        if count:
            results = self.searcher.search(vector, quota=count)
        else:
            results = None
        return codes.coarse, codes.fine, results
Beispiel #3
0
class Clustering(object):
    """
    PCA + LOPQ clustering of feature vectors with an LMDB-backed searcher.

    All artifact paths are derived from ``model_proto_filename`` by
    replacing its ``.proto`` suffix.
    """

    def __init__(self, fnames, n_components, model_proto_filename, m, v, sub, test_mode=False, dc=None):
        """
        Load feature matrices with their entries and derive artifact paths.

        TODO: simplify this by having a separate create vs. load/init path.

        :param fnames: paths of ``.npy`` feature files; the entries for each
            are read from the path obtained by replacing ``npy`` with
            ``json``. May be empty when the instance is only used with
            ``load`` / ``apply``.
        :param n_components: number of PCA components kept by ``cluster``.
        :param model_proto_filename: path of the model protobuf.
        :param m: ``M`` argument of ``LOPQModel``.
        :param v: ``V`` argument of ``LOPQModel``.
        :param sub: ``subquantizer_clusters`` argument of ``LOPQModel``.
        :param test_mode: when true ``cluster`` indexes only the training
            split and prints the recall on the held-out split.
        :param dc: kept on the instance as ``dc``; not used by this class.
        """
        self.dc = dc
        self.fnames = fnames
        self.entries = []
        # Stays None when no feature files are given (load-only usage).
        self.data = None
        matrices = []
        for fname in fnames:
            nmat = np.load(fname)
            if nmat.ndim > 2:
                nmat = nmat.squeeze()
            matrices.append(nmat)
            with open(fname.replace('npy', 'json')) as fh:
                self.entries.extend(json.load(fh))
        if matrices:
            if len(matrices) > 1:
                self.data = np.concatenate(matrices)
            else:
                self.data = matrices[0]
            logging.info(self.data.shape)
        self.test_mode = test_mode
        self.n_components = n_components
        self.m = m
        self.v = v
        self.sub = sub
        self.model = None
        self.searcher = None
        self.pca_reduction = None
        self.P = None
        self.mu = None
        self.model_proto_filename = model_proto_filename
        self.P_filename = model_proto_filename.replace('.proto', '.P.npy')
        self.mu_filename = model_proto_filename.replace('.proto', '.mu.npy')
        self.pca_filename = model_proto_filename.replace('.proto', '.pca.pkl')
        self.model_lmdb_filename = model_proto_filename.replace('.proto', '_lmdb')
        self.permuted_inds_filename = model_proto_filename.replace('.proto', '.permuted_inds.pkl')
        self.permuted_inds = None

    def pca(self):
        """
        A simple PCA implementation that demonstrates how eigenvalue allocation
        is used to permute dimensions in order to balance the variance across
        subvectors. There are plenty of PCA implementations elsewhere. What is
        important is that the eigenvalues can be used to compute a variance-balancing
        dimension permutation.

        :returns: ``(P, mu)``, the permuted eigenvector matrix and the mean
            of ``self.data``. Also stores the permutation in
            ``self.permuted_inds``.
        """
        count, D = self.data.shape
        mu = self.data.sum(axis=0) / float(count)
        # The sum of the per-row outer products equals X^T X; accumulate in
        # float64 like the np.zeros((D, D)) accumulator it replaces.
        rows = np.asarray(self.data, dtype=np.float64)
        summed_covar = np.dot(rows.T, rows)
        A = summed_covar / (count - 1) - np.outer(mu, mu)
        eigenvalues, P = np.linalg.eigh(A)
        self.permuted_inds = eigenvalue_allocation(2, eigenvalues)
        P = P[:, self.permuted_inds]
        return P, mu

    def cluster(self):
        """
        Fit PCA, rotation and LOPQ model, annotate entries and fill the searcher.

        ``self.data`` is replaced by its transformed representation and every
        entry gains ``coarse``, ``fine`` and ``index`` keys.

        :raises ValueError: if no feature data was loaded.
        """
        if self.data is None:
            raise ValueError('no feature data loaded; pass fnames to the '
                             'constructor before calling cluster()')
        self.pca_reduction = PCA(n_components=self.n_components)
        self.pca_reduction.fit(self.data)
        self.data = self.pca_reduction.transform(self.data)
        self.P, self.mu = self.pca()
        self.data = np.dot(self.data - self.mu, self.P)
        train, test = train_test_split(self.data, test_size=0.2)
        self.model = LOPQModel(V=self.v, M=self.m, subquantizer_clusters=self.sub)
        self.model.fit(train, n_init=1)
        # avoid doing this twice again in searcher
        for i, e in enumerate(self.entries):
            r = self.model.predict(self.data[i])
            e['coarse'] = r.coarse
            e['fine'] = r.fine
            e['index'] = i
        self.searcher = LOPQSearcherLMDB(self.model, self.model_lmdb_filename)
        if self.test_mode:
            self.searcher.add_data(train)
            nns = compute_all_neighbors(test, train)
            recall, _ = get_recall(self.searcher, test, nns)
            print('Recall (V=%d, M=%d, subquants=%d): %s' % (self.model.V, self.model.M, self.model.subquantizer_clusters, str(recall)))
        else:
            self.searcher.add_data(self.data)

    def find(self):
        """Print a randomly chosen entry and up to 10 results for its vector."""
        i, selected = random.choice(list(enumerate(self.entries)))
        print(selected)
        for k in self.searcher.get_result_quota(self.data[i], 10):
            print(k)

    def save(self):
        """Write the model and projection artifacts, then close the LMDB env."""
        self.model.export_proto(self.model_proto_filename)
        # Pickle and .npy payloads are binary, so the files must be opened
        # in binary mode.
        with open(self.pca_filename, 'wb') as out:
            pickle.dump(self.pca_reduction, out)
        with open(self.P_filename, 'wb') as out:
            np.save(out, self.P)
        with open(self.mu_filename, 'wb') as out:
            np.save(out, self.mu)
        with open(self.permuted_inds_filename, 'wb') as out:
            pickle.dump(self.permuted_inds, out)
        self.searcher.env.close()

    def load(self):
        """
        Restore the artifacts written by ``save`` and reopen the searcher.

        NOTE: unpickling executes code from the file; only load artifacts
        from a trusted source.
        """
        self.model = LOPQModel.load_proto(self.model_proto_filename)
        with open(self.pca_filename, 'rb') as fh:
            self.pca_reduction = pickle.load(fh)
        self.P = np.load(self.P_filename)
        self.mu = np.load(self.mu_filename)
        # Written with pickle.dump in save(), so read it back with pickle
        # instead of relying on np.load falling back to unpickling.
        with open(self.permuted_inds_filename, 'rb') as fh:
            self.permuted_inds = pickle.load(fh)
        self.searcher = LOPQSearcherLMDB(model=self.model, lmdb_path=self.model_lmdb_filename)

    def apply(self, vector, count=None):
        """
        Compute the LOPQ codes for ``vector`` and optionally search for it.

        :param vector: input accepted by ``pca_reduction.transform``
            (presumably a single row of shape (1, n_features) - TODO confirm).
        :param count: search quota; when falsy no search is performed.
        :returns: ``(coarse, fine, results)`` where ``results`` is the return
            value of ``searcher.search`` or ``None``.
        """
        reduced = self.pca_reduction.transform(vector) - self.mu
        vector = np.dot(reduced, self.P).transpose().squeeze()
        codes = self.model.predict(vector)
        if count:
            results = self.searcher.search(vector, quota=count)
        else:
            results = None
        return codes.coarse, codes.fine, results
class LOPQTrainer(object):
    """Train a PCA + LOPQ model and write its artifacts into a directory."""

    def __init__(self, name, components, m, v, sub, dirname,
                 source_indexer_shasum):
        """
        :param name: name reported in the description returned by ``save``.
        :param components: number of PCA components (coerced with ``int``).
        :param m: ``M`` argument of ``LOPQModel`` (coerced with ``int``).
        :param v: ``V`` argument of ``LOPQModel`` (coerced with ``int``).
        :param sub: ``subquantizer_clusters`` argument of ``LOPQModel``
            (coerced with ``int``).
        :param dirname: directory the artifacts are written to by ``save``.
        :param source_indexer_shasum: reported as ``indexer_shasum`` in the
            description returned by ``save``.
        """
        self.name = name
        self.n_components = int(components)
        self.m = int(m)
        self.v = int(v)
        self.dirname = dirname
        self.sub = int(sub)
        self.model = None
        self.pca_reduction = None
        self.P = None
        self.mu = None
        self.permuted_inds = None
        self.source_indexer_shasum = source_indexer_shasum

    def pca(self, training_data):
        """
        A simple PCA implementation that demonstrates how eigenvalue allocation
        is used to permute dimensions in order to balance the variance across
        sub vectors. There are plenty of PCA implementations elsewhere. What is
        important is that the eigenvalues can be used to compute a variance-balancing
        dimension permutation.

        :param training_data: 2-D array, one vector per row.
        :returns: ``(P, mu)``, the permuted eigenvector matrix and the mean
            of ``training_data``. Also stores the permutation in
            ``self.permuted_inds``.
        """
        count, D = training_data.shape
        mu = training_data.sum(axis=0) / float(count)
        # The sum of the per-row outer products equals X^T X; accumulate in
        # float64 like the np.zeros((D, D)) accumulator it replaces.
        rows = np.asarray(training_data, dtype=np.float64)
        summed_covar = np.dot(rows.T, rows)
        A = summed_covar / (count - 1) - np.outer(mu, mu)
        eigenvalues, P = np.linalg.eigh(A)
        self.permuted_inds = eigenvalue_allocation(2, eigenvalues)
        P = P[:, self.permuted_inds]
        return P, mu

    def train(self, training_data):
        """
        Fit the PCA reduction, the rotation and the LOPQ model.

        :param training_data: 2-D array, one vector per row.
        """
        self.pca_reduction = PCA(n_components=self.n_components)
        self.pca_reduction.fit(training_data)
        training_data = self.pca_reduction.transform(training_data)
        self.P, self.mu = self.pca(training_data)
        training_data = np.dot(training_data - self.mu, self.P)
        self.model = LOPQModel(V=self.v,
                               M=self.m,
                               subquantizer_clusters=self.sub)
        self.model.fit(training_data, n_init=1)

    def save(self):
        """
        Write all artifacts into ``dirname`` and describe them.

        :returns: dict with ``name``, ``algorithm``, ``shasum`` (sha1 of the
            model protobuf), ``model_type``, ``arguments`` and ``files``.
        """
        model_proto_filename = "{}/model.proto".format(self.dirname)
        P_filename = "{}/model.P.npy".format(self.dirname)
        mu_filename = "{}/model.mu.npy".format(self.dirname)
        pca_filename = "{}/model.pca.pkl".format(self.dirname)
        permind_filename = "{}/model.permind.pkl".format(self.dirname)
        # Every artifact is binary (protobuf, pickle, .npy), so open in
        # binary mode.
        with open(model_proto_filename, 'wb') as f:
            self.model.export_proto(f)
        with open(pca_filename, 'wb') as out:
            pickle.dump(self.pca_reduction, out)
        with open(P_filename, 'wb') as out:
            np.save(out, self.P)
        with open(mu_filename, 'wb') as out:
            np.save(out, self.mu)
        with open(permind_filename, 'wb') as out:
            pickle.dump(self.permuted_inds, out)
        with open(model_proto_filename, 'rb') as f:
            shasum = hashlib.sha1(f.read()).hexdigest()
        artifact_names = ("model.proto", "model.P.npy", "model.mu.npy",
                          "model.pca.pkl", "model.permind.pkl")
        return {
            "name": self.name,
            "algorithm": "LOPQ",
            "shasum": shasum,
            "model_type": "P",
            "arguments": {
                'm': self.m,
                'v': self.v,
                'sub': self.sub,
                'components': self.n_components,
                'indexer_shasum': self.source_indexer_shasum
            },
            "files": [{
                "filename": artifact,
                "url": "{}/{}".format(self.dirname, artifact)
            } for artifact in artifact_names]
        }
Beispiel #5
0
class LOPQRetriever(BaseRetriever):
    """
    Retriever that indexes feature vectors with PCA + LOPQ and an
    LMDB-backed searcher.

    All artifact paths are derived from ``args['proto_filename']`` by
    replacing its ``.proto`` suffix.
    """

    def __init__(self, name, args):
        """
        :param name: name kept on the instance.
        :param args: dict with the required keys ``components``, ``m``,
            ``v``, ``sub`` and ``proto_filename`` and the optional keys
            ``fnames`` (``.npy`` feature files, each with entries at the
            path obtained by replacing ``npy`` with ``json``) and
            ``test_mode``.
        """
        self.name = name
        self.fnames = args.get('fnames', [])
        self.entries = []
        # Stays None when no feature files are given (load-only usage).
        self.data = None
        matrices = []
        for fname in self.fnames:
            nmat = np.load(fname)
            if nmat.ndim > 2:
                logging.info("squeezing  shape {} with dimensions {}".format(
                    nmat.shape, nmat.ndim))
                nmat = nmat.squeeze(axis=1)
            elif nmat.ndim == 1:
                logging.info("expanding  shape {} with dimensions {}".format(
                    nmat.shape, nmat.ndim))
                nmat = np.expand_dims(nmat, axis=0)
            else:
                logging.info(
                    "keeping same  shape {} with dimensions {}".format(
                        nmat.shape, nmat.ndim))
            matrices.append(nmat)
            with open(fname.replace('npy', 'json')) as fh:
                self.entries.extend(json.load(fh))
        if matrices:
            if len(matrices) > 1:
                self.data = np.concatenate(matrices)
            else:
                self.data = matrices[0]
            logging.info(self.data.shape)
        # Kept for callers; cluster() currently ignores it.
        self.test_mode = args.get('test_mode', False)
        self.n_components = int(args['components'])
        self.m = int(args['m'])
        self.v = int(args['v'])
        self.sub = int(args['sub'])
        self.model = None
        self.searcher = None
        self.pca_reduction = None
        self.P = None
        self.mu = None
        proto_filename = args['proto_filename']
        self.model_proto_filename = proto_filename
        self.P_filename = proto_filename.replace('.proto', '.P.npy')
        self.mu_filename = proto_filename.replace('.proto', '.mu.npy')
        self.pca_filename = proto_filename.replace('.proto', '.pca.pkl')
        self.model_lmdb_filename = proto_filename.replace('.proto', '_lmdb')
        self.permuted_inds_filename = proto_filename.replace(
            '.proto', '.permuted_inds.pkl')
        self.permuted_inds = None

    def pca(self):
        """
        A simple PCA implementation that demonstrates how eigenvalue allocation
        is used to permute dimensions in order to balance the variance across
        subvectors. There are plenty of PCA implementations elsewhere. What is
        important is that the eigenvalues can be used to compute a variance-balancing
        dimension permutation.

        :returns: ``(P, mu)``, the permuted eigenvector matrix and the mean
            of ``self.data``. Also stores the permutation in
            ``self.permuted_inds``.
        """
        count, D = self.data.shape
        mu = self.data.sum(axis=0) / float(count)
        # The sum of the per-row outer products equals X^T X; accumulate in
        # float64 like the np.zeros((D, D)) accumulator it replaces.
        rows = np.asarray(self.data, dtype=np.float64)
        summed_covar = np.dot(rows.T, rows)
        A = summed_covar / (count - 1) - np.outer(mu, mu)
        eigenvalues, P = np.linalg.eigh(A)
        self.permuted_inds = eigenvalue_allocation(2, eigenvalues)
        P = P[:, self.permuted_inds]
        return P, mu

    def cluster(self):
        """
        Fit PCA, rotation and LOPQ model on all data and fill the searcher.

        ``self.data`` is replaced by its transformed representation and every
        entry gains ``coarse``, ``fine`` and ``index`` keys.

        :raises ValueError: if no feature data was loaded.
        """
        if self.data is None:
            raise ValueError('no feature data loaded; pass fnames in args '
                             'before calling cluster()')
        self.pca_reduction = PCA(n_components=self.n_components)
        self.pca_reduction.fit(self.data)
        self.data = self.pca_reduction.transform(self.data)
        self.P, self.mu = self.pca()
        self.data = np.dot(self.data - self.mu, self.P)
        self.model = LOPQModel(V=self.v,
                               M=self.m,
                               subquantizer_clusters=self.sub)
        # The model is fitted on the full data set (no train/test split).
        self.model.fit(self.data, n_init=1)
        # avoid doing this twice again in searcher
        for i, e in enumerate(self.entries):
            r = self.model.predict(self.data[i])
            e['coarse'] = r.coarse
            e['fine'] = r.fine
            e['index'] = i
        self.searcher = LOPQSearcherLMDB(self.model, self.model_lmdb_filename)
        self.searcher.add_data(self.data)

    def find(self):
        """Print a randomly chosen entry and up to 10 results for its vector."""
        i, selected = random.choice(list(enumerate(self.entries)))
        print(selected)
        for k in self.searcher.get_result_quota(self.data[i], 10):
            print(k)

    def save(self):
        """Write the model and projection artifacts, then close the LMDB env."""
        # Every artifact is binary (protobuf, pickle, .npy), so open in
        # binary mode; each file is closed before the next one is written.
        with open(self.model_proto_filename, 'wb') as f:
            self.model.export_proto(f)
        with open(self.pca_filename, 'wb') as out:
            pickle.dump(self.pca_reduction, out)
        with open(self.P_filename, 'wb') as out:
            np.save(out, self.P)
        with open(self.mu_filename, 'wb') as out:
            np.save(out, self.mu)
        with open(self.permuted_inds_filename, 'wb') as out:
            pickle.dump(self.permuted_inds, out)
        self.searcher.env.close()

    def load(self):
        """
        Restore the artifacts written by ``save`` and reopen the searcher.

        NOTE: unpickling executes code from the file; only load artifacts
        from a trusted source.
        """
        self.model = LOPQModel.load_proto(self.model_proto_filename)
        with open(self.pca_filename, 'rb') as fh:
            self.pca_reduction = pickle.load(fh)
        self.P = np.load(self.P_filename)
        self.mu = np.load(self.mu_filename)
        # Written with pickle.dump in save(), so read it back with pickle
        # instead of relying on np.load falling back to unpickling.
        with open(self.permuted_inds_filename, 'rb') as fh:
            self.permuted_inds = pickle.load(fh)
        self.searcher = LOPQSearcherLMDB(model=self.model,
                                         lmdb_path=self.model_lmdb_filename)

    def apply(self, vector, count=None):
        """
        Compute the LOPQ codes for ``vector`` and optionally search for it.

        :param vector: input accepted by ``pca_reduction.transform``
            (presumably a single row of shape (1, n_features) - TODO confirm).
        :param count: search quota; when falsy no search is performed.
        :returns: ``(coarse, fine, results)`` where ``results`` is the return
            value of ``searcher.search`` or ``None``.
        """
        reduced = self.pca_reduction.transform(vector) - self.mu
        vector = np.dot(reduced, self.P).transpose().squeeze()
        codes = self.model.predict(vector)
        if count:
            results = self.searcher.search(vector, quota=count)
        else:
            results = None
        return codes.coarse, codes.fine, results

    def nearest(self, vector=None, n=12, retriever_pk=None, entry_getter=None):
        """
        Return ranked result dicts for the neighbours of ``vector``.

        :param vector: query vector, forwarded to ``apply``.
        :param n: search quota; a falsy value yields an empty list.
        :param retriever_pk: forwarded to ``entry_getter``.
        :param entry_getter: callable invoked as
            ``entry_getter(result.id, retriever_pk)``; the returned object
            must expose ``detection_id``, ``frame``, ``frame_id`` and
            ``video_id``.
        :returns: list of dicts, one per result, in search order.
        """
        results = []
        coarse, fine, results_indexes = self.apply(vector, n)
        if results_indexes is None:
            # apply() skips the search when the quota is falsy.
            return results
        for i, k in enumerate(results_indexes[0]):
            e = entry_getter(k.id, retriever_pk)
            # NOTE(review): 'dist' carries the zero-based position in the
            # result list, not a distance - confirm this is what consumers
            # expect.
            hit = {
                'rank': i + 1,
                'dist': i,
                'frame_index': e.frame.frame_index,
                'frame_primary_key': e.frame_id,
                'video_primary_key': e.video_id,
            }
            if e.detection_id:
                hit['detection_primary_key'] = e.detection_id
                hit['type'] = 'detection'
            else:
                hit['type'] = 'frame'
            results.append(hit)
        return results
Beispiel #6
0
class LOPQRetriever(BaseRetriever):
    """
    Retriever that indexes feature vectors with PCA + LOPQ and an
    LMDB-backed searcher.

    All artifact paths are derived from ``proto_filename`` by replacing its
    ``.proto`` suffix.
    """

    def __init__(self, name, proto_filename, args, test_mode=False):
        """
        :param name: name kept on the instance.
        :param proto_filename: path of the model protobuf.
        :param args: dict with the keys ``components``, ``m``, ``v`` and
            ``sub`` (each coerced with ``int``).
        :param test_mode: kept on the instance; not used by this class.
        """
        # NOTE(review): super(BaseRetriever, self) starts the lookup *after*
        # BaseRetriever, so BaseRetriever.__init__ is not executed here.
        # super(LOPQRetriever, self) is probably what was intended, but the
        # base class signature is not visible from this file - confirm
        # before changing.
        super(BaseRetriever, self).__init__()
        self.name = name
        self.proto_filename = proto_filename
        self.entries = []
        self.test_mode = test_mode
        self.n_components = int(args['components'])
        self.m = int(args['m'])
        self.v = int(args['v'])
        self.sub = int(args['sub'])
        self.model = None
        self.searcher = None
        self.pca_reduction = None
        self.P = None
        self.mu = None
        self.permuted_inds = None
        self.model_proto_filename = proto_filename
        self.P_filename = proto_filename.replace('.proto', '.P.npy')
        self.entries_filename = proto_filename.replace('.proto', '.json')
        self.mu_filename = proto_filename.replace('.proto', '.mu.npy')
        self.pca_filename = proto_filename.replace('.proto', '.pca.pkl')
        self.model_lmdb_filename = proto_filename.replace('.proto', '_lmdb')
        self.permuted_inds_filename = proto_filename.replace(
            '.proto', '.permuted_inds.pkl')

    def pca(self):
        """
        A simple PCA implementation that demonstrates how eigenvalue allocation
        is used to permute dimensions in order to balance the variance across
        sub vectors. There are plenty of PCA implementations elsewhere. What is
        important is that the eigenvalues can be used to compute a variance-balancing
        dimension permutation.

        :returns: ``(P, mu)``, the permuted eigenvector matrix and the mean
            of ``self.data``. Also stores the permutation in
            ``self.permuted_inds``.
        """
        count, D = self.data.shape
        mu = self.data.sum(axis=0) / float(count)
        # The sum of the per-row outer products equals X^T X; accumulate in
        # float64 like the np.zeros((D, D)) accumulator it replaces.
        rows = np.asarray(self.data, dtype=np.float64)
        summed_covar = np.dot(rows.T, rows)
        A = summed_covar / (count - 1) - np.outer(mu, mu)
        eigenvalues, P = np.linalg.eigh(A)
        self.permuted_inds = eigenvalue_allocation(2, eigenvalues)
        P = P[:, self.permuted_inds]
        return P, mu

    def cluster(self):
        """
        Fit PCA, rotation and LOPQ model on ``self.index`` and fill the searcher.

        ``self.data`` ends up holding the transformed vectors and every entry
        gains ``coarse``, ``fine`` and ``index`` keys.
        """
        # self.index is not assigned anywhere in this class; presumably it is
        # provided by BaseRetriever - verify against the base class.
        self.data = self.index
        self.pca_reduction = PCA(n_components=self.n_components)
        self.pca_reduction.fit(self.data)
        self.data = self.pca_reduction.transform(self.data)
        self.P, self.mu = self.pca()
        self.data = np.dot(self.data - self.mu, self.P)
        self.model = LOPQModel(V=self.v,
                               M=self.m,
                               subquantizer_clusters=self.sub)
        # The model is fitted on the full data set (no train/test split).
        self.model.fit(self.data, n_init=1)
        # avoid doing this twice again in searcher
        for i, e in enumerate(self.entries):
            r = self.model.predict(self.data[i])
            e['coarse'] = r.coarse
            e['fine'] = r.fine
            e['index'] = i
        self.searcher = LOPQSearcherLMDB(self.model, self.model_lmdb_filename)
        self.searcher.add_data(self.data)

    def find(self):
        """Print a randomly chosen entry and up to 10 results for its vector."""
        i, selected = random.choice(list(enumerate(self.entries)))
        print(selected)
        for k in self.searcher.get_result_quota(self.data[i], 10):
            print(k)

    def save(self):
        """Write model, projection and entry artifacts, then close the LMDB env."""
        # Protobuf, pickle and .npy payloads are binary, so those files are
        # opened in binary mode; each file is closed before the next one is
        # written.
        with open(self.model_proto_filename, 'wb') as f:
            self.model.export_proto(f)
        with open(self.pca_filename, 'wb') as out:
            pickle.dump(self.pca_reduction, out)
        with open(self.P_filename, 'wb') as out:
            np.save(out, self.P)
        with open(self.mu_filename, 'wb') as out:
            np.save(out, self.mu)
        with open(self.entries_filename, 'w') as out:
            # json.dump takes the object first and the file second.
            json.dump(self.entries, out)
        with open(self.permuted_inds_filename, 'wb') as out:
            pickle.dump(self.permuted_inds, out)
        self.searcher.env.close()

    def load(self):
        """
        Restore the model and projection artifacts and reopen the searcher.

        The entries file written by ``save`` is not read back here.

        NOTE: unpickling executes code from the file; only load artifacts
        from a trusted source.
        """
        self.model = LOPQModel.load_proto(self.model_proto_filename)
        with open(self.pca_filename, 'rb') as fh:
            self.pca_reduction = pickle.load(fh)
        self.P = np.load(self.P_filename)
        self.mu = np.load(self.mu_filename)
        # Written with pickle.dump in save(), so read it back with pickle
        # instead of relying on np.load falling back to unpickling.
        with open(self.permuted_inds_filename, 'rb') as fh:
            self.permuted_inds = pickle.load(fh)
        self.searcher = LOPQSearcherLMDB(model=self.model,
                                         lmdb_path=self.model_lmdb_filename)

    def apply(self, vector, count=None):
        """
        Compute the LOPQ codes for ``vector`` and optionally search for it.

        :param vector: input accepted by ``pca_reduction.transform``
            (presumably a single row of shape (1, n_features) - TODO confirm).
        :param count: search quota; when falsy no search is performed.
        :returns: ``(coarse, fine, results)`` where ``results`` is the return
            value of ``searcher.search`` or ``None``.
        """
        reduced = self.pca_reduction.transform(vector) - self.mu
        vector = np.dot(reduced, self.P).transpose().squeeze()
        codes = self.model.predict(vector)
        if count:
            results = self.searcher.search(vector, quota=count)
        else:
            results = None
        return codes.coarse, codes.fine, results

    def nearest(self, vector=None, n=12, retriever_pk=None, entry_getter=None):
        """
        Return ranked result dicts for the neighbours of ``vector``.

        :param vector: query vector, forwarded to ``apply``.
        :param n: search quota; a falsy value yields an empty list.
        :param retriever_pk: forwarded to ``entry_getter``.
        :param entry_getter: callable invoked as
            ``entry_getter(result.id, retriever_pk)``; the returned object
            must expose ``detection_id``, ``frame``, ``frame_id`` and
            ``video_id``.
        :returns: list of dicts, one per result, in search order.
        """
        results = []
        coarse, fine, results_indexes = self.apply(vector, n)
        if results_indexes is None:
            # apply() skips the search when the quota is falsy.
            return results
        for i, k in enumerate(results_indexes[0]):
            e = entry_getter(k.id, retriever_pk)
            # NOTE(review): 'dist' carries the zero-based position in the
            # result list, not a distance - confirm this is what consumers
            # expect.
            hit = {
                'rank': i + 1,
                'dist': i,
                'frame_index': e.frame.frame_index,
                'frame_primary_key': e.frame_id,
                'video_primary_key': e.video_id,
            }
            if e.detection_id:
                hit['detection_primary_key'] = e.detection_id
                hit['type'] = 'detection'
            else:
                hit['type'] = 'frame'
            results.append(hit)
        return results