def test_oxford5k():
    """Regression test: fit LOPQ models on the Oxford data and check recall.

    Fits a full model, then two partially pre-parameterized models (coarse
    quantizers only; coarse quantizers plus rotations and means), and asserts
    that each one clears the same recall thresholds on the held-out split.
    """
    seed = 40
    recall_floor = [0.51, 0.92, 0.97, 0.97]

    dataset = load_oxford_data()
    train, test = train_test_split(dataset, test_size=0.2, random_state=seed)

    # Distance-sorted neighbors in the training set for each test point
    nns = compute_all_neighbors(test, train)

    def check_recall(model):
        # Index the training set with `model` and assert true NN recall on test
        searcher = LOPQSearcher(model)
        searcher.add_data(train)
        recall, _ = get_recall(searcher, test, nns)
        assert_true(np.all(recall > recall_floor))

    # Fully fitted model
    m = LOPQModel(V=16, M=8)
    m.fit(train, n_init=1, random_state=seed)

    # The code computed for the first test point is pinned
    expected_code = ((3, 2), (14, 164, 83, 49, 185, 29, 196, 250))
    assert_equal(m.predict(test[0]), expected_code)

    # The number of empty cells is pinned (and expected to be low)
    h = get_cell_histogram(train, m)
    assert_equal(np.count_nonzero(h == 0), 6)

    check_recall(m)

    # Partial fitting starting from just the coarse quantizers
    m2 = LOPQModel(V=16, M=8, parameters=(m.Cs, None, None, None))
    m2.fit(train, n_init=1, random_state=seed)
    check_recall(m2)

    # Partial fitting starting from coarse quantizers and rotations
    m3 = LOPQModel(V=16, M=8, parameters=(m.Cs, m.Rs, m.mus, None))
    m3.fit(train, n_init=1, random_state=seed)
    check_recall(m3)
def test_proto_partial():
    """Round-trip a partially specified model through the proto format.

    Builds a model that only has coarse quantizers, exports it, reloads it,
    and checks that every parameter (including the unset ones) matches.
    """
    import os

    filename = './temp_proto_partial.lopq'
    c = (np.random.rand(8, 8), np.random.rand(8, 8))

    m = LOPQModel(parameters=(c, None, None, None))
    m.export_proto(filename)

    # Clean up the temp file even when an assertion below fails.
    try:
        m2 = LOPQModel.load_proto(filename)

        assert_equal(m.V, m2.V)
        assert_equal(m.M, m2.M)
        assert_equal(m.subquantizer_clusters, m2.subquantizer_clusters)

        assert_true(np.allclose(m.Cs[0], m2.Cs[0]))
        assert_true(m.Rs == m2.Rs)
        assert_true(m.mus == m2.mus)
        # Compare against the reloaded model; the original compared m to
        # itself, which could never fail.
        assert_true(m.subquantizers == m2.subquantizers)
    finally:
        os.remove(filename)
if STEP_COARSE in args.steps: Cs = train_coarse(sc, data, args.V, seed=args.seed) else: Cs = model.Cs # Get rotations if STEP_ROTATION in args.steps: Rs, mus, counts = train_rotations(sc, data, args.M, Cs) else: Rs = model.Rs mus = model.mus # Get subquantizers if STEP_SUBQUANT in args.steps: model = LOPQModel(V=args.V, M=args.M, subquantizer_clusters=args.subquantizer_clusters, parameters=(Cs, Rs, mus, None)) if args.subquantizer_sampling_ratio != 1.0: data = data.sample(False, args.subquantizer_sampling_ratio, args.seed) subs = train_subquantizers(sc, data, args.M, args.subquantizer_clusters, model, seed=args.seed) # Final output model model = LOPQModel(V=args.V,
def train_index(self):
    """Train, persist and return the model selected by ``self.model_type``.

    For ``"lopq"`` a ``LOPQModel`` is fitted on the training features, but
    only when at least ``self.nb_train`` of them are available; otherwise an
    error is printed and ``None`` is returned. For ``"lopq_pca"`` a
    ``LOPQModelPCA`` is used: its PCA parameters are loaded from the storer,
    or trained and saved when none are stored, and then the model is fitted.

    Returns:
        The trained model, or ``None`` when the ``"lopq"`` branch does not
        have enough training samples.

    Raises:
        ValueError: if ``self.model_type`` is neither ``"lopq"`` nor
            ``"lopq_pca"``.
    """
    if self.model_type == "lopq":
        features = self.get_train_features(
            self.nb_train, nb_min_train=self.nb_min_train)
        print("Got train features array with shape: {}".format(features.shape))
        feature_count = features.shape[0]
        sys.stdout.flush()

        # Guard: refuse to train on fewer samples than requested.
        if feature_count < self.nb_train:
            error_template = "[{}.train_model: error] Could not train model, not enough training samples."
            print(error_template.format(self.pp))
            return None

        from lopq.model import LOPQModel
        # Parameters come straight from the configuration; no defaults or
        # data-driven heuristics are applied here.
        params = self.model_params
        model = LOPQModel(V=params['V'], M=params['M'],
                          subquantizer_clusters=params['subq'])

        start_template = "[{}.train_model: info] Starting local training of 'lopq' model with parameters {} using {} features."
        print(start_template.format(self.pp, self.model_params, feature_count))
        started_at = time.time()
        # n_init is left at the library default here.
        model.fit(features, verbose=True)

        # Persist the trained model
        self.storer.save(self.build_model_str(), model)
        done_template = "[{}.train_model: info] Trained lopq model in {}s."
        print(done_template.format(self.pp, time.time() - started_at))
        return model

    if self.model_type != "lopq_pca":
        unknown_template = "[{}.train_model: error] Unknown 'lopq' type {}."
        raise ValueError(unknown_template.format(self.pp, self.model_type))

    # --- 'lopq_pca' training ---
    from lopq.model import LOPQModelPCA
    model = LOPQModelPCA(V=self.model_params['V'],
                         M=self.model_params['M'],
                         subquantizer_clusters=self.model_params['subq'],
                         renorm=True)

    # Step 1: obtain the PCA parameters, either stored or freshly trained.
    stored_pca = self.storer.load(self.build_pca_model_str())
    if stored_pca is not None:
        model.pca_P = stored_pca["P"]
        model.pca_mu = stored_pca["mu"]
    else:
        pca_features = self.get_train_features(
            self.nb_train_pca, nb_min_train=self.nb_min_train_pca)
        pca_template = "[{}.train_model: info] Training PCA model, keeping {} dimensions from features {}."
        print(pca_template.format(self.pp, self.model_params['pca'],
                                  pca_features.shape))
        sys.stdout.flush()

        pca_started_at = time.time()
        model.fit_pca(pca_features, pca_dims=self.model_params['pca'])
        pca_done_template = "[{}.train_model: info] Trained pca model in {}s."
        print(pca_done_template.format(self.pp, time.time() - pca_started_at))

        # Drop the reference to the PCA training array before fetching the
        # next batch of features.
        del pca_features
        pca_payload = {"P": model.pca_P, "mu": model.pca_mu}
        self.storer.save(self.build_pca_model_str(), pca_payload)

    # Step 2: fit the model; the PCA stage is neither applied nor retrained
    # by this call.
    features = self.get_train_features(self.nb_train,
                                       lopq_pca_model=model,
                                       nb_min_train=self.nb_min_train)
    train_template = "[{}.train_model: info] Training 'lopq_pca' model with parameters {} using features {}"
    print(train_template.format(self.pp, self.model_params, features.shape))
    sys.stdout.flush()

    started_at = time.time()
    # n_init is left at the library default here.
    model.fit(features, verbose=True, apply_pca=False, train_pca=False)
    # TODO: we could evaluate model based on reconstruction of some randomly sampled features?

    # Persist the trained model
    self.storer.save(self.build_model_str(), model)
    done_template = "[{}.train_model: info] Trained lopq model in {}s."
    print(done_template.format(self.pp, time.time() - started_at))
    sys.stdout.flush()
    return model
def make_random_model():
    """Return a small LOPQModel fitted on data from a fixed-seed generator."""
    model = LOPQModel(V=5, M=4, subquantizer_clusters=10)
    # A fixed seed keeps the generated training data the same on every call.
    rng = np.random.RandomState(42)
    training_data = rng.rand(200, 8)
    model.fit(training_data, n_init=1)
    return model