def test_fit(self):
    """Check the attributes that ``OPQ.fit`` populates.

    After fitting, ``Ds`` must equal ``D // M``, ``codewords`` must have
    shape ``(M, Ks, D // M)`` and ``R`` must have shape ``(D, D)``. The test
    also checks that ``fit`` can be chained off the constructor and that a
    second fit on the same data gives codewords close to the first.
    """
    N, D, M, Ks = 100, 12, 4, 10
    X = np.random.random((N, D)).astype(np.float32)
    opq = nanopq.OPQ(M=M, Ks=Ks)
    opq.fit(X)

    # Floor division keeps the expected sub-space dimension an int; true
    # division would silently compare against a float (3.0) instead.
    Ds = D // M
    self.assertEqual(opq.Ds, Ds)
    self.assertEqual(opq.codewords.shape, (M, Ks, Ds))
    self.assertEqual(opq.R.shape, (D, D))

    # Can be called as a chain
    opq2 = nanopq.OPQ(M=M, Ks=Ks).fit(X)
    self.assertTrue(np.allclose(opq.codewords, opq2.codewords))
def test_parametric_init(self):
    """Compare the error obtained with and without ``parametric_init``.

    A fresh OPQ instance is fitted on the same data for each setting of
    ``parametric_init`` (with a single rotation iteration). For each one the
    norm of ``rotate(X) - decode(encode(X))`` is recorded. The test asserts
    that the value measured with ``parametric_init=False`` is strictly
    smaller than the one measured with ``parametric_init=True``.
    """
    n_vecs, dim, n_sub, n_codes = 100, 12, 4, 10
    data = np.random.random((n_vecs, dim)).astype(np.float32)

    # Keyed by the parametric_init flag; False is fitted first, then True.
    errors = {}
    for use_parametric in (False, True):
        quantizer = nanopq.OPQ(M=n_sub, Ks=n_codes)
        quantizer.fit(data, parametric_init=use_parametric, rotation_iter=1)
        residual = quantizer.rotate(data) - quantizer.decode(quantizer.encode(data))
        errors[use_parametric] = np.linalg.norm(residual)

    self.assertLess(errors[False], errors[True])
def test_nanopq_to_faiss(self):
    """Check that a converted faiss index agrees with the nanopq PQ it came from.

    Steps:
      1. Fit a nanopq PQ on random training vectors.
      2. Confirm that passing an OPQ instance to ``nanopq_to_faiss`` raises
         ``AssertionError``.
      3. Convert the fitted PQ and check that both sides encode the base
         vectors to identical codes.
      4. Check that the top-k ids returned by the faiss index equal the
         ids obtained by sorting nanopq's asymmetric distances.
    """
    dim, n_sub, n_codes = 32, 4, 256
    n_train, n_base, n_query = 2000, 10000, 100

    # Drawn in the order train, base, query.
    train, base, queries = (
        np.random.rand(count, dim).astype(np.float32)
        for count in (n_train, n_base, n_query)
    )

    pq = nanopq.PQ(M=n_sub, Ks=n_codes)
    pq.fit(vecs=train)

    # opq is not supported
    with self.assertRaises(AssertionError):
        unsupported = nanopq.OPQ(M=n_sub, Ks=n_codes)
        nanopq.nanopq_to_faiss(unsupported)

    index = nanopq.nanopq_to_faiss(pq)  # IndexPQ

    # Encoded results should be same
    codes_nanopq = pq.encode(vecs=base)
    codes_faiss = index.pq.compute_codes(x=base)  # ProductQuantizer in IndexPQ
    self.assertTrue(np.array_equal(codes_nanopq, codes_faiss))

    # Search result should be same
    topk = 10
    index.add(base)
    _, ids_faiss = index.search(x=queries, k=topk)

    ranked = []
    for query in queries:
        dists = pq.dtable(query=query).adist(codes=codes_nanopq)
        ranked.append(np.argsort(dists)[:topk])
    ids_nanopq = np.array(ranked)

    self.assertTrue(np.array_equal(ids_faiss, ids_nanopq))
def fit(self, vecs, iter=20, seed=123):
    """Given training vectors, train a codec (PQ or OPQ instance)

    This should be called first and only once.

    Args:
        vecs (np.ndarray): Training vectors with shape=(Nt, D) and dtype=np.float32.
        iter (int): The number of iteration for k-means of PQ/OPQ
        seed (int): The seed for random process

    Returns:
        object: self

    Raises:
        AssertionError: If ``fit`` was already called, or if ``vecs`` is not
            of dtype ``np.float32``.
        ValueError: If ``self.codec`` is neither ``"pq"`` nor ``"opq"``.

    """
    assert self.fine_quantizer is None, "`fit` should be called only once"
    assert vecs.dtype == np.float32

    if self.codec == "pq":
        self.fine_quantizer = nanopq.PQ(M=self.M, Ks=self.Ks, verbose=self.verbose)
        self.fine_quantizer.fit(vecs=vecs, iter=iter, seed=seed)
    elif self.codec == "opq":
        self.fine_quantizer = nanopq.OPQ(M=self.M, Ks=self.Ks, verbose=self.verbose)
        # rotation_iter is currently fixed to 10
        self.fine_quantizer.fit(vecs=vecs, pq_iter=iter, rotation_iter=10, seed=seed)
    else:
        # Without this branch an unknown codec would leave fine_quantizer as
        # None and fail below with an opaque AttributeError.
        raise ValueError(
            "Unsupported codec: {!r} (expected 'pq' or 'opq')".format(self.codec)
        )

    # Set trained codewords to cpp impl
    self.impl_cpp.set_codewords(self.fine_quantizer.codewords)

    return self
def test_eq(self):
    """Check ``==`` / ``!=`` between OPQ instances before and after fitting.

    Instances built with the same ``M`` and ``Ks`` (including a deep copy)
    must compare equal, while one built with a different ``Ks`` must compare
    unequal. The same expectations are re-checked once every instance has
    been fitted on the same data.
    """
    import copy

    n_vecs, dim, n_sub, n_codes = 100, 12, 4, 10
    data = np.random.random((n_vecs, dim)).astype(np.float32)

    base = nanopq.OPQ(M=n_sub, Ks=n_codes)
    twin = nanopq.OPQ(M=n_sub, Ks=n_codes)
    clone = copy.deepcopy(base)
    other = nanopq.OPQ(M=n_sub, Ks=2 * n_codes)

    def check(same_as_base, different):
        # The operators are used directly so that the instance's own
        # comparison methods are what gets exercised.
        for candidate in same_as_base:
            self.assertTrue(base == candidate)
        self.assertTrue(base != different)

    check((base, twin, clone), other)

    base.fit(data)
    twin.fit(data)
    clone = copy.deepcopy(base)  # re-copy so the clone carries the fitted state
    other.fit(data)

    check((base, twin, clone), other)
def test_rotate(self):
    """Check output shapes of ``OPQ.rotate`` and that ``R`` is orthogonal.

    ``rotate`` is called with a single vector and with a batch of three
    vectors; each result must keep the shape of its input. The transpose of
    the fitted ``R`` must also match its inverse to three decimal places.
    """
    n_vecs, dim, n_sub, n_codes = 100, 12, 4, 10
    data = np.random.random((n_vecs, dim)).astype(np.float32)

    quantizer = nanopq.OPQ(M=n_sub, Ks=n_codes)
    quantizer.fit(data)

    single = quantizer.rotate(data[0])
    batch = quantizer.rotate(data[:3])
    self.assertEqual(single.shape, (dim,))
    self.assertEqual(batch.shape, (3, dim))

    # Because R is a rotation matrix (R^t * R = I), R^t should be R^(-1)
    rotation = quantizer.R
    deviation = np.linalg.norm(rotation.T - np.linalg.inv(rotation))
    self.assertAlmostEqual(deviation, 0.0, places=3)
def test_property(self):
    """Check that OPQ exposes the same ``M``, ``Ks``, ``verbose`` and
    ``code_dtype`` values as its ``pq`` attribute."""
    opq = nanopq.OPQ(M=4, Ks=256)
    names = ("M", "Ks", "verbose", "code_dtype")

    outer = tuple(getattr(opq, name) for name in names)
    inner = tuple(getattr(opq.pq, name) for name in names)

    self.assertEqual(outer, inner)