Esempio n. 1
0
    def test_fit(self):
        """Check the attributes set by ``OPQ.fit`` and that ``fit`` can be chained.

        After training on ``(N, D)`` float32 data, the sub-vector dimension,
        the codeword array shape and the rotation matrix shape are verified.
        A second codec trained through the chained form is then expected to
        end up with (numerically) the same codewords.
        """
        N, D, M, Ks = 100, 12, 4, 10
        X = np.random.random((N, D)).astype(np.float32)
        opq = nanopq.OPQ(M=M, Ks=Ks)
        opq.fit(X)

        # Floor division keeps the expected sub-dimension an int; ``D / M``
        # would produce a float that only matches via int/float equality.
        Ds = D // M
        self.assertEqual(opq.Ds, Ds)
        self.assertEqual(opq.codewords.shape, (M, Ks, Ds))
        self.assertEqual(opq.R.shape, (D, D))

        opq2 = nanopq.OPQ(M=M, Ks=Ks).fit(X)  # Can be called as a chain
        self.assertTrue(np.allclose(opq.codewords, opq2.codewords))
Esempio n. 2
0
    def test_parametric_init(self):
        """Compare the error obtained with and without ``parametric_init``.

        Two fresh codecs are trained on the same data with a single rotation
        iteration, differing only in the ``parametric_init`` flag. The test
        asserts that the error measured without parametric initialization is
        smaller than the one measured with it.
        """
        N, D, M, Ks = 100, 12, 4, 10
        X = np.random.random((N, D)).astype(np.float32)

        def measure_error(use_parametric):
            # Train a new codec, then take the norm of the difference between
            # the rotated inputs and the decoded codes of the same inputs.
            codec = nanopq.OPQ(M=M, Ks=Ks)
            codec.fit(X, parametric_init=use_parametric, rotation_iter=1)
            rotated = codec.rotate(X)
            restored = codec.decode(codec.encode(X))
            return np.linalg.norm(rotated - restored)

        err_without = measure_error(False)
        err_with = measure_error(True)

        self.assertLess(err_without, err_with)
Esempio n. 3
0
    def test_nanopq_to_faiss(self):
        """Check that a trained ``nanopq.PQ`` converts to an equivalent faiss index.

        The test verifies three things:
          * converting an OPQ instance raises ``AssertionError``;
          * the converted index encodes database vectors to the same codes
            as the original nanopq codec;
          * top-k search on the converted index returns the same ids as a
            ranking by nanopq asymmetric distances.
        """
        D, M, Ks = 32, 4, 256
        Nt, Nb, Nq = 2000, 10000, 100
        Xt = np.random.rand(Nt, D).astype(np.float32)
        Xb = np.random.rand(Nb, D).astype(np.float32)
        Xq = np.random.rand(Nq, D).astype(np.float32)
        pq_nanopq = nanopq.PQ(M=M, Ks=Ks)
        pq_nanopq.fit(vecs=Xt)

        # Construct the OPQ outside the assertRaises block so that only the
        # conversion call itself can satisfy the expected AssertionError.
        opq = nanopq.OPQ(M=M, Ks=Ks)
        with self.assertRaises(AssertionError):  # opq is not supported
            nanopq.nanopq_to_faiss(opq)

        pq_faiss = nanopq.nanopq_to_faiss(pq_nanopq)  # IndexPQ

        # Encoded results should be same
        Cb_nanopq = pq_nanopq.encode(vecs=Xb)
        Cb_faiss = pq_faiss.pq.compute_codes(x=Xb)  # ProductQuantizer in IndexPQ
        self.assertTrue(np.array_equal(Cb_nanopq, Cb_faiss))

        # Search result should be same
        topk = 10
        pq_faiss.add(Xb)
        _, ids1 = pq_faiss.search(x=Xq, k=topk)
        ids2 = np.array(
            [
                np.argsort(pq_nanopq.dtable(query=xq).adist(codes=Cb_nanopq))[:topk]
                for xq in Xq
            ]
        )

        self.assertTrue(np.array_equal(ids1, ids2))
Esempio n. 4
0
    def fit(self, vecs, iter=20, seed=123):
        """Given training vectors, train a codec (PQ or OPQ instance)
        This should be called first and only once.

        The fine quantizer is stored on the instance only after training has
        succeeded, so a failed call does not leave a half-trained quantizer
        behind that would block a later retry.

        Args:
            vecs (np.ndarray): Training vectors with shape=(Nt, D) and dtype=np.float32.
            iter (int): The number of iteration for k-means of PQ/OPQ
            seed (int): The seed for random process

        Returns:
            object: self

        Raises:
            AssertionError: If ``fit`` was already called, or if ``vecs`` is
                not of dtype np.float32.
            ValueError: If ``self.codec`` is neither "pq" nor "opq".

        """
        assert self.fine_quantizer is None, "`fit` should be called only once"
        assert vecs.dtype == np.float32

        if self.codec == "pq":
            quantizer = nanopq.PQ(M=self.M,
                                  Ks=self.Ks,
                                  verbose=self.verbose)
            quantizer.fit(vecs=vecs, iter=iter, seed=seed)
        elif self.codec == "opq":
            quantizer = nanopq.OPQ(M=self.M,
                                   Ks=self.Ks,
                                   verbose=self.verbose)
            # rotation_iter is currently fixed to 10
            quantizer.fit(vecs=vecs,
                          pq_iter=iter,
                          rotation_iter=10,
                          seed=seed)
        else:
            # Without this branch an unknown codec would surface later as an
            # AttributeError on ``None.codewords``.
            raise ValueError(
                "Unsupported codec: {!r} (expected 'pq' or 'opq')".format(
                    self.codec))

        # Commit the quantizer only once training finished without error
        self.fine_quantizer = quantizer

        # Set trained codewords to cpp impl
        self.impl_cpp.set_codewords(self.fine_quantizer.codewords)

        return self
Esempio n. 5
0
    def test_eq(self):
        """Check ``==`` / ``!=`` between OPQ instances before and after training.

        A codec must compare equal to itself, to another codec built with the
        same parameters, and to a deep copy of itself; it must compare
        unequal to a codec built with a different ``Ks``. The same relations
        are checked again once all codecs were trained on the same data.
        """
        import copy

        N, D, M, Ks = 100, 12, 4, 10
        X = np.random.random((N, D)).astype(np.float32)

        base = nanopq.OPQ(M=M, Ks=Ks)
        twin = nanopq.OPQ(M=M, Ks=Ks)
        clone = copy.deepcopy(base)
        other = nanopq.OPQ(M=M, Ks=2 * Ks)

        # Untrained instances
        for peer in (base, twin, clone):
            self.assertTrue(base == peer)
        self.assertTrue(base != other)

        # Trained instances; the clone is re-taken from the trained codec
        base.fit(X)
        twin.fit(X)
        clone = copy.deepcopy(base)
        other.fit(X)
        for peer in (base, twin, clone):
            self.assertTrue(base == peer)
        self.assertTrue(base != other)
Esempio n. 6
0
    def test_rotate(self):
        """Check ``rotate`` output shapes and that ``R`` transposed equals its inverse."""
        N, D, M, Ks = 100, 12, 4, 10
        X = np.random.random((N, D)).astype(np.float32)
        opq = nanopq.OPQ(M=M, Ks=Ks)
        opq.fit(X)

        # A single vector keeps its 1-D shape; a batch keeps its row count
        single = opq.rotate(X[0])
        batch = opq.rotate(X[:3])
        self.assertEqual(single.shape, (D, ))
        self.assertEqual(batch.shape, (3, D))

        # For a rotation matrix R^t * R = I holds, hence R^t must match R^(-1)
        R = opq.R
        deviation = np.linalg.norm(R.T - np.linalg.inv(R))
        self.assertAlmostEqual(deviation, 0.0, places=3)
Esempio n. 7
0
 def test_property(self):
     """Check that OPQ exposes the same property values as its inner ``pq``."""
     opq = nanopq.OPQ(M=4, Ks=256)
     names = ("M", "Ks", "verbose", "code_dtype")
     # Collect the values from the OPQ wrapper first, then from its inner PQ
     from_opq = tuple(getattr(opq, name) for name in names)
     from_pq = tuple(getattr(opq.pq, name) for name in names)
     self.assertEqual(from_opq, from_pq)