Example #1
0
    def test_on_large_random_data(self):
        """
        Check the generalized Tanimoto kernel on random real-valued data.

        For n_jobs in {1, 4} the square (X vs. X) and the rectangular (X vs. Y) kernel matrices
        are verified for their shape, for all values being in [0, 1] and for element-wise
        agreement with the reference value returned by 'self._gentan'. The square matrix is
        additionally checked for symmetry and for a diagonal of ones.
        """
        for n_jobs in [1, 4]:
            # NOTE: The random states are re-created with fixed seeds in every repetition, i.e.
            #       each of the 50 repetitions operates on the same X and Y.
            for _ in range(50):
                X = np.random.RandomState(33).randn(10, 302)
                Y = np.random.RandomState(331).randn(8, 302)

                # Squared kernel matrix
                K = generalized_tanimoto_kernel(X, n_jobs=n_jobs)
                np.testing.assert_equal(K.shape, (10, 10))
                # The range must be checked element-wise: 'np.all(K) >= 0' would only compare a
                # single boolean with zero, which is always true.
                self.assertTrue(np.all((K >= 0) & (K <= 1)))
                np.testing.assert_equal(K, K.T)
                np.testing.assert_equal(np.diag(K), np.ones((10, )))

                for (i, j) in it.combinations(range(10), 2):
                    np.testing.assert_allclose(K[i, j],
                                               self._gentan(X[i], X[j]))

                # Rectangular kernel matrix
                K = generalized_tanimoto_kernel(X, Y, n_jobs=n_jobs)
                np.testing.assert_equal(K.shape, (10, 8))
                self.assertTrue(np.all((K >= 0) & (K <= 1)))

                for (i, j) in it.product(range(10), range(8)):
                    np.testing.assert_allclose(K[i, j],
                                               self._gentan(X[i], Y[j]))
Example #2
0
    def test_on_small_data(self):
        """
        Check the generalized Tanimoto kernel on small hand-written real-valued data.

        The expected kernel values are computed from the hard-coded L1-norms of the rows and the
        L1-norm of the row difference:

            k(x, y) = (|x|_1 + |y|_1 - |x - y|_1) / (|x|_1 + |y|_1 + |x - y|_1)
        """
        def expected(l1_x, l1_y, x, y):
            """
            Expected kernel value given the (hard-coded) L1-norms of x and y.
            """
            l1_diff = np.sum(np.abs(x - y))
            return (l1_x + l1_y - l1_diff) / (l1_x + l1_y + l1_diff)

        X = np.array([[0, 1.75, -3.25, 0, 1], [2.35, 1, 0, 0, 2],
                      [0, 7.45, 0, 3, 0]])
        # |X|_1 = (6, 5.35, 10.45)
        Y = np.array([[0, 19, 5.64, 2, 0], [2.35, 1, 9, 0, -2],
                      [0, 0, 0, 0, 1], [-1, 2, -3, 4, -5]])
        # |Y|_1 = (26.64, 14.35, 1, 15)

        # Square kernel matrix
        KX = generalized_tanimoto_kernel(X)

        np.testing.assert_equal(KX.shape, (3, 3))
        np.testing.assert_equal(np.diag(KX), np.ones((3, )))
        # The range must be checked element-wise: 'np.all(KX) >= 0' would only compare a single
        # boolean with zero, which is always true.
        self.assertTrue(np.all((KX >= 0) & (KX <= 1)))
        np.testing.assert_equal(KX, KX.T)

        np.testing.assert_allclose(KX[0, 1], expected(6, 5.35, X[0], X[1]))
        np.testing.assert_allclose(KX[0, 2], expected(6, 10.45, X[0], X[2]))
        np.testing.assert_allclose(KX[1, 2], expected(5.35, 10.45, X[1], X[2]))

        # Non-square kernel matrix
        KXY = generalized_tanimoto_kernel(X, Y)

        np.testing.assert_equal(KXY.shape, (3, 4))
        # Check the rectangular matrix here (not the square one again)
        self.assertTrue(np.all((KXY >= 0) & (KXY <= 1)))

        np.testing.assert_allclose(KXY[0, 0], expected(6, 26.64, X[0], Y[0]))
        np.testing.assert_allclose(KXY[0, 1], expected(6, 14.35, X[0], Y[1]))
        np.testing.assert_allclose(KXY[1, 0], expected(5.35, 26.64, X[1], Y[0]))
        np.testing.assert_allclose(KXY[1, 2], expected(5.35, 1, X[1], Y[2]))
Example #3
0
    def test_on_larger_random_data(self):
        """
        Compare the Tanimoto kernel on random binary data with a set-based Jaccard score.

        Selected entries of the symmetric and the non-symmetric kernel matrix are compared with
        the locally defined reference 'jacscr'. Both matrices are furthermore compared with the
        output of the generalized Tanimoto kernel.
        """
        def jacscr(x, y):
            """
            Jaccard score of the index sets of the non-zero entries of x and y
            """
            idx_x = set(np.where(x)[0])
            idx_y = set(np.where(y)[0])

            return len(idx_x & idx_y) / len(idx_x | idx_y)

        # Both matrices are drawn from a random state seeded with 493
        rows_a = np.random.RandomState(493).randint(0, 2, size=(51, 32))
        rows_b = np.random.RandomState(493).randint(0, 2, size=(12, 32))

        # ----------------
        # Symmetric kernel
        # ----------------
        K_sym = tanimoto_kernel(rows_a)
        np.testing.assert_equal(K_sym.shape, (51, 51))

        # Entries must match the reference score ...
        for i, j in [(3, 6), (1, 1), (0, 9), (5, 10)]:
            np.testing.assert_equal(K_sym[i, j], jacscr(rows_a[i], rows_a[j]))

        # ... and their mirrored counterparts
        for i, j in [(6, 3), (0, 9), (5, 10)]:
            np.testing.assert_equal(K_sym[i, j], K_sym[j, i])

        np.testing.assert_equal(np.diag(K_sym), np.ones((51, )))

        # Test against generalized tanimoto kernel implementation
        np.testing.assert_equal(K_sym, generalized_tanimoto_kernel(rows_a))

        # --------------------
        # Non-symmetric kernel
        # --------------------
        K_rect = tanimoto_kernel(rows_a, rows_b)
        np.testing.assert_equal(K_rect.shape, (51, 12))

        for i, j in [(3, 6), (1, 1)]:
            np.testing.assert_equal(K_rect[i, j], jacscr(rows_a[i], rows_b[j]))

        # Test against generalized tanimoto kernel implementation
        np.testing.assert_equal(K_rect,
                                generalized_tanimoto_kernel(rows_a, rows_b))
Example #4
0
    def test_vector_vs_parallel_performance(self):
        """
        Print the elapsed time of the generalized Tanimoto kernel for n_jobs=1 and n_jobs=4.

        Two random (10000 x 301) matrices are used as input. Nothing is asserted, the timings
        are only printed.
        """
        d = 301
        n1 = 10000
        n2 = 10000
        S1 = np.random.RandomState(1).randn(n1, d)
        S2 = np.random.RandomState(2).randn(n2, d)

        for n_jobs, message in [(1, "NEW -- njobs=1: %.5fs"),
                                (4, "NEW -- njobs=4: %.5fs")]:
            # 'perf_counter' is a monotonic high-resolution clock intended for measuring
            # durations; 'time.time' can jump when the system clock is adjusted.
            start = time.perf_counter()
            generalized_tanimoto_kernel(S1,
                                        S2,
                                        shallow_input_check=True,
                                        n_jobs=n_jobs)
            print(message % (time.perf_counter() - start))
Example #5
0
    def test_on_small_data(self):
        """
        Check the Tanimoto kernel on small hand-written binary data.

        Selected entries of the symmetric and the non-symmetric kernel matrix are compared with
        hard-coded values, all values are required to be in [0, 1] and both matrices are compared
        with the output of the generalized Tanimoto kernel.
        """
        bits_a = np.array([[1, 1, 0], [0, 1, 1], [1, 0, 0]])
        bits_b = np.array([[1, 0, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0]])

        # ----------------
        # Symmetric kernel
        # ----------------
        K_sym = tanimoto_kernel(bits_a)
        np.testing.assert_equal(K_sym.shape, (3, 3))
        np.testing.assert_equal(np.diag(K_sym), np.ones((3, )))

        # (row, column) --> expected kernel value
        for index, value in [((0, 1), 1. / 3.), ((1, 0), 1. / 3.),
                             ((0, 2), 1. / 2.), ((2, 0), 1. / 2.)]:
            np.testing.assert_equal(K_sym[index], value)

        assert (np.max(K_sym) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_sym) >= 0.), "Kernel values must be >= 0"

        # Test against generalized tanimoto kernel implementation
        np.testing.assert_equal(K_sym, generalized_tanimoto_kernel(bits_a))

        # --------------------
        # Non-symmetric kernel
        # --------------------
        K_rect = tanimoto_kernel(bits_a, bits_b)
        np.testing.assert_equal(K_rect.shape, (3, 4))

        # (row, column) --> expected kernel value
        for index, value in [((0, 1), 2. / 3.), ((1, 0), 1. / 3.),
                             ((0, 2), 0.), ((2, 0), 1. / 2.)]:
            np.testing.assert_equal(K_rect[index], value)

        assert (np.max(K_rect) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_rect) >= 0.), "Kernel values must be >= 0"

        # Test against generalized tanimoto kernel implementation
        np.testing.assert_equal(K_rect,
                                generalized_tanimoto_kernel(bits_a, bits_b))
Example #6
0
    def _get_kernel(self, X, Y=None, n_jobs=1):
        """
        Compute the kernel matrix for the given feature sets using the kernel in 'self.kernel'.

        :param X: array-like, shape = (n_samples_a, n_features)

        :param Y: array-like, shape = (n_samples_b, n_features), default=None

        :param n_jobs: integer, number of jobs; only forwarded to 'pairwise_kernels', default=1

        :return: array-like, kernel matrix between the feature sets, shape:
            (n_samples_a, n_samples_a) if Y is None
            (n_samples_a, n_samples_b) if Y is not None
        """
        # Keyword arguments for 'pairwise_kernels': user-provided parameters for a callable
        # kernel, otherwise the parameters stored on the instance.
        if callable(self.kernel):
            extra_kwargs = self.kernel_params or {}
        else:
            extra_kwargs = dict(gamma=self.gamma,
                                degree=self.degree,
                                coef0=self.coef0)

        # Tanimoto-type kernels are handled by the dedicated implementations
        if self.kernel == "tanimoto":
            return tanimoto_kernel(X, Y, shallow_input_check=True)

        if self.kernel in ("minmax", "generalized_tanimoto"):
            return generalized_tanimoto_kernel(X, Y, shallow_input_check=True)

        # All remaining kernels are delegated to 'pairwise_kernels'
        return pairwise_kernels(X,
                                Y,
                                metric=self.kernel,
                                filter_params=True,
                                n_jobs=n_jobs,
                                **extra_kwargs)