def test_on_larger_random_data_with_sparse_matrix(self):
        """
        Compare the Tanimoto kernel on sparse (CSR) input against a set-based
        Jaccard reference using random binary data.

        The test is currently skipped ("Not implemented"); everything after
        the 'skipTest' call documents the intended checks.
        """
        self.skipTest("Not implemented")

        def jacscr(x, y):
            """
            Calculate Jaccard score

            :param x: array-like, shape = (n_features,), binary vector

            :param y: array-like, shape = (n_features,), binary vector

            :return: scalar, |x AND y| / |x OR y| computed on the index sets
                of the non-zero entries
            """
            x, y = set(np.where(x)[0]), set(np.where(y)[0])

            n_inter = len(x & y)
            n_union = len(x | y)

            return n_inter / n_union

        # Draw both matrices from ONE generator. Seeding two separate
        # generators with the same value yields the same random stream, so
        # X_B merely repeated the leading rows of X_A and the two-input code
        # path was never exercised on independent data. X_A itself is
        # unchanged, as it is still the first draw from seed 493.
        rng = np.random.RandomState(493)
        X_A = rng.randint(0, 2, size=(51, 32))
        X_B = rng.randint(0, 2, size=(12, 32))

        # symmetric kernel
        K = tanimoto_kernel(csr_matrix(X_A))
        np.testing.assert_equal(K.shape, (51, 51))
        np.testing.assert_equal(K[3, 6], jacscr(X_A[3], X_A[6]))
        np.testing.assert_equal(K[1, 1], jacscr(X_A[1], X_A[1]))
        np.testing.assert_equal(K[0, 9], jacscr(X_A[0], X_A[9]))
        np.testing.assert_equal(K[5, 10], jacscr(X_A[5], X_A[10]))
        np.testing.assert_equal(K[6, 3], K[3, 6])
        np.testing.assert_equal(K[0, 9], K[9, 0])
        np.testing.assert_equal(K[5, 10], K[10, 5])
        np.testing.assert_equal(np.diag(K), np.ones((51, )))

        # non-symmetric kernel
        K = tanimoto_kernel(csr_matrix(X_A), csr_matrix(X_B))
        np.testing.assert_equal(K.shape, (51, 12))
        np.testing.assert_equal(K[3, 6], jacscr(X_A[3], X_B[6]))
        np.testing.assert_equal(K[1, 1], jacscr(X_A[1], X_B[1]))
    def test_compatibility_with_sparse_matrix(self):
        """
        Check hand-computed Tanimoto kernel values when the inputs are given
        as sparse CSR matrices.

        The test is currently skipped ("Not implemented"); the statements
        after the 'skipTest' call describe the intended checks.
        """
        self.skipTest("Not implemented")

        dense_a = np.array([[1, 1, 0], [0, 1, 1], [1, 0, 0]])
        dense_b = np.array([[1, 0, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0]])
        X_A = csr_matrix(dense_a)
        X_B = csr_matrix(dense_b)

        # Symmetric case: kernel of X_A with itself
        K_sym = tanimoto_kernel(X_A)
        np.testing.assert_equal(K_sym.shape, (3, 3))
        np.testing.assert_equal(np.diag(K_sym), np.ones((3, )))
        sym_expected = (
            ((0, 1), 1. / 3.),
            ((1, 0), 1. / 3.),
            ((0, 2), 1. / 2.),
            ((2, 0), 1. / 2.),
        )
        for (row, col), value in sym_expected:
            np.testing.assert_equal(K_sym[row, col], value)
        assert (np.max(K_sym) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_sym) >= 0.), "Kernel values must be >= 0"

        # Non-symmetric case: rows from X_A, columns from X_B
        K_cross = tanimoto_kernel(X_A, X_B)
        np.testing.assert_equal(K_cross.shape, (3, 4))
        cross_expected = (
            ((0, 1), 2. / 3.),
            ((1, 0), 1. / 3.),
            ((0, 2), 0.),
            ((2, 0), 1. / 2.),
        )
        for (row, col), value in cross_expected:
            np.testing.assert_equal(K_cross[row, col], value)
        assert (np.max(K_cross) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_cross) >= 0.), "Kernel values must be >= 0"
    def test_on_larger_random_data(self):
        """
        Compare the Tanimoto kernel against a set-based Jaccard reference and
        against the generalized Tanimoto kernel using random binary data.
        """
        def jacscr(x, y):
            """
            Calculate Jaccard score

            :param x: array-like, shape = (n_features,), binary vector

            :param y: array-like, shape = (n_features,), binary vector

            :return: scalar, |x AND y| / |x OR y| computed on the index sets
                of the non-zero entries
            """
            x, y = set(np.where(x)[0]), set(np.where(y)[0])

            n_inter = len(x & y)
            n_union = len(x | y)

            return n_inter / n_union

        # Draw both matrices from ONE generator. Seeding two separate
        # generators with the same value yields the same random stream, so
        # X_B merely repeated the leading rows of X_A and the two-input code
        # path was never exercised on independent data. X_A itself is
        # unchanged, as it is still the first draw from seed 493.
        rng = np.random.RandomState(493)
        X_A = rng.randint(0, 2, size=(51, 32))
        X_B = rng.randint(0, 2, size=(12, 32))

        # ----------------
        # Symmetric kernel
        # ----------------
        K = tanimoto_kernel(X_A)
        np.testing.assert_equal(K.shape, (51, 51))
        np.testing.assert_equal(K[3, 6], jacscr(X_A[3], X_A[6]))
        np.testing.assert_equal(K[1, 1], jacscr(X_A[1], X_A[1]))
        np.testing.assert_equal(K[0, 9], jacscr(X_A[0], X_A[9]))
        np.testing.assert_equal(K[5, 10], jacscr(X_A[5], X_A[10]))
        np.testing.assert_equal(K[6, 3], K[3, 6])
        np.testing.assert_equal(K[0, 9], K[9, 0])
        np.testing.assert_equal(K[5, 10], K[10, 5])
        np.testing.assert_equal(np.diag(K), np.ones((51, )))

        # Test against generalized tanimoto kernel implementation
        K_gen = generalized_tanimoto_kernel(X_A)
        np.testing.assert_equal(K, K_gen)

        # --------------------
        # Non-symmetric kernel
        # --------------------
        K = tanimoto_kernel(X_A, X_B)
        np.testing.assert_equal(K.shape, (51, 12))
        np.testing.assert_equal(K[3, 6], jacscr(X_A[3], X_B[6]))
        np.testing.assert_equal(K[1, 1], jacscr(X_A[1], X_B[1]))

        # Test against generalized tanimoto kernel implementation
        K_gen = generalized_tanimoto_kernel(X_A, X_B)
        np.testing.assert_equal(K, K_gen)
    def test_on_small_data(self):
        """
        Check the Tanimoto kernel against hand-computed values on tiny binary
        matrices and against the generalized Tanimoto kernel.
        """
        X_A = np.array([[1, 1, 0], [0, 1, 1], [1, 0, 0]])
        X_B = np.array([[1, 0, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0]])

        # Symmetric case: kernel of X_A with itself
        K_sym = tanimoto_kernel(X_A)
        np.testing.assert_equal(K_sym.shape, (3, 3))
        np.testing.assert_equal(np.diag(K_sym), np.ones((3, )))
        sym_expected = (
            ((0, 1), 1. / 3.),
            ((1, 0), 1. / 3.),
            ((0, 2), 1. / 2.),
            ((2, 0), 1. / 2.),
        )
        for (row, col), value in sym_expected:
            np.testing.assert_equal(K_sym[row, col], value)
        assert (np.max(K_sym) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_sym) >= 0.), "Kernel values must be >= 0"

        # Both implementations must produce the same matrix
        np.testing.assert_equal(K_sym, generalized_tanimoto_kernel(X_A))

        # Non-symmetric case: rows from X_A, columns from X_B
        K_cross = tanimoto_kernel(X_A, X_B)
        np.testing.assert_equal(K_cross.shape, (3, 4))
        cross_expected = (
            ((0, 1), 2. / 3.),
            ((1, 0), 1. / 3.),
            ((0, 2), 0.),
            ((2, 0), 1. / 2.),
        )
        for (row, col), value in cross_expected:
            np.testing.assert_equal(K_cross[row, col], value)
        assert (np.max(K_cross) <= 1.), "Kernel values must be <= 1"
        assert (np.min(K_cross) >= 0.), "Kernel values must be >= 0"

        # Both implementations must produce the same matrix
        np.testing.assert_equal(K_cross, generalized_tanimoto_kernel(X_A, X_B))
Example #5
0
    def _get_kernel(self, X, Y=None, n_jobs=1):
        """
        Compute the kernel matrix selected by 'self.kernel'.

        The kernels "tanimoto" and "minmax" / "generalized_tanimoto" are
        dispatched to the dedicated implementations (called with
        'shallow_input_check=True'); any other value is forwarded to
        'pairwise_kernels' as the metric.

        :param X: array-like, shape = (n_samples_a, n_features)

        :param Y: array-like, shape = (n_samples_b, n_features), default=None

        :param n_jobs: integer, number of jobs passed to 'pairwise_kernels', default=1.
            Not used by the two Tanimoto branches.

        :return: array-like, kernel matrix, shape:
            (n_samples_a, n_samples_a) if Y is None
            (n_samples_a, n_samples_b) otherwise
        """
        kernel = self.kernel

        # Extra keyword arguments for 'pairwise_kernels': user-supplied ones
        # for a callable kernel, the standard hyper-parameters otherwise.
        if callable(kernel):
            extra_kwargs = self.kernel_params or {}
        else:
            extra_kwargs = dict(gamma=self.gamma,
                                degree=self.degree,
                                coef0=self.coef0)

        if kernel == "tanimoto":
            return tanimoto_kernel(X, Y, shallow_input_check=True)

        if kernel in ("minmax", "generalized_tanimoto"):
            return generalized_tanimoto_kernel(X, Y, shallow_input_check=True)

        return pairwise_kernels(
            X, Y, metric=kernel, filter_params=True, n_jobs=n_jobs,
            **extra_kwargs)