コード例 #1
0
    def test_eigenvals_only(self):
        """eigvals_only=True yields a bare array; otherwise a (vals, vecs) tuple."""
        sample = self.generate_normed_data()

        # Eigenvalues only: no eigenvector matrix accompanies the result.
        result = covariance_eig(sample, norm=1, eigvals_only=True)
        self.assertNotIsInstance(result, tuple)

        # Explicit False: the full (values, vectors) pair comes back.
        result = covariance_eig(sample, norm=1, eigvals_only=False)
        self.assertIsInstance(result, tuple)
コード例 #2
0
    def test_bad_norm(self):
        """Unspecified norm warns of privacy leakage; a too-small norm raises."""
        dims, rows = 3, 10
        # Doubling the normed data pushes row norms beyond 1.
        sample = self.generate_normed_data(dims, rows) * 2

        # No norm given: the implementation must warn it is inferring one.
        with self.assertWarns(PrivacyLeakWarning):
            covariance_eig(sample, epsilon=float("inf"), norm=None)

        # Declared norm of 1 is violated by the data, so a ValueError is expected.
        with self.assertRaises(ValueError):
            covariance_eig(sample, epsilon=float("inf"), norm=1)
コード例 #3
0
 def test_large_dims(self):
     """Requesting more dims than features caps the output at the feature count."""
     rows, feats = 10, 3
     sample = self.generate_normed_data(feats, rows)

     # dims=50 far exceeds the 3 available features; output still covers all 3.
     result = covariance_eig(sample, norm=1, dims=50)
     self.assertIsNotNone(result)
     self.assertEqual(result[0].size, 3)
     self.assertEqual(result[1].size, 3 * 3)
コード例 #4
0
    def test_svd(self):
        """With infinite budget the eigendecomposition matches the covariance SVD."""
        sample = self.generate_normed_data(5, 10)

        left, singulars, right = np.linalg.svd(sample.T.dot(sample))
        eigvals, eigvecs = covariance_eig(sample, norm=1, epsilon=float("inf"))

        # For the PSD covariance, eigenvalues coincide with singular values.
        self.assertTrue(np.allclose(eigvals, singulars))
        # Eigenvectors agree with both singular bases up to sign.
        self.assertTrue(np.allclose(abs(eigvecs.T.dot(left)), np.eye(5)))
        self.assertTrue(np.allclose(abs(eigvecs.T.dot(right.T)), np.eye(5)))
コード例 #5
0
    def test_inf_epsilon(self):
        """epsilon=inf reproduces the noiseless eigendecomposition (up to sign/order)."""
        feats, rows = 3, 50
        sample = self.generate_normed_data(feats, rows)

        dp_vals, dp_vecs = covariance_eig(sample, epsilon=float("inf"), norm=1)
        exact_vals, exact_vecs = np.linalg.eig(sample.T.dot(sample))

        # Compare the two spectra after sorting each independently.
        self.assertTrue(
            np.allclose(exact_vals[exact_vals.argsort()], dp_vals[dp_vals.argsort()]))
        # For matching bases, each row/column sum of the overlap matrix has magnitude 1.
        overlap = dp_vecs.T.dot(exact_vecs)
        self.assertTrue(np.allclose(abs(overlap.sum(axis=1)), 1))
        self.assertTrue(np.allclose(abs(overlap.sum(axis=0)), 1))
コード例 #6
0
    def test_simple(self):
        """Basic run: correct sizes, orthonormal eigenvectors, non-negative spectrum."""
        feats = 5
        sample = self.generate_normed_data(feats)

        eigvals, eigvecs = covariance_eig(sample, norm=1)
        self.assertIsNotNone(eigvals)
        self.assertIsNotNone(eigvecs)

        self.assertEqual(feats, eigvals.size)
        self.assertEqual(feats, eigvecs.shape[0])
        # Unitary matrix output
        self.assertTrue(np.allclose(eigvecs.dot(eigvecs.T), np.eye(feats)))
        # A covariance matrix has a non-negative spectrum.
        self.assertTrue(np.all(eigvals >= 0))
コード例 #7
0
    def test_dims(self):
        """dims controls the eigenvector count; the eigenvalue vector stays complete."""
        feats, rows = 5, 10
        sample = self.generate_normed_data(feats, rows)

        # dims below / above / at zero relative to the 5 features: the
        # eigenvector matrix is truncated (capped at 5), eigenvalues never are.
        for requested, expected_cols in ((3, 3), (10, 5), (0, 0)):
            eigvals, eigvecs = covariance_eig(sample, norm=1, dims=requested)
            self.assertEqual(eigvecs.shape, (5, expected_cols))
            self.assertEqual(eigvals.shape, (5,))

        # Negative dims is rejected outright.
        with self.assertRaises(ValueError):
            covariance_eig(sample, dims=-5, norm=1)

        # Non-integer dims is a type error.
        with self.assertRaises(TypeError):
            covariance_eig(sample, dims=0.5, norm=1)
コード例 #8
0
    def _fit_full(self, X, n_components):
        """Fit the differentially private PCA model on ``X``.

        Follows the shape of sklearn's ``PCA._fit_full``, substituting
        differentially private primitives: a DP mean (when the data is not
        already centred) and a DP covariance eigendecomposition, with the
        epsilon budget split evenly between the two steps when both run.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.  NOTE(review): modified in place (centred and
            norm-clipped) — callers appear to rely on passing a copy; confirm.
        n_components : int, float in (0, 1), 'mle' or None
            Number of components to keep, resolved the same way as sklearn.

        Returns
        -------
        tuple
            ``(u, s[:n_components], u.T)``, analogous to sklearn's
            ``(U, S, Vt)`` convention.
        """
        # Fail fast if the remaining privacy budget cannot cover this fit.
        self.accountant.check(self.epsilon, 0)

        n_samples, n_features = X.shape

        if self.centered:
            # Data is declared pre-centred: mean is zero, no budget spent on it.
            self.mean_ = np.zeros_like(np.mean(X, axis=0))
        else:
            if self.bounds is None:
                # Deriving bounds from the data itself leaks privacy — warn.
                warnings.warn(
                    "Bounds parameter hasn't been specified, so falling back to determining range from the data.\n"
                    "This will result in additional privacy leakage. To ensure differential privacy with no "
                    "additional privacy loss, specify `range` for each valued returned by np.mean().",
                    PrivacyLeakWarning)

                self.bounds = (np.min(X, axis=0), np.max(X, axis=0))

            self.bounds = check_bounds(self.bounds, n_features)
            # Half the budget buys the DP mean.  A throwaway BudgetAccountant
            # prevents double-charging self.accountant; the single spend() at
            # the end of this method accounts for the whole epsilon.
            self.mean_ = mean(X, epsilon=self.epsilon / 2, bounds=self.bounds, axis=0, accountant=BudgetAccountant())

        # Centre in place using the (possibly DP) mean computed above.
        X -= self.mean_

        if self.data_norm is None:
            # Falling back to the empirical max row norm also leaks privacy.
            warnings.warn("Data norm has not been specified and will be calculated on the data provided.  This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify `data_norm` at initialisation.", PrivacyLeakWarning)
            self.data_norm = np.linalg.norm(X, axis=1).max()

        # Clip rows to data_norm so the covariance query has bounded sensitivity.
        X = clip_to_norm(X, self.data_norm)

        # Remaining budget: full epsilon if the mean step was skipped, half otherwise.
        # dims is only forwarded when n_components is a concrete integer.
        s, u = covariance_eig(X, epsilon=self.epsilon if self.centered else self.epsilon / 2, norm=self.data_norm,
                              dims=n_components if isinstance(n_components, Integral) else None)
        # Enforce a deterministic sign convention on the eigenvectors.
        u, _ = svd_flip(u, np.zeros_like(u).T)
        # Eigenvalues of X^T X are the squared singular values of X.
        s = np.sqrt(s)

        components_ = u.T

        # Get variance explained by singular values
        explained_variance_ = np.sort((s ** 2) / (n_samples - 1))[::-1]
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var
        singular_values_ = s.copy()  # Store the singular values.

        # Post-process the number of components required
        if n_components == 'mle':
            # TODO: Update when sklearn requirement changes to >= 0.23, removing try...except
            try:
                n_components = sk_pca._infer_dimension(explained_variance_, n_samples)
            except AttributeError:
                # Older sklearn exposes the same logic under a different name/signature.
                n_components = sk_pca._infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            # number of components for which the cumulated explained
            # variance percentage is superior to the desired threshold
            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
            n_components = np.searchsorted(ratio_cumsum, n_components) + 1

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
        self.singular_values_ = singular_values_[:n_components]

        # Record the full epsilon spend for this fit (covers both DP steps above).
        self.accountant.spend(self.epsilon, 0)

        return u, s[:n_components], u.T