def test_reduce_lambda(self):
    """Check that ``reduce_lambda`` keeps RLS estimates accurate at a smaller lambda.

    A dictionary built for ``lam = 11`` is first validated against the exact
    ridge leverage scores (diagonal of ``(L + lam * I)^{-1} L``), then reduced
    to ``lam = 10`` and validated again. Both comparisons use a relative
    tolerance of 0.5. The reduced dictionary must not hold more indices than
    the dictionary it was derived from.
    """
    n_points, n_features = 100, 5
    lam_start = 11
    lam_target = 10
    rng = check_random_state(self.seed)

    X_data = rng.randn(n_points, n_features)
    L_data = example_eval_L_polynomial(X_data)
    identity = np.eye(n_points)

    def exact_rls(reg):
        # Exact ridge leverage scores for regularization ``reg``.
        return la.solve(L_data + reg * identity, L_data).diagonal()

    # Stage 1: the starting dictionary must already be accurate at lam_start.
    expected_start = exact_rls(lam_start)
    start_dict = self.__create_lambda_acc_dictionary(
        X_data, example_eval_L_polynomial, lam_start, rng)
    estimated_start = estimate_rls_bless(
        start_dict, X_data, example_eval_L_polynomial, lam_start)
    np.testing.assert_allclose(estimated_start, expected_start, rtol=0.5)

    # Stage 2: reduce to lam_target and check accuracy again.
    # NOTE: the rng is shared and stateful, so the order of the calls
    # consuming it (randn, dictionary creation, reduce_lambda) matters.
    reduced_dict = reduce_lambda(
        X_data, example_eval_L_polynomial, start_dict, lam_target, rng)
    estimated_target = estimate_rls_bless(
        reduced_dict, X_data, example_eval_L_polynomial, lam_target)
    expected_target = exact_rls(lam_target)
    np.testing.assert_allclose(estimated_target, expected_target, rtol=0.5)

    # The reduction step must never grow the dictionary.
    size_before = len(start_dict.idx)
    size_after = len(reduced_dict.idx)
    self.assertTrue(size_after <= size_before)
def test_estimate_rls_bless(self):
    """Check ``estimate_rls_bless`` against exact ridge leverage scores.

    Two dictionaries are built with the class helper: one with lambda 0.0
    and one with lambda 11. Both are used to estimate the RLS at lambda 10.
    The estimates from the lambda-0.0 dictionary must agree with the exact
    scores up to ``assert_almost_equal`` precision; the estimates from the
    lambda-11 dictionary must agree within a relative tolerance of 0.5.
    """
    n_points, n_features = 100, 5
    lam_dict = 11
    lam_eval = 10
    rng = check_random_state(self.seed)

    X_data = rng.randn(n_points, n_features)
    L_data = example_eval_L_polynomial(X_data)

    # Reference values: diagonal of (L + lam_eval * I)^{-1} L.
    regularized = L_data + lam_eval * np.eye(n_points)
    expected_rls = la.solve(regularized, L_data).diagonal()

    # NOTE: both dictionaries draw from the same stateful rng, so the
    # lambda-0.0 dictionary has to be created first, as in the original.
    zero_lam_dict = self.__create_lambda_acc_dictionary(
        X_data, example_eval_L_polynomial, 0.0, rng)
    coarse_dict = self.__create_lambda_acc_dictionary(
        X_data, example_eval_L_polynomial, lam_dict, rng)

    from_zero_lam = estimate_rls_bless(
        zero_lam_dict, X_data, example_eval_L_polynomial, lam_eval)
    from_coarse = estimate_rls_bless(
        coarse_dict, X_data, example_eval_L_polynomial, lam_eval)

    np.testing.assert_almost_equal(from_zero_lam, expected_rls)
    np.testing.assert_allclose(from_coarse, expected_rls, rtol=0.5)
def test_bless(self):
    """Check the dictionary returned by ``bless`` for accuracy and size.

    The RLS estimated from the returned dictionary must match the exact
    ridge leverage scores at ``lam = 11`` within a relative tolerance of
    0.5, and the number of selected indices must not exceed the oversampling
    parameter (5) times the sum of the exact scores.
    """
    n_points, n_features = 100, 5
    lam = 11
    oversample = 5
    rng = check_random_state(self.seed)

    X_data = rng.randn(n_points, n_features)
    L_data = example_eval_L_polynomial(X_data)

    # Exact scores: diagonal of (L + lam * I)^{-1} L.
    regularized = L_data + lam * np.eye(n_points)
    expected_rls = la.solve(regularized, L_data).diagonal()

    bless_dict = bless(
        X_data,
        example_eval_L_polynomial,
        lam_final=lam,
        rls_oversample_param=oversample,
        random_state=rng,
        verbose=False,
    )
    estimated_rls = estimate_rls_bless(
        bless_dict, X_data, example_eval_L_polynomial, lam)
    np.testing.assert_allclose(estimated_rls, expected_rls, rtol=0.5)

    # Size bound: at most `oversample` times the sum of the exact scores.
    size_bound = oversample * expected_rls.sum()
    self.assertTrue(len(bless_dict.idx) <= size_bound)
def compute_nystrom_dict(X_data, eval_L, rls_oversample_bless, rls_oversample_dppvfx, rng, nb_iter_bless=None,
                         verbose=True):
    """ Computes the initial dictionary necessary for the algorithm. Internally invokes BLESS.

    The computation has two phases: BLESS is run with ``lam_final = 1.0`` to obtain a dictionary that is
    only used to estimate all ridge leverage scores (RLS); then every point is kept independently with
    probability ``min(rls_oversample_dppvfx * rls_estimate, 1)`` to form the returned dictionary.

    :param array_like X_data: dataset that we must approximate
    :param callable eval_L: likelihood function
    :param float rls_oversample_bless: see :func:`vfx_sampling_precompute_constants`
    :param float rls_oversample_dppvfx: see :func:`vfx_sampling_precompute_constants`
    :param RandomState rng: random source used for sampling
    :param int nb_iter_bless: iterations for BLESS, if None it is set to log(n)
    :param bool verbose: controls verbosity of debug output, including progress bars.
        the progress bar reports:
        - lam: lambda value of the current iteration
        - m: current size of the dictionary (number of centers contained)
        - m_expected: expected size of the dictionary before sampling
        - probs_dist: (mean, max, min) of the approximate rlss at the current iteration
    :return: an (eps, lambda)-accurate dictionary for Nystrom approximation
    :rtype: CentersDictionary
    :raises ValueError: if some sampling probability is not >= 0 (this includes NaN), or if the
        sampling step selects no point at all
    """
    n, _ = X_data.shape

    # Phase 1: compute initial dictionary D_bless with small rls_oversample_bless
    # D_bless is used only to estimate all RLS
    dict_bless = bless(X_data, eval_L, 1.0, rls_oversample_bless, rng,
                       nb_iter_bless=nb_iter_bless, verbose=verbose)
    bless_rls_estimate = estimate_rls_bless(dict_bless, X_data, eval_L, 1.0)

    # Phase 2: use estimate RLS to sample the dict_dppvfx dictionary, i.e. the one used to construct A
    # here theory says that to have high acceptance probability we need the oversampling factor to be ~deff^2
    # but even with constant oversampling factor we seem to accept fast
    probs = np.minimum(rls_oversample_dppvfx * bless_rls_estimate, 1.0)

    # `not all(>= 0)` rather than `any(< 0)` so that NaN probabilities are rejected as well.
    if not np.all(probs >= 0.0):
        # The line break inside the message is written as an explicit escape: a single-quoted
        # literal cannot span physical lines.
        raise ValueError(
            'Some estimated RLS is negative, this should never happen. \n'
            'Min prob: {}'.format(np.min(probs)))

    # Independent Bernoulli draw per point.
    selected = rng.rand(n) <= probs
    s = selected.sum()

    if not s > 0:
        # The probabilities are scaled by rls_oversample_dppvfx, so that is the parameter
        # to increase when the expected number of selected points is too small.
        raise ValueError(
            'No point selected during RLS sampling step, try to increase rls_oversample_dppvfx. '
            'Expected number of points: {:.3f}'.format(probs.sum()))

    dict_dppvfx = CentersDictionary(idx=selected.nonzero()[0],
                                    X=X_data[selected, :],
                                    probs=probs[selected],
                                    lam=1,
                                    rls_oversample=rls_oversample_dppvfx)

    return dict_dppvfx