def test_beta_jac(key):
    """Check that iterdiff and fast-iterdiff return the same Jacobian,
    on both the dense and the alternate (sparse/regression) design matrix."""
    # SVM-type models are exercised on the regression matrix, others on X_c.
    X_other = X_r if key in ("svm", "svr", "ssvr") else X_c
    log_alpha = dict_log_alpha[key]
    model = models[key]
    results = [
        get_beta_jac_iterdiff(
            X, y, log_alpha, tol=tol, model=model),
        get_beta_jac_fast_iterdiff(
            X, y, log_alpha, tol=tol, model=model, tol_jac=tol),
        get_beta_jac_iterdiff(
            X_other, y, log_alpha, tol=tol, model=model),
        get_beta_jac_fast_iterdiff(
            X_other, y, log_alpha, tol=tol, model=model, tol_jac=tol),
    ]
    # Each consecutive pair must agree on support, coefficients and Jacobian.
    for (supp_a, dense_a, jac_a), (supp_b, dense_b, jac_b) in zip(
            results[:-1], results[1:]):
        assert np.all(supp_a == supp_b)
        assert np.allclose(dense_a, dense_b)
        assert np.allclose(jac_a, jac_b, atol=1e-6)
    # Smoke check: the implicit transposed Jacobian-vector product runs.
    get_beta_jac_t_v_implicit(X, y, log_alpha, get_v, model=model)
def test_beta_jac(model):
    """Compare iterdiff / fast-iterdiff Jacobians with each other and with
    scikit-learn's L1 logistic regression solution."""
    X_train, y_train = X[idx_train, :], y[idx_train]
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, tol=tol, model=model,
        compute_jac=True, max_iter=1000)
    # Reference coefficients from scikit-learn (saga, L1 penalty, no intercept).
    clf = LogisticRegression(
        penalty="l1", tol=1e-12, C=(1 / (alpha * len(idx_train))),
        fit_intercept=False, max_iter=100000, solver="saga")
    clf.fit(X_train, y_train)
    supp_sk = clf.coef_ != 0
    dense_sk = clf.coef_[supp_sk]
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_alpha, get_v, tol=tol, model=model,
        tol_jac=1e-12)
    supp3, dense3, jac3 = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, tol=tol, model=model,
        compute_jac=True, max_iter=1000)
    # The sparse design matrix must give the same answer.
    supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(
        X_s[idx_train, :], y_train, log_alpha, get_v_s, tol=tol, model=model,
        tol_jac=1e-12)
    assert np.all(supp1 == supp_sk)
    assert np.allclose(dense1, dense_sk, atol=1e-4)
    # Consecutive solver results must agree on support, coefs and Jacobian.
    triples = ((supp1, dense1, jac1), (supp2, dense2, jac2),
               (supp3, dense3, jac3), (supp4, dense4, jac4))
    for (s_a, d_a, j_a), (s_b, d_b, j_b) in zip(triples[:-1], triples[1:]):
        assert np.all(s_a == s_b)
        assert np.allclose(d_a, d_b)
        assert np.allclose(j_a, j_b, atol=1e-4)
def test_beta_jac():
    """Check that the methods computing the full Jacobian agree for every model."""
    # maybe we could add a test comparing with sklearn
    X_train, y_train = X[idx_train, :], y[idx_train]
    X_s_train = X_s[idx_train]
    for key, model in models.items():
        log_alpha = dict_log_alpha[key]
        supp_it, dense_it, jac_it = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha, tol=tol, model=model)
        # Second run of the same solver (placeholder for a sklearn reference).
        supp_ref, dense_ref, _ = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha, tol=tol, model=model)
        supp_fast, dense_fast, jac_fast = get_beta_jac_fast_iterdiff(
            X_train, y_train, log_alpha, get_v, tol=tol, model=model,
            tol_jac=tol)
        supp_it_s, dense_it_s, jac_it_s = get_beta_jac_iterdiff(
            X_s_train, y_train, log_alpha, tol=tol, model=model)
        supp_fast_s, dense_fast_s, jac_fast_s = get_beta_jac_fast_iterdiff(
            X_s_train, y_train, log_alpha, get_v, tol=tol, model=model,
            tol_jac=tol)
        assert np.all(supp_it == supp_ref)
        assert np.all(supp_it == supp_fast)
        assert np.allclose(dense_it, dense_ref)
        assert np.allclose(dense_it, dense_fast)
        assert np.allclose(jac_it, jac_fast, atol=1e-6)
        assert np.all(supp_fast == supp_it_s)
        assert np.allclose(dense_fast, dense_it_s)
        assert np.allclose(jac_fast, jac_it_s, atol=1e-6)
        assert np.all(supp_it_s == supp_fast_s)
        assert np.allclose(dense_it_s, dense_fast_s)
        assert np.allclose(jac_it_s, jac_fast_s, atol=1e-6)
        # Smoke check: the implicit transposed product runs for this model.
        get_beta_jac_t_v_implicit(
            X_train, y_train, log_alpha, get_v, model=model)
def test_beta_jac():
    """Compare iterdiff and fast-iterdiff coefficients for the elastic net,
    and check both against scikit-learn's solution."""
    X_train, y_train = X[idx_train, :], y[idx_train]
    log_alphas = np.array([log_alpha1, log_alpha2])
    supp_it, dense_it, _ = get_beta_jac_iterdiff(
        X_train, y_train, log_alphas, tol=tol, model=model,
        compute_jac=True, max_iter=max_iter)
    # scikit-learn reference: same total penalty split into an l1_ratio.
    estimator = linear_model.ElasticNet(
        alpha=(alpha_1 + alpha_2), fit_intercept=False,
        l1_ratio=alpha_1 / (alpha_1 + alpha_2), tol=1e-16, max_iter=max_iter)
    estimator.fit(X_train, y_train)
    supp_fast, dense_fast, _ = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_alphas, get_v, tol=tol, model=model,
        tol_jac=1e-16, max_iter=max_iter, niter_jac=10000)
    np.testing.assert_allclose(
        dense_it, estimator.coef_[estimator.coef_ != 0])
    assert np.all(supp_it == supp_fast)
    np.testing.assert_allclose(dense_it, dense_fast)
def get_val(self, model, X, y, log_alpha, tol=1e-3):
    """Solve the inner problem on (X, y), post-process the solution with
    ``model.get_beta`` and evaluate the outer criterion on the validation fold."""
    # TODO max_iter
    support, coefs, _ = get_beta_jac_iterdiff(
        X, y, log_alpha, model, max_iter=model.max_iter, tol=tol,
        compute_jac=False)
    support, coefs = model.get_beta(support, coefs)
    return self.get_val_outer(
        X[self.idx_val], y[self.idx_val], support, coefs)
def get_val(self, model, X, y, log_alpha, tol=1e-3):
    """Return the held-out criterion value at ``log_alpha``.

    The inner problem is solved on the training fold and the criterion is
    evaluated on the validation fold, matching the other ``get_val``
    implementations in this file.

    Parameters
    ----------
    model : inner estimator object passed to ``get_beta_jac_iterdiff``.
    X, y : full design matrix and targets; split via ``self.idx_*``.
    log_alpha : log of the regularization parameter(s).
    tol : float, tolerance of the inner solver.
    """
    # TODO add warm start
    # Bug fix: previously the model was fit on X[self.idx_val] — i.e. trained
    # on the same fold it is evaluated on (the original "on train or on
    # test?" TODO). Fit on the training fold instead.
    # NOTE(review): assumes this criterion stores idx_train alongside
    # idx_val, as the sibling get_val implementations do — confirm.
    mask, dense, _ = get_beta_jac_iterdiff(
        X[self.idx_train], y[self.idx_train], log_alpha, model, tol=tol,
        compute_jac=False)
    # Evaluate on the held-out validation fold.
    return self.get_val_outer(
        X[self.idx_val, :], y[self.idx_val], mask, dense)
def get_beta_jac_t_v_implicit(
        X_train, y_train, log_alpha, get_v, mask0=None, dense0=None,
        tol=1e-3, model="lasso", sk=False, max_iter=1000, sol_lin_sys=None,
        n=1, sigma=0, delta=0, epsilon=0):
    """Compute beta and the transposed-Jacobian-vector product implicitly.

    Solves the inner problem for beta, then solves the linear system
    ``H @ sol = -model.generalized_supp(X_train, v, log_alpha)`` (H the
    reduced Hessian from ``model.get_hessian``) with conjugate gradient.
    If CG raises or does not converge, the solve is retried once with a
    small Tikhonov regularization added to H.

    Parameters
    ----------
    X_train, y_train : training design matrix and targets.
    log_alpha : log of the regularization parameter(s); may be scalar or array.
    get_v : callable(mask, dense) returning the outer-gradient vector v.
    mask0, dense0 : warm start for the inner solver.
    tol : float, tolerance for the inner solver and the CG solve.
    model : model object (the "lasso" string default is legacy).
    sk, n, sigma, delta, epsilon : unused here; kept for API compatibility.
    max_iter : int, iteration cap for the inner solver.
    sol_lin_sys : previous linear-system solution used as CG warm start.

    Returns
    -------
    mask, dense : support and coefficients of the inner solution.
    jac_t_v : the hypergradient contribution ``J^T v``.
    sol[0] : solution of the linear system (for warm starting next call).
    """
    alpha = np.exp(log_alpha)
    n_samples, n_features = X_train.shape
    # Inner solve: beta only, no Jacobian tracking.
    mask, dense, _ = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, mask0=mask0, dense0=dense0, tol=tol,
        max_iter=max_iter, compute_jac=False, model=model)
    mat_to_inv = model.get_hessian(X_train, y_train, mask, dense, log_alpha)
    size_mat = mat_to_inv.shape[0]
    v = get_v(mask, dense)
    if hasattr(model, 'dual'):
        v = model.get_dual_v(X_train, y_train, v, log_alpha)
    # TODO: to clean
    is_sparse = issparse(X_train)
    # Broadcast a scalar alpha to one value per feature.
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()
    # Warm start the linear system from a previous solution when possible.
    if sol_lin_sys is not None and not hasattr(model, 'dual'):
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        sol0 = np.zeros(size_mat)
    rhs = - model.generalized_supp(X_train, v, log_alpha)
    try:
        sol = cg(mat_to_inv, rhs, x0=sol0, tol=tol)
        if sol[1] == 0:
            sol_lin_sys = sol[0]
        else:
            raise ValueError('cg did not converge.')
    except Exception:
        # Best-effort fallback: retry once with Tikhonov regularization
        # scaled by the squared spectral norm of the reduced design matrix.
        print("Matrix to invert was badly conditioned")
        if is_sparse:
            reg_amount = 1e-7 * norm(
                model.reduce_X(X_train, mask).todense(), ord=2) ** 2
            reg_matrix = reg_amount * identity(size_mat)
        else:
            reg_amount = 1e-7 * norm(
                model.reduce_X(X_train, mask), ord=2) ** 2
            reg_matrix = reg_amount * np.eye(size_mat)
        # Bug fix: the regularization used to be applied twice (added
        # in-place to mat_to_inv AND again inside the cg call, i.e. an
        # effective 2 * reg_amount). It is now applied exactly once, and
        # mat_to_inv is no longer mutated in place.
        sol = cg(mat_to_inv + reg_matrix, rhs, x0=sol0, atol=1e-3)
        sol_lin_sys = sol[0]
    jac_t_v = model._get_jac_t_v(
        X_train, y_train, sol_lin_sys, mask, dense, alphas, v.copy(),
        n_samples)
    return mask, dense, jac_t_v, sol[0]
def get_val(self, model, X, y, log_alpha, tol=1e-3):
    """Solve the inner problem twice on the training fold — once on ``y`` and
    once on ``y`` perturbed by ``epsilon * delta`` — and combine the two
    solutions through ``self.get_val_outer``."""
    # TODO add warm start
    X_train = X[self.idx_train]
    y_train = y[self.idx_train]
    mask_plain, dense_plain, _ = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, model, tol=tol, mask0=self.mask0,
        dense0=self.dense0, compute_jac=False)
    # Same solve on perturbed targets (no warm start for this one).
    mask_pert, dense_pert, _ = get_beta_jac_iterdiff(
        X_train, y_train + self.epsilon * self.delta, log_alpha, model,
        tol=tol, compute_jac=False)
    return self.get_val_outer(mask_plain, dense_plain, mask_pert, dense_pert)
def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
    """Fit the inner problem on the training fold, evaluate the outer
    criterion on the validation fold, and optionally record in ``monitor``."""
    # TODO add warm start
    support, coefs, _ = get_beta_jac_iterdiff(
        X[self.idx_train], y[self.idx_train], log_alpha, model, tol=tol,
        compute_jac=False)
    value = self.get_val_outer(
        X[self.idx_val, :], y[self.idx_val], support, coefs)
    if monitor is not None:
        monitor(value, None, support, coefs, alpha=np.exp(log_alpha))
    return value
def test_beta_jac(model):
    """Check the SVM-style Jacobian against a closed-form linear solve, and
    the iterdiff / fast-iterdiff solvers against each other and sklearn."""
    X_train, y_train = X[idx_train, :], y[idx_train]
    n_train = len(idx_train)
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_C, tol=tol, model=model,
        compute_jac=True, max_iter=10000)
    beta = np.zeros(n_train)
    beta[supp1] = dense1
    # Dual coefficients strictly inside the box (0, C) vs. those clipped at C.
    free = np.logical_and(beta > 0, beta < C)
    clipped = beta >= C
    signed_X = y_train[:, np.newaxis] * X_train
    Q = signed_X @ signed_X.T
    rhs = (np.eye(n_train, n_train) - Q)[np.ix_(free, clipped)] @ (
        np.ones(clipped.sum()) * C)
    # Closed-form Jacobian on the free support via a direct linear solve.
    jac_closed_form = np.linalg.solve(Q[np.ix_(free, free)], rhs)
    assert np.allclose(jac_closed_form, jac1[dense1 < C])
    # Recover the primal coefficients from the dual solution.
    if issparse(X):
        primal = np.sum(X_train[supp1, :].T.multiply(
            y_train[supp1] * dense1), axis=1)
        primal = primal.T
    else:
        primal = np.sum(
            y_train[supp1] * dense1 * X_train[supp1, :].T, axis=1)
    clf = LinearSVC(
        loss="hinge", fit_intercept=False, C=C, tol=tol, max_iter=100000)
    clf.fit(X_train, y_train)
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_C, get_v, tol=tol, model=model,
        tol_jac=1e-16, max_iter=10000)
    assert np.allclose(primal, clf.coef_)
    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-4)
def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
    """Solve the inner problem on ``y`` and on ``y + epsilon * delta``,
    evaluate the outer criterion on both solutions, and optionally record
    the value in ``monitor``."""
    # TODO add warm start
    if not self.init_delta_epsilon:
        # Initialize delta/epsilon on first call (presumably sized from X —
        # see _init_delta_epsilon).
        self._init_delta_epsilon(X)
    mask_plain, dense_plain, _ = get_beta_jac_iterdiff(
        X, y, log_alpha, model, tol=tol, mask0=self.mask0,
        dense0=self.dense0, compute_jac=False)
    # Same solve on perturbed targets (no warm start for this one).
    mask_pert, dense_pert, _ = get_beta_jac_iterdiff(
        X, y + self.epsilon * self.delta, log_alpha, model, tol=tol,
        compute_jac=False)
    value = self.get_val_outer(
        X, y, mask_plain, dense_plain, mask_pert, dense_pert)
    if monitor is not None:
        monitor(value, None, mask_plain, dense_plain,
                alpha=np.exp(log_alpha))
    return value
def get_beta_jac_fast_iterdiff(
        X, y, log_alpha, get_v, model, mask0=None, dense0=None, jac0=None,
        tol=1e-3, max_iter=1000, niter_jac=1000, tol_jac=1e-6, verbose=False):
    """Solve the inner problem for beta, then iterate only on the Jacobian.

    NOTE(review): ``get_v`` is accepted for signature compatibility with the
    other solvers' callers but is not used inside this function.
    """
    # First pass: solve for beta without tracking its Jacobian.
    mask, dense, _ = get_beta_jac_iterdiff(
        X, y, log_alpha, mask0=mask0, dense0=dense0, jac0=jac0, tol=tol,
        max_iter=max_iter, compute_jac=False, model=model, verbose=verbose)
    # Warm-start the Jacobian iterations from the previous support/Jacobian.
    dbeta_init = model._init_dbeta0(mask, mask0, jac0)
    alpha_masked = model._reduce_alpha(np.exp(log_alpha), mask)
    _, r = model._init_beta_r(X, y, mask, dense)
    # Second pass: iterate on the Jacobian alone, on the reduced problem.
    jac = get_only_jac(
        model.reduce_X(X, mask), model.reduce_y(y, mask), r, alpha_masked,
        model.sign(dense, log_alpha), dbeta=dbeta_init, niter_jac=niter_jac,
        tol_jac=tol_jac, model=model, mask=mask, dense=dense,
        verbose=verbose)
    return mask, dense, jac
def get_beta_jac_v(self, X, y, log_alpha, model, get_v, mask0=None,
                   dense0=None, quantity_to_warm_start=None, max_iter=1000,
                   tol=1e-3, compute_jac=False, full_jac_v=False):
    """Solve the inner problem, then backpropagate ``v`` through the iterate
    history to obtain the Jacobian-vector product ``J^T v``."""
    mask, dense, list_sign = get_beta_jac_iterdiff(
        X, y, log_alpha, model, mask0=mask0, dense0=dense0, jac0=None,
        max_iter=max_iter, tol=tol, compute_jac=compute_jac,
        return_all=True, use_stop_crit=self.use_stop_crit)
    # Embed v in the full feature space; it is non-zero on the support only.
    v_full = np.zeros(X.shape[1])
    v_full[mask] = get_v(mask, dense)
    jac_v = get_only_jac_backward(
        X, np.exp(log_alpha), list_sign, v_full, model,
        jac_v0=quantity_to_warm_start)
    if not full_jac_v:
        jac_v = model.get_mask_jac_v(mask, jac_v)
    jac_v = np.atleast_1d(jac_v)
    # NOTE(review): jac_v is returned twice — the last slot feeds the next
    # warm start; confirm it should be the (possibly masked) value.
    return mask, dense, jac_v, jac_v