def test_grad_search(Optimizer, model, crit):
    """Check that the paths are the same in the line search."""
    n_outer = 2

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor1)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor2)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor3)

    np.testing.assert_allclose(
        np.array(monitor1.alphas), np.array(monitor3.alphas))
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), rtol=1e-5)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))

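# A minimal standalone usage sketch mirroring the Forward branch of the test
# above. The import paths for grad_search, Forward, LineSearch and Monitor
# are assumptions (only sparse_ho.criterion is confirmed by the imports in
# this section); X, y, idx_train, idx_val, model and alpha0 are assumed to
# be defined as in the test fixtures.
from sparse_ho import Forward, grad_search
from sparse_ho.criterion import HeldOutMSE
from sparse_ho.optimizers import LineSearch
from sparse_ho.utils import Monitor

criterion = HeldOutMSE(idx_train, idx_val)
monitor = Monitor()
optimizer = LineSearch(n_outer=2, tol=1e-16)
grad_search(Forward(), criterion, model, optimizer, X, y, alpha0, monitor)
print(monitor.objs)  # held-out MSE value at each outer iteration
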
def test_val_grad(model):
    criterion = HeldOutLogistic(idx_val, idx_val)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(
        model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutLogistic(idx_val, idx_val)
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
        model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutLogistic(idx_val, idx_val)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(
        model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

    assert np.allclose(val_fwd, val_imp_fwd, atol=1e-4)
    assert np.allclose(grad_fwd, grad_imp_fwd, atol=1e-4)
    assert np.allclose(val_imp_fwd, val_imp, atol=1e-4)
    # for the implicit method the conjugate gradient solver does not fully
    # converge, hence the looser rtol=1e-2
    assert np.allclose(grad_imp_fwd, grad_imp, rtol=1e-2)

def test_grad_search(model, crit):
    """Check that the paths are the same in the line search."""
    if crit == 'cv':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = SmoothedSURE(sigma_star)
    # TODO MM@QBE if else scheme surprising
    criterion = HeldOutMSE(idx_train, idx_val)

    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor1,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor2,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo, criterion, model, X, y, log_alpha, monitor3,
                n_outer=n_outer, tol=1e-16)

    np.testing.assert_allclose(
        np.array(monitor1.log_alphas), np.array(monitor3.log_alphas))
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), atol=1e-8)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))

def test_val_grad():
    #######################################################################
    # Not all methods compute the full Jacobian, but all of them compute
    # the gradient: check that the gradients returned by all methods match.
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(
        model, X, y, np.array([log_alpha1, log_alpha2]),
        algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(idx_train, idx_val)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
        model, X, y, np.array([log_alpha1, log_alpha2]),
        algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(idx_train, idx_val)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd_custom, grad_imp_fwd_custom = criterion.get_val_grad(
        model, X, y, np.array([log_alpha1, log_alpha2]),
        algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(
        model, X, y, np.array([log_alpha1, log_alpha2]),
        algo.get_beta_jac_v, tol=tol)

    np.testing.assert_allclose(val_fwd, val_imp_fwd)
    np.testing.assert_allclose(grad_fwd, grad_imp_fwd)
    np.testing.assert_allclose(val_imp_fwd, val_imp)
    np.testing.assert_allclose(val_imp_fwd, val_imp_fwd_custom)
    # for the implicit method the conjugate gradient solver does not fully
    # converge, hence the looser atol=1e-3
    np.testing.assert_allclose(grad_imp_fwd, grad_imp, atol=1e-3)
    np.testing.assert_allclose(grad_imp_fwd, grad_imp_fwd_custom)

def test_grad_search(model, crit):
    """Check that the paths are the same in the line search."""
    n_outer = 2

    criterion = HeldOutLogistic(idx_val, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor1,
                n_outer=n_outer, tol=tol)

    criterion = HeldOutLogistic(idx_val, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor2,
                n_outer=n_outer, tol=tol)

    criterion = HeldOutLogistic(idx_val, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
    grad_search(algo, criterion, model, X, y, log_alpha, monitor3,
                n_outer=n_outer, tol=tol)

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads),
                       np.array(monitor3.grads), atol=1e-4)
    assert np.allclose(np.array(monitor1.objs),
                       np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))

def test_grad_search(model, crit):
    """Check that the paths are the same in the line search."""
    if crit == 'MSE':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    # TODO MM@QBE if else scheme surprising
    criterion = HeldOutMSE(idx_train, idx_val)

    monitor1 = Monitor()
    algo = Forward()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor1)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor2)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor3)

    np.testing.assert_allclose(
        np.array(monitor1.alphas), np.array(monitor3.alphas))
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), rtol=1e-5)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))

        log_alpha, algo.get_beta_jac_v, tol=1e-12,
        monitor=monitor, max_iter=maxit)
else:
    model = Lasso(max_iter=maxit)
    criterion = HeldOutMSE(idx_train, idx_val)
    if method == "forward":
        algo = Forward()
    elif method == "implicit_forward":
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=maxit, max_iter=1000)
    elif method == "implicit":
        algo = Implicit(max_iter=1000)
    elif method == "backward":
        algo = Backward()
    else:
        raise NotImplementedError
    algo.max_iter = maxit
    algo.use_stop_crit = False
    val, grad = criterion.get_val_grad(
        model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
        monitor=monitor, max_iter=maxit)

def parallel_function(dataset_name, div_alpha, method, ind_rep,
                      random_state=10):
    maxit = dict_maxits[(dataset_name, div_alpha)][ind_rep]
    print("Dataset %s, algo %s, maxit %i" % (dataset_name, method, maxit))
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    kf = KFold(n_splits=5, random_state=random_state, shuffle=True)

    for i in range(2):
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        log_alpha = np.log(alpha_max / div_alpha)
        monitor = Monitor()

        if method == "celer":
            clf = Lasso_celer(
                alpha=np.exp(log_alpha), fit_intercept=False,
                # TODO maybe change this tol
                tol=1e-8, max_iter=maxit)
            model = Lasso(estimator=clf, max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = ImplicitForward(
                tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
            algo.max_iter = maxit
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)
        elif method == "ground_truth":
            for file in os.listdir("results/"):
                if file.startswith("hypergradient_%s_%i_%s" % (
                        dataset_name, div_alpha, method)):
                    return
            else:
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    warm_start=True, tol=1e-13, max_iter=10000)
                criterion = HeldOutMSE(None, None)
                cross_val = CrossVal(cv=kf, criterion=criterion)
                algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = cross_val.get_val_grad(
                    model, X, y, log_alpha, algo.get_beta_jac_v, tol=1e-13,
                    monitor=monitor)
        else:
            model = Lasso(max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            if method == "forward":
                algo = Forward(use_stop_crit=False)
            elif method == "implicit_forward":
                algo = ImplicitForward(
                    use_stop_crit=False, tol_jac=1e-8, n_iter_jac=maxit,
                    max_iter=1000)
            elif method == "implicit":
                algo = Implicit(use_stop_crit=False, max_iter=1000)
            elif method == "backward":
                algo = Backward()
            else:
                raise NotImplementedError
            algo.max_iter = maxit
            algo.use_stop_crit = False
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)

        results = (dataset_name, div_alpha, method, maxit, val, grad,
                   monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time']
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)

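# A hedged sketch of how parallel_function above might be dispatched with
# joblib; the dataset names, div_alpha values, method list and number of
# repetitions below are illustrative assumptions, not values taken from the
# experiment script.
from itertools import product
from joblib import Parallel, delayed

datasets = ["rcv1.binary"]      # assumed dataset names
div_alphas = [10, 100]          # assumed regularization divisors
methods = ["forward", "implicit_forward", "celer", "ground_truth"]
n_reps = 1                      # assumed number of repetitions per setting

Parallel(n_jobs=4, verbose=10)(
    delayed(parallel_function)(dataset_name, div_alpha, method, ind_rep)
    for dataset_name, div_alpha, method, ind_rep in product(
        datasets, div_alphas, methods, range(n_reps)))
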
def parallel_function(dataset_name, method):
    X, y = fetch_libsvm(dataset_name)
    if dataset_name == "real-sim":
        X = X[:, :2000]
    X = csr_matrix(X)  # very important for SVM
    my_bool = norm(X, axis=1) != 0
    X = X[my_bool, :]
    y = y[my_bool]
    logC = dict_logC[dataset_name]

    for max_iter in dict_max_iter[dataset_name]:
        print("Method %s, max iter %i" % (method, max_iter))
        for i in range(2):
            # TODO change this
            sss1 = StratifiedShuffleSplit(
                n_splits=2, test_size=0.3333, random_state=0)
            idx_train, idx_val = sss1.split(X, y)
            idx_train = idx_train[0]
            idx_val = idx_val[0]

            monitor = Monitor()
            criterion = HeldOutSmoothedHinge(idx_train, idx_val)
            model = SVM(estimator=None, max_iter=10_000)

            if method == "ground_truth":
                for file in os.listdir("results_svm/"):
                    if file.startswith("hypergradient_svm_%s_%s" % (
                            dataset_name, method)):
                        return
                clf = LinearSVC(
                    C=np.exp(logC), tol=1e-32, max_iter=10_000,
                    loss='hinge', permute=False)
                algo = Implicit(criterion)
                model.estimator = clf
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=1e-14,
                    monitor=monitor)
            else:
                if method == "sota":
                    clf = LinearSVC(
                        C=np.exp(logC), loss='hinge', max_iter=max_iter,
                        tol=1e-32, permute=False)
                    model.estimator = clf
                    algo = ImplicitForward(
                        tol_jac=1e-32, n_iter_jac=max_iter,
                        use_stop_crit=False)
                elif method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=max_iter,
                        use_stop_crit=False)
                else:
                    raise NotImplementedError
                algo.max_iter = max_iter
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=tol,
                    monitor=monitor, max_iter=max_iter)

            results = (dataset_name, method, max_iter, val, grad,
                       monitor.times[0])
            df = pandas.DataFrame(results).transpose()
            df.columns = ['dataset', 'method', 'maxit', 'val', 'grad', 'time']
            str_results = "results_svm/hypergradient_svm_%s_%s_%i.pkl" % (
                dataset_name, method, max_iter)
            df.to_pickle(str_results)

def test_grad_search():
    n_outer = 3

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, model, X, y,
                np.array([log_alpha1, log_alpha2]), monitor1,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, model, X, y,
                np.array([log_alpha1, log_alpha2]), monitor2,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=1000)
    grad_search(algo, criterion, model, X, y,
                np.array([log_alpha1, log_alpha2]), monitor3,
                n_outer=n_outer, tol=1e-16)

    np.testing.assert_allclose(
        np.array(monitor1.log_alphas), np.array(monitor3.log_alphas))
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), rtol=1e-6)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor3.objs), rtol=1e-6)
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))

    np.testing.assert_allclose(
        np.array(monitor1.log_alphas), np.array(monitor2.log_alphas),
        atol=1e-2)
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor2.grads), atol=1e-2)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor2.objs), atol=1e-2)
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor2.times))

def parallel_function(dataset_name, div_alpha, method):
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    if dataset_name == "news20" and div_alpha == 100:
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01

    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        print("Method %s, maxit %i" % (method, maxit))
        for i in range(2):
            rng = np.random.RandomState(i)
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()

            if method == "celer":
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    tol=1e-12, max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(
                    tol_jac=1e-32, n_iter_jac=maxit, use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-12, monitor=monitor, max_iter=maxit)
            elif method == "ground_truth":
                for file in os.listdir("results/"):
                    if file.startswith("hypergradient_%s_%i_%s" % (
                            dataset_name, div_alpha, method)):
                        return
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    warm_start=True, tol=1e-14, max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-14, monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                if method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
                elif method == "implicit":
                    algo = Implicit(max_iter=1000)
                elif method == "backward":
                    algo = Backward()
                else:
                    raise NotImplementedError
                algo.max_iter = maxit
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad, tol=tol,
                    monitor=monitor, max_iter=maxit)

            results = (dataset_name, div_alpha, method, maxit, val, grad,
                       monitor.times[0])
            df = pandas.DataFrame(results).transpose()
            df.columns = [
                'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad',
                'time']
            str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
                dataset_name, div_alpha, method, maxit)
            df.to_pickle(str_results)

def test_val_grad():
    #######################################################################
    # Not all methods compute the full Jacobian, but all of them compute
    # the gradient: check that the gradients returned by all methods match.
    for key in models.keys():
        log_alpha = dict_log_alpha[key]
        model = models[key]

        criterion = HeldOutMSE(idx_train, idx_val)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = HeldOutMSE(idx_train, idx_val)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = HeldOutMSE(idx_train, idx_val)
        algo = Implicit()
        val_imp, grad_imp = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = HeldOutMSE(idx_train, idx_val)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        # assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)
        # for the implicit method the conjugate gradient solver does not
        # fully converge, hence the looser atol=1e-3
        assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)

    for key in models.keys():
        log_alpha = dict_log_alpha[key]
        model = models[key]

        criterion = SmoothedSURE(sigma_star)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = SmoothedSURE(sigma_star)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = SmoothedSURE(sigma_star)
        algo = Implicit(criterion)
        val_imp, grad_imp = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        criterion = SmoothedSURE(sigma_star)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(
            model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)

import pytest

from sparse_ho import Forward, Implicit, ImplicitForward
from sparse_ho.algo.implicit_forward import get_beta_jac_fast_iterdiff
from sparse_ho.algo.implicit import get_beta_jac_t_v_implicit
from sparse_ho.criterion import (
    HeldOutMSE, FiniteDiffMonteCarloSure, HeldOutLogistic)
from sparse_ho.tests.common import (
    X, X_s, y, sigma_star, idx_train, idx_val, dict_log_alpha, models,
    custom_models, dict_cvxpy_func, dict_vals_cvxpy, dict_grads_cvxpy,
    dict_list_log_alphas, get_v, list_model_crit, list_model_names)

# list of algorithms to be tested
list_algos = [
    Forward(),
    ImplicitForward(tol_jac=1e-16, n_iter_jac=5000),
    Implicit()
    # Backward()  # XXX to fix
]

tol = 1e-15

X_r = X_s.tocsr()
X_c = X_s


@pytest.mark.parametrize('key', list(models.keys()))
def test_beta_jac(key):
    """Test that algorithms computing the Jacobian return the same Jacobian."""
    if key in ("svm", "svr", "ssvr"):
        X_s = X_r
    else:
        X_s = X_c
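# Hypothetical companion test combining the model and algorithm
# parametrizations above; the test name and the trivial assertion are
# illustrative assumptions, but the criterion/algo call pattern mirrors the
# other tests in this section.
@pytest.mark.parametrize('key', list(models.keys()))
@pytest.mark.parametrize('algo', list_algos)
def test_val_grad_all_algos(key, algo):
    model = models[key]
    log_alpha = dict_log_alpha[key]
    criterion = HeldOutMSE(idx_train, idx_val)
    val, grad = criterion.get_val_grad(
        model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol)
    assert grad is not None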