def adalasso_bic_nu(X, p, eps=1e-3):
    """
    Fit an adaptive-lasso VAR(p) model, selecting both the regularization
    parameter lmbda and the adaptive-weight exponent nu via BIC.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic_nu(R, T, eps)
def solve_adalasso(X, p, lmbda, nu, step_rule=0.1, line_srch=None, eps=1e-6,
                   maxiter=MAXITER, method="fista"):
    """
    Solve the adaptive lasso at a fixed regularization parameter lmbda.
    The adaptive weights are W = 1 / |B0|**nu, where B0 is the unregularized
    Whittle-Levinson-Durbin initialization.
    """
    R = compute_covariance(X, p_max=p)
    B0 = _wld_init(R)
    W = 1. / np.abs(B0)**nu  # adaptive weights from the unregularized fit
    B0 = dither(B0)
    return _solve_lasso(R, B0, lmbda, W, step_rule, line_srch, eps, maxiter,
                        method)
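# Illustrative sketch (not part of the original module): how solve_adalasso
# might be called at a fixed regularization level.  The data shape (T, n)
# follows the conventions used elsewhere in this file; the two-value
# unpacking mirrors how convergence_example below unpacks _solve_lasso, but
# the meaning of the second return value is assumed here.
def _example_solve_adalasso():
    np.random.seed(0)
    X = np.random.normal(size=(500, 10))  # T = 500 samples, n = 10 series
    # Adaptive lasso with nu = 1.25 at a fixed lmbda; FISTA is the default.
    B, res = solve_adalasso(X, p=3, lmbda=0.01, nu=1.25)
    return B, res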
def solve_lasso(X, p, lmbda=0.0, W=1.0, step_rule=0.1, line_srch=None,
                eps=1e-6, maxiter=MAXITER, method="ista"):
    """
    Solve the (weighted) lasso at a fixed regularization parameter lmbda.
    W may be a scalar or an array of non-negative weights.
    """
    assert np.all(W >= 0), "W must be non-negative"
    R = compute_covariance(X, p_max=p)
    B0 = dither(_wld_init(R))
    # Forward the user-selected solver rather than hard-coding "ista".
    return _solve_lasso(R, B0, lmbda, W, step_rule=step_rule,
                        line_srch=line_srch, eps=eps, maxiter=maxiter,
                        method=method)
def adalasso_bic_path(X, p, nu=1.25, lmbda_path=np.logspace(-6, 1.0, 250),
                      eps=1e-3):
    """
    Fit a VAR(p) model by solving the adaptive lasso along a regularization
    path and selecting the regularization parameter that optimizes BIC.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic_path(R, T, p, nu=nu, lmbda_path=lmbda_path, eps=eps)
def adalasso_bic(X, p, nu=1.25, lmbda_max=None):
    """
    Fit a VAR(p) model by optimizing BIC over lmbda with a bisection method.
    This is faster than adalasso_bic_path, but it may settle on a poor
    regularization parameter, and it does not return the BIC path.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic(R, T, p, nu, lmbda_max)
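# Illustrative sketch (assumption, not from the original module): comparing
# the bisection-based BIC search with the full path search.  The 4-tuple
# unpacking for adalasso_bic mirrors how fit_VAR below unpacks _adalasso_bic;
# the return value of adalasso_bic_path is left packed since its exact shape
# is not shown in this file.
def _example_bic_selection():
    np.random.seed(0)
    X = np.random.normal(size=(500, 10))
    B, cost, lmbda, bic = adalasso_bic(X, p=3, nu=1.25)  # fast bisection
    path_result = adalasso_bic_path(X, p=3, nu=1.25)     # slower, full path
    return B, lmbda, bic, path_result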
def regularization_path(X, p, lmbda_path, W=1.0, step_rule=0.1,
                        line_srch=None, eps=1e-6, maxiter=MAXITER,
                        method="ista"):
    """
    Given an iterable of lmbda values, return the whole regularization path
    as a 4D array indexed as [lmbda, tau, i, j].

    line_srch can either be None for a constant step size, or a tuple
    (L0, eta) specifying the initial step size 1 / L0, with L increased
    exponentially by the factor eta until a workable step is found.
    Requires L0 > 0 and eta > 1.
    """
    R = compute_covariance(X, p_max=p)
    B0 = dither(_wld_init(R))
    return _regularization_path(R, B0, lmbda_path, W=W, step_rule=step_rule,
                                line_srch=line_srch, eps=eps,
                                maxiter=maxiter, method=method)
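# Illustrative sketch (assumption): sweeping a log-spaced lmbda path with a
# backtracking line search.  Per the docstring above, line_srch=(L0, eta)
# starts from step size 1 / L0 and increases L by the factor eta as needed,
# and the result is indexed as [lmbda, tau, i, j].
def _example_regularization_path():
    np.random.seed(0)
    X = np.random.normal(size=(500, 10))
    lmbdas = np.logspace(-4, 0, 50)
    path = regularization_path(X, p=3, lmbda_path=lmbdas,
                               line_srch=(1.0, 1.5), method="fista")
    return path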
def fit_VAR(X, p_max, nu=1.25, eps=1e-3):
    """
    Fit a VAR model, selecting the order p <= p_max and the regularization
    parameter via BIC.  Pass nu=None to also select nu via BIC.
    """
    if nu is None:  # set nu=None to estimate nu via BIC as well
        nu = 1.25
        fit_nu = True
    else:
        fit_nu = False

    T = len(X)
    R = compute_covariance(X, p_max=p_max)

    bic_star = -np.inf
    cost_star = np.inf
    lmbda_star = None
    B_star = None
    for p in range(1, p_max + 1):
        B, cost, lmbda, bic = _adalasso_bic(R[:p + 1], T, p, nu,
                                            lmbda_max=None, eps=eps)
        if bic > bic_star:
            B_star = B
            cost_star = cost
            bic_star = bic
            lmbda_star = lmbda
        elif bic < 0.75 * bic_star:
            # BIC has dropped well below the best value seen; stop searching.
            break

    # Trim trailing all-zero lag coefficients from the selected model.
    while np.all(B_star[-1] == 0) and len(B_star) > 1:
        B_star = B_star[:-1]

    if fit_nu:
        p_star = len(B_star)
        B_star, cost_star, lmbda_star, bic_star, nu_star = \
            _adalasso_bic_nu(R[:p_star + 1], T, eps=eps)
        return B_star, cost_star, lmbda_star, bic_star, nu_star
    else:
        return B_star, cost_star, lmbda_star, bic_star
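# Illustrative sketch (assumption): typical use of fit_VAR.  With the default
# nu it returns (B, cost, lmbda, bic); with nu=None it additionally returns
# the selected nu, matching the two return statements above.
def _example_fit_VAR():
    np.random.seed(0)
    X = np.random.normal(size=(1000, 10))
    B, cost, lmbda, bic = fit_VAR(X, p_max=5)
    B2, cost2, lmbda2, bic2, nu2 = fit_VAR(X, p_max=5, nu=None)
    return B, bic, B2, nu2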
def convergence_example():
    """
    Compare the convergence of ISTA (constant, 1/t, and backtracking step
    sizes) against FISTA on a synthetic time-series adaptive-lasso problem,
    then plot the gradient residuals and suboptimality gaps and save the
    figures to figures/convergence.{png,pdf}.
    """
    np.random.seed(0)
    T = 1000
    n = 50
    p = 15

    # Synthetic data with a few lagged cross-dependencies.
    X = np.random.normal(size=(T, n))
    X[1:] = 0.25 * np.random.normal(size=(T - 1, n)) + X[:-1, ::-1]
    X[2:] = 0.25 * np.random.normal(size=(T - 2, n)) + X[:-2, ::-1]
    X[2:, 0] = 0.25 * np.random.normal(size=T - 2) + X[:-2, 1]
    X[3:, 1] = 0.25 * np.random.normal(size=T - 3) + X[:-3, 2]

    R = compute_covariance(X, p)
    A, _, _ = whittle_lev_durb(R)
    B0 = A_to_B(A)
    B0 = B0 + 0.1 * np.random.normal(size=B0.shape)

    lmbda = 0.025
    B_basic = B0
    B_decay_step = B0
    B_bt = B0
    L_bt = 0.01
    B_f = B0
    L_f = 0.01
    t_f = 1.0
    M_f = B0

    W = 1. / np.abs(B0)**(1.25)  # adaptive weighting

    # Reference solution: run FISTA to (near) convergence.
    B_star, _ = _solve_lasso(R, B0, lmbda, W, step_rule=0.01, line_srch=1.1,
                             method="fista", eps=-np.inf, maxiter=3000)
    cost_star = cost_function(B_star, R, lmbda=lmbda, W=W)

    N_iters = 100
    N_algs = 4
    GradRes = np.empty((N_iters, N_algs))
    Cost = np.empty((N_iters, N_algs))
    for it in range(N_iters):
        B_basic, err_basic = _basic_prox_descent(
            R, B_basic, lmbda=lmbda, maxiter=1, ss=0.01, eps=-np.inf, W=W)
        B_decay_step, err_decay_step = _basic_prox_descent(
            R, B_decay_step, lmbda=lmbda, maxiter=1, ss=1. / (it + 1),
            eps=-np.inf, W=W)
        B_bt, err_bt, L_bt = _backtracking_prox_descent(
            R, B_bt, lmbda, eps=-np.inf, maxiter=1, L=L_bt, eta=1.1, W=W)
        B_f, err_f, L_f, t_f, M_f = _fast_prox_descent(
            R, B_f, lmbda, eps=-np.inf, maxiter=1, L=L_f, eta=1.1, t=t_f,
            M0=M_f, W=W)

        Cost[it, 0] = cost_function(B_basic, R, lmbda, W=W)
        Cost[it, 1] = cost_function(B_decay_step, R, lmbda, W=W)
        Cost[it, 2] = cost_function(B_bt, R, lmbda, W=W)
        Cost[it, 3] = cost_function(B_f, R, lmbda, W=W)

        GradRes[it, 0] = err_basic
        GradRes[it, 1] = err_decay_step
        GradRes[it, 2] = err_bt
        GradRes[it, 3] = err_f

    Cost = Cost - cost_star

    fig, axes = plt.subplots(2, 1, sharex=True)
    axes[0].plot(GradRes[:, 0], label="ISTA (constant stepsize)", linewidth=2)
    axes[0].plot(GradRes[:, 1], label="ISTA (1/t stepsize)", linewidth=2)
    axes[0].plot(GradRes[:, 2], label="ISTA with Backtracking Line Search",
                 linewidth=2)
    axes[0].plot(GradRes[:, 3], label="FISTA with Backtracking Line Search",
                 linewidth=2)
    axes[1].plot(Cost[:, 0], linewidth=2)
    axes[1].plot(Cost[:, 1], linewidth=2)
    axes[1].plot(Cost[:, 2], linewidth=2)
    axes[1].plot(Cost[:, 3], linewidth=2)

    axes[1].set_xlabel("Iteration Count")
    axes[0].set_ylabel("Log Gradient Residuals")
    axes[1].set_ylabel("Log (cost - cost_opt)")
    axes[0].set_yscale("log")
    axes[1].set_yscale("log")
    axes[0].legend()
    fig.suptitle("Prox Gradient for Time-Series AdaLASSO "
                 "(n = {}, p = {})".format(n, p))

    plt.savefig("figures/convergence.png")
    plt.savefig("figures/convergence.pdf")
    plt.show()
    return