Example #1
def adalasso_bic_nu(X, p, eps=1e-3):
    """
    Fits the whole deal as well as choosing nu via BIC.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic_nu(R, T, eps)
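A minimal usage sketch, assuming adalasso_bic_nu and its dependencies are in scope (the module name is not shown in these examples); the five return values mirror the unpacking of _adalasso_bic_nu in Example #7 below and are an assumption here:

import numpy as np

X = np.random.normal(size=(500, 10))              # toy data: T = 500, n = 10
B, cost, lmbda, bic, nu = adalasso_bic_nu(X, p=3)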
Example #2
def solve_adalasso(X, p, lmbda, nu, step_rule=0.1, line_srch=None,
                   eps=1e-6, maxiter=MAXITER, method="fista"):
    R = compute_covariance(X, p)
    B0 = _wld_init(R)
    W = 1. / np.abs(B0)**nu
    B0 = dither(B0)
    return _solve_lasso(R, B0, lmbda, W, step_rule, line_srch,
                        eps, maxiter, method)
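The adaptive weighting above penalizes each coefficient in inverse proportion to the magnitude of the pilot estimate B0. A small, self-contained illustration of that effect (the numbers below are arbitrary and not from the library):

import numpy as np

B0 = np.array([2.0, 0.5, 0.01])    # pilot estimates of three coefficients
nu = 1.25
W = 1.0 / np.abs(B0) ** nu
print(W)                           # approx. [0.42, 2.38, 316.2]: large pilot
                                   # coefficients are barely penalized,
                                   # near-zero ones are penalized heavily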
Example #3
def solve_lasso(X, p, lmbda=0.0, W=1.0, step_rule=0.1,
                line_srch=None, eps=1e-6, maxiter=MAXITER,
                method="ista"):
    assert np.all(W >= 0), "W must be non-negative"
    R = compute_covariance(X, p_max=p)
    B0 = dither(_wld_init(R))
    return _solve_lasso(R, B0, lmbda, W, step_rule=step_rule,
                        line_srch=line_srch, eps=eps, maxiter=maxiter,
                        method=method)
Example #4
def adalasso_bic_path(X, p, nu=1.25, lmbda_path=np.logspace(-6, 1.0, 250),
                      eps=1e-3):
    """
    Fit a VAR(p) model by solving lasso and searching for optimal
    regularizer by solving lasso along a regularization path, and then
    using the BIC criterion.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic_path(R, T, p, nu=nu, lmbda_path=lmbda_path, eps=eps)
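A hedged call sketch with a custom, coarser path; the structure of the return value is determined by _adalasso_bic_path, which is not shown in these examples, so it is left unpacked:

import numpy as np

X = np.random.normal(size=(500, 10))              # synthetic stand-in data
result = adalasso_bic_path(X, p=3, lmbda_path=np.logspace(-4, 0, 50))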
Example #5
def adalasso_bic(X, p, nu=1.25, lmbda_max=None):
    """
    Fit a VAR(p) model by optimizing BIC with a bisection method.
    This will be faster than adalasso_bic_path, but it is possible it
    will pick a bad regularization parameter.  It also (obviously)
    won't return the BIC path.
    """
    T = len(X)
    R = compute_covariance(X, p_max=p)
    return _adalasso_bic(R, T, p, nu, lmbda_max)
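A usage sketch; the four return values mirror the unpacking of _adalasso_bic in Example #7 below and are an assumption here:

import numpy as np

X = np.random.normal(size=(500, 10))              # synthetic stand-in data
B, cost, lmbda, bic = adalasso_bic(X, p=3, nu=1.25)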
Example #6
def regularization_path(X, p, lmbda_path, W=1.0, step_rule=0.1,
                        line_srch=None, eps=1e-6, maxiter=MAXITER,
                        method="ista"):
    """
    Given an iterable for lmbda, return the whole regularization path
    as a 4D array indexed as [lmbda, tau, i, j].

    line_srch can either be None for constant step sizes, or a tuple
    (L0, eta) specifying the initial stepsize as 1/L0 and with L
    increasing exponential with factor eta to find workable steps.
    Must have L0 > 0, eta > 1
    """
    R = compute_covariance(X, p_max=p)
    B0 = dither(_wld_init(R))
    return _regularization_path(R, B0, lmbda_path, W=W, step_rule=step_rule,
                                line_srch=line_srch, eps=eps, maxiter=maxiter,
                                method=method)
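A sketch using the backtracking option documented above; L0 = 1.0 and eta = 1.5 are arbitrary values satisfying L0 > 0 and eta > 1, and the indexing follows the docstring's [lmbda, tau, i, j] convention:

import numpy as np

X = np.random.normal(size=(500, 10))              # synthetic stand-in data
lmbdas = np.logspace(-4, 0, 20)
path = regularization_path(X, p=2, lmbda_path=lmbdas,
                           line_srch=(1.0, 1.5), method="fista")
B_5 = path[5]    # coefficient array for the 6th lmbda, indexed [tau, i, j]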
Example #7
def fit_VAR(X, p_max, nu=1.25, eps=1e-3):
    if nu is None:
        # set nu=None to estimate nu via BIC as well
        nu = 1.25
        fit_nu = True
    else:
        fit_nu = False

    T = len(X)
    R = compute_covariance(X, p_max=p_max)

    bic_star = -np.inf
    cost_star = np.inf
    lmbda_star = None
    B_star = None
    for p in range(1, p_max + 1):
        B, cost, lmbda, bic = _adalasso_bic(R[:p + 1], T, p, nu,
                                            lmbda_max=None, eps=eps)

        if bic > bic_star:
            B_star = B
            cost_star = cost
            bic_star = bic
            lmbda_star = lmbda

        elif bic < 0.75 * bic_star:
            break

    # Trim trailing lag matrices that are identically zero.
    while np.all(B_star[-1] == 0) and len(B_star) > 1:
        B_star = B_star[:-1]

    if fit_nu:
        p_star = len(B_star)
        B_star, cost_star, lmbda_star, bic_star, nu_star =\
            _adalasso_bic_nu(R[:p_star + 1], T, eps=eps)
        return B_star, cost_star, lmbda_star, bic_star, nu_star
    else:
        return B_star, cost_star, lmbda_star, bic_star
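Usage follows directly from the two branches above: with a fixed nu the function returns four values, and with nu=None a BIC-selected nu is returned as a fifth value. A short sketch, assuming fit_VAR and its dependencies are in scope:

import numpy as np

X = np.random.normal(size=(500, 10))              # synthetic stand-in data

# Fixed nu: four return values.
B, cost, lmbda, bic = fit_VAR(X, p_max=5, nu=1.25)

# nu=None: nu is also chosen via BIC, adding a fifth return value.
B, cost, lmbda, bic, nu = fit_VAR(X, p_max=5, nu=None)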
Example #8
def convergence_example():
    np.random.seed(0)
    T = 1000
    n = 50
    p = 15
    X = np.random.normal(size=(T, n))
    # Synthesize a series with lagged, cross-channel dependence.
    X[1:] = 0.25 * np.random.normal(size=(T - 1, n)) + X[:-1, ::-1]
    X[2:] = 0.25 * np.random.normal(size=(T - 2, n)) + X[:-2, ::-1]
    X[2:, 0] = 0.25 * np.random.normal(size=T - 2) + X[:-2, 1]
    X[3:, 1] = 0.25 * np.random.normal(size=T - 3) + X[:-3, 2]

    R = compute_covariance(X, p)
    # Initialize from the whittle_lev_durb estimate, then perturb it slightly.
    A, _, _ = whittle_lev_durb(R)
    B0 = A_to_B(A)
    B0 = B0 + 0.1 * np.random.normal(size=B0.shape)

    lmbda = 0.025

    # Separate iterates (and line-search state) for each of the four methods.
    B_basic = B0
    B_decay_step = B0
    B_bt = B0
    L_bt = 0.01
    B_f = B0
    L_f = 0.01
    t_f = 1.0
    M_f = B0

    W = 1. / np.abs(B0)**(1.25)  # Adaptive weighting

    # Reference solution: run FISTA for many iterations; eps=-np.inf disables
    # the convergence check so all maxiter iterations are used.
    B_star, _ = _solve_lasso(R,
                             B0,
                             lmbda,
                             W,
                             step_rule=0.01,
                             line_srch=1.1,
                             method="fista",
                             eps=-np.inf,
                             maxiter=3000)
    cost_star = cost_function(B_star, R, lmbda=lmbda, W=W)

    N_iters = 100
    N_algs = 4
    GradRes = np.empty((N_iters, N_algs))
    Cost = np.empty((N_iters, N_algs))
    # Advance each algorithm one iteration per loop pass, recording the cost
    # and gradient residual after every step.
    for it in range(N_iters):
        B_basic, err_basic = _basic_prox_descent(R,
                                                 B_basic,
                                                 lmbda=lmbda,
                                                 maxiter=1,
                                                 ss=0.01,
                                                 eps=-np.inf,
                                                 W=W)
        B_decay_step, err_decay_step = _basic_prox_descent(R,
                                                           B_decay_step,
                                                           lmbda=lmbda,
                                                           maxiter=1,
                                                           ss=1. / (it + 1),
                                                           eps=-np.inf,
                                                           W=W)
        B_bt, err_bt, L_bt = _backtracking_prox_descent(R,
                                                        B_bt,
                                                        lmbda,
                                                        eps=-np.inf,
                                                        maxiter=1,
                                                        L=L_bt,
                                                        eta=1.1,
                                                        W=W)
        B_f, err_f, L_f, t_f, M_f = _fast_prox_descent(R,
                                                       B_f,
                                                       lmbda,
                                                       eps=-np.inf,
                                                       maxiter=1,
                                                       L=L_f,
                                                       eta=1.1,
                                                       t=t_f,
                                                       M0=M_f,
                                                       W=W)

        Cost[it, 0] = cost_function(B_basic, R, lmbda, W=W)
        Cost[it, 1] = cost_function(B_decay_step, R, lmbda, W=W)
        Cost[it, 2] = cost_function(B_bt, R, lmbda, W=W)
        Cost[it, 3] = cost_function(B_f, R, lmbda, W=W)

        GradRes[it, 0] = err_basic
        GradRes[it, 1] = err_decay_step
        GradRes[it, 2] = err_bt
        GradRes[it, 3] = err_f

    Cost = Cost - cost_star
    fig, axes = plt.subplots(2, 1, sharex=True)
    axes[0].plot(GradRes[:, 0], label="ISTA (constant stepsize)", linewidth=2)
    axes[0].plot(GradRes[:, 1], label="ISTA (1/t stepsize)", linewidth=2)
    axes[0].plot(GradRes[:, 2],
                 label="ISTA with Backtracking Line Search",
                 linewidth=2)
    axes[0].plot(GradRes[:, 3],
                 label="FISTA with Backtracking Line Search",
                 linewidth=2)

    axes[1].plot(Cost[:, 0], linewidth=2)
    axes[1].plot(Cost[:, 1], linewidth=2)
    axes[1].plot(Cost[:, 2], linewidth=2)
    axes[1].plot(Cost[:, 3], linewidth=2)

    axes[1].set_xlabel("Iteration Count")
    axes[0].set_ylabel("Log Gradient Residuals")
    axes[1].set_ylabel("Log (cost - cost_opt)")
    axes[0].set_yscale("log")
    axes[1].set_yscale("log")

    axes[0].legend()

    fig.suptitle("Prox Gradient for Time-Series AdaLASSO "
                 "(n = {}, p = {})".format(n, p))
    plt.savefig("figures/convergence.png")
    plt.savefig("figures/convergence.pdf")
    plt.show()
    return