Example #1
import numpy as np

# `model` (providing grad_nl) is a project-local module
def grad_l1(beta, game_matrix_list, l):
    '''
    compute the gradient of the model (neg_log_like + l1)
    ----------
    Input:
    beta: TxN array or a TN vector
    game_matrix_list: TxNxN array
    l: coefficient of penalty term
    ----------
    Output:
    gradient: gradient of the negative log likelihood plus the l1 penalty
    '''
    # reshape beta into TxN array
    T, N = game_matrix_list.shape[0:2]
    beta = np.reshape(beta, [T, N])

    # gradient of the negative log likelihood
    l1_grad = model.grad_nl(beta, game_matrix_list)
    # add the (sub)gradient of l * sum_t ||beta[t+1] - beta[t]||_1
    diff = beta[1:] - beta[:-1]
    l1_grad[N:] += l * np.sign(diff).reshape(((T - 1) * N, 1))
    l1_grad[:-N] -= l * np.sign(diff).reshape(((T - 1) * N, 1))

    return l1_grad
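
A quick way to sanity-check this gradient is to isolate the penalty term, which is a fused-lasso (total-variation) penalty l * sum_t ||beta[t+1] - beta[t]||_1 over successive rows of beta. Below is a minimal, self-contained sketch of just that term with a central-difference check; the names tv_l1_penalty and tv_l1_penalty_grad are hypothetical and not part of the project:

import numpy as np

def tv_l1_penalty(beta, l):
    # l * sum_t ||beta[t+1] - beta[t]||_1
    return l * np.sum(np.abs(beta[1:] - beta[:-1]))

def tv_l1_penalty_grad(beta, l):
    # (sub)gradient of the penalty above, shape (T, N)
    grad = np.zeros_like(beta)
    sign = np.sign(beta[1:] - beta[:-1])
    grad[1:] += l * sign   # same role as the l1_grad[N:] update
    grad[:-1] -= l * sign  # same role as the l1_grad[:-N] update
    return grad

# central-difference check at a point with no zero differences
rng = np.random.default_rng(0)
beta, l, eps = rng.standard_normal((4, 3)), 0.7, 1e-6
e = np.zeros_like(beta)
e[2, 1] = eps
num = (tv_l1_penalty(beta + e, l) - tv_l1_penalty(beta - e, l)) / (2 * eps)
assert abs(num - tv_l1_penalty_grad(beta, l)[2, 1]) < 1e-5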
Example #2
import numpy as np

# `model` (providing grad_nl) is a project-local module
def grad_l2(beta, game_matrix_list, l):
    '''
    compute the gradient of the model (neg_log_like + l2)
    ----------
    Input:
    beta: TxN array or a TN vector
    game_matrix_list: TxNxN array
    l: coefficient of penalty term
    ----------
    Output:
    gradient: gradient of the negative log likelihood plus the l2 penalty
    '''
    # reshape beta into TxN array
    T, N = game_matrix_list.shape[0:2]
    beta = np.reshape(beta, [T, N])

    # gradient of the negative log likelihood
    l2_grad = model.grad_nl(beta, game_matrix_list)
    # add the (sub)gradient of l * sum_t ||beta[t+1] - beta[t]||_2
    diff = beta[1:] - beta[:-1]
    w = np.linalg.norm(diff, axis=1)
    w[w != 0] = 1 / w[w != 0]
    unit = diff * w[:, None]  # unit differences; zero rows stay zero
    l2_grad[N:] += l * unit.reshape(((T - 1) * N, 1))
    l2_grad[:-N] -= l * unit.reshape(((T - 1) * N, 1))

    return l2_grad
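
The penalty here is the group (l2, not squared) version, l * sum_t ||beta[t+1] - beta[t]||_2, whose subgradient is the unit difference vector, taken as zero when a difference vanishes. A self-contained sketch of that term alone, vectorized with a safe division (tv_l2_penalty_grad is a hypothetical name):

import numpy as np

def tv_l2_penalty_grad(beta, l):
    # (sub)gradient of l * sum_t ||beta[t+1] - beta[t]||_2, shape (T, N)
    diff = beta[1:] - beta[:-1]                          # (T-1, N)
    norms = np.linalg.norm(diff, axis=1, keepdims=True)  # (T-1, 1)
    # unit vectors, 0 where a difference vanishes (a valid subgradient)
    unit = np.divide(diff, norms, out=np.zeros_like(diff), where=norms != 0)
    grad = np.zeros_like(beta)
    grad[1:] += l * unit   # same role as the l2_grad[N:] update
    grad[:-1] -= l * unit  # same role as the l2_grad[:-N] update
    return grad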
Example #3
import sys

import numpy as np
import scipy as sc
import scipy.linalg  # makes sc.linalg available

# `model` (grad_nl, hess_nl) and `obj_amlag` are project-local
def admm_sub_beta(data,
                  T,
                  N,
                  A,
                  lam,
                  eta,
                  beta,
                  muk,
                  thetak,
                  paras,
                  out=sys.stdout):
    '''
    solve the ADMM beta-subproblem: minimize the augmented Lagrangian
    over beta by damped Newton with backtracking line search
    '''
    step_init, ths, max_iter, max_back, a, b = paras
    obj_old = np.inf

    for i in range(max_iter):
        # gradient and Hessian of the augmented Lagrangian
        gradient = model.grad_nl(beta, data) + A.T @ muk + \
            eta * A.T @ (A @ beta - thetak)
        hessian = model.hess_nl(beta, data) + eta * A.T @ A
        # drop the first coordinate of beta, which is held fixed
        gradient = gradient[1:]
        hessian = hessian[1:, 1:]
        # Newton direction (constant during the line search)
        v = -sc.linalg.solve(hessian, gradient)
        # backtracking line search (Armijo condition)
        s = step_init
        beta_new = beta.copy()

        for j in range(max_back):
            beta_new[1:] = beta[1:] + s * v
            obj_new = obj_amlag(beta_new, data, A, muk, eta, thetak)

            if obj_new <= obj_old + b * s * gradient.T @ v:
                break
            s *= a

        beta = beta_new
        if abs(obj_old - obj_new) < ths:
            break
        elif i >= max_iter - 1:
            out.write("Not converged.\n")
            out.flush()
        obj_old = obj_new

    return beta
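
The loop above is a damped Newton method with an Armijo backtracking line search, applied to the augmented Lagrangian obj_amlag with the first coordinate of beta held fixed. The same pattern on a toy quadratic, as a self-contained sketch (newton_backtracking and the toy Q, c are hypothetical, not part of the project):

import numpy as np
import scipy.linalg

def newton_backtracking(f, grad, hess, x0, a=0.2, b=0.5,
                        step_init=1.0, ths=1e-12, max_iter=100):
    # damped Newton with Armijo backtracking, mirroring admm_sub_beta
    x, obj = x0.copy(), f(x0)
    for _ in range(max_iter):
        g, H = grad(x), hess(x)
        v = -scipy.linalg.solve(H, g)  # Newton direction
        s = step_init
        for _ in range(50):            # backtracking line search
            if f(x + s * v) <= obj + b * s * (g @ v):  # Armijo condition
                break
            s *= a
        x = x + s * v
        obj_new = f(x)
        if abs(obj - obj_new) < ths:
            break
        obj = obj_new
    return x

# toy usage: minimize 0.5 x'Qx - c'x, whose minimizer is Q^{-1} c
Q = np.array([[3.0, 1.0], [1.0, 2.0]])
c = np.array([1.0, -1.0])
x_star = newton_backtracking(lambda x: 0.5 * x @ Q @ x - c @ x,
                             lambda x: Q @ x - c,
                             lambda x: Q,
                             np.zeros(2))
assert np.allclose(x_star, np.linalg.solve(Q, c))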
Example #4
import numpy as np

# `model` (providing grad_nl) is a project-local module
def grad_l2_sq(beta, game_matrix_list, l):
    '''
    compute the gradient of the model (neg_log_like + l2_square)
    ----------
    Input:
    beta: TxN array or a TN vector
    game_matrix_list: TxNxN array
    l: coefficient of penalty term
    ----------
    Output:
    gradient: gradient of the negative log likelihood plus the squared l2 penalty
    '''
    # reshape beta into TxN array
    T, N = game_matrix_list.shape[0:2]
    beta = np.reshape(beta, [T, N])

    # gradient of the negative log likelihood
    l2_grad = model.grad_nl(beta, game_matrix_list)
    # add the gradient of l * sum_t ||beta[t+1] - beta[t]||_2^2
    diff = beta[1:] - beta[:-1]
    l2_grad[N:] += 2 * l * diff.reshape(((T - 1) * N, 1))
    l2_grad[:-N] -= 2 * l * diff.reshape(((T - 1) * N, 1))

    return l2_grad
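
Because this penalty is smooth, the analytic gradient can be verified directly with a central difference. A minimal, self-contained check (l2_sq_penalty and l2_sq_penalty_grad are hypothetical names):

import numpy as np

def l2_sq_penalty(beta, l):
    # l * sum_t ||beta[t+1] - beta[t]||_2^2
    return l * np.sum(np.square(beta[1:] - beta[:-1]))

def l2_sq_penalty_grad(beta, l):
    grad = np.zeros_like(beta)
    diff = beta[1:] - beta[:-1]
    grad[1:] += 2 * l * diff   # same role as the l2_grad[N:] update
    grad[:-1] -= 2 * l * diff  # same role as the l2_grad[:-N] update
    return grad

rng = np.random.default_rng(1)
beta, l, eps = rng.standard_normal((5, 2)), 0.3, 1e-6
e = np.zeros_like(beta)
e[1, 0] = eps
num = (l2_sq_penalty(beta + e, l) - l2_sq_penalty(beta - e, l)) / (2 * eps)
assert abs(num - l2_sq_penalty_grad(beta, l)[1, 0]) < 1e-5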
Example #5
import sys

import numpy as np

# `model`, `objective_l2_sq`, and `prox_l2_sq` are project-local
def pgd_l2_sq(data,
              l_penalty=1,
              max_iter=1000,
              ths=1e-12,
              step_init=0.5,
              max_back=200,
              a=0.2,
              b=0.5,
              beta_init=None,
              verbose=False,
              out=sys.stdout):
    '''
    proximal gradient descent for neg_log_like + squared l2 penalty
    with backtracking line search on the smooth part
    '''
    # initialize optimization
    T, N = data.shape[0:2]
    if beta_init is None:
        beta = np.zeros(data.shape[:2])
    else:
        beta = beta_init
    nll = model.neg_log_like(beta, data)

    # initialize record
    objective_wback = [objective_l2_sq(beta, data, l_penalty)]
    if verbose:
        out.write("initial objective value: %f\n" % objective_wback[-1])
        out.flush()

    # iteration
    for i in range(max_iter):
        # compute gradient
        gradient = model.grad_nl(beta, data).reshape([T, N])

        # backtracking line search
        s = step_init
        for j in range(max_back):
            beta_new = prox_l2_sq(beta - s * gradient, s, l_penalty)
            beta_diff = beta_new - beta

            nll_new = model.neg_log_like(beta_new, data)
            nll_back = (nll + np.sum(gradient * beta_diff) +
                        np.sum(np.square(beta_diff)) / (2 * s))

            if nll_new <= nll_back:
                break
            s *= b

        # proximal gradient update
        beta = beta_new
        nll = nll_new

        # record objective value
        objective_wback.append(objective_l2_sq(beta, data, l_penalty))

        if verbose:
            out.write("%d-th PGD, objective value: %f\n" %
                      (i + 1, objective_wback[-1]))
            out.flush()
        if abs(objective_wback[-2] - objective_wback[-1]) < ths:
            if verbose:
                out.write("Converged!\n")
                out.flush()
            break
        elif i >= max_iter - 1:
            if verbose:
                out.write("Not converged.\n")
                out.flush()

    return objective_wback, beta
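
pgd_l2_sq follows the standard proximal gradient template: a gradient step on the smooth negative log likelihood, a prox step for the penalty (prox_l2_sq here), and backtracking against the quadratic upper bound nll + <grad, d> + ||d||^2 / (2s). The same template for the lasso, where the prox is soft-thresholding, as a self-contained sketch (pgd_lasso and soft_threshold are hypothetical names, not part of the project):

import numpy as np

def soft_threshold(x, t):
    # prox of t * ||.||_1
    return np.sign(x) * np.maximum(np.abs(x) - t, 0.0)

def pgd_lasso(A, b, lam, step_init=1.0, shrink=0.5,
              max_iter=1000, max_back=50, ths=1e-10):
    # minimize 0.5 * ||A x - b||^2 + lam * ||x||_1
    f = lambda x: 0.5 * np.sum((A @ x - b) ** 2)
    obj = lambda x: f(x) + lam * np.sum(np.abs(x))
    x = np.zeros(A.shape[1])
    fx, obj_old = f(x), obj(x)
    for _ in range(max_iter):
        g = A.T @ (A @ x - b)      # gradient of the smooth part
        s = step_init
        for _ in range(max_back):  # backtracking line search
            x_new = soft_threshold(x - s * g, s * lam)
            d = x_new - x
            # same quadratic upper-bound test as in pgd_l2_sq
            if f(x_new) <= fx + g @ d + d @ d / (2 * s):
                break
            s *= shrink
        x, fx = x_new, f(x_new)
        obj_new = obj(x)
        if abs(obj_old - obj_new) < ths:
            break
        obj_old = obj_new
    return x

# usage on synthetic data with a sparse ground truth
rng = np.random.default_rng(2)
A = rng.standard_normal((50, 10))
x_true = np.zeros(10)
x_true[:3] = [2.0, -1.0, 0.5]
x_hat = pgd_lasso(A, A @ x_true, lam=0.1)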