Code example #1
def wrapped(beta, X, y, z, u, rho):
    beta = maybe_to_cupy(beta, X)
    z = maybe_to_cupy(z, X)
    u = maybe_to_cupy(u, X)
    res = func(beta, X, y) + (rho / 2) * np.dot(beta - z + u, beta - z + u)
    return normalize_to_array(res)
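This wrapped objective is the per-chunk target minimized in ADMM's x-update: the family's pointwise loss plus the augmented quadratic term (rho / 2) * ||beta - z + u||^2 (code example #5 shows it in context, where func, maybe_to_cupy, and normalize_to_array come from the surrounding module). A minimal, self-contained sketch of the same idea, with a NumPy squared-error loss standing in for the GLM family loss; the names below are illustrative, not part of the original code:

# Sketch only: squared-error loss stands in for the GLM pointwise loss, and the
# maybe_to_cupy / normalize_to_array conversions are omitted (plain NumPy inputs).
import numpy as np

def pointwise_loss(beta, X, y):
    return 0.5 * np.sum((X.dot(beta) - y) ** 2)

def local_objective(beta, X, y, z, u, rho):
    # loss plus the ADMM augmented term (rho / 2) * ||beta - z + u||^2
    diff = beta - z + u
    return pointwise_loss(beta, X, y) + (rho / 2) * np.dot(diff, diff)

X = np.random.random((20, 3))
y = X.dot(np.ones(3))
print(local_objective(np.zeros(3), X, y, np.zeros(3), np.zeros(3), rho=1.0))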
Code example #2
def compute_loss_grad(beta, X, y):
    beta = maybe_to_cupy(beta, X)
    scatter_beta = scatter_array(
        beta, dask_distributed_client) if dask_distributed_client else beta
    loss_fn = pointwise_loss(scatter_beta, X, y)
    gradient_fn = pointwise_gradient(scatter_beta, X, y)
    loss, gradient = compute(loss_fn, gradient_fn)
    return normalize_to_array(loss), normalize_to_array(gradient.copy())
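compute_loss_grad evaluates the loss and its gradient in a single dask compute call, so the two share one task graph, and returns them as a (loss, gradient) pair, which is the form scipy's fmin_l_bfgs_b expects when fprime is None and the gradient is not approximated. A toy, self-contained illustration of that calling convention, with a simple quadratic in place of the GLM loss:

# Toy stand-in for compute_loss_grad: a quadratic with its minimum at beta = 1.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def loss_and_grad(beta):
    loss = np.sum((beta - 1.0) ** 2)
    grad = 2.0 * (beta - 1.0)
    return loss, grad           # (f, g) pair consumed directly by the solver

beta_opt, loss, info = fmin_l_bfgs_b(loss_and_grad, np.zeros(4))
print(beta_opt)                 # approximately [1. 1. 1. 1.]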
Code example #3
def test_basic_unreg_descent(func, kwargs, N, nchunks, family, is_cupy):
    beta = np.random.normal(size=2)
    M = len(beta)
    X = da.random.random((N, M), chunks=(N // nchunks, M))
    y = make_y(X, beta=np.array(beta), chunks=(N // nchunks, ))

    if is_cupy:
        cupy = pytest.importorskip('cupy')
        X, y = to_dask_cupy_array_xy(X, y, cupy)

    X, y = persist(X, y)

    result = func(X, y, family=family, **kwargs)
    test_vec = np.random.normal(size=2)
    test_vec = maybe_to_cupy(test_vec, X)

    opt = family.pointwise_loss(result, X, y).compute()
    test_val = family.pointwise_loss(test_vec, X, y).compute()

    assert opt < test_val
Code example #4
def lbfgs(X,
          y,
          regularizer=None,
          lamduh=1.0,
          max_iter=100,
          tol=1e-4,
          family=Logistic,
          verbose=False,
          **kwargs):
    """L-BFGS solver using scipy.optimize implementation

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    regularizer : str or Regularizer
    lamduh : float
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    tol : float
        convergence tolerance, passed to scipy's fmin_l_bfgs_b as the
        projected gradient tolerance (pgtol)
    family : Family
    verbose : bool, default False
        whether to print diagnostic information during convergence

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    dask_distributed_client = get_distributed_client()
    pointwise_loss = family.pointwise_loss
    pointwise_gradient = family.pointwise_gradient
    if regularizer is not None:
        regularizer = Regularizer.get(regularizer)
        pointwise_loss = regularizer.add_reg_f(pointwise_loss, lamduh)
        pointwise_gradient = regularizer.add_reg_grad(pointwise_gradient,
                                                      lamduh)

    n, p = X.shape
    beta0 = np.zeros(p)

    def compute_loss_grad(beta, X, y):
        beta = maybe_to_cupy(beta, X)
        scatter_beta = scatter_array(
            beta, dask_distributed_client) if dask_distributed_client else beta
        loss_fn = pointwise_loss(scatter_beta, X, y)
        gradient_fn = pointwise_gradient(scatter_beta, X, y)
        loss, gradient = compute(loss_fn, gradient_fn)
        return normalize_to_array(loss), normalize_to_array(gradient.copy())

    with dask.config.set(fuse_ave_width=0):  # optimizations slow this down
        beta, loss, info = fmin_l_bfgs_b(compute_loss_grad,
                                         beta0,
                                         fprime=None,
                                         args=(X, y),
                                         iprint=(verbose > 0) - 1,
                                         pgtol=tol,
                                         maxiter=max_iter)
    beta = maybe_to_cupy(beta, X)
    return beta
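A hedged usage sketch for the lbfgs solver above: it assumes lbfgs and the Logistic family shown in these excerpts are already in scope (the import path is not part of the excerpt), and the synthetic logistic data below is purely illustrative.

import numpy as np
import dask.array as da

N, p, nchunks = 10000, 5, 4
X = da.random.random((N, p), chunks=(N // nchunks, p))
true_beta = np.random.normal(size=p)
# Bernoulli labels drawn from a logistic model
prob = 1 / (1 + da.exp(-X.dot(true_beta)))
y = (da.random.random(N, chunks=N // nchunks) < prob).astype(float)

# assumes lbfgs and Logistic (defined above) are in scope
beta = lbfgs(X, y, family=Logistic, max_iter=100, tol=1e-4)
print(beta)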
Code example #5
def admm(X,
         y,
         regularizer='l1',
         lamduh=0.1,
         rho=1,
         over_relax=1,
         max_iter=250,
         abstol=1e-4,
         reltol=1e-2,
         family=Logistic,
         **kwargs):
    """
    Alternating Direction Method of Multipliers

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    regularizer : str or Regularizer
    lamduh : float
    rho : float
    over_relax : float
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    abstol, reltol : float
    family : Family

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    pointwise_loss = family.pointwise_loss
    pointwise_gradient = family.pointwise_gradient
    regularizer = Regularizer.get(regularizer)

    def create_local_gradient(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            beta = maybe_to_cupy(beta, X)
            z = maybe_to_cupy(z, X)
            u = maybe_to_cupy(u, X)
            res = func(beta, X, y) + rho * (beta - z + u)
            return normalize_to_array(res)

        return wrapped

    def create_local_f(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            beta = maybe_to_cupy(beta, X)
            z = maybe_to_cupy(z, X)
            u = maybe_to_cupy(u, X)
            res = func(beta, X,
                       y) + (rho / 2) * np.dot(beta - z + u, beta - z + u)
            return normalize_to_array(res)

        return wrapped

    f = create_local_f(pointwise_loss)
    fprime = create_local_gradient(pointwise_gradient)

    nchunks = getattr(X, 'npartitions', 1)
    (n, p) = X.shape
    # one delayed block per row-chunk; rechunk so no block is split across columns
    if isinstance(X, da.Array):
        XD = X.rechunk((None, X.shape[-1])).to_delayed().flatten().tolist()
    else:
        XD = [X]
    if isinstance(y, da.Array):
        yD = y.rechunk((None, y.shape[-1])).to_delayed().flatten().tolist()
    else:
        yD = [y]

    z = np.zeros(p)
    u = np.array([np.zeros(p) for i in range(nchunks)])
    betas = np.array([np.ones(p) for i in range(nchunks)])

    for k in range(max_iter):

        # x-update step
        new_betas = [
            delayed(local_update)(xx, yy, bb, z, uu, rho, f=f, fprime=fprime)
            for xx, yy, bb, uu in zip(XD, yD, betas, u)
        ]
        new_betas = np.array(da.compute(*new_betas))

        beta_hat = over_relax * new_betas + (1 - over_relax) * z

        # z-update step
        zold = z.copy()
        ztilde = np.mean(beta_hat + np.array(u), axis=0)
        z = regularizer.proximal_operator(ztilde, lamduh / (rho * nchunks))

        # u-update step
        u += beta_hat - z

        # check for convergence
        primal_res = np.linalg.norm(new_betas - z)
        dual_res = np.linalg.norm(rho * (z - zold))

        eps_pri = np.sqrt(p * nchunks) * abstol + reltol * np.maximum(
            np.linalg.norm(new_betas),
            np.sqrt(nchunks) * np.linalg.norm(z))
        eps_dual = np.sqrt(p * nchunks) * abstol + \
            reltol * np.linalg.norm(rho * u)

        if primal_res < eps_pri and dual_res < eps_dual:
            break

    return maybe_to_cupy(z, X)
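A similar hedged usage sketch for admm, again assuming admm and Logistic from the code above are in scope; the 'l1' regularizer and lamduh value simply echo the defaults in the signature, and the data generation is illustrative.

import numpy as np
import dask.array as da

N, p, nchunks = 10000, 5, 4
X = da.random.random((N, p), chunks=(N // nchunks, p))
true_beta = np.array([1.5, -2.0, 0.0, 0.0, 0.5])
prob = 1 / (1 + da.exp(-X.dot(true_beta)))
y = (da.random.random(N, chunks=N // nchunks) < prob).astype(float)

# each row-chunk gets its own local x-update; z is the consensus estimate returned
beta = admm(X, y, regularizer='l1', lamduh=0.1, rho=1, max_iter=250)
print(beta)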