Example #1
def lbfgs(X,
          y,
          regularizer=None,
          lamduh=1.0,
          max_iter=100,
          tol=1e-4,
          family=Logistic,
          verbose=False,
          **kwargs):
    """L-BFGS solver using scipy.optimize implementation

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    regularizer : str or Regularizer
    lamduh : float
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    tol : float
        Maximum allowed change from prior iteration required to
        declare convergence
    family : Family
    verbose : bool, default False
        whether to print diagnostic information during convergence

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    pointwise_loss = family.pointwise_loss
    pointwise_gradient = family.pointwise_gradient
    if regularizer is not None:
        regularizer = Regularizer.get(regularizer)
        pointwise_loss = regularizer.add_reg_f(pointwise_loss, lamduh)
        pointwise_gradient = regularizer.add_reg_grad(pointwise_gradient,
                                                      lamduh)

    n, p = X.shape
    beta0 = np.zeros(p)

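    # Evaluate the (regularized) loss and its gradient in a single dask
    # compute() call so that shared intermediates are evaluated only once.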
    def compute_loss_grad(beta, X, y):
        loss_fn = pointwise_loss(beta, X, y)
        gradient_fn = pointwise_gradient(beta, X, y)
        loss, gradient = compute(loss_fn, gradient_fn)
        return loss, gradient.copy()

    with dask.config.set(fuse_ave_width=0):  # fusion optimization slows this down
        beta, loss, info = fmin_l_bfgs_b(compute_loss_grad,
                                         beta0,
                                         fprime=None,
                                         args=(X, y),
                                         iprint=(verbose > 0) - 1,
                                         pgtol=tol,
                                         maxiter=max_iter)

    return beta
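
A minimal usage sketch for the solver above, assuming the module-level imports it relies on (numpy, dask.array, scipy.optimize.fmin_l_bfgs_b, the Logistic family); the data here is hypothetical:

import numpy as np
import dask.array as da

# Hypothetical toy problem: 1,000 samples, 2 features, chunked along rows.
X = da.random.random((1000, 2), chunks=(100, 2))
y = (da.random.random(1000, chunks=(100,)) > 0.5).astype('float64')

beta = lbfgs(X, y, regularizer='l2', lamduh=0.1)
print(beta)  # ndarray of shape (2,)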
Example #2
    assert opt < test_val


@pytest.mark.parametrize('func,kwargs', [
    (admm, {
        'abstol': 1e-4
    }),
    (proximal_grad, {
        'tol': 1e-7
    }),
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', Family._all_children())
@pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
@pytest.mark.parametrize('reg', Regularizer._all_children())
def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
    beta = np.random.normal(size=2)
    M = len(beta)
    X = da.random.random((N, M), chunks=(N // nchunks, M))
    y = make_y(X, beta=np.array(beta), chunks=(N // nchunks, ))

    X, y = persist(X, y)

    result = func(X, y, family=family, lamduh=lam, regularizer=reg, **kwargs)
    test_vec = np.random.normal(size=2)

    f = reg.add_reg_f(family.pointwise_loss, lam)

    opt = f(result, X, y).compute()
    test_val = f(test_vec, X, y).compute()
Example #3
import dask.array as da
import dask.dataframe as dd
import numpy as np
import pandas as pd
import pytest
from dask.dataframe.utils import assert_eq
from dask_glm.regularizers import Regularizer
from sklearn.pipeline import make_pipeline

from dask_ml.datasets import make_classification, make_counts, make_regression
from dask_ml.linear_model import LinearRegression, LogisticRegression, PoissonRegression
from dask_ml.linear_model.utils import add_intercept
from dask_ml.model_selection import GridSearchCV


@pytest.fixture(params=['admm', 'gradient_descent', 'newton', 'lbfgs', 'proximal_grad'])
def solver(request):
    """Parametrized fixture for all the solver names"""
    return request.param


@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()])
def regularizer(request):
    """Parametrized fixture for all the regularizer names"""
    return request.param


class DoNothingTransformer:
    def fit(self, X, y=None):
        return self
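
A hedged sketch of how the parametrized fixtures above are typically consumed; this test body is illustrative and not part of the original file:

def test_fit_with_any_regularizer(regularizer):
    # pytest runs this once per Regularizer subclass instance.
    X, y = make_classification(n_samples=100, n_features=5, chunks=50)
    lr = LogisticRegression(penalty=regularizer, solver='admm', max_iter=5)
    lr.fit(X, y)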
Example #4
def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
                  max_iter=100, tol=1e-8, **kwargs):
    """

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    tol : float
        Maximum allowed change from prior iteration required to
        declare convergence
    family : Family
    verbose : bool, default False
        whether to print diagnostic information during convergence

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    n, p = X.shape
    firstBacktrackMult = 0.1
    nextBacktrackMult = 0.5
    armijoMult = 0.1
    stepGrowth = 1.25
    stepSize = 1.0
    recalcRate = 10
    backtrackMult = firstBacktrackMult
    beta = np.zeros(p)
    regularizer = Regularizer.get(regularizer)

    for k in range(max_iter):
        # Compute the gradient
        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            func = family.loglike(Xbeta, y)

        gradient = family.gradient(Xbeta, X, y)

        Xbeta, func, gradient = persist(
            Xbeta, func, gradient)

        obeta = beta

        # Compute the step size
        lf = func
        for ii in range(100):
            beta = regularizer.proximal_operator(obeta - stepSize * gradient, stepSize * lamduh)
            step = obeta - beta
            Xbeta = X.dot(beta)

            Xbeta, beta = persist(Xbeta, beta)

            func = family.loglike(Xbeta, y)
            func = persist(func)[0]
            func = compute(func)[0]
            df = lf - func
            if df > 0:
                break
            stepSize *= backtrackMult
        if stepSize == 0:
            break
        df /= max(func, lf)
        if df < tol:
            break
        stepSize *= stepGrowth
        backtrackMult = nextBacktrackMult

    # The L2 regularizer can return a dask array; compute it if so
    try:
        return beta.compute()
    except AttributeError:
        return beta
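
For reference, with regularizer='l1' the proximal step above reduces to elementwise soft thresholding; a minimal NumPy sketch of that operator (an illustration, not dask_glm's own implementation):

import numpy as np

def soft_threshold(beta, t):
    # Proximal operator of t * ||beta||_1: shrink each coefficient toward
    # zero by t, clipping at zero.
    return np.sign(beta) * np.maximum(np.abs(beta) - t, 0)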
Example #5
def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1,
         max_iter=250, abstol=1e-4, reltol=1e-2, family=Logistic, **kwargs):
    """
    Alternating Direction Method of Multipliers

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    regularizer : str or Regularizer
    lamduh : float
    rho : float
    over_relax : float
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    abstol, reltol : float
    family : Family

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    pointwise_loss = family.pointwise_loss
    pointwise_gradient = family.pointwise_gradient
    regularizer = Regularizer.get(regularizer)

    def create_local_gradient(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            return func(beta, X, y) + rho * (beta - z + u)
        return wrapped

    def create_local_f(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            return func(beta, X, y) + (rho / 2) * np.dot(beta - z + u,
                                                         beta - z + u)
        return wrapped

    f = create_local_f(pointwise_loss)
    fprime = create_local_gradient(pointwise_gradient)

    nchunks = getattr(X, 'npartitions', 1)
    (n, p) = X.shape
    if isinstance(X, da.Array):
        XD = X.rechunk((None, X.shape[-1])).to_delayed().flatten().tolist()
    else:
        XD = [X]
    if isinstance(y, da.Array):
        yD = y.rechunk((None, y.shape[-1])).to_delayed().flatten().tolist()
    else:
        yD = [y]

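    # z: shared consensus coefficients; u: one scaled dual vector per chunk;
    # betas: per-chunk starting points handed to each local x-update.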
    z = np.zeros(p)
    u = np.array([np.zeros(p) for i in range(nchunks)])
    betas = np.array([np.ones(p) for i in range(nchunks)])

    for k in range(max_iter):

        # x-update step
        new_betas = [delayed(local_update)(xx, yy, bb, z, uu, rho, f=f,
                                           fprime=fprime) for
                     xx, yy, bb, uu in zip(XD, yD, betas, u)]
        new_betas = np.array(da.compute(*new_betas))

        beta_hat = over_relax * new_betas + (1 - over_relax) * z

        #  z-update step
        zold = z.copy()
        ztilde = np.mean(beta_hat + np.array(u), axis=0)
        z = regularizer.proximal_operator(ztilde, lamduh / (rho * nchunks))

        # u-update step
        u += beta_hat - z

        # check for convergence
        primal_res = np.linalg.norm(new_betas - z)
        dual_res = np.linalg.norm(rho * (z - zold))

        eps_pri = np.sqrt(p * nchunks) * abstol + reltol * np.maximum(
            np.linalg.norm(new_betas), np.sqrt(nchunks) * np.linalg.norm(z))
        eps_dual = np.sqrt(p * nchunks) * abstol + \
            reltol * np.linalg.norm(rho * u)

        if primal_res < eps_pri and dual_res < eps_dual:
            break

    return z
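
For reference, the loop above follows the standard consensus-ADMM updates, with N = nchunks data chunks, regularization weight \lambda = lamduh, penalty r from the regularizer, and over-relaxation \alpha = over_relax:

$$\beta_i^{k+1} = \arg\min_\beta\; f_i(\beta) + \tfrac{\rho}{2}\,\lVert \beta - z^k + u_i^k \rVert_2^2, \qquad \hat\beta_i^{k+1} = \alpha\,\beta_i^{k+1} + (1-\alpha)\,z^k$$

$$z^{k+1} = \operatorname{prox}_{\frac{\lambda}{\rho N}\, r}\Big(\tfrac{1}{N}\sum_{i=1}^{N}\big(\hat\beta_i^{k+1} + u_i^k\big)\Big), \qquad u_i^{k+1} = u_i^k + \hat\beta_i^{k+1} - z^{k+1}$$

where f_i is the pointwise loss restricted to chunk i.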
Example #6
def proximal_grad(X,
                  y,
                  regularizer='l1',
                  lamduh=0.1,
                  family=Logistic,
                  max_iter=100,
                  tol=1e-8,
                  verbose=False):
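    """Proximal gradient solver with backtracking line search and optional progress output."""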

    n, p = X.shape
    firstBacktrackMult = 0.1
    nextBacktrackMult = 0.5
    armijoMult = 0.1
    stepGrowth = 1.25
    stepSize = 1.0
    recalcRate = 10
    backtrackMult = firstBacktrackMult
    beta = np.zeros(p)
    regularizer = Regularizer.get(regularizer)

    if verbose:
        print('#       -f        |df/f|    |dx/x|    step')
        print('----------------------------------------------')

    for k in range(max_iter):
        # Compute the gradient
        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            func = family.loglike(Xbeta, y)

        gradient = family.gradient(Xbeta, X, y)

        Xbeta, func, gradient = persist(Xbeta, func, gradient)

        obeta = beta

        # Compute the step size
        lf = func
        for ii in range(100):
            beta = regularizer.proximal_operator(obeta - stepSize * gradient,
                                                 stepSize * lamduh)
            step = obeta - beta
            Xbeta = X.dot(beta)

            overflow = (Xbeta < 700).all()
            overflow, Xbeta, beta = persist(overflow, Xbeta, beta)
            overflow = compute(overflow)[0]

            # Only evaluate the log-likelihood when exp(Xbeta) cannot overflow
            if overflow:
                func = family.loglike(Xbeta, y)
                func = persist(func)[0]
                func = compute(func)[0]
                df = lf - func
                if df > 0:
                    break
            stepSize *= backtrackMult
        if stepSize == 0:
            print('No more progress')
            break
        df /= max(func, lf)
        db = 0
        if verbose:
            print('%2d  %.6e %9.2e  %.2e  %.1e' %
                  (k + 1, func, df, db, stepSize))
        if df < tol:
            print('Converged')
            break
        stepSize *= stepGrowth
        backtrackMult = nextBacktrackMult

    return beta
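
A minimal usage sketch for the solver above with hypothetical chunked data (the default Logistic family expects a 0/1 target):

import dask.array as da

X = da.random.random((1000, 2), chunks=(100, 2))
y = (da.random.random(1000, chunks=(100,)) > 0.5).astype('float64')

beta = proximal_grad(X, y, regularizer='l1', lamduh=0.1, verbose=True)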
Example #7
def admm(X,
         y,
         regularizer='l1',
         lamduh=0.1,
         rho=1,
         over_relax=1,
         max_iter=250,
         abstol=1e-4,
         reltol=1e-2,
         family=Logistic):
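    """Alternating Direction Method of Multipliers (consensus ADMM) solver."""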

    pointwise_loss = family.pointwise_loss
    pointwise_gradient = family.pointwise_gradient
    regularizer = Regularizer.get(regularizer)

    def create_local_gradient(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            return func(beta, X, y) + rho * (beta - z + u)

        return wrapped

    def create_local_f(func):
        @functools.wraps(func)
        def wrapped(beta, X, y, z, u, rho):
            return func(beta, X,
                        y) + (rho / 2) * np.dot(beta - z + u, beta - z + u)

        return wrapped

    f = create_local_f(pointwise_loss)
    fprime = create_local_gradient(pointwise_gradient)

    nchunks = getattr(X, 'npartitions', 1)
    (n, p) = X.shape
    if isinstance(X, da.Array):
        XD = X.rechunk((None, X.shape[-1])).to_delayed().flatten().tolist()
    else:
        XD = [X]
    if isinstance(y, da.Array):
        yD = y.rechunk((None, y.shape[-1])).to_delayed().flatten().tolist()
    else:
        yD = [y]

    z = np.zeros(p)
    u = np.array([np.zeros(p) for i in range(nchunks)])
    betas = np.array([np.ones(p) for i in range(nchunks)])

    for k in range(max_iter):

        # x-update step
        new_betas = [
            delayed(local_update)(xx, yy, bb, z, uu, rho, f=f, fprime=fprime)
            for xx, yy, bb, uu in zip(XD, yD, betas, u)
        ]
        new_betas = np.array(da.compute(*new_betas))

        beta_hat = over_relax * new_betas + (1 - over_relax) * z

        #  z-update step
        zold = z.copy()
        ztilde = np.mean(beta_hat + np.array(u), axis=0)
        z = regularizer.proximal_operator(ztilde, lamduh / (rho * nchunks))

        # u-update step
        u += beta_hat - z

        # check for convergence
        primal_res = np.linalg.norm(new_betas - z)
        dual_res = np.linalg.norm(rho * (z - zold))

        eps_pri = np.sqrt(p * nchunks) * abstol + reltol * np.maximum(
            np.linalg.norm(new_betas),
            np.sqrt(nchunks) * np.linalg.norm(z))
        eps_dual = np.sqrt(p * nchunks) * abstol + \
            reltol * np.linalg.norm(rho * u)

        if primal_res < eps_pri and dual_res < eps_dual:
            print("Converged!", k)
            break

    return z
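
The local_update helper called in the x-update step is not defined in these snippets; it is assumed to minimize the rho-augmented local objective for one chunk, for example with L-BFGS. A sketch under that assumption (signature and defaults are illustrative):

from scipy.optimize import fmin_l_bfgs_b

def local_update(X, y, beta, z, u, rho, f, fprime):
    # Hypothetical sketch: f and fprime are the wrapped (rho-augmented)
    # chunk-local objective and gradient built above; minimize them with
    # L-BFGS, starting from the supplied beta for this chunk.
    beta, _, _ = fmin_l_bfgs_b(f, beta, fprime=fprime, args=(X, y, z, u, rho))
    return beta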
Example #8
    opt = family.pointwise_loss(result, X, y).compute()
    test_val = family.pointwise_loss(test_vec, X, y).compute()

    assert opt < test_val


@pytest.mark.parametrize('func,kwargs', [
    (admm, {'abstol': 1e-4}),
    (proximal_grad, {'tol': 1e-7}),
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
@pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
@pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()])
def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
    beta = np.random.normal(size=2)
    M = len(beta)
    X = da.random.random((N, M), chunks=(N // nchunks, M))
    y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,))

    X, y = persist(X, y)

    result = func(X, y, family=family, lamduh=lam, regularizer=reg, **kwargs)
    test_vec = np.random.normal(size=2)

    f = reg.add_reg_f(family.pointwise_loss, lam)

    opt = f(result, X, y).compute()
    test_val = f(test_vec, X, y).compute()