Example #1
def newton(X, y, max_iter=50, tol=1e-8, family='logistic', **kwargs):
    """Newtons Method for Logistic Regression.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    max_iter : int
        maximum number of iterations to attempt before declaring
        failure to converge
    tol : float
        Maximum allowed change from prior iteration required to
        declare convergence
    family : str or Family, default 'logistic'
        GLM family to use; a string is resolved below via Family.get.

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    family = Family.get(family)
    gradient, hessian = family.gradient, family.hessian
    n, p = X.shape
    beta = np.zeros(p)  # always init to zeros?
    Xbeta = dot(X, beta)

    iter_count = 0
    converged = False

    while not converged:
        beta_old = beta

        # should this use map_blocks()?
        hess = hessian(Xbeta, X)
        grad = gradient(Xbeta, X, y)

        hess, grad = da.compute(hess, grad)

        # should this be dask or numpy?
        # currently uses Python 3 specific syntax
        step, _, _, _ = np.linalg.lstsq(hess, grad)
        beta = (beta_old - step)

        iter_count += 1

        # should change this criterion
        coef_change = np.absolute(beta_old - beta)
        converged = (
            (not np.any(coef_change > tol)) or (iter_count > max_iter))

        if not converged:
            Xbeta = dot(X, beta)  # numpy -> dask conversion of beta

    return beta
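A minimal usage sketch for the function above, assuming newton and the Family registry it relies on are importable and that Dask is installed; the synthetic data and chunk sizes are illustrative only.

import dask.array as da

# Hypothetical inputs: 1000 samples, 5 features, chunked along rows.
X = da.random.random((1000, 5), chunks=(250, 5))
y = (da.random.random(1000, chunks=250) > 0.5).astype(float)

beta = newton(X, y, max_iter=50, tol=1e-8, family='logistic')
print(beta)  # fitted coefficients, shape (5,)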
Example #2
def test_dot_with_sparse():
    A = sparse.random((1024, 64))
    B = sparse.random((64,))
    ans = sparse.dot(A, B)

    # dot(sparse.array, sparse.array)
    res = utils.dot(A, B)
    assert_eq(ans, res)

    # dot(sparse.array, dask.array)
    res = utils.dot(A, da.from_array(B, chunks=B.shape))
    assert_eq(ans, res.compute())

    # dot(dask.array, sparse.array)
    res = utils.dot(da.from_array(A, chunks=A.shape), B)
    assert_eq(ans, res.compute())
Example #3
def test_dot_with_cupy():
    cupy = pytest.importorskip('cupy')

    # dot(cupy.array, cupy.array)
    A = cupy.random.rand(100, 100)
    B = cupy.random.rand(100)
    ans = cupy.dot(A, B)
    res = utils.dot(A, B)
    assert_eq(ans, res)

    # dot(dask.array, cupy.array)
    dA = da.from_array(A, chunks=(10, 100))
    res = utils.dot(dA, B).compute()
    assert_eq(ans, res)

    # dot(cupy.array, dask.array)
    dB = da.from_array(B, chunks=(10,))
    res = utils.dot(A, dB).compute()
    assert_eq(ans, res)
Example #4
    def loglikelihood(self, Xbeta, y):
        """
        Evaluate the logistic negative log-likelihood (the minimization objective)

        Parameters
        ----------
        Xbeta : array, shape (n_samples,)
        y : array, shape (n_samples,)
        """
        enXbeta = exp(-Xbeta)
        return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
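The term Xbeta + log1p(exp(-Xbeta)) above is an algebraic rewrite of log1p(exp(Xbeta)) that stays finite when Xbeta is large and positive; a small standalone NumPy check of that identity (not part of the library):

import numpy as np

xb = np.array([0.0, 1.0, 50.0, 800.0])
rewritten = xb + np.log1p(np.exp(-xb))  # form used in loglikelihood above
naive = np.log1p(np.exp(xb))            # overflows once exp(xb) overflows
print(rewritten)  # [0.6931..., 1.3132..., 50., 800.] -- finite everywhere
print(naive)      # last entry is inf (with an overflow warning)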
Example #5
    def decision_function(self, X):
        """Predict confidence scores for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples,]
            The confidence score of each sample.
        """
        X_ = self._check_array(X)
        return dot(X_, self._coef)
Example #6
    def predict(self, X):
        """Predict count for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        C : array, shape = [n_samples,]
            Predicted count for each sample
        """
        X_ = self._check_array(X)
        return exp(dot(X_, self._coef))
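Here exp(dot(X_, self._coef)) inverts the log link, mapping the linear predictor to a non-negative expected count; a tiny NumPy sketch of the same mapping with made-up coefficients:

import numpy as np

coef = np.array([0.2, -0.1])      # hypothetical fitted coefficients
X_new = np.array([[1.0, 2.0],
                  [3.0, 0.5]])
counts = np.exp(X_new.dot(coef))  # exp(X @ beta) is >= 0 for any X, beta
print(counts)                     # approx [1.0, 1.733]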
Example #7
    def decision_function(self, X):
        """Predict confidence scores for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        S : array-like, shape = [n_samples,]
            Confidence scores for each sample.
        """
        X_ = self._check_array(X)
        return dot(X_, self._coef)
Example #8
    def predict_proba(self, X):
        """Probability estimates for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples,]
            The probability of the positive class for each sample.
        """
        X_ = self._check_array(X)
        return sigmoid(dot(X_, self._coef))
Example #9
def newton(X, y, max_iter=50, tol=1e-8, family=Logistic):
    '''Newton's Method for Logistic Regression.'''

    gradient, hessian = family.gradient, family.hessian
    n, p = X.shape
    beta = np.zeros(p)  # always init to zeros?
    Xbeta = dot(X, beta)

    iter_count = 0
    converged = False

    while not converged:
        beta_old = beta

        # should this use map_blocks()?
        hess = hessian(Xbeta, X)
        grad = gradient(Xbeta, X, y)

        hess, grad = da.compute(hess, grad)

        # should this be dask or numpy?
        # currently uses Python 3 specific syntax
        step, _, _, _ = np.linalg.lstsq(hess, grad)
        beta = (beta_old - step)

        iter_count += 1

        # should change this criterion
        coef_change = np.absolute(beta_old - beta)
        converged = ((not np.any(coef_change > tol))
                     or (iter_count > max_iter))

        if not converged:
            Xbeta = dot(X, beta)  # numpy -> dask conversion of beta

    return beta
Example #10
 def gradient(self, Xbeta, X, y):
     eXbeta = exp(Xbeta)
     return dot(X.T, eXbeta - y)
Example #11
 def hessian(self, Xbeta, X):
     return 2 * dot(X.T, X)
Example #12
 def gradient(self, Xbeta, X, y):
     return 2 * dot(X.T, Xbeta) - 2 * dot(X.T, y)
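Example #11 and Example #12 are the Hessian and gradient of the squared-error objective ||X beta - y||^2; a quick finite-difference check of the gradient form in plain NumPy (synthetic data, illustrative only):

import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(30, 4))
y = rng.normal(size=30)
beta = rng.normal(size=4)

def obj(b):
    r = X.dot(b) - y
    return r.dot(r)  # ||X b - y||^2

analytic = 2 * np.dot(X.T, X.dot(beta)) - 2 * np.dot(X.T, y)  # Example #12
eps = 1e-6
numeric = np.array([(obj(beta + eps * e) - obj(beta - eps * e)) / (2 * eps)
                    for e in np.eye(4)])
assert np.allclose(analytic, numeric, rtol=1e-5)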
Example #13
 def hessian(self, Xbeta, X):
     """Logistic hessian"""
     p = sigmoid(Xbeta)
     return dot(p * (1 - p) * X.T, X)
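A small NumPy check (synthetic data and a local sigmoid, both assumptions) that the broadcasted expression dot(p * (1 - p) * X.T, X) above equals the textbook logistic Hessian X' diag(p(1-p)) X:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
beta = rng.normal(size=3)
p = sigmoid(X.dot(beta))

compact = np.dot(p * (1 - p) * X.T, X)           # form used above
explicit = X.T.dot(np.diag(p * (1 - p))).dot(X)  # X' diag(w) X
assert np.allclose(compact, explicit)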
Example #14
 def gradient(self, Xbeta, X, y):
     """Logistic gradient"""
     p = sigmoid(Xbeta)
     return dot(X.T, p - y)
Example #15
 def gradient(Xbeta, X, y):
     p = sigmoid(Xbeta)
     return dot(X.T, p - y)
Example #16
 def hessian(self, Xbeta, X):
     eXbeta = exp(Xbeta)
     x_diag_eXbeta = eXbeta[:, None] * X
     return dot(X.T, x_diag_eXbeta)
Example #17
 def hessian(Xbeta, X):
     p = sigmoid(Xbeta)
     return dot(p * (1 - p) * X.T, X)
Example #18
def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic):
    '''Simple implementation of BFGS.'''

    n, p = X.shape
    y = y.squeeze()

    recalcRate = 10
    stepSize = 1.0
    armijoMult = 1e-4
    backtrackMult = 0.5
    stepGrowth = 1.25

    beta = np.zeros(p)
    Hk = np.eye(p)
    for k in range(max_iter):

        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            eXbeta = exp(Xbeta)
            func = log1p(eXbeta).sum() - dot(y, Xbeta)

        e1 = eXbeta + 1.0
        gradient = dot(X.T,
                       eXbeta / e1 - y)  # implicit numpy -> dask conversion

        if k:
            yk = yk + gradient  # TODO: gradient is dasky and yk is numpy-y
            rhok = 1 / yk.dot(sk)
            adj = np.eye(p) - rhok * dot(sk, yk.T)
            Hk = dot(adj, dot(Hk, adj.T)) + rhok * dot(sk, sk.T)

        step = dot(Hk, gradient)
        steplen = dot(step, gradient)
        Xstep = dot(X, step)

        # backtracking line search
        lf = func
        old_Xbeta = Xbeta
        stepSize, _, _, func = compute_stepsize_dask(
            beta,
            step,
            Xbeta,
            Xstep,
            y,
            func,
            family=family,
            backtrackMult=backtrackMult,
            armijoMult=armijoMult,
            stepSize=stepSize)

        beta, stepSize, Xbeta, gradient, lf, func, step, Xstep = persist(
            beta, stepSize, Xbeta, gradient, lf, func, step, Xstep)

        stepSize, lf, func, step = compute(stepSize, lf, func, step)

        beta = beta - stepSize * step  # tiny bit of repeat work here to avoid communication
        Xbeta = Xbeta - stepSize * Xstep

        if stepSize == 0:
            print('No more progress')
            break

        # necessary for gradient computation
        eXbeta = exp(Xbeta)

        yk = -gradient
        sk = -stepSize * step
        stepSize *= stepGrowth

        if stepSize == 0:
            print('No more progress')
            break

        df = lf - func
        df /= max(func, lf)
        if df < tol:
            print('Converged')
            break

    return beta
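A minimal usage sketch for the BFGS variant above, assuming bfgs and its default Logistic family are importable; the data and chunking are illustrative only.

import dask.array as da

X = da.random.random((1000, 4), chunks=(250, 4))
y = (da.random.random(1000, chunks=250) > 0.5).astype(float)

beta = bfgs(X, y, max_iter=100, tol=1e-10)
print(beta)  # coefficient estimates, shape (4,)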
Example #19
 def hessian(Xbeta, X):
     return 2 * dot(X.T, X)
Example #20
 def loglike(Xbeta, y):
     eXbeta = exp(Xbeta)
     return (log1p(eXbeta)).sum() - dot(y, Xbeta)