Example no. 1
def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=None,
                    **lsmr_options) -> ndarray:
    r"""
    Removes the projection of y onto the column space of x, returning the
    residuals from regressing each column of y on x

    Parameters
    ----------
    x : csc_matrix
        Sparse array of regressors
    y : ndarray
        Array with shape (nobs, nvar)
    use_cache : bool
        Flag indicating whether results should be stored in the cache,
        and retrieved if available.
    x_hash : object
        Hashable object representing the values in x
    **lsmr_options
        Dictionary of options to pass to scipy.sparse.linalg.lsmr

    Returns
    -------
    resids : ndarray
        The residuals from regressing y on x, with shape (nobs, nvar)

    Notes
    -----
    Residuals are estimated column-by-column as

    .. math::

        \hat{\epsilon}_{j} = y_{j} - x \hat{\beta}_{j}

    where :math:`\hat{\beta}_{j}` is computed using lsmr.
    """

    use_cache = use_cache and x_hash is not None
    regressor_hash = x_hash if x_hash is not None else ''
    default_opts = dict(atol=1e-8, btol=1e-8, show=False)
    default_opts.update(lsmr_options)
    resids = []
    for i in range(y.shape[1]):
        _y = y[:, i:i + 1]

        variable_digest = ''
        if use_cache:
            hasher = hash_func()
            hasher.update(ascontiguousarray(_y.data))
            variable_digest = hasher.hexdigest()

        if use_cache and variable_digest in _VARIABLE_CACHE[regressor_hash]:
            resid = _VARIABLE_CACHE[regressor_hash][variable_digest]
        else:
            beta = lsmr(x, _y, **default_opts)[0]
            resid = y[:, i:i + 1] - (x.dot(csc_matrix(beta[:, None]))).A
            _VARIABLE_CACHE[regressor_hash][variable_digest] = resid
        resids.append(resid)
    if resids:
        return column_stack(resids)
    else:
        return empty_like(y)
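
A minimal usage sketch for lsmr_annihilate follows. It assumes the module-level
pieces not shown in the excerpt are available (hash_func, a _VARIABLE_CACHE that
behaves like a defaultdict of dicts, and the numpy/scipy imports); the shapes,
density and tolerances below are illustrative only.

import numpy as np
from scipy.sparse import random as sparse_random

rng = np.random.default_rng(0)
x = sparse_random(500, 5, density=0.2, format='csc', random_state=0)  # sparse regressors
y = rng.standard_normal((500, 2))                                     # columns to annihilate

# With x_hash=None the cache lookup is skipped (use_cache requires a hash of x);
# extra keyword arguments are forwarded to scipy.sparse.linalg.lsmr.
resids = lsmr_annihilate(x, y, use_cache=False, atol=1e-10, btol=1e-10)
assert resids.shape == y.shape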
Example no. 2
    def fit(self, X: csc_matrix, y):
        """Fit the model by coordinate descent: in each iteration update the
        global bias, the 1-way weights and the 2-way latent factors in turn,
        keeping the residual vector e in sync after every parameter change."""
        print("Fit starts")

        # X, X_, y, y_ = train_test_split(X, y, self.test_fraction)
        e = self.predict(X) - y
        q = X.dot(self.V)
        n_samples, n_features = X.shape

        X = X.tocsc()
        for i in range(self.n_iter):
            # self.evaluate(i, X_, y_)

            # Global bias
            w0_ = -(e - self.w0).sum() / (n_samples + self.lambda_w0)
            e += w0_ - self.w0
            self.w0 = w0_
            # self.evaluate(i, X_, y_)

            # 1-way interaction
            for l in range(n_features):
                Xl = X.getcol(l).toarray()
                print(("\r Iteration #{} 1-way interaction "
                       "progress {:.2%}; train error {}").format(
                           i, l / n_features, err(e)),
                      end="")
                w_ = -((e - self.w[l] * Xl) * Xl).sum() / (
                    np.power(Xl, 2).sum() + self.lambda_w)
                e += (w_ - self.w[l]) * Xl
                self.w[l] = w_
            # self.evaluate(i, X_, y_)

            # 2-way interaction
            for f in range(self.latent_dimension):
                Qf = q[:, f].reshape(-1, 1)
                for l in range(n_features):
                    Xl = X.getcol(l)
                    idx = Xl.nonzero()[0]
                    Xl = Xl.data.reshape(-1, 1)
                    Vlf = self.V[l, f]
                    print((
                        "\r Iteration #{} 2-way interaction progress {:.2%};" +
                        "error {:.5}; validation_error NO").format(
                            i, (f * n_features + l) /
                            (self.latent_dimension * n_features), err(e)),
                          end="")
                    h = Xl * Qf[idx] - np.power(Xl, 2) * Vlf
                    v_ = -((e[idx] - Vlf * h) * h).sum() / (
                        np.power(h, 2).sum() + self.lambda_v)
                    e[idx] += (v_ - Vlf) * h
                    Qf[idx] += (v_ - Vlf) * Xl
                    self.V[l, f] = v_
                q[:, f] = Qf.reshape(-1)
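
Transcribed from the update lines above, the loop performs ALS-style coordinate
updates for a factorization machine, where e is the current residual
self.predict(X) - y and the lambdas are the regularization strengths:

.. math::

    w_0 \leftarrow -\frac{\sum_i (e_i - w_0)}{n + \lambda_{w_0}}, \qquad
    w_\ell \leftarrow -\frac{\sum_i (e_i - w_\ell x_{i\ell})\, x_{i\ell}}{\sum_i x_{i\ell}^2 + \lambda_w}, \qquad
    v_{\ell f} \leftarrow -\frac{\sum_{i \in \mathrm{nz}(\ell)} (e_i - v_{\ell f} h_i)\, h_i}{\sum_{i \in \mathrm{nz}(\ell)} h_i^2 + \lambda_v}

with :math:`h_i = x_{i\ell} q_{if} - x_{i\ell}^2 v_{\ell f}` and
:math:`q_{if} = \sum_m x_{im} v_{mf}`. After every update the residual e is
shifted by the corresponding parameter change, so it never has to be recomputed
from scratch.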
Example no. 3
def trust_region(x: np.ndarray, g: np.ndarray, hess: np.ndarray,
                 scaling: csc_matrix, delta: float, dv: np.ndarray,
                 theta: float, lb: np.ndarray, ub: np.ndarray,
                 subspace_dim: SubSpaceDim,
                 stepback_strategy: StepBackStrategy, refine_stepback: bool,
                 logger: logging.Logger) -> Step:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x:
        Current values of the optimization variables
    :param g:
        Objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param scaling:
        Scaling transformation according to distance to boundary
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param dv:
        derivative of scaling transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries
    :param subspace_dim:
        Subspace dimension in which the subproblem will be solved. Larger
        subspaces require more compute time but can yield higher quality step
        proposals.
    :param stepback_strategy:
        Strategy that is applied when the proposed step exceeds the
        optimization boundary.
    :param refine_stepback:
        If set to True, proposed steps that are computed via the specified
        stepback_strategy will be refined via optimization.
    :param logger:
        logging.Logger instance to be used for logging

    :return:
        s: proposed step,
        ss: rescaled proposed step,
        qpval: expected function value according to local quadratic
        approximation,
        subspace: computed subspace for reuse if proposed step is not accepted,
        steptype: type of step that was selected for proposal
    """
    sg = scaling.dot(g)
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    if subspace_dim == SubSpaceDim.TWO:
        tr_step = TRStep2D(x, sg, hess, scaling, g_dscaling, delta, theta, ub,
                           lb, logger)
    elif subspace_dim == SubSpaceDim.FULL:
        tr_step = TRStepFull(x, sg, hess, scaling, g_dscaling, delta, theta,
                             ub, lb, logger)
    else:
        raise ValueError('Invalid choice of subspace dimension.')
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than the
    # TR step

    steps = [tr_step]
    if tr_step.alpha < 1.0 and len(g) > 1:
        g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta, theta,
                              ub, lb, logger)
        g_step.calculate()

        steps.append(g_step)

        if stepback_strategy == StepBackStrategy.SINGLE_REFLECT:
            rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling, delta,
                                       theta, ub, lb, tr_step)
            rtr_step.calculate()
            steps.append(rtr_step)

        if stepback_strategy in [
                StepBackStrategy.REFLECT, StepBackStrategy.MIXED
        ]:
            steps.extend(
                stepback_reflect(tr_step, x, sg, hess, scaling, g_dscaling,
                                 delta, theta, ub, lb))
        if stepback_strategy in [
                StepBackStrategy.TRUNCATE, StepBackStrategy.MIXED
        ]:
            steps.extend(
                stepback_truncate(tr_step, x, sg, hess, scaling, g_dscaling,
                                  delta, theta, ub, lb))
        if refine_stepback:
            steps.extend(
                stepback_refine(steps, x, sg, hess, scaling, g_dscaling, delta,
                                theta, ub, lb))

    if len(steps) > 1:
        rcountstrs = [
            str(step.reflection_count) * int(step.reflection_count > 0)
            for step in steps
        ]
        logger.debug(' | '.join([
            f'{step.type + rcountstr}: [qp:'
            f' {step.qpval:.2E}, '
            f'a: {step.alpha:.2E}]'
            for rcountstr, step in zip(rcountstrs, steps)
        ]))

    qpvals = [step.qpval for step in steps]
    return steps[int(np.argmin(qpvals))]
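
The final argmin selects whichever candidate step predicts the lowest objective
value. Assuming qpval is the value of the usual local quadratic model built from
the (scaled) gradient and Hessian, the selection amounts to

.. math::

    q(s) = g^\top s + \tfrac{1}{2}\, s^\top B\, s, \qquad
    s^{\ast} = \operatorname*{arg\,min}_{s \in \mathrm{steps}} q(s).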
Example no. 4
def trust_region(x: np.ndarray, g: np.ndarray, hess: np.ndarray,
                 scaling: csc_matrix, delta: float, dv: np.ndarray,
                 theta: float, lb: np.ndarray, ub: np.ndarray,
                 subspace_dim: SubSpaceDim,
                 stepback_strategy: StepBackStrategy,
                 logger: logging.Logger) -> Step:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x:
        Current values of the optimization variables
    :param g:
        Objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param scaling:
        Scaling transformation according to distance to boundary
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param dv:
        derivative of scaling transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries
    :param subspace_dim:
        Subspace dimension in which the subproblem will be solved. Larger
        subspaces require more compute time but can yield higher quality step
        proposals.
    :param stepback_strategy:
        Strategy that is applied when the proposed step exceeds the
        optimization boundary.
    :param logger:
        logging.Logger instance to be used for logging

    :return:
        s: proposed step
    """
    sg = scaling.dot(g)
    # diag(g_k)*J^v_k Eq (2.5) [ColemanLi1994]
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    step_options = {
        SubSpaceDim.TWO: TRStep2D,
        SubSpaceDim.FULL: TRStepFull,
        SubSpaceDim.STEIHAUG: TRStepSteihaug,
    }
    tr_step = step_options[subspace_dim](x, sg, hess, scaling, g_dscaling,
                                         delta, theta, ub, lb, logger)
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than the
    # TR step

    steps = [tr_step]
    if tr_step.alpha < 1.0 and len(g) > 1:
        g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta, theta,
                              ub, lb, logger)
        g_step.calculate()
        steps.append(g_step)
        if stepback_strategy == StepBackStrategy.SINGLE_REFLECT:
            rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling, delta,
                                       theta, ub, lb, tr_step)
            rtr_step.calculate()
            steps.append(rtr_step)

        if stepback_strategy in [
                StepBackStrategy.REFLECT, StepBackStrategy.MIXED
        ]:
            steps.extend(
                stepback_reflect(tr_step, x, sg, hess, scaling, g_dscaling,
                                 delta, theta, ub, lb))

        if stepback_strategy in [
                StepBackStrategy.TRUNCATE, StepBackStrategy.MIXED
        ]:
            steps.extend(
                stepback_truncate(tr_step, x, sg, hess, scaling, g_dscaling,
                                  delta, theta, ub, lb))

        if stepback_strategy == StepBackStrategy.REFINE and \
                tr_step.subspace.shape[1] > 1:
            ref_step = RefinedStep(x, sg, hess, scaling, g_dscaling, delta,
                                   theta, ub, lb, tr_step)
            ref_step.calculate()
            steps.append(ref_step)

    if len(steps) > 1:
        rcountstrs = [
            str(step.reflection_count) * int(step.reflection_count > 0)
            for step in steps
        ]
        logger.debug(' | '.join([
            f'{step.type + rcountstr}: [qp:'
            f' {step.qpval:.2E}, '
            f'a: {step.alpha:.2E}]'
            for rcountstr, step in zip(rcountstrs, steps)
        ]))

    qpvals = [step.qpval for step in steps]
    return steps[np.argmin(qpvals)]
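
Relative to the previous example, the if/elif chain over subspace_dim is replaced
by a table of step classes, so an unexpected value now surfaces as a bare
KeyError. A hypothetical guarded lookup (a sketch only, not part of the original)
that keeps the explicit error message would be:

# Hypothetical sketch: dict dispatch with an explicit error for unknown enum
# members, mirroring the ValueError raised by the if/elif variant.
try:
    step_cls = step_options[subspace_dim]
except KeyError:
    raise ValueError(f'Invalid choice of subspace dimension: {subspace_dim}.') from None
tr_step = step_cls(x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, logger)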
def optimize(sparse_km: csc_matrix, gamma: float,
             regcoef: float, L1: float, eps: float, max_iter: int) -> (csc_matrix, dict):
    """
    Perform SVM on sparsified kernel matrix.
    :param sparse_km: sparsified kernel matrix.
    :param eps: epsilon value.
    :param max_iter: maximal number of iterations.
    :return: object weights.
    """

    if sparse_km.shape[0] != sparse_km.shape[1]:
        raise ValueError("Kernel matrix is not a square matrix")

    log = {"grad_norm": [], "time": []}

    N = sparse_km.shape[0]

    def grad_f(x):
        t = x.copy()
        t.data -= 1 / (2 * N * regcoef)
        return -csr_matrix((N, 1)) + sparse_km.dot(x) - \
               gamma*sparse_clip(-x, 0, None) + gamma*sparse_clip(t, 0, None)


    x0 = csr_matrix((N, 1))
    x0[0, 0] = 1/2
    grad_f0 = grad_f(x0)
    grad_min = BasicGradientUpdater(grad_f0.T)
    grad_max = BasicGradientUpdater(-grad_f0.T)

    iter_counter = 0

    start = timeit.default_timer()

    current_point = x0
    true_grad = grad_f0

    # iterate until the gradient norm is small enough or max_iter is reached
    while grad_min.get_norm() > eps**2 and iter_counter < max_iter:

        #if true_grad == grad_min.get():

        log["grad_norm"].append(grad_min.get_norm())
        log["time"].append(timeit.default_timer() - start)

        i_plus = grad_max.get_coordinate()
        g_plus = -grad_max.get_value()
        i_minus = grad_min.get_coordinate()
        g_minus = grad_min.get_value()

        h_val = 1/(4*L1)*(g_plus - g_minus)
        h = csr_matrix((N, 1))
        h[i_plus, 0] = h_val
        h[i_minus, 0] = -h_val

        t = current_point.copy()
        # workaround: "sparse vector + scalar" is not implemented for scipy
        # sparse matrices, so shift the stored values directly
        t.data -= 1 / (2 * N * regcoef)

        delta_grad = sparse_km.dot(h)
        delta_grad -= gamma*sparse_clip(-current_point - h, 0, None)
        delta_grad += gamma*sparse_clip(-current_point, 0, None)
        delta_grad += gamma*sparse_clip(t + h, 0, None)
        delta_grad -= gamma*sparse_clip(t, 0, None)

        grad_min.update(delta_grad.T)
        grad_max.update(-delta_grad.T)

        current_point += h
        true_grad = grad_f(current_point.T)
        iter_counter += 1

    return current_point, log
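
The helper sparse_clip used throughout optimize is not part of the excerpt. A
minimal sketch consistent with how it is called (element-wise clipping of a
sparse vector, always with bounds that include zero) might look like:

import numpy as np

def sparse_clip(v, lo, hi):
    # Hypothetical helper, not shown in the original excerpt: clip the stored
    # values of a sparse vector element-wise. Every call above uses lo=0 and
    # hi=None, so implicit zeros are unaffected and can stay implicit.
    out = v.copy()
    out.data = np.clip(out.data, lo, hi)
    out.eliminate_zeros()
    return out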