Example #1
def _grad_neg_log_likelihood_and_fim_fn(x):
  # Linear predictor for the current coefficients x: model_matrix @ x.
  predicted_linear_response = (
      fisher_scoring.compute_predicted_linear_response(
          model_matrix, x))
  # Gradient of the negative log-likelihood and the diagonal "middle" factor
  # of the Fisher information matrix, both evaluated at the linear response.
  g, h_middle = _grad_neg_log_likelihood_and_fim(
      model_matrix, predicted_linear_response, response, model)
  return g, model_matrix, h_middle
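
This closure is shaped for `tfp.optimizer.proximal_hessian_sparse_one_step`
(see Example #3 below): rather than materializing the Fisher information
matrix, the solver expects it in the factored form implied by the returned
triple, roughly

```none
FIM = matmul(transpose(model_matrix), matmul(diag(h_middle), model_matrix))
```
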
Example #2
def _neg_log_likelihood(x):
  # Linear predictor for the candidate coefficients x: model_matrix @ x.
  predicted_linear_response = (
      fisher_scoring.compute_predicted_linear_response(
          model_matrix, x))
  # Per-sample log-likelihood of the observed responses under the GLM.
  log_probs = model.log_prob(response, predicted_linear_response)
  return -log_probs
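
A minimal usage sketch for this closure, assuming `model_matrix`, `response`,
and `model` are in scope as in the snippet (the names introduced below are
illustrative, not part of the original code):

```python
import tensorflow as tf

coefficients = tf.Variable(tf.zeros([model_matrix.shape[-1]]))
with tf.GradientTape() as tape:
  # Total negative log-likelihood over all samples.
  loss = tf.reduce_sum(_neg_log_likelihood(coefficients))
grad = tape.gradient(loss, coefficients)  # gradient w.r.t. the coefficients
```
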
Example #3
# These imports follow TFP's internal module layout for this snippet;
# `_grad_neg_log_likelihood_and_fim` is a module-internal helper (Example #1
# above shows the closure built around it).
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability.python.glm import fisher_scoring


def fit_sparse_one_step(model_matrix,
                        response,
                        model,
                        model_coefficients_start,
                        tolerance,
                        l1_regularizer,
                        l2_regularizer=None,
                        maximum_full_sweeps=None,
                        learning_rate=None,
                        name=None):
    """One step of (the outer loop of) the GLM fitting algorithm.

  This function returns a new value of `model_coefficients`, equal to
  `model_coefficients_start + model_coefficients_update`.  The increment
  `model_coefficients_update` (an element of `R^n`) is computed by a coordinate
  descent method, that is, by a loop in which each iteration updates exactly
  one coordinate of `model_coefficients_update`.  (Some updates may leave the
  value of the coordinate unchanged.)

  The particular update method used is to apply an L1-based proximity operator,
  "soft threshold", whose fixed point `model_coefficients_update^*` is the
  desired minimizer

  ```none
  model_coefficients_update^* = argmin{
      -LogLikelihood(model_coefficients_start + model_coefficients_update')
        + l1_regularizer *
            ||model_coefficients_start + model_coefficients_update'||_1
        + l2_regularizer *
            ||model_coefficients_start + model_coefficients_update'||_2**2
      : model_coefficients_update' }
  ```

  where in each iteration `model_coefficients_update'` has at most one nonzero
  coordinate.
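
  For reference, the "soft threshold" above is the usual proximity operator of
  the (scaled) L1 norm, applied coordinatewise:

  ```none
  SoftThreshold(z, gamma) = sign(z) * max(|z| - gamma, 0)
  ```

  so a coordinate whose unregularized update is smaller than `gamma` in
  magnitude is set exactly to zero.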

  This update method preserves sparsity, i.e., tends to find sparse solutions if
  `model_coefficients_start` is sparse.  Additionally, the choice of step size
  is based on curvature (Fisher information matrix), which significantly speeds
  up convergence.

  Args:
    model_matrix: (Batch of) matrix-shaped, `float` `Tensor` or `SparseTensor`
      where each row represents a sample's features.  Has shape `[N, n]` where
      `N` is the number of data samples and `n` is the number of features per
      sample.
    response: (Batch of) vector-shaped `Tensor` with the same dtype as
      `model_matrix` where each element represents a sample's observed response
      (to the corresponding row of features).
    model: `tfp.glm.ExponentialFamily`-like instance, which specifies the link
      function and distribution of the GLM, and thus characterizes the negative
      log-likelihood which will be minimized. Must have sufficient statistic
      equal to the response, that is, `T(y) = y`.
    model_coefficients_start: (Batch of) vector-shaped, `float` `Tensor` with
      the same dtype as `model_matrix`, representing the initial values of the
      coefficients for the GLM regression.  Has shape `[n]` where `model_matrix`
      has shape `[N, n]`.
    tolerance: scalar, `float` `Tensor` representing the convergence threshold.
      The optimization step will terminate early, returning its current value of
      `model_coefficients_start + model_coefficients_update`, once the following
      condition is met:
      `||model_coefficients_update_end - model_coefficients_update_start||_2
         / (1 + ||model_coefficients_start||_2)
       < sqrt(tolerance)`,
      where `model_coefficients_update_end` is the value of
      `model_coefficients_update` at the end of a sweep and
      `model_coefficients_update_start` is the value of
      `model_coefficients_update` at the beginning of that sweep.
    l1_regularizer: scalar, `float` `Tensor` representing the weight of the L1
      regularization term (see equation above).
    l2_regularizer: scalar, `float` `Tensor` representing the weight of the L2
      regularization term (see equation above).
      Default value: `None` (i.e., no L2 regularization).
    maximum_full_sweeps: Python integer specifying maximum number of sweeps to
      run.  A "sweep" consists of an iteration of coordinate descent on each
      coordinate. After this many sweeps, the algorithm will terminate even if
      convergence has not been reached.
      Default value: `None` (i.e., `1`).
    learning_rate: scalar, `float` `Tensor` representing a multiplicative factor
      used to dampen the proximal gradient descent steps.
      Default value: `None` (i.e., factor is conceptually `1`).
    name: Python string representing the name of the TensorFlow operation. The
      default name is `"fit_sparse_one_step"`.

  Returns:
    model_coefficients: (Batch of) `Tensor` having the same shape and dtype as
      `model_coefficients_start`, representing the updated value of
      `model_coefficients`, that is, `model_coefficients_start +
      model_coefficients_update`.
    is_converged: scalar, `bool` `Tensor` indicating whether convergence
      occurred across all batches within the specified number of sweeps.
    iter: scalar, `int` `Tensor` representing the actual number of coordinate
      updates made (before achieving convergence).  Since each sweep consists of
      `tf.size(model_coefficients_start)` iterations, the maximum number of
      updates is `maximum_full_sweeps * tf.size(model_coefficients_start)`.
  """
  with tf.name_scope(name or 'fit_sparse_one_step'):
    # Linear predictor at the starting coefficients.
    predicted_linear_response = (
        fisher_scoring.compute_predicted_linear_response(
            model_matrix, model_coefficients_start))
    # Gradient of the negative log-likelihood and the diagonal "middle"
    # factor of the Fisher information matrix.
    g, h_middle = _grad_neg_log_likelihood_and_fim(
        model_matrix, predicted_linear_response, response, model)

    # Delegate the proximal (soft-threshold) coordinate descent sweep to the
    # generic sparse proximal Hessian solver.
    return tfp.optimizer.proximal_hessian_sparse_one_step(
        gradient_unregularized_loss=g,
        hessian_unregularized_loss_outer=model_matrix,
        hessian_unregularized_loss_middle=h_middle,
        x_start=model_coefficients_start,
        l1_regularizer=l1_regularizer,
        l2_regularizer=l2_regularizer,
        maximum_full_sweeps=maximum_full_sweeps,
        tolerance=tolerance,
        learning_rate=learning_rate,
        name=name)
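
For context, a hypothetical end-to-end invocation (it assumes the
module-internal helpers `fisher_scoring` and `_grad_neg_log_likelihood_and_fim`
are importable as above; the data and regularization weights are made up for
illustration):

```python
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

# Toy logistic-regression data: 100 samples, 5 features, sparse true weights.
rng = np.random.RandomState(seed=0)
x = tf.constant(rng.randn(100, 5), dtype=tf.float32)
w_true = tf.constant([1.5, 0., 0., -2., 0.])
probs = tf.math.sigmoid(tf.linalg.matvec(x, w_true))
y = tf.cast(rng.rand(100) < probs.numpy(), tf.float32)

coefficients, is_converged, num_updates = fit_sparse_one_step(
    model_matrix=x,
    response=y,
    model=tfp.glm.Bernoulli(),  # logistic link, Bernoulli response
    model_coefficients_start=tf.zeros([5]),
    tolerance=1e-6,
    l1_regularizer=0.05,
    maximum_full_sweeps=1)
```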