Example #1
  def testGetStaticConstant(self):
    x = tf.constant(2, dtype=tf.int32)
    self.assertEqual(np.array(2, dtype=np.int32),
                     distribution_util.maybe_get_static_value(x))
    self.assertAllClose(
        np.array(2.),
        distribution_util.maybe_get_static_value(x, dtype=np.float64))
 def testGetStaticPlaceholder(self):
   if tf.executing_eagerly(): return
   x = tf1.placeholder_with_default(
       np.array([2.], dtype=np.int32), shape=[1])
   self.assertEqual(None, distribution_util.maybe_get_static_value(x))
   self.assertEqual(
       None, distribution_util.maybe_get_static_value(x, dtype=np.float64))
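Together, these tests pin down the helper's contract: when a `Tensor`'s value (or a Python/NumPy value) is known at graph-construction time, `maybe_get_static_value` returns it as a NumPy value, optionally cast via `dtype`; when the value is only known at run time, it returns `None`. A minimal sketch of that behavior, assuming a TFP version where the internal module path below still exists:

import numpy as np
import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util

x = tf.constant([1, 2, 3], dtype=tf.int32)
# Statically known value: a NumPy array comes back (optionally cast).
print(distribution_util.maybe_get_static_value(x))                    # [1 2 3]
print(distribution_util.maybe_get_static_value(x, dtype=np.float64))  # [1. 2. 3.]
# A graph-mode placeholder's value is unknown until run time, so the helper
# would return None and callers must fall back to Tensor-valued ops.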
Example #3
def is_last_day_of_season(t):
    t_ = dist_util.maybe_get_static_value(t)
    if t_ is not None:  # static case
        step_in_cycle = t_ % num_steps_per_cycle
        return any(step_in_cycle == changepoints)
    else:
        step_in_cycle = tf.math.floormod(t, num_steps_per_cycle)
        return tf.reduce_any(tf.equal(step_in_cycle, changepoints))
Example #4
 def is_last_day_of_season(t):
   t_ = dist_util.maybe_get_static_value(t)
   if t_ is not None:  # static case
     step_in_cycle = t_ % num_steps_per_cycle
     return any(step_in_cycle == changepoints)
   else:
     step_in_cycle = tf.floormod(t, num_steps_per_cycle)
     return tf.reduce_any(tf.equal(step_in_cycle, changepoints))
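The `is_last_day_of_season` helper in the two examples above uses the same static-versus-dynamic dispatch: when the step index is statically known, the test is evaluated immediately with NumPy; otherwise a boolean `Tensor` is built. A small illustration of the two outcomes, assuming hypothetical values `num_steps_per_cycle = 7` and `changepoints = np.array([6])` purely for demonstration:

import numpy as np
import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util as dist_util

num_steps_per_cycle = 7
changepoints = np.array([6])

# Static case: a Python int reaches the NumPy branch, so the result is an
# ordinary Python bool available at graph-construction time.
print(is_last_day_of_season(6))   # True  (6 % 7 == 6 is a changepoint)
print(is_last_day_of_season(5))   # False

# Dynamic case (graph mode): a Tensor whose value is unknown until run time
# takes the tf.math.floormod / tf.reduce_any branch and yields a bool Tensor.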
Example #5
  def _maybe_get_static_event_ndims(self):
    if self.event_shape.ndims is not None:
      return self.event_shape.ndims

    event_ndims = tf.size(self.event_shape_tensor())
    event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

    if event_ndims_ is not None:
      return event_ndims_

    return event_ndims
Example #6
    def _maybe_get_static_event_ndims(self):
        if tensorshape_util.rank(self.event_shape) is not None:
            return tensorshape_util.rank(self.event_shape)

        event_ndims = tf.size(input=self.event_shape_tensor())
        event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

        if event_ndims_ is not None:
            return event_ndims_

        return event_ndims
Example #7
  def _maybe_get_static_event_ndims(self):
    if self.event_shape.ndims is not None:
      return self.event_shape.ndims

    event_ndims = tf.size(self.event_shape_tensor())
    event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

    if event_ndims_ is not None:
      return event_ndims_

    return event_ndims
Example #8
    def _maybe_get_static_event_ndims(self, override_event_shape):

        if tensorshape_util.rank(self.event_shape) is not None:
            return tensorshape_util.rank(self.event_shape)

        event_ndims = tf.size(
            self._event_shape_tensor(override_event_shape,
                                     self.distribution.event_shape_tensor()))
        event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

        if event_ndims_ is not None:
            return event_ndims_

        return event_ndims
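All four `_maybe_get_static_event_ndims` variants above follow one pattern: prefer the statically known rank of `event_shape`, and only fall back to a `Tensor`-valued `tf.size(event_shape_tensor())`, from which a static value is then recovered when possible. A tiny sketch of why the fallback exists, using `tf.TensorShape` directly:

import tensorflow as tf

# Fully defined event shape: the rank is available as a plain Python int.
known = tf.TensorShape([3, 2])
print(known.ndims)    # 2

# Unknown event shape: the rank is None, which is when the helpers above
# must compute tf.size(self.event_shape_tensor()) and then try
# maybe_get_static_value on that Tensor.
unknown = tf.TensorShape(None)
print(unknown.ndims)  # None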
Example #9
  def _maybe_get_static_event_ndims(self, event_ndims):
    """Helper which returns tries to return an integer static value."""
    event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

    if isinstance(event_ndims_, (np.generic, np.ndarray)):
      if event_ndims_.dtype not in (np.int32, np.int64):
        raise ValueError('Expected integer dtype, got dtype {}'.format(
            event_ndims_.dtype))

      if isinstance(event_ndims_, np.ndarray) and len(event_ndims_.shape):
        raise ValueError(
            'Expected a scalar integer, got {}'.format(event_ndims_))
      event_ndims_ = int(event_ndims_)

    return event_ndims_
Example #10
  def _maybe_get_static_event_ndims(self, event_ndims):
    """Helper which returns tries to return an integer static value."""
    event_ndims_ = distribution_util.maybe_get_static_value(event_ndims)

    if isinstance(event_ndims_, (np.generic, np.ndarray)):
      if event_ndims_.dtype not in (np.int32, np.int64):
        raise ValueError("Expected integer dtype, got dtype {}".format(
            event_ndims_.dtype))

      if isinstance(event_ndims_, np.ndarray) and len(event_ndims_.shape):
        raise ValueError(
            "Expected a scalar integer, got {}".format(event_ndims_))
      event_ndims_ = int(event_ndims_)

    return event_ndims_
Example #11
    def _forward_log_det_jacobian(self, x, **kwargs):
        x = tf.convert_to_tensor(value=x, name="x")

        fldj = tf.cast(0., dtype=dtype_util.base_dtype(x.dtype))

        if not self.bijectors:
            return fldj

        event_ndims = self._maybe_get_static_event_ndims(
            self.forward_min_event_ndims)

        if _use_static_shape(x, event_ndims):
            event_shape = x.shape[tensorshape_util.rank(x.shape) -
                                  event_ndims:]
        else:
            event_shape = tf.shape(input=x)[tf.rank(x) - event_ndims:]

        # TODO(b/129973548): Document and simplify.
        for b in reversed(self.bijectors):
            fldj += b.forward_log_det_jacobian(x,
                                               event_ndims=event_ndims,
                                               **kwargs.get(b.name, {}))
            if _use_static_shape(x, event_ndims):
                event_shape = b.forward_event_shape(event_shape)
                event_ndims = self._maybe_get_static_event_ndims(
                    tensorshape_util.rank(event_shape))
            else:
                event_shape = b.forward_event_shape_tensor(event_shape)
                event_shape_ = distribution_util.maybe_get_static_value(
                    event_shape)
                event_ndims = tf.size(input=event_shape)
                event_ndims_ = self._maybe_get_static_event_ndims(event_ndims)

                if event_ndims_ is not None and event_shape_ is not None:
                    event_ndims = event_ndims_
                    event_shape = event_shape_

            x = b.forward(x, **kwargs.get(b.name, {}))

        return fldj
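The loop above is the composition (`Chain`-style) bijector's forward log-det-Jacobian: it walks the component bijectors in reverse, adding each one's contribution while propagating the transformed event shape, statically whenever possible. A hedged usage sketch, assuming a TFP version that provides `tfb.Scale`:

import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors

# forward(x) = exp(2 * x), so log|d/dx| = log(2) + 2 * x. The chain computes
# this as the sum of per-bijector terms: Scale(2.) contributes log(2) and
# Exp, applied to 2 * x, contributes 2 * x.
chain = tfb.Chain([tfb.Exp(), tfb.Scale(2.)])
x = tf.constant([0.5, 1.5])
print(chain.forward_log_det_jacobian(x, event_ndims=0))
# ~ [log(2) + 1.0, log(2) + 3.0]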
Example #12
    def _inverse_log_det_jacobian(self, y, **kwargs):
        y = tf.convert_to_tensor(value=y, name="y")
        ildj = tf.cast(0., dtype=dtype_util.base_dtype(y.dtype))

        if not self.bijectors:
            return ildj

        event_ndims = self._maybe_get_static_event_ndims(
            self.inverse_min_event_ndims)

        if _use_static_shape(y, event_ndims):
            event_shape = y.shape[tensorshape_util.rank(y.shape) -
                                  event_ndims:]
        else:
            event_shape = tf.shape(input=y)[tf.rank(y) - event_ndims:]

        # TODO(b/129973548): Document and simplify.
        for b in self.bijectors:
            ildj += b.inverse_log_det_jacobian(y,
                                               event_ndims=event_ndims,
                                               **kwargs.get(b.name, {}))

            if _use_static_shape(y, event_ndims):
                event_shape = b.inverse_event_shape(event_shape)
                event_ndims = self._maybe_get_static_event_ndims(
                    tensorshape_util.rank(event_shape))
            else:
                event_shape = b.inverse_event_shape_tensor(event_shape)
                event_shape_ = distribution_util.maybe_get_static_value(
                    event_shape)
                event_ndims = tf.size(input=event_shape)
                event_ndims_ = self._maybe_get_static_event_ndims(event_ndims)

                if event_ndims_ is not None and event_shape_ is not None:
                    event_ndims = event_ndims_
                    event_shape = event_shape_

            y = b.inverse(y, **kwargs.get(b.name, {}))
        return ildj
Example #13
def fit_one_step(
    model_matrix,
    response,
    model,
    model_coefficients_start=None,
    predicted_linear_response_start=None,
    l2_regularizer=None,
    dispersion=None,
    offset=None,
    learning_rate=None,
    fast_unsafe_numerics=True,
    l2_regularization_penalty_factor=None,
    name=None):
  """Runs one step of Fisher scoring.

  Args:
    model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each row
      represents a sample's features.
    response: (Batch of) vector-shaped `Tensor` where each element represents a
      sample's observed response (to the corresponding row of features). Must
      have same `dtype` as `model_matrix`.
    model: `tfp.glm.ExponentialFamily`-like instance used to construct the
      negative log-likelihood loss, gradient, and expected Hessian (i.e., the
      Fisher information matrix).
    model_coefficients_start: Optional (batch of) vector-shaped `Tensor`
      representing the initial model coefficients, one for each column in
      `model_matrix`. Must have same `dtype` as `model_matrix`.
      Default value: Zeros.
    predicted_linear_response_start: Optional `Tensor` with `shape`, `dtype`
      matching `response`; represents `offset` shifted initial linear
      predictions based on `model_coefficients_start`.
      Default value: `offset` if `model_coefficients_start is None`, and
      `tf.linalg.matvec(model_matrix, model_coefficients_start) + offset`
      otherwise.
    l2_regularizer: Optional scalar `Tensor` representing L2 regularization
      penalty, i.e.,
      `loss(w) = sum{-log p(y[i]|x[i],w) : i=1..n} + l2_regularizer ||w||_2^2`.
      Default value: `None` (i.e., no L2 regularization).
    dispersion: Optional (batch of) `Tensor` representing `response` dispersion,
      i.e., as in, `p(y|theta) := exp((y theta - A(theta)) / dispersion)`.
      Must broadcast with rows of `model_matrix`.
      Default value: `None` (i.e., "no dispersion").
    offset: Optional `Tensor` representing constant shift applied to
      `predicted_linear_response`.  Must broadcast to `response`.
      Default value: `None` (i.e., `tf.zeros_like(response)`).
    learning_rate: Optional (batch of) scalar `Tensor` used to dampen iterative
      progress. Typically only needed if optimization diverges, should be no
      larger than `1` and typically very close to `1`.
      Default value: `None` (i.e., `1`).
    fast_unsafe_numerics: Optional Python `bool` indicating if solve should be
      based on Cholesky or QR decomposition.
      Default value: `True` (i.e., "prefer speed via Cholesky decomposition").
    l2_regularization_penalty_factor: Optional (batch of) vector-shaped
      `Tensor`, representing a separate penalty factor to apply to each model
      coefficient, length equal to columns in `model_matrix`. Each penalty
      factor multiplies l2_regularizer to allow differential regularization. Can
      be 0 for some coefficients, which implies no regularization. Default is 1
      for all coefficients.
      `loss(w) = sum{-log p(y[i]|x[i],w) : i=1..n} + l2_regularizer ||w *
        l2_regularization_penalty_factor||_2^2`
    name: Python `str` used as name prefix to ops created by this function.
      Default value: `"fit_one_step"`.

  Returns:
    model_coefficients: (Batch of) vector-shaped `Tensor`; represents the
      next estimate of the model coefficients, one for each column in
      `model_matrix`.
    predicted_linear_response: `response`-shaped `Tensor` representing linear
      predictions based on new `model_coefficients`, i.e.,
      `tf.linalg.matvec(model_matrix, model_coefficients_next) + offset`.
  """
  with tf.name_scope(name or 'fit_one_step'):

    [
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset,
    ] = prepare_args(
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset)

    # Compute: mean, grad(mean, predicted_linear_response_start), and variance.
    mean, variance, grad_mean = model(predicted_linear_response_start)

    # If either `grad_mean` or `variance` is non-finite or zero, then we'll
    # replace it with a value such that the row is zeroed out. Although this
    # procedure may seem circuitous, it is necessary to ensure this algorithm is
    # itself differentiable.
    is_valid = (
        tf.math.is_finite(grad_mean) & tf.not_equal(grad_mean, 0.)
        & tf.math.is_finite(variance) & (variance > 0.))

    def mask_if_invalid(x, mask):
      return tf.where(
          is_valid, x, np.array(mask, dtype_util.as_numpy_dtype(x.dtype)))

    # Run one step of iteratively reweighted least-squares.
    # Compute "`z`", the adjusted predicted linear response.
    # z = predicted_linear_response_start
    #     + learning_rate * (response - mean) / grad_mean
    z = (response - mean) / mask_if_invalid(grad_mean, 1.)
    # TODO(jvdillon): Rather than use learning rate, we should consider using
    # backtracking line search.
    if learning_rate is not None:
      z *= learning_rate[..., tf.newaxis]
    z += predicted_linear_response_start
    if offset is not None:
      z -= offset

    # Compute "`w`", the per-sample weight.
    if dispersion is not None:
      # For convenience, we'll now scale the variance by the dispersion factor.
      variance *= dispersion
    w = (
        mask_if_invalid(grad_mean, 0.) *
        tf.math.rsqrt(mask_if_invalid(variance, np.inf)))

    a = model_matrix * w[..., tf.newaxis]
    b = z * w
    # Solve `min{ || A @ model_coefficients - b ||_2**2 : model_coefficients }`
    # where `@` denotes `matmul`.

    if l2_regularizer is None:
      l2_regularizer = np.array(0, dtype_util.as_numpy_dtype(a.dtype))
    else:
      l2_regularizer_ = distribution_util.maybe_get_static_value(
          l2_regularizer, dtype_util.as_numpy_dtype(a.dtype))
      if l2_regularizer_ is not None:
        l2_regularizer = l2_regularizer_

    def _embed_l2_regularization():
      """Adds synthetic observations to implement L2 regularization."""
      # `tf.matrix_solve_ls` does not respect the `l2_regularization` argument
      # when `fast_unsafe_numerics` is `False`. This function adds synthetic
      # observations to the data to implement the regularization instead.
      # Adding observations `sqrt(l2_regularizer) * I` is mathematically
      # equivalent to adding the term
      # `-l2_regularizer ||coefficients||_2**2` to the log-likelihood.
      num_model_coefficients = num_cols(model_matrix)
      batch_shape = tf.shape(model_matrix)[:-2]
      if l2_regularization_penalty_factor is None:
        eye = tf.eye(
            num_model_coefficients, batch_shape=batch_shape, dtype=a.dtype)
      else:
        eye = tf.linalg.tensor_diag(
            tf.cast(l2_regularization_penalty_factor, dtype=a.dtype))
        broadcasted_shape = prefer_static.concat(
            [batch_shape, [num_model_coefficients, num_model_coefficients]],
            axis=0)
        eye = tf.broadcast_to(eye, broadcasted_shape)
      a_ = tf.concat([a, tf.sqrt(l2_regularizer) * eye], axis=-2)
      b_ = distribution_util.pad(
          b, count=num_model_coefficients, axis=-1, back=True)
      # Return l2_regularizer=0 since it's now embedded.
      l2_regularizer_ = np.array(0, dtype_util.as_numpy_dtype(a.dtype))
      return a_, b_, l2_regularizer_

    a, b, l2_regularizer = prefer_static.cond(
        prefer_static.reduce_all([
            prefer_static.logical_or(
                not(fast_unsafe_numerics),
                l2_regularization_penalty_factor is not None),
            l2_regularizer > 0.
        ]),
        _embed_l2_regularization,
        lambda: (a, b, l2_regularizer))

    model_coefficients_next = tf.linalg.lstsq(
        a,
        b[..., tf.newaxis],
        fast=fast_unsafe_numerics,
        l2_regularizer=l2_regularizer,
        name='model_coefficients_next')
    model_coefficients_next = model_coefficients_next[..., 0]

    # TODO(b/79122261): The approach used in `matrix_solve_ls` could be made
    # faster by avoiding explicitly forming Q and instead keeping the
    # factorization in 'implicit' form with stacked (rescaled) Householder
    # vectors underneath the 'R' and then applying the (accumulated)
    # reflectors in the appropriate order to apply Q'. However, we don't
    # presently do this because we lack core TF functionality. For reference,
    # the vanilla QR approach is:
    #   q, r = tf.linalg.qr(a)
    #   c = tf.matmul(q, b, adjoint_a=True)
    #   model_coefficients_next = tf.matrix_triangular_solve(
    #       r, c, lower=False, name='model_coefficients_next')

    predicted_linear_response_next = compute_predicted_linear_response(
        model_matrix,
        model_coefficients_next,
        offset,
        name='predicted_linear_response_next')

    return model_coefficients_next, predicted_linear_response_next
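This newer version matches the public `tfp.glm.fit_one_step`; each call performs one iteratively reweighted least-squares (Fisher scoring) step and returns the updated coefficients plus the corresponding linear predictions. A minimal usage sketch on synthetic logistic-regression data (names and sizes here are illustrative only):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

rng = np.random.RandomState(0)
model_matrix = tf.constant(rng.randn(100, 3), dtype=tf.float32)
true_coefficients = tf.constant([1., -2., 0.5])
logits = tf.linalg.matvec(model_matrix, true_coefficients)
response = tf.cast(rng.rand(100) < tf.math.sigmoid(logits).numpy(), tf.float32)

# One Fisher-scoring step starting from zero coefficients.
coefficients, linear_response = tfp.glm.fit_one_step(
    model_matrix=model_matrix,
    response=response,
    model=tfp.glm.Bernoulli())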
Example #14
  def testGetStaticInt(self):
    x = 2
    self.assertEqual(x, distribution_util.maybe_get_static_value(x))
    self.assertAllClose(
        np.array(2.),
        distribution_util.maybe_get_static_value(x, dtype=np.float64))
Example #15
def fit_one_step(
    model_matrix,
    response,
    model,
    model_coefficients_start=None,
    predicted_linear_response_start=None,
    l2_regularizer=None,
    dispersion=None,
    offset=None,
    learning_rate=None,
    fast_unsafe_numerics=True,
    name=None):
  """Runs one step of Fisher scoring.

  Args:
    model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each row
      represents a sample's features.
    response: (Batch of) vector-shaped `Tensor` where each element represents a
      sample's observed response (to the corresponding row of features). Must
      have same `dtype` as `model_matrix`.
    model: `tfp.glm.ExponentialFamily`-like instance used to construct the
      negative log-likelihood loss, gradient, and expected Hessian (i.e., the
      Fisher information matrix).
    model_coefficients_start: Optional (batch of) vector-shaped `Tensor`
      representing the initial model coefficients, one for each column in
      `model_matrix`. Must have same `dtype` as `model_matrix`.
      Default value: Zeros.
    predicted_linear_response_start: Optional `Tensor` with `shape`, `dtype`
      matching `response`; represents `offset` shifted initial linear
      predictions based on `model_coefficients_start`.
      Default value: `offset` if `model_coefficients_start is None`, and
      `tf.linalg.matvec(model_matrix, model_coefficients_start) + offset`
      otherwise.
    l2_regularizer: Optional scalar `Tensor` representing L2 regularization
      penalty, i.e.,
      `loss(w) = sum{-log p(y[i]|x[i],w) : i=1..n} + l2_regularizer ||w||_2^2`.
      Default value: `None` (i.e., no L2 regularization).
    dispersion: Optional (batch of) `Tensor` representing `response` dispersion,
      i.e., as in, `p(y|theta) := exp((y theta - A(theta)) / dispersion)`.
      Must broadcast with rows of `model_matrix`.
      Default value: `None` (i.e., "no dispersion").
    offset: Optional `Tensor` representing constant shift applied to
      `predicted_linear_response`.  Must broadcast to `response`.
      Default value: `None` (i.e., `tf.zeros_like(response)`).
    learning_rate: Optional (batch of) scalar `Tensor` used to dampen iterative
      progress. Typically only needed if optimization diverges, should be no
      larger than `1` and typically very close to `1`.
      Default value: `None` (i.e., `1`).
    fast_unsafe_numerics: Optional Python `bool` indicating if solve should be
      based on Cholesky or QR decomposition.
      Default value: `True` (i.e., "prefer speed via Cholesky decomposition").
    name: Python `str` used as name prefix to ops created by this function.
      Default value: `"fit_one_step"`.

  Returns:
    model_coefficients: (Batch of) vector-shaped `Tensor`; represents the
      next estimate of the model coefficients, one for each column in
      `model_matrix`.
    predicted_linear_response: `response`-shaped `Tensor` representing linear
      predictions based on new `model_coefficients`, i.e.,
      `tf.linalg.matvec(model_matrix, model_coefficients_next) + offset`.
  """
  graph_deps = [model_matrix, response, model_coefficients_start,
                predicted_linear_response_start, dispersion, learning_rate]
  with tf.name_scope(name, 'fit_one_step', graph_deps):

    [
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset,
    ] = prepare_args(
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset)

    # Compute: mean, grad(mean, predicted_linear_response_start), and variance.
    mean, variance, grad_mean = model(predicted_linear_response_start)

    # If either `grad_mean` or `variance` is non-finite or zero, then we'll
    # replace it with a value such that the row is zeroed out. Although this
    # procedure may seem circuitous, it is necessary to ensure this algorithm is
    # itself differentiable.
    is_valid = (tf.is_finite(grad_mean) & tf.not_equal(grad_mean, 0.) &
                tf.is_finite(variance) & (variance > 0.))
    def mask_if_invalid(x, mask):
      mask = tf.fill(tf.shape(x), value=np.array(mask, x.dtype.as_numpy_dtype))
      return tf.where(is_valid, x, mask)

    # Run one step of iteratively reweighted least-squares.
    # Compute "`z`", the adjusted predicted linear response.
    # z = predicted_linear_response_start
    #     + learning_rate * (response - mean) / grad_mean
    z = (response - mean) / mask_if_invalid(grad_mean, 1.)
    # TODO(jvdillon): Rather than use learning rate, we should consider using
    # backtracking line search.
    if learning_rate is not None:
      z *= learning_rate[..., tf.newaxis]
    z += predicted_linear_response_start

    # Compute "`w`", the per-sample weight.
    if dispersion is not None:
      # For convenience, we'll now scale the variance by the dispersion factor.
      variance *= dispersion
    w = (mask_if_invalid(grad_mean, 0.) *
         tf.rsqrt(mask_if_invalid(variance, np.inf)))

    a = model_matrix * w[..., tf.newaxis]
    b = z * w
    # Solve `min{ || A @ model_coefficients - b ||_2**2 : model_coefficients }`
    # where `@` denotes `matmul`.

    if l2_regularizer is None:
      l2_regularizer = np.array(0, a.dtype.as_numpy_dtype)
    else:
      l2_regularizer_ = distribution_util.maybe_get_static_value(
          l2_regularizer, a.dtype.as_numpy_dtype)
      if l2_regularizer_ is not None:
        l2_regularizer = l2_regularizer_

    def _embed_l2_regularization():
      """Adds synthetic observations to implement L2 regularization."""
      # `tf.matrix_solve_ls` does not respect the `l2_regularization` argument
      # when `fast_unsafe_numerics` is `False`. This function adds synthetic
      # observations to the data to implement the regularization instead.
      # Adding observations `sqrt(l2_regularizer) * I` is mathematically
      # equivalent to adding the term
      # `-l2_regularizer ||coefficients||_2**2` to the log-likelihood.
      num_model_coefficients = num_cols(model_matrix)
      batch_shape = tf.shape(model_matrix)[:-2]
      eye = tf.eye(
          num_model_coefficients, batch_shape=batch_shape, dtype=a.dtype)
      a_ = tf.concat([a, tf.sqrt(l2_regularizer) * eye], axis=-2)
      b_ = distribution_util.pad(
          b, count=num_model_coefficients, axis=-1, back=True)
      # Return l2_regularizer=0 since it's now embedded.
      l2_regularizer_ = np.array(0, a.dtype.as_numpy_dtype)
      return a_, b_, l2_regularizer_

    a, b, l2_regularizer = tf.contrib.framework.smart_cond(
        smart_reduce_all([not(fast_unsafe_numerics),
                          l2_regularizer > 0.]),
        _embed_l2_regularization,
        lambda: (a, b, l2_regularizer))

    model_coefficients_next = tf.matrix_solve_ls(
        a, b[..., tf.newaxis],
        fast=fast_unsafe_numerics,
        l2_regularizer=l2_regularizer,
        name='model_coefficients_next')
    model_coefficients_next = model_coefficients_next[..., 0]

    # TODO(b/79122261): The approach used in `matrix_solve_ls` could be made
    # faster by avoiding explicitly forming Q and instead keeping the
    # factorization in 'implicit' form with stacked (rescaled) Householder
    # vectors underneath the 'R' and then applying the (accumulated)
    # reflectors in the appropriate order to apply Q'. However, we don't
    # presently do this because we lack core TF functionality. For reference,
    # the vanilla QR approach is:
    #   q, r = tf.linalg.qr(a)
    #   c = tf.matmul(q, b, adjoint_a=True)
    #   model_coefficients_next = tf.matrix_triangular_solve(
    #       r, c, lower=False, name='model_coefficients_next')

    predicted_linear_response_next = calculate_linear_predictor(
        model_matrix,
        model_coefficients_next,
        offset,
        name='predicted_linear_response_next')

    return model_coefficients_next, predicted_linear_response_next
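Both `fit_one_step` versions rely on the identity stated in the `_embed_l2_regularization` comment: appending rows `sqrt(l2_regularizer) * I` to the design matrix (and zero-padding `b`) turns the plain least-squares objective into the L2-penalized one. A quick NumPy check of that equivalence with arbitrary small values:

import numpy as np

rng = np.random.RandomState(0)
a = rng.randn(5, 3)
b = rng.randn(5)
w = rng.randn(3)
l2 = 0.7

# Penalized objective: ||a @ w - b||^2 + l2 * ||w||^2.
penalized = np.sum((a @ w - b) ** 2) + l2 * np.sum(w ** 2)

# Augmented objective: ||[a; sqrt(l2) * I] @ w - [b; 0]||^2.
a_aug = np.concatenate([a, np.sqrt(l2) * np.eye(3)], axis=0)
b_aug = np.concatenate([b, np.zeros(3)])
augmented = np.sum((a_aug @ w - b_aug) ** 2)

np.testing.assert_allclose(penalized, augmented)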