Example no. 1
 def make_response_likelihood(self, w, x):
     if tensorshape_util.rank(w.shape) == 1:
         y_bar = tf.matmul(w[tf.newaxis], x)[0]
     else:
         y_bar = tf.matmul(w, x)
     return tfd.Normal(loc=y_bar, scale=tf.ones_like(y_bar))  # [n]
Example no. 2
 def _stddev(self):
   return self.scale * tf.ones_like(self.loc) * np.pi / np.sqrt(2.)
Example no. 3
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
    """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to directly
  use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.math.log(w))` is more
  efficient than `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `input_tensor` along the dimensions given in `axis`.
  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
  entry in `axis`. If `keep_dims` is true, the reduced dimensions
  are retained with length 1.

  If `axis` has no entries, all dimensions are reduced, and a
  tensor with a single element is returned.

  This function is more numerically stable than log(sum(w * exp(input))). It
  avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(input_tensor),
      rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
    with tf.name_scope(name or 'reduce_weighted_logsumexp'):
        logx = tf.convert_to_tensor(logx, name='logx')
        if w is None:
            lswe = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
            if return_sign:
                sgn = tf.ones_like(lswe)
                return lswe, sgn
            return lswe
        w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
        log_absw_x = logx + tf.math.log(tf.abs(w))
        max_log_absw_x = tf.reduce_max(log_absw_x, axis=axis, keepdims=True)
        # If the largest element is `-inf` or `inf` then we don't bother subtracting
        # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That
        # this is ok follows from the fact that we're actually free to subtract any
        # value we like, so long as we add it back after taking the `log(sum(...))`.
        max_log_absw_x = tf.where(tf.math.is_inf(max_log_absw_x),
                                  tf.zeros([], max_log_absw_x.dtype),
                                  max_log_absw_x)
        wx_over_max_absw_x = (tf.sign(w) * tf.exp(log_absw_x - max_log_absw_x))
        sum_wx_over_max_absw_x = tf.reduce_sum(wx_over_max_absw_x,
                                               axis=axis,
                                               keepdims=keep_dims)
        if not keep_dims:
            max_log_absw_x = tf.squeeze(max_log_absw_x, axis)
        sgn = tf.sign(sum_wx_over_max_absw_x)
        lswe = max_log_absw_x + tf.math.log(sgn * sum_wx_over_max_absw_x)
        if return_sign:
            return lswe, sgn
        return lswe
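A small usage sketch of `return_sign`, following the docstring's convention that `du` is the module alias exposing `reduce_weighted_logsumexp`; the expected values are worked out from the formula above:

```python
x = tf.constant([[0., 0., 0.],
                 [0., 0., 0.]])
w = tf.constant([[-1., 1., 1.],
                 [1., 1., 1.]])
# return_sign additionally reports the sign of sum(w * exp(x)) per reduced row.
lswe, sign = du.reduce_weighted_logsumexp(x, w, axis=1, return_sign=True)
# lswe ==> [log(1.), log(3.)], sign ==> [1., 1.]
```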
Example no. 4
 def cdf(x):
     ones = tf.ones_like(x)
     answer = tf1.where(x < 3, 0.6 * ones, ones)
     answer = tf1.where(x < 2, 0.3 * ones, answer)
     answer = tf1.where(x < 1, 0.1 * ones, answer)
     return tf1.where(x < 0, 0 * ones, answer)
Example no. 5
def sample_lkj(num_samples,
               dimension,
               concentration,
               cholesky_space=False,
               seed=None,
               name=None):
    """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: Python integer seed for RNG
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
  """
    if dimension < 0:
        raise ValueError(
            'Cannot sample negative-dimension correlation matrices.')
    # Notation below: B is the batch shape, i.e., tf.shape(concentration)
    seed = SeedStream(seed, 'sample_lkj')
    with tf.name_scope(name or 'sample_lkj'):
        concentration = tf.convert_to_tensor(concentration)
        if not dtype_util.is_floating(concentration.dtype):
            raise TypeError(
                'The concentration argument should have floating type, not '
                '{}'.format(dtype_util.name(concentration.dtype)))

        concentration = _replicate(num_samples, concentration)
        concentration_shape = tf.shape(concentration)
        if dimension <= 1:
            # For any dimension <= 1, there is only one possible correlation matrix.
            shape = tf.concat([concentration_shape, [dimension, dimension]],
                              axis=0)
            return tf.ones(shape=shape, dtype=concentration.dtype)
        beta_conc = concentration + (dimension - 2.) / 2.
        beta_dist = beta.Beta(concentration1=beta_conc,
                              concentration0=beta_conc)

        # Note that the sampler below deviates from [1], by doing the sampling in
        # cholesky space. This does not change the fundamental logic of the
        # sampler, but does speed up the sampling.

        # This is the correlation coefficient between the first two dimensions.
        # This is also `r` in reference [1].
        corr12 = 2. * beta_dist.sample(seed=seed()) - 1.

        # Below we construct the Cholesky of the initial 2x2 correlation matrix,
        # which is of the form:
        # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
        # first two dimensions.
        # This is the top-left corner of the cholesky of the final sample.
        first_row = tf.concat([
            tf.ones_like(corr12)[..., tf.newaxis],
            tf.zeros_like(corr12)[..., tf.newaxis]
        ],
                              axis=-1)
        second_row = tf.concat(
            [corr12[..., tf.newaxis],
             tf.sqrt(1 - corr12**2)[..., tf.newaxis]],
            axis=-1)

        chol_result = tf.concat(
            [first_row[..., tf.newaxis, :], second_row[..., tf.newaxis, :]],
            axis=-2)

        for n in range(2, dimension):
            # Loop invariant: on entry, result has shape B + [n, n]
            beta_conc = beta_conc - 0.5
            # norm is y in reference [1].
            norm = beta.Beta(concentration1=n / 2.,
                             concentration0=beta_conc).sample(seed=seed())
            # distance shape: B + [1] for broadcast
            distance = tf.sqrt(norm)[..., tf.newaxis]
            # direction is u in reference [1].
            # direction shape: B + [n]
            direction = _uniform_unit_norm(n, concentration_shape,
                                           concentration.dtype, seed)
            # raw_correlation is w in reference [1].
            raw_correlation = distance * direction  # shape: B + [n]

            # This is the next row in the cholesky of the result,
            # which differs from the construction in reference [1].
            # In the reference, the new row `z` = chol_result @ raw_correlation^T
            # = C @ raw_correlation^T (where as short hand we use C = chol_result).
            # We prove that the below equation is the right row to add to the
            # cholesky, by showing equality with reference [1].
            # Let S be the sample constructed so far, and let `z` be as in
            # reference [1]. Then at this iteration, the new sample S' will be
            # [[S z^T]
            #  [z 1]]
            # In our case we have the cholesky decomposition factor C, so
            # we want our new row x (same size as z) to satisfy:
            #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
            #   [z 1]] =  [x k]]    [0     k]]  =       [xC^T   xx^T + k**2]]
            # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
            # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
            # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
            # distance**2).
            new_row = tf.concat(
                [raw_correlation,
                 tf.sqrt(1. - norm[..., tf.newaxis])],
                axis=-1)

            # Finally add this new row, by growing the cholesky of the result.
            chol_result = tf.concat([
                chol_result,
                tf.zeros_like(chol_result[..., 0][..., tf.newaxis])
            ],
                                    axis=-1)

            chol_result = tf.concat([chol_result, new_row[..., tf.newaxis, :]],
                                    axis=-2)

        if cholesky_space:
            return chol_result

        result = tf.matmul(chol_result, chol_result, transpose_b=True)
        # The diagonal for a correlation matrix should always be ones. Due to
        # numerical instability the matmul might not achieve that, so manually set
        # these to ones.
        result = tf.linalg.set_diag(
            result, tf.ones(shape=tf.shape(result)[:-1], dtype=result.dtype))
        # This sampling algorithm can produce near-PSD matrices on which standard
        # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
        # fail. Specifically, as documented in b/116828694, around 2% of trials
        # of 900,000 5x5 matrices (distributed according to 9 different
        # concentration parameter values) contained at least one matrix on which
        # the Cholesky decomposition failed.
        return result
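A minimal, hedged usage sketch of `sample_lkj`, assuming a scalar `concentration` so that the batch shape `B` is empty:

```python
# Draw 3 correlation matrices of dimension 4.
samples = sample_lkj(num_samples=3, dimension=4, concentration=2., seed=42)
# samples.shape ==> [3, 4, 4]; each matrix has a unit diagonal.

# The same draw in Cholesky space returns lower-triangular factors instead.
chols = sample_lkj(num_samples=3, dimension=4, concentration=2.,
                   cholesky_space=True, seed=42)
```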
Example no. 6
 def _entropy(self):
     log_normalization = 0.5 * np.log(2. * np.pi) + tf.math.log(self.scale)
     entropy = 0.5 + log_normalization
     return entropy * tf.ones_like(self.loc)
Example no. 7
def _generate_detections_per_image(boxes,
                                   scores,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
  """Generate the final detections per image given the model outputs.

  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which stacks
      box predictions from all feature levels. N is the total number of
      anchors on all levels.
    scores: a tensor with shape [N, num_classes], which stacks class
      probabilities from all feature levels. N is the total number of anchors
      on all levels, and num_classes is the number of classes predicted by the
      model. Note that the class_outputs here are the raw scores.
    max_total_size: a scalar representing maximum number of boxes retained over
      all classes.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    score_threshold: a float representing the threshold for deciding when to
      remove boxes based on score.
    pre_nms_num_boxes: an int number of top candidate detections per class
      before NMS.

  Returns:
    nms_boxes: `float` Tensor of shape [max_total_size, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [max_total_size] representing sorted
      confidence scores for detected boxes. The values are between [0, 1].
    nms_classes: `int` Tensor of shape [max_total_size] representing classes for
      detected boxes.
    valid_detections: `int` Tensor of shape [1]; only the top `valid_detections`
      boxes are valid detections.
  """
  nmsed_boxes = []
  nmsed_scores = []
  nmsed_classes = []
  num_classes_for_box = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]
  for i in range(num_classes):
    boxes_i = boxes[:, min(num_classes_for_box-1, i)]
    scores_i = scores[:, i]

    # Obtains pre_nms_num_boxes before running NMS.
    scores_i, indices = tf.nn.top_k(
        scores_i, k=tf.minimum(tf.shape(input=scores_i)[-1], pre_nms_num_boxes))
    boxes_i = tf.gather(boxes_i, indices)

    (nmsed_indices_i,
     nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
         tf.cast(boxes_i, tf.float32),
         tf.cast(scores_i, tf.float32),
         max_total_size,
         iou_threshold=nms_iou_threshold,
         score_threshold=score_threshold,
         pad_to_max_output_size=True,
         name='nms_detections_' + str(i))
    nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
    nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
    # Sets scores of invalid boxes to -1.
    nmsed_scores_i = tf.where(
        tf.less(tf.range(max_total_size), [nmsed_num_valid_i]), nmsed_scores_i,
        -tf.ones_like(nmsed_scores_i))
    nmsed_classes_i = tf.fill([max_total_size], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
    nmsed_classes.append(nmsed_classes_i)
  # Concatenate results from all classes and sort them.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
  nmsed_scores = tf.concat(nmsed_scores, axis=0)
  nmsed_classes = tf.concat(nmsed_classes, axis=0)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores,
      k=max_total_size,
      sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices)
  nmsed_classes = tf.gather(nmsed_classes, indices)
  valid_detections = tf.reduce_sum(
      input_tensor=tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
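A hedged usage sketch with random stand-in tensors; the shapes follow the docstring, while real inputs would come from a detection model's box and class heads:

```python
num_anchors, num_classes = 1000, 5
boxes = tf.random.uniform([num_anchors, 1, 4])          # class-agnostic boxes
scores = tf.random.uniform([num_anchors, num_classes])  # per-class probabilities
nms_boxes, nms_scores, nms_classes, valid_detections = (
    _generate_detections_per_image(boxes, scores, max_total_size=100))
# nms_boxes.shape ==> [100, 4]; scores of padded slots are set to -1.
```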
Example no. 8
    def __init__(self,
                 level_scale_prior=None,
                 slope_mean_prior=None,
                 slope_scale_prior=None,
                 autoregressive_coef_prior=None,
                 initial_level_prior=None,
                 initial_slope_prior=None,
                 observed_time_series=None,
                 constrain_ar_coef_stationary=True,
                 constrain_ar_coef_positive=False,
                 name=None):
        """Specify a semi-local linear trend model.

    Args:
      level_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `level_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      slope_mean_prior: optional `tfd.Distribution` instance specifying a prior
        on the `slope_mean` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      slope_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `slope_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      autoregressive_coef_prior: optional `tfd.Distribution` instance specifying
        a prior on the `autoregressive_coef` parameter. If `None`, the default
        prior is a standard `Normal(0., 1.)`. Note that the prior may be
        implicitly truncated by `constrain_ar_coef_stationary` and/or
        `constrain_ar_coef_positive`.
        Default value: `None`.
      initial_level_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial level. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      initial_slope_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial slope. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series.
        Any priors not explicitly set will be given default values according to
        the scale of the observed time series (or batch of time series). May
        optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes
        a mask `Tensor` to specify timesteps with missing observations.
        Default value: `None`.
      constrain_ar_coef_stationary: if `True`, perform inference using a
        parameterization that restricts `autoregressive_coef` to the interval
        `(-1, 1)`, or `(0, 1)` if `constrain_ar_coef_positive` is also `True`,
        corresponding to stationary processes. This will implicitly truncate
        the support of `autoregressive_coef_prior`.
        Default value: `True`.
      constrain_ar_coef_positive: if `True`, perform inference using a
        parameterization that restricts `autoregressive_coef` to be positive,
        or in `(0, 1)` if `constrain_ar_coef_stationary` is also `True`. This
        will implicitly truncate the support of `autoregressive_coef_prior`.
        Default value: `False`.
      name: the name of this model component.
        Default value: 'SemiLocalLinearTrend'.
    """

        with tf.name_scope(name or 'SemiLocalLinearTrend') as name:
            if observed_time_series is not None:
                _, observed_stddev, observed_initial = sts_util.empirical_statistics(
                    observed_time_series)
            else:
                observed_stddev, observed_initial = 1., 0.

            # Heuristic default priors. Overriding these may dramatically
            # change inference performance and results.
            if level_scale_prior is None:
                level_scale_prior = tfd.LogNormal(loc=tf.math.log(
                    .01 * observed_stddev),
                                                  scale=2.)
            if slope_mean_prior is None:
                slope_mean_prior = tfd.Normal(loc=0., scale=observed_stddev)
            if slope_scale_prior is None:
                slope_scale_prior = tfd.LogNormal(loc=tf.math.log(
                    .01 * observed_stddev),
                                                  scale=2.)
            if autoregressive_coef_prior is None:
                autoregressive_coef_prior = tfd.Normal(
                    loc=0., scale=tf.ones_like(observed_initial))
            if initial_level_prior is None:
                initial_level_prior = tfd.Normal(
                    loc=observed_initial,
                    scale=tf.abs(observed_initial) + observed_stddev)
            if initial_slope_prior is None:
                initial_slope_prior = tfd.Normal(loc=0., scale=observed_stddev)

            self._initial_state_prior = tfd.MultivariateNormalDiag(
                loc=tf.stack(
                    [initial_level_prior.mean(),
                     initial_slope_prior.mean()],
                    axis=-1),
                scale_diag=tf.stack([
                    initial_level_prior.stddev(),
                    initial_slope_prior.stddev()
                ],
                                    axis=-1))

            # Constrain the support of the autoregressive coefficient.
            if constrain_ar_coef_stationary and constrain_ar_coef_positive:
                autoregressive_coef_bijector = tfb.Sigmoid(
                )  # support in (0, 1)
            elif constrain_ar_coef_positive:
                autoregressive_coef_bijector = tfb.Softplus(
                )  # support in (0, infty)
            elif constrain_ar_coef_stationary:
                autoregressive_coef_bijector = tfb.Tanh()  # support in (-1, 1)
            else:
                autoregressive_coef_bijector = tfb.Identity()  # unconstrained

            stddev_preconditioner = tfb.Scale(scale=observed_stddev)
            scaled_softplus = tfb.Chain(
                [stddev_preconditioner, tfb.Softplus()])
            super(SemiLocalLinearTrend, self).__init__(parameters=[
                Parameter('level_scale', level_scale_prior, scaled_softplus),
                Parameter('slope_mean', slope_mean_prior,
                          stddev_preconditioner),
                Parameter('slope_scale', slope_scale_prior, scaled_softplus),
                Parameter('autoregressive_coef', autoregressive_coef_prior,
                          autoregressive_coef_bijector),
            ],
                                                       latent_size=2,
                                                       name=name)
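A short, hedged usage sketch of this component under the usual `tfp.sts` namespace; the series values below are placeholders:

```python
import tensorflow as tf
import tensorflow_probability as tfp

observed = tf.convert_to_tensor([1.2, 1.5, 1.1, 1.7, 1.9, 2.1])
trend = tfp.sts.SemiLocalLinearTrend(observed_time_series=observed)
# The component can then be combined into a full structural time series model.
model = tfp.sts.Sum([trend], observed_time_series=observed)
```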
Example no. 9
 def make_response_likelihood(self, w, x):
     if w.shape.ndims == 1:
         y_bar = tf.matmul(w[tf.newaxis], x)[0]
     else:
         y_bar = tf.matmul(w, x)
     return tfd.Normal(loc=y_bar, scale=tf.ones_like(y_bar))  # [n]
Example no. 10
def _calculate_spline_coeffs(x_data, y_data):
    """Calculates the coefficients for the spline interpolation.

  These are the values of the second derivative of the spline at `x_data`.
  See p.548 of [1].

  Below is an outline of the function when the number of observations is equal to 7.
  The coefficients are obtained by building and solving a tridiagonal linear
  system of equations with symmetric matrix

   w2,  dx2,   0,   0,   0
   dx2,  w3, dx3,   0,   0
   0,  dx3,   w4, dx4,   0
   0,    0,  dx4,  w5, dx5
   0,    0,    0, dx5,  w6

   where:
   wn = 2 * (x_data[n] - x_data[n-2])
   dxn = x_data[n] - x_data[n-1]

   and the right hand side of the equation is:
   [[3*((d2-d1)/dx2 - (d1-d0)/dx1)],
    [3*((d3-d2)/dx3 - (d2-d1)/dx2)],
    ...
   ]

   with di = y_data[..., i]

   Solve for `spline_coeffs`, so that `matrix * spline_coeffs = rhs`.

   The solution is the `spline_coeffs` parameter of the spline equation:

   y_pred = a(spline_coeffs) * t^3 + b(spline_coeffs) * t^2
            + c(spline_coeffs) * t + d(spline_coeffs)
   with t being the fraction of the distance from the x value of the current
   knot to the x value of the next knot:

   t = (x_values - x_data[:,n]) / (x_data[:,n+1] - x_data[:,n])

   and `a`, `b`, `c`, and `d` are functions of `spline_coeffs` and `x_data` and
   are provided in the `interpolate` function.

  ## References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: http://index-of.co.uk/Algorithms/Algorithms%20in%20C.pdf

  Args:
    x_data: A real `Tensor` of shape `[..., num_points]` containing
      X-coordinates of points to fit the splines to. The values have to
      be monotonically non-decreasing along the last dimension.
    y_data: A `Tensor` of the same shape and `dtype` as `x_data` containing
      Y-coordinates of points to fit the splines to.

  Returns:
     A `Tensor` of the same shape and `dtype` as `x_data`. Represents the
     spline coefficients for the cubic spline interpolation.
  """

    # `dx` is the distances between the x points. It is 1 element shorter than
    # `x_data`
    dx = x_data[..., 1:] - x_data[..., :-1]

    # `diag_values` are the diagonal values 2 * (x_data[i+1] - x_data[i-1]);
    # this tensor is 2 elements shorter than `x_data`.

    diag_values = 2.0 * (x_data[..., 2:] - x_data[..., :-2])
    superdiag = dx[..., 1:]
    subdiag = dx[..., :-1]

    corr_term = tf.logical_or(tf.equal(superdiag, 0), tf.equal(subdiag, 0))
    diag_values_corr = tf.where(corr_term, tf.ones_like(diag_values),
                                diag_values)
    superdiag_corr = tf.where(tf.equal(subdiag, 0), tf.zeros_like(superdiag),
                              superdiag)
    subdiag_corr = tf.where(tf.equal(superdiag, 0), tf.zeros_like(subdiag),
                            subdiag)
    diagonals = tf.stack([superdiag_corr, diag_values_corr, subdiag_corr],
                         axis=-2)

    # determine the rhs of the equation
    dd = (y_data[..., 1:] - y_data[..., :-1]) / dx
    dd = tf.where(tf.equal(dx, 0), tf.zeros_like(dd), dd)
    # rhs is a column vector:
    # [[-3*((y1-y0)/dx0 - (y2-y1)/dx1)], ...]
    rhs = -3 * (dd[..., :-1] - dd[..., 1:])
    rhs = tf.where(corr_term, tf.zeros_like(rhs), rhs)
    # Partial pivoting is unnecessary since the matrix is diagonally dominant.
    spline_coeffs = tf.linalg.tridiagonal_solve(diagonals,
                                                rhs,
                                                partial_pivoting=False)
    # Reshape `spline_coeffs`
    zero = tf.zeros_like(dx[..., :1], dtype=x_data.dtype)
    spline_coeffs = tf.concat([zero, spline_coeffs, zero], axis=-1)
    return spline_coeffs
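A hedged usage sketch with five hand-picked knots; with the natural boundary conditions used above, the first and last coefficients come out as zero:

```python
x_knots = tf.constant([0., 1., 2., 3., 4.], dtype=tf.float64)
y_knots = tf.constant([0., 1., 0., 1., 0.], dtype=tf.float64)
coeffs = _calculate_spline_coeffs(x_knots, y_knots)
# coeffs.shape ==> [5]; coeffs[0] == coeffs[-1] == 0.
```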
Example no. 11
def _replicate(n, tensor):
    """Replicate the input tensor n times along a new (major) dimension."""
    # TODO(axch) Does this already exist somewhere?  Should it get contributed?
    multiples = tf.concat([[n], tf.ones_like(tensor.shape)], axis=0)
    return tf.tile(tf.expand_dims(tensor, axis=0), multiples)
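A tiny usage sketch (values are placeholders):

```python
# Replicate a length-2 vector 3 times along a new leading axis.
replicated = _replicate(3, tf.constant([1., 2.]))
# replicated.shape ==> [3, 2]
```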
Example no. 12
        def collater_fn(batch: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
            batch = mm_collater_fn(batch)

            retrieve_masked = config.get('retrieve_masked', False)

            # Subselect mentions for which to retrieve corresponding memory.
            # We want to sample mentions which are linked, not masked, and not padded.
            scores = tf.random.uniform(
                tf.shape(batch['mention_target_is_masked'])) + 2 * tf.cast(
                    batch['mention_target_weights'], tf.float32)

            if not retrieve_masked:
                scores -= tf.cast(batch['mention_target_is_masked'],
                                  tf.float32)

            _, mention_target_retrieval_indices = tf.math.top_k(
                scores, k=max_retrieval_indices)

            mention_retrieval_indices = tf.gather(
                batch['mention_target_indices'],
                mention_target_retrieval_indices)
            retrieval_mention_mask = tf.gather(
                batch['mention_target_weights'],
                mention_target_retrieval_indices)
            # set weight to 0 for masked retrievals if we do not want to include these
            if not retrieve_masked:
                retrieval_mention_mask *= tf.gather(
                    1 - tf.cast(batch['mention_target_is_masked'], tf.int32),
                    mention_target_retrieval_indices)

            retrieval_mention_start_positions = tf.gather(
                batch['mention_start_positions'], mention_retrieval_indices)
            retrieval_text_identifiers = tf.gather(batch['text_identifiers'],
                                                   mention_retrieval_indices)
            retrieval_mention_hash = mention_preprocess_utils.modified_cantor_pairing(
                tf.cast(retrieval_mention_start_positions, tf.int64),
                retrieval_text_identifiers)
            retrieval_mention_hash = tf.cast(retrieval_mention_hash, tf.int32)

            retrieval_mention_sort_ids = tf.searchsorted(
                memory_hash_sorted, retrieval_mention_hash)

            # Searchsorted does not check whether value is present in array, just
            # finds insertion point. Here we check and set to default retrieval if not
            # present.
            hash_not_present_mask = tf.not_equal(
                retrieval_mention_hash,
                tf.gather(memory_hash_sorted, retrieval_mention_sort_ids))
            hash_not_present = tf.where(hash_not_present_mask)
            update_values = tf.fill((tf.shape(hash_not_present)[0], ),
                                    tf.shape(hash_sorted_idx)[0] - 1)
            retrieval_mention_sort_ids = tf.tensor_scatter_nd_update(
                retrieval_mention_sort_ids, hash_not_present, update_values)

            # Set mask to 0 if no mention is found
            batch['retrieval_mention_mask'] = retrieval_mention_mask * (
                1 - tf.cast(hash_not_present_mask, tf.int32))

            retrieval_mention_ids = tf.gather(hash_sorted_idx,
                                              retrieval_mention_sort_ids)
            retrieval_mention_values = tf.gather(memory_table,
                                                 retrieval_mention_ids)
            # Match passage entity_ids with memory entity ids as sanity check.
            if memory_entity_pattern:
                retrieval_memory_entity_ids = tf.gather(
                    memory_entity_ids, retrieval_mention_ids)
                retrieval_passage_entity_ids = tf.gather(
                    tf.cast(batch['mention_target_ids'], tf.int32),
                    mention_target_retrieval_indices)
                entity_does_not_match = tf.not_equal(
                    retrieval_memory_entity_ids, retrieval_passage_entity_ids)

                batch['entity_does_not_match'] = tf.logical_and(
                    entity_does_not_match,
                    tf.cast(batch['retrieval_mention_mask'], tf.bool))

            batch['retrieval_mention_values'] = retrieval_mention_values
            batch['retrieval_mention_scores'] = tf.ones_like(
                batch['retrieval_mention_mask'])
            batch['retrieval_mention_batch_positions'] = tf.gather(
                batch['mention_batch_positions'], mention_retrieval_indices)
            batch['retrieval_mention_start_positions'] = retrieval_mention_start_positions  # pylint: disable=line-too-long
            batch['retrieval_mention_end_positions'] = tf.gather(
                batch['mention_end_positions'], mention_retrieval_indices)
            batch['mention_retrieval_indices'] = mention_retrieval_indices

            return batch
Example no. 13
 def l1norm(x):
     x = tf.concat([x, tf.ones_like(x[..., :1]) * 1e-6], axis=-1)
     x = x / tf.linalg.norm(x, ord=1, axis=-1, keepdims=True)
     return x
Example no. 14
    if inspect.isclass(condition):
        condition = lambda distribution, cls=condition: isinstance(  # pylint: disable=g-long-lambda
            distribution, cls)
    ASVI_SURROGATE_SUBSTITUTIONS[condition] = substitution_fn


# Default substitutions attempt to express distributions using the most
# flexible available parameterization.
# pylint: disable=g-long-lambda
register_asvi_substitution_rule(
    half_normal.HalfNormal, lambda dist: truncated_normal.TruncatedNormal(
        loc=0., scale=dist.scale, low=0., high=dist.scale * 10.))
register_asvi_substitution_rule(
    uniform.Uniform, lambda dist: shift.Shift(dist.low)
    (scale_lib.Scale(dist.high - dist.low)
     (beta.Beta(concentration0=tf.ones_like(dist.mean()), concentration1=1.))))
register_asvi_substitution_rule(
    exponential.Exponential,
    lambda dist: gamma.Gamma(concentration=1., rate=dist.rate))
register_asvi_substitution_rule(
    chi2.Chi2, lambda dist: gamma.Gamma(concentration=0.5 * dist.df, rate=0.5))

# pylint: enable=g-long-lambda


# TODO(kateslin): Add support for models with prior+likelihood written as
# a single JointDistribution.
def build_asvi_surrogate_posterior(prior,
                                   mean_field=False,
                                   initial_prior_weight=0.5,
                                   seed=None,
Example no. 15
def lossfun(x, alpha, scale, approximate=False, epsilon=1e-6):
  r"""Implements the general form of the loss.

  This implements the rho(x, \alpha, c) function described in "A General and
  Adaptive Robust Loss Function", Jonathan T. Barron,
  https://arxiv.org/abs/1701.03077.

  Args:
    x: The residual for which the loss is being computed. x can have any shape,
      and alpha and scale will be broadcasted to match x's shape if necessary.
      Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the loss (\alpha in the paper), where more
      negative values produce a loss with more robust behavior (outliers "cost"
      less), and more positive values produce a loss with less robust behavior
      (outliers are penalized more heavily). Alpha can be any value in
      [-infinity, infinity], but the gradient of the loss with respect to alpha
      is 0 at -infinity, infinity, 0, and 2. Must be a tensorflow tensor or
      numpy array of floats with the same precision as `x`. Varying alpha allows
      for smooth interpolation between a number of discrete robust losses:
      alpha=-Infinity: Welsch/Leclerc Loss.
      alpha=-2: Geman-McClure loss.
      alpha=0: Cauchy/Lorentzian loss.
      alpha=1: Charbonnier/pseudo-Huber loss.
      alpha=2: L2 loss.
    scale: The scale parameter of the loss. When |x| < scale, the loss is an
      L2-like quadratic bowl, and when |x| > scale the loss function takes on a
      different shape according to alpha. Must be a tensorflow tensor or numpy
      array of single-precision floats.
    approximate: a bool, where if True, this function returns an approximate and
      faster form of the loss, as described in the appendix of the paper. This
      approximation holds well everywhere except as x and alpha approach zero.
    epsilon: A float that determines how inaccurate the "approximate" version of
      the loss will be. Larger values are less accurate but more numerically
      stable. Must be greater than single-precision machine epsilon.

  Returns:
    The losses for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of single precision floats.
  """
  # `scale` and `alpha` must have the same type as `x`.
  float_dtype = x.dtype
  tf.debugging.assert_type(scale, float_dtype)
  tf.debugging.assert_type(alpha, float_dtype)
  # `scale` must be > 0.
  assert_ops = [tf.Assert(tf.reduce_all(tf.greater(scale, 0.)), [scale])]
  with tf.control_dependencies(assert_ops):
    # Broadcast `alpha` and `scale` to have the same shape as `x`.
    alpha = tf.broadcast_to(alpha, tf.shape(x))
    scale = tf.broadcast_to(scale, tf.shape(x))

    if approximate:
      # `epsilon` must be greater than single-precision machine epsilon.
      assert epsilon > np.finfo(np.float32).eps
      # Compute an approximate form of the loss which is faster, but inaccurate
      # when x and alpha are near zero.
      b = tf.abs(alpha - tf.cast(2., float_dtype)) + epsilon
      d = tf.where(
          tf.greater_equal(alpha, 0.), alpha + epsilon, alpha - epsilon)
      loss = (b / d) * (tf.pow(tf.square(x / scale) / b + 1., 0.5 * d) - 1.)
    else:
      # Compute the exact loss.

      # This will be used repeatedly.
      squared_scaled_x = tf.square(x / scale)

      # The loss when alpha == 2.
      loss_two = 0.5 * squared_scaled_x
      # The loss when alpha == 0.
      loss_zero = util.log1p_safe(0.5 * squared_scaled_x)
      # The loss when alpha == -infinity.
      loss_neginf = -tf.math.expm1(-0.5 * squared_scaled_x)
      # The loss when alpha == +infinity.
      loss_posinf = util.expm1_safe(0.5 * squared_scaled_x)

      # The loss when not in one of the above special cases.
      machine_epsilon = tf.cast(np.finfo(np.float32).eps, float_dtype)
      # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by.
      beta_safe = tf.maximum(machine_epsilon, tf.abs(alpha - 2.))
      # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by.
      alpha_safe = tf.where(
          tf.greater_equal(alpha, 0.), tf.ones_like(alpha),
          -tf.ones_like(alpha)) * tf.maximum(machine_epsilon, tf.abs(alpha))
      loss_otherwise = (beta_safe / alpha_safe) * (
          tf.pow(squared_scaled_x / beta_safe + 1., 0.5 * alpha) - 1.)

      # Select which of the cases of the loss to return.
      loss = tf.where(
          tf.equal(alpha, -tf.cast(float('inf'), float_dtype)), loss_neginf,
          tf.where(
              tf.equal(alpha, 0.), loss_zero,
              tf.where(
                  tf.equal(alpha, 2.), loss_two,
                  tf.where(
                      tf.equal(alpha, tf.cast(float('inf'), float_dtype)),
                      loss_posinf, loss_otherwise))))

    return loss
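A hedged sketch of how the special cases listed in the docstring fall out of `lossfun`; note that `alpha` and `scale` must share `x`'s dtype, as asserted above:

```python
x = tf.constant([-2., -1., 0., 1., 2.])
# alpha == 2 recovers the L2 loss 0.5 * (x / scale)**2.
l2_like = lossfun(x, alpha=tf.constant(2.), scale=tf.constant(1.))
# l2_like ==> [2., 0.5, 0., 0.5, 2.]
# alpha == 0 recovers the Cauchy/Lorentzian loss log1p(0.5 * (x / scale)**2).
cauchy_like = lossfun(x, alpha=tf.constant(0.), scale=tf.constant(1.))
```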
Example no. 16
def draw_sample(num_samples, num_classes, logits, num_trials, dtype, seed):
    """Sample a multinomial.

  The batch shape is given by broadcasting num_trials with
  remove_last_dimension(logits).

  Args:
    num_samples: Python int or singleton integer Tensor: number of multinomial
      samples to draw.
    num_classes: Python int or singleton integer Tensor: number of classes.
    logits: Floating Tensor with last dimension k, of (unnormalized) logit
      probabilities per class.
    num_trials: Tensor of number of categorical trials each multinomial consists
      of.  num_trials[..., tf.newaxis] must broadcast with logits.
    dtype: dtype at which to emit samples.
    seed: Random seed.

  Returns:
    samples: Tensor of given dtype and shape [n] + batch_shape + [k].
  """
    with tf.name_scope('draw_sample'):
        # broadcast the num_trials and logits to same shape
        num_trials = tf.ones_like(logits[..., 0],
                                  dtype=num_trials.dtype) * num_trials
        logits = tf.ones_like(num_trials[..., tf.newaxis],
                              dtype=logits.dtype) * logits

        # flatten the total_count and logits
        # flat_logits has shape [B1B2...Bm, num_classes]
        flat_logits = tf.reshape(logits, [-1, num_classes])
        flat_num_trials = num_samples * tf.reshape(num_trials,
                                                   [-1])  # [B1B2...Bm]

        # Compute each (logits, num_trials) pair separately via map_fn.

        # Using just one batch samplers.categorical call doesn't work because that
        # requires num_trials to be the same across all members of the batch of
        # logits.  This restriction makes sense for samplers.categorical because
        # for it, num_trials is part of the returned shape.  However, the
        # multinomial sampler does not need that restriction, because it sums out
        # exactly that dimension.

        # One possibility would be to draw a batch categorical whose sample count is
        # max(num_trials) and mask out the excess ones.  However, if the elements of
        # num_trials vary widely, this can be wasteful of memory.

        # TODO(b/123763054, b/112152209): Revisit the possibility of writing this
        # with a batch categorical followed by batch unsorted_segment_sum, once both
        # of those work and are memory-efficient enough.
        def _sample_one_batch_member(args):
            logits, num_cat_samples, item_seed = args  # [K], []
            # x has shape [1, num_cat_samples = num_samples * num_trials]
            x = samplers.categorical(logits[tf.newaxis, ...],
                                     num_cat_samples,
                                     seed=item_seed)
            x = tf.reshape(x, shape=[num_samples,
                                     -1])  # [num_samples, num_trials]
            x = tf.one_hot(
                x, depth=num_classes)  # [num_samples, num_trials, num_classes]
            x = tf.reduce_sum(x, axis=-2)  # [num_samples, num_classes]
            return tf.cast(x, dtype=dtype)

        flat_seeds = samplers.split_seed(seed,
                                         n=tf.shape(flat_logits)[0],
                                         salt='multinomial_draw_sample')
        x = tf.map_fn(
            _sample_one_batch_member,
            [flat_logits, flat_num_trials, flat_seeds],
            fn_output_signature=dtype)  # [B1B2...Bm, num_samples, num_classes]

        # reshape the results to proper shape
        x = tf.transpose(a=x, perm=[1, 0, 2])
        final_shape = tf.concat(
            [[num_samples], tf.shape(num_trials), [num_classes]], axis=0)
        x = tf.reshape(x, final_shape)

        return x
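A hedged usage sketch of `draw_sample`; the seed is assumed to be a stateless seed pair of the kind accepted by `samplers.split_seed`:

```python
# A batch of 3 multinomials over 4 classes, each with a different trial count.
logits = tf.math.log(tf.constant([[0.1, 0.2, 0.3, 0.4]] * 3))
num_trials = tf.constant([10, 20, 30])
samples = draw_sample(
    num_samples=2, num_classes=4, logits=logits, num_trials=num_trials,
    dtype=tf.int32, seed=[1, 2])
# samples.shape ==> [2, 3, 4]; samples[i, j] sums to num_trials[j].
```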
Example no. 17
def segment_diff(x,
                 segment_ids,
                 order=1,
                 exclusive=False,
                 dtype=None,
                 name=None):
    """Computes difference of successive elements in a segment.

  For a complete description of segment_* ops see documentation of
  `tf.segment_max`. This op extends the `diff` functionality to segmented
  inputs.

  The behaviour of this op is the same as that of the op `diff` within each
  segment. The result is effectively a concatenation of the results of `diff`
  applied to each segment.

  ## Example

  ```python
    x = tf.constant([2, 5, 1, 7, 9] + [32, 10, 12, 3] + [4, 8, 5])
    segments = tf.constant([0, 0, 0, 0, 0] + [1, 1, 1, 1] + [2, 2, 2])
    # First order diff. Expected result: [3, -4, 6, 2, -22, 2, -9, 4, -3]
    dx1 = segment_diff(
        x, segment_ids=segments, order=1, exclusive=True)
    # Non-exclusive, second order diff.
    # Expected result: [2, 5, -1, 2, 8, 32, 10, -20, -7, 4, 8, 1]
    dx2 = segment_diff(
        x, segment_ids=segments, order=2, exclusive=False)
  ```

  Args:
    x: A rank 1 `Tensor` of any dtype for which arithmetic operations are
      permitted.
    segment_ids: A `Tensor`. Must be one of the following types: int32, int64. A
      1-D tensor whose size is equal to the size of `x`. Values should be sorted
      and can be repeated.
    order: Positive Python int. The order of the difference to compute. `order =
      1` corresponds to the difference between successive elements.
      Default value: 1
    exclusive: Python bool. See description above.
      Default value: False
    dtype: Optional `tf.Dtype`. If supplied, the dtype for `x` to use when
      converting to `Tensor`.
      Default value: None which maps to the default dtype inferred by TF.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: None which is mapped to the default name 'segment_diff'.

  Returns:
    diffs: A `Tensor` of the same dtype as `x`. Assuming that each segment is
      of length greater than or equal to order, if `exclusive` is True,
      then the size is `n-order*k` where `n` is the size of x,
      `k` is the number of different segment ids supplied if `segment_ids` is
      not None or 1 if `segment_ids` is None. If any of the segments is of
      length less than the order, then the size is:
      `n-sum(min(order, length(segment_j)), j)` where the sum is over segments.
      If `exclusive` is False, then the size is `n`.
  """
    with tf.compat.v1.name_scope(name, default_name='segment_diff',
                                 values=[x]):
        x = tf.convert_to_tensor(x, dtype=dtype)
        raw_diffs = diff_ops.diff(x, order=order, exclusive=exclusive)
        if segment_ids is None:
            return raw_diffs
        # If segment ids are supplied, raw_diffs are incorrect at locations:
        # p, p+1, ... min(p+order-1, m_p-1) where p is the index of the first
        # element of a segment other than the very first segment (which is
        # already correct). m_p is the segment length.
        # Find positions where the segments begin.
        has_segment_changed = tf.concat(
            [[False],
             tf.not_equal(segment_ids[1:] - segment_ids[:-1], 0)],
            axis=0)
        # Shape [k, 1]
        segment_start_index = tf.cast(tf.where(has_segment_changed),
                                      dtype=tf.int32)
        segment_end_index = tf.concat([
            tf.reshape(segment_start_index, [-1])[1:], [tf.size(segment_ids)]
        ],
                                      axis=0)
        segment_end_index = tf.reshape(segment_end_index, [-1, 1])
        # The indices of locations that need to be adjusted. This needs to be
        # constructed in steps. First we generate p, p+1, ... p+order-1.
        # Shape [num_segments-1, order]
        fix_indices = (segment_start_index +
                       tf.range(order, dtype=segment_start_index.dtype))
        in_bounds = tf.where(fix_indices < segment_end_index)
        # Keep only the ones in bounds.
        fix_indices = tf.reshape(tf.gather_nd(fix_indices, in_bounds), [-1, 1])

        needs_fix = tf.scatter_nd(
            fix_indices,
            # Unfortunately, scatter_nd doesn't support bool on GPUs so we need to
            # do ints here and then convert to bool.
            tf.reshape(tf.ones_like(fix_indices, dtype=tf.int32), [-1]),
            shape=tf.shape(x))
        # If exclusive is False, then needs_fix means we need to replace the values
        # in raw_diffs at those locations with the values in x.
        needs_fix = tf.cast(needs_fix, dtype=tf.bool)
        if not exclusive:
            return tf.where(needs_fix, x, raw_diffs)

        # If exclusive is True, we have to be more careful. The raw_diffs
        # computation has removed the first 'order' elements. After removing the
        # corresponding elements from needs_fix, we use it to remove the elements
        # from raw_diffs.
        return tf.boolean_mask(raw_diffs, tf.logical_not(needs_fix[order:]))
Example no. 18
def _make_black_objective_and_vega_func(prices, forwards, strikes, expiries,
                                        is_call_options, discount_factors):
    """Produces an objective and vega function for the Black Scholes model.

  The returned function maps volatilities to a tuple of objective function
  values and their gradients with respect to the volatilities. The objective
  function is the difference between Black Scholes prices and observed market
  prices, whereas the gradient is called vega of the option. That is:

  ```
  g(s) = (f(s) - a, f'(s))
  ```

  Where `g` is the returned function taking volatility parameter `s`, `f` the
  Black Scholes price with all other variables curried and `f'` its derivative,
  and `a` the observed market prices of the options. Hence `g` calculates the
  information necessary for finding the volatility implied by observed market
  prices for options with given terms using first order methods.

  #### References
  [1] Hull, J., 2018. Options, Futures, and Other Derivatives. Harlow, England.
  Pearson. (p.358 - 361)

  Args:
    prices: A real `Tensor` of any shape. The observed market prices of the
      assets.
    forwards: A real `Tensor` of the same shape and dtype as `prices`. The
      current forward prices to expiry.
    strikes: A real `Tensor` of the same shape and dtype as `prices`. The strike
      prices of the options.
    expiries: A real `Tensor` of same shape and dtype as `forwards`. The expiry
      for each option. The units should be such that `expiry * volatility**2` is
      dimensionless.
    is_call_options: A boolean `Tensor` of the same shape as `forwards`.
      `True` where the option is a call, `False` where it is a put.
    discount_factors: A real `Tensor` of the same shape and dtype as `forwards`.
      The total discount factors to apply.

  Returns:
    A function from volatilities to a Black Scholes objective and its
    derivative (which is coincident with Vega).
  """
    dtype = prices.dtype
    phi = tfp.distributions.Normal(loc=tf.zeros(1, dtype=dtype),
                                   scale=tf.ones(1, dtype=dtype))
    # orientations will decide the normalization strategy.
    orientations = strikes >= forwards
    # normalization is the greater of strikes or forwards
    normalization = tf.where(orientations, strikes, forwards)
    normalized_prices = prices / normalization
    if discount_factors is not None:
        normalized_prices /= discount_factors
    else:
        discount_factors = tf.ones_like(normalized_prices)

    units = tf.ones_like(forwards)
    # y is 1 when strikes >= forwards and strikes/forwards otherwise
    y = tf.where(orientations, units, strikes / forwards)
    # x is forwards/strikes when strikes >= forwards and 1 otherwise
    x = tf.where(orientations, forwards / strikes, units)
    lnz = tf.math.log(forwards) - tf.math.log(strikes)
    sqrt_t = tf.sqrt(expiries)
    if is_call_options is not None:
        is_call_options = tf.convert_to_tensor(is_call_options,
                                               dtype=tf.bool,
                                               name='is_call_options')

    def _black_objective_and_vega(volatilities):
        """Calculate the Black Scholes price and vega for a given volatility.

    This method returns normalized results.

    Args:
      volatilities: A real `Tensor` of same shape and dtype as `forwards`. The
        volatility to expiry.

    Returns:
      A tuple containing (value, gradient) of the black scholes price, both of
        which are `Tensor`s of the same shape and dtype as `volatilities`.
    """
        v = volatilities * sqrt_t
        d1 = (lnz / v + v / 2)
        d2 = d1 - v
        implied_prices = x * phi.cdf(d1) - y * phi.cdf(d2)
        if is_call_options is not None:
            put_prices = implied_prices - x + y
            implied_prices = tf.where(
                tf.broadcast_to(is_call_options, tf.shape(put_prices)),
                implied_prices, put_prices)
        vega = x * phi.prob(d1) * sqrt_t / discount_factors
        return implied_prices - normalized_prices, vega

    return _black_objective_and_vega
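A hedged sketch of wiring up the returned objective for a single call option; the market data below are placeholders:

```python
prices = tf.constant([5.0])
forwards = tf.constant([100.0])
strikes = tf.constant([105.0])
expiries = tf.constant([1.0])
is_call_options = tf.constant([True])
objective_and_vega = _make_black_objective_and_vega_func(
    prices, forwards, strikes, expiries, is_call_options, discount_factors=None)
# Evaluate the (normalized) pricing error and vega at a trial volatility of 20%.
value, vega = objective_and_vega(tf.constant([0.2]))
```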
Example no. 19
 def _stddev(self):
     return self.scale * tf.ones_like(self.loc)
Example no. 20
    def _parse_train_data(self, data):
        """Parse data for ShapeMask training."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        masks = data['groundtruth_instance_masks']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # If not using categories, collapse all classes into a single foreground class.
        if not self._use_category:
            classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, masks = input_utils.random_horizontal_flip(
                image, boxes, masks)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Resizes and crops boxes and masks.
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  self._output_size, offset)

        # Filters out ground truth boxes that are all zeros.
        indices = input_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        masks = tf.gather(masks, indices)

        # Assigns anchors.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size, self._output_size)
        anchor_labeler = anchor.AnchorLabeler(input_anchor,
                                              self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(
             boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

        # Sample groundtruth masks/boxes/classes for mask branch.
        num_masks = tf.shape(masks)[0]
        mask_shape = tf.shape(masks)[1:3]

        # Pad sampled boxes/masks/classes to a constant batch size.
        padded_boxes = input_utils.pad_to_fixed_size(boxes,
                                                     self._num_sampled_masks)
        padded_classes = input_utils.pad_to_fixed_size(classes,
                                                       self._num_sampled_masks)
        padded_masks = input_utils.pad_to_fixed_size(masks,
                                                     self._num_sampled_masks)

        # Randomly sample groundtruth masks for mask branch training. For images
        # without groundtruth masks, the dummy padded tensors are sampled.
        rand_indices = tf.random.shuffle(
            tf.range(tf.maximum(num_masks, self._num_sampled_masks)))
        rand_indices = tf.math.mod(rand_indices, tf.maximum(num_masks, 1))
        rand_indices = rand_indices[0:self._num_sampled_masks]
        rand_indices = tf.reshape(rand_indices, [self._num_sampled_masks])

        sampled_boxes = tf.gather(padded_boxes, rand_indices)
        sampled_classes = tf.gather(padded_classes, rand_indices)
        sampled_masks = tf.gather(padded_masks, rand_indices)
        # Jitter the sampled boxes to mimic the noisy detections.
        sampled_boxes = box_utils.jitter_boxes(
            sampled_boxes, noise_scale=self._box_jitter_scale)
        sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)
        # Compute mask targets in feature crop. A feature crop fully contains a
        # sampled box.
        mask_outer_boxes = box_utils.compute_outer_boxes(
            sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
        mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes,
                                                self._output_size)
        # Compensate the offset of mask_outer_boxes to map it back to original image
        # scale.
        mask_outer_boxes_ori = mask_outer_boxes
        mask_outer_boxes_ori += tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
        mask_outer_boxes_ori /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                        [1, 2])
        norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
            mask_outer_boxes_ori, mask_shape)

        # Set sampled_masks shape to [batch_size, height, width, 1].
        sampled_masks = tf.cast(tf.expand_dims(sampled_masks, axis=-1),
                                tf.float32)
        mask_targets = tf.image.crop_and_resize(
            sampled_masks,
            norm_mask_outer_boxes_ori,
            box_indices=tf.range(self._num_sampled_masks),
            crop_size=[self._mask_crop_size, self._mask_crop_size],
            method='bilinear',
            extrapolation_value=0,
            name='train_mask_targets')
        mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                                tf.ones_like(mask_targets),
                                tf.zeros_like(mask_targets))
        mask_targets = tf.squeeze(mask_targets, axis=-1)
        if self._up_sample_factor > 1:
            fine_mask_targets = tf.image.crop_and_resize(
                sampled_masks,
                norm_mask_outer_boxes_ori,
                box_indices=tf.range(self._num_sampled_masks),
                crop_size=[
                    self._mask_crop_size * self._up_sample_factor,
                    self._mask_crop_size * self._up_sample_factor
                ],
                method='bilinear',
                extrapolation_value=0,
                name='train_mask_targets')
            fine_mask_targets = tf.where(
                tf.greater_equal(fine_mask_targets, 0.5),
                tf.ones_like(fine_mask_targets),
                tf.zeros_like(fine_mask_targets))
            fine_mask_targets = tf.squeeze(fine_mask_targets, axis=-1)
        else:
            fine_mask_targets = mask_targets

        # If bfloat16 is used, cast the input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
        if self._mask_train_class == 'all':
            mask_is_valid = valid_image * tf.ones_like(sampled_classes,
                                                       tf.int32)
        else:
            # Get the intersection of sampled classes with training splits.
            mask_valid_classes = tf.cast(
                tf.expand_dims(
                    class_utils.coco_split_class_ids(self._mask_train_class),
                    1), sampled_classes.dtype)
            match = tf.reduce_any(
                tf.equal(tf.expand_dims(sampled_classes, 0),
                         mask_valid_classes), 0)
            mask_is_valid = valid_image * tf.cast(match, tf.int32)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
            # For ShapeMask.
            'mask_boxes': sampled_boxes,
            'mask_outer_boxes': mask_outer_boxes,
            'mask_targets': mask_targets,
            'fine_mask_targets': fine_mask_targets,
            'mask_classes': sampled_classes,
            'mask_is_valid': mask_is_valid,
        }
        return image, labels
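
The crop-and-resize step above is the core of the mask-target computation: each sampled groundtruth mask is cropped with its own normalized outer box, resized to a fixed crop size, and then binarized at 0.5. A minimal, self-contained sketch of that step on dummy data (the names, sizes, and boxes below are illustrative, not the parser's real inputs):

import tensorflow as tf

# Toy stand-ins: 2 sampled instance masks over a 32x32 image.
num_sampled_masks = 2
mask_crop_size = 8
sampled_masks = tf.cast(
    tf.random.uniform([num_sampled_masks, 32, 32, 1]) > 0.5, tf.float32)
# Normalized outer boxes in [y1, x1, y2, x2] format, one per sampled mask.
norm_outer_boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                                [0.25, 0.25, 1.0, 1.0]])

# Crop each mask with its own box and resize to the fixed crop size.
crops = tf.image.crop_and_resize(
    sampled_masks,
    norm_outer_boxes,
    box_indices=tf.range(num_sampled_masks),
    crop_size=[mask_crop_size, mask_crop_size],
    method='bilinear',
    extrapolation_value=0)

# Binarize the bilinear output so the targets are hard 0/1 masks.
mask_targets = tf.where(crops >= 0.5,
                        tf.ones_like(crops),
                        tf.zeros_like(crops))
mask_targets = tf.squeeze(mask_targets, axis=-1)  # [num_sampled_masks, 8, 8]
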
Example n. 21
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate
      invalid boxes.
    gt_classes: a tensor of shape [batch_size, MAX_INSTANCES] representing
      the groundtruth box classes. It is padded with -1s to indicate invalid
      classes.

  Returns:
    matched_gt_boxes: a tensor of shape [batch_size, N, 4] representing the
      matched groundtruth box coordinates for each input box. If a box does
      not overlap with any groundtruth box, its matched box is set to all 0s.
    matched_gt_classes: a tensor of shape [batch_size, N] representing the
      matched groundtruth classes for each input box. If a box does not
      overlap with any groundtruth box, its matched class is set to 0, which
      corresponds to the background class.
    matched_gt_indices: a tensor of shape [batch_size, N] representing the
      indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If a box does not overlap with any groundtruth box, the index
      of its matched groundtruth box is set to -1.
    matched_iou: a tensor of shape [batch_size, N] representing the IoU
      between each box and its matched groundtruth box. The matched IoU is
      the maximum IoU between the box and all groundtruth boxes.
    iou: a tensor of shape [batch_size, N, K] representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and an
      invalid groundtruth box, whose coordinates are [-1, -1, -1, -1], is -1.
  """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # max_iou <- [batch_size, N]
    # matched_iou is 0.0 when a box overlaps no real groundtruth box, and -1.0
    # when there are no valid groundtruth boxes at all (all-padded gt).
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes, dtype=tf.float32), matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
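
A quick, hedged sanity check of the matching convention on toy inputs. `box_utils.bbox_overlap` is not reproduced here, so this sketch substitutes a naive pairwise-IoU helper; it returns 0 rather than -1 for the padded groundtruth box, but that still triggers the same `matched_iou <= 0` background path:

import tensorflow as tf

def pairwise_iou(boxes, gt_boxes):
  # Naive batched IoU: [batch, N, 4] x [batch, K, 4] -> [batch, N, K].
  y1a, x1a, y2a, x2a = tf.split(boxes[:, :, None, :], 4, axis=-1)
  y1b, x1b, y2b, x2b = tf.split(gt_boxes[:, None, :, :], 4, axis=-1)
  inter_h = tf.maximum(tf.minimum(y2a, y2b) - tf.maximum(y1a, y1b), 0.0)
  inter_w = tf.maximum(tf.minimum(x2a, x2b) - tf.maximum(x1a, x1b), 0.0)
  inter = inter_h * inter_w
  union = (y2a - y1a) * (x2a - x1a) + (y2b - y1b) * (x2b - x1b) - inter
  return tf.squeeze(inter / tf.maximum(union, 1e-8), axis=-1)

boxes = tf.constant([[[0., 0., 10., 10.], [50., 50., 60., 60.]]])     # [1, 2, 4]
gt_boxes = tf.constant([[[0., 0., 10., 10.], [-1., -1., -1., -1.]]])  # [1, 2, 4]

iou = pairwise_iou(boxes, gt_boxes)             # [1, 2, 2]
matched_iou = tf.reduce_max(iou, axis=-1)       # [[1.0, 0.0]]
background = tf.less_equal(matched_iou, 0.0)    # [[False, True]]: box 1 is background
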
Example n. 22
 def ood_logits(logits, ood_log_density):
   logits = tf.concat(
       [logits, tf.ones_like(logits[..., :1]) * ood_log_density], axis=-1)
   return logits
 def left_continuous_cdf(x):
     ones = tf.ones_like(x)
     answer = tf1.where(x <= 3, 0.6 * ones, ones)
     answer = tf1.where(x <= 2, 0.3 * ones, answer)
     answer = tf1.where(x <= 1, 0.1 * ones, answer)
     return tf1.where(x <= 0, 0 * ones, answer)
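
The `ood_logits` helper above appends one extra column, filled with a constant out-of-distribution log-density, to the per-class logits; `tf.ones_like(logits[..., :1])` is what gives that column the right batch shape. A minimal check on made-up numbers:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0],
                      [0.1, 0.2, 0.3]])   # [batch, num_classes]
ood_log_density = -4.0

extended = tf.concat(
    [logits, tf.ones_like(logits[..., :1]) * ood_log_density], axis=-1)
# extended has shape [2, 4]; the new last column is -4.0 in every row.
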
Example n. 24
  def get_loss_tensors(self, f0_candidates, freqs, amps):
    """Get traces of loss to estimate fundamental frequency.

    Args:
      f0_candidates: Frequencies of candidates in hertz. [batch, time, freq].
      freqs: Frequencies of sinusoids in hertz. [batch, time, freq].
      amps: Amplitudes of sinusoids, greater than 0. [batch, time, freq].

    Returns:
      sinusoids_loss: -log p(sinusoids|harmonics), [batch, time, f0_candidate].
      harmonics_loss: -log p(harmonics|sinusoids), [batch, time, f0_candidate].
    """
    # ==========================================================================
    # P(sinusoids | candidate_harmonics).
    # ==========================================================================
    p_sinusoids_given_harmonics = self.get_p_sinusoids_given_harmonics()

    # Treat each partial as a candidate.
    # Get the ratio of each partial to each candidate.
    # -> [batch, time, candidate, partial]
    freq_ratios = safe_divide(freqs[:, :, tf.newaxis, :],
                              f0_candidates[:, :, :, tf.newaxis])
    nll_sinusoids = - p_sinusoids_given_harmonics.log_prob(freq_ratios)

    a = tf.convert_to_tensor(amps[:, :, tf.newaxis, :])

    # # Don't count sinusoids that are less than 1 std > mean.
    # a_mean, a_var = tf.nn.moments(a, axes=-1, keepdims=True)
    # a = tf.where(a > a_mean + 0.5 * a_var**0.5, a, tf.zeros_like(a))

    # Weighted sum by sinusoid amplitude.
    # -> [batch, time, candidate]
    sinusoids_loss = safe_divide(tf.reduce_sum(nll_sinusoids * a, axis=-1),
                                 tf.reduce_sum(a, axis=-1))

    # ==========================================================================
    # P(candidate_harmonics | sinusoids)
    # ==========================================================================
    p_harm_given_sin = self.get_p_harmonics_given_sinusoids(freqs, amps)
    harmonics = self.get_candidate_harmonics(f0_candidates, as_midi=True)

    # Need to rearrange shape as tfp expects, [sample_shape, batch_shape, event_shape].
    # -> [candidate, harmonic, batch, time]
    harmonics_transpose = tf.transpose(harmonics, [2, 3, 0, 1])
    nll_harmonics_transpose = - p_harm_given_sin.log_prob(harmonics_transpose)
    # -> [batch, time, candidate, harm]
    nll_harmonics = tf.transpose(nll_harmonics_transpose, [2, 3, 0, 1])

    # Prior that decreases the importance of upper harmonics.
    amps_prior = tf.linspace(
        1.0, 1.0 / self.n_harmonic_points, self.n_harmonic_points)
    harmonics_loss = (nll_harmonics *
                      amps_prior[tf.newaxis, tf.newaxis, tf.newaxis, :])

    # Don't count loss for harmonics above the Nyquist frequency.
    # Reweight by the number of harmonics below Nyquist
    # (so the estimator doesn't just pick the highest possible frequency).
    nyquist_midi = hz_to_midi(self.sample_rate / 2.0)
    nyquist_mask = tf.where(harmonics < nyquist_midi,
                            tf.ones_like(harmonics_loss),
                            tf.zeros_like(harmonics_loss))
    harmonics_loss *= safe_divide(
        nyquist_mask, tf.reduce_mean(nyquist_mask, axis=-1, keepdims=True))

    # Sum over harmonics.
    harmonics_loss = tf.reduce_mean(harmonics_loss, axis=-1)

    return sinusoids_loss, harmonics_loss
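
The Nyquist masking at the end is a small but easy-to-miss trick: zeroing out above-Nyquist harmonics and dividing by the mean of the mask rescales the remaining terms so that the final `reduce_mean` behaves like an average over only the valid harmonics. A minimal sketch of that reweighting, with `safe_divide` replaced by an assumed stand-in (ordinary division with a guarded denominator):

import tensorflow as tf

def safe_divide(numerator, denominator, eps=1e-7):
  # Assumed behaviour of the snippet's safe_divide: guarded division.
  return numerator / (denominator + eps)

per_harmonic_loss = tf.constant([1.0, 2.0, 3.0, 4.0])
nyquist_mask = tf.constant([1.0, 1.0, 0.0, 0.0])   # last two harmonics above Nyquist

weights = safe_divide(
    nyquist_mask, tf.reduce_mean(nyquist_mask, axis=-1, keepdims=True))
masked_loss = per_harmonic_loss * weights      # [2.0, 4.0, 0.0, 0.0]
loss = tf.reduce_mean(masked_loss, axis=-1)    # ~1.5, the mean over the valid harmonics
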
Example n. 25
 def _entropy(self):
   # Use broadcasting rules to calculate the full broadcast sigma.
   scale = self.scale * tf.ones_like(self.loc)
   return (0.5 * (1. + 2 * tf.math.log(scale) +
                  np.euler_gamma + np.log(4. * np.pi)))
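
In this snippet (and in the `_mode` example just below) `tf.ones_like` is used purely as a broadcasting device: multiplying one parameter by `ones_like` of the other expands it to their common batch shape. A minimal illustration with made-up shapes:

import tensorflow as tf

loc = tf.zeros([3, 2])     # batched location parameter
scale = tf.constant(2.0)   # scalar scale parameter

# ones_like(loc) forces scale up to loc's shape, so later elementwise
# formulas (entropy, mode, stddev, ...) see consistently shaped tensors.
broadcast_scale = scale * tf.ones_like(loc)   # shape [3, 2], every entry 2.0
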
Example n. 26
 def _call(self, r):
     mean = tf.identity(r)
     variance = grad_mean = tf.ones_like(r)
     return mean, variance, grad_mean
Example n. 27
 def _mode(self):
   return self.loc * tf.ones_like(self.scale)
Example n. 28
 def _call(self, r):
     mean = 1. / r
     variance = tf.ones_like(r)
     grad_mean = -1. / r**2
     return mean, variance, grad_mean
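
Examples n. 26 and n. 28 both implement a `_call(r)` that maps a predicted linear response to a `(mean, variance, grad_mean)` triple; that shape looks like a GLM link-function convention, though this reading is an assumption since the enclosing classes are not shown. A side-by-side sketch of the two variants on toy inputs:

import tensorflow as tf

def identity_link(r):
  # mean = r, unit variance, d(mean)/dr = 1  (as in Example n. 26).
  return tf.identity(r), tf.ones_like(r), tf.ones_like(r)

def reciprocal_link(r):
  # mean = 1/r, unit variance, d(mean)/dr = -1/r**2  (as in Example n. 28).
  return 1. / r, tf.ones_like(r), -1. / r**2

r = tf.constant([0.5, 2.0])
identity_link(r)    # ([0.5, 2.0], [1., 1.], [1., 1.])
reciprocal_link(r)  # ([2.0, 0.5], [1., 1.], [-4.0, -0.25])
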
Example n. 29
 def _grad_and_hessian_loss_fn(x):
     loss = _neg_log_likelihood(x)
     grad_loss = tf.gradients(ys=loss, xs=[x])[0]
     hessian_loss = tf.hessians(ys=loss, xs=[x])[0]
     hessian_chol = tf.linalg.cholesky(hessian_loss)
     return grad_loss, hessian_chol, tf.ones_like(grad_loss)
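
`_grad_and_hessian_loss_fn` above is written against graph-mode `tf.gradients`/`tf.hessians`. For readers on eager TF2, a rough equivalent of the grad-plus-Hessian-Cholesky computation can be sketched with nested `GradientTape`s; `_neg_log_likelihood` is replaced here by a toy quadratic:

import tensorflow as tf

def neg_log_likelihood(x):
  # Toy stand-in for the snippet's _neg_log_likelihood.
  return tf.reduce_sum(0.5 * x ** 2)

x = tf.constant([1.0, -2.0])
with tf.GradientTape() as outer:
  outer.watch(x)
  with tf.GradientTape() as inner:
    inner.watch(x)
    loss = neg_log_likelihood(x)
  grad_loss = inner.gradient(loss, x)          # shape [2]
hessian_loss = outer.jacobian(grad_loss, x)    # shape [2, 2] (identity for this loss)
hessian_chol = tf.linalg.cholesky(hessian_loss)
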
 def corr_matrix(t):
     one = tf.ones_like(t)
     row1 = tf.stack([one, 0.5 * t], axis=-1)
     row2 = tf.reverse(row1, [0])
     corr_matrix = tf.stack([row1, row2], axis=-1)
     return corr_matrix
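
The last helper builds a 2x2 correlation matrix whose off-diagonal entries are 0.5*t. Tracing the construction for a scalar input makes the stacking order concrete:

import tensorflow as tf

t = tf.constant(0.8)
one = tf.ones_like(t)
row1 = tf.stack([one, 0.5 * t], axis=-1)   # [1.0, 0.4]
row2 = tf.reverse(row1, [0])               # [0.4, 1.0]
corr = tf.stack([row1, row2], axis=-1)
# corr == [[1.0, 0.4],
#          [0.4, 1.0]]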