Example #1
    def _prob(self, y):
        """Called by the base class to compute likelihoods."""
        # Convert to (channels, 1, batch) format by collapsing dimensions and then
        # commuting channels to front.
        y = tf.broadcast_to(
            y,
            tf.broadcast_dynamic_shape(tf.shape(y), self.batch_shape_tensor()))
        shape = tf.shape(y)
        y = tf.reshape(y, (-1, 1, self.batch_shape.num_elements()))
        y = tf.transpose(y, (2, 1, 0))

        # Evaluate densities.
        # We can use the special rule below to only compute differences in the left
        # tail of the sigmoid. This increases numerical stability: sigmoid(x) is 1
        # for large x, 0 for small x. Subtracting two numbers close to 0 can be done
        # with much higher precision than subtracting two numbers close to 1.
        lower = self._logits_cumulative(y - .5)
        upper = self._logits_cumulative(y + .5)
        # Flip signs if we can move more towards the left tail of the sigmoid.
        sign = tf.stop_gradient(-tf.math.sign(lower + upper))
        p = abs(tf.sigmoid(sign * upper) - tf.sigmoid(sign * lower))
        p = math_ops.lower_bound(p, 0.)

        # Convert back to (broadcasted) input tensor shape.
        p = tf.transpose(p, (2, 1, 0))
        p = tf.reshape(p, shape)
        return p
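As a quick aside, here is a minimal standalone sketch of the sign-flip trick the comments above describe; the constants are made up purely for illustration. Evaluating the difference in the left tail of the sigmoid keeps precision that the naive right-tail subtraction loses in float32.

import tensorflow as tf

# Two illustrative logits deep in the right tail: both sigmoids round to 1.0 in float32.
lower = tf.constant(20.0)
upper = tf.constant(21.0)

naive = tf.sigmoid(upper) - tf.sigmoid(lower)                         # underflows to 0.0
sign = -tf.math.sign(lower + upper)                                   # move to the left tail
stable = tf.abs(tf.sigmoid(sign * upper) - tf.sigmoid(sign * lower))  # ~1.3e-09
print(naive.numpy(), stable.numpy())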
Example #2
  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes.
    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.
    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = tf.sigmoid(ty) + ycenter_a
    xcenter = tf.sigmoid(tx) + xcenter_a
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    return box_list.BoxList(tf.transpose(a=tf.stack([ymin, xmin, ymax, xmax])))
Example #3
 def mlp(self, x):
     layer_1 = tf.sigmoid(
         tf.add(tf.matmul(x, self.h1_weights), self.h1_bias))
     layer_2 = tf.sigmoid(
         tf.add(tf.matmul(layer_1, self.h2_weights), self.h2_bias))
     return tf.sigmoid(
         tf.add(tf.matmul(layer_2, self.out_weights), self.out_bias))
Example #4
 def __call__(self, x, carry):
     update_t = tf.sigmoid(x @ self.W_update_x + carry @ self.W_update_c +
                           self.b_update)
     reset_t = tf.sigmoid(x @ self.W_reset_x + carry @ self.W_reset_c +
                          self.b_reset)
     new_carry = update_t * carry + (1. - update_t) * tf.tanh(
         self.next_x_net(x) + self.next_c_net(reset_t * carry) +
         self.b_next)
     return new_carry
Example #5
def get_discriminator_loss(learner_agent_output, env_output,
                           actor_agent_output, actor_action, reward_clipping,
                           discounting, baseline_cost, entropy_cost,
                           num_steps):
    """Discriminator loss."""
    del actor_agent_output
    del actor_action
    del reward_clipping
    del discounting
    del baseline_cost
    del entropy_cost

    first_true = utils.get_first_true_column(
        env_output.observation['disc_mask'])
    output_logits = learner_agent_output.policy_logits
    output_logits = tf.squeeze(output_logits, axis=1)
    output_logits = tf.boolean_mask(output_logits, first_true)
    output_affine_a, output_affine_b = learner_agent_output.baseline

    # Get the first true.
    labels = tf.cast(env_output.observation['label'], tf.float32)
    labels = tf.boolean_mask(labels, first_true)

    positive_label = tf.equal(labels, tf.constant(1.0))
    positive_logits = tf.boolean_mask(output_logits, positive_label)
    tf.summary.histogram('distribution/sigmoid_positive_logits',
                         tf.sigmoid(positive_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/positive_logits',
                         positive_logits,
                         step=num_steps)

    negative_label = tf.equal(labels, tf.constant(0.0))
    negative_logits = tf.boolean_mask(output_logits, negative_label)
    tf.summary.histogram('distribution/sigmoid_negative_logits',
                         tf.sigmoid(negative_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/negative_logits',
                         negative_logits,
                         step=num_steps)

    tf.summary.scalar('labels/positive_label',
                      tf.reduce_mean(tf.cast(positive_label, tf.float32)),
                      step=num_steps)

    tf.summary.scalar('labels/labels', tf.reduce_mean(labels), step=num_steps)
    tf.summary.scalar('affine_transform/a',
                      tf.reduce_mean(output_affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b',
                      tf.reduce_mean(output_affine_b),
                      step=num_steps)

    cross_entropy = tf.nn.weighted_cross_entropy_with_logits(
        labels=labels, logits=output_logits, pos_weight=5)
    return cross_entropy
Example #6
def _get_discriminator_logits(learner_agent_output, env_output,
                              actor_agent_output, actor_action,
                              reward_clipping, discounting, baseline_cost,
                              entropy_cost, num_steps):
    """Discriminator loss."""
    del actor_agent_output
    del actor_action
    del reward_clipping
    del discounting
    del baseline_cost
    del entropy_cost

    first_true = utils.get_first_true_column(
        env_output.observation['disc_mask'])
    # Shape of output_logits:[time, batch].
    output_logits = learner_agent_output.policy_logits
    # Shape of output_logits:[batch].
    output_logits = tf.boolean_mask(output_logits, first_true)
    output_affine_a, output_affine_b = learner_agent_output.baseline

    # Get the first true.
    labels = tf.cast(env_output.observation['label'], tf.float32)
    tf.summary.scalar('labels/mean_labels before masking',
                      tf.reduce_mean(labels),
                      step=num_steps)
    # Shape of labels:[batch].
    labels = tf.boolean_mask(labels, first_true)

    positive_label = tf.equal(labels, tf.constant(1.0))
    positive_logits = tf.boolean_mask(output_logits, positive_label)
    tf.summary.histogram('distribution/sigmoid_positive_logits',
                         tf.sigmoid(positive_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/positive_logits',
                         positive_logits,
                         step=num_steps)

    negative_label = tf.equal(labels, tf.constant(0.0))
    negative_logits = tf.boolean_mask(output_logits, negative_label)
    tf.summary.histogram('distribution/sigmoid_negative_logits',
                         tf.sigmoid(negative_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/negative_logits',
                         negative_logits,
                         step=num_steps)
    tf.summary.scalar('labels/positive_label_ratio',
                      tf.reduce_mean(tf.cast(positive_label, tf.float32)),
                      step=num_steps)
    tf.summary.scalar('affine_transform/a',
                      tf.reduce_mean(output_affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b',
                      tf.reduce_mean(output_affine_b),
                      step=num_steps)
    # Shape: [batch]
    return labels, output_logits
Example #7
 def get_score_label_v2(self, action_list, env_output_list, agent_output,
                        environment):
     """Gets the probability score and GT labels for DiscriminatorAgentV2."""
     del action_list, environment
     # Remove the unused timestep dimension.
     labels = tf.squeeze(agent_output.policy_logits['labels'], axis=0)
     logits = tf.squeeze(agent_output.baseline, axis=0)
     if self._mode == 'predict':
         instruction_ids = self._get_instruction_ids(env_output_list)
         return [(tf.sigmoid(logits), labels, instruction_ids)]
     else:
         return [(tf.sigmoid(logits), labels)]
Example #8
    def evaluate_binary_classification(self, predictions, weights):
        """Evaluates the softmax loss on the given predictions.

    Given a rank-1 `Tensor` of predictions with shape (n,), where n is the
    number of examples, and a rank-2 `Tensor` of weights with shape (m, 2),
    where m is broadcastable to n, this method will return a `Tensor` of shape
    (n,) where the ith element is:

    ```python
    softmax_loss[i] = (
      weights[i, 0] * ( exp(predictions[i]) / ( 1 + exp(predictions[i]) ) ) +
      weights[i, 1] * ( 1 / ( 1 + exp(predictions[i]) ) ) )
    ```

    where constant_weights[i] = min{weights[i, 0], weights[i, 1]} contains the
    minimum weights.

    You can think of weights[:, 0] as being the per-example costs associated
    with making a positive prediction, and weights[:, 1] as those for a negative
    prediction.

    Args:
      predictions: a `Tensor` of shape (n,), where n is the number of examples.
      weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
        `Tensor` is *not* necessarily non-negative.

    Returns:
      A `Tensor` of shape (n,) and dtype=predictions.dtype, containing the
      softmax losses for each example.

    Raises:
      TypeError: if "predictions" is not a floating-point `Tensor`, or "weights"
        is not a `Tensor`.
      ValueError: if "predictions" is not rank-1, or "weights" is not a rank-2
        `Tensor` with exactly two columns.
    """
        predictions = _convert_to_binary_classification_predictions(
            predictions)
        columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
        if columns != 2:
            raise ValueError("weights must have two columns")
        dtype = predictions.dtype.base_dtype

        positive_weights = tf.cast(weights[:, 0], dtype=dtype)
        negative_weights = tf.cast(weights[:, 1], dtype=dtype)

        is_positive = tf.sigmoid(predictions)
        is_negative = tf.sigmoid(-predictions)

        return positive_weights * is_positive + negative_weights * is_negative
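The docstring's formula boils down to a pair of sigmoids. As a hedged illustration only (the helper name and values below are made up, not part of the original class), the same computation on plain tensors looks like this:

import tensorflow as tf

def weighted_sigmoid_loss(predictions, weights):
    # weights[:, 0] scales the "predict positive" term sigmoid(z);
    # weights[:, 1] scales the "predict negative" term sigmoid(-z) = 1 / (1 + exp(z)).
    predictions = tf.convert_to_tensor(predictions, dtype=tf.float32)
    weights = tf.cast(weights, predictions.dtype)
    return weights[:, 0] * tf.sigmoid(predictions) + weights[:, 1] * tf.sigmoid(-predictions)

# One row with only a positive-prediction cost, one with only a negative-prediction cost.
print(weighted_sigmoid_loss([2.0, -1.0], [[1.0, 0.0], [0.0, 1.0]]).numpy())  # ~[0.881, 0.731]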
Example #9
  def call(self, inputs, training=True, survival_prob=None):
    """Implementation of call().

    Args:
      inputs: the inputs tensor.
      training: boolean, whether the model is constructed for training.
      survival_prob: float, between 0 and 1, drop connect rate.

    Returns:
      An output tensor.
    """
    x = inputs
    if self._block_args.expand_ratio != 1:
      x = self._relu_fn(self._bn0(self._expand_conv(x), training=training))
    x = self._relu_fn(self._bn1(self._depthwise_conv(x), training=training))

    if self._has_se:
      se_tensor = tf.reduce_mean(
          x, self._spatial_dims, keepdims=True)
      se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor)))
      x = tf.sigmoid(se_tensor) * x

    x = self._bn2(self._project_conv(x), training=training)
    # Add identity so that quantization-aware training can insert quantization
    # ops correctly.
    x = tf.identity(x)
    if self._clip_projection_output:
      x = tf.clip_by_value(x, -6, 6)
    if all(
        s == 1 for s in self._block_args.strides
    ) and self._block_args.input_filters == self._block_args.output_filters:
      if survival_prob:
        x = utils.drop_connect(x, training, survival_prob)
      x = tf.add(x, inputs)
    return x
Example #10
 def _cdf(self, x):
   logits = self._logits_parameter_no_checks()
   total_count = tf.convert_to_tensor(self.total_count)
   safe_x = tf.where(x >= 0, x, 0.)
   answer = tfp_math.betainc(
       total_count, 1. + safe_x, tf.sigmoid(-logits))
   return distribution_util.extend_cdf_outside_support(x, answer, low=0)
Example #11
def sigmoid(x):
    """Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`.

    Applies the sigmoid activation function. For small values (<-5),
    `sigmoid` returns a value close to zero, and for large values (>5)
    the result of the function gets close to 1.

    Sigmoid is equivalent to a 2-element Softmax, where the second element is
    assumed to be zero. The sigmoid function always returns a value between
    0 and 1.

    For example:

    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
    >>> b = tf.keras.activations.sigmoid(a)
    >>> b.numpy()
    array([2.0611537e-09, 2.6894143e-01, 5.0000000e-01, 7.3105860e-01,
             1.0000000e+00], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        Tensor with the sigmoid activation: `1 / (1 + exp(-x))`.
    """
    output = tf.sigmoid(x)
    # Cache the logits to use for crossentropy loss.
    output._keras_logits = x  # pylint: disable=protected-access
    return output
Example #12
  def latent_encoder(self, x, y):
    """Encodes the inputs into one representation.

    Args:
      x: Tensor of shape [batch_size, observations, d_x]. For the prior, these
         are context x-values. For the posterior, these are target x-values.
      y: Tensor of shape [batch_size, observations, d_y]. For the prior, these
         are context y-values. For the posterior, these are target y-values.

    Returns:
      A normal distribution over tensors of shape [batch_size, num_latents].
    """
    encoder_input = tf.concat([x, y], axis=-1)
    per_example_embedding = batch_mlp(
        encoder_input, self._latent_encoder_sizes)
    dataset_embedding = tf.reduce_mean(per_example_embedding, axis=1)
    hidden = tf.keras.layers.Dense(
        (self._latent_encoder_sizes[-1] + self._num_latents)//2,
        activation=tf.nn.relu)(dataset_embedding)
    loc = tf.keras.layers.Dense(self._num_latents, activation=None)(hidden)
    untransformed_scale = tf.keras.layers.Dense(self._num_latents,
                                                activation=None)(hidden)
    # Constrain the scale following Garnelo et al. (2018).
    scale_diag = 0.1 + 0.9 * tf.sigmoid(untransformed_scale)
    return generated_random_variables.MultivariateNormalDiag(
        loc=loc, scale_diag=scale_diag)
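The `0.1 + 0.9 * tf.sigmoid(...)` constraint keeps the posterior scale inside (0.1, 1.0), so the latent distribution never collapses to a point mass; a one-line illustration (values made up):

import tensorflow as tf

untransformed = tf.constant([-10.0, 0.0, 10.0])
print((0.1 + 0.9 * tf.sigmoid(untransformed)).numpy())  # ~[0.10, 0.55, 1.00]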
Example #13
def _apply_score_activation(logits, num_classes, activation):
  """Applies activation to logits and removes the background class.

  Note that it is assumed that the background class has index 0, which is
  sliced away after the score transformation.

  Args:
    logits: the raw logit tensor.
    num_classes: the total number of classes including one background class.
    activation: the score activation type, one of 'SIGMOID', 'SOFTMAX' and
      'IDENTITY'.

  Returns:
    scores: the tensor after applying score transformation and background
      class removal.
  """
  batch_size = tf.shape(input=logits)[0]
  logits = tf.reshape(logits, [batch_size, -1, num_classes])
  if activation == 'SIGMOID':
    scores = tf.sigmoid(logits)
  elif activation == 'SOFTMAX':
    scores = tf.nn.softmax(logits)
  elif activation == 'IDENTITY':
    scores = logits
  else:
    raise ValueError(
        'The score activation should be SIGMOID, SOFTMAX or IDENTITY')
  scores = scores[..., 1:]
  return scores
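A usage sketch with made-up shapes (five classes including the background, three anchors per location), just to show how the background column is sliced off:

import tensorflow as tf

logits = tf.random.normal([2, 4, 4, 3 * 5])  # [batch, height, width, anchors * classes]
scores = _apply_score_activation(logits, num_classes=5, activation='SIGMOID')
print(scores.shape)  # (2, 48, 4): flattened locations/anchors, background class removed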
Example #14
def _kl_bernoulli_bernoulli(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

  Args:
    a: instance of a Bernoulli distribution object.
    b: instance of a Bernoulli distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_bernoulli_bernoulli".

  Returns:
    Batchwise KL(a || b)
  """
    with tf.name_scope(name or "kl_bernoulli_bernoulli"):
        delta_probs0 = tf.nn.softplus(-b.logits) - tf.nn.softplus(-a.logits)
        delta_probs1 = tf.nn.softplus(b.logits) - tf.nn.softplus(a.logits)
        return (tf.sigmoid(a.logits) * delta_probs0 +
                tf.sigmoid(-a.logits) * delta_probs1)
Example #15
def logit_normal_variance_trapezoid(loc, scale):
    """Brute-force the variance of LogitNormal(loc, scale) by quadrature."""
    dist = tfd.Normal(loc, scale)
    grid, compute = logit_normal_trapezoid_rule(loc, scale)
    probs = dist.prob(grid)
    sigmoids = tf.sigmoid(grid)
    mean = compute(sigmoids * probs)
    return compute((sigmoids - mean)**2 * probs)
Example #16
 def _cdf(self, x):
     logits = self._logits_parameter_no_checks()
     total_count = tf.convert_to_tensor(self.total_count)
     shape = self._batch_shape_tensor(logits_or_probs=logits,
                                      total_count=total_count)
     return tf.math.betainc(tf.broadcast_to(total_count, shape),
                            tf.broadcast_to(1. + x, shape),
                            tf.broadcast_to(tf.sigmoid(-logits), shape))
Example #17
 def _forward(self, x):
     if self._is_standard_sigmoid:
         return tf.sigmoid(x)
     lo = tf.convert_to_tensor(self.low)  # Concretize only once
     hi = tf.convert_to_tensor(self.high)
     diff = hi - lo
     left = lo + diff * tf.math.sigmoid(x)
     right = hi - diff * tf.math.sigmoid(-x)
     return tf.where(x < 0, left, right)
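The reason for keeping both one-sided forms is floating-point accuracy near each bound. A small made-up demonstration with low=0 and high=1 (a case the bijector would normally short-circuit to the standard sigmoid):

import tensorflow as tf

low, high = tf.constant(0.0), tf.constant(1.0)
x = tf.constant(-20.0)

left = low + (high - low) * tf.math.sigmoid(x)     # ~2.06e-09, accurate near `low`
right = high - (high - low) * tf.math.sigmoid(-x)  # sigmoid(20) rounds to 1.0, so this is 0.0
print(left.numpy(), right.numpy())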
Example #18
  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = backend.dot(cell_inputs, kernel)
    z += backend.dot(h_tm1, recurrent_kernel)
    z = backend.bias_add(z, bias)

    z0, z1, z2, z3 = tf.split(z, 4, axis=1)

    i = tf.sigmoid(z0)
    f = tf.sigmoid(z1)
    c = f * c_tm1 + i * tf.tanh(z2)
    o = tf.sigmoid(z3)

    h = o * tf.tanh(c)
    return h, [h, c]
Example #19
    def __call__(self,
                 logits,
                 scaled_labels,
                 classes,
                 category_loss=True,
                 mse_loss=False):
        """Compute instance segmentation loss.

    Args:
      logits: A Tensor of shape [batch_size * num_points, height, width,
        num_classes]. The logits are not necessarily between 0 and 1.
      scaled_labels: A float16 Tensor of shape [batch_size, num_instances,
          mask_size, mask_size], where mask_size =
          mask_crop_size * gt_upsample_scale for fine mask, or mask_crop_size
          for coarse masks and shape priors.
      classes: An int tensor of shape [batch_size, num_instances].
      category_loss: whether to use class-specific mask prediction.
      mse_loss: whether to use mean squared error for the mask loss.

    Returns:
      mask_loss: a float tensor representing the total mask classification loss.
      iou: a float tensor representing the IoU between target and prediction.
    """
        classes = tf.reshape(classes, [-1])
        _, _, height, width = scaled_labels.get_shape().as_list()
        scaled_labels = tf.reshape(scaled_labels, [-1, height, width])

        if not category_loss:
            logits = logits[:, :, :, 0]
        else:
            logits = tf.transpose(a=logits, perm=(0, 3, 1, 2))
            gather_idx = tf.stack(
                [tf.range(tf.size(input=classes)), classes - 1], axis=1)
            logits = tf.gather_nd(logits, gather_idx)

        # Ignore loss on empty mask targets.
        valid_labels = tf.reduce_any(input_tensor=tf.greater(scaled_labels, 0),
                                     axis=[1, 2])
        if mse_loss:
            # Logits are probabilities in the case of shape prior prediction.
            logits *= tf.reshape(tf.cast(valid_labels, logits.dtype),
                                 [-1, 1, 1])
            weighted_loss = tf.nn.l2_loss(scaled_labels - logits)
            probs = logits
        else:
            weighted_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=scaled_labels, logits=logits)
            probs = tf.sigmoid(logits)
            weighted_loss *= tf.reshape(
                tf.cast(valid_labels, weighted_loss.dtype), [-1, 1, 1])

        iou = tf.reduce_sum(
            input_tensor=tf.minimum(scaled_labels, probs)) / tf.reduce_sum(
                input_tensor=tf.maximum(scaled_labels, probs))
        mask_loss = tf.reduce_sum(input_tensor=weighted_loss) / tf.reduce_sum(
            input_tensor=scaled_labels)
        return tf.cast(mask_loss, tf.float32), tf.cast(iou, tf.float32)
Example #20
 def _cdf(self, x):
     logits = self._logits_parameter_no_checks()
     total_count = tf.convert_to_tensor(self.total_count)
     shape = self._batch_shape_tensor(logits=logits,
                                      total_count=total_count)
     safe_x = tf.where(x >= 0, x, 0.)
     answer = tf.math.betainc(tf.broadcast_to(total_count, shape),
                              tf.broadcast_to(1. + safe_x, shape),
                              tf.broadcast_to(tf.sigmoid(-logits), shape))
     return distribution_util.extend_cdf_outside_support(x, answer, low=0)
Example #21
def _kl_bernoulli_bernoulli(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

  Args:
    a: instance of a Bernoulli distribution object.
    b: instance of a Bernoulli distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_bernoulli_bernoulli'`).

  Returns:
    Batchwise KL(a || b)
  """
    with tf.name_scope(name or 'kl_bernoulli_bernoulli'):
        a_logits = a.logits_parameter()
        b_logits = b.logits_parameter()
        return (tf.sigmoid(a_logits) *
                (tf.math.softplus(-b_logits) - tf.math.softplus(-a_logits)) +
                tf.sigmoid(-a_logits) *
                (tf.math.softplus(b_logits) - tf.math.softplus(a_logits)))
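For a quick sanity check, the same quantity is exposed through the registered KL in TensorFlow Probability, so (with made-up logits) a comparison could look like this:

import tensorflow_probability as tfp
tfd = tfp.distributions

a = tfd.Bernoulli(logits=[0.3, -2.0])
b = tfd.Bernoulli(logits=[1.5, 0.0])
# Should agree with _kl_bernoulli_bernoulli(a, b) up to floating-point error.
print(tfd.kl_divergence(a, b).numpy())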
Example #22
 def _sample_helper(self, value, eps=None):
     mu, sigma = tf.split(value, num_or_size_splits=2, axis=-1)
     sigma = tf.sigmoid(sigma)
     if eps is None:
         eps = tf.random.normal(shape=tf.shape(sigma),
                                mean=0.,
                                stddev=self._eps_std,
                                dtype=tf.float32)
     value = mu + sigma * eps
     neg_kl = 0.5 + tf.math.log(sigma + 1e-8) - 0.5 * (sigma**2 + mu**2)
     return tf.squeeze(value, axis=-1), tf.squeeze(neg_kl, axis=-1), eps
Example #23
def logit_normal_variance_gh(loc, scale, deg):
  """Approxmates `Var_{N(m,s)}[sigmoid(X)]` by Gauss-Hermite quadrature."""
  # Since we have to compute sigmoids for variance anyway, we inline
  # computing the mean by Gauss-Hermite quadrature at the same grid of points.
  grid, weights = onp.polynomial.hermite_e.hermegauss(deg)
  grid = tf.cast(grid, dtype=loc.dtype)
  weights = tf.cast(weights, dtype=loc.dtype)
  normalizer = tf.constant(onp.sqrt(2 * onp.pi), dtype=loc.dtype)
  sigmoids = tf.sigmoid(grid * scale[..., tf.newaxis] + loc[..., tf.newaxis])
  mean = tf.reduce_sum(sigmoids * weights, axis=-1) / normalizer
  residuals = (sigmoids - mean[..., tf.newaxis])**2
  return tf.reduce_sum(residuals * weights, axis=-1) / normalizer
Example #24
def logit_normal_mean_gh(loc, scale, deg):
  """Approximates `E_{N(m,s)}[sigmoid(X)]` by Gauss-Hermite quadrature."""
  # We want to integrate
  # A = \int_-inf^inf sigmoid(x) * Normal(loc, scale).pdf(x) dx
  # To bring it into the right form for Gauss-Hermite quadrature,
  # we make the substitution y = (x - loc) / scale, to get
  # A = (1/sqrt(2*pi)) * \int_-inf^inf [
  #       sigmoid(y * scale + loc) * exp(-1/2 y**2) dy]
  grid, weights = onp.polynomial.hermite_e.hermegauss(deg)
  grid = tf.cast(grid, dtype=loc.dtype)
  weights = tf.cast(weights, dtype=loc.dtype)
  normalizer = tf.constant(onp.sqrt(2 * onp.pi), dtype=loc.dtype)
  values = tf.sigmoid(grid * scale[..., tf.newaxis] + loc[..., tf.newaxis])
  return tf.reduce_sum(values * weights, axis=-1) / normalizer
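Assuming `onp` is NumPy and `logit_normal_mean_gh` from the example above is in scope, a crude Monte Carlo estimate makes a handy sanity check (values below are illustrative):

import tensorflow as tf

loc = tf.constant([0.0, 1.5])
scale = tf.constant([0.5, 2.0])

gh = logit_normal_mean_gh(loc, scale, deg=20)
mc = tf.reduce_mean(tf.sigmoid(tf.random.normal([100000, 2]) * scale + loc), axis=0)
print(gh.numpy(), mc.numpy())  # should agree to a few decimal places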
Example #25
    def testVarianceWhenProbCloseToOne(self):
        # Prob is very close to 1.0, so the naive 1 - p will be (numerically) 0,
        # which would make variance zero.  Main point of this test is to verify that
        # the variance is > 0 ... we also verify that variance is correct.

        # tf.sigmoid(logits) is < float eps away from 1.0, which means the naive
        # 1 - tf.sigmoid(logits) will result in 0.0, which is a loss of precision.
        one_minus_prob_64 = np.float64(np.finfo(np.float32).eps) / 2
        logits_32 = np.float32(
            np.log((1. - one_minus_prob_64) / one_minus_prob_64))

        # Verify that this value of logits results in loss of precision for a naive
        # implementation (justifying our "fancy" implementation of sigmoid(-logits))
        self.assertAllEqual(0., 1 - tf.sigmoid(logits_32))

        # See! This one weird trick fixes everything.  Asserts below check that we
        # used the trick correctly in our code.
        self.assertGreater(self.evaluate(tf.sigmoid(-logits_32)), 0.)

        dist = tfd.Bernoulli(logits=logits_32)

        expected_variance = np.float32(one_minus_prob_64 *
                                       (1 - one_minus_prob_64))

        self.assertGreater(expected_variance, 0.)

        self.assertAllClose(
            dist.variance(),
            expected_variance,
            # Equivalent to atol=0, rtol=1e-6, but less likely to confuse which
            # element is being used for the "r" in rtol.
            # Note this also ensures dist.variance() > 0, which the naive
            # implementation would not be able to do.
            atol=expected_variance * 1e-6,
            rtol=0,
        )
Example #26
    def step(cell_inputs, cell_states):
        """Step function that will be used by Keras RNN backend."""
        h_tm1 = cell_states[0]

        # inputs projected by all gate matrices at once
        matrix_x = backend.dot(cell_inputs, kernel)
        matrix_x = backend.bias_add(matrix_x, input_bias)

        x_z, x_r, x_h = tf.split(matrix_x, 3, axis=1)

        # hidden state projected by all gate matrices at once
        matrix_inner = backend.dot(h_tm1, recurrent_kernel)
        matrix_inner = backend.bias_add(matrix_inner, recurrent_bias)

        recurrent_z, recurrent_r, recurrent_h = tf.split(matrix_inner,
                                                         3,
                                                         axis=1)
        z = tf.sigmoid(x_z + recurrent_z)
        r = tf.sigmoid(x_r + recurrent_r)
        hh = tf.tanh(x_h + r * recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]
Example #27
    def make_precision_matrix_update_op(self, gp_feature, logits,
                                        precision_matrix):
        """Defines update op for the precision matrix of feature weights."""
        if self.likelihood != 'gaussian':
            if logits is None:
                raise ValueError(
                    f'"logits" cannot be None when likelihood={self.likelihood}'
                )

            if logits.shape[-1] != 1:
                raise ValueError(
                    f'likelihood={self.likelihood} only supports univariate logits. '
                    f'Got logits dimension: {logits.shape[-1]}')

        batch_size = tf.shape(gp_feature)[0]
        batch_size = tf.cast(batch_size, dtype=gp_feature.dtype)

        # Computes batch-specific normalized precision matrix.
        if self.likelihood == 'binary_logistic':
            prob = tf.sigmoid(logits)
            prob_multiplier = prob * (1. - prob)
        elif self.likelihood == 'poisson':
            prob_multiplier = tf.exp(logits)
        else:
            prob_multiplier = 1.

        gp_feature_adjusted = tf.sqrt(prob_multiplier) * gp_feature
        precision_matrix_minibatch = tf.matmul(gp_feature_adjusted,
                                               gp_feature_adjusted,
                                               transpose_a=True)

        # Updates the population-wise precision matrix.
        if self.momentum > 0:
            # Use moving-average updates to accumulate batch-specific precision
            # matrices.
            precision_matrix_minibatch = precision_matrix_minibatch / batch_size
            precision_matrix_new = (
                self.momentum * precision_matrix +
                (1. - self.momentum) * precision_matrix_minibatch)
        else:
            # Compute exact population-wise covariance without momentum.
            # If using this option, make sure to pass through the data only once.
            precision_matrix_new = precision_matrix + precision_matrix_minibatch

        # Returns the update op.
        return precision_matrix.assign(precision_matrix_new)
Example #28
def sigmoid_cross_entropy_focal_loss(logits, labels, alpha=0.25, gamma=2.0):
    """Focal loss for binary (sigmoid) logistic loss."""
    # The numerically-stable way to compute
    #  log(p) for positives;
    #  log(1 - p) for negatives.
    labels = tf.cast(labels, logits.dtype)
    labels = tf.reshape(labels, logits.shape)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)

    if gamma is not None and gamma != 0:
        # The modulating factor (1 - p_t) ** gamma. Note that
        # sigmoid(logits * (1 - 2 * labels)) equals 1 - p_t for either label value.
        inner = tf.sigmoid(logits * (1 - labels * 2))
        loss *= tf.pow(inner, gamma)

    if alpha is not None:
        # [1] Eq (3)
        loss *= (alpha * labels + (1 - alpha) * (1 - labels))

    loss = tf.reduce_sum(loss, axis=-1)
    return loss
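A minimal usage sketch with made-up logits and labels:

import tensorflow as tf

logits = tf.constant([[2.0, -1.0, 0.5]])
labels = tf.constant([[1.0, 0.0, 1.0]])
# Per-example focal loss, summed over the class axis.
print(sigmoid_cross_entropy_focal_loss(logits, labels).numpy())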
Example #29
    def posterior_mode(self, K, return_temporaries=False):

        n = self.X_train_.shape[0]
        if self.warm_start and hasattr(self, "f_cached"):
            f = self.f_cached
        else:
            f = tf.zeros(n, dtype=np.float64)
        log_marginal_likelihood = tf.constant(-np.inf, dtype='float64')

        for i in range(self.max_iter_predict):

            pi = tf.sigmoid(f)
            W = pi * (1 - pi)
            W_sr = tf.sqrt(W)
            W_sr_K = tf.reshape(W_sr, [-1, 1]) * K
            B = tf.eye(W.shape[0], dtype='float64') + W_sr_K * W_sr
            L = tf.linalg.cholesky(B)
            b = W * f + (self.y - pi)
            a = b - W_sr * tf.reshape(
                tf.linalg.cholesky_solve(
                    L, tf.matmul(W_sr_K, tf.reshape(b, (-1, 1)))), [-1])
            f = tf.matmul(K, tf.reshape(a, [-1, 1]))

            lml = -0.5 * tf.matmul(tf.reshape(a, [1, -1]), f) - tf.reduce_sum(
                tf.math.log(1 + tf.math.exp(-(self.y * 2.0 - 1.0) *
                                            tf.reshape(f, [-1])))
            ) - tf.reduce_sum(tf.math.log(tf.linalg.tensor_diag_part(L)))
            f = tf.reshape(f, [-1])

            if lml[0, 0] - log_marginal_likelihood < 1e-10:
                break
            log_marginal_likelihood = lml

        self.f_cached = f
        if return_temporaries:
            return f, lml, (pi, W_sr, L, b, a)
        else:
            return f, lml, i
Example #30
    def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
        # Collects outputs from all levels into a list.
        boxes = []
        scores = []
        for i in range(self._min_level, self._max_level + 1):
            box_outputs_i_shape = tf.shape(box_outputs[i])
            batch_size = box_outputs_i_shape[0]
            num_anchors_per_locations = box_outputs_i_shape[-1] // 4
            num_classes = tf.shape(
                class_outputs[i])[-1] // num_anchors_per_locations

            # Applies score transformation and removes the implicit background class.
            scores_i = tf.sigmoid(
                tf.reshape(class_outputs[i], [batch_size, -1, num_classes]))
            scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

            # Box decoding.
            # The anchor boxes are shared for all data in a batch.
            # One stage detector only supports class agnostic box regression.
            anchor_boxes_i = tf.reshape(anchor_boxes[i], [batch_size, -1, 4])
            box_outputs_i = tf.reshape(box_outputs[i], [batch_size, -1, 4])
            boxes_i = box_utils.decode_boxes(box_outputs_i, anchor_boxes_i)

            # Box clipping.
            boxes_i = box_utils.clip_boxes(boxes_i, image_shape)

            boxes.append(boxes_i)
            scores.append(scores_i)
        boxes = tf.concat(boxes, axis=1)
        scores = tf.concat(scores, axis=1)

        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
            self._generate_detections(tf.expand_dims(boxes, axis=2), scores))

        # Adds 1 to offset the background class which has index 0.
        nmsed_classes += 1
        return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections