Example #1
    def __call__(self, step):
        with tf.name_scope(self.name or "PolynomialDecay") as name:
            initial_learning_rate = tf.convert_to_tensor(
                self.initial_learning_rate, name="initial_learning_rate")
            dtype = initial_learning_rate.dtype
            end_learning_rate = tf.cast(self.end_learning_rate, dtype)
            power = tf.cast(self.power, dtype)

            global_step_recomp = tf.cast(step, dtype)
            decay_steps_recomp = tf.cast(self.decay_steps, dtype)
            if self.cycle:
                # Find the first multiple of decay_steps that is bigger than
                # global_step. If global_step is zero, set the multiplier to 1.
                multiplier = tf.where(
                    tf.equal(global_step_recomp, 0),
                    1.0,
                    tf.math.ceil(global_step_recomp / self.decay_steps),
                )
                decay_steps_recomp = tf.multiply(decay_steps_recomp,
                                                 multiplier)
            else:
                # Make sure that the global_step used is not bigger than
                # decay_steps.
                global_step_recomp = tf.minimum(global_step_recomp,
                                                decay_steps_recomp)

            p = tf.divide(global_step_recomp, decay_steps_recomp)
            return tf.add(
                tf.multiply(
                    initial_learning_rate - end_learning_rate,
                    tf.pow(1 - p, power),
                ),
                end_learning_rate,
                name=name,
            )
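
As a quick sanity check of the polynomial schedule computed above, the built-in tf.keras.optimizers.schedules.PolynomialDecay can be evaluated directly at a few steps. A minimal sketch; the hyperparameter values are arbitrary stand-ins, not taken from the example:

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1,
    decay_steps=1000,
    end_learning_rate=0.01,
    power=2.0)
for step in (0, 500, 1000, 2000):
    # Decays from 0.1 towards 0.01 over 1000 steps, then stays at 0.01.
    print(step, float(schedule(step)))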
  def fit_critic(self, states, actions, next_states, rewards, masks, discount):
    """Updates critic parameters.

    Args:
      states: A batch of states.
      actions: A batch of actions.
      next_states: A batch of next states.
      rewards: A batch of rewards.
      masks: A batch of masks indicating the end of the episodes.
      discount: An MDP discount factor.

    Returns:
      Critic loss.
    """
    _, next_actions, log_probs = self.actor(next_states)

    target_q1, target_q2 = self.critic_target(next_states, next_actions)
    target_v = tf.minimum(target_q1, target_q2) - self.alpha * log_probs
    target_q = rewards + discount * masks * target_v

    with tf.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(self.critic.variables)

      q1, q2 = self.critic(states, actions)
      critic_loss = (
          tf.losses.mean_squared_error(target_q, q1) +
          tf.losses.mean_squared_error(target_q, q2))
      critic_loss = tf.reduce_mean(critic_loss)

    critic_grads = tape.gradient(critic_loss, self.critic.variables)

    self.critic_optimizer.apply_gradients(
        zip(critic_grads, self.critic.variables))

    return critic_loss
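
The core of fit_critic above is the twin-Q soft Bellman target. A minimal sketch with dummy tensors; the values, alpha, and the "critic outputs" below are stand-ins, not the real networks:

import tensorflow as tf

rewards = tf.constant([1.0, 0.0])
masks = tf.constant([1.0, 0.0])        # 0 marks the end of an episode
discount = 0.99
alpha = 0.2
log_probs = tf.constant([-1.2, -0.8])  # log pi(a' | s') for sampled next actions
target_q1 = tf.constant([3.0, 2.0])    # stand-ins for the two target critics
target_q2 = tf.constant([2.5, 2.2])

# Take the pessimistic (minimum) Q estimate and subtract the entropy term.
target_v = tf.minimum(target_q1, target_q2) - alpha * log_probs
target_q = rewards + discount * masks * target_v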
Example #3
    def testExample(self):
        target_dist = tfd.JointDistributionSequential([
            tfd.Normal(0., 1.5),
            tfd.Independent(tfd.Normal(tf.zeros([2, 5], dtype=tf.float32), 5.),
                            reinterpreted_batch_ndims=2),
        ])
        num_burnin_steps = 500
        num_results = 500
        num_chains = 64

        kernel = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=lambda *args: target_dist.log_prob(args),
            num_leapfrog_steps=2,
            step_size=target_dist.stddev())
        kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
            inner_kernel=kernel,
            num_adaptation_steps=int(num_burnin_steps * 0.8),
            # Cast to int32.  Not necessary for operation since we cast internally
            # to a float type.  This is done to check that we are able to pass in
            # integer types (since they are the natural type for this).
            step_count_smoothing=tf.cast(10, tf.int32))

        seed_stream = test_util.test_seed_stream()
        _, log_accept_ratio = tfp.mcmc.sample_chain(
            num_results=num_results,
            num_burnin_steps=num_burnin_steps,
            current_state=target_dist.sample(num_chains, seed=seed_stream()),
            kernel=kernel,
            trace_fn=lambda _, pkr: pkr.inner_results.log_accept_ratio,
            seed=seed_stream())

        p_accept = tf.reduce_mean(tf.math.exp(tf.minimum(log_accept_ratio,
                                                         0.)))

        self.assertAllClose(0.75, self.evaluate(p_accept), atol=0.15)
Example #4
    def __call__(self, step):
        with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name:
            initial_learning_rate = tf.convert_to_tensor(
                self.initial_learning_rate, name="initial_learning_rate")
            dtype = initial_learning_rate.dtype
            decay_steps = tf.cast(self.decay_steps, dtype)
            initial_variance = tf.cast(self.initial_variance, dtype)
            variance_decay = tf.cast(self.variance_decay, dtype)
            num_periods = tf.cast(self.num_periods, dtype)
            alpha = tf.cast(self.alpha, dtype)
            beta = tf.cast(self.beta, dtype)

            global_step_recomp = tf.cast(step, dtype)
            global_step_recomp = tf.minimum(global_step_recomp, decay_steps)
            linear_decayed = (decay_steps - global_step_recomp) / decay_steps
            variance = initial_variance / (tf.pow(1.0 + global_step_recomp,
                                                  variance_decay))
            std = tf.sqrt(variance)
            noisy_linear_decayed = (linear_decayed +
                                    self._random_generator.random_normal(
                                        linear_decayed.shape, stddev=std))

            completed_fraction = global_step_recomp / decay_steps
            fraction = 2.0 * num_periods * completed_fraction
            cosine_decayed = 0.5 * (
                1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction))
            noisy_linear_cosine_decayed = (
                alpha + noisy_linear_decayed) * cosine_decayed + beta

            return tf.multiply(initial_learning_rate,
                               noisy_linear_cosine_decayed,
                               name=name)
Example #5
    def testChainLogProbChainTarget(self):
        init_step = tf.constant([0.1, 0.2])
        kernel = FakeMHKernel(FakeSteppedKernel(step_size=init_step),
                              log_accept_ratio=tf.stack(
                                  [tf.math.log(0.74),
                                   tf.math.log(0.76)]))
        kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
            kernel,
            num_adaptation_steps=1,
            log_accept_prob_getter_fn=(
                lambda pkr: tf.minimum(0., pkr.log_accept_ratio)),
            validate_args=True,
            target_accept_prob=tf.stack([0.7, 0.8]))

        kernel_results = kernel.bootstrap_results(tf.zeros(2))
        for _ in range(2):
            _, kernel_results = kernel.one_step(tf.zeros(2), kernel_results)

        step_size = self.evaluate(
            kernel_results.inner_results.accepted_results.step_size, )

        expected = tf.math.exp(
            tf.math.log(10. * init_step) - tf.constant([-0.04, 0.04]) /
            ((_INITIAL_T + 1.) * _EXPLORATION_SHRINKAGE))
        self.assertAllClose(expected, step_size)
Example #6
def log_sub_exp(x, y, return_sign=False, name=None):
    """Compute `log(exp(max(x, y)) - exp(min(x, y)))` in a numerically stable way.

  Use `return_sign=True` unless it is known that `x >= y`, since a negative
  value cannot be represented in log-space.

  Args:
    x: Float `Tensor` broadcastable with `y`.
    y: Float `Tensor` broadcastable with `x`.
    return_sign: Whether or not to return the second output value `sign`. If
      it is known that `x >= y`, this is unnecessary.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., `'log_sub_exp'`).

  Returns:
    logsubexp: Float `Tensor` of `log(exp(max(x, y)) - exp(min(x, y)))`.
    sign: Float `Tensor` +/-1 indicating the sign of `exp(x) - exp(y)`.
  """
    with tf.name_scope(name or 'log_sub_exp'):
        dtype = dtype_util.common_dtype([x, y], dtype_hint=tf.float32)
        x = tf.convert_to_tensor(x, dtype=dtype, name='x')
        y = tf.convert_to_tensor(y, dtype=dtype, name='y')
        larger = tf.maximum(x, y)
        smaller = tf.minimum(x, y)
        result = larger + log1mexp(tf.maximum(larger - smaller, 0))
        if return_sign:
            ones = tf.ones([], result.dtype)
            return result, tf.where(x < y, -ones, ones)
        return result
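
A small numeric sketch of the identity used above, written with plain TF ops (no TFP helpers); the constants are arbitrary and chosen so that the naive form overflows in float32:

import tensorflow as tf

# log(exp(x) - exp(y)) = x + log1p(-exp(y - x)) for x >= y.
x = tf.constant(90.0)
y = tf.constant(89.0)
stable = x + tf.math.log1p(-tf.exp(y - x))   # ~89.54
naive = tf.math.log(tf.exp(x) - tf.exp(y))   # nan: exp(90.) overflows float32
print(float(stable), float(naive))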
Example #7
 def trace_fn(_, pkr):
     results = pkr.inner_results
     return {
         'accept_prob': tf.exp(tf.minimum(0.,
                                          results.log_accept_ratio)),
         'step_size': results.accepted_results.step_size,
     }
    def testExample(self):
        tf.random.set_seed(test_util.test_seed())
        target_dist = tfd.JointDistributionSequential([
            tfd.Normal(0., 1.5),
            tfd.Independent(tfd.Normal(tf.zeros([2, 5], dtype=tf.float32), 5.),
                            reinterpreted_batch_ndims=2),
        ])
        num_burnin_steps = 500
        num_results = 500
        num_chains = 64

        kernel = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=lambda *args: target_dist.log_prob(args),
            num_leapfrog_steps=2,
            step_size=target_dist.stddev(),
            seed=_set_seed(test_util.test_seed()))
        kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
            inner_kernel=kernel,
            num_adaptation_steps=int(num_burnin_steps * 0.8))

        _, log_accept_ratio = tfp.mcmc.sample_chain(
            num_results=num_results,
            num_burnin_steps=num_burnin_steps,
            current_state=target_dist.sample(num_chains),
            kernel=kernel,
            trace_fn=lambda _, pkr: pkr.inner_results.log_accept_ratio)

        p_accept = tf.math.exp(
            tfp.math.reduce_logmeanexp(tf.minimum(log_accept_ratio, 0.)))

        self.assertAllClose(0.75, self.evaluate(p_accept), atol=0.15)
Example #9
    def testGumbelGumbelKL(self):
        a_loc = np.arange(-2.0, 3.0, 1.0)
        a_scale = np.arange(0.5, 2.5, 0.5)
        b_loc = 2 * np.arange(-2.0, 3.0, 1.0)
        b_scale = np.arange(0.5, 2.5, 0.5)

        # This reshape is intended to expand the number of test cases.
        a_loc = a_loc.reshape((len(a_loc), 1, 1, 1))
        a_scale = a_scale.reshape((1, len(a_scale), 1, 1))
        b_loc = b_loc.reshape((1, 1, len(b_loc), 1))
        b_scale = b_scale.reshape((1, 1, 1, len(b_scale)))

        a = tfd.Gumbel(loc=a_loc, scale=a_scale, validate_args=True)
        b = tfd.Gumbel(loc=b_loc, scale=b_scale, validate_args=True)

        true_kl = (
            np.log(b_scale) - np.log(a_scale) + np.euler_gamma *
            (a_scale / b_scale - 1.) +
            np.expm1((b_loc - a_loc) / b_scale +
                     np.vectorize(np.math.lgamma)(a_scale / b_scale + 1.)) +
            (a_loc - b_loc) / b_scale)

        kl = tfd.kl_divergence(a, b)

        x = a.sample(int(1e5), seed=test_util.test_seed())
        kl_sample = tf.reduce_mean(input_tensor=a.log_prob(x) - b.log_prob(x),
                                   axis=0)

        # As noted in the Gumbel-Gumbel KL divergence implementation, there is an
        # error in the reference paper we use to implement our divergence. This
        # error is a missing summand, (a.loc - b.loc) / b.scale. To ensure that we
        # are adequately testing this difference in the below tests, we compute the
        # relative error between kl_sample_ and kl_ and check that it is "much less"
        # than this missing summand.
        summand = (a_loc - b_loc) / b_scale
        relative_error = (tf.abs(kl - kl_sample) /
                          tf.minimum(tf.abs(kl), tf.abs(kl_sample)))
        exists_missing_summand_test = tf.reduce_any(
            input_tensor=summand > 2 * relative_error)
        exists_missing_summand_test_ = self.evaluate(
            exists_missing_summand_test)
        self.assertTrue(
            exists_missing_summand_test_,
            msg=('No test case exists where the relative error between kl as '
                 'computed in closed form and kl as computed by sampling is '
                 'much less than (a.loc - b.loc) / b.scale. Failing to '
                 'include such a test case makes it difficult to detect '
                 'regressions where this summand (which is missing in our '
                 'reference paper) is omitted.'))

        kl_, kl_sample_ = self.evaluate([kl, kl_sample])
        self.assertAllClose(true_kl, kl_, atol=0.0, rtol=1e-12)
        self.assertAllClose(true_kl, kl_sample_, atol=0.0, rtol=1e-1)

        zero_kl = tfd.kl_divergence(a, a)
        true_zero_kl_, zero_kl_ = self.evaluate(
            [tf.zeros_like(zero_kl), zero_kl])
        self.assertAllEqual(true_zero_kl_, zero_kl_)
 def mutate_onestep(i, state, pkr, log_accept_prob_sum):
     next_state, next_kernel_results = kernel.one_step(
         state, pkr)
     kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)
     log_accept_prob = tf.minimum(kernel_log_accept_ratio, 0.)
     log_accept_prob_sum = log_add_exp(log_accept_prob_sum,
                                       log_accept_prob)
     return i + 1, next_state, next_kernel_results, log_accept_prob_sum
Example #11
 def critic_mix(self, s, a):
     if self.use_dqn:
         target_q1, target_q2 = self.critic_target(s, a)
         target_q = tf.minimum(target_q1, target_q2)
         q1, q2 = self.critic(s, a)
         return q1 * 0.05 + target_q * 0.95, q2 * 0.05 + target_q * 0.95,
     else:
         return self.critic(s, a) * 0.05 + self.critic_target(s, a) * 0.95
 def _sample_n(self, n, seed=None):
     # TODO(b/151571025): revert to `super()._sample_n` once the InverseGamma
     # sampler is XLA-able.
     xs = 1. / gamma.Gamma(concentration=self.concentration,
                           rate=self.scale).sample(n, seed=seed)
     if self._upper_bound is not None:
         xs = tf.minimum(xs, self._upper_bound)
     return xs
Example #13
    def __call__(self,
                 logits,
                 scaled_labels,
                 classes,
                 category_loss=True,
                 mse_loss=False):
        """Compute instance segmentation loss.

    Args:
      logits: A Tensor of shape [batch_size * num_points, height, width,
        num_classes]. The logits are not necessarily between 0 and 1.
      scaled_labels: A float16 Tensor of shape [batch_size, num_instances,
          mask_size, mask_size], where mask_size =
          mask_crop_size * gt_upsample_scale for fine mask, or mask_crop_size
          for coarse masks and shape priors.
      classes: An int tensor of shape [batch_size, num_instances].
      category_loss: Whether to use class-specific mask prediction.
      mse_loss: Whether to use mean squared error for the mask loss.

    Returns:
      mask_loss: A float tensor representing the total mask classification loss.
      iou: a float tensor representing the IoU between target and prediction.
    """
        classes = tf.reshape(classes, [-1])
        _, _, height, width = scaled_labels.get_shape().as_list()
        scaled_labels = tf.reshape(scaled_labels, [-1, height, width])

        if not category_loss:
            logits = logits[:, :, :, 0]
        else:
            logits = tf.transpose(a=logits, perm=(0, 3, 1, 2))
            gather_idx = tf.stack(
                [tf.range(tf.size(input=classes)), classes - 1], axis=1)
            logits = tf.gather_nd(logits, gather_idx)

        # Ignore loss on empty mask targets.
        valid_labels = tf.reduce_any(input_tensor=tf.greater(scaled_labels, 0),
                                     axis=[1, 2])
        if mse_loss:
            # Logits are probabilities in the case of shape prior prediction.
            logits *= tf.reshape(tf.cast(valid_labels, logits.dtype),
                                 [-1, 1, 1])
            weighted_loss = tf.nn.l2_loss(scaled_labels - logits)
            probs = logits
        else:
            weighted_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=scaled_labels, logits=logits)
            probs = tf.sigmoid(logits)
            weighted_loss *= tf.reshape(
                tf.cast(valid_labels, weighted_loss.dtype), [-1, 1, 1])

        iou = tf.reduce_sum(
            input_tensor=tf.minimum(scaled_labels, probs)) / tf.reduce_sum(
                input_tensor=tf.maximum(scaled_labels, probs))
        mask_loss = tf.reduce_sum(input_tensor=weighted_loss) / tf.reduce_sum(
            input_tensor=scaled_labels)
        return tf.cast(mask_loss, tf.float32), tf.cast(iou, tf.float32)
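
The IoU term above is a "soft" IoU over mask probabilities: element-wise minimum over element-wise maximum. A tiny sketch with toy values (labels and probs are stand-ins):

import tensorflow as tf

labels = tf.constant([[1.0, 0.0], [1.0, 1.0]])
probs = tf.constant([[0.8, 0.2], [0.4, 0.9]])
iou = (tf.reduce_sum(tf.minimum(labels, probs)) /
       tf.reduce_sum(tf.maximum(labels, probs)))
# (0.8 + 0.0 + 0.4 + 0.9) / (1.0 + 0.2 + 1.0 + 1.0) = 2.1 / 3.2 ~= 0.656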
Example #14
def _initialize_instrument_weights(float_times, fixed_times, dtype):
    """Function to compute default initial weights for optimization."""
    weights = tf.ones(len(float_times), dtype=dtype)
    one = tf.ones([], dtype=dtype)
    float_times_last = tf.stack([times[-1] for times in float_times])
    fixed_times_last = tf.stack([times[-1] for times in fixed_times])
    weights = tf.maximum(one / float_times_last, one / fixed_times_last)
    weights = tf.minimum(one, weights)
    return tf.unstack(weights, name='instrument_weights')
Example #15
def effective_sample_size(x, **kwargs):
    """tfp.mcmc.effective_sample_size, with a maximum appropriate for HMC."""
    # Since ESS is an estimate, it can go wrong. E.g. negatively correlated
    # samples genuinely have ESS > N, but that larger ESS only reflects
    # variance-reduction power when estimating the mean. Since we (blindly)
    # use ESS everywhere (e.g. for variance estimates), we cap it at N.
    ess = tfp.mcmc.effective_sample_size(x, **kwargs)
    n = tf.cast(prefer_static.size0(x), x.dtype)
    return tf.minimum(ess, n)
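
A minimal usage sketch of capping ESS at the number of samples, assuming tensorflow_probability is importable; the chain here is just i.i.d. noise:

import tensorflow as tf
import tensorflow_probability as tfp

chain = tf.random.normal([1000, 3])   # 1000 draws of a 3-dimensional state
ess = tfp.mcmc.effective_sample_size(chain)
capped = tf.minimum(ess, tf.cast(tf.shape(chain)[0], chain.dtype))
print(capped.numpy())                 # never exceeds 1000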
        def grad_fn(dcovx):
            """Chunk-at-a-time backprop."""
            # Backward, we partition along the `x1`-defined axis.
            bwd_ax_size = tf.shape(x1)[-kernel.feature_ndims - 1]
            bwd_part_size = bwd_ax_size // num_matmul_parts

            dist_ctx = tf.distribute.get_replica_context()
            replica_id = dist_ctx.replica_id_in_sync_group
            num_replicas = dist_ctx.num_replicas_in_sync
            replica_num_parts = num_matmul_parts // num_replicas + tf.cast(
                num_matmul_parts % num_replicas > replica_id,
                num_matmul_parts.dtype)
            replica_begin = (
                (num_matmul_parts // num_replicas) * replica_id +
                tf.minimum(replica_id, num_matmul_parts % num_replicas))

            def bw_cond(i, *_):
                return i < replica_begin + replica_num_parts

            def bw_body(i, dx1, dx2, dx, dkernel_args):
                """tf.while_loop body for backprop."""
                dx1part, dx2part, dxpart, dkernel_argspart = _backward_matmul_one_part(
                    dcovx, kernel_fn, kernel_args, x1, x2, x, bwd_part_size, i)
                dx1, dx2, dx, dkernel_args = tf.nest.pack_sequence_as(
                    (dx1, dx2, dx, dkernel_args),
                    [
                        a + b for a, b in zip(  # pylint: disable=g-complex-comprehension
                            tf.nest.flatten((dx1, dx2, dx, dkernel_args)),
                            tf.nest.flatten((dx1part, dx2part, dxpart,
                                             dkernel_argspart)))
                    ])
                return i + 1, dx1, dx2, dx, dkernel_args

            _, dx1, dx2, dx, dkernel_args = tf.while_loop(
                bw_cond,
                bw_body, (replica_begin, ) +
                tf.nest.map_structure(tf.zeros_like, (x1, x2, x, kernel_args)),
                back_prop=False,
                parallel_iterations=1)
            dx1rem, dx2rem, dxrem, dkernel_argsrem = _backward_matmul_one_part(
                dcovx,
                kernel_fn,
                kernel_args,
                x1,
                x2,
                x,
                bwd_part_size,
                num_matmul_parts,
                remainder_part_size=bwd_ax_size -
                (num_matmul_parts * bwd_part_size))
            flat_xdevice = tf.nest.flatten((dx1, dx2, dx, dkernel_args))
            flat_remainder = tf.nest.flatten(
                (dx1rem, dx2rem, dxrem, dkernel_argsrem))
            return tuple(
                dist_ctx.all_reduce(tf.distribute.ReduceOp.SUM, a) + b
                for a, b in zip(flat_xdevice, flat_remainder))
 def while_loop_body(iteration, multipliers, inactive, old_inactive):
     """Performs one iteration of the projection."""
     del old_inactive  # Needed by the condition, but not the body.
     iteration += 1
     scale = tf.minimum(0.0, (radius - tf.reduce_sum(multipliers)) /
                        tf.maximum(1.0, tf.reduce_sum(inactive)))
     multipliers = multipliers + (scale * inactive)
     new_inactive = tf.cast(multipliers > 0, multipliers.dtype)
     multipliers = multipliers * new_inactive
     return (iteration, multipliers, new_inactive, inactive)
Example #18
    def evaluate_binary_classification(self, predictions, weights):
        """Evaluates the hinge loss on the given predictions.

    Given a rank-1 `Tensor` of predictions with shape (n,), where n is the
    number of examples, and a rank-2 `Tensor` of weights with shape (m, 2),
    where m is broadcastable to n, this method will return a `Tensor` of shape
    (n,) where the ith element is:

    ```python
    hinge_loss[i] = constant_weights[i] +
      (weights[i, 0] - constant_weights[i]) * max{0, margin + predictions[i]} +
      (weights[i, 1] - constant_weights[i]) * max{0, margin - predictions[i]}
    ```

    where constant_weights[i] = min{weights[i, 0], weights[i, 1]} contains the
    minimum weights.

    You can think of weights[:, 0] as being the per-example costs associated
    with making a positive prediction, and weights[:, 1] as those for a negative
    prediction.

    Args:
      predictions: a `Tensor` of shape (n,), where n is the number of examples.
      weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
        `Tensor` is *not* necessarily non-negative.

    Returns:
      A `Tensor` of shape (n,) and dtype=predictions.dtype, containing the
      hinge losses for each example.

    Raises:
      TypeError: if "predictions" is not a floating-point `Tensor`, or "weights"
        is not a `Tensor`.
      ValueError: if "predictions" is not rank-1, or "weights" is not a rank-2
        `Tensor` with exactly two columns.
    """
        predictions = _convert_to_binary_classification_predictions(
            predictions)
        columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
        if columns != 2:
            raise ValueError("weights must have two columns")
        dtype = predictions.dtype.base_dtype
        zero = tf.zeros(1, dtype=dtype)

        positive_weights = tf.cast(weights[:, 0], dtype=dtype)
        negative_weights = tf.cast(weights[:, 1], dtype=dtype)
        constant_weights = tf.minimum(positive_weights, negative_weights)
        positive_weights -= constant_weights
        negative_weights -= constant_weights

        is_positive = tf.maximum(zero, self._margin + predictions)
        is_negative = tf.maximum(zero, self._margin - predictions)

        return constant_weights + (positive_weights * is_positive +
                                   negative_weights * is_negative)
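
A worked sketch of the weighted hinge loss described in the docstring above, written with plain TF ops; the margin and the toy weights are stand-ins:

import tensorflow as tf

margin = 1.0
predictions = tf.constant([0.5, 2.0])
weights = tf.constant([[1.0, 0.0],   # cost of a positive / negative prediction
                       [0.0, 1.0]])

constant_w = tf.minimum(weights[:, 0], weights[:, 1])
positive_w = weights[:, 0] - constant_w
negative_w = weights[:, 1] - constant_w
loss = (constant_w
        + positive_w * tf.maximum(0.0, margin + predictions)
        + negative_w * tf.maximum(0.0, margin - predictions))
print(loss.numpy())  # [1.5, 0.]: only the first example pays for a positive prediction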
Example #19
def clip_boxes(boxes, image_shape):
    """Clips boxes to image boundaries.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The last
      dimension is 2, which represents [height, width].

  Returns:
    clipped_boxes: a tensor whose shape is the same as `boxes` representing the
      clipped boxes.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
    if boxes.shape[-1] != 4:
        raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
            boxes.shape[-1]))

    with tf.name_scope('crop_boxes'):
        if isinstance(image_shape, list) or isinstance(image_shape, tuple):
            height, width = image_shape
        else:
            image_shape = tf.cast(image_shape, dtype=boxes.dtype)
            height = image_shape[..., 0:1]
            width = image_shape[..., 1:2]

        ymin = boxes[..., 0:1]
        xmin = boxes[..., 1:2]
        ymax = boxes[..., 2:3]
        xmax = boxes[..., 3:4]

        clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
        clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
        clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
        clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)

        clipped_boxes = tf.concat(
            [clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax], axis=-1)
        return clipped_boxes
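
A toy usage of clip_boxes as defined above (assuming it is in scope); the box deliberately spills outside a 100x200 image:

import tensorflow as tf

boxes = tf.constant([[-5.0, 10.0, 120.0, 300.0]])    # ymin, xmin, ymax, xmax
clipped = clip_boxes(boxes, image_shape=[100, 200])  # height=100, width=200
print(clipped.numpy())                               # [[ 0. 10. 99. 199.]]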
Example #20
    def _update_principal_component_ema(
        self,
        reduce_axes,
        state,
        step,
        principal_component_ema_points,
        ema_principal_component,
    ):
        # This is a batched version of Oja's algorithm. For the learning rate step,
        # we use Welford's algorithm where the number of points is clamped to a
        # function that grows slower than N.

        event_axes = tf.nest.map_structure(
            lambda x: ps.range(ps.size(reduce_axes), ps.rank(x)) - ps.rank(x),
            state)
        if self.experimental_shard_axis_names is None:
            shard_axis_names = tf.nest.map_structure(lambda _: None, state)
        else:
            shard_axis_names = self.experimental_shard_axis_names

        def _center_part(x):
            return x - distribute_lib.reduce_mean(
                x, reduce_axes, self.experimental_reduce_chain_axis_names)

        state_dot_p = _dot_product(tf.nest.map_structure(_center_part, state),
                                   ema_principal_component, event_axes,
                                   shard_axis_names)

        def _weighted_sum_part(x):
            return distribute_lib.reduce_sum(
                bu.left_justified_expand_dims_like(state_dot_p, x) * x,
                reduce_axes, self.experimental_reduce_chain_axis_names)

        new_principal_component = _normalize(
            tf.nest.map_structure(_weighted_sum_part, state), event_axes,
            shard_axis_names)

        def _ema_part(old_x, new_x):
            weight = 1. / (
                tf.cast(principal_component_ema_points, old_x.dtype) + 1.)
            return old_x + (new_x - old_x) * weight

        new_principal_component_ema_points = tf.minimum(
            principal_component_ema_points + 1,
            tf.maximum(1, step // self.principal_component_ema_factor))
        new_ema_principal_component = _normalize(
            tf.nest.map_structure(_ema_part, ema_principal_component,
                                  new_principal_component), event_axes,
            shard_axis_names)
        return tf.nest.map_structure(
            lambda x, y: tf.where(step < self.num_adaptation_steps, x, y),
            (new_principal_component_ema_points, new_ema_principal_component),
            (principal_component_ema_points, ema_principal_component),
        )
Example #21
 def __call__(self, step):
     starting_iteration = self.steps_per_epoch * self.start_epoch
     starting_iteration = tf.cast(starting_iteration, self.dtype)
     global_step = tf.cast(step, self.dtype)
     recomp_iteration = global_step - starting_iteration + 1.
     decayed_coeff = self.coeff_scheduler(recomp_iteration)
     # This is an autograph-friendly alternative to checking TensorFlow booleans
     # in eager mode.
     scale = tf.minimum(
         tf.maximum(tf.cast(recomp_iteration, self.dtype), 0.), 1.)
     return scale * decayed_coeff
Example #22
def _binomial_subtree_acceptance_batched(
    num_states_in_subtree, num_states, seed_stream):
  with tf1.name_scope("binomial_subtree_acceptance_batched"):
    batch_size = tf.shape(input=num_states_in_subtree)[0]
    return _random_bernoulli(
        [batch_size],
        probs=tf.minimum(
            tf.cast(num_states_in_subtree, dtype=tf.float32) /
            tf.cast(num_states, dtype=tf.float32), 1.),
        dtype=tf.bool,
        seed=seed_stream())
Example #23
def pad_reflecting(x, padding_below, padding_above, axis):
    """Pads `x` with reflecting conditions above and/or below it along some axis.

  Pads `x` with reflecting conditions for `padding_below` entries below the
  tensor and `padding_above` entries above the tensor in the direction along
  `axis`. This is like using tf.pad(x, --, 'REFLECT'), except that this code
  allows for an unbounded number of reflections while tf.pad() only supports
  one reflection. Multiple reflections are necessary for wavelet
  decompositions to guard against cases where the wavelet filters are larger
  than the input tensor along `axis`, which happens often at coarse scales.
  Note that "reflecting" boundary conditions are different from "symmetric"
  boundary conditions, in that it doesn't repeat the last element:
  reflect([A, B, C, D], 2) = [C, B, A, B, C, D, C, B]
  symmet.([A, B, C, D], 2) = [B, A, A, B, C, D, D, C]

  Args:
    x: The tensor to be padded with reflecting boundary conditions.
    padding_below: The number of elements being padded below the tensor.
    padding_above: The number of elements being padded above the tensor.
    axis: The axis in x in which padding will be performed.

  Returns:
    `x` padded according to `padding_below` and `padding_above` along `axis`
    with reflecting boundary conditions.
  """
    if not isinstance(padding_below, int):
        raise ValueError(
            'Expected `padding_below` of type int, but is of type {}'.format(
                type(padding_below)))
    if not isinstance(padding_above, int):
        raise ValueError(
            'Expected `padding_above` of type int, but is of type {}'.format(
                type(padding_above)))
    if not isinstance(axis, int):
        raise ValueError(
            'Expected `axis` of type int, but is of type {}'.format(
                type(axis)))
    if not (axis >= 0 and axis < len(x.shape)):
        raise ValueError('Expected `axis` in [0, {}], but is = {}'.format(
            len(x.shape) - 1, axis))

    if padding_below == 0 and padding_above == 0:
        return tf.convert_to_tensor(x)
    n = tf.shape(x)[axis]
    # `i` contains the indices of the output padded tensor in the frame of
    # reference of the input tensor.
    i = tf.range(-padding_below, n + padding_above, dtype=tf.int32)
    # `j` contains the indices of the input tensor corresponding to the output
    # padded tensor.
    i_mod = tf.math.mod(i, tf.maximum(1, 2 * (n - 1)))
    j = tf.minimum(2 * (n - 1) - i_mod, i_mod)
    return tf.gather(x, j, axis=axis)
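
A toy check of pad_reflecting as defined above (assuming it is in scope), reproducing the reflect example from the docstring:

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0, 4.0])                            # [A, B, C, D]
y = pad_reflecting(x, padding_below=2, padding_above=2, axis=0)
print(y.numpy())  # [3. 2. 1. 2. 3. 4. 3. 2.] i.e. [C, B, A, B, C, D, C, B]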
Example #24
    def _diag_part(self):
        x1, x2, axis = self._x1_x2_axis()
        ax_minsize = tf.minimum(tf.shape(x1)[axis], tf.shape(x2)[axis])

        def slice_of(xn):
            slice_size = tf.where(
                tf.equal(tf.range(tf.rank(xn)),
                         tf.rank(xn) + axis), ax_minsize, tf.shape(xn))
            return tf.slice(xn,
                            begin=tf.zeros_like(tf.shape(xn)),
                            size=slice_size)

        return self.kernel.apply(slice_of(x1), slice_of(x2))
Example #25
    def get_all_episodes(
        self,
        truncate_episode_at: Optional[int] = None,
        limit: Optional[int] = None
    ) -> Tuple[EnvStep, Union[np.ndarray, tf.Tensor]]:
        if self._last_episode_id < 0:
            raise ValueError('No episodes in the dataset.')

        max_range = self._last_episode_id + 1
        if limit is not None:
            max_range = tf.minimum(max_range, tf.cast(limit, tf.int64))
        episode_ids = tf.range(max_range)
        return self._get_episodes(episode_ids, truncate_episode_at)
Example #26
 def _cdf(self, x):
   x = tf.convert_to_tensor(x, name='x')
   flat_x = tf.reshape(x, shape=[-1])
   upper_bound = tf.searchsorted(self.outcomes, values=flat_x, side='right')
   values_at_ub = tf.gather(
       self.outcomes,
       indices=tf.minimum(upper_bound,
                          dist_util.prefer_static_shape(self.outcomes)[-1] -
                          1))
   should_use_upper_bound = self._is_equal_or_close(flat_x, values_at_ub)
   indices = tf.where(should_use_upper_bound, upper_bound, upper_bound - 1)
   return self._categorical.cdf(
       tf.reshape(indices, shape=dist_util.prefer_static_shape(x)))
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
    """Clip bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
    with tf.name_scope(scope, 'ClipToWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
        y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
        x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
        x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
        clipped = box_list.BoxList(
            tf.concat(
                [y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
                1))
        clipped = _copy_extra_fields(clipped, boxlist)
        if filter_nonoverlapping:
            areas = area(clipped)
            nonzero_area_indices = tf.cast(
                tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
            clipped = gather(clipped, nonzero_area_indices)
        return clipped
Example #28
def berp(global_step, start_step, end_step, start_val, end_val, alpha=5):
    """Beta interpolation."""
    beta_dist = tfd.Beta(alpha, alpha)
    mode = beta_dist.mode()
    interp = (tf.cast(global_step - start_step, tf.float32) /
              tf.cast(end_step - start_step, tf.float32))
    interp = tf.maximum(0.0, tf.minimum(1.0, interp))
    interp = tf.where(tf.math.is_nan(interp), tf.zeros_like(interp), interp)
    interp *= mode
    val = beta_dist.prob(interp)
    val /= beta_dist.prob(mode)
    val *= (end_val - start_val)
    val += start_val
    return val
Example #29
def get_linear_warmup_rsqrt_decay_lr(init_lr, hidden_size, num_warmup_steps):
    """Calculate learning rate with linear warmup and rsqrt decay."""
    num_warmup_steps = tf.cast(num_warmup_steps, tf.float32)
    global_step = tf.compat.v1.train.get_or_create_global_step()
    global_step = tf.cast(global_step, tf.float32)

    learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)
    learning_rate *= tf.math.rsqrt(tf.cast(hidden_size, tf.float32))
    # Apply linear warmup
    learning_rate *= tf.minimum(1.0, global_step / num_warmup_steps)
    # Apply rsqrt decay
    learning_rate *= tf.math.rsqrt(tf.maximum(global_step, num_warmup_steps))

    return learning_rate
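
The same schedule can be checked numerically without a global step variable. A short sketch with stand-in hyperparameters (512 hidden units, 4000 warmup steps):

import tensorflow as tf

# lr(step) = init_lr * rsqrt(hidden) * min(1, step / warmup) * rsqrt(max(step, warmup))
init_lr, hidden_size, warmup = 1.0, 512.0, 4000.0
for step in (100.0, 4000.0, 16000.0):
    lr = (init_lr * tf.math.rsqrt(hidden_size)
          * tf.minimum(1.0, step / warmup)
          * tf.math.rsqrt(tf.maximum(step, warmup)))
    # Rises linearly during warmup, then decays as 1 / sqrt(step).
    print(step, float(lr))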
Example #30
def get_max_num_levels(sz):
  """Returns the maximum number of levels that construct() can support.

  Args:
    sz: A tuple of ints representing some input size (batch, width, height).

  Returns:
    The maximum value for num_levels, when calling construct(im, num_levels),
    assuming `sz` is the shape of `im`.
  """
  min_sz = tf.minimum(sz[1], sz[2])
  log2 = lambda x: tf.math.log(tf.cast(x, tf.float32)) / tf.math.log(2.)
  max_num_levels = tf.cast(tf.math.ceil(log2(tf.maximum(1, min_sz))), tf.int32)
  return max_num_levels
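
A toy check of get_max_num_levels as defined above (assuming it is in scope):

levels = get_max_num_levels((8, 40, 64))  # min(40, 64) = 40 -> ceil(log2(40)) = 6
print(int(levels))                        # 6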