def __call__(self, step): with tf.name_scope(self.name or "PolynomialDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype end_learning_rate = tf.cast(self.end_learning_rate, dtype) power = tf.cast(self.power, dtype) global_step_recomp = tf.cast(step, dtype) decay_steps_recomp = tf.cast(self.decay_steps, dtype) if self.cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 multiplier = tf.where( tf.equal(global_step_recomp, 0), 1.0, tf.math.ceil(global_step_recomp / self.decay_steps), ) decay_steps_recomp = tf.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than # decay_steps. global_step_recomp = tf.minimum(global_step_recomp, decay_steps_recomp) p = tf.divide(global_step_recomp, decay_steps_recomp) return tf.add( tf.multiply( initial_learning_rate - end_learning_rate, tf.pow(1 - p, power), ), end_learning_rate, name=name, )
def fit_critic(self, states, actions, next_states, rewards, masks, discount):
  """Updates critic parameters.

  Args:
    states: A batch of states.
    actions: A batch of actions.
    next_states: A batch of next states.
    rewards: A batch of rewards.
    masks: A batch of masks indicating the end of the episodes.
    discount: An MDP discount factor.

  Returns:
    Critic loss.
  """
  _, next_actions, log_probs = self.actor(next_states)
  target_q1, target_q2 = self.critic_target(next_states, next_actions)
  target_v = tf.minimum(target_q1, target_q2) - self.alpha * log_probs
  target_q = rewards + discount * masks * target_v

  with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(self.critic.variables)
    q1, q2 = self.critic(states, actions)
    critic_loss = (
        tf.losses.mean_squared_error(target_q, q1) +
        tf.losses.mean_squared_error(target_q, q2))
    critic_loss = tf.reduce_mean(critic_loss)

  critic_grads = tape.gradient(critic_loss, self.critic.variables)
  self.critic_optimizer.apply_gradients(
      zip(critic_grads, self.critic.variables))

  return critic_loss
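# Minimal NumPy sketch of the TD target used above (not the class method
# itself): target_q = r + discount * mask * (min(Q1', Q2') - alpha * log_pi).
# The array values below are made up purely for illustration.
import numpy as np

rewards = np.array([1.0, 0.0])
masks = np.array([1.0, 0.0])      # 0 marks a terminal transition.
target_q1 = np.array([10.0, 5.0])
target_q2 = np.array([9.0, 6.0])
log_probs = np.array([-1.0, -2.0])
alpha, discount = 0.2, 0.99

target_v = np.minimum(target_q1, target_q2) - alpha * log_probs
target_q = rewards + discount * masks * target_v
# The first transition bootstraps from the target critics; the terminal one
# reduces to its reward: [1 + 0.99 * 9.2, 0.0] = [10.108, 0.0].
assert np.allclose(target_q, [10.108, 0.0])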
def testExample(self):
  target_dist = tfd.JointDistributionSequential([
      tfd.Normal(0., 1.5),
      tfd.Independent(
          tfd.Normal(tf.zeros([2, 5], dtype=tf.float32), 5.),
          reinterpreted_batch_ndims=2),
  ])
  num_burnin_steps = 500
  num_results = 500
  num_chains = 64

  kernel = tfp.mcmc.HamiltonianMonteCarlo(
      target_log_prob_fn=lambda *args: target_dist.log_prob(args),
      num_leapfrog_steps=2,
      step_size=target_dist.stddev())
  kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
      inner_kernel=kernel,
      num_adaptation_steps=int(num_burnin_steps * 0.8),
      # Cast to int32. Not necessary for operation since we cast internally
      # to a float type. This is done to check that we are able to pass in
      # integer types (since they are the natural type for this).
      step_count_smoothing=tf.cast(10, tf.int32))

  seed_stream = test_util.test_seed_stream()
  _, log_accept_ratio = tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=num_burnin_steps,
      current_state=target_dist.sample(num_chains, seed=seed_stream()),
      kernel=kernel,
      trace_fn=lambda _, pkr: pkr.inner_results.log_accept_ratio,
      seed=seed_stream())

  p_accept = tf.reduce_mean(tf.math.exp(tf.minimum(log_accept_ratio, 0.)))
  self.assertAllClose(0.75, self.evaluate(p_accept), atol=0.15)
def __call__(self, step): with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype decay_steps = tf.cast(self.decay_steps, dtype) initial_variance = tf.cast(self.initial_variance, dtype) variance_decay = tf.cast(self.variance_decay, dtype) num_periods = tf.cast(self.num_periods, dtype) alpha = tf.cast(self.alpha, dtype) beta = tf.cast(self.beta, dtype) global_step_recomp = tf.cast(step, dtype) global_step_recomp = tf.minimum(global_step_recomp, decay_steps) linear_decayed = (decay_steps - global_step_recomp) / decay_steps variance = initial_variance / (tf.pow(1.0 + global_step_recomp, variance_decay)) std = tf.sqrt(variance) noisy_linear_decayed = (linear_decayed + self._random_generator.random_normal( linear_decayed.shape, stddev=std)) completed_fraction = global_step_recomp / decay_steps fraction = 2.0 * num_periods * completed_fraction cosine_decayed = 0.5 * ( 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction)) noisy_linear_cosine_decayed = ( alpha + noisy_linear_decayed) * cosine_decayed + beta return tf.multiply(initial_learning_rate, noisy_linear_cosine_decayed, name=name)
def testChainLogProbChainTarget(self):
  init_step = tf.constant([0.1, 0.2])
  kernel = FakeMHKernel(
      FakeSteppedKernel(step_size=init_step),
      log_accept_ratio=tf.stack(
          [tf.math.log(0.74), tf.math.log(0.76)]))
  kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
      kernel,
      num_adaptation_steps=1,
      log_accept_prob_getter_fn=(
          lambda pkr: tf.minimum(0., pkr.log_accept_ratio)),
      validate_args=True,
      target_accept_prob=tf.stack([0.7, 0.8]))

  kernel_results = kernel.bootstrap_results(tf.zeros(2))
  for _ in range(2):
    _, kernel_results = kernel.one_step(tf.zeros(2), kernel_results)

  step_size = self.evaluate(
      kernel_results.inner_results.accepted_results.step_size)

  expected = tf.math.exp(
      tf.math.log(10. * init_step) -
      tf.constant([-0.04, 0.04]) / (
          (_INITIAL_T + 1.) * _EXPLORATION_SHRINKAGE))
  self.assertAllClose(expected, step_size)
def log_sub_exp(x, y, return_sign=False, name=None):
  """Compute `log(exp(max(x, y)) - exp(min(x, y)))` in a numerically stable way.

  Use `return_sign=True` unless `x >= y`, since we can't represent a negative
  in log-space.

  Args:
    x: Float `Tensor` broadcastable with `y`.
    y: Float `Tensor` broadcastable with `x`.
    return_sign: Whether or not to return the second output value `sign`. If
      it is known that `x >= y`, this is unnecessary.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., `'log_sub_exp'`).

  Returns:
    logsubexp: Float `Tensor` of `log(exp(max(x, y)) - exp(min(x, y)))`.
    sign: Float `Tensor` +/-1 indicating the sign of `exp(x) - exp(y)`.
  """
  with tf.name_scope(name or 'log_sub_exp'):
    dtype = dtype_util.common_dtype([x, y], dtype_hint=tf.float32)
    x = tf.convert_to_tensor(x, dtype=dtype, name='x')
    y = tf.convert_to_tensor(y, dtype=dtype, name='y')
    larger = tf.maximum(x, y)
    smaller = tf.minimum(x, y)
    result = larger + log1mexp(tf.maximum(larger - smaller, 0))
    if return_sign:
      ones = tf.ones([], result.dtype)
      return result, tf.where(x < y, -ones, ones)
    return result
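# Quick numerical check (illustrative only, plain NumPy): the stable identity
# used above, larger + log1mexp(larger - smaller), should agree with the naive
# log(exp(x) - exp(y)) whenever the naive form does not overflow.
import numpy as np

def _log_sub_exp_reference(x, y):
  """Naive reference for log(exp(max(x, y)) - exp(min(x, y)))."""
  larger, smaller = np.maximum(x, y), np.minimum(x, y)
  return np.log(np.exp(larger) - np.exp(smaller))

def _log_sub_exp_stable(x, y):
  """Stable form: larger + log(-expm1(-(larger - smaller)))."""
  larger, smaller = np.maximum(x, y), np.minimum(x, y)
  return larger + np.log(-np.expm1(-(larger - smaller)))

assert np.allclose(_log_sub_exp_reference(2.0, 1.0),
                   _log_sub_exp_stable(2.0, 1.0))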
def trace_fn(_, pkr):
  results = pkr.inner_results
  return {
      'accept_prob': tf.exp(tf.minimum(0., results.log_accept_ratio)),
      'step_size': results.accepted_results.step_size,
  }
def testExample(self):
  tf.random.set_seed(test_util.test_seed())
  target_dist = tfd.JointDistributionSequential([
      tfd.Normal(0., 1.5),
      tfd.Independent(
          tfd.Normal(tf.zeros([2, 5], dtype=tf.float32), 5.),
          reinterpreted_batch_ndims=2),
  ])
  num_burnin_steps = 500
  num_results = 500
  num_chains = 64

  kernel = tfp.mcmc.HamiltonianMonteCarlo(
      target_log_prob_fn=lambda *args: target_dist.log_prob(args),
      num_leapfrog_steps=2,
      step_size=target_dist.stddev(),
      seed=_set_seed(test_util.test_seed()))
  kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
      inner_kernel=kernel,
      num_adaptation_steps=int(num_burnin_steps * 0.8))

  _, log_accept_ratio = tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=num_burnin_steps,
      current_state=target_dist.sample(num_chains),
      kernel=kernel,
      trace_fn=lambda _, pkr: pkr.inner_results.log_accept_ratio)

  p_accept = tf.math.exp(
      tfp.math.reduce_logmeanexp(tf.minimum(log_accept_ratio, 0.)))
  self.assertAllClose(0.75, self.evaluate(p_accept), atol=0.15)
def testGumbelGumbelKL(self):
  a_loc = np.arange(-2.0, 3.0, 1.0)
  a_scale = np.arange(0.5, 2.5, 0.5)
  b_loc = 2 * np.arange(-2.0, 3.0, 1.0)
  b_scale = np.arange(0.5, 2.5, 0.5)
  # This reshape is intended to expand the number of test cases.
  a_loc = a_loc.reshape((len(a_loc), 1, 1, 1))
  a_scale = a_scale.reshape((1, len(a_scale), 1, 1))
  b_loc = b_loc.reshape((1, 1, len(b_loc), 1))
  b_scale = b_scale.reshape((1, 1, 1, len(b_scale)))

  a = tfd.Gumbel(loc=a_loc, scale=a_scale, validate_args=True)
  b = tfd.Gumbel(loc=b_loc, scale=b_scale, validate_args=True)

  true_kl = (
      np.log(b_scale) - np.log(a_scale) +
      np.euler_gamma * (a_scale / b_scale - 1.) +
      np.expm1((b_loc - a_loc) / b_scale +
               np.vectorize(np.math.lgamma)(a_scale / b_scale + 1.)) +
      (a_loc - b_loc) / b_scale)

  kl = tfd.kl_divergence(a, b)

  x = a.sample(int(1e5), seed=test_util.test_seed())
  kl_sample = tf.reduce_mean(
      input_tensor=a.log_prob(x) - b.log_prob(x), axis=0)

  # As noted in the Gumbel-Gumbel KL divergence implementation, there is an
  # error in the reference paper we use to implement our divergence. This
  # error is a missing summand, (a.loc - b.loc) / b.scale. To ensure that we
  # are adequately testing this difference in the below tests, we compute the
  # relative error between kl_sample_ and kl_ and check that it is "much less"
  # than this missing summand.
  summand = (a_loc - b_loc) / b_scale
  relative_error = (
      tf.abs(kl - kl_sample) / tf.minimum(tf.abs(kl), tf.abs(kl_sample)))
  exists_missing_summand_test = tf.reduce_any(
      input_tensor=summand > 2 * relative_error)
  exists_missing_summand_test_ = self.evaluate(exists_missing_summand_test)
  self.assertTrue(
      exists_missing_summand_test_,
      msg=('No test case exists where (a.loc - b.loc) / b.scale '
           'is much less than the relative error between kl as '
           'computed in closed form, and kl as computed by '
           'sampling. Failing to include such a test case makes '
           'it difficult to detect regressions where this '
           'summand (which is missing in our reference paper) '
           'is omitted.'))

  kl_, kl_sample_ = self.evaluate([kl, kl_sample])
  self.assertAllClose(true_kl, kl_, atol=0.0, rtol=1e-12)
  self.assertAllClose(true_kl, kl_sample_, atol=0.0, rtol=1e-1)

  zero_kl = tfd.kl_divergence(a, a)
  true_zero_kl_, zero_kl_ = self.evaluate([tf.zeros_like(zero_kl), zero_kl])
  self.assertAllEqual(true_zero_kl_, zero_kl_)
def mutate_onestep(i, state, pkr, log_accept_prob_sum):
  next_state, next_kernel_results = kernel.one_step(state, pkr)
  kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)
  log_accept_prob = tf.minimum(kernel_log_accept_ratio, 0.)
  log_accept_prob_sum = log_add_exp(log_accept_prob_sum, log_accept_prob)
  return i + 1, next_state, next_kernel_results, log_accept_prob_sum
def critic_mix(self, s, a):
  if self.use_dqn:
    target_q1, target_q2 = self.critic_target(s, a)
    target_q = tf.minimum(target_q1, target_q2)
    q1, q2 = self.critic(s, a)
    return q1 * 0.05 + target_q * 0.95, q2 * 0.05 + target_q * 0.95
  else:
    return self.critic(s, a) * 0.05 + self.critic_target(s, a) * 0.95
def _sample_n(self, n, seed=None):
  # TODO(b/151571025): revert to `super()._sample_n` once the InverseGamma
  # sampler is XLA-able.
  xs = 1. / gamma.Gamma(
      concentration=self.concentration, rate=self.scale).sample(n, seed=seed)
  if self._upper_bound is not None:
    xs = tf.minimum(xs, self._upper_bound)
  return xs
def __call__(self, logits, scaled_labels, classes, category_loss=True,
             mse_loss=False):
  """Computes the instance segmentation loss.

  Args:
    logits: A Tensor of shape [batch_size * num_points, height, width,
      num_classes]. The logits are not necessarily between 0 and 1.
    scaled_labels: A float16 Tensor of shape [batch_size, num_instances,
      mask_size, mask_size], where mask_size =
      mask_crop_size * gt_upsample_scale for fine masks, or mask_crop_size for
      coarse masks and shape priors.
    classes: An int Tensor of shape [batch_size, num_instances].
    category_loss: Whether to use class-specific mask prediction.
    mse_loss: Whether to use mean squared error for the mask loss.

  Returns:
    mask_loss: A float Tensor representing the total mask classification loss.
    iou: A float Tensor representing the IoU between target and prediction.
  """
  classes = tf.reshape(classes, [-1])
  _, _, height, width = scaled_labels.get_shape().as_list()
  scaled_labels = tf.reshape(scaled_labels, [-1, height, width])

  if not category_loss:
    logits = logits[:, :, :, 0]
  else:
    logits = tf.transpose(a=logits, perm=(0, 3, 1, 2))
    gather_idx = tf.stack(
        [tf.range(tf.size(input=classes)), classes - 1], axis=1)
    logits = tf.gather_nd(logits, gather_idx)

  # Ignore loss on empty mask targets.
  valid_labels = tf.reduce_any(
      input_tensor=tf.greater(scaled_labels, 0), axis=[1, 2])
  if mse_loss:
    # Logits are probabilities in the case of shape prior prediction.
    logits *= tf.reshape(tf.cast(valid_labels, logits.dtype), [-1, 1, 1])
    weighted_loss = tf.nn.l2_loss(scaled_labels - logits)
    probs = logits
  else:
    weighted_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=scaled_labels, logits=logits)
    probs = tf.sigmoid(logits)
    weighted_loss *= tf.reshape(
        tf.cast(valid_labels, weighted_loss.dtype), [-1, 1, 1])

  iou = tf.reduce_sum(
      input_tensor=tf.minimum(scaled_labels, probs)) / tf.reduce_sum(
          input_tensor=tf.maximum(scaled_labels, probs))
  mask_loss = tf.reduce_sum(input_tensor=weighted_loss) / tf.reduce_sum(
      input_tensor=scaled_labels)
  return tf.cast(mask_loss, tf.float32), tf.cast(iou, tf.float32)
def _initialize_instrument_weights(float_times, fixed_times, dtype):
  """Function to compute default initial weights for optimization."""
  weights = tf.ones(len(float_times), dtype=dtype)
  one = tf.ones([], dtype=dtype)
  float_times_last = tf.stack([times[-1] for times in float_times])
  fixed_times_last = tf.stack([times[-1] for times in fixed_times])
  weights = tf.maximum(one / float_times_last, one / fixed_times_last)
  weights = tf.minimum(one, weights)
  return tf.unstack(weights, name='instrument_weights')
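# Worked example in NumPy (hypothetical inputs): each instrument's weight is
# min(1, max(1 / last_float_time, 1 / last_fixed_time)), so short-dated
# instruments get weight 1 and long-dated ones are down-weighted.
import numpy as np

float_times_last = np.array([0.5, 2.0, 10.0])
fixed_times_last = np.array([0.5, 2.0, 10.0])
weights = np.minimum(
    1.0, np.maximum(1.0 / float_times_last, 1.0 / fixed_times_last))
assert np.allclose(weights, [1.0, 0.5, 0.1])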
def effective_sample_size(x, **kwargs):
  """tfp.mcmc.effective_sample_size, with a maximum appropriate for HMC."""
  # Since ESS is an estimate, it can go wrong... E.g. we can have negatively
  # correlated samples, which *do* have ESS > N, but this ESS is only
  # applicable for variance reduction power for estimation of the mean. We
  # want to (blindly) use ESS everywhere (e.g. variance estimates)...and so...
  ess = tfp.mcmc.effective_sample_size(x, **kwargs)
  n = tf.cast(prefer_static.size0(x), x.dtype)
  return tf.minimum(ess, n)
def grad_fn(dcovx):
  """Chunk-at-a-time backprop."""
  # Backward, we partition along the `x1`-defined axis.
  bwd_ax_size = tf.shape(x1)[-kernel.feature_ndims - 1]
  bwd_part_size = bwd_ax_size // num_matmul_parts
  dist_ctx = tf.distribute.get_replica_context()
  replica_id = dist_ctx.replica_id_in_sync_group
  num_replicas = dist_ctx.num_replicas_in_sync
  replica_num_parts = num_matmul_parts // num_replicas + tf.cast(
      num_matmul_parts % num_replicas > replica_id, num_matmul_parts.dtype)
  replica_begin = (
      (num_matmul_parts // num_replicas) * replica_id +
      tf.minimum(replica_id, num_matmul_parts % num_replicas))

  def bw_cond(i, *_):
    return i < replica_begin + replica_num_parts

  def bw_body(i, dx1, dx2, dx, dkernel_args):
    """tf.while_loop body for backprop."""
    dx1part, dx2part, dxpart, dkernel_argspart = _backward_matmul_one_part(
        dcovx, kernel_fn, kernel_args, x1, x2, x, bwd_part_size, i)
    dx1, dx2, dx, dkernel_args = tf.nest.pack_sequence_as(
        (dx1, dx2, dx, dkernel_args),
        [a + b for a, b in zip(  # pylint: disable=g-complex-comprehension
            tf.nest.flatten((dx1, dx2, dx, dkernel_args)),
            tf.nest.flatten((dx1part, dx2part, dxpart, dkernel_argspart)))])
    return i + 1, dx1, dx2, dx, dkernel_args

  _, dx1, dx2, dx, dkernel_args = tf.while_loop(
      bw_cond, bw_body,
      (replica_begin,) + tf.nest.map_structure(
          tf.zeros_like, (x1, x2, x, kernel_args)),
      back_prop=False,
      parallel_iterations=1)
  dx1rem, dx2rem, dxrem, dkernel_argsrem = _backward_matmul_one_part(
      dcovx, kernel_fn, kernel_args, x1, x2, x,
      bwd_part_size, num_matmul_parts,
      remainder_part_size=bwd_ax_size - (num_matmul_parts * bwd_part_size))
  flat_xdevice = tf.nest.flatten((dx1, dx2, dx, dkernel_args))
  flat_remainder = tf.nest.flatten((dx1rem, dx2rem, dxrem, dkernel_argsrem))
  return tuple(
      dist_ctx.all_reduce(tf.distribute.ReduceOp.SUM, a) + b
      for a, b in zip(flat_xdevice, flat_remainder))
def while_loop_body(iteration, multipliers, inactive, old_inactive):
  """Performs one iteration of the projection."""
  del old_inactive  # Needed by the condition, but not the body.
  iteration += 1
  scale = tf.minimum(
      0.0, (radius - tf.reduce_sum(multipliers)) /
      tf.maximum(1.0, tf.reduce_sum(inactive)))
  multipliers = multipliers + (scale * inactive)
  new_inactive = tf.cast(multipliers > 0, multipliers.dtype)
  multipliers = multipliers * new_inactive
  return (iteration, multipliers, new_inactive, inactive)
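# Illustrative NumPy run of the projection step above (made-up numbers): each
# iteration subtracts the excess over `radius` evenly from the still-positive
# ("inactive" mask = 1) multipliers, then clips any negatives to zero.
import numpy as np

radius = 1.0
multipliers = np.array([0.9, 0.6, 0.1])
inactive = np.ones_like(multipliers)
for _ in range(3):
  scale = min(0.0, (radius - multipliers.sum()) / max(1.0, inactive.sum()))
  multipliers = multipliers + scale * inactive
  new_inactive = (multipliers > 0).astype(multipliers.dtype)
  multipliers = multipliers * new_inactive
  inactive = new_inactive

# The result is non-negative and sums to (at most) the radius.
assert (multipliers >= 0).all() and multipliers.sum() <= radius + 1e-6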
def evaluate_binary_classification(self, predictions, weights):
  """Evaluates the hinge loss on the given predictions.

  Given a rank-1 `Tensor` of predictions with shape (n,), where n is the
  number of examples, and a rank-2 `Tensor` of weights with shape (m, 2),
  where m is broadcastable to n, this method will return a `Tensor` of shape
  (n,) where the ith element is:

  ```python
  hinge_loss[i] = constant_weights[i] +
      (weights[i, 0] - constant_weights[i]) * max{0, margin + predictions[i]} +
      (weights[i, 1] - constant_weights[i]) * max{0, margin - predictions[i]}
  ```

  where constant_weights[i] = min{weights[i, 0], weights[i, 1]} contains the
  minimum weights.

  You can think of weights[:, 0] as being the per-example costs associated
  with making a positive prediction, and weights[:, 1] as those for a negative
  prediction.

  Args:
    predictions: a `Tensor` of shape (n,), where n is the number of examples.
    weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
      `Tensor` is *not* necessarily non-negative.

  Returns:
    A `Tensor` of shape (n,) and dtype=predictions.dtype, containing the hinge
    losses for each example.

  Raises:
    TypeError: if "predictions" is not a floating-point `Tensor`, or "weights"
      is not a `Tensor`.
    ValueError: if "predictions" is not rank-1, or "weights" is not a rank-2
      `Tensor` with exactly two columns.
  """
  predictions = _convert_to_binary_classification_predictions(predictions)

  columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
  if columns != 2:
    raise ValueError("weights must have two columns")
  dtype = predictions.dtype.base_dtype

  zero = tf.zeros(1, dtype=dtype)
  positive_weights = tf.cast(weights[:, 0], dtype=dtype)
  negative_weights = tf.cast(weights[:, 1], dtype=dtype)
  constant_weights = tf.minimum(positive_weights, negative_weights)
  positive_weights -= constant_weights
  negative_weights -= constant_weights

  is_positive = tf.maximum(zero, self._margin + predictions)
  is_negative = tf.maximum(zero, self._margin - predictions)

  return constant_weights + (
      positive_weights * is_positive + negative_weights * is_negative)
def clip_boxes(boxes, image_shape):
  """Clips boxes to image boundaries.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The last
      dimension is 2, which represents [height, width].

  Returns:
    clipped_boxes: a tensor whose shape is the same as `boxes` representing the
      clipped boxes.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
        boxes.shape[-1]))

  with tf.name_scope('crop_boxes'):
    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height = image_shape[..., 0:1]
      width = image_shape[..., 1:2]

    ymin = boxes[..., 0:1]
    xmin = boxes[..., 1:2]
    ymax = boxes[..., 2:3]
    xmax = boxes[..., 3:4]

    clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
    clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
    clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
    clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)

    clipped_boxes = tf.concat(
        [clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax], axis=-1)
    return clipped_boxes
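# Hypothetical usage of `clip_boxes` above (assumes TF2 eager execution and the
# function defined above): a box extending past a 100x100 image is clipped to
# the valid pixel range [0, side - 1].
example_boxes = tf.constant([[-10.0, 20.0, 150.0, 120.0]])
clipped_example = clip_boxes(example_boxes, image_shape=[100, 100])
# Expected value: [[0.0, 20.0, 99.0, 99.0]].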
def _update_principal_component_ema(
    self,
    reduce_axes,
    state,
    step,
    principal_component_ema_points,
    ema_principal_component,
):
  # This is a batched version of Oja's algorithm. For the learning rate step,
  # we use Welford's algorithm where the number of points is clamped to a
  # function that grows slower than N.
  event_axes = tf.nest.map_structure(
      lambda x: ps.range(ps.size(reduce_axes), ps.rank(x)) - ps.rank(x),
      state)
  if self.experimental_shard_axis_names is None:
    shard_axis_names = tf.nest.map_structure(lambda _: None, state)
  else:
    shard_axis_names = self.experimental_shard_axis_names

  def _center_part(x):
    return x - distribute_lib.reduce_mean(
        x, reduce_axes, self.experimental_reduce_chain_axis_names)

  state_dot_p = _dot_product(
      tf.nest.map_structure(_center_part, state), ema_principal_component,
      event_axes, shard_axis_names)

  def _weighted_sum_part(x):
    return distribute_lib.reduce_sum(
        bu.left_justified_expand_dims_like(state_dot_p, x) * x, reduce_axes,
        self.experimental_reduce_chain_axis_names)

  new_principal_component = _normalize(
      tf.nest.map_structure(_weighted_sum_part, state), event_axes,
      shard_axis_names)

  def _ema_part(old_x, new_x):
    weight = 1. / (
        tf.cast(principal_component_ema_points, old_x.dtype) + 1.)
    return old_x + (new_x - old_x) * weight

  new_principal_component_ema_points = tf.minimum(
      principal_component_ema_points + 1,
      tf.maximum(1, step // self.principal_component_ema_factor))
  new_ema_principal_component = _normalize(
      tf.nest.map_structure(_ema_part, ema_principal_component,
                            new_principal_component), event_axes,
      shard_axis_names)
  return tf.nest.map_structure(
      lambda x, y: tf.where(step < self.num_adaptation_steps, x, y),
      (new_principal_component_ema_points, new_ema_principal_component),
      (principal_component_ema_points, ema_principal_component),
  )
def __call__(self, step):
  starting_iteration = self.steps_per_epoch * self.start_epoch
  starting_iteration = tf.cast(starting_iteration, self.dtype)
  global_step = tf.cast(step, self.dtype)
  recomp_iteration = global_step - starting_iteration + 1.
  decayed_coeff = self.coeff_scheduler(recomp_iteration)
  # This is an autograph-friendly alternative to checking TensorFlow booleans
  # in eager mode.
  scale = tf.minimum(
      tf.maximum(tf.cast(recomp_iteration, self.dtype), 0.), 1.)
  return scale * decayed_coeff
def _binomial_subtree_acceptance_batched(
    num_states_in_subtree, num_states, seed_stream):
  with tf1.name_scope("binomial_subtree_acceptance_batched"):
    batch_size = tf.shape(input=num_states_in_subtree)[0]
    return _random_bernoulli(
        [batch_size],
        probs=tf.minimum(
            tf.cast(num_states_in_subtree, dtype=tf.float32) /
            tf.cast(num_states, dtype=tf.float32), 1.),
        dtype=tf.bool,
        seed=seed_stream())
def pad_reflecting(x, padding_below, padding_above, axis):
  """Pads `x` with reflecting conditions above and/or below it along some axis.

  Pads `x` with reflecting conditions for `padding_below` entries below the
  tensor and `padding_above` entries above the tensor in the direction along
  `axis`. This is like using tf.pad(x, --, 'REFLECT'), except that this code
  allows for an unbounded number of reflections while tf.pad() only supports
  one reflection. Multiple reflections are necessary for wavelet
  decompositions to guard against cases where the wavelet filters are larger
  than the input tensor along `axis`, which happens often at coarse scales.
  Note that "reflecting" boundary conditions are different from "symmetric"
  boundary conditions, in that it doesn't repeat the last element:
  reflect([A, B, C, D], 2) = [C, B, A, B, C, D, C, B]
  symmet.([A, B, C, D], 2) = [B, A, A, B, C, D, D, C]

  Args:
    x: The tensor to be padded with reflecting boundary conditions.
    padding_below: The number of elements being padded below the tensor.
    padding_above: The number of elements being padded above the tensor.
    axis: The axis in x in which padding will be performed.

  Returns:
    `x` padded according to `padding_below` and `padding_above` along `axis`
    with reflecting boundary conditions.
  """
  if not isinstance(padding_below, int):
    raise ValueError(
        'Expected `padding_below` of type int, but is of type {}'.format(
            type(padding_below)))
  if not isinstance(padding_above, int):
    raise ValueError(
        'Expected `padding_above` of type int, but is of type {}'.format(
            type(padding_above)))
  if not isinstance(axis, int):
    raise ValueError('Expected `axis` of type int, but is of type {}'.format(
        type(axis)))
  if not (axis >= 0 and axis < len(x.shape)):
    raise ValueError('Expected `axis` in [0, {}], but is = {}'.format(
        len(x.shape) - 1, axis))

  if padding_below == 0 and padding_above == 0:
    return tf.convert_to_tensor(x)

  n = tf.shape(x)[axis]
  # `i` contains the indices of the output padded tensor in the frame of
  # reference of the input tensor.
  i = tf.range(-padding_below, n + padding_above, dtype=tf.int32)
  # `j` contains the indices of the input tensor corresponding to the output
  # padded tensor.
  i_mod = tf.math.mod(i, tf.maximum(1, 2 * (n - 1)))
  j = tf.minimum(2 * (n - 1) - i_mod, i_mod)
  return tf.gather(x, j, axis=axis)
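# Small NumPy illustration (not part of the function above) of the index
# arithmetic that produces the reflecting pattern from the docstring, e.g.
# reflect([A, B, C, D], 2) -> [C, B, A, B, C, D, C, B].
import numpy as np

vals = np.array(['A', 'B', 'C', 'D'])
n, below, above = len(vals), 2, 2
i = np.arange(-below, n + above)
i_mod = np.mod(i, max(1, 2 * (n - 1)))
j = np.minimum(2 * (n - 1) - i_mod, i_mod)
assert list(vals[j]) == ['C', 'B', 'A', 'B', 'C', 'D', 'C', 'B']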
def _diag_part(self):
  x1, x2, axis = self._x1_x2_axis()
  ax_minsize = tf.minimum(tf.shape(x1)[axis], tf.shape(x2)[axis])

  def slice_of(xn):
    slice_size = tf.where(
        tf.equal(tf.range(tf.rank(xn)), tf.rank(xn) + axis),
        ax_minsize,
        tf.shape(xn))
    return tf.slice(xn, begin=tf.zeros_like(tf.shape(xn)), size=slice_size)

  return self.kernel.apply(slice_of(x1), slice_of(x2))
def get_all_episodes(
    self,
    truncate_episode_at: Optional[int] = None,
    limit: Optional[int] = None
) -> Tuple[EnvStep, Union[np.ndarray, tf.Tensor]]:
  if self._last_episode_id < 0:
    raise ValueError('No episodes in the dataset.')
  max_range = self._last_episode_id + 1
  if limit is not None:
    max_range = tf.minimum(max_range, tf.cast(limit, tf.int64))
  episode_ids = tf.range(max_range)
  return self._get_episodes(episode_ids, truncate_episode_at)
def _cdf(self, x):
  x = tf.convert_to_tensor(x, name='x')
  flat_x = tf.reshape(x, shape=[-1])
  upper_bound = tf.searchsorted(self.outcomes, values=flat_x, side='right')
  values_at_ub = tf.gather(
      self.outcomes,
      indices=tf.minimum(
          upper_bound,
          dist_util.prefer_static_shape(self.outcomes)[-1] - 1))
  should_use_upper_bound = self._is_equal_or_close(flat_x, values_at_ub)
  indices = tf.where(should_use_upper_bound, upper_bound, upper_bound - 1)
  return self._categorical.cdf(
      tf.reshape(indices, shape=dist_util.prefer_static_shape(x)))
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
  """Clip bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
    y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
    x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
    x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
    clipped = box_list.BoxList(
        tf.concat(
            [y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped], 1))
    clipped = _copy_extra_fields(clipped, boxlist)
    if filter_nonoverlapping:
      areas = area(clipped)
      nonzero_area_indices = tf.cast(
          tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
      clipped = gather(clipped, nonzero_area_indices)
    return clipped
def berp(global_step, start_step, end_step, start_val, end_val, alpha=5):
  """Beta interpolation."""
  beta_dist = tfd.Beta(alpha, alpha)
  mode = beta_dist.mode()
  interp = (tf.cast(global_step - start_step, tf.float32) /
            tf.cast(end_step - start_step, tf.float32))
  interp = tf.maximum(0.0, tf.minimum(1.0, interp))
  interp = tf.where(tf.math.is_nan(interp), tf.zeros_like(interp), interp)
  interp *= mode
  val = beta_dist.prob(interp)
  val /= beta_dist.prob(mode)
  val *= (end_val - start_val)
  val += start_val
  return val
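# Rough NumPy/SciPy sketch of `berp` above (illustrative only; scipy.stats.beta
# stands in for tfd.Beta): the schedule ramps smoothly from start_val at
# start_step to end_val at end_step along a Beta(alpha, alpha) pdf.
import numpy as np
from scipy.stats import beta as scipy_beta

def berp_reference(global_step, start_step, end_step, start_val, end_val,
                   alpha=5):
  mode = (alpha - 1.) / (2. * alpha - 2.)  # Mode of Beta(alpha, alpha) is 0.5.
  interp = (global_step - start_step) / float(end_step - start_step)
  interp = np.clip(interp, 0.0, 1.0) * mode
  val = (scipy_beta.pdf(interp, alpha, alpha) /
         scipy_beta.pdf(mode, alpha, alpha))
  return val * (end_val - start_val) + start_val

assert np.isclose(berp_reference(0, 0, 100, 1.0, 2.0), 1.0)    # At the start.
assert np.isclose(berp_reference(100, 0, 100, 1.0, 2.0), 2.0)  # At the end.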
def get_linear_warmup_rsqrt_decay_lr(init_lr, hidden_size, num_warmup_steps):
  """Calculate learning rate with linear warmup and rsqrt decay."""
  num_warmup_steps = tf.cast(num_warmup_steps, tf.float32)
  global_step = tf.compat.v1.train.get_or_create_global_step()
  global_step = tf.cast(global_step, tf.float32)

  learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)
  learning_rate *= tf.math.rsqrt(tf.cast(hidden_size, tf.float32))
  # Apply linear warmup.
  learning_rate *= tf.minimum(1.0, global_step / num_warmup_steps)
  # Apply rsqrt decay.
  learning_rate *= tf.math.rsqrt(tf.maximum(global_step, num_warmup_steps))
  return learning_rate
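# Worked example of the schedule above in plain NumPy (illustrative values):
#   lr(step) = init_lr * hidden_size**-0.5 * min(1, step / warmup)
#              * max(step, warmup)**-0.5,
# i.e. a linear ramp over `num_warmup_steps` followed by 1/sqrt(step) decay.
import numpy as np

init_lr, hidden_size, warmup = 2.0, 1024, 10000

def lr_reference(step):
  return (init_lr * hidden_size ** -0.5 * min(1.0, step / warmup)
          * max(step, warmup) ** -0.5)

# At the end of warmup the two branches meet at
# init_lr / sqrt(hidden_size * warmup); afterwards the rate decays as
# 1/sqrt(step), so it halves every 4x increase in step.
assert np.isclose(lr_reference(10000), init_lr / np.sqrt(hidden_size * warmup))
assert np.isclose(lr_reference(40000), lr_reference(10000) / 2.0)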
def get_max_num_levels(sz):
  """Returns the maximum number of levels that construct() can support.

  Args:
    sz: A tuple of ints representing some input size (batch, width, height).

  Returns:
    The maximum value for num_levels, when calling construct(im, num_levels),
    assuming `sz` is the shape of `im`.
  """
  min_sz = tf.minimum(sz[1], sz[2])
  log2 = lambda x: tf.math.log(tf.cast(x, tf.float32)) / tf.math.log(2.)
  max_num_levels = tf.cast(tf.math.ceil(log2(tf.maximum(1, min_sz))), tf.int32)
  return max_num_levels
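# Quick numeric check of the formula above (illustrative): for 480x640 inputs
# the smaller spatial side is 480, so the pyramid can have at most
# ceil(log2(480)) = 9 levels.
import math
assert math.ceil(math.log2(min(480, 640))) == 9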