def test_random_dates(self):
  """Checks the shape and the bounds of dates drawn by `random_dates`."""
  lower = dateslib.dates_from_tuples([(2020, 5, 16), (2020, 6, 13)])
  upper = dateslib.dates_from_tuples([(2021, 5, 21)])
  num_draws = 3
  # Draw `num_draws` dates for every (start, end) pair.
  drawn = dateslib.random_dates(
      start_date=lower, end_date=upper, size=num_draws, seed=42)
  self.assertEqual(drawn.shape, (3, 2))
  # Every draw has to fall in the half-open interval [start, end).
  strictly_before_end = tf.reduce_all(drawn < upper)
  self.assertTrue(self.evaluate(strictly_before_end))
  not_before_start = tf.reduce_all(drawn >= lower)
  self.assertTrue(self.evaluate(not_before_start))
def rank_equal_case():
  """Weighted average for the case where `a` and `weights` share a shape.

  `a`, `weights` and `axis` are read from the enclosing scope.

  Returns:
    A tuple `(avg, weights_sum)`: the sum of `a * weights` along `axis`
    divided by the sum of `weights` along `axis`, and that sum of weights.
  """
  shape_of_a = tf.shape(a)
  shape_of_weights = tf.shape(weights)
  shapes_match = tf.debugging.Assert(
      tf.reduce_all(shape_of_a == shape_of_weights),
      [shape_of_a, shape_of_weights])
  # An Assert op that nothing depends on is never executed in graph mode, so
  # the reductions are explicitly tied to it. In eager mode the assert has
  # already run (and returned None), which `control_dependencies` tolerates.
  with tf.control_dependencies([shapes_match]):
    weights_sum = tf.reduce_sum(weights, axis=axis)
    avg = tf.reduce_sum(a * weights, axis=axis) / weights_sum
  return avg, weights_sum
def _inv_hessian_control_inputs(inv_hessian):
  """Builds assertions validating a user-supplied inverse Hessian.

  The assertions check that the matrix is positive definite and symmetric.

  Args:
    inv_hessian: The starting estimate for the inverse of the Hessian at the
      initial point.

  Returns:
    A list of tf.Assert ops suitable for use with tf.control_dependencies.
  """
  # Positive definiteness is probed through the Cholesky factorisation: the
  # check passes when every entry of the computed factor is finite.
  cholesky_factor = tf.linalg.cholesky(inv_hessian)
  is_positive_definite = tf.reduce_all(
      tf.math.is_finite(cholesky_factor), axis=[-1, -2])

  # Symmetry: the difference between the matrix and its transpose must have a
  # norm of exactly zero.
  asymmetry = inv_hessian - _batch_transpose(inv_hessian)
  is_symmetric = tf.equal(bfgs_utils.norm(asymmetry, dims=2), 0)

  # Depending on the boolean results alone would not trigger a failure; they
  # have to be wrapped in asserts.
  positive_definite_assert = tf.debugging.Assert(
      is_positive_definite,
      ['Initial inverse Hessian is not positive definite.', inv_hessian])
  symmetric_assert = tf.debugging.Assert(
      is_symmetric,
      ['Initial inverse Hessian is not symmetric', inv_hessian])
  return [positive_definite_assert, symmetric_assert]
def partition_spline_curve(alpha):
  """Compresses the range of alpha >= 0 ahead of spline interpolation.

  A hand-crafted curve that maps alpha values onto a short finite range that
  works well when spline interpolation is used to model the partition function
  Z(alpha). Because Z(alpha) is only varied in [0, 4] and is especially
  interesting around alpha=2, the curve is roughly linear in [0, 4] with a
  slope of ~1 at alpha=0 and alpha=4 but a slope of ~10 at alpha=2. For
  alpha > 4 the curve is logarithmic.

  Some (input, output) pairs for this function are:
    [(0, 0), (1, ~1.2), (2, 4), (3, ~6.8), (4, 8), (8, ~8.8), (400000, ~12)]
  This function is continuously differentiable.

  Args:
    alpha: A numpy array or TF tensor (float32 or float64) with values >= 0.

  Returns:
    An array/tensor of curved values >= 0 with the same type as `alpha`, to be
    used as input x-coordinates for spline interpolation.
  """

  def as_dtype(value):
    # Constants are cast so that arithmetic stays in `alpha`'s precision.
    return tf.cast(value, alpha.dtype)

  non_negative = tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])
  with tf.control_dependencies([non_negative]):
    # Branch used for alpha < 4.
    below_four = (
        (as_dtype(2.25) * alpha - as_dtype(4.5)) /
        (tf.abs(alpha - as_dtype(2)) + as_dtype(0.25)) + alpha + as_dtype(2))
    # Logarithmic branch used for alpha >= 4.
    four_and_up = (
        as_dtype(5) / as_dtype(18) *
        util.log_safe(as_dtype(4) * alpha - as_dtype(15)) + as_dtype(8))
    x = tf.where(alpha < 4, below_four, four_and_up)
  return x
def log_base_partition_function(self, alpha):
  r"""Approximates the log-partition function with a 1D spline.

  The partition function (Z(\alpha) in the paper) is difficult to model
  analytically, so it is approximated with a (transformed) cubic hermite
  spline: every alpha is first pushed through a nonlinearity and the result is
  used to interpolate into the spline. This lets a relatively small spline
  model the log partition function over all non-negative inputs.

  Args:
    alpha: A tensor or scalar of single or double precision floats containing
      the alphas for which an approximate log partition function is wanted.
      Must be non-negative, as the partition function is undefined when
      alpha < 0.

  Returns:
    An approximation of log(Z(alpha)) accurate to within 1e-6
  """
  dtype = alpha.dtype
  # Guard against alpha < 0, where the partition function is undefined.
  non_negative = tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])
  with tf.control_dependencies([non_negative]):
    # Map `alpha` onto the x-coordinates the spline expects.
    curved = partition_spline_curve(alpha)
    x_scale = tf.cast(self._spline_x_scale, dtype)
    values = tf.cast(self._spline_values, dtype)
    tangents = tf.cast(self._spline_tangents, dtype)
    # Look the curved coordinates up in the spline.
    return cubic_spline.interpolate1d(curved * x_scale, values, tangents)
def has_not_u_turn_at_odd_step(instruction, iter_, direction,
                               momentum_state_memory, momentum_right,
                               state_right):
  """Checks for a U-turn against a stored left endpoint (early stopping).

  The left momentum and state are gathered from `momentum_state_memory` at the
  index `instruction[iter_]`. Both momenta are negated wherever `direction` is
  False before being handed to `has_not_u_turn`.

  Returns:
    The result of `has_not_u_turn`, reduced with a logical AND over axis 0.
  """
  # Note that here iter_ is actually iter_ // 2
  left_index = instruction[iter_]

  if USE_TENSORARRAY:
    fetch = lambda buffer: buffer.gather(left_index)
  else:
    fetch = lambda buffer: tf.gather(buffer, left_index, axis=0)

  momentum_left = [fetch(m) for m in momentum_state_memory.momentum_swap]
  state_left = [fetch(s) for s in momentum_state_memory.state_swap]

  def oriented(momenta):
    # Keep the momentum where the direction flag is set, flip it otherwise.
    return [tf.where(d, m, -m) for d, m in zip(direction, momenta)]

  no_u_turn_per_entry = has_not_u_turn(
      state_left, oriented(momentum_left),
      state_right, oriented(momentum_right))
  return tf.reduce_all(no_u_turn_per_entry, axis=0)
def quadratic_with_spike(x):
  """Scaled quadratic that is +inf inside the open box (0.7, 1.3).

  `scales` and `minimum` are read from the enclosing scope.

  Args:
    x: Points to evaluate; the last axis holds the coordinates.

  Returns:
    `sum(scales * (x - minimum)**2)` over the last axis, replaced by +inf
    wherever every coordinate of `x` lies strictly between 0.7 and 1.3.
  """
  inside_hole = tf.reduce_all((x > 0.7) & (x < 1.3), axis=-1)
  bowl = tf.reduce_sum(
      scales * tf.math.squared_difference(x, minimum), axis=-1)
  spike = tf.constant(float('+inf'), dtype=bowl.dtype)
  return tf.where(inside_hole, spike, bowl)
def _prob(self, x):
  """Returns 1 if `x` matches the stored values and 0 otherwise.

  Each flattened part of `x` is compared with the leading slice of the
  corresponding stored part, the slice length being `prefer_static.size0` of
  the `x` part. All comparisons are AND-ed into one value, which is cast to
  the dtype of the first stored part.
  """
  stored_parts = tf.nest.flatten(self.values_with_sample_dim)
  query_parts = tf.nest.flatten(x)
  part_matches = []
  for query, stored in zip(query_parts, stored_parts):
    leading = stored[:prefer_static.size0(query)]
    part_matches.append(tf.equal(query, leading))
  return tf.cast(tf.reduce_all(part_matches), dtype=stored_parts[0].dtype)
def _update_position(state, position_delta, next_objective, next_gradient,
                     grad_tolerance, f_relative_tolerance, x_tolerance):
  """Advances the state's position by `position_delta`.

  A member is flagged as failed if it had already failed, or if the new
  objective or any component of the new gradient is not finite. Members that
  have not failed are flagged as converged according to `_check_convergence`.

  Returns:
    The state with updated `converged`, `failed`, `position`,
    `objective_value` and `objective_gradient` fields.
  """
  objective_not_finite = ~tf.math.is_finite(next_objective)
  gradient_not_finite = ~tf.reduce_all(
      tf.math.is_finite(next_gradient), axis=-1)
  failed = state.failed | objective_not_finite | gradient_not_finite

  next_position = state.position + position_delta
  newly_converged = ~failed & _check_convergence(
      state.position, next_position, state.objective_value, next_objective,
      next_gradient, grad_tolerance, f_relative_tolerance, x_tolerance)

  return update_fields(
      state,
      converged=state.converged | newly_converged,
      failed=failed,
      position=next_position,
      objective_value=next_objective,
      objective_gradient=next_gradient)
def _assertions(self, t, power=None):
  """Builds the runtime check on `t` used when `validate_args` is set.

  Args:
    t: Values to validate.
    power: Optional exponent; defaults to `self.power` converted to a tensor.

  Returns:
    An empty list when `validate_args` is off, otherwise a single assertion
    that passes when every element is non-negative or the power is an odd
    integer.
  """
  if not self.validate_args:
    return []
  if power is None:
    power = tf.convert_to_tensor(self.power)
  is_valid = tf.reduce_all((t >= 0.) | _is_odd_integer(power))
  return [
      tf.debugging.Assert(
          is_valid,
          ['Elements must be non-negative, except for odd-integer powers.'])
  ]
def is_sequence_right_padded(mask):
  """Checks whether the mask tensor is strictly right padded.

  The cuDNN kernel uses the sequence length parameter to skip the trailing
  timesteps. If the data is left padded, or not strictly right padded (it has
  masked values in the middle of the sequence), the cuDNN kernel will not
  work properly.

  Left padded data: [[False, False, True, True, True]].
  Right padded data: [[True, True, True, False, False]].
  Mixture of masked/unmasked data: [[True, False, True, False, False]].

  Note that for the mixed example above, the data the RNN should actually see
  are the 2 Trues (index 0 and 2); the False at index 1 should be ignored and
  must not pollute the internal states.

  Args:
    mask: the Boolean tensor with shape [batch, timestep]

  Returns:
    boolean scalar tensor, whether the mask is strictly right padded.
  """
  num_timesteps = tf.shape(mask)[1]
  # Number of unmasked steps in every row.
  valid_steps = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
  # The mask each row would have if its Trues were all packed to the left.
  packed_left = tf.sequence_mask(valid_steps, maxlen=num_timesteps)
  is_identical = tf.equal(mask, packed_left)
  return tf.reduce_all(is_identical)
def reduce_all(input_tensor, axis=None, keepdims=False):
  """A version of tf.reduce_all that eagerly evaluates if possible.

  Args:
    input_tensor: The boolean value to reduce.
    axis: The dimensions to reduce; `None` reduces all of them. A list is
      accepted on both the static and the dynamic path.
    keepdims: Whether reduced dimensions are kept with length 1.

  Returns:
    The static result when `get_static_value` can resolve `input_tensor`,
    otherwise the result of `tf.reduce_all`.
  """
  static_value = get_static_value(input_tensor)
  if static_value is None:
    return tf.reduce_all(input_tensor, axis=axis, keepdims=keepdims)
  # The static value is presumably a numpy array, whose `.all` only takes
  # None, an int or a tuple of ints for `axis`; `tf.reduce_all` also takes a
  # list. Normalise so both paths accept the same arguments.
  if isinstance(axis, list):
    axis = tuple(axis)
  return static_value.all(axis=axis, keepdims=keepdims)
def testUniformSamplePdf(self):
  """Samples drawn from a Uniform must all have strictly positive density."""
  low = 10.0
  high = [11.0, 100.0]
  dist = tfd.Uniform(low, high)
  draws = dist.sample(10)
  density_is_positive = tf.reduce_all(input_tensor=dist.prob(draws) > 0)
  self.assertTrue(self.evaluate(density_is_positive))
def testLogProbAccuracy(self, dist_name, data):
  """Bounds the numerical error of `log_prob` on one drawn sample.

  A distribution named `dist_name` is drawn from `data`, one sample is taken
  from it, and the excess wrong bits of `log_prob` at that sample (as reported
  by `nt.excess_wrong_bits`) are asserted to be below 20.
  """
  self.skip_if_tf1()
  drawn_dist = data.draw(
      dhps.distributions(
          dist_name=dist_name,
          # The accuracy tools can't handle batches (yet?)
          batch_shape=(),
          # Variables presumably do not affect the numerics
          enable_vars=False,
          # Checking that samples pass validations (including in 64-bit
          # arithmetic) is left for another test
          validate_args=False))
  dist = tfe.as_composite(drawn_dist)
  seed = test_util.test_seed(sampler_type='stateless')
  with tfp_hps.no_tf_rank_errors():
    sample = dist.sample(seed=seed)
  if sample.dtype.is_floating:
    # Discard examples whose sample contains NaN.
    sample_has_no_nan = tf.reduce_all(~tf.math.is_nan(sample))
    hp.assume(self.evaluate(sample_has_no_nan))
  hp.note('Testing on sample {}'.format(sample))

  # The distribution is flattened into tensors so that `log_prob` can be
  # expressed as a function of those tensors and the sample.
  as_tensors = tf.nest.flatten(dist, expand_composites=True)

  def log_prob_function(tensors, x):
    rebuilt = tf.nest.pack_sequence_as(dist, tensors, expand_composites=True)
    return rebuilt.log_prob(x)

  with tfp_hps.finite_ground_truth_only():
    badness = nt.excess_wrong_bits(log_prob_function, as_tensors, sample)
  # TODO(axch): Lower the acceptable badness to 4, which corresponds
  # to slightly better accuracy than 1e-6 relative error for
  # well-conditioned functions.
  self.assertAllLess(badness, 20)
def __call__(self, cache, new_items):
  """Flags the new items that have no match in the cache.

  For every key in `self.keys`, each new item is compared against each cached
  item. An item counts as cached when a single cached entry matches it on all
  keys.

  Args:
    cache: Object whose `data` mapping holds the cached values per key.
    new_items: Mapping from key to the candidate values.

  Returns:
    A boolean tensor that is True for new items not found in the cache.
    NOTE(review): assumes the leading axis of every value indexes items;
    confirm against the callers.

  Raises:
    NotImplementedError: If a cached value has a floating dtype.
  """
  per_key_matches = []
  for key in self.keys:
    cache_vals = cache.data[key]
    new_items_vals = new_items[key]
    if cache_vals.dtype.is_floating:
      raise NotImplementedError('Floating datatypes are not yet implemented.')
    # Pair every new item (axis 0) with every cached item (axis 1).
    cache_vals = tf.expand_dims(cache_vals, axis=0)
    new_items_vals = tf.expand_dims(new_items_vals, axis=1)
    elementwise = cache_vals == new_items_vals
    # Collapse all per-item axes. `tf.range` is used instead of the builtin
    # `range`, which needs a concrete Python integer and therefore only works
    # when `tf.rank` is evaluated eagerly.
    item_axes = tf.range(2, tf.rank(elementwise))
    per_key_matches.append(tf.reduce_all(elementwise, axis=item_axes))
  all_keys_datawise = tf.stack(per_key_matches, axis=2)
  all_keys_match = tf.reduce_all(all_keys_datawise, axis=2)
  in_cache = tf.reduce_any(all_keys_match, axis=1)
  return tf.logical_not(in_cache)
def _newton_iteration(unused_should_stop, w, z, tol):
  """One Newton step for the root `w` of the equation w * exp(w) = z.

  Args:
    unused_should_stop: Ignored; present so the signature matches the return
      value.
    w: Current estimate of the root.
    z: Right-hand side of the equation.
    tol: Absolute tolerance on the size of the Newton step.

  Returns:
    A tuple `(should_stop, next_w, z, tol)`, where `should_stop` is True once
    every element's step is within `tol`.
  """
  w = tf.convert_to_tensor(w)
  z = tf.convert_to_tensor(z)
  newton_step = (w - z * tf.exp(-w)) / (1. + w)
  all_converged = tf.reduce_all(tf.abs(newton_step) <= tol)
  return all_converged, w - newton_step, z, tol
def has_fully_masked_sequence(mask):
  """Returns whether any row of `mask` (reduced along axis 1) is all False."""
  # See https://github.com/tensorflow/tensorflow/issues/33148 for more details.
  # The cuDNN kernel errors out if the input contains any fully masked
  # sequence. This is worked around by rerouting the computation to the
  # standard kernel until the issue is fixed on the cuDNN side.
  # A fully masked sequence contains only Falses. To make that easy to detect,
  # the mask is inverted and each sequence is checked for being all True.
  inverted = tf.logical_not(mask)
  sequence_fully_masked = tf.reduce_all(inverted, axis=1)
  return tf.reduce_any(sequence_fully_masked)
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
  """Zeroes the boxes of `box_slice` that overlap tile `inner_idx` too much.

  The tile of `NMS_TILE_SIZE` boxes starting at `inner_idx * NMS_TILE_SIZE` is
  cut out of `boxes` and its overlap with `box_slice` is computed. The
  comparison `iou < iou_threshold` is AND-reduced over axis 1 and the
  resulting mask multiplies `box_slice`.

  Returns:
    A tuple `(boxes, masked_box_slice, iou_threshold, inner_idx + 1)`.
  """
  batch_size = tf.shape(boxes)[0]
  tile_begin = [0, inner_idx * NMS_TILE_SIZE, 0]
  tile_extent = [batch_size, NMS_TILE_SIZE, 4]
  tile = tf.slice(boxes, tile_begin, tile_extent)

  iou = box_utils.bbox_overlap(tile, box_slice)
  below_threshold = tf.reduce_all(iou < iou_threshold, [1])
  keep_mask = tf.expand_dims(tf.cast(below_threshold, box_slice.dtype), 2)
  ret_slice = keep_mask * box_slice
  return boxes, ret_slice, iou_threshold, inner_idx + 1
def nllfun(self, x, alpha, scale):
  r"""Computes the negative log-likelihood (NLL).

  Specifically, this implements -log(p(x | 0, \alpha, c) of Equation 16 in
  the paper as nllfun(x, alpha, scale).

  Args:
    x: The residual for which the NLL is being computed. x can have any
      shape, and alpha and scale will be broadcasted to match x's shape if
      necessary. Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the NLL (\alpha in the paper). Alpha can be
      any non-negative value, but the gradient of the NLL with respect to
      alpha has singularities at 0 and 2 so you may want to limit usage to
      (0, 2) during gradient descent. Varying alpha in that range allows for
      smooth interpolation between a Cauchy distribution (alpha = 0) and a
      Normal distribution (alpha = 2), similar to a Student's T distribution.
      Must be a tensorflow tensor or numpy array of floats.
    scale: The scale parameter of the loss. When |x| < scale, the NLL is like
      that of a (possibly unnormalized) normal distribution, and when
      |x| > scale the NLL takes on a different shape according to alpha. Must
      be a tensorflow tensor or numpy array of floats.

  Returns:
    The NLLs for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of floats with the same precision as x.
  """
  # Both parameters are required to share the dtype of `x`.
  tf.debugging.assert_type(scale, x.dtype)
  tf.debugging.assert_type(alpha, x.dtype)
  scale_is_positive = tf.Assert(tf.reduce_all(scale > 0.), [scale])
  alpha_is_non_negative = tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])
  with tf.control_dependencies([scale_is_positive, alpha_is_non_negative]):
    loss = general.lossfun(x, alpha, scale, approximate=False)
    # The log-partition term is log(scale) plus the base log-partition
    # function of alpha.
    log_partition = (
        tf.math.log(scale) + self.log_base_partition_function(alpha))
    return loss + log_partition
def _prob(self, event):
  """Returns the fraction of stored samples that equal `event`.

  A sample matches when it equals `event` on all of the last
  `self._event_ndims` axes. The match count is divided by `self.num_samples`
  and cast to `self.dtype` when that dtype is floating.
  """
  event = tf.convert_to_tensor(event, name='event', dtype=self.dtype)
  event, samples = _broadcast_event_and_samples(
      event, self.samples, event_ndims=self._event_ndims)
  event_axes = tf.range(-self._event_ndims, 0)
  sample_matches = tf.reduce_all(tf.equal(samples, event), axis=event_axes)
  match_count = tf.reduce_sum(
      tf.cast(sample_matches, dtype=tf.int32), axis=-1)
  prob = match_count / self.num_samples
  if dtype_util.is_floating(self.dtype):
    prob = tf.cast(prob, self.dtype)
  return prob
def _continued_fraction_one_step(
    unused_should_stop, numerator, previous_numerator, denominator,
    previous_denominator, iteration_count):
  """Advances the continued-fraction recurrence by one term.

  `partial_denominator_fn`, `partial_numerator_fn`,
  `numerator_denominator_args_list`, `max_iterations` and `tolerance` are read
  from the enclosing scope. A missing partial function stands for the
  constant 1.

  Returns:
    A tuple `(should_stop_next, new_numerator, numerator, new_denominator,
    denominator, iteration_count + 1.)`.
  """
  if partial_denominator_fn:
    term_denominator = partial_denominator_fn(
        iteration_count, *numerator_denominator_args_list)
  else:
    term_denominator = 1.

  if partial_numerator_fn:
    term_numerator = partial_numerator_fn(
        iteration_count, *numerator_denominator_args_list)
  else:
    term_numerator = 1.

  # Both the numerator and the denominator follow the same two-term
  # recurrence.
  new_numerator = (
      term_denominator * numerator + term_numerator * previous_numerator)
  new_denominator = (
      term_denominator * denominator + term_numerator * previous_denominator)

  should_stop_next = iteration_count > max_iterations

  if tolerance is not None:
    if partial_numerator_fn is None:
      # With all partial numerators equal to 1 there is a cheaper way to get
      # the change of the fraction between two iterations. Let h_i be the
      # numerator, k_i the denominator and a_i the i-th term. Then
      # h_i / k_i - h_{i-1} / k_{i-1} =
      # (h_i * k_{i - 1} - h_{i - 1} * k_i) / (k_i * k_{i - 1}) =
      # ((a_i h_{i - 1} + h_{i - 2}) * k_{i - 1} -
      #  (a_i k_{i - 1} + k_{i - 2}) * h_{i - 1}) / (k_i * k_{i - 1}) =
      # -(h_{i - 1} * k_{i - 2} - h_{i - 2} * k_{i - 1}) / (k_i * k_{i - 1})
      # By induction on the numerator,
      # (h_i * k_{i - 1} - h_{i - 1} * k_i) = (-1)**i
      # so only the reciprocal of the denominator product is needed.
      delta = tf.math.reciprocal(new_denominator * denominator)
    else:
      # In general the difference of the two fractions has to be computed.
      delta = new_numerator / new_denominator - numerator / denominator
    within_tolerance = tf.math.abs(delta) <= tolerance
    should_stop_next = tf.reduce_all(within_tolerance) | should_stop_next

  return (should_stop_next, new_numerator, numerator, new_denominator,
          denominator, iteration_count + 1.)
def __init__(self, prior, coding_rank, compression=False,
             likelihood_bound=1e-9, tail_mass=2**-8,
             range_coder_precision=12):
  """Initializer.

  Arguments:
    prior: A `tfp.distributions.Distribution` object. A density model fitting
      the marginal distribution of the bottleneck data with additive uniform
      noise, which is shared a priori between the sender and the receiver.
      For best results, the distribution should be flexible enough to have a
      unit-width uniform distribution as a special case, since this is the
      marginal distribution for bottleneck dimensions that are constant. The
      distribution parameters may not depend on data (they must be either
      variables or constants).
    coding_rank: Integer. Number of innermost dimensions considered a coding
      unit. Each coding unit is compressed to its own bit string, and the
      `bits()` method sums over each coding unit.
    compression: Boolean. If set to `True`, the range coding tables used by
      `compress()` and `decompress()` will be built on instantiation. This
      assumes eager mode (throws an error if in graph mode or inside a
      `tf.function` call). If set to `False`, these two methods will not be
      accessible.
    likelihood_bound: Float. Lower bound for likelihood values, to prevent
      training instabilities.
    tail_mass: Float. Approximate probability mass which is range encoded
      with less precision, by using a Golomb-like code.
    range_coder_precision: Integer. Precision passed to the range coding op.

  Raises:
    ValueError: when `coding_rank` is smaller than the batch rank of `prior`.
    RuntimeError: when attempting to instantiate an entropy model with
      `compression=True` and not in eager execution mode.
  """
  if coding_rank < prior.batch_shape.rank:
    raise ValueError(
        "`coding_rank` can't be smaller than batch rank of prior.")
  super().__init__(
      prior,
      coding_rank,
      compression=compression,
      likelihood_bound=likelihood_bound,
      tail_mass=tail_mass,
      range_coder_precision=range_coder_precision)

  offset = helpers.quantization_offset(prior)
  if self.compression:
    # Optimization: a quantization offset of zero neither has to be
    # subtracted/added when quantizing nor has to be serialized, so it is
    # dropped. Note that this test only works in eager mode.
    offset_is_zero = tf.reduce_all(tf.equal(offset, 0.))
    if offset_is_zero:
      offset = None
    else:
      offset = tf.Variable(
          tf.broadcast_to(offset, self.prior_shape),
          trainable=False, name="quantization_offset")
  self._quantization_offset = offset
def _log_prob(self, x):
  """Log-density at `x`, optionally forced to -inf outside the support.

  The log-density is `sum(xlogy(concentration - 1, x)) - lbeta(concentration)`
  with the sum taken over the last axis. When
  `_force_probs_to_zero_outside_support` is set, points with a negative
  coordinate or whose coordinates do not sum to 1 (within 10 machine epsilons)
  get -inf instead.
  """
  concentration = tf.convert_to_tensor(self.concentration)
  unnormalized = tf.reduce_sum(
      tf.math.xlogy(concentration - 1., x), axis=-1)
  lp = unnormalized - tf.math.lbeta(concentration)
  if not self._force_probs_to_zero_outside_support:
    return lp

  eps = np.finfo(dtype_util.as_numpy_dtype(x.dtype)).eps
  is_non_negative = tf.reduce_all(x >= 0, axis=-1)
  # Reusing the logic of tf.debugging.assert_near, 10 * np.finfo.eps
  sums_to_one = tf.math.abs(tf.reduce_sum(x, axis=-1) - 1.) < 10 * eps
  in_support = is_non_negative & sums_to_one
  return tf.where(in_support, lp, -float('inf'))
def _cdf(self, event):
  """Returns the fraction of samples that are <= `event`.

  A sample counts when it is <= `event` on all of the last
  `self._event_ndims` axes. The count is divided by the number of samples and
  cast to `self.dtype` when that dtype is floating.
  """
  samples = tf.convert_to_tensor(self._samples)
  num_samples = self._compute_num_samples(samples)
  event = tf.convert_to_tensor(event, name='event', dtype=self.dtype)
  event, samples = _broadcast_event_and_samples(
      event, samples, event_ndims=self._event_ndims)
  event_axes = tf.range(-self._event_ndims, 0)
  sample_at_or_below = tf.reduce_all(samples <= event, axis=event_axes)
  count = tf.reduce_sum(tf.cast(sample_at_or_below, dtype=tf.int32), axis=-1)
  cdf = count / num_samples
  if dtype_util.is_floating(self.dtype):
    cdf = tf.cast(cdf, self.dtype)
  return cdf
def inv_partition_spline_curve(x):
  """The inverse of partition_spline_curve().

  Args:
    x: Curved values; asserted to be >= 0.

  Returns:
    The alpha values, in the dtype of `x`.
  """

  def as_dtype(value):
    return tf.cast(value, x.dtype)

  non_negative = tf.Assert(tf.reduce_all(x >= 0.), [x])
  with tf.control_dependencies([non_negative]):
    quarter_x_squared = as_dtype(.25) * tf.square(x)
    # Branch selected for x <= 4.
    low_branch = as_dtype(1.25) - tf.sqrt(
        as_dtype(1.5625) - x + quarter_x_squared)
    # Branch selected for 4 < x < 8.
    mid_branch = as_dtype(-1.25) + tf.sqrt(
        as_dtype(9.5625) - as_dtype(3) * x + quarter_x_squared)
    below_eight = as_dtype(0.5) * x + tf.where(x <= 4, low_branch, mid_branch)
    # Exponential branch selected for x >= 8.
    eight_and_up = as_dtype(3.75) + as_dtype(0.25) * util.exp_safe(
        x * as_dtype(3.6) - as_dtype(28.8))
    alpha = tf.where(x < 8, below_eight, eight_and_up)
  return alpha
def _cdf(self, event):
  """Returns the fraction of stored samples that are <= `event`.

  A sample counts when it is <= `event` on all of the last
  `self._event_ndims` axes. The count is divided by `self.num_samples` and
  cast to `self.dtype` when that dtype is floating.
  """
  event = tf.convert_to_tensor(value=event, name='event', dtype=self.dtype)
  event, samples = _broadcast_event_and_samples(
      event, self.samples, event_ndims=self._event_ndims)
  event_axes = tf.range(-self._event_ndims, 0)
  sample_at_or_below = tf.reduce_all(
      input_tensor=samples <= event, axis=event_axes)
  count = tf.reduce_sum(
      input_tensor=tf.cast(sample_at_or_below, dtype=tf.int32), axis=-1)
  cdf = count / self.num_samples
  if self.dtype.is_floating:
    cdf = tf.cast(cdf, self.dtype)
  return cdf
def _decode_and_random_crop(image_bytes: tf.Tensor) -> tf.Tensor:
  """Decodes a randomly cropped region of a JPEG image.

  The crop is sampled by `_distorted_bounding_box_crop` over the whole image.
  If the result has the same shape as the full image, the random crop is
  treated as failed and a center crop is returned instead.
  """
  jpeg_shape = tf.image.extract_jpeg_shape(image_bytes)
  # A single bounding box covering the entire image.
  whole_image_bbox = tf.constant(
      [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
  image = _distorted_bounding_box_crop(
      image_bytes,
      jpeg_shape=jpeg_shape,
      bbox=whole_image_bbox,
      min_object_covered=0.1,
      aspect_ratio_range=(3 / 4, 4 / 3),
      area_range=(0.08, 1.0),
      max_attempts=10)
  crop_failed = tf.reduce_all(tf.equal(jpeg_shape, tf.shape(image)))
  if crop_failed:
    # If the random crop failed fall back to center crop.
    image = _decode_and_center_crop(image_bytes, jpeg_shape)
  return image
def random_cropped_inputs():
  """Crops `inputs` at an offset drawn with stateless random ops.

  `inputs` and `self` are read from the enclosing scope. The crop keeps the
  first and the last dimension of `inputs` and takes `self.height` by
  `self.width` in between.
  """
  input_shape = tf.compat.v1.shape(inputs)
  crop_size = tf.stack(
      [input_shape[0], self.height, self.width, input_shape[3]])
  input_is_large_enough = tf.Assert(
      tf.reduce_all(input_shape >= crop_size), [self.height, self.width])
  with tf.control_dependencies([input_is_large_enough]):
    # Number of admissible start positions along every dimension.
    limit = input_shape - crop_size + 1
    raw_offset = stateless_random_ops.stateless_random_uniform(
        tf.compat.v1.shape(input_shape),
        dtype=crop_size.dtype,
        maxval=crop_size.dtype.max,
        seed=self._rng.make_seeds()[:, 0])
    offset = raw_offset % limit
    return tf.slice(inputs, offset, crop_size)
def _prob(self, x):
  """Returns 1 where `x` is within `self._slack` of `self.loc`, else 0.

  The comparison `|x - loc| <= slack` is AND-reduced over the last axis and
  cast to `self.dtype`. With `validate_args` set, `x` must have rank at least
  1 and its last dimension must equal the event shape.
  """
  if self.validate_args:
    is_vector_check = assert_util.assert_rank_at_least(x, 1)
    last_dim = tf.gather(tf.shape(input=x), tf.rank(x) - 1)
    right_vec_space_check = assert_util.assert_equal(
        self.event_shape_tensor(),
        last_dim,
        message=
        "Argument 'x' not defined in the same space R^k as this distribution"
    )
    # Route `x` through an identity op that depends on both checks.
    with tf.control_dependencies([is_vector_check, right_vec_space_check]):
      x = tf.identity(x)
  within_slack = tf.abs(x - self.loc) <= self._slack
  return tf.cast(
      tf.reduce_all(input_tensor=within_slack, axis=-1), dtype=self.dtype)
def _mode(self):
  """Computes the mode, `(concentration - 1) / (total_concentration - k)`.

  The mode is only defined when every concentration is greater than 1.

  Returns:
    The mode. When `allow_nan_stats` is set, batch members with any
    concentration <= 1 are NaN in every coordinate; otherwise an assertion
    that all concentrations exceed 1 is attached to the result.
  """
  k = tf.cast(self.event_shape_tensor()[0], self.dtype)
  mode = (self.concentration - 1.) / (
      self.total_concentration[..., tf.newaxis] - k)
  if self.allow_nan_stats:
    nan = tf.fill(
        tf.shape(input=mode),
        dtype_util.as_numpy_dtype(self.dtype)(np.nan),
        name="nan")
    # The per-member condition is kept with a trailing axis and expanded to
    # the full shape of `mode`. A condition of shape `batch` does not line up
    # with operands of shape `batch + [k]` under broadcasting `tf.where`, and
    # legacy `tf.where` only accepts it for rank-1 batches.
    is_defined = tf.reduce_all(
        input_tensor=self.concentration > 1., axis=-1, keepdims=True)
    is_defined = tf.broadcast_to(is_defined, tf.shape(input=mode))
    return tf.where(is_defined, mode, nan)
  return distribution_util.with_dependencies([
      assert_util.assert_less(
          tf.ones([], self.dtype),
          self.concentration,
          message="Mode undefined when any concentration <= 1"),
  ], mode)