def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.loc),
      array_ops.broadcast_dynamic_shape(
          array_ops.shape(self.scale),
          array_ops.broadcast_dynamic_shape(
              array_ops.shape(self.minval),
              array_ops.shape(self.maxval))))

def _shape_tensor(self):
  batch_shape = array_ops.broadcast_dynamic_shape(
      self.base_operator.batch_shape_tensor(),
      array_ops.shape(self.u)[:-2])
  batch_shape = array_ops.broadcast_dynamic_shape(
      batch_shape,
      array_ops.shape(self.v)[:-2])
  return array_ops.concat(
      [batch_shape, self.base_operator.shape_tensor()[-2:]], axis=0)

def _itemwise_error_rate(
    total_error_rate, param_tensors, sample_tensor=None, name=None):
  with ops.name_scope(
      name, "itemwise_error_rate",
      [total_error_rate, param_tensors, sample_tensor]):
    result_shape = [1]
    for p_tensor in param_tensors:
      result_shape = array_ops.broadcast_dynamic_shape(
          array_ops.shape(p_tensor), result_shape)
    if sample_tensor is not None:
      result_shape = array_ops.broadcast_dynamic_shape(
          array_ops.shape(sample_tensor)[1:], result_shape)
    num_items = math_ops.reduce_prod(result_shape)
    return total_error_rate / math_ops.cast(
        num_items, dtype=total_error_rate.dtype)

def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  diagonals = self.diagonals
  if adjoint:
    diagonals = self._construct_adjoint_diagonals(diagonals)

  # TODO(b/144860784): Remove the broadcasting code below once
  # tridiagonal_solve broadcasts.
  rhs_shape = array_ops.shape(rhs)
  k = self._shape_tensor(diagonals)[-1]
  broadcast_shape = array_ops.broadcast_dynamic_shape(
      self._shape_tensor(diagonals)[:-2], rhs_shape[:-2])
  rhs = array_ops.broadcast_to(
      rhs, array_ops.concat(
          [broadcast_shape, rhs_shape[-2:]], axis=-1))
  if self.diagonals_format == _MATRIX:
    diagonals = array_ops.broadcast_to(
        diagonals, array_ops.concat(
            [broadcast_shape, [k, k]], axis=-1))
  elif self.diagonals_format == _COMPACT:
    diagonals = array_ops.broadcast_to(
        diagonals, array_ops.concat(
            [broadcast_shape, [3, k]], axis=-1))
  else:
    diagonals = [
        array_ops.broadcast_to(d, array_ops.concat(
            [broadcast_shape, [k]], axis=-1)) for d in diagonals]

  y = linalg.tridiagonal_solve(
      diagonals, rhs,
      diagonals_format=self.diagonals_format,
      transpose_rhs=adjoint_arg,
      conjugate_rhs=adjoint_arg)
  return y

def _tf_gcd(x1, x2):  # pylint: disable=missing-function-docstring
  def _gcd_cond_fn(_, x2):
    return math_ops.reduce_any(x2 != 0)

  def _gcd_body_fn(x1, x2):
    # math_ops.mod will raise an error when any element of x2 is 0. To avoid
    # that, we change those zeros to ones. Their values don't matter because
    # they won't be used.
    x2_safe = array_ops.where_v2(x2 != 0, x2,
                                 constant_op.constant(1, x2.dtype))
    x1, x2 = (array_ops.where_v2(x2 != 0, x2, x1),
              array_ops.where_v2(x2 != 0, math_ops.mod(x1, x2_safe),
                                 constant_op.constant(0, x2.dtype)))
    return (array_ops.where_v2(x1 < x2, x2, x1),
            array_ops.where_v2(x1 < x2, x1, x2))

  if (not np.issubdtype(x1.dtype.as_numpy_dtype, np.integer) or
      not np.issubdtype(x2.dtype.as_numpy_dtype, np.integer)):
    raise ValueError('Arguments to gcd must be integers.')
  shape = array_ops.broadcast_dynamic_shape(array_ops.shape(x1),
                                            array_ops.shape(x2))
  x1 = array_ops.broadcast_to(x1, shape)
  x2 = array_ops.broadcast_to(x2, shape)
  value, _ = control_flow_ops.while_loop(
      _gcd_cond_fn, _gcd_body_fn, (math_ops.abs(x1), math_ops.abs(x2)))
  return value

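The helper above is a graph-mode, elementwise version of the Euclidean algorithm. As a point of comparison (an illustration, not part of the source above), the same iteration in plain NumPy looks like this:

import numpy as np

def np_gcd(x1, x2):
  # Broadcast the inputs to a common shape, then iterate the Euclidean step
  # (x1, x2) <- (x2, x1 mod x2) until every element of x2 is zero.
  x1, x2 = np.broadcast_arrays(np.abs(x1), np.abs(x2))
  x1, x2 = x1.copy(), x2.copy()
  while np.any(x2 != 0):
    # Avoid dividing by zero; results at those positions are never used.
    x2_safe = np.where(x2 != 0, x2, 1)
    x1, x2 = np.where(x2 != 0, x2, x1), np.where(x2 != 0, x1 % x2_safe, 0)
  return x1

# np_gcd([12, 18, 7], [[8], [27]]) broadcasts to shape (2, 3).
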
def _shape_tensor(self, diagonals=None):
  diagonals = diagonals if diagonals is not None else self.diagonals
  if self.diagonals_format == _MATRIX:
    return array_ops.shape(diagonals)
  if self.diagonals_format == _COMPACT:
    d_shape = array_ops.shape(diagonals[..., 0, :])
  else:
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        array_ops.shape(self.diagonals[0])[:-1],
        array_ops.shape(self.diagonals[1])[:-1])
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        broadcast_shape,
        array_ops.shape(self.diagonals[2])[:-1])
    d_shape = array_ops.concat(
        [broadcast_shape, [array_ops.shape(self.diagonals[1])[-1]]], axis=0)
  return array_ops.concat([d_shape, [d_shape[-1]]], axis=-1)

def _to_dense(self):
  row = ops.convert_to_tensor(self.row)
  col = ops.convert_to_tensor(self.col)
  total_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(row), array_ops.shape(col))
  n = array_ops.shape(row)[-1]
  row = array_ops.broadcast_to(row, total_shape)
  col = array_ops.broadcast_to(col, total_shape)
  # We concatenate the column in reverse order to the row.
  # This gives us 2*n - 1 elements.
  elements = array_ops.concat(
      [array_ops.reverse(col, axis=[-1]), row[..., 1:]], axis=-1)
  # Given the above vector, the i-th row of the Toeplitz matrix
  # is the last n elements of the above vector shifted i right
  # (hence the first row is just the row vector provided, and
  # the first element of each row will belong to the column vector).
  # We construct this set of indices below.
  indices = math_ops.mod(
      # How much to shift right. This corresponds to `i`.
      math_ops.range(0, n) +
      # Specifies the last `n` indices.
      math_ops.range(n - 1, -1, -1)[..., array_ops.newaxis],
      # Mod out by the total number of elements to ensure the index is
      # non-negative (for tf.gather) and < 2 * n - 1.
      2 * n - 1)
  return array_ops.gather(elements, indices, axis=-1)

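The index arithmetic above is easier to see on a concrete example. A small NumPy sketch of the same trick (an illustration, not part of the source above):

import numpy as np

def toeplitz_dense(row, col):
  # Build the [reversed(col), row[1:]] vector of 2*n - 1 elements, then
  # gather each matrix row as a shifted window of the last n elements.
  row, col = np.asarray(row), np.asarray(col)
  n = row.shape[-1]
  elements = np.concatenate([col[::-1], row[1:]])
  indices = (np.arange(n) + np.arange(n - 1, -1, -1)[:, None]) % (2 * n - 1)
  return elements[indices]

# toeplitz_dense([1, 2, 3], [1, 4, 5]) ==>
# [[1, 2, 3],
#  [4, 1, 2],
#  [5, 4, 1]]
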
def _shape_tensor(self, row=None, col=None):
  row = self.row if row is None else row
  col = self.col if col is None else col
  v_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(row), array_ops.shape(col))
  k = v_shape[-1]
  return array_ops.concat((v_shape, [k]), 0)

def _broadcast_shape(shape1, shape2):
  """Convenience function which statically broadcasts shape when possible."""
  if (tensor_util.constant_value(shape1) is not None and
      tensor_util.constant_value(shape2) is not None):
    return array_ops.broadcast_static_shape(
        tensor_shape.TensorShape(tensor_util.constant_value(shape1)),
        tensor_shape.TensorShape(tensor_util.constant_value(shape2)))
  return array_ops.broadcast_dynamic_shape(shape1, shape2)

def check(t):
  target = array_ops.shape(tensor)[1:]
  result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t))
  # This rank check ensures that I don't get a wrong answer from the
  # _shapes_ broadcasting against each other.
  gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t))
  eq = check_ops.assert_equal(target, result)
  return gt, eq

def uniform(low=0.0, high=1.0, size=None):
  dtype = np_dtypes.default_float_type()
  low = np_array_ops.asarray(low, dtype=dtype)
  high = np_array_ops.asarray(high, dtype=dtype)
  if size is None:
    size = array_ops.broadcast_dynamic_shape(low.shape, high.shape)
  return random_ops.random_uniform(
      shape=size, minval=low, maxval=high, dtype=dtype)

def _cdf(self, x):
  broadcast_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(x), self.batch_shape_tensor())
  zeros = array_ops.zeros(broadcast_shape, dtype=self.dtype)
  ones = array_ops.ones(broadcast_shape, dtype=self.dtype)
  broadcasted_x = x * ones
  result_if_not_big = array_ops.where_v2(
      x < self.low, zeros, (broadcasted_x - self.low) / self.range())
  return array_ops.where_v2(x >= self.high, ones, result_if_not_big)

def _cdf(self, x):
  broadcast_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(x), self.batch_shape_tensor())
  zeros = array_ops.zeros(broadcast_shape, dtype=self.dtype)
  ones = array_ops.ones(broadcast_shape, dtype=self.dtype)
  broadcasted_x = x * ones
  result_if_not_big = array_ops.where(
      x < self.low, zeros, (broadcasted_x - self.low) / self.range())
  return array_ops.where(x >= self.high, ones, result_if_not_big)

def check(t):
  samples_batch_shape = array_ops.shape(samples)[1:]
  broadcasted_batch_shape = array_ops.broadcast_dynamic_shape(
      samples_batch_shape, array_ops.shape(t))
  # This rank check ensures that I don't get a wrong answer from the
  # _shapes_ broadcasting against each other.
  samples_batch_ndims = array_ops.size(samples_batch_shape)
  ge = check_ops.assert_greater_equal(
      samples_batch_ndims, array_ops.rank(t))
  eq = check_ops.assert_equal(samples_batch_shape, broadcasted_batch_shape)
  return ge, eq

def determine_batch_event_shapes(grid, endpoint_affine):
  """Helper to infer batch_shape and event_shape."""
  with ops.name_scope(name="determine_batch_event_shapes"):
    # grid  # shape: [B, k, q]
    # endpoint_affine  # len=k, shape: [B, d, d]
    batch_shape = grid.shape[:-2]
    batch_shape_tensor = array_ops.shape(grid)[:-2]
    event_shape = None
    event_shape_tensor = None

    def _set_event_shape(shape, shape_tensor):
      if event_shape is None:
        return shape, shape_tensor
      return (array_ops.broadcast_static_shape(event_shape, shape),
              array_ops.broadcast_dynamic_shape(event_shape_tensor,
                                                shape_tensor))

    for aff in endpoint_affine:
      if aff.shift is not None:
        batch_shape = array_ops.broadcast_static_shape(
            batch_shape, aff.shift.shape[:-1])
        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
            batch_shape_tensor, array_ops.shape(aff.shift)[:-1])
        event_shape, event_shape_tensor = _set_event_shape(
            aff.shift.shape[-1:], array_ops.shape(aff.shift)[-1:])

      if aff.scale is not None:
        batch_shape = array_ops.broadcast_static_shape(
            batch_shape, aff.scale.batch_shape)
        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
            batch_shape_tensor, aff.scale.batch_shape_tensor())
        event_shape, event_shape_tensor = _set_event_shape(
            tensor_shape.TensorShape([aff.scale.range_dimension]),
            aff.scale.range_dimension_tensor()[array_ops.newaxis])

    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor

def tf_broadcast(*args):
  """Broadcast tensors.

  Args:
    *args: a list of tensors whose shapes are broadcastable against each other.

  Returns:
    Tensors broadcasted to the common shape.
  """
  if len(args) <= 1:
    return args
  sh = array_ops.shape(args[0])
  for arg in args[1:]:
    sh = array_ops.broadcast_dynamic_shape(sh, array_ops.shape(arg))
  return [array_ops.broadcast_to(arg, sh) for arg in args]

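A minimal usage sketch of the same pattern with the public TensorFlow API (tf.shape, tf.broadcast_dynamic_shape, tf.broadcast_to); the helper above uses the internal array_ops module, so this is an equivalent illustration rather than a call into it:

import tensorflow as tf

a = tf.ones([2, 1, 3])
b = tf.ones([4, 1])
# Reduce all shapes to one common broadcast shape, then materialize it.
common = tf.broadcast_dynamic_shape(tf.shape(a), tf.shape(b))  # [2, 4, 3]
a_bc = tf.broadcast_to(a, common)
b_bc = tf.broadcast_to(b, common)
print(a_bc.shape, b_bc.shape)  # (2, 4, 3) (2, 4, 3)
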
def determine_batch_event_shapes(grid, endpoint_affine):
  """Helper to infer batch_shape and event_shape."""
  with ops.name_scope(name="determine_batch_event_shapes"):
    # grid  # shape: [B, k, q]
    # endpoint_affine  # len=k, shape: [B, d, d]
    batch_shape = grid.shape[:-2]
    batch_shape_tensor = array_ops.shape(grid)[:-2]
    event_shape = None
    event_shape_tensor = None

    def _set_event_shape(shape, shape_tensor):
      if event_shape is None:
        return shape, shape_tensor
      return (array_ops.broadcast_static_shape(event_shape, shape),
              array_ops.broadcast_dynamic_shape(
                  event_shape_tensor, shape_tensor))

    for aff in endpoint_affine:
      if aff.shift is not None:
        batch_shape = array_ops.broadcast_static_shape(
            batch_shape, aff.shift.shape[:-1])
        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
            batch_shape_tensor, array_ops.shape(aff.shift)[:-1])
        event_shape, event_shape_tensor = _set_event_shape(
            aff.shift.shape[-1:], array_ops.shape(aff.shift)[-1:])

      if aff.scale is not None:
        batch_shape = array_ops.broadcast_static_shape(
            batch_shape, aff.scale.batch_shape)
        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
            batch_shape_tensor, aff.scale.batch_shape_tensor())
        event_shape, event_shape_tensor = _set_event_shape(
            tensor_shape.TensorShape([aff.scale.range_dimension]),
            aff.scale.range_dimension_tensor()[array_ops.newaxis])

    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor

def _shape_tensor(self):
  # Avoid messy broadcasting if possible.
  if self.shape.is_fully_defined():
    return ops.convert_to_tensor_v2_with_dispatch(
        self.shape.as_list(), dtype=dtypes.int32, name="shape")

  domain_dimension = sum(self._block_domain_dimension_tensors())
  range_dimension = sum(self._block_range_dimension_tensors())
  matrix_shape = array_ops.stack([domain_dimension, range_dimension])

  batch_shape = self.operators[0][0].batch_shape_tensor()
  for row in self.operators[1:]:
    for operator in row:
      batch_shape = array_ops.broadcast_dynamic_shape(
          batch_shape, operator.batch_shape_tensor())

  return array_ops.concat((batch_shape, matrix_shape), 0)

def _shape_tensor(self):
  domain_dimension = self.operators[0].domain_dimension_tensor()
  for operator in self.operators[1:]:
    domain_dimension *= operator.domain_dimension_tensor()

  range_dimension = self.operators[0].range_dimension_tensor()
  for operator in self.operators[1:]:
    range_dimension *= operator.range_dimension_tensor()

  matrix_shape = [range_dimension, domain_dimension]

  # Get broadcast batch shape.
  # broadcast_shape checks for compatibility.
  batch_shape = self.operators[0].batch_shape_tensor()
  for operator in self.operators[1:]:
    batch_shape = array_ops.broadcast_dynamic_shape(
        batch_shape, operator.batch_shape_tensor())

  return array_ops.concat((batch_shape, matrix_shape), 0)

def _shape_tensor(self):
  domain_dimension = self.operators[0].domain_dimension_tensor()
  for operator in self.operators[1:]:
    domain_dimension = domain_dimension * operator.domain_dimension_tensor()

  range_dimension = self.operators[0].range_dimension_tensor()
  for operator in self.operators[1:]:
    range_dimension = range_dimension * operator.range_dimension_tensor()

  matrix_shape = [range_dimension, domain_dimension]

  # Get broadcast batch shape.
  # broadcast_shape checks for compatibility.
  batch_shape = self.operators[0].batch_shape_tensor()
  for operator in self.operators[1:]:
    batch_shape = array_ops.broadcast_dynamic_shape(
        batch_shape, operator.batch_shape_tensor())

  return array_ops.concat((batch_shape, matrix_shape), 0)

def prefer_static_broadcast_shape(shape1,
                                  shape2,
                                  name="prefer_static_broadcast_shape"):
  """Convenience function which statically broadcasts shape when possible.

  Args:
    shape1: `1-D` integer `Tensor`. Already converted to tensor!
    shape2: `1-D` integer `Tensor`. Already converted to tensor!
    name: A string name to prepend to created ops.

  Returns:
    The broadcast shape, either as `TensorShape` (if broadcast can be done
    statically), or as a `Tensor`.
  """
  with ops.name_scope(name, values=[shape1, shape2]):
    def make_shape_tensor(x):
      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)

    def get_tensor_shape(s):
      if isinstance(s, tensor_shape.TensorShape):
        return s
      s_ = tensor_util.constant_value(make_shape_tensor(s))
      if s_ is not None:
        return tensor_shape.TensorShape(s_)
      return None

    def get_shape_tensor(s):
      if not isinstance(s, tensor_shape.TensorShape):
        return make_shape_tensor(s)
      if s.is_fully_defined():
        return make_shape_tensor(s.as_list())
      raise ValueError("Cannot broadcast from partially "
                       "defined `TensorShape`.")

    shape1_ = get_tensor_shape(shape1)
    shape2_ = get_tensor_shape(shape2)
    if shape1_ is not None and shape2_ is not None:
      return array_ops.broadcast_static_shape(shape1_, shape2_)

    shape1_ = get_shape_tensor(shape1)
    shape2_ = get_shape_tensor(shape2)
    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)

def prefer_static_broadcast_shape(
    shape1, shape2, name="prefer_static_broadcast_shape"):
  """Convenience function which statically broadcasts shape when possible.

  Args:
    shape1: `1-D` integer `Tensor`. Already converted to tensor!
    shape2: `1-D` integer `Tensor`. Already converted to tensor!
    name: A string name to prepend to created ops.

  Returns:
    The broadcast shape, either as `TensorShape` (if broadcast can be done
    statically), or as a `Tensor`.
  """
  with ops.name_scope(name, values=[shape1, shape2]):
    if (tensor_util.constant_value(shape1) is not None and
        tensor_util.constant_value(shape2) is not None):
      return array_ops.broadcast_static_shape(
          tensor_shape.TensorShape(tensor_util.constant_value(shape1)),
          tensor_shape.TensorShape(tensor_util.constant_value(shape2)))
    return array_ops.broadcast_dynamic_shape(shape1, shape2)

def prefer_static_broadcast_shape(
    shape1, shape2, name="prefer_static_broadcast_shape"):
  """Convenience function which statically broadcasts shape when possible.

  Args:
    shape1: `1-D` integer `Tensor`. Already converted to tensor!
    shape2: `1-D` integer `Tensor`. Already converted to tensor!
    name: A string name to prepend to created ops.

  Returns:
    The broadcast shape, either as `TensorShape` (if broadcast can be done
    statically), or as a `Tensor`.
  """
  with ops.name_scope(name, values=[shape1, shape2]):
    def make_shape_tensor(x):
      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)

    def get_tensor_shape(s):
      if isinstance(s, tensor_shape.TensorShape):
        return s
      s_ = tensor_util.constant_value(make_shape_tensor(s))
      if s_ is not None:
        return tensor_shape.TensorShape(s_)
      return None

    def get_shape_tensor(s):
      if not isinstance(s, tensor_shape.TensorShape):
        return make_shape_tensor(s)
      if s.is_fully_defined():
        return make_shape_tensor(s.as_list())
      raise ValueError("Cannot broadcast from partially "
                       "defined `TensorShape`.")

    shape1_ = get_tensor_shape(shape1)
    shape2_ = get_tensor_shape(shape2)
    if shape1_ is not None and shape2_ is not None:
      return array_ops.broadcast_static_shape(shape1_, shape2_)

    shape1_ = get_shape_tensor(shape1)
    shape2_ = get_shape_tensor(shape2)
    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)

def prefer_static_broadcast_shape(shape1,
                                  shape2,
                                  name="prefer_static_broadcast_shape"):
  """Convenience function which statically broadcasts shape when possible.

  Args:
    shape1: `1-D` integer `Tensor`. Already converted to tensor!
    shape2: `1-D` integer `Tensor`. Already converted to tensor!
    name: A string name to prepend to created ops.

  Returns:
    The broadcast shape, either as `TensorShape` (if broadcast can be done
    statically), or as a `Tensor`.
  """
  with ops.name_scope(name, values=[shape1, shape2]):
    if (tensor_util.constant_value(shape1) is not None and
        tensor_util.constant_value(shape2) is not None):
      return array_ops.broadcast_static_shape(
          tensor_shape.TensorShape(tensor_util.constant_value(shape1)),
          tensor_shape.TensorShape(tensor_util.constant_value(shape2)))
    return array_ops.broadcast_dynamic_shape(shape1, shape2)

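A minimal sketch of the static-versus-dynamic distinction these helpers make, using only the public TensorFlow API (the helpers above work with the internal tensor_util/array_ops modules, so this is an equivalent illustration):

import tensorflow as tf

# Fully known shapes broadcast statically to a TensorShape.
static = tf.broadcast_static_shape(
    tf.TensorShape([3, 1]), tf.TensorShape([1, 4]))   # TensorShape([3, 4])

# Shapes only known as tensors broadcast to an int32 Tensor at run time.
dynamic = tf.broadcast_dynamic_shape(
    tf.constant([3, 1]), tf.constant([1, 4]))         # tf.Tensor([3 4])
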
def _broadcast_parameter_with_batch_shape(
    param, param_ndims_to_matrix_ndims, batch_shape):
  """Broadcasts `param` with the given batch shape, recursively."""
  if hasattr(param, 'batch_shape_tensor'):
    # Recursively broadcast every parameter inside the operator.
    override_dict = {}
    for name, ndims in param._experimental_parameter_ndims_to_matrix_ndims.items():  # pylint:disable=protected-access,line-too-long
      sub_param = getattr(param, name)
      override_dict[name] = nest.map_structure_up_to(
          sub_param,
          functools.partial(
              _broadcast_parameter_with_batch_shape,
              batch_shape=batch_shape),
          sub_param, ndims)
    parameters = dict(param.parameters, **override_dict)
    return type(param)(**parameters)

  base_shape = array_ops.concat(
      [batch_shape,
       array_ops.ones([param_ndims_to_matrix_ndims], dtype=dtypes.int32)],
      axis=0)
  return array_ops.broadcast_to(
      param,
      array_ops.broadcast_dynamic_shape(base_shape, array_ops.shape(param)))

def get_broadcast_shape(*tensors):
  """Get broadcast shape as a Python list of integers (preferred) or `Tensor`.

  Args:
    *tensors: One or more `Tensor` objects (already converted!).

  Returns:
    broadcast shape: Python list (if shapes determined statically), otherwise
      an `int32` `Tensor`.
  """
  # Try static.
  s_shape = tensors[0].shape
  for t in tensors[1:]:
    s_shape = array_ops.broadcast_static_shape(s_shape, t.shape)
  if s_shape.is_fully_defined():
    return s_shape.as_list()

  # Fallback on dynamic.
  d_shape = array_ops.shape(tensors[0])
  for t in tensors[1:]:
    d_shape = array_ops.broadcast_dynamic_shape(d_shape, array_ops.shape(t))
  return d_shape

def _matmul(self, x, adjoint=False, adjoint_arg=False):
  perm = ops.convert_to_tensor_v2_with_dispatch(self.perm)
  if adjoint and not self.is_self_adjoint:
    # TODO(srvasude): invert_permutation doesn't work on batches so we use
    # argsort.
    perm = sort_ops.argsort(perm, axis=-1)
  x = linalg.adjoint(x) if adjoint_arg else x

  # We need to broadcast x and the permutation since tf.gather doesn't
  # broadcast.
  broadcast_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(x)[:-1], array_ops.shape(perm))
  k = array_ops.shape(x)[-1]
  broadcast_x_shape = array_ops.concat([broadcast_shape, [k]], axis=-1)
  x = array_ops.broadcast_to(x, broadcast_x_shape)
  perm = array_ops.broadcast_to(perm, broadcast_shape)

  m = array_ops.shape(x)[-2]
  x = array_ops.reshape(x, [-1, m, k])
  perm = array_ops.reshape(perm, [-1, m])

  y = array_ops.gather(x, perm, axis=-2, batch_dims=1)
  return array_ops.reshape(y, broadcast_x_shape)

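The gather-based multiply above relies on the fact that reordering rows by a permutation is the same as multiplying by the corresponding permutation matrix. A small NumPy illustration (not from the source above, and using an assumed row-gather convention):

import numpy as np

perm = np.array([2, 0, 1])
x = np.arange(12.0).reshape(3, 4)

p_matrix = np.eye(3)[perm]   # dense permutation matrix built from `perm`
via_matmul = p_matrix @ x    # multiply by the permutation matrix
via_gather = x[perm]         # gather rows, as tf.gather(..., axis=-2) does

assert np.array_equal(via_matmul, via_gather)
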
def broadcast_matrix_batch_dims(batch_matrices, name=None):
  """Broadcast leading dimensions of zero or more [batch] matrices.

  Example broadcasting one batch dim of two simple matrices.

  ```python
  x = [[1, 2],
       [3, 4]]  # Shape [2, 2], no batch dims

  y = [[[1]]]  # Shape [1, 1, 1], 1 batch dim of shape [1]

  x_bc, y_bc = broadcast_matrix_batch_dims([x, y])

  x_bc
  ==> [[[1, 2],
        [3, 4]]]  # Shape [1, 2, 2], 1 batch dim of shape [1].

  y_bc
  ==> same as y
  ```

  Example broadcasting many batch dims

  ```python
  x = tf.random_normal(shape=(2, 3, 1, 4, 4))
  y = tf.random_normal(shape=(1, 3, 2, 5, 5))
  x_bc, y_bc = broadcast_matrix_batch_dims([x, y])

  x_bc.shape
  ==> (2, 3, 2, 4, 4)

  y_bc.shape
  ==> (2, 3, 2, 5, 5)
  ```

  Args:
    batch_matrices: Iterable of `Tensor`s, each having two or more dimensions.
    name: A string name to prepend to created ops.

  Returns:
    bcast_matrices: List of `Tensor`s, with `bcast_matrices[i]` containing
      the values from `batch_matrices[i]`, with possibly broadcast batch dims.

  Raises:
    ValueError: If any input `Tensor` is statically determined to have less
      than two dimensions.
  """
  with ops.name_scope(
      name or "broadcast_matrix_batch_dims", values=batch_matrices):
    check_ops.assert_proper_iterable(batch_matrices)
    batch_matrices = list(batch_matrices)

    for i, mat in enumerate(batch_matrices):
      batch_matrices[i] = ops.convert_to_tensor(mat)
      assert_is_batch_matrix(batch_matrices[i])

    if len(batch_matrices) < 2:
      return batch_matrices

    # Try static broadcasting.
    # bcast_batch_shape is the broadcast batch shape of ALL matrices.
    # E.g. if batch_matrices = [x, y], with
    # x.shape =    [2, j, k]  (batch shape =    [2])
    # y.shape = [3, 1, l, m]  (batch shape = [3, 1])
    # ==> bcast_batch_shape = [3, 2]
    bcast_batch_shape = batch_matrices[0].get_shape()[:-2]
    for mat in batch_matrices[1:]:
      bcast_batch_shape = array_ops.broadcast_static_shape(
          bcast_batch_shape, mat.get_shape()[:-2])
    if bcast_batch_shape.is_fully_defined():
      # The [1, 1] at the end will broadcast with anything.
      bcast_shape = bcast_batch_shape.concatenate([1, 1])
      for i, mat in enumerate(batch_matrices):
        if mat.get_shape()[:-2] != bcast_batch_shape:
          batch_matrices[i] = _broadcast_to_shape(mat, bcast_shape)
      return batch_matrices

    # Since static didn't work, do dynamic, which always copies data.
    bcast_batch_shape = array_ops.shape(batch_matrices[0])[:-2]
    for mat in batch_matrices[1:]:
      bcast_batch_shape = array_ops.broadcast_dynamic_shape(
          bcast_batch_shape, array_ops.shape(mat)[:-2])
    bcast_shape = array_ops.concat([bcast_batch_shape, [1, 1]], axis=0)
    for i, mat in enumerate(batch_matrices):
      batch_matrices[i] = _broadcast_to_shape(mat, bcast_shape)

    return batch_matrices

def random_gamma(shape,
                 alpha,
                 beta=None,
                 dtype=dtypes.float32,
                 seed=None,
                 name=None):
  """Draws `shape` samples from each of the given Gamma distribution(s).

  `alpha` is the shape parameter describing the distribution(s), and `beta` is
  the inverse scale parameter(s).

  Note: Because internal calculations are done using `float64` and casting has
  `floor` semantics, we must manually map zero outcomes to the smallest
  possible positive floating-point value, i.e., `np.finfo(dtype).tiny`. This
  means that `np.finfo(dtype).tiny` occurs more frequently than it otherwise
  should. This bias can only happen for small values of `alpha`, i.e.,
  `alpha << 1` or large values of `beta`, i.e., `beta >> 1`.

  The samples are differentiable w.r.t. alpha and beta.
  The derivatives are computed using the approach described in
  (Figurnov et al., 2018).

  Example:

  ```python
  samples = tf.random.gamma([10], [0.5, 1.5])
  # samples has shape [10, 2], where each slice [:, 0] and [:, 1] represents
  # the samples drawn from each distribution

  samples = tf.random.gamma([7, 5], [0.5, 1.5])
  # samples has shape [7, 5, 2], where each slice [:, :, 0] and [:, :, 1]
  # represents the 7x5 samples drawn from each of the two distributions

  alpha = tf.constant([[1.],[3.],[5.]])
  beta = tf.constant([[3., 4.]])
  samples = tf.random.gamma([30], alpha=alpha, beta=beta)
  # samples has shape [30, 3, 2], with 30 samples each of 3x2 distributions.

  loss = tf.reduce_mean(tf.square(samples))
  dloss_dalpha, dloss_dbeta = tf.gradients(loss, [alpha, beta])
  # unbiased stochastic derivatives of the loss function
  alpha.shape == dloss_dalpha.shape  # True
  beta.shape == dloss_dbeta.shape  # True
  ```

  Args:
    shape: A 1-D integer Tensor or Python array. The shape of the output
      samples to be drawn per alpha/beta-parameterized distribution.
    alpha: A Tensor or Python value or N-D array of type `dtype`. `alpha`
      provides the shape parameter(s) describing the gamma distribution(s) to
      sample. Must be broadcastable with `beta`.
    beta: A Tensor or Python value or N-D array of type `dtype`. Defaults to 1.
      `beta` provides the inverse scale parameter(s) of the gamma
      distribution(s) to sample. Must be broadcastable with `alpha`.
    dtype: The type of alpha, beta, and the output: `float16`, `float32`, or
      `float64`.
    seed: A Python integer. Used to create a random seed for the
      distributions. See `tf.random.set_seed` for behavior.
    name: Optional name for the operation.

  Returns:
    samples: a `Tensor` of shape
      `tf.concat([shape, tf.shape(alpha + beta)], axis=0)` with values of type
      `dtype`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  with ops.name_scope(name, "random_gamma", [shape, alpha, beta]):
    shape = ops.convert_to_tensor(shape, name="shape", dtype=dtypes.int32)
    alpha = ops.convert_to_tensor(alpha, name="alpha", dtype=dtype)
    beta = ops.convert_to_tensor(
        beta if beta is not None else 1, name="beta", dtype=dtype)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        array_ops.shape(alpha), array_ops.shape(beta))
    alpha_broadcast = array_ops.broadcast_to(alpha, broadcast_shape)
    seed1, seed2 = random_seed.get_seed(seed)
    result = math_ops.maximum(
        np.finfo(alpha.dtype.as_numpy_dtype).tiny,
        gen_random_ops.random_gamma(
            shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta)
    _maybe_set_static_shape_helper(result, shape, alpha_broadcast)
    return result

def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.concentration), array_ops.shape(self.rate))

def _batch_shape(self):
  return array_ops.broadcast_dynamic_shape(array_ops.shape(self.loc),
                                           array_ops.shape(self.scale))

def _batch_shape(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.alpha), array_ops.shape(self.beta))

def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.low), array_ops.shape(self.high))

def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(array_ops.shape(self.low),
                                           array_ops.shape(self.high))

def _batch_shape(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.df),
      array_ops.broadcast_dynamic_shape(
          array_ops.shape(self.mu), array_ops.shape(self.sigma)))

def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.total_count), array_ops.shape(self.probs))

def conjugate_gradient(operator,
                       rhs,
                       preconditioner=None,
                       x=None,
                       tol=1e-5,
                       max_iter=20,
                       name='conjugate_gradient'):
  r"""Conjugate gradient solver.

  Solves a linear system of equations `A*x = rhs` for self-adjoint, positive
  definite matrix `A` and right-hand side vector `rhs`, using an iterative,
  matrix-free algorithm where the action of the matrix A is represented by
  `operator`. The iteration terminates when either the number of iterations
  exceeds `max_iter` or when the residual norm has been reduced to `tol`
  times its initial value, i.e. \\(||rhs - A x_k|| <= tol ||rhs||\\).

  Args:
    operator: A `LinearOperator` that is self-adjoint and positive definite.
    rhs: A possibly batched vector of shape `[..., N]` containing the
      right-hand side vector.
    preconditioner: A `LinearOperator` that approximates the inverse of `A`.
      An efficient preconditioner could dramatically improve the rate of
      convergence. If `preconditioner` represents matrix `M` (`M` approximates
      `A^{-1}`), the algorithm uses `preconditioner.apply(x)` to estimate
      `A^{-1} x`. For this to be useful, the cost of applying `M` should be
      much lower than computing `A^{-1}` directly.
    x: A possibly batched vector of shape `[..., N]` containing the initial
      guess for the solution.
    tol: A float scalar convergence tolerance.
    max_iter: An integer giving the maximum number of iterations.
    name: A name scope for the operation.

  Returns:
    output: A namedtuple representing the final state with fields:
      - i: A scalar `int32` `Tensor`. Number of iterations executed.
      - x: A rank-1 `Tensor` of shape `[..., N]` containing the computed
        solution.
      - r: A rank-1 `Tensor` of shape `[..., N]` containing the residual
        vector.
      - p: A rank-1 `Tensor` of shape `[..., N]`. `A`-conjugate basis vector.
      - gamma: \\(r \dot M \dot r\\), equivalent to \\(||r||_2^2\\) when
        `preconditioner=None`.
  """
  if not (operator.is_self_adjoint and operator.is_positive_definite):
    raise ValueError('Expected a self-adjoint, positive definite operator.')

  cg_state = collections.namedtuple('CGState', ['i', 'x', 'r', 'p', 'gamma'])

  def stopping_criterion(i, state):
    return math_ops.logical_and(
        i < max_iter,
        math_ops.reduce_any(linalg.norm(state.r, axis=-1) > tol))

  def dot(x, y):
    return array_ops.squeeze(
        math_ops.matvec(x[..., array_ops.newaxis], y, adjoint_a=True),
        axis=-1)

  def cg_step(i, state):  # pylint: disable=missing-docstring
    z = math_ops.matvec(operator, state.p)
    alpha = state.gamma / dot(state.p, z)
    x = state.x + alpha[..., array_ops.newaxis] * state.p
    r = state.r - alpha[..., array_ops.newaxis] * z
    if preconditioner is None:
      q = r
    else:
      q = preconditioner.matvec(r)
    gamma = dot(r, q)
    beta = gamma / state.gamma
    p = q + beta[..., array_ops.newaxis] * state.p
    return i + 1, cg_state(i + 1, x, r, p, gamma)

  # We now broadcast initial shapes so that we have fixed shapes per iteration.

  with ops.name_scope(name):
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        array_ops.shape(rhs)[:-1], operator.batch_shape_tensor())
    if preconditioner is not None:
      broadcast_shape = array_ops.broadcast_dynamic_shape(
          broadcast_shape, preconditioner.batch_shape_tensor())
    broadcast_rhs_shape = array_ops.concat(
        [broadcast_shape, [array_ops.shape(rhs)[-1]]], axis=-1)
    r0 = array_ops.broadcast_to(rhs, broadcast_rhs_shape)
    tol *= linalg.norm(r0, axis=-1)

    if x is None:
      x = array_ops.zeros(broadcast_rhs_shape, dtype=rhs.dtype.base_dtype)
    else:
      r0 = rhs - math_ops.matvec(operator, x)
    if preconditioner is None:
      p0 = r0
    else:
      p0 = math_ops.matvec(preconditioner, r0)
    gamma0 = dot(r0, p0)
    i = constant_op.constant(0, dtype=dtypes.int32)
    state = cg_state(i=i, x=x, r=r0, p=p0, gamma=gamma0)
    _, state = control_flow_ops.while_loop(stopping_criterion, cg_step,
                                           [i, state])
    return cg_state(
        state.i, x=state.x, r=state.r, p=state.p, gamma=state.gamma)

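A minimal usage sketch, assuming this solver is the one exposed publicly as tf.linalg.experimental.conjugate_gradient; it builds a small symmetric positive-definite system and solves it:

import tensorflow as tf

a = tf.constant([[4.0, 1.0], [1.0, 3.0]])
operator = tf.linalg.LinearOperatorFullMatrix(
    a, is_self_adjoint=True, is_positive_definite=True)
rhs = tf.constant([1.0, 2.0])

result = tf.linalg.experimental.conjugate_gradient(operator, rhs, tol=1e-6)
print(result.x)  # approximately [0.0909, 0.6364], the solution of a @ x = rhs
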
def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      self.distribution.batch_shape_tensor(),
      array_ops.shape(self.mixture_distribution.logits))[:-1]

def stateless_random_gamma(shape,
                           seed,
                           alpha,
                           beta=None,
                           dtype=dtypes.float32,
                           name=None):
  """Outputs deterministic pseudorandom values from a gamma distribution.

  The generated values follow a gamma distribution with specified concentration
  (`alpha`) and inverse scale (`beta`) parameters.

  This is a stateless version of `tf.random.gamma`: if run twice with the same
  seeds and shapes, it will produce the same pseudorandom numbers. The output
  is consistent across multiple runs on the same hardware (and between CPU and
  GPU), but may change between versions of TensorFlow or on non-CPU/GPU
  hardware.

  A slight difference exists in the interpretation of the `shape` parameter
  between `stateless_gamma` and `gamma`: in `gamma`, the `shape` is always
  prepended to the shape of the broadcast of `alpha` with `beta`; whereas in
  `stateless_gamma` the `shape` parameter must always encompass the shapes of
  each of `alpha` and `beta` (which must broadcast together to match the
  trailing dimensions of `shape`).

  Note: Because internal calculations are done using `float64` and casting has
  `floor` semantics, we must manually map zero outcomes to the smallest
  possible positive floating-point value, i.e., `np.finfo(dtype).tiny`. This
  means that `np.finfo(dtype).tiny` occurs more frequently than it otherwise
  should. This bias can only happen for small values of `alpha`, i.e.,
  `alpha << 1` or large values of `beta`, i.e., `beta >> 1`.

  The samples are differentiable w.r.t. alpha and beta.
  The derivatives are computed using the approach described in
  (Figurnov et al., 2018).

  Example:

  ```python
  samples = tf.random.stateless_gamma([10, 2], seed=[12, 34], alpha=[0.5, 1.5])
  # samples has shape [10, 2], where each slice [:, 0] and [:, 1] represents
  # the samples drawn from each distribution

  samples = tf.random.stateless_gamma([7, 5, 2], seed=[12, 34], alpha=[.5, 1.5])
  # samples has shape [7, 5, 2], where each slice [:, :, 0] and [:, :, 1]
  # represents the 7x5 samples drawn from each of the two distributions

  alpha = tf.constant([[1.], [3.], [5.]])
  beta = tf.constant([[3., 4.]])
  samples = tf.random.stateless_gamma(
      [30, 3, 2], seed=[12, 34], alpha=alpha, beta=beta)
  # samples has shape [30, 3, 2], with 30 samples each of 3x2 distributions.

  with tf.GradientTape() as tape:
    tape.watch([alpha, beta])
    loss = tf.reduce_mean(tf.square(tf.random.stateless_gamma(
        [30, 3, 2], seed=[12, 34], alpha=alpha, beta=beta)))
  dloss_dalpha, dloss_dbeta = tape.gradient(loss, [alpha, beta])
  # unbiased stochastic derivatives of the loss function
  alpha.shape == dloss_dalpha.shape  # True
  beta.shape == dloss_dbeta.shape  # True
  ```

  Args:
    shape: A 1-D integer Tensor or Python array. The shape of the output
      tensor.
    seed: A shape [2] Tensor, the seed to the random number generator. Must
      have dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
    alpha: Tensor. The concentration parameter of the gamma distribution. Must
      be broadcastable with `beta`, and broadcastable with the rightmost
      dimensions of `shape`.
    beta: Tensor. The inverse scale parameter of the gamma distribution. Must
      be broadcastable with `alpha` and broadcastable with the rightmost
      dimensions of `shape`.
    dtype: Floating point dtype of `alpha`, `beta`, and the output.
    name: A name for the operation (optional).

  Returns:
    samples: A Tensor of the specified shape filled with random gamma values.
      For each i, each `samples[..., i]` is an independent draw from the gamma
      distribution with concentration alpha[i] and scale beta[i].
  """
  with ops.name_scope(name, "stateless_random_gamma",
                      [shape, seed, alpha, beta]) as name:
    shape = tensor_util.shape_tensor(shape)
    alpha = ops.convert_to_tensor(alpha, dtype=dtype, name="alpha")
    beta = ops.convert_to_tensor(
        beta if beta is not None else 1, name="beta", dtype=dtype)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        array_ops.shape(alpha), array_ops.shape(beta))
    alpha_broadcast = array_ops.broadcast_to(alpha, broadcast_shape)
    result = math_ops.maximum(
        np.finfo(alpha.dtype.as_numpy_dtype).tiny,
        gen_stateless_random_ops.stateless_random_gamma_v2(
            shape, seed=seed, alpha=alpha_broadcast) / beta)
    tensor_util.maybe_set_static_shape(result, shape)
    return result

def _shape_tensor(self):
  batch_shape = array_ops.broadcast_dynamic_shape(
      self.base_operator.batch_shape_tensor(),
      array_ops.shape(self.u)[:-2])
  return array_ops.concat(
      [batch_shape, self.base_operator.shape_tensor()[-2:]], axis=0)

def _batch_shape(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.n), array_ops.shape(self.p))

def _set_event_shape(shape, shape_tensor):
  if event_shape is None:
    return shape, shape_tensor
  return (array_ops.broadcast_static_shape(event_shape, shape),
          array_ops.broadcast_dynamic_shape(event_shape_tensor, shape_tensor))

def _set_event_shape(shape, shape_tensor):
  if event_shape is None:
    return shape, shape_tensor
  return (array_ops.broadcast_static_shape(event_shape, shape),
          array_ops.broadcast_dynamic_shape(
              event_shape_tensor, shape_tensor))

def _batch_shape_tensor(self):
  return array_ops.broadcast_dynamic_shape(
      array_ops.shape(self.loc), array_ops.shape(self.scale))