def _check_shapes_dynamic(self, operator, v, diag):
  """Gate `v` and `diag` on run-time shape assertions against `operator`.

  Args:
    operator: Object providing `rank()`, `batch_shape()` and
      `vector_space_dimension()`.
    v: Tensor whose rank, leading (batch) dimensions and second-to-last
      dimension are compared with `operator`.
    diag: Optional tensor checked against `operator` and `v`. When `None`,
      every `diag`-related check is skipped.

  Returns:
    `(v, diag)`, each wrapped so that it depends on all assertions. `diag`
    is returned as `None` when it was `None`.
  """
  has_diag = diag is not None
  assertions = []
  with ops.op_scope([operator, v, diag], 'check_shapes'):
    v_shape = array_ops.shape(v)
    operator_rank = operator.rank()
    v_rank = array_ops.rank(v)
    if has_diag:
      diag_shape = array_ops.shape(diag)
      diag_rank = array_ops.rank(diag)

    # Rank: `v` matches the operator; `diag` has one dimension fewer.
    assertions.append(check_ops.assert_rank(v, operator_rank))
    if has_diag:
      assertions.append(check_ops.assert_rank(diag, operator_rank - 1))

    # Batch shape: everything but the last two dims of `v`, and everything
    # but the last dim of `diag`.
    v_batch_check = check_ops.assert_equal(
        operator.batch_shape(),
        array_ops.slice(v_shape, [0], [v_rank - 2]))
    assertions.append(v_batch_check)
    if has_diag:
      diag_batch_check = check_ops.assert_equal(
          operator.batch_shape(),
          array_ops.slice(diag_shape, [0], [diag_rank - 1]))
      assertions.append(diag_batch_check)

    # Event shape: `v`'s second-to-last dim equals the operator's vector
    # space dimension; the last dims of `v` and `diag` agree.
    v_event_check = check_ops.assert_equal(
        operator.vector_space_dimension(),
        array_ops.gather(v_shape, v_rank - 2))
    assertions.append(v_event_check)
    if has_diag:
      diag_event_check = check_ops.assert_equal(
          array_ops.gather(v_shape, v_rank - 1),
          array_ops.gather(diag_shape, diag_rank - 1))
      assertions.append(diag_event_check)

    v = control_flow_ops.with_dependencies(assertions, v)
    if has_diag:
      diag = control_flow_ops.with_dependencies(assertions, diag)
    return v, diag
def _check_mu(self, mu):
  """Return `mu` after validity checks, possibly with assertions attached.

  Args:
    mu: Value convertible to a `Tensor`.

  Returns:
    `mu` as a `Tensor`. When `self.strict` is true the tensor additionally
    depends on run-time assertions comparing its rank and shape with
    `self._cov`.

  Raises:
    TypeError: If `mu` and `self._cov` have different dtypes.
  """
  mu = ops.convert_to_tensor(mu)
  cov = self._cov

  if mu.dtype != cov.dtype:
    raise TypeError(
        "mu and cov must have the same dtype. Found mu.dtype = %s, "
        "cov.dtype = %s" % (mu.dtype, cov.dtype))

  # Non-strict mode performs no run-time validation at all.
  if not self.strict:
    return mu

  rank_check = check_ops.assert_equal(
      array_ops.rank(mu) + 1,
      cov.rank(),
      data=["mu should have rank 1 less than cov. Found: rank(mu) = ",
            array_ops.rank(mu), " rank(cov) = ", cov.rank()],
  )
  shape_check = check_ops.assert_equal(
      array_ops.shape(mu),
      cov.vector_shape(),
      data=["mu.shape and cov.shape[:-1] should match. "
            "Found: shape(mu) = ",
            array_ops.shape(mu), " shape(cov) = ", cov.shape()],
  )
  assert_compatible_shapes = control_flow_ops.group(rank_check, shape_check)
  return control_flow_ops.with_dependencies([assert_compatible_shapes], mu)
def call(self, labels, predictions, weights=None):
  """Accumulate accuracy statistics.

  For example, if labels is [1, 2, 3, 4] and predictions is [0, 2, 3, 4]
  then the accuracy is 3/4 or .75. If the weights were specified as
  [1, 1, 0, 0] then the accuracy would be 1/2 or .5.

  `labels` and `predictions` should have the same shape and type.

  Args:
    labels: Tensor with the true labels for each example. One example
      per element of the Tensor.
    predictions: Tensor with the predicted label for each example.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    The arguments, for easy chaining: `(labels, predictions)`, or
    `(labels, predictions, weights)` when `weights` is given.
  """
  shapes_match = check_ops.assert_equal(
      array_ops.shape(labels), array_ops.shape(predictions),
      message="Shapes of labels and predictions are unequal")
  # A control dependency is only picked up by ops created inside the
  # context. Building the comparison and the accumulator update here makes
  # the shape check run in graph mode instead of being silently dropped.
  with ops.control_dependencies([shapes_match]):
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(Accuracy, self).call(matches, weights=weights)
  if weights is None:
    return labels, predictions
  return labels, predictions, weights
def call(self, labels, predictions, weights=None):
  """Accumulate accuracy statistics.

  `labels` and `predictions` should have the same shape. As argmax is being
  done here, labels and predictions type can be different.

  Args:
    labels: One-hot Tensor.
    predictions: Tensor with the logits or probabilities for each example.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    `(labels, predictions)`, or `(labels, predictions, weights)` when
    `weights` is given, for easy chaining. Note that the returned `labels`
    and `predictions` are the argmax (over the last axis) of the inputs,
    not the original arguments.
  """
  shapes_match = check_ops.assert_equal(
      array_ops.shape(labels), array_ops.shape(predictions),
      message="Shapes of labels and predictions are unequal")
  # A control dependency is only picked up by ops created inside the
  # context, so everything consuming the inputs is built here. Otherwise
  # the shape check is never executed in graph mode.
  with ops.control_dependencies([shapes_match]):
    labels = math_ops.argmax(labels, axis=-1)
    predictions = math_ops.argmax(predictions, axis=-1)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(CategoricalAccuracy, self).call(matches, weights=weights)
  if weights is None:
    return labels, predictions
  return labels, predictions, weights
def call(self, labels, predictions, weights=None):
  """Accumulate accuracy statistics.

  `labels` and `predictions` should have the same shape. Predictions are
  thresholded with `self.threshold` and labels are cast to bool before the
  two are compared.

  Args:
    labels: Binary Tensor (containing 0 or 1).
    predictions: Tensor with probabilities or logits.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    `(labels, predictions)`, or `(labels, predictions, weights)` when
    `weights` is given, for easy chaining. Note that the returned `labels`
    and `predictions` are the bool tensors used for the comparison, not the
    original arguments.
  """
  shapes_match = check_ops.assert_equal(
      array_ops.shape(labels), array_ops.shape(predictions),
      message="Shapes of labels and predictions are unequal")
  # A control dependency is only picked up by ops created inside the
  # context, so everything consuming the inputs is built here. Otherwise
  # the shape check is never executed in graph mode.
  with ops.control_dependencies([shapes_match]):
    predictions = ops.convert_to_tensor(predictions)
    predictions = predictions > self.threshold
    # Convert labels to bool to match predictions.
    labels = math_ops.cast(labels, dtypes.bool)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(BinaryAccuracy, self).call(matches, weights=weights)
  if weights is None:
    return labels, predictions
  return labels, predictions, weights
def call(self, labels, predictions, weights=None):
  """Accumulate accuracy statistics.

  `labels` and `predictions` should have the same shape except the
  predictions must have one additional trailing dimension equal to the
  number of classes (you want to predict).

  Type of labels and predictions can be different.

  Args:
    labels: Tensor of shape (batch_size, ) containing integers.
    predictions: Tensor with the logits or probabilities for each example.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    `(labels, predictions)`, or `(labels, predictions, weights)` when
    `weights` is given, for easy chaining. Note that the returned `labels`
    are cast to int64 and the returned `predictions` are the argmax over
    the last axis, not the original arguments.
  """
  batch_sizes_match = check_ops.assert_equal(
      array_ops.shape(labels), array_ops.shape(predictions)[0],
      message="First axis of labels and predictions is unequal")
  # A control dependency is only picked up by ops created inside the
  # context, so everything consuming the inputs is built here. Otherwise
  # the check is never executed in graph mode.
  with ops.control_dependencies([batch_sizes_match]):
    predictions = math_ops.argmax(predictions, axis=-1)
    labels = math_ops.cast(labels, dtypes.int64)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(SparseAccuracy, self).call(matches, weights=weights)
  if weights is None:
    return labels, predictions
  return labels, predictions, weights
def _get_sparse_tensors(self, inputs, weight_collections=None,
                        trainable=None):
  """Return the wrapped column's id/weight tensors with a trailing dim of 1.

  The tensors produced by `self.categorical_column` are asserted to have
  rank 2 and are then reshaped to `dense_shape + [1]`, so that embeddings
  are not combined during embedding lookup.

  Args:
    inputs: Forwarded to `self.categorical_column._get_sparse_tensors`.
    weight_collections: Accepted but not used by this method.
    trainable: Accepted but not used by this method.

  Returns:
    An `fc._CategoricalColumn.IdWeightPair` holding the reshaped id tensor
    and the reshaped weight tensor (or `None` if there are no weights).
  """
  wrapped = self.categorical_column._get_sparse_tensors(inputs)
  ids = wrapped.id_tensor
  weights = wrapped.weight_tensor

  # The reshape is created under the rank assertion so it cannot run on an
  # input of unexpected rank.
  ids_are_rank_2 = check_ops.assert_equal(
      array_ops.rank(ids), 2,
      data=[
          'Column {} expected ID tensor of rank 2. '.format(self.name),
          'id_tensor shape: ', array_ops.shape(ids)])
  with ops.control_dependencies([ids_are_rank_2]):
    expanded_ids_shape = array_ops.concat([ids.dense_shape, [1]], axis=0)
    ids = sparse_ops.sparse_reshape(ids, shape=expanded_ids_shape)

  if weights is not None:
    weights_are_rank_2 = check_ops.assert_equal(
        array_ops.rank(weights), 2,
        data=[
            'Column {} expected weight tensor of rank 2.'.format(self.name),
            'weight_tensor shape:', array_ops.shape(weights)])
    with ops.control_dependencies([weights_are_rank_2]):
      expanded_weights_shape = array_ops.concat(
          [weights.dense_shape, [1]], axis=0)
      weights = sparse_ops.sparse_reshape(
          weights, shape=expanded_weights_shape)

  return fc._CategoricalColumn.IdWeightPair(ids, weights)
def _kl_independent(a, b, name="kl_independent"):
  """Batched KL divergence `KL(a || b)` for Independent distributions.

  We can leverage the fact that
  ```
  KL(Independent(a) || Independent(b)) = sum(KL(a || b))
  ```
  where the sum is over the `reinterpreted_batch_ndims`.

  Args:
    a: Instance of `Independent`.
    b: Instance of `Independent`.
    name: (optional) name to use for created ops. Default "kl_independent".

  Returns:
    Batchwise `KL(a || b)`.

  Raises:
    ValueError: If the (statically known) event shapes of `a` and `b` do
      not match.
    NotImplementedError: If the (statically known) event shapes of the
      underlying distributions do not match.
  """
  p = a.distribution
  q = b.distribution

  # The KL between any two (non)-batched distributions is a scalar.
  # Given that the KL between two factored distributions is the sum, i.e.
  # KL(p1(x)p2(y) || q1(x)q2(y)) = KL(p1 || q1) + KL(p2 || q2), we compute
  # KL(p || q) and do a `reduce_sum` on the reinterpreted batch dimensions.
  if a.event_shape.is_fully_defined() and b.event_shape.is_fully_defined():
    if a.event_shape == b.event_shape:
      if p.event_shape == q.event_shape:
        num_reduce_dims = a.event_shape.ndims - p.event_shape.ndims
        # The reinterpreted batch dims are the trailing `num_reduce_dims`
        # axes of the underlying KL: [-1, -2, ..., -num_reduce_dims].
        reduce_dims = [-i - 1 for i in range(0, num_reduce_dims)]

        return math_ops.reduce_sum(
            kullback_leibler.kl_divergence(p, q, name=name),
            axis=reduce_dims)
      else:
        raise NotImplementedError("KL between Independents with different "
                                  "event shapes not supported.")
    else:
      raise ValueError("Event shapes do not match.")
  else:
    with ops.control_dependencies([
        check_ops.assert_equal(a.event_shape_tensor(),
                               b.event_shape_tensor()),
        check_ops.assert_equal(p.event_shape_tensor(),
                               q.event_shape_tensor())
    ]):
      # The event rank is the *length* of the event-shape vector, i.e.
      # `shape(event_shape_tensor)[0]`. Indexing the event-shape vector
      # first would take the shape of a scalar, which is empty.
      num_reduce_dims = (
          array_ops.shape(a.event_shape_tensor())[0] -
          array_ops.shape(p.event_shape_tensor())[0])
      # Same axes as the static branch: the last `num_reduce_dims` axes,
      # i.e. [-num_reduce_dims, ..., -1].
      reduce_dims = math_ops.range(-num_reduce_dims, 0, 1)
      return math_ops.reduce_sum(
          kullback_leibler.kl_divergence(p, q, name=name),
          axis=reduce_dims)
def test_raises_when_less(self):
  """`assert_equal` rejects unequal inputs for constants and placeholders."""
  with self.test_session():
    # Static check: with constant inputs the call itself is expected to
    # raise a ValueError carrying the user message.
    const_small = constant_op.constant([3, 1], name="small")
    const_big = constant_op.constant([4, 2], name="big")
    with self.assertRaisesRegexp(ValueError, "fail"):
      check_ops.assert_equal(const_big, const_small, message="fail")

    # Dynamic check: with placeholders the failure is expected only when
    # the dependent op is evaluated with unequal feeds.
    small_ph = array_ops.placeholder(dtypes.int32, name="small")
    big_ph = array_ops.placeholder(dtypes.int32, name="big")
    equality_assert = check_ops.assert_equal(small_ph, big_ph)
    with ops.control_dependencies([equality_assert]):
      out = array_ops.identity(small_ph)
    feeds = {small_ph: [3, 1], big_ph: [4, 2]}
    with self.assertRaisesOpError("small.*big"):
      out.eval(feed_dict=feeds)
def _check_mu(self, mu):
  """Return `mu` after validity checks, possibly with assertions attached.

  Static shapes are compared first. Only when they are not fully defined,
  and `self.validate_args` is true, are run-time assertions added.

  Args:
    mu: Value convertible to a `Tensor`.

  Returns:
    `mu` as a `Tensor`, possibly depending on run-time rank/shape
    assertions against `self._cov`.

  Raises:
    TypeError: If `mu` and `self._cov` have different dtypes.
    ValueError: If both static shapes are fully defined and `mu`'s shape
      differs from `cov`'s shape without its last dimension.
  """
  mu = ops.convert_to_tensor(mu)
  cov = self._cov

  if mu.dtype != cov.dtype:
    raise TypeError(
        "mu and cov must have the same dtype. Found mu.dtype = %s, "
        "cov.dtype = %s" % (mu.dtype, cov.dtype)
    )

  # Try to validate with static checks.
  static_mu_shape = mu.get_shape()
  static_cov_shape = cov.get_shape()
  both_static = (static_mu_shape.is_fully_defined() and
                 static_cov_shape.is_fully_defined())
  if both_static:
    if static_mu_shape != static_cov_shape[:-1]:
      raise ValueError(
          "mu.shape and cov.shape[:-1] should match. Found: mu.shape=%s, "
          "cov.shape=%s" % (static_mu_shape, static_cov_shape)
      )
    return mu

  # Static checks could not be run, so possibly do dynamic checks.
  if not self.validate_args:
    return mu

  rank_check = check_ops.assert_equal(
      array_ops.rank(mu) + 1,
      cov.rank(),
      data=[
          "mu should have rank 1 less than cov. Found: rank(mu) = ",
          array_ops.rank(mu),
          " rank(cov) = ",
          cov.rank(),
      ],
  )
  # The shape comparison is created under the rank assertion so that it is
  # only evaluated once the ranks are known to be compatible.
  with ops.control_dependencies([rank_check]):
    shape_check = check_ops.assert_equal(
        array_ops.shape(mu),
        cov.vector_shape(),
        data=[
            "mu.shape and cov.shape[:-1] should match. "
            "Found: shape(mu) = ",
            array_ops.shape(mu),
            " shape(cov) = ",
            cov.shape(),
        ],
    )
    return control_flow_ops.with_dependencies([shape_check], mu)
def _model_fn_ops(
    expected_features, expected_labels, actual_features, actual_labels, mode):
  """Build a constant-valued `ModelFnOps` gated on input-equality asserts.

  Args:
    expected_features: Mapping from feature key to expected value.
    expected_labels: Expected labels.
    actual_features: Mapping indexed with every key of `expected_features`.
    actual_labels: Labels compared with `expected_labels`.
    mode: Passed through to `model_fn.ModelFnOps`.

  Returns:
    A `model_fn.ModelFnOps` whose predictions, loss and train_op are the
    constant `0.`, created under control dependencies on the assertions.
  """
  feature_asserts = [
      check_ops.assert_equal(
          expected_features[key], actual_features[key],
          name='assert_%s' % key)
      for key in expected_features
  ]
  label_assert = check_ops.assert_equal(
      expected_labels, actual_labels, name='assert_labels')
  assert_ops = tuple(feature_asserts + [label_assert])

  with ops.control_dependencies(assert_ops):
    return model_fn.ModelFnOps(
        mode=mode,
        predictions=constant_op.constant(0.),
        loss=constant_op.constant(0.),
        train_op=constant_op.constant(0.))
def test_doesnt_raise_when_both_empty(self):
  """`assert_equal` on two empty constants must evaluate without error."""
  with self.test_session():
    larry = constant_op.constant([])
    curly = constant_op.constant([])
    both_equal = check_ops.assert_equal(larry, curly)
    with ops.control_dependencies([both_equal]):
      out = array_ops.identity(larry)
    # Evaluating `out` forces the assertion to run.
    out.eval()
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape).

  Args:
    original_shape: Shape vector whose total size the new shape must
      reproduce.
    new_shape: Requested shape vector; an entry of `-1` is replaced by the
      size implied by `original_shape`.
    validate: Python `bool`; when true, run-time assertion ops are built
      and returned.
    name: Optional name scope for the created ops.

  Returns:
    A tuple `(expanded_new_shape, static_new_shape, validations)`. When
    `new_shape` is fully known statically the first element is an `int32`
    numpy array and `validations` is empty.
  """
  static_new_shape = tensor_util.constant_value_as_shape(new_shape)
  if static_new_shape.is_fully_defined():
    # Nothing to infer: the requested shape is completely known.
    return np.int32(static_new_shape.as_list()), static_new_shape, []

  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    total_size = math_ops.reduce_prod(original_shape)
    is_implicit = math_ops.equal(new_shape, -1)
    # Negating the product turns the single `-1` entry into a factor of +1.
    explicit_size = math_ops.maximum(1, -math_ops.reduce_prod(new_shape))
    implicit_size = total_size // explicit_size
    new_rank = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        is_implicit, array_ops.fill(new_rank, implicit_size), new_shape)

    validations = []
    if validate:
      validations = [
          check_ops.assert_rank(
              original_shape, 1, message="Original shape must be a vector."),
          check_ops.assert_rank(
              new_shape, 1, message="New shape must be a vector."),
          check_ops.assert_less_equal(
              math_ops.count_nonzero(is_implicit, dtype=dtypes.int32),
              1,
              message="At most one dimension can be unknown."),
          check_ops.assert_positive(
              expanded_new_shape, message="Shape elements must be >=-1."),
          check_ops.assert_equal(
              math_ops.reduce_prod(expanded_new_shape),
              total_size,
              message="Shape sizes do not match."),
      ]
    return expanded_new_shape, static_new_shape, validations
def assert_close(
    x, y, data=None, summarize=None, message=None, name="assert_close"):
  """Assert that x and y are within machine epsilon of each other.

  Integer inputs are delegated to `check_ops.assert_equal`. Otherwise the
  tolerance is `np.finfo(dtype).resolution` for `x`'s dtype.

  Args:
    x: Numeric `Tensor`
    y: Numeric `Tensor`
    data: The tensors to print out if the condition is `False`. Defaults to
      error message and first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).

  Returns:
    Op raising `InvalidArgumentError` if |x - y| > machine epsilon.
  """
  if not message:
    message = ""
  x = ops.convert_to_tensor(x, name="x")
  y = ops.convert_to_tensor(y, name="y")

  # Integers are compared exactly.
  if x.dtype.is_integer:
    return check_ops.assert_equal(
        x, y, data=data, summarize=summarize, message=message, name=name)

  with ops.name_scope(name, "assert_close", [x, y, data]):
    tol = np.finfo(x.dtype.as_numpy_dtype).resolution
    if data is None:
      data = [
          message,
          "Condition x ~= y did not hold element-wise: x = ", x.name, x,
          "y = ", y.name, y
      ]
    within_tol = math_ops.less_equal(math_ops.abs(x-y), tol)
    condition = math_ops.reduce_all(within_tol)
    return control_flow_ops.Assert(
        condition, data, summarize=summarize)
def assert_splits_match(nested_splits_lists):
  """Checks that the given splits lists are identical.

  Statically verifies that every splits list has the same length as the
  first one, then builds one equality assertion per pair of corresponding
  `splits` tensors (first list versus each of the others).

  Args:
    nested_splits_lists: A list of nested_splits_lists, where each
      split_list is a list of `splits` tensors from a `RaggedTensor`,
      ordered from outermost ragged dimension to innermost ragged dimension.

  Returns:
    A list of control dependency op tensors (empty when fewer than two
    splits lists are given).

  Raises:
    ValueError: If the splits lists do not all have the same length.
  """
  error_msg = "Inputs must have identical ragged splits"

  # Static check: every list needs the same number of ragged dimensions.
  if any(len(candidate) != len(nested_splits_lists[0])
         for candidate in nested_splits_lists):
    raise ValueError(error_msg)

  # Dynamic checks: compare each list with the first one, level by level.
  checks = []
  for other_splits in nested_splits_lists[1:]:
    for reference, other in zip(nested_splits_lists[0], other_splits):
      checks.append(
          check_ops.assert_equal(reference, other, message=error_msg))
  return checks
def zero_state(self, batch_size, dtype):
  """Return the initial `AttentionWrapperState` for this wrapper.

  The cell state is `self._initial_cell_state` when one was supplied and
  the wrapped cell's `zero_state` otherwise. It is passed through identity
  ops created under an assertion that `batch_size` equals the attention
  mechanism's batch size.

  Args:
    batch_size: Requested batch size.
    dtype: Data type used for the created state tensors.

  Returns:
    An `AttentionWrapperState`.
  """
  scope_name = type(self).__name__ + "ZeroState"
  with ops.name_scope(scope_name, values=[batch_size]):
    if self._initial_cell_state is None:
      cell_state = self._cell.zero_state(batch_size, dtype)
    else:
      cell_state = self._initial_cell_state

    message_prefix = (
        "When calling zero_state of AttentionWrapper %s: " % self._base_name)
    error_message = message_prefix + (
        "Non-matching batch sizes between the memory "
        "(encoder output) and the requested batch size. Are you using "
        "the BeamSearchDecoder? If so, make sure your encoder output has "
        "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
        "the batch_size= argument passed to zero_state is "
        "batch_size * beam_width.")

    def _checked(state_tensor):
      # Identity ops created under the dependency carry the assertion.
      return array_ops.identity(state_tensor, name="checked_cell_state")

    batch_sizes_match = check_ops.assert_equal(
        batch_size,
        self._attention_mechanism.batch_size,
        message=error_message)
    with ops.control_dependencies([batch_sizes_match]):
      cell_state = nest.map_structure(_checked, cell_state)

    # The history is only materialised when it was requested.
    alignment_history = ()
    if self._alignment_history:
      alignment_history = tensor_array_ops.TensorArray(
          dtype=dtype, size=0, dynamic_size=True)

    return AttentionWrapperState(
        cell_state=cell_state,
        time=array_ops.zeros([], dtype=dtypes.int32),
        attention=_zero_state_tensors(self._attention_layer_size, batch_size,
                                      dtype),
        alignments=self._attention_mechanism.initial_alignments(
            batch_size, dtype),
        alignment_history=alignment_history)
def _check_labels(labels, expected_labels_dimension):
  """Check labels type and shape.

  Args:
    labels: Labels; converted to a `Tensor`. Sparse labels are rejected.
    expected_labels_dimension: Expected size of the labels' second
      dimension.

  Returns:
    `labels` passed through an identity op that depends on the rank-2 and
    second-dimension assertions.

  Raises:
    ValueError: If `labels` is a `SparseTensor`, or if the static second
      dimension is known and differs from `expected_labels_dimension`.
  """
  with ops.name_scope(None, 'labels', (labels,)) as scope:
    labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
    if isinstance(labels, sparse_tensor.SparseTensor):
      raise ValueError('SparseTensor labels are not supported.')

    dynamic_shape = array_ops.shape(labels)
    err_msg = 'labels shape must be [batch_size, {}]'.format(
        expected_labels_dimension)
    rank_is_two = check_ops.assert_rank(labels, 2, message=err_msg)
    with ops.control_dependencies([rank_is_two]):
      # Fail while building the graph when the static shape already shows a
      # mismatch; otherwise fall back to the run-time assertion below.
      static_shape = labels.shape
      if static_shape is not None:
        static_dim = static_shape[1]
        if (static_dim is not None) and (
            static_dim != expected_labels_dimension):
          raise ValueError(
              'Mismatched label shape. '
              'Classifier configured with n_classes=%s. Received %s. '
              'Suggested Fix: check your n_classes argument to the estimator '
              'and/or the shape of your label.' %
              (expected_labels_dimension, static_dim))
      dimension_matches = check_ops.assert_equal(
          expected_labels_dimension, dynamic_shape[1], message=err_msg)
      with ops.control_dependencies([dimension_matches]):
        return array_ops.identity(labels, name=scope)
def assert_integer_form(
    x, data=None, summarize=None, message=None, int_dtype=None,
    name="assert_integer_form"):
  """Assert that x has integer components (or floats equal to integers).

  Integer-typed input trivially passes (a no-op is returned). Floating
  input is cast to `int_dtype` and back, and compared with the original.

  Args:
    x: Floating-point `Tensor`
    data: The tensors to print out if the condition is `False`. Defaults to
      error message and first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    int_dtype: A `tf.dtype` used to cast the float to. When `None`, it is
      chosen from `x`'s dtype: float16 -> int16, float32 -> int32,
      float64 -> int64.
    name: A name for this operation (optional).

  Returns:
    Op raising `InvalidArgumentError` if `cast(x, int_dtype) != x`.

  Raises:
    TypeError: If `int_dtype` is `None` and `x`'s dtype has no default
      integer counterpart.
  """
  with ops.name_scope(name, values=[x, data]):
    x = ops.convert_to_tensor(x, name="x")
    if x.dtype.is_integer:
      return control_flow_ops.no_op()

    if not message:
      message = "{} has non-integer components".format(x.op.name)

    if int_dtype is None:
      default_int_dtypes = {
          dtypes.float16: dtypes.int16,
          dtypes.float32: dtypes.int32,
          dtypes.float64: dtypes.int64,
      }
      try:
        int_dtype = default_int_dtypes[x.dtype.base_dtype]
      except KeyError:
        raise TypeError("Unrecognized type {}".format(x.dtype.name))

    # A float with an integer value survives the round trip unchanged.
    round_tripped = math_ops.cast(math_ops.cast(x, int_dtype), x.dtype)
    return check_ops.assert_equal(
        x, round_tripped,
        data=data, summarize=summarize, message=message, name=name)
def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
  """Instantiates the `AbsoluteValue` bijector.

  Args:
    event_ndims: Python scalar indicating the number of dimensions
      associated with a particular draw from the distribution. Currently
      only zero is supported.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness. When true, a run-time assertion that
      `event_ndims` equals 0 is attached to `event_ndims`.
    name: Python `str` name given to ops managed by this object.

  Raises:
    ValueError: If `event_ndims` is statically known and is not zero.
  """
  self._graph_parents = []
  self._name = name

  event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
  static_event_ndims = tensor_util.constant_value(event_ndims)
  statically_known = static_event_ndims is not None
  if statically_known and static_event_ndims not in (0,):
    raise ValueError("event_ndims(%s) was not 0" % static_event_ndims)

  if validate_args:
    is_zero = check_ops.assert_equal(
        event_ndims, 0, message="event_ndims was not 0")
    event_ndims = control_flow_ops.with_dependencies([is_zero], event_ndims)

  with self._name_scope("init"):
    super(AbsoluteValue, self).__init__(
        event_ndims=event_ndims,
        validate_args=validate_args,
        name=name)
def _maybe_check_matching_sizes(self, event_shape_in, event_shape_out,
                                validate_args=False):
  """Check that prod(event_shape_in)==prod(event_shape_out).

  Args:
    event_shape_in: Shape `Tensor` of the input event.
    event_shape_out: Shape `Tensor` of the output event.
    validate_args: Python `bool`; when true and the sizes cannot both be
      determined statically, a run-time assertion is returned.

  Returns:
    A list holding zero or one assertion ops.

  Raises:
    ValueError: If both sizes are statically known and differ.
  """

  def _static_and_dynamic_size(shape):
    """Returns `(static_size_or_None, size)` for a shape `Tensor`."""
    static_shape = tensor_util.constant_value(shape)
    if static_shape is None:
      return None, math_ops.reduce_prod(shape, name="size")
    # Statically known: the same numpy scalar serves both roles.
    static_size = np.int32(np.prod(static_shape))
    return static_size, static_size

  # Ensure `event_shape_in` is compatible with `event_shape_out`.
  event_size_in_, event_size_in = _static_and_dynamic_size(event_shape_in)
  event_size_out_, event_size_out = _static_and_dynamic_size(event_shape_out)

  both_static = event_size_in_ is not None and event_size_out_ is not None
  if both_static:
    if event_size_in_ != event_size_out_:
      raise ValueError(
          "Input `event_size` ({}) does not match output `event_size` ({}).".
          format(event_size_in, event_size_out_))
    return []

  if validate_args:
    return [check_ops.assert_equal(
        event_size_in, event_size_out,
        message="Input/output `event_size`s do not match.")]
  return []
def maybe_check_quadrature_param(param, name, validate_args):
  """Helper which checks validity of `loc` and `scale` init args.

  Static shape information is used when available. Run-time assertions are
  only added (and only when `validate_args` is true) for the properties
  that cannot be decided statically.

  Args:
    param: `Tensor` to validate.
    name: Python `str` used in the name scope and in error messages.
    validate_args: Python `bool`; whether to add run-time assertions for
      properties that are not statically known.

  Returns:
    `param`, wrapped with dependencies on any run-time assertions created.

  Raises:
    ValueError: If `param` is statically known to be a scalar.
    NotImplementedError: If the last dimension of `param` is statically
      known and is not 1.
  """
  with ops.name_scope(name="check_" + name, values=[param]):
    assertions = []
    if param.shape.ndims is not None:
      if param.shape.ndims == 0:
        raise ValueError("Mixing params must be a (batch of) vector; "
                         "{}.rank={} is not at least one.".format(
                             name, param.shape.ndims))
    elif validate_args:
      assertions.append(check_ops.assert_rank_at_least(
          param, 1,
          message=("Mixing params must be a (batch of) vector; "
                   "{}.rank is not at least one.".format(
                       name))))

    # TODO(jvdillon): Remove once we support k-mixtures.
    # Indexing the shape yields a dimension object (it is read through
    # `.value`), which is never `None` itself. The static/dynamic decision
    # must therefore be made on `.value`; otherwise an unknown last
    # dimension is reported as a mismatch and the run-time check below can
    # never be reached.
    last_dim = param.shape.with_rank_at_least(1)[-1].value
    if last_dim is not None:
      if last_dim != 1:
        raise NotImplementedError("Currently only bimixtures are supported; "
                                  "{}.shape[-1]={} is not 1.".format(
                                      name, last_dim))
    elif validate_args:
      assertions.append(check_ops.assert_equal(
          array_ops.shape(param)[-1], 1,
          message=("Currently only bimixtures are supported; "
                   "{}.shape[-1] is not 1.".format(name))))

    if assertions:
      return control_flow_ops.with_dependencies(assertions, param)
    return param
def test_doesnt_raise_when_equal_and_broadcastable_shapes(self):
  """Equal constants of matching shape must pass `assert_equal`."""
  with self.test_session():
    first = constant_op.constant([1, 2], name="small")
    second = constant_op.constant([1, 2], name="small_2")
    values_equal = check_ops.assert_equal(first, second)
    with ops.control_dependencies([values_equal]):
      out = array_ops.identity(first)
    # Evaluating `out` forces the assertion to run.
    out.eval()
def validate_init_args(
    distribution, batch_shape, validate_args, batch_shape_static):
  """Helper to __init__ which makes or raises assertions.

  Each property is checked statically when the required information is
  available. Otherwise, and only if `validate_args` is true, a run-time
  assertion is added to the returned list.

  Args:
    distribution: Distribution whose batch size must match `batch_shape`.
    batch_shape: `Tensor` holding the requested batch shape.
    validate_args: Python `bool`; whether to create run-time assertions.
    batch_shape_static: Static value of `batch_shape`, or `None`.

  Returns:
    List of run-time assertion ops (possibly empty).

  Raises:
    ValueError: If a static check fails: `batch_shape` is not a vector, the
      sizes differ, or some element of `batch_shape_static` is below 1.
  """
  with ops.name_scope(name="validate_init_args",
                      values=[batch_shape] + distribution._graph_parents):  # pylint: disable=protected-access
    runtime_assertions = []

    # `batch_shape` must be a vector.
    static_rank = batch_shape.shape.ndims
    if static_rank is not None:
      if static_rank != 1:
        raise ValueError("`batch_shape` must be a vector "
                         "(saw rank: {}).".format(static_rank))
    elif validate_args:
      runtime_assertions.append(
          check_ops.assert_rank(
              batch_shape,
              1,
              message="`batch_shape` must be a vector.",
              name="assert_batch_shape_is_vector"))

    # Total number of batch members must agree with the distribution's.
    batch_size_static = np.prod(batch_shape_static)
    if distribution.batch_shape.is_fully_defined():
      dist_batch_size_static = np.prod(distribution.batch_shape).value
    else:
      dist_batch_size_static = None

    if batch_size_static is not None and dist_batch_size_static is not None:
      if batch_size_static != dist_batch_size_static:
        raise ValueError("`batch_shape` size ({}) must match "
                         "`distribution.batch_shape` size ({}).".format(
                             batch_size_static, dist_batch_size_static))
    elif validate_args:
      runtime_assertions.append(
          check_ops.assert_equal(
              math_ops.reduce_prod(batch_shape),
              math_ops.reduce_prod(distribution.batch_shape_tensor()),
              message=("`batch_shape` size must match "
                       "`distributions.batch_shape` size."),
              name="assert_batch_size"))

    # Every element of `batch_shape` must be positive.
    if batch_shape_static is not None:
      if np.any(batch_shape_static < 1):
        raise ValueError("`batch_shape` elements must be positive "
                         "(i.e., larger than zero).")
    elif validate_args:
      runtime_assertions.append(
          check_ops.assert_positive(
              batch_shape,
              message=("`batch_shape` elements must be positive "
                       "(i.e., larger than zero)."),
              name="assert_batch_shape_positive"))

    return runtime_assertions
def _verify_input(tensor_list, labels, probs_list):
  """Verify that batched inputs are well-formed.

  Args:
    tensor_list: List of data tensors, each with a leading batch dimension.
    labels: Rank-1 tensor of integer class labels.
    probs_list: Non-empty list of rank-1 probability tensors with fully
      defined static shape.

  Returns:
    `(tensor_list, labels, checked_probs_list)`, where every returned
    tensor depends on the run-time assertions relevant to it.

  Raises:
    ValueError: If the probability tensors differ in length (the static
      shape helpers used here may raise as well).
  """
  tol = 1e-6
  checked_probs_list = []
  for probs in probs_list:
    # Since number of classes shouldn't change at runtime, probabilities
    # shape should be fully defined.
    probs.get_shape().assert_is_fully_defined()

    # Probabilities must be 1D.
    probs.get_shape().assert_has_rank(1)

    # Probabilities must be nonnegative and sum to one (within `tol`).
    prob_sum = math_ops.reduce_sum(probs)
    distribution_checks = [
        check_ops.assert_non_negative(probs),
        check_ops.assert_less(prob_sum, 1.0 + tol),
        check_ops.assert_less(1.0 - tol, prob_sum)
    ]
    checked_probs_list.append(
        control_flow_ops.with_dependencies(distribution_checks, probs))

  # All probabilities should be the same length.
  prob_length = checked_probs_list[0].get_shape().num_elements()
  if any(checked.get_shape().num_elements() != prob_length
         for checked in checked_probs_list):
    raise ValueError('Probability parameters must have the same length.')

  # Labels tensor should only have batch dimension.
  labels.get_shape().assert_has_rank(1)

  for data_tensor in tensor_list:
    # Data tensor should have a batch dimension.
    data_shape = data_tensor.get_shape().with_rank_at_least(1)

    # Data and label batch dimensions must be compatible.
    data_batch_dim = tensor_shape.dimension_at_index(data_shape, 0)
    data_batch_dim.assert_is_compatible_with(labels.get_shape()[0])

  # Data and labels must have the same, strictly positive batch size. Since
  # we can't assume we know the batch size at graph creation, add runtime
  # checks.
  labels_batch_size = array_ops.shape(labels)[0]
  batch_is_positive = check_ops.assert_positive(labels_batch_size)

  # Make each tensor depend on its own checks.
  labels = control_flow_ops.with_dependencies([batch_is_positive], labels)
  checked_tensors = []
  for data_tensor in tensor_list:
    same_batch_size = check_ops.assert_equal(
        array_ops.shape(data_tensor)[0], labels_batch_size)
    checked_tensors.append(
        control_flow_ops.with_dependencies(
            [batch_is_positive, same_batch_size], data_tensor))
  tensor_list = checked_tensors

  # Label's classes must be integers 0 <= x < num_classes.
  label_checks = [
      check_ops.assert_integer(labels),
      check_ops.assert_non_negative(labels),
      check_ops.assert_less(labels, math_ops.cast(prob_length, labels.dtype))
  ]
  labels = control_flow_ops.with_dependencies(label_checks, labels)

  return tensor_list, labels, checked_probs_list
def check(t):
  """Build assertions comparing `t`'s shape with the enclosing `tensor`.

  Args:
    t: Tensor whose shape is broadcast against `shape(tensor)[1:]`.

  Returns:
    A pair `(rank_assertion, shape_assertion)` of assertion ops.
  """
  expected_shape = array_ops.shape(tensor)[1:]
  broadcast_shape = array_ops.broadcast_dynamic_shape(
      expected_shape, array_ops.shape(t))
  # This rank check ensures that I don't get a wrong answer from the
  # _shapes_ broadcasting against each other.
  rank_assertion = check_ops.assert_greater(
      array_ops.rank(expected_shape), array_ops.rank(t))
  # Broadcasting must leave the expected shape unchanged.
  shape_assertion = check_ops.assert_equal(expected_shape, broadcast_shape)
  return rank_assertion, shape_assertion
def test_raises_when_less(self):
  """Evaluating an op gated on a failing `assert_equal` raises an op error."""
  with self.test_session():
    lesser = constant_op.constant([3, 1], name="small")
    greater = constant_op.constant([4, 2], name="big")
    equality_assert = check_ops.assert_equal(lesser, greater)
    with ops.control_dependencies([equality_assert]):
      out = array_ops.identity(lesser)
    # The error text is expected to mention both operand names in order.
    with self.assertRaisesOpError("small.*big"):
      out.eval()
def test_raises_when_equal_but_non_broadcastable_shapes(self):
  """Inputs whose shapes cannot broadcast are rejected with a ValueError."""
  with self.test_session():
    three_ones = constant_op.constant([1, 1, 1], name="small")
    two_ones = constant_op.constant([1, 1], name="small_2")
    # Everything, including building the assertion, happens inside the
    # expectation, so the error may surface at any of these steps.
    with self.assertRaisesRegexp(ValueError, "must be"):
      incompatible_assert = check_ops.assert_equal(three_ones, two_ones)
      with ops.control_dependencies([incompatible_assert]):
        out = array_ops.identity(three_ones)
      out.eval()
def _assert_self_adjoint(self):
  """Default self-adjointness check based on the dense matrix.

  Logs a warning because this path requires the dense representation.

  Returns:
    An assertion op comparing the dense matrix with its adjoint.
  """
  dense_matrix = self._get_cached_dense_matrix()
  logging.warn(
      "Using (possibly slow) default implementation of assert_self_adjoint."
      "  Requires conversion to a dense matrix.")
  adjoint_matrix = linear_operator_util.matrix_adjoint(dense_matrix)
  return check_ops.assert_equal(
      dense_matrix,
      adjoint_matrix,
      message="Matrix was not equal to its adjoint.")
def _forward(self, x):
  """Compute `L L^H`, where `L` is the lower-triangular part of `x`.

  Args:
    x: Input tensor. With `self.validate_args` it is asserted to have rank
      at least 2 and equal sizes in its last two dimensions.

  Returns:
    The product of the lower-triangular part of `x` with its adjoint.
  """
  if self.validate_args:
    rank_check = check_ops.assert_rank_at_least(x, 2)
    dynamic_shape = array_ops.shape(x)
    square_check = check_ops.assert_equal(
        dynamic_shape[-2], dynamic_shape[-1])
    x = control_flow_ops.with_dependencies([rank_check, square_check], x)
  # For safety, explicitly zero-out the upper triangular part.
  lower = array_ops.matrix_band_part(x, -1, 0)
  return math_ops.matmul(lower, lower, adjoint_b=True)
def update():
  """Run one loss-scale update and check its verdict against the input.

  Pulls the next `is_finite` flag from the enclosing iterator, feeds the
  derived gradient to `loss_scale.update` and asserts that the reported
  `should_apply_gradients` equals the flag.

  Returns:
    `None` when executing eagerly; otherwise the update op passed through
    an identity that depends on the assertion.
  """
  is_finite = itr.get_next()
  grad = self._get_tensor(is_finite)
  update_op, should_apply_gradients = loss_scale.update([grad])
  verdict_matches = check_ops.assert_equal(should_apply_gradients, is_finite)
  if not context.executing_eagerly():
    with ops.control_dependencies([verdict_matches]):
      return array_ops.identity(update_op)
  return None
def _batch_size_checks(self, batch_size, error_message): del batch_size # Unused. # Attention batch size must be always 1. return [check_ops.assert_equal(1, attention_mechanism.batch_size, message=error_message) for attention_mechanism in self._attention_mechanisms]
def map_fn(x):
  """Return `x` through an identity op gated on `x == 0` (as int64)."""
  is_zero = check_ops.assert_equal(x, np.int64(0))
  with ops.control_dependencies([is_zero]):
    # The identity is created inside the context so it carries the check.
    gated = array_ops.identity(x)
  return gated
def shape_reduce_fn(state, value):
  """Reduce step asserting that `value` has the shape stored in `state`.

  Args:
    state: Shape vector carried through the reduction.
    value: Element whose dynamic shape is compared with `state`.

  Returns:
    `state`, passed through an identity op that depends on the shape
    assertion.
  """
  shapes_match = check_ops.assert_equal(state, array_ops.shape(value))
  # An assertion op that nothing depends on is never executed in a graph.
  # Returning an identity created under the dependency ties the check to
  # the reduction's result.
  with ops.control_dependencies([shapes_match]):
    return array_ops.identity(state)
def _assert_integer_form(x):
  """Check x for integer components (or floats that are equal to integers).

  Args:
    x: Value convertible to a `Tensor`.

  Returns:
    An assertion op comparing `x` with its int64 round trip.
  """
  x = ops.convert_to_tensor(x, name='x')
  as_int64 = math_ops.to_int64(x)
  # Cast back to the original dtype so both operands are comparable.
  round_tripped = math_ops.cast(math_ops.round(as_int64), x.dtype)
  return check_ops.assert_equal(x, round_tripped)
def span_overlaps(source_start,
                  source_limit,
                  target_start,
                  target_limit,
                  contains=False,
                  contained_by=False,
                  partial_overlap=False,
                  name=None):
  """Returns a boolean tensor indicating which source and target spans overlap.

  The source and target spans are specified using B+1 dimensional tensors,
  with `B>=0` batch dimensions followed by a final dimension that lists the
  span offsets for each span in the batch:

  * The `i`th source span in batch `b1...bB` starts at
    `source_start[b1...bB, i]` (inclusive), and extends to just before
    `source_limit[b1...bB, i]` (exclusive).
  * The `j`th target span in batch `b1...bB` starts at
    `target_start[b1...bB, j]` (inclusive), and extends to just before
    `target_limit[b1...bB, j]` (exclusive).

  `result[b1...bB, i, j]` is true if the `i`th source span overlaps with the
  `j`th target span in batch `b1...bB`, where a source span overlaps a target
  span if any of the following are true:

    * The spans are identical.
    * `contains` is true, and the source span contains the target span.
    * `contained_by` is true, and the source span is contained by the target
      span.
    * `partial_overlap` is true, and there is a non-zero overlap between the
      source span and the target span.

  Args:
    source_start: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, source_size]`: the start offset of each source span.
    source_limit: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, source_size]`: the limit offset of each source span.
    target_start: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, target_size]`: the start offset of each target span.
    target_limit: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, target_size]`: the limit offset of each target span.
    contains: If true, then a source span is considered to overlap a target
      span when the source span contains the target span.
    contained_by: If true, then a source span is considered to overlap a
      target span when the source span is contained by the target span.
    partial_overlap: If true, then a source span is considered to overlap a
      target span when the source span partially overlaps the target span.
    name: A name for the operation (optional).

  Returns:
    A B+2 dimensional potentially ragged boolean tensor with shape
    `[D1...DB, source_size, target_size]`.

  Raises:
    ValueError: If the span tensors are incompatible.
    TypeError: If the four span tensors do not share a single dtype.

  #### Example:
    Given the following source and target spans (with no batch dimensions):

    ```python
    #         0    5    10   15   20   25   30   35   40
    #         |====|====|====|====|====|====|====|====|
    # Source: [-0-]     [-1-] [2] [-3-][-4-][-5-]
    # Target: [-0-][-1-]     [-2-] [3]   [-4-][-5-]
    #         |====|====|====|====|====|====|====|====|
    >>> source_start = [0, 10, 16, 20, 25, 30]
    >>> source_limit = [5, 15, 19, 25, 30, 35]
    >>> target_start = [0,  5, 15, 21, 27, 31]
    >>> target_limit = [5, 10, 20, 24, 32, 37]
    ```

    `result[i, j]` will be true at the following locations:

      * `[0, 0]` (always)
      * `[2, 2]` (if contained_by=True or partial_overlap=True)
      * `[3, 3]` (if contains=True or partial_overlap=True)
      * `[4, 4]` (if partial_overlap=True)
      * `[5, 5]` (if partial_overlap=True)
  """
  _check_type(contains, 'contains', bool)
  _check_type(contained_by, 'contained_by', bool)
  _check_type(partial_overlap, 'partial_overlap', bool)

  scope_tensors = [source_start, source_limit, target_start, target_limit]
  with ops.name_scope(name, 'SpanOverlaps', scope_tensors):
    # Convert input tensors.
    to_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor
    source_start = to_tensor(source_start, name='source_start')
    source_limit = to_tensor(source_limit, name='source_limit')
    target_start = to_tensor(target_start, name='target_start')
    target_limit = to_tensor(target_limit, name='target_limit')
    span_tensors = [source_start, source_limit, target_start, target_limit]

    # Verify input tensor shapes and types.
    source_start.shape.assert_is_compatible_with(source_limit.shape)
    target_start.shape.assert_is_compatible_with(target_limit.shape)
    source_start.shape.assert_same_rank(target_start.shape)
    source_start.shape.assert_same_rank(target_limit.shape)
    source_limit.shape.assert_same_rank(target_start.shape)
    source_limit.shape.assert_same_rank(target_limit.shape)
    if not (source_start.dtype == target_start.dtype == source_limit.dtype ==
            target_limit.dtype):
      raise TypeError('source_start, source_limit, target_start, and '
                      'target_limit must all have the same dtype')

    known_ndims = set()
    for span_tensor in span_tensors:
      if span_tensor.shape.ndims is not None:
        known_ndims.add(span_tensor.shape.ndims)
    assert len(known_ndims) <= 1  # because of assert_same_rank statements above.

    is_ragged = [
        isinstance(span_tensor, ragged_tensor.RaggedTensor)
        for span_tensor in span_tensors
    ]

    # All dense: the non-recursive implementation handles it directly.
    if not any(is_ragged):
      return _span_overlaps(source_start, source_limit, target_start,
                            target_limit, contains, contained_by,
                            partial_overlap)

    if not all(is_ragged):
      # Mix of dense and ragged tensors.
      raise ValueError('Span tensors must all have the same ragged_rank')

    # All ragged from here on.
    if not known_ndims:
      raise ValueError(
          'For ragged inputs, the shape.ndims of at least one '
          'span tensor must be statically known.')
    if next(iter(known_ndims)) == 2:
      return _span_overlaps(source_start, source_limit, target_start,
                            target_limit, contains, contained_by,
                            partial_overlap)

    # Handle ragged batch dimension by recursion on values.
    row_splits = span_tensors[0].row_splits
    shape_checks = []
    for other in span_tensors[1:]:
      shape_checks.append(
          check_ops.assert_equal(
              other.row_splits,
              row_splits,
              message='Mismatched ragged shapes for batch dimensions'))
    with ops.control_dependencies(shape_checks):
      inner_overlaps = span_overlaps(
          source_start.values, source_limit.values, target_start.values,
          target_limit.values, contains, contained_by, partial_overlap)
      return ragged_tensor.RaggedTensor.from_row_splits(
          inner_overlaps, row_splits)
def map_fn(x):
  """Returns `x` from inside a control-dependency scope asserting `x == 0`."""
  is_zero = check_ops.assert_equal(x, 0)
  with ops.control_dependencies([is_zero]):
    return x
def _forward_log_det_jacobian(self, x):
  """Computes the forward log-det-Jacobian of `Y = X X.T`.

  Args:
    x: `Tensor` holding a (batch of) square matrices. The derivation below
      assumes each matrix is lower-triangular; only the diagonal is read.

  Returns:
    fldj: `Tensor` equal to `p * log(2) + sum_j (p - j) * log(x[..., j, j])`,
      where `p` is the size of the last dimension of `x`.
  """
  # Let Y be a symmetric, positive definite matrix and write:
  #   Y = X X.T
  # where X is lower-triangular.
  #
  # Observe that,
  #   dY[i,j]/dX[a,b]
  #   = d/dX[a,b] { X[i,:] X[j,:] }
  #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
  #   = I[i=a] X[j,b] + I[j=a] X[i,b]
  #
  # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
  # symmetric and X is lower-triangular, we need vectors of dimension:
  #   d = p (p + 1) / 2
  # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
  #   k = { i (i + 1) / 2 + j   i>=j
  #       { undef               i<j
  # and assume zero-based indexes. When k is undef, the element is dropped.
  # Example:
  #           j               k
  #        0 1 2 3           /
  #    0 [ 0 . . . ]
  # i  1 [ 1 2 . . ]
  #    2 [ 3 4 5 . ]
  #    3 [ 6 7 8 9 ]
  # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
  # slight abuse: k(i,j)=undef means the element is dropped.)
  #
  # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
  # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
  # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
  # (1) j<=i<a thus i,j!=a and both indicators vanish.
  # (2) i=a and j<b thus X[j,b]=0 (X is lower-triangular); the second term
  #     is nonzero only if j=a=i, in which case X[i,b]=X[j,b]=0 as well.
  #
  # Since the Jacobian is lower-triangular, we need only compute the product
  # of diagonal elements:
  #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
  #   = X[j,j] + I[i=j] X[i,j]
  # i.e. 2 X[j,j] when i=j and X[j,j] when i>j. Column j of X contributes
  # p-j lower-triangular elements, exactly one of which is on the diagonal,
  # so we conclude:
  #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.

  if self.validate_args:
    # Attach the structural checks to `x` *before* it is consumed, so they sit
    # on the path to the returned tensor. Attaching them afterwards leaves
    # them unreachable whenever the static shape of `x` is known.
    is_matrix = check_ops.assert_rank_at_least(
        x, 2, message="Input must be a (batch of) matrix.")
    shape = array_ops.shape(x)
    is_square = check_ops.assert_equal(
        shape[-2], shape[-1],
        message="Input must be a (batch of) square matrix.")
    x = control_flow_ops.with_dependencies([is_matrix, is_square], x)

  diag = array_ops.matrix_diag_part(x)

  # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
  # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
  # output is unchanged.
  diag = self._make_columnar(diag)

  if self.validate_args:
    # Assuming lower-triangular means we only need check diag>0. The check is
    # attached to `diag`, which always feeds the result via `log(diag)`.
    is_positive_definite = check_ops.assert_positive(
        diag, message="Input must be positive definite.")
    diag = control_flow_ops.with_dependencies([is_positive_definite], diag)

  # Create a vector equal to: [p, p-1, ..., 2, 1].
  if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
    # Last dimension unknown at graph-construction time: read it dynamically.
    p_int = array_ops.shape(x)[-1]
    p_float = math_ops.cast(p_int, dtype=x.dtype)
  else:
    p_int = x.get_shape()[-1].value
    p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
  exponents = math_ops.linspace(p_float, 1., p_int)

  # Weighted sum of log-diagonal entries: sum_j (p - j) * log(X[j,j]).
  sum_weighted_log_diag = array_ops.squeeze(
      math_ops.matmul(math_ops.log(diag),
                      exponents[..., array_ops.newaxis]),
      axis=-1)
  fldj = p_float * np.log(2.) + sum_weighted_log_diag

  return fldj
def shape_reduce_fn(state, value):
  """Creates an equality check of `state` vs. `value.dense_shape`.

  The check op is created but not returned; `state` is passed through
  unchanged.
  """
  observed_shape = value.dense_shape
  check_ops.assert_equal(state, observed_shape)
  return state
def _assert_self_adjoint(self):
  """Returns an op asserting the imaginary part of `multiplier` is zero."""
  imag_part = math_ops.imag(self.multiplier)
  all_zeros = array_ops.zeros_like(imag_part)
  return check_ops.assert_equal(
      all_zeros, imag_part, message="LinearOperator was not self-adjoint")
def __init__(self,
             cell,
             attention_mechanism,
             attention_layer_size=None,
             alignment_history=False,
             cell_input_fn=None,
             output_attention=True,
             initial_cell_state=None,
             name=None):
  """Validates the arguments and stores the wrapper's configuration.

  Args:
    cell: Object accepted by `rnn_cell_impl._like_rnncell`.
    attention_mechanism: An `AttentionMechanism` instance.
    attention_layer_size: If not `None`, the number of units of a bias-free
      `Dense` layer named "attention_layer". If `None`, no layer is created
      and the size is read from the last static dimension of
      `attention_mechanism.values`.
    alignment_history: Stored as `self._alignment_history`.
    cell_input_fn: Callable `(inputs, attention) -> cell_inputs`. Defaults to
      concatenating the two along the last axis.
    output_attention: Stored as `self._output_attention`.
    initial_cell_state: Optional (possibly nested) initial state. When given,
      each tensor is wrapped in an identity op guarded by a batch-size check
      against `attention_mechanism.batch_size`.
    name: Name passed to the base class and used for the name scope.

  Raises:
    TypeError: If `cell` is not RNNCell-like, `attention_mechanism` is not an
      `AttentionMechanism`, or `cell_input_fn` is given but not callable.
  """
  super(GatedAttentionWrapper, self).__init__(name=name)

  # Argument type validation.
  if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
    raise TypeError(
        "cell must be an RNNCell, saw type: %s" % type(cell).__name__)
  if not isinstance(attention_mechanism, AttentionMechanism):
    raise TypeError(
        "attention_mechanism must be a AttentionMechanism, saw type: %s"
        % type(attention_mechanism).__name__)
  if cell_input_fn is None:
    cell_input_fn = (
        lambda inputs, attention: array_ops.concat([inputs, attention], -1))
  elif not callable(cell_input_fn):
    raise TypeError(
        "cell_input_fn must be callable, saw type: %s"
        % type(cell_input_fn).__name__)

  # Optional projection applied to the attention output.
  if attention_layer_size is None:
    self._attention_layer = None
    self._attention_layer_size = (
        attention_mechanism.values.get_shape()[-1].value)
  else:
    self._attention_layer = layers_core.Dense(
        attention_layer_size, name="attention_layer", use_bias=False)
    self._attention_layer_size = attention_layer_size

  self._cell = cell
  self._attention_mechanism = attention_mechanism
  self._cell_input_fn = cell_input_fn
  self._output_attention = output_attention
  self._alignment_history = alignment_history

  with ops.name_scope(name, "AttentionWrapperInit"):
    if initial_cell_state is None:
      self._initial_cell_state = None
    else:
      # Use the last flattened state tensor to determine the batch size;
      # prefer the static value, fall back to the dynamic shape.
      last_state = nest.flatten(initial_cell_state)[-1]
      static_batch_size = last_state.shape[0].value
      state_batch_size = (
          static_batch_size or array_ops.shape(last_state)[0])
      error_message = (
          "When constructing AttentionWrapper %s: " % self._base_name +
          "Non-matching batch sizes between the memory "
          "(encoder output) and initial_cell_state. Are you using "
          "the BeamSearchDecoder? You may need to tile your initial state "
          "via the tf.contrib.seq2seq.tile_batch function with argument "
          "multiple=beam_width.")
      batch_sizes_match = check_ops.assert_equal(
          state_batch_size,
          self._attention_mechanism.batch_size,
          message=error_message)
      with ops.control_dependencies([batch_sizes_match]):
        self._initial_cell_state = nest.map_structure(
            lambda s: array_ops.identity(s, name="check_initial_cell_state"),
            initial_cell_state)
def stack_dynamic_partitions(data, partitions, num_partitions, name=None):
  """Groups slices of `data` into the rows of a RaggedTensor by partition id.

  The result `output` has `num_partitions` rows. Row `output[i]` stacks, in
  row-major order, every slice `data[j1...jN]` for which
  `partitions[j1...jN] = i`.

  If `num_partitions` is an `int` (not a `Tensor`), then this is equivalent to
  `tf.ragged.stack(tf.dynamic_partition(data, partitions, num_partitions))`.

  #### Example:

  >>> data           = ['a', 'b', 'c', 'd', 'e']
  >>> partitions     = [  3,   0,   2,   2,   3]
  >>> num_partitions = 5
  >>> tf.ragged.stack_dynamic_partitions(data, partitions, num_partitions)
  <tf.RaggedTensor [[b'b'], [], [b'c', b'd'], [b'a', b'e'], []]>

  Args:
    data: A `Tensor` or `RaggedTensor` containing the values to stack.
    partitions: An `int32` or `int64` `Tensor` or `RaggedTensor` specifying
      the partition that each slice of `data` should be added to.
      `partitions.shape` must be a prefix of `data.shape`. Values must be
      greater than or equal to zero, and less than `num_partitions`.
      `partitions` is not required to be sorted.
    num_partitions: An `int32` or `int64` scalar specifying the number of
      partitions to output. This determines the number of rows in `output`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the stacked partitions. The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_partitions, (D)] + data.shape[partitions.rank:]`, where `(D)` is a
    ragged dimension whose length is the number of data slices stacked for
    each `partition`.

  Raises:
    ValueError: If the rank of `partitions` is not statically known.
  """
  with ops.name_scope(name, 'SegmentStack',
                      [data, partitions, num_partitions]):
    # Tensor conversion. When `data` is ragged, its row_splits dtype dictates
    # the dtype used for `partitions` and `num_partitions`.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    if isinstance(data, ragged_tensor.RaggedTensor):
      splits_dtype = data.row_splits.dtype
    else:
      splits_dtype = None
    partitions = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        partitions, name='partitions', preferred_dtype=splits_dtype)
    num_partitions = ops.convert_to_tensor(
        num_partitions,
        name='num_partitions',
        preferred_dtype=partitions.dtype)
    if splits_dtype is not None:
      partitions = math_ops.cast(partitions, splits_dtype)
    num_partitions = math_ops.cast(num_partitions, partitions.dtype)

    # Static shape validation.
    rank = partitions.shape.ndims
    if rank is None:
      raise ValueError('partitions must have known rank.')
    num_partitions.shape.assert_has_rank(0)
    partitions.shape.assert_is_compatible_with(data.shape[:rank])

    if rank == 0:
      # Scalar partition id: the complete `data` value becomes the single
      # entry of the requested row.
      return ragged_tensor.RaggedTensor.from_value_rowids(
          values=array_ops.stack([data]),
          value_rowids=array_ops.stack([partitions]),
          nrows=num_partitions,
          validate=False)

    if rank == 1:
      # Vector of partition ids (the typical case): after a stable sort by
      # partition id, `data` and `partitions` are directly the `values` and
      # `value_rowids` of the result.
      order = sort_ops.argsort(partitions, stable=True)
      sorted_ids = array_ops.gather(partitions, order)
      sorted_data = array_ops.gather(data, order)
      # The ids are sorted, so checking the last one bounds all of them.
      ids_in_range = check_ops.assert_less(
          sorted_ids[-1:],
          num_partitions,
          message='partitions must be less than num_partitions')
      with ops.control_dependencies([ids_in_range]):
        return ragged_tensor.RaggedTensor.from_value_rowids(
            sorted_data, sorted_ids, nrows=num_partitions, validate=False)

    # Higher-rank partitions: make both inputs ragged, verify that their outer
    # row partitioning agrees, and recurse on the flattened values.
    if not isinstance(data, ragged_tensor.RaggedTensor):
      data = ragged_tensor.RaggedTensor.from_tensor(
          data, row_splits_dtype=partitions.dtype, ragged_rank=1)
    if not isinstance(partitions, ragged_tensor.RaggedTensor):
      partitions = ragged_tensor.RaggedTensor.from_tensor(
          partitions,
          row_splits_dtype=partitions.dtype,
          ragged_rank=max(data.ragged_rank, rank - 1))
    splits_match = check_ops.assert_equal(
        data.row_splits,
        partitions.row_splits,
        message='data and partitions have incompatible ragged shapes')
    with ops.control_dependencies([splits_match]):
      return stack_dynamic_partitions(data.values, partitions.values,
                                      num_partitions)
def call(self, inputs, state):
  """Runs one step of the attention-wrapped RNN.

  - Step 1: Combine `inputs` with the previous step's `attention` via
    `cell_input_fn`.
  - Step 2: Run the wrapped `cell` on that input and its previous state.
  - Step 3: Score the cell's output with `attention_mechanism`.
  - Step 4: Turn the score into alignments with `self._probability_fn`.
  - Step 5: Compute the context vector as the inner product between the
    alignments and the attention mechanism's values (memory).
  - Step 6: If an attention layer is configured, apply it to the
    concatenation of the cell output and the context; otherwise the context
    itself is the attention.

  Args:
    inputs: (Possibly nested tuple of) Tensor, the input at this time step.
    state: An instance of `AttentionWrapperState` containing tensors from the
      previous time step.

  Returns:
    A tuple `(attention_or_cell_output, next_state)`, where:

    - `attention_or_cell_output` is the attention if `output_attention` is
      set, else the cell output.
    - `next_state` is an instance of `AttentionWrapperState` containing the
      state calculated at this time step.
  """
  # Steps 1-2: build the true cell inputs and advance the wrapped cell.
  mixed_inputs = self._cell_input_fn(inputs, state.attention)
  prev_cell_state = state.cell_state
  cell_output, next_cell_state = self._cell(mixed_inputs, prev_cell_state)

  # Guard against a batch-size mismatch between query and memory; prefer the
  # static batch size, fall back to the dynamic shape.
  static_batch_size = cell_output.shape[0].value
  cell_batch_size = static_batch_size or array_ops.shape(cell_output)[0]
  error_message = (
      "When applying AttentionWrapper %s: " % self.name +
      "Non-matching batch sizes between the memory "
      "(encoder output) and the query (decoder output). Are you using "
      "the BeamSearchDecoder? You may need to tile your memory input via "
      "the tf.contrib.seq2seq.tile_batch function with argument "
      "multiple=beam_width.")
  batch_sizes_match = check_ops.assert_equal(
      cell_batch_size,
      self._attention_mechanism.batch_size,
      message=error_message)
  with ops.control_dependencies([batch_sizes_match]):
    cell_output = array_ops.identity(cell_output, name="checked_cell_output")

  # Steps 3-4: score and normalize.
  score = self._attention_mechanism(cell_output)
  alignments = self._probability_fn(score)

  # Step 5: context is the inner product of alignments and values along the
  # memory time dimension.
  #   alignments are reshaped from [batch_size, memory_time] to
  #     [batch_size, 1, memory_time]
  #   attention_mechanism.values shape is
  #     [batch_size, memory_time, attention_mechanism.num_units]
  #   the batched matmul is over memory_time, so the output shape is
  #     [batch_size, 1, attention_mechanism.num_units],
  #   and the singleton dim is then squeezed out.
  alignments_3d = array_ops.expand_dims(alignments, 1)
  memory_values = self._attention_mechanism.values
  context_3d = math_ops.matmul(alignments_3d, memory_values)
  context_vector = array_ops.squeeze(context_3d, [1])

  # Step 6: optional projection of [cell_output; context].
  if self._attention_layer is None:
    attention = context_vector
  else:
    attention = self._attention_layer(
        array_ops.concat([cell_output, context_vector], 1))

  history = (
      state.alignment_history.write(state.time, alignments)
      if self._alignment_history else ())

  next_state = AttentionWrapperState(
      time=state.time + 1,
      cell_state=next_cell_state,
      attention=attention,
      alignment_history=history)

  step_output = attention if self._output_attention else cell_output
  return step_output, next_state
def batch_jacobian(self,
                   target,
                   source,
                   unconnected_gradients=UnconnectedGradients.NONE,
                   parallel_iterations=None,
                   experimental_use_pfor=True):
  """Computes and stacks per-example jacobians.

  See [wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant)
  for the definition of a Jacobian. This function is essentially an efficient
  implementation of the following:

  `tf.stack([self.jacobian(y[i], x[i]) for i in range(x.shape[0])])`.

  Note that compared to `GradientTape.jacobian` which computes gradient of
  each output value w.r.t each input value, this function is useful when
  `target[i,...]` is independent of `source[j,...]` for `j != i`. This
  assumption allows more efficient computation as compared to
  `GradientTape.jacobian`. The output, as well as intermediate activations,
  are lower dimensional and avoid a bunch of redundant zeros which would
  result in the jacobian computation given the independence assumption.

  Example usage:

  ```python
  with tf.GradientTape() as g:
    x = tf.constant([[1., 2.], [3., 4.]], dtype=tf.float32)
    g.watch(x)
    y = x * x
  batch_jacobian = g.batch_jacobian(y, x)
  # batch_jacobian is [[[2,  0], [0,  4]], [[6,  0], [0,  8]]]
  ```

  Args:
    target: A tensor with rank 2 or higher and with shape [b, y1, ..., y_n].
      `target[i,...]` should only depend on `source[i,...]`.
    source: A tensor with rank 2 or higher and with shape [b, x1, ..., x_m].
    unconnected_gradients: a value which can either hold 'none' or 'zero' and
      alters the value which will be returned if the target and sources are
      unconnected. The possible values and effects are detailed in
      'UnconnectedGradients' and it defaults to 'none'.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.
    experimental_use_pfor: If true, uses pfor for computing the Jacobian. Else
      uses a tf.while_loop.

  Returns:
    A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
    is the jacobian of `target[i, ...]` w.r.t. `source[i, ...]`, i.e. stacked
    per-example jacobians. `None` if the underlying loop yields `None`.

  Raises:
    RuntimeError: If called on a non-persistent tape with eager execution
      enabled and without enabling experimental_use_pfor.
    ValueError: If vectorization of jacobian computation fails or if first
      dimension of `target` and `source` do not match.
  """
  target_shape = target.shape
  if target_shape.rank is None:
    dim = tensor_shape.Dimension(None)
  else:
    dim = target_shape.dims[0]
  if not (target_shape.with_rank_at_least(2) and
          source.shape.with_rank_at_least(2) and
          dim.is_compatible_with(source.shape[0])):
    raise ValueError(
        "Need first dimension of target shape (%s) and "
        "source shape (%s) to match." % (target.shape, source.shape))

  # Use Python ints when the shape is static, tensors otherwise.
  if target_shape.is_fully_defined():
    batch_size = int(target_shape[0])
    target_row_size = target_shape.num_elements() // batch_size
  else:
    target_shape = array_ops.shape(target)
    batch_size = target_shape[0]
    target_row_size = array_ops.size(target) // batch_size
  source_shape = array_ops.shape(source)

  # Flatten target to 2-D.
  # Note that we push and pop the tape here and below. This is needed since we
  # need gradients through the enclosed operations. The pop is placed in a
  # `finally` so a failure in the enclosed ops cannot leave the tape pushed.
  self._push_tape()
  try:
    with ops.control_dependencies(
        [check_ops.assert_equal(batch_size, source_shape[0])]):
      target = array_ops.reshape(target, [batch_size, target_row_size])
  finally:
    self._pop_tape()

  def loop_fn(i):
    """Returns the gradient of column `i` of the flattened target."""
    self._push_tape()
    try:
      y = array_ops.gather(target, i, axis=1)
    finally:
      self._pop_tape()
    return self.gradient(y, source,
                         unconnected_gradients=unconnected_gradients)

  if experimental_use_pfor:
    try:
      output = pfor_ops.pfor(loop_fn, target_row_size,
                             parallel_iterations=parallel_iterations)
    except ValueError as err:
      # Re-raise with a hint on how to disable vectorization, keeping the
      # original traceback.
      six.reraise(
          ValueError,
          ValueError(
              str(err) + "\nEncountered an exception while vectorizing the "
              "batch_jacobian computation. Vectorization can be disabled by "
              "setting experimental_use_pfor to False."),
          sys.exc_info()[2])
  else:
    if context.executing_eagerly() and not self._persistent:
      raise RuntimeError(
          "GradientTape must be created with persistent=True"
          " to compute the batch_jacobian with eager execution enabled and "
          " with experimental_use_pfor set to False.")
    output = pfor_ops.for_loop(loop_fn, target.dtype, target_row_size,
                               parallel_iterations=parallel_iterations)

  if output is None:
    return None

  # The loop stacks results along the target-row axis first; move the batch
  # axis to the front, then restore the original target/source dimensions.
  output = array_ops.reshape(output, [target_row_size, batch_size, -1])
  output = array_ops.transpose(output, [1, 0, 2])
  new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0)
  return array_ops.reshape(output, new_shape)
def assert_true_mean_equal_by_dkwm_two_sample(
    samples1, low1, high1, samples2, low2, high2,
    false_fail_rate=1e-6, name=None):
  """Asserts the means of the given distributions are equal.

  More precisely, fails if there is enough evidence (using the
  [Dvoretzky-Kiefer-Wolfowitz-Massart inequality]
  (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval))
  that the means of the distributions from which the given samples are
  drawn are _not_ equal with statistical significance `false_fail_rate`
  or stronger, otherwise passes. If you also want to check that you
  are gathering enough evidence that a pass is not spurious, see
  `min_num_samples_for_dkwm_mean_two_sample_test` and
  `min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`.

  Note that `false_fail_rate` is a total false failure rate for all
  the assertions in the batch. As such, if the batch is nontrivial,
  the assertion will insist on stronger evidence to fail any one member.

  Args:
    samples1: Floating-point `Tensor` of samples from the
      distribution(s) A. Entries are assumed IID across the 0th
      dimension. The other dimensions must broadcast with `low1`,
      `high1`, `low2`, and `high2`.
      The support is bounded: `low1 <= samples1 <= high1`.
    low1: Floating-point `Tensor` of lower bounds on the supports of the
      distributions A.
    high1: Floating-point `Tensor` of upper bounds on the supports of
      the distributions A.
    samples2: Floating-point `Tensor` of samples from the
      distribution(s) B. Entries are assumed IID across the 0th
      dimension. The other dimensions must broadcast with `low1`,
      `high1`, `low2`, and `high2`.
      The support is bounded: `low2 <= samples2 <= high2`.
    low2: Floating-point `Tensor` of lower bounds on the supports of the
      distributions B.
    high2: Floating-point `Tensor` of upper bounds on the supports of
      the distributions B.
    false_fail_rate: *Scalar* floating-point `Tensor` admissible total
      rate of mistakes.
    name: A name for this operation (optional).

  Returns:
    check: Op that raises `InvalidArgumentError` if any pair of confidence
      intervals for corresponding true means do not overlap.
  """
  scope_values = [
      samples1, low1, high1, samples2, low2, high2, false_fail_rate]
  with ops.name_scope(
      name, "assert_true_mean_equal_by_dkwm_two_sample", scope_values):
    # Convert every input to a tensor, in argument order.
    samples1 = ops.convert_to_tensor(samples1, name="samples1")
    low1 = ops.convert_to_tensor(low1, name="low1")
    high1 = ops.convert_to_tensor(high1, name="high1")
    samples2 = ops.convert_to_tensor(samples2, name="samples2")
    low2 = ops.convert_to_tensor(low2, name="low2")
    high2 = ops.convert_to_tensor(high2, name="high2")
    false_fail_rate = ops.convert_to_tensor(
        false_fail_rate, name="false_fail_rate")

    samples1 = _check_shape_dominates(samples1, [low1, high1])
    samples2 = _check_shape_dominates(samples2, [low2, high2])

    # All dimensions other than the 0th (sample) one must agree.
    batch_shape_1 = array_ops.shape(samples1)[1:]
    batch_shape_2 = array_ops.shape(samples2)[1:]
    compatible_samples = check_ops.assert_equal(batch_shape_1, batch_shape_2)

    with ops.control_dependencies([compatible_samples]):
      # Could in principle play games with cleverly allocating
      # significance instead of the even split below. It may be possible
      # to get tighter intervals, in order to obtain a higher power test.
      # Any allocation strategy that depends only on the support bounds
      # and sample counts should be valid; however, because the intervals
      # scale as O(-log(false_fail_rate)), there doesn't seem to be much
      # room to win.
      interval_2 = true_mean_confidence_interval_by_dkwm(
          samples2, low2, high2, false_fail_rate / 2.)
      min_mean_2, max_mean_2 = interval_2
      return assert_true_mean_in_interval_by_dkwm(
          samples1, low1, high1, min_mean_2, max_mean_2,
          false_fail_rate / 2.)
def _ragged_segment_aggregate(unsorted_segment_op,
                              data,
                              segment_ids,
                              num_segments,
                              name=None):
  """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`. The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`. If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row. Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`. Must have type `int64` or
      `int32`. `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values. The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.

  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
  # Base case: nothing is ragged, so defer to the dense op directly.
  if not ragged_tensor.is_ragged(data) and not ragged_tensor.is_ragged(
      segment_ids):
    return unsorted_segment_op(data, segment_ids, num_segments, name)

  with ops.name_scope(name, 'RaggedSegment',
                      [data, segment_ids, num_segments]) as scope:
    data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        data, name='data')
    segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        segment_ids, name='segment_ids')

    if ragged_tensor.is_ragged(segment_ids):
      # Ragged segment ids: the outer row partitioning must match that of
      # `data`; strip it from both and recurse on the values.
      if not ragged_tensor.is_ragged(data):
        raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                         'but segment_ids is ragged and data is not.')
      splits_match = check_ops.assert_equal(
          segment_ids.row_splits,
          data.row_splits,
          message='segment_ids.shape must be a prefix of data.shape')
      with ops.control_dependencies([splits_match]):
        return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                         segment_ids.values, num_segments,
                                         scope)

    segment_ids = math_ops.cast(segment_ids, dtypes.int64)

    # Length of each row in data. (dtype=int64, shape=[data_nrows])
    splits = data.row_splits
    row_lengths = splits[1:] - splits[:-1]

    # Length of each output row: for segment `id` this is
    # `max(row_lengths[i])` over rows with `segment_ids[i]=id`, clamped at 0.
    # (dtype=int64, shape=[output_nrows])
    longest_per_segment = math_ops.unsorted_segment_max(
        row_lengths, segment_ids, num_segments)
    out_row_lengths = math_ops.maximum(longest_per_segment, 0)
    assert out_row_lengths.dtype == dtypes.int64

    # Splits tensor for the output RaggedTensor: [0] + cumsum(lengths).
    out_splits = array_ops.concat(
        [array_ops.zeros([1], dtypes.int64),
         math_ops.cumsum(out_row_lengths)],
        axis=0)

    # For each row in `data`, the start & limit positions in output.values
    # where that row's values will be aggregated.
    row_start = array_ops.gather(out_splits, segment_ids)
    row_limit = row_start + row_lengths

    # For each value in `data.values`, the index in `output.values` that it
    # is aggregated into.
    # NOTE(review): `range` must resolve to a ragged range op here (its
    # result exposes `.values`), not the Python builtin -- confirm it is
    # defined at module level.
    value_targets = range(row_start, row_limit).values

    # Recursively aggregate the values.
    out_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                           value_targets, out_splits[-1])
    return ragged_factory_ops.from_row_splits(out_values, out_splits)
def __init__(self,
             cat,
             components,
             validate_args=False,
             allow_nan_stats=True,
             name="Mixture"):
  """Initialize a Mixture distribution.

  A `Mixture` is defined by a `Categorical` (`cat`, representing the
  mixture probabilities) and a list of `Distribution` objects
  all having matching dtype, batch shape, event shape, and continuity
  properties (the components).

  The `num_classes` of `cat` must be possible to infer at graph construction
  time and match `len(components)`.

  Args:
    cat: A `Categorical` distribution instance, representing the probabilities
      of `distributions`.
    components: A list or tuple of `Distribution` instances.
      Each instance must have the same type, be defined on the same domain,
      and have matching `event_shape` and `batch_shape`.
    validate_args: Python `bool`, default `False`. If `True`, raise a runtime
      error if batch or event ranks are inconsistent between cat and any of
      the distributions. This is only checked if the ranks cannot be
      determined statically at graph construction time.
    allow_nan_stats: Boolean, default `True`. If `False`, raise an
      exception if a statistic (e.g. mean/mode/etc...) is undefined for any
      batch member. If `True`, batch members with valid parameters leading to
      undefined statistics will return NaN for this statistic.
    name: A name for this distribution (optional).

  Raises:
    TypeError: If cat is not a `Categorical`, or `components` is not
      a list or tuple, or the elements of `components` are not
      instances of `Distribution`, or do not have matching `dtype`.
    ValueError: If `components` is an empty list or tuple, or its
      elements do not have a statically known event rank.
      If `cat.num_classes` cannot be inferred at graph creation time,
      or the constant value of `cat.num_classes` is not equal to
      `len(components)`, or all `components` and `cat` do not have
      matching static batch shapes, or all components do not
      have matching static event shapes.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = locals()

  # Argument type validation.
  if not isinstance(cat, categorical.Categorical):
    raise TypeError("cat must be a Categorical distribution, but saw: %s" %
                    cat)
  if not components:
    raise ValueError("components must be a non-empty list or tuple")
  if not isinstance(components, (list, tuple)):
    raise TypeError("components must be a list or tuple, but saw: %s" %
                    components)
  if not all(isinstance(c, distribution.Distribution) for c in components):
    raise TypeError(
        "all entries in components must be Distribution instances"
        " but saw: %s" % components)

  dtype = components[0].dtype
  if not all(d.dtype == dtype for d in components):
    raise TypeError("All components must have the same dtype, but saw "
                    "dtypes: %s" % [(d.name, d.dtype) for d in components])

  # Merge static shapes across `cat` and all components; `merge_with` raises
  # if they are statically incompatible.
  static_event_shape = components[0].event_shape
  static_batch_shape = cat.batch_shape
  for d in components:
    static_event_shape = static_event_shape.merge_with(d.event_shape)
    static_batch_shape = static_batch_shape.merge_with(d.batch_shape)
  if static_event_shape.ndims is None:
    raise ValueError(
        "Expected to know rank(event_shape) from components, but "
        "none of the components provide a static number of ndims")

  # Ensure that all batch and event ndims are consistent.
  with ops.name_scope(name, values=[cat.logits]):
    num_components = cat.event_size
    static_num_components = tensor_util.constant_value(num_components)
    if static_num_components is None:
      raise ValueError(
          "Could not infer number of classes from cat and unable "
          "to compare this value to the number of components passed in.")
    # Possibly convert from numpy 0-D array.
    static_num_components = int(static_num_components)
    if static_num_components != len(components):
      raise ValueError("cat.num_classes != len(components): %d vs. %d" %
                       (static_num_components, len(components)))

    cat_batch_shape = cat.batch_shape_tensor()
    cat_batch_rank = array_ops.size(cat_batch_shape)
    if validate_args:
      batch_shapes = [d.batch_shape_tensor() for d in components]
      batch_ranks = [array_ops.size(bs) for bs in batch_shapes]
      check_message = ("components[%d] batch shape must match cat "
                       "batch shape")
      # Rank checks first, then full shape checks, one per component.
      self._assertions = [
          check_ops.assert_equal(
              cat_batch_rank, rank, message=check_message % di)
          for di, rank in enumerate(batch_ranks)
      ]
      self._assertions += [
          check_ops.assert_equal(
              cat_batch_shape, shape, message=check_message % di)
          for di, shape in enumerate(batch_shapes)
      ]
    else:
      self._assertions = []

    self._cat = cat
    self._components = list(components)
    self._num_components = static_num_components
    self._static_event_shape = static_event_shape
    self._static_batch_shape = static_batch_shape

  # We let the Mixture distribution access _graph_parents since its arguably
  # more like a baseclass.
  # Copy the list: `+=` on the original object would extend the categorical's
  # own `_graph_parents` in place with every component's parents.
  graph_parents = list(self._cat._graph_parents)  # pylint: disable=protected-access
  for c in self._components:
    graph_parents += c._graph_parents  # pylint: disable=protected-access

  super(Mixture, self).__init__(
      dtype=dtype,
      reparameterization_type=distribution.NOT_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=graph_parents,
      name=name)
def batch_jacobian(output, inp, use_pfor=True, parallel_iterations=None):
  """Computes and stacks jacobians of `output[i,...]` w.r.t. `input[i,...]`.

  e.g.
  x = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
  y = x * x
  jacobian = batch_jacobian(y, x)
  # => [[[2,  0], [0,  4]], [[6,  0], [0,  8]]]

  Args:
    output: A tensor with shape [b, y1, ..., y_n]. `output[i,...]` should
      only depend on `inp[i,...]`.
    inp: A tensor with shape [b, x1, ..., x_m]
    use_pfor: If true, uses pfor for computing the Jacobian. Else uses a
      tf.while_loop.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.

  Returns:
    A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
    is the jacobian of `output[i, ...]` w.r.t. `inp[i, ...]`, i.e. stacked
    per-example jacobians. `None` if the underlying loop yields `None`.

  Raises:
    ValueError: if first dimension of `output` and `inp` do not match.
  """
  out_shape = output.shape
  if not out_shape[0].is_compatible_with(inp.shape[0]):
    raise ValueError("Need first dimension of output shape (%s) and inp shape "
                     "(%s) to match." % (output.shape, inp.shape))

  # Use Python ints when the shape is static, tensors otherwise.
  if out_shape.is_fully_defined():
    num_examples = int(out_shape[0])
    row_size = out_shape.num_elements() // num_examples
  else:
    out_shape = array_ops.shape(output)
    num_examples = out_shape[0]
    row_size = array_ops.size(output) // num_examples
  in_shape = array_ops.shape(inp)

  # Flatten output to 2-D, guarded by a runtime batch-size check.
  batch_sizes_match = check_ops.assert_equal(num_examples, in_shape[0])
  with ops.control_dependencies([batch_sizes_match]):
    flat_output = array_ops.reshape(output, [num_examples, row_size])

  def loop_fn(i):
    """Returns the gradient of column `i` of the flattened output."""
    column = array_ops.gather(flat_output, i, axis=1)
    return gradient_ops.gradients(column, inp)[0]

  if use_pfor:
    stacked = control_flow_ops.pfor(
        loop_fn, row_size, parallel_iterations=parallel_iterations)
  else:
    stacked = control_flow_ops.for_loop(
        loop_fn,
        flat_output.dtype,
        row_size,
        parallel_iterations=parallel_iterations)
  if stacked is None:
    return None

  # The loop stacks results along the output-row axis first; move the batch
  # axis to the front, then restore the original output/input dimensions.
  stacked = array_ops.reshape(stacked, [row_size, num_examples, -1])
  per_example = array_ops.transpose(stacked, [1, 0, 2])
  final_shape = array_ops.concat([out_shape, in_shape[1:]], axis=0)
  return array_ops.reshape(per_example, final_shape)
def _assert_self_adjoint(self):
  """Returns an op asserting that `self.row` equals `self.col`."""
  failure_message = ("row and col are not the same, and "
                     "so this operator is not self-adjoint.")
  return check_ops.assert_equal(self.row, self.col, message=failure_message)
def predict(self, features):
  """Computes predictions multiple steps into the future.

  Predictions are produced `self.output_window_size` steps at a time inside a
  `while_loop`. When `self.input_window_size > 0`, each iteration's predicted
  mean is fed back in as (part of) the input values for the next iteration.

  Args:
    features: A dictionary with the following key/value pairs:
      PredictionFeatures.TIMES: A [batch size, predict window size]
        integer Tensor of times, after the window of data indicated by
        `STATE_TUPLE`, to make predictions for.
      PredictionFeatures.STATE_TUPLE: A tuple of (times, values), times with
        shape [batch size, self.input_window_size], values with shape [batch
        size, self.input_window_size, self.num_features] representing a
        segment of the time series before `TIMES`. This data is used to
        start the autoregressive computation. This should have data for
        at least self.input_window_size timesteps.

  Returns:
    A dictionary with keys, "mean", "covariance". The
    values are Tensors of shape [batch_size, predict window size,
    num_features] and correspond to the values passed in `TIMES`. When
    `self.loss` is `ARModel.SQUARED_LOSS` no covariance is predicted and a
    tensor of ones (passed through `_scale_back_variance`) is returned for
    "covariance".
  """
  predict_times = math_ops.cast(
      ops.convert_to_tensor(features[PredictionFeatures.TIMES]), dtypes.int32)
  batch_size = array_ops.shape(predict_times)[0]
  num_predict_values = array_ops.shape(predict_times)[1]
  # Ceiling division: number of output windows needed to cover all times.
  prediction_iterations = (
      (num_predict_values + self.output_window_size - 1) //
      self.output_window_size)
  # Pad predict_times so as to have exact multiple of self.output_window_size
  # values per example.
  padding_size = (prediction_iterations * self.output_window_size -
                  num_predict_values)
  padding = array_ops.zeros([batch_size, padding_size], predict_times.dtype)
  predict_times = control_flow_ops.cond(
      padding_size > 0,
      lambda: array_ops.concat([predict_times, padding], 1),
      lambda: predict_times)
  state = features[PredictionFeatures.STATE_TUPLE]
  (state_times, state_values) = state
  state_times = math_ops.cast(ops.convert_to_tensor(state_times), dtypes.int32)
  state_values = ops.convert_to_tensor(state_values, dtype=self.dtype)

  # The first model input covers the tail of the state window (if any)
  # followed by the first output window of prediction times.
  initial_input_times = predict_times[:, :self.output_window_size]
  if self.input_window_size > 0:
    initial_input_times = array_ops.concat([
        state_times[:, -self.input_window_size:], initial_input_times
    ], 1)
    values_size = array_ops.shape(state_values)[1]
    times_size = array_ops.shape(state_times)[1]
    # The state must hold at least one full input window, and its times and
    # values must cover the same number of steps.
    with ops.control_dependencies([
        check_ops.assert_greater_equal(values_size, self.input_window_size),
        check_ops.assert_equal(values_size, times_size)
    ]):
      initial_input_values = state_values[:, -self.input_window_size:, :]
  else:
    # Placeholder loop variable; it is passed through unchanged by the loop
    # body when there is no input window.
    initial_input_values = 0

  # Iterate over the predict_times, predicting self.output_window_size values
  # in each iteration.
  def _while_condition(iteration_number, *unused_args):
    return math_ops.less(iteration_number, prediction_iterations)

  def _while_body(iteration_number, input_times, input_values, mean_ta,
                  covariance_ta):
    """Predict self.output_window_size values."""
    prediction_ops = self.prediction_ops(input_times, input_values)
    predicted_mean = prediction_ops["mean"]
    predicted_covariance = prediction_ops["covariance"]
    # Start of the next output window in predict_times. The iteration index
    # is clamped to the last window, so the slices taken in the final
    # iteration (whose results are not consumed) stay within predict_times.
    offset = self.output_window_size * gen_math_ops.minimum(
        iteration_number + 1, prediction_iterations - 1)
    if self.input_window_size > 0:
      if self.output_window_size < self.input_window_size:
        # Slide the input window: drop the oldest output_window_size steps
        # and append the newly predicted ones.
        new_input_values = array_ops.concat([
            input_values[:, self.output_window_size:, :], predicted_mean
        ], 1)
        new_input_times = array_ops.concat([
            input_times[:, self.output_window_size:],
            predict_times[:, offset:offset + self.output_window_size]
        ], 1)
      else:
        # The predictions alone fill the whole input window.
        new_input_values = predicted_mean[:, -self.input_window_size:, :]
        new_input_times = predict_times[:, offset - self.input_window_size:
                                        offset + self.output_window_size]
    else:
      new_input_values = input_values
      new_input_times = predict_times[:,
                                      offset:offset + self.output_window_size]
    # Give the loop variable the same static shape as its initial value.
    new_input_times.set_shape(initial_input_times.get_shape())
    new_mean_ta = mean_ta.write(iteration_number, predicted_mean)
    if isinstance(covariance_ta, tensor_array_ops.TensorArray):
      new_covariance_ta = covariance_ta.write(
          iteration_number, predicted_covariance)
    else:
      new_covariance_ta = covariance_ta
    return (iteration_number + 1, new_input_times, new_input_values,
            new_mean_ta, new_covariance_ta)

  # Note that control_flow_ops.while_loop doesn't seem happy with None. Hence
  # using 0 for cases where we don't want to predict covariance.
  covariance_ta_init = (tensor_array_ops.TensorArray(
      dtype=self.dtype, size=prediction_iterations)
                        if self.loss != ARModel.SQUARED_LOSS else 0.)
  mean_ta_init = tensor_array_ops.TensorArray(dtype=self.dtype,
                                              size=prediction_iterations)
  _, _, _, mean_ta, covariance_ta = control_flow_ops.while_loop(
      _while_condition, _while_body, [
          0, initial_input_times, initial_input_values, mean_ta_init,
          covariance_ta_init
      ])

  def _parse_ta(values_ta):
    """Helper function to parse the returned TensorArrays."""
    # A non-TensorArray value is the placeholder used when covariance is not
    # predicted.
    if not isinstance(values_ta, tensor_array_ops.TensorArray):
      return None
    predictions_length = prediction_iterations * self.output_window_size
    # Shape [prediction_iterations, batch_size, self.output_window_size,
    # self.num_features]
    values_packed = values_ta.stack()
    # Transpose to move batch dimension outside.
    output_values = array_ops.reshape(
        array_ops.transpose(values_packed, [1, 0, 2, 3]),
        array_ops.stack([batch_size, predictions_length, -1]))
    # Clip to desired size
    return output_values[:, :num_predict_values, :]

  predicted_mean = _parse_ta(mean_ta)
  predicted_covariance = _parse_ta(covariance_ta)
  if predicted_covariance is None:
    predicted_covariance = array_ops.ones_like(predicted_mean)
  # Transform and scale the mean and covariance appropriately.
  predicted_mean = self._scale_back_data(predicted_mean)
  predicted_covariance = self._scale_back_variance(predicted_covariance)
  return {"mean": predicted_mean, "covariance": predicted_covariance}
def matmul(a: ragged_tensor.RaggedOrDense,
           b: ragged_tensor.RaggedOrDense,
           transpose_a=False,
           transpose_b=False,
           adjoint_a=False,
           adjoint_b=False,
           a_is_sparse=False,
           b_is_sparse=False,
           output_type=None,
           name=None):
  """Multiplies matrix `a` by matrix `b`.

  If all transpose or adjoint attributes are `False` then:

  ```
  output[..., i, j] = sum_k (a[..., i, k] * b[..., k, j]), for all indices i, j.
  ```

  The inputs `a` and `b` must have `rank >= 2`, where the outermost `rank - 2`
  dimensions are batch dimensions. The inputs must have the same dtype. See
  `tf.matmul` for more information.

  If neither input is ragged, the call is forwarded to `math_ops.matmul`.

  Args:
    a: `tf.Tensor` or `RaggedTensor` with `rank > 1`.
    b: `tf.Tensor` or `RaggedTensor` with same type and rank as `a`.
    transpose_a: If `True`, `a` is transposed before multiplication.
    transpose_b: If `True`, `b` is transposed before multiplication.
    adjoint_a: If `True`, `a` is conjugated & transposed before multiplication.
    adjoint_b: If `True`, `b` is conjugated & transposed before multiplication.
    a_is_sparse: If `True`, optimize assuming `a` is mostly zero.
    b_is_sparse: If `True`, optimize assuming `b` is mostly zero.
    output_type: The output datatype (optional).
    name: Name for the operation (optional).

  Returns:
    A `Tensor` or `RaggedTensor` with the same rank and shape as `a`, where
    each inner-most matrix is the product of the corresponding matrices in `a`
    and `b`.

  Raises:
    ValueError: If both the transpose and adjoint flags are set for the same
      input, if a ragged input is involved and the dtypes or ranks of `a` and
      `b` differ, or if neither rank is statically known.
  """
  if transpose_a and adjoint_a:
    raise ValueError('Only one of transpose_a and adjoint_a can be True.')
  if transpose_b and adjoint_b:
    raise ValueError('Only one of transpose_b and adjoint_b can be True.')

  # Options forwarded unchanged to whichever implementation handles the call.
  kwargs = dict(
      transpose_a=transpose_a,
      transpose_b=transpose_b,
      adjoint_a=adjoint_a,
      adjoint_b=adjoint_b,
      a_is_sparse=a_is_sparse,
      b_is_sparse=b_is_sparse,
      output_type=output_type)

  with ops.name_scope(name, 'RaggedMatMul', [a, b]) as name:
    a = ragged_tensor.convert_to_tensor_or_ragged_tensor(a, name='a')
    b = ragged_tensor.convert_to_tensor_or_ragged_tensor(b, name='b')
    a_is_ragged = isinstance(a, ragged_tensor.RaggedTensor)
    b_is_ragged = isinstance(b, ragged_tensor.RaggedTensor)
    # Purely dense inputs need no ragged handling.
    if not (a_is_ragged or b_is_ragged):
      return math_ops.matmul(a, b, **kwargs)

    if a.dtype != b.dtype:
      raise ValueError('`a` and `b` must have the same dtype.')

    # TODO(edloper): Support broadcasting inputs. (Broadcast support is not
    # documented by https://www.tensorflow.org/api_docs/python/tf/linalg/matmul,
    # but it is supported by the op.)

    # Find the rank of the input tensors.
    if a.shape.rank is None:
      if b.shape.rank is None:
        raise ValueError('matmul requires at least one input to have known '
                         'rank if either input is ragged.')
      rank = b.shape.rank
    else:
      if b.shape.rank is not None and a.shape.rank != b.shape.rank:
        raise ValueError('`a` and `b` must have the same rank.')
      rank = a.shape.rank

    # At least one of `a` and `b` is ragged; and ragged tensors always have
    # rank>=2.
    if rank < 2:
      # This can happen if e.g. `a` is a 1D dense tensor and `b` is a
      # ragged tensor with unknown rank. Since ragged tensors always have
      # `rank>=2`, this implies that `a` and `b` have different ranks.
      raise ValueError('`a` and `b` must have the same rank.')

    # Rank>3: We have multiple batch dimensions. Merge them into a single
    # batch dimension, recursively call `matmul`, and then restore the original
    # batch dimension (using a.row_splits).
    if rank > 3:
      shape_err = 'Batch dimensions of `a` and `b` do not have the same size.'
      # Wrap dense inputs so both sides expose `row_splits` and `values`.
      if not a_is_ragged:
        a = ragged_tensor.RaggedTensor.from_tensor(a, ragged_rank=1)
      if not b_is_ragged:
        b = ragged_tensor.RaggedTensor.from_tensor(b, ragged_rank=1)
      with ops.control_dependencies([
          check_ops.assert_equal(a.row_splits, b.row_splits, message=shape_err)
      ]):
        flat_result = matmul(a.values, b.values, **kwargs)
        return a.with_values(flat_result)

    if rank == 2:
      return _matmul_2d(a, b, **kwargs)

    assert rank == 3  # I.e., we have a single batch dimension.

    a_ragged_rank = a.ragged_rank if a_is_ragged else 0
    if a_ragged_rank == 1 and not (b_is_ragged or transpose_a or adjoint_a):
      # If `a.shape=[B, (I), J]` and `b.shape=[B, J, K]`, then we can compute
      # the result with a single dense `matmul`.
      return _matmul_3d_with_batch_dim_folding(a, b, **kwargs)
    else:
      # Otherwise, fall back on using `map_fn`.
      return _matmul_3d_with_map_fn(a, b, **kwargs)
def _add_batched_ragged_partition(rt, partition, tensor_dict, feature_key,
                                  validate, outer_splits=None):
  """Adds a batched ragged partition tensor to a batched ragged tensor.

  Args:
    rt: A RaggedTensor with shape [batch_size, ...].
    partition: The partition configuration object. Specifies the key that
      should be used to look up the partition tensor (unless partition is a
      RaggedFeature.UniformRowLength, in which case there is no partition
      tensor). The specified tensor must have shape [batch_size, ...].
    tensor_dict: The dictionary mapping keys to tensors.
    feature_key: The name of the feature being parsed (for error messages).
    validate: Whether to validate that the values form a valid RaggedTensor.
    outer_splits: If not None, then we have two batch dimensions, and this
      is the row-splits for the collapsed batch dimension. Every partition
      tensor must have an outer row_splits that matches this value.

  Returns:
    A new RaggedTensor where each batch item `rt[i]` has been partitioned
    using the `partition_t[i]`.

  Raises:
    ValueError: If `partition` is not one of the handled `RaggedFeature`
      partition types.
  """
  if isinstance(partition, RaggedFeature.UniformRowLength):
    # No partition tensor to look up: every inner row has `partition.length`
    # values, so the existing row splits are simply rescaled by that length.
    if rt.ragged_rank > 1:
      length = ops.convert_to_tensor(partition.length, rt.row_splits.dtype)
      return ragged_tensor.RaggedTensor.from_row_splits(
          ragged_tensor.RaggedTensor.from_uniform_row_length(
              rt.values, length, validate=validate),
          rt.row_splits // length,
          validate=validate)
    else:
      reshaped_vals = array_ops.reshape(
          rt.values,
          array_ops.concat([[-1, partition.length],
                            array_ops.shape(rt.values)[1:]],
                           axis=0))
      return ragged_tensor.RaggedTensor.from_row_splits(
          reshaped_vals, rt.row_splits // partition.length, validate=validate)

  partition_t = tensor_dict[partition.key]
  # Partition values are combined with rt's row partition tensors below, so
  # bring them to the same dtype.
  if partition_t.values.dtype != rt.row_splits.dtype:
    partition_t = math_ops.cast(partition_t, rt.row_splits.dtype)

  checks = []
  if outer_splits is not None:
    if validate:
      checks.append(
          check_ops.assert_equal(
              outer_splits,
              partition_t.row_splits,
              message="Feature %s: values and partitions are not aligned" %
              feature_key))
    # Drop the outer (collapsed) batch dimension of the partition tensor.
    partition_t = partition_t.values

  with ops.control_dependencies(checks):
    if isinstance(partition,
                  (RaggedFeature.RowSplits, RaggedFeature.RowLimits)):
      if isinstance(partition, RaggedFeature.RowSplits):
        # Dropping the first split of each batch item leaves row limits.
        partition_t = partition_t[:, 1:]
      # Shift each batch item's limits by where that item starts in rt.values.
      adjusted_limits = partition_t.values + array_ops.repeat(
          rt.row_starts(), partition_t.row_lengths())
      return partition_t.with_values(
          ragged_tensor.RaggedTensor.from_row_limits(rt.values,
                                                     adjusted_limits,
                                                     validate=validate))
    elif isinstance(partition, RaggedFeature.RowStarts):
      # Same per-item shift as above, applied to row starts.
      adjusted_starts = partition_t.values + array_ops.repeat(
          rt.row_starts(), partition_t.row_lengths())
      return partition_t.with_values(
          ragged_tensor.RaggedTensor.from_row_starts(rt.values,
                                                     adjusted_starts,
                                                     validate=validate))
    elif isinstance(partition, RaggedFeature.RowLengths):
      # Lengths are relative already, so no per-item shift is applied.
      return partition_t.with_values(
          ragged_tensor.RaggedTensor.from_row_lengths(rt.values,
                                                      partition_t.values,
                                                      validate=validate))
    elif isinstance(partition, RaggedFeature.ValueRowIds):
      nrows = math_ops.maximum(  # number of rows in each batch item
          ragged_math_ops.reduce_max(partition_t + 1, axis=1), 0)
      # Offset each item's row ids by the number of rows in preceding items.
      adjusted_rowids = partition_t.values + array_ops.repeat(
          math_ops.cumsum(nrows, exclusive=True), partition_t.row_lengths())
      return ragged_tensor.RaggedTensor.from_row_lengths(
          ragged_tensor.RaggedTensor.from_value_rowids(
              rt.values, adjusted_rowids, validate=validate),
          nrows,
          validate=validate)

    raise ValueError("Unhandled partition type %r" % partition)
def fill_lower_triangular(x, validate_args=False,
                          name="fill_lower_triangular"):
  """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n**2), large constants and sub-optimal
  vectorization means this function is 5x slower than zeroing out the upper
  triangular, i.e., `tf.matrix_band_part(X, -1, 0)`. This function becomes
  competitive only when several matmul/cholesky/etc ops can be elided in
  constructing the input. Example: wiring a fully connected layer as a
  covariance matrix; this function reduces the final layer by 2x and possibly
  reduces the network arch complexity considerably. In most cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Warning: This Op is intended for convenience, not efficiency.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: Python `bool`, default `False`. Whether to ensure the shape
      of `x` can be mapped to a lower triangular matrix (controls non-static
      checks only).
    name: Python `str`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if `x` has static shape which cannot be mapped to a lower
      triangular matrix.
  """
  # TODO(jvdillon): Replace this code with dedicated op when it exists.
  with ops.name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    if (x.get_shape().ndims is not None and
        x.get_shape()[-1].value is not None):
      d = x.get_shape()[-1].value
      # d = n(n+1)/2 implies n is:
      n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
      # n * (n + 1) is always even, so integer division is exact.
      d_inferred = n * (n + 1) // 2
      if d != d_inferred:
        raise ValueError(
            "Input cannot be mapped to a lower triangular; "
            "n*(n+1)/2 = %d != %d" % (d_inferred, d))
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([n, n]))
    else:
      # Keep `d` as an integer `Tensor`: it is reused below as a reshape
      # dimension and is compared against an integer in the validation check.
      d = array_ops.shape(x)[-1]
      # d = n(n+1)/2 implies n is:
      n = math_ops.cast(
          0.5 * (math_ops.sqrt(
              1. + 8. * math_ops.cast(d, dtype=dtypes.float32)) - 1.),
          dtype=dtypes.int32)
      if validate_args:
        # Integer arithmetic keeps both operands of the assertion the same
        # dtype.
        is_valid_input_shape = check_ops.assert_equal(
            n * (n + 1) // 2, d,
            message="Input cannot be mapped to a lower triangular.")
        n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([None, None]))

    def tril_ids(n):
      """Internal helper to create vector of linear indices into y."""
      # Build the ids statically; chose 512 because it implies 1MiB.
      if not tensor_util.is_tensor(n) and n <= 512:
        ids = np.arange(n**2, dtype=np.int32)
        rows = (ids / n).astype(np.int32)  # Implicit floor.
        # We need to stop incrementing the index when we encounter
        # upper-triangular elements. The idea here is to compute the
        # lower-right number of zeros then by "symmetry" subtract this from the
        # total number of zeros, n(n-1)/2.
        # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
        offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
        # We could also zero out when (rows < cols) == (rows < ids-n*rows).
        # mask = (ids <= (n + 1) * rows).astype(np.int32)
      else:
        ids = math_ops.range(n**2)
        rows = math_ops.cast(ids / n, dtype=dtypes.int32)
        offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                               dtype=dtypes.int32)
      return ids - offset

    # Special-case non-batch case.
    if x.get_shape().ndims == 1:
      y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
      # The gather also fills the strict upper triangle; zero it out.
      y = array_ops.matrix_band_part(y, -1, 0)
      y.set_shape(y.get_shape().merge_with(final_shape))
      return y

    # Make ids for each batch dim.
    if (x.get_shape().ndims is not None and
        x.get_shape()[:-1].is_fully_defined()):
      batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32)
      m = np.prod(batch_shape).astype(np.int32)
    else:
      batch_shape = array_ops.shape(x)[:-1]
      m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
    batch_ids = math_ops.range(m)

    # Assemble the tril_ids into batch,tril_id pairs.
    idx = array_ops.stack([
        array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
        array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
    ])
    idx = array_ops.transpose(idx, [1, 2, 0])

    # Gather up, reshape, and return.
    y = array_ops.reshape(x, [-1, d])
    y = array_ops.gather_nd(y, idx)
    y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0))
    y = array_ops.matrix_band_part(y, -1, 0)
    y.set_shape(y.get_shape().merge_with(final_shape))
    return y
def __init__(self,
             loc=None,
             covariance_matrix=None,
             validate_args=False,
             allow_nan_stats=True,
             name="MultivariateNormalFullCovariance"):
  """Construct Multivariate Normal distribution on `R^k`.

  The covariance matrix is converted to its Cholesky factor (`scale_tril`),
  which is then handed to the parent class constructor.

  The `batch_shape` is the broadcast shape between `loc` and
  `covariance_matrix` arguments.

  The `event_shape` is given by last dimension of the matrix implied by
  `covariance_matrix`. The last dimension of `loc` (if provided) must
  broadcast with this.

  A non-batch `covariance_matrix` matrix is a `k x k` symmetric positive
  definite matrix. In other words it is (real) symmetric with all eigenvalues
  strictly positive.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
      `b >= 0` and `k` is the event size.
    covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
      same `dtype` as `loc`. The strict upper triangle of `covariance_matrix`
      is ignored, so if `covariance_matrix` is not symmetric no error will be
      raised (unless `validate_args is True`). `covariance_matrix` has shape
      `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if neither `loc` nor `covariance_matrix` are specified.
      (Not raised here directly; presumably by the parent constructor.)
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = locals()

  # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
  with ops.name_scope(name):
    with ops.name_scope("init", values=[loc, covariance_matrix]):
      scale_tril = None
      if covariance_matrix is not None:
        covariance_matrix = ops.convert_to_tensor(
            covariance_matrix, name="covariance_matrix")
        # No need to validate that covariance_matrix is non-singular.
        # LinearOperatorLowerTriangular has an assert_non_singular method that
        # is called by the Bijector.
        # However, cholesky() ignores the upper triangular part, so we do need
        # to separately assert symmetric.
        if validate_args:
          symmetry_check = check_ops.assert_equal(
              covariance_matrix,
              array_ops.matrix_transpose(covariance_matrix),
              message="Matrix was not symmetric.")
          covariance_matrix = control_flow_ops.with_dependencies(
              [symmetry_check], covariance_matrix)
        scale_tril = linalg_ops.cholesky(covariance_matrix)
      super(MultivariateNormalFullCovariance, self).__init__(
          loc=loc,
          scale_tril=scale_tril,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=name)
  self._parameters = parameters
def op(self):
  """Returns the cluster initializer op.

  When `self._num_remaining` equals 0 the op only asserts that
  `self._cluster_centers_initialized` is `True`; otherwise it runs
  `self._initialize`.
  """
  def _assert_initialized():
    return check_ops.assert_equal(self._cluster_centers_initialized, True)

  nothing_remaining = math_ops.equal(self._num_remaining, 0)
  return control_flow_ops.cond(
      nothing_remaining, _assert_initialized, self._initialize)
def fn(x):
  """Returns an identity of `x` that depends on an `x == 0` assertion."""
  is_zero = check_ops.assert_equal(x, 0)
  with ops.control_dependencies([is_zero]):
    checked = array_ops.identity(x)
  return checked
def _reshape_helper(self, x, event_shape_in, event_shape_out):
  """Reshape only the event_shape of an input `Tensor`.

  Names with a trailing underscore hold statically known (Python/numpy)
  values, or `None` when the value is not statically available; the
  un-suffixed counterparts are the values used for graph computations.

  Args:
    x: `Tensor` whose trailing (event) dimensions are to be reshaped.
    event_shape_in: Shape `Tensor` the event dimensions of `x` are expected to
      match; entries of -1 are skipped in the comparison.
    event_shape_out: Shape `Tensor` for the event dimensions of the result.

  Returns:
    `x` with its leading (sample and batch) dimensions kept and its event
    dimensions reshaped to `event_shape_out`.

  Raises:
    ValueError: if the statically known event shape of `x` does not match the
      statically known, specified entries of `event_shape_in`.
  """
  event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
  event_ndims_in = _ndims_from_shape(event_shape_in)
  x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)

  assertions = []

  # Ensure x.event_shape is compatible with event_shape_in.
  if (event_ndims_in_ is not None
      and x_ndims_ is not None
      and x.shape.with_rank_at_least(event_ndims_in_)[
          x_ndims_-event_ndims_in_:].is_fully_defined()):
    # The event shape is fully known statically; the same numpy value serves
    # as both the static and the "dynamic" representation.
    x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
        np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
  else:
    x_event_shape_, x_event_shape = (
        None, array_ops.shape(x)[x_ndims-event_ndims_in:])

  event_shape_in_ = tensor_util.constant_value(event_shape_in)

  if x_event_shape_ is not None and event_shape_in_ is not None:
    # Compare the shape dimensions that are fully specified in the
    # input (i.e., for which event_shape_in is not -1). If x_event_shape
    # matches along all of these dimensions, it is compatible with
    # the desired input shape and any further mismatches (i.e.,
    # incompatibility with the desired *output* shape) will be
    # caught inside of array_ops.reshape() below.
    x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
    event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
    if not np.equal(x_event_shape_specified_,
                    event_shape_in_specified_).all():
      raise ValueError(
          "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
          format(x_event_shape_, event_shape_in_))
  elif self.validate_args:
    # Similarly to the static case, we compare the shape dimensions
    # that are fully specified in the input. We extract these
    # dimensions using boolean_mask(), which requires that the mask
    # have known ndims. We can assume that shape Tensors always have
    # ndims==1 (this assumption is verified inside of
    # _maybe_check_valid_shape), so the reshape operation is just a
    # no-op that formally encodes this fact to make boolean_mask()
    # happy.
    event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
    x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
                                                     event_shape_mask)
    event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
                                                      event_shape_mask)
    assertions.append(check_ops.assert_equal(
        x_event_shape_specified, event_shape_in_specified,
        message="Input `event_shape` does not match `event_shape_in`."))

  if assertions:
    x = control_flow_ops.with_dependencies(assertions, x)

  # get the parts of shape(x) that will not change
  sample_and_batch_shape = array_ops.shape(x)

  ndims = (x.shape.ndims if x.shape.ndims is not None
           else array_ops.rank(x))
  sample_and_batch_shape = sample_and_batch_shape[
      :(ndims - math_ops.abs(event_ndims_in))]

  if (event_ndims_in_ is not None
      and x_ndims_ is not None
      and event_ndims_in_ == x_ndims_):
    # Hack to allow forward/inverse_event_shape to do shape
    # inference by calling this helper method with a dummy Tensor of
    # shape event_shape_in. In this special case,
    # sample_and_batch_shape will be empty so we can preserve static
    # shape information by avoiding the concat operation below
    # (which would be a no-op).
    new_shape = event_shape_out
  else:
    new_shape = array_ops.concat(
        [sample_and_batch_shape, event_shape_out], axis=0)

  return array_ops.reshape(x, new_shape)
def _batch_size_checks(self, batch_size, error_message):
  """Returns a one-element list holding a batch size assertion op.

  The op asserts that `batch_size` equals the attention mechanism's
  `batch_size`, failing with `error_message` otherwise.
  """
  expected_batch_size = self._attention_mechanism.batch_size
  batch_size_check = check_ops.assert_equal(
      batch_size, expected_batch_size, message=error_message)
  return [batch_size_check]
def assert_symmetric(matrix):
  """Returns `matrix`, gated on an assertion that it equals its transpose."""
  symmetry_check = check_ops.assert_equal(
      matrix, array_ops.matrix_transpose(matrix))
  return control_flow_ops.with_dependencies([symmetry_check], matrix)
def __init__(self,
             cell,
             attention_mechanism,
             attention_layer_size=None,
             alignment_history=False,
             cell_input_fn=None,
             probability_fn=None,
             output_attention=True,
             initial_cell_state=None,
             name=None):
  """Construct the `AttentionWrapper`.

  Args:
    cell: An instance of `RNNCell`.
    attention_mechanism: An instance of `AttentionMechanism`.
    attention_layer_size: Python integer, the depth of the attention (output)
      layer. If None (default), use the context as attention at each time
      step. Otherwise, feed the context and cell output into the attention
      layer to generate attention at each time step.
    alignment_history: Python boolean, whether to store alignment history
      from all time steps in the final output state (currently stored as a
      time major `TensorArray` on which you must call `stack()`).
    cell_input_fn: (optional) A `callable`. The default is:
      `lambda inputs, attention: array_ops.concat([inputs, attention], -1)`.
    probability_fn: (optional) A `callable`. Converts the score to
      probabilities. The default is @{tf.nn.softmax}. Other options include
      @{tf.contrib.seq2seq.hardmax} and @{tf.contrib.sparsemax.sparsemax}.
    output_attention: Python bool. If `True` (default), the output at each
      time step is the attention value. This is the behavior of Luong-style
      attention mechanisms. If `False`, the output at each time step is
      the output of `cell`. This is the behavior of Bahdanau-style
      attention mechanisms. In both cases, the `attention` tensor is
      propagated to the next time step via the state and is used there.
      This flag only controls whether the attention mechanism is propagated
      up to the next cell in an RNN stack or to the top RNN output.
    initial_cell_state: The initial state value to use for the cell when
      the user calls `zero_state()`. Note that if this value is provided
      now, and the user uses a `batch_size` argument of `zero_state` which
      does not match the batch size of `initial_cell_state`, proper
      behavior is not guaranteed.
    name: Name to use when creating ops.

  Raises:
    TypeError: if `cell` is not an `RNNCell`, if `attention_mechanism` is not
      an `AttentionMechanism`, or if a provided `cell_input_fn` or
      `probability_fn` is not callable.
  """
  super(AttentionWrapper, self).__init__(name=name)

  # Validate argument types up front.
  if not isinstance(cell, core_rnn_cell.RNNCell):
    raise TypeError(
        "cell must be an RNNCell, saw type: %s" % type(cell).__name__)
  if not isinstance(attention_mechanism, AttentionMechanism):
    raise TypeError(
        "attention_mechanism must be a AttentionMechanism, saw type: %s"
        % type(attention_mechanism).__name__)

  if cell_input_fn is None:
    cell_input_fn = (
        lambda inputs, attention: array_ops.concat([inputs, attention], -1))
  elif not callable(cell_input_fn):
    raise TypeError(
        "cell_input_fn must be callable, saw type: %s"
        % type(cell_input_fn).__name__)

  if probability_fn is None:
    probability_fn = nn_ops.softmax
  elif not callable(probability_fn):
    raise TypeError(
        "probability_fn must be callable, saw type: %s"
        % type(probability_fn).__name__)

  # Without an attention layer, the attention size is the last dimension of
  # the attention mechanism's values.
  if attention_layer_size is None:
    self._attention_layer = None
    self._attention_size = (
        attention_mechanism.values.get_shape()[-1].value)
  else:
    self._attention_layer = layers_core.Dense(
        attention_layer_size, name="attention_layer", use_bias=False)
    self._attention_size = attention_layer_size

  self._cell = cell
  self._attention_mechanism = attention_mechanism
  self._cell_input_fn = cell_input_fn
  self._probability_fn = probability_fn
  self._output_attention = output_attention
  self._alignment_history = alignment_history

  with ops.name_scope(name, "AttentionWrapperInit"):
    if initial_cell_state is not None:
      # The batch size is read off the last tensor of the flattened state,
      # preferring the static value when it is known.
      last_state_tensor = nest.flatten(initial_cell_state)[-1]
      state_batch_size = (
          last_state_tensor.shape[0].value
          or array_ops.shape(last_state_tensor)[0])
      error_message = (
          "When constructing AttentionWrapper %s: " % self._base_name +
          "Non-matching batch sizes between the memory "
          "(encoder output) and initial_cell_state. Are you using "
          "the BeamSearchDecoder? You may need to tile your initial state "
          "via the tf.contrib.seq2seq.tile_batch function with argument "
          "multiple=beam_width.")
      batch_sizes_match = check_ops.assert_equal(
          state_batch_size,
          self._attention_mechanism.batch_size,
          message=error_message)

      def _checked_identity(state_tensor):
        return array_ops.identity(
            state_tensor, name="check_initial_cell_state")

      # Every state tensor is routed through an identity op that depends on
      # the batch size assertion.
      with ops.control_dependencies([batch_sizes_match]):
        self._initial_cell_state = nest.map_structure(
            _checked_identity, initial_cell_state)
    else:
      self._initial_cell_state = None
def _validate_sample_arg(self, x):
  """Helper which validates sample arg, e.g., input to `log_prob`.

  Checks that the trailing dimensions of `x` equal this distribution's batch
  and event shape. Whatever is statically known is checked immediately;
  otherwise, when `self.validate_args` is true, runtime assertion ops are
  created instead.

  Args:
    x: `Tensor` sample argument to validate.

  Returns:
    A (possibly empty) list of assertion ops for the caller to attach as
    control dependencies.

  Raises:
    NotImplementedError: if static shape information shows that `x` has too
      few dimensions, or that its trailing dimensions differ from the batch
      and event shape.
  """
  with ops.name_scope(name="validate_sample_arg", values=[x]):
    # Each *_ndims value is a Python int when statically known, else a Tensor.
    x_ndims = (array_ops.rank(x) if x.shape.ndims is None else x.shape.ndims)
    event_ndims = (array_ops.size(self.event_shape_tensor())
                   if self.event_shape.ndims is None
                   else self.event_shape.ndims)
    batch_ndims = (array_ops.size(self.batch_shape_tensor())
                   if self.batch_shape.ndims is None
                   else self.batch_shape.ndims)
    expected_batch_event_ndims = batch_ndims + event_ndims

    if (isinstance(x_ndims, int) and
        isinstance(expected_batch_event_ndims, int)):
      if x_ndims < expected_batch_event_ndims:
        raise NotImplementedError(
            "Broadcasting is not supported; too few event dims "
            "(expected at least {}, saw {}).".format(
                expected_batch_event_ndims, x_ndims))
      ndims_assertion = []
    elif self.validate_args:
      ndims_assertion = [
          check_ops.assert_greater_equal(
              x_ndims,
              expected_batch_event_ndims,
              message="Broadcasting is not supported; too few event dims.",
              name="assert_batch_and_event_ndims_large_enough"),
      ]
    # NOTE: `ndims_assertion` is left unbound when the ranks are not both
    # static and `validate_args` is false. The branches below that read it
    # are only taken when one of the two cases above ran.

    # The shapes are numpy arrays when fully known statically, Tensors
    # otherwise.
    if (self.batch_shape.is_fully_defined() and
        self.event_shape.is_fully_defined()):
      expected_batch_event_shape = np.int32(self.batch_shape.concatenate(
          self.event_shape).as_list())
    else:
      expected_batch_event_shape = array_ops.concat([
          self.batch_shape_tensor(),
          self.event_shape_tensor(),
      ], axis=0)

    # Number of leading (sample) dimensions of `x`, clamped at zero.
    sample_ndims = x_ndims - expected_batch_event_ndims
    if isinstance(sample_ndims, int):
      sample_ndims = max(sample_ndims, 0)
    if (isinstance(sample_ndims, int) and
        x.shape[sample_ndims:].is_fully_defined()):
      actual_batch_event_shape = np.int32(x.shape[sample_ndims:].as_list())
    else:
      sample_ndims = math_ops.maximum(sample_ndims, 0)
      actual_batch_event_shape = array_ops.shape(x)[sample_ndims:]

    if (isinstance(expected_batch_event_shape, np.ndarray) and
        isinstance(actual_batch_event_shape, np.ndarray)):
      if any(expected_batch_event_shape != actual_batch_event_shape):
        raise NotImplementedError("Broadcasting is not supported; "
                                  "unexpected batch and event shape "
                                  "(expected {}, saw {}).".format(
                                      expected_batch_event_shape,
                                      actual_batch_event_shape))
      # We need to set the final runtime-assertions to `ndims_assertion` since
      # it's possible this assertion was created. We could add a condition to
      # only do so if `self.validate_args == True`, however this is redundant
      # as `ndims_assertion` already encodes this information.
      runtime_assertions = ndims_assertion
    elif self.validate_args:
      # We need to make the `ndims_assertion` a control dep because otherwise
      # TF itself might raise an exception owing to this assertion being
      # ill-defined, ie, one cannot even compare different rank Tensors.
      with ops.control_dependencies(ndims_assertion):
        shape_assertion = check_ops.assert_equal(
            expected_batch_event_shape,
            actual_batch_event_shape,
            message=("Broadcasting is not supported; "
                     "unexpected batch and event shape."),
            name="assert_batch_and_event_shape_same")
      runtime_assertions = [shape_assertion]
    else:
      runtime_assertions = []

    return runtime_assertions