def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []
  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = math_ops.reduce_prod(original_shape)
    implicit_dim = math_ops.equal(new_shape, -1)
    size_implicit_dim = (
        original_size // math_ops.maximum(1, -math_ops.reduce_prod(new_shape)))
    new_ndims = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        implicit_dim, array_ops.fill(new_ndims, size_implicit_dim), new_shape)
    validations = [] if not validate else [
        check_ops.assert_rank(
            original_shape, 1, message="Original shape must be a vector."),
        check_ops.assert_rank(
            new_shape, 1, message="New shape must be a vector."),
        check_ops.assert_less_equal(
            math_ops.count_nonzero(implicit_dim, dtype=dtypes.int32),
            1,
            message="At most one dimension can be unknown."),
        check_ops.assert_positive(
            expanded_new_shape, message="Shape elements must be >=-1."),
        check_ops.assert_equal(
            math_ops.reduce_prod(expanded_new_shape),
            original_size,
            message="Shape sizes do not match."),
    ]
    return expanded_new_shape, batch_shape_static, validations
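# For reference, the `-1` expansion in `calculate_reshape` above boils down to a
# small arithmetic rule: the implicit dimension equals the original element
# count divided by the product of the explicit (non -1) entries of `new_shape`.
# A minimal standalone NumPy sketch of that rule (illustrative only; it does
# not use the TensorFlow ops or validations above):
import numpy as np

def expand_implicit_dim(original_shape, new_shape):
  """Replaces a single -1 in `new_shape` so the element counts match."""
  original_size = int(np.prod(original_shape))
  new_shape = np.asarray(new_shape)
  implicit = new_shape == -1
  # prod(new_shape) is negative when exactly one -1 is present, so negating it
  # yields the product of the explicit dimensions (mirrors the op-level code).
  size_implicit = original_size // max(1, -int(np.prod(new_shape)))
  return np.where(implicit, size_implicit, new_shape)

print(expand_implicit_dim([6, 4], [3, -1, 4]))  # -> [3 2 4]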
def testDegenerate(self):
  for use_gpu in False, True:
    with self.test_session(use_gpu=use_gpu):
      for dtype in (dtypes.bool,):
        # A large number is needed to get Eigen to die
        x = array_ops.zeros((0, 9938), dtype=dtype)
        y = math_ops.count_nonzero(x, [0])
        self.assertAllEqual(y.eval(), np.zeros(9938))
def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
             feed_dict=None):
  np_ans = (x != zero).astype(np.int32)
  if reduction_axes is None:
    np_ans = np.sum(np_ans, keepdims=keepdims)
  else:
    reduction_axes = np.array(reduction_axes).astype(np.int32)
    for ra in reduction_axes.ravel()[::-1]:
      np_ans = np.sum(np_ans, axis=ra, keepdims=keepdims)
  with self.test_session(use_gpu=use_gpu) as sess:
    tf_ans = math_ops.count_nonzero(x, reduction_axes, keepdims)
    out = sess.run(tf_ans, feed_dict)
  self.assertAllClose(np_ans, out)
  self.assertShapeEqual(np_ans, tf_ans)
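# The NumPy reference above folds one reduction axis at a time, last axis
# first. A tiny standalone check (illustrative values) that this matches
# counting the nonzero entries over all reduced axes at once:
import numpy as np

x = np.array([[0, 1, 2],
              [3, 0, 0]])
nonzero = (x != 0).astype(np.int32)

# Reduce over both axes one at a time, in reverse order, as `_compare` does.
step = nonzero
for axis in sorted([0, 1], reverse=True):
  step = np.sum(step, axis=axis, keepdims=False)

assert step == np.count_nonzero(x)  # both give 3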
def testStringReduce(self):
  # Test case for GitHub issue 18712
  with self.cached_session() as sess:
    v = math_ops.count_nonzero(constant_op.constant(["test"]))
    self.assertAllClose(sess.run(v), 1)
def count_nonzero(a, axis=None):
  return np_arrays.tensor_to_ndarray(
      math_ops.count_nonzero(np_array_ops.array(a).data, axis))
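# The wrapper above is intended to mirror `np.count_nonzero` semantics for
# ndarray-like inputs. The target behaviour, shown with plain NumPy since the
# wrapper's surrounding module is not included here:
import numpy as np

a = np.array([[0, 1, 7],
              [0, 0, 3]])

print(np.count_nonzero(a))          # 3: axis=None counts over all elements
print(np.count_nonzero(a, axis=0))  # [0 1 2]: per-column counts
print(np.count_nonzero(a, axis=1))  # [2 1]: per-row counts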
def boolean_mask(data, mask, keepdims=False, name=None):
  """Applies a boolean mask to `data`.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  If `keepdims` is true then outer dimensions (corresponding to the `mask`
  dimensions) are preserved, and:

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  If `keepdims` is false, then the outer dimensions are collapsed (similar to
  the behavior of `tf.boolean_mask`), and:

  * `output[i, b1...bB] = data[a1...aA, b1...bB]`

     Where `(a1...aA)` is the `i`th `True` entry of `mask`
     (in row-major order).

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    keepdims: Whether to preserve the outer dimensions (`keepdims=True`) or
      flatten them (`keepdims=False`).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    If `keepdims` is false:

    * `rank(output) = rank(data) - rank(mask) + 1`.
    * `output.ragged_rank = max(data.ragged_rank - rank(mask) + 1, 0)`.

    If `keepdims` is true:

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:

  ```python
  >>> # Aliases for True & False so data and mask line up.
  >>> T, F = (True, False)

  >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Flatten outer dims.
  ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
  ...     keepdims=False).tolist()
  [1, 3, 7]

  >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Preserve outer dims.
  ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
  ...     keepdims=True).tolist()
  [[1, 3], [], [7]]

  >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Flatten outer dims.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
  ...     keepdims=False).tolist()
  [3, 5, 6]

  >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Preserve outer dims.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
  ...     keepdims=True).tolist()
  [[3], [], [5, 6]]

  >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([True, False, True]),
  ...     keepdims=True).tolist()
  [[1, 2, 3], [5, 6]]
  ```
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data,
            ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged.  Keep the
        # splits that we strip off in `splits`, so we can add them back on
        # after we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask, keepdims)

        # Add the ragged `splits` back to the result.
        if keepdims:
          masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
              masked_values, splits, validate=False)

        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask, keepdims=False)
      return ragged_tensor.RaggedTensor.from_row_splits(
          masked_values, masked_splits, validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask,
          ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask, keepdims)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2 and keepdims:
        # Add the innermost ragged dimension.  For each innermost cell, get
        # the number of values it contains.  Then flatten that to get a list
        # of cell lengths, and convert it to splits.  Finally, combine the
        # splits and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(
            mask, axis=-1, dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2 and keepdims:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
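# In the dense-`Tensor` branch with `keepdims=True`, the innermost ragged row
# lengths come from counting `True` entries along the mask's last axis. A
# standalone NumPy sketch of that bookkeeping (illustrative only; the real
# code builds a `RaggedTensor` from these lengths instead of Python lists):
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
mask = np.array([[True, False, True],
                 [False, False, False],
                 [True, False, False]])

masked_values = data[mask]                     # flat kept values: [1 3 7]
row_lengths = np.count_nonzero(mask, axis=-1)  # kept per row:     [2 0 1]

# Re-split the flat values by row length, as `from_row_lengths` would.
splits = np.concatenate([[0], np.cumsum(row_lengths)])
rows = [masked_values[splits[i]:splits[i + 1]].tolist()
        for i in range(len(row_lengths))]
print(rows)  # [[1, 3], [], [7]] -- matches the keepdims=True docstring example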
def testStringReduce(self):
  # Test case for GitHub issue 18712
  with self.test_session() as sess:
    v = math_ops.count_nonzero(constant_op.constant(["test"]))
    self.assertAllClose(sess.run(v), 1)
def _is_all_zeros(grad):
  all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
  return all_zeros
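# `_is_all_zeros` returns a scalar boolean tensor that is True only when every
# entry of `grad` is exactly zero. A minimal eager-mode sketch of the same
# check using the public TF 2 API (`tf.math.count_nonzero`, `tf.equal`) rather
# than the internal `math_ops` module:
import tensorflow as tf

def is_all_zeros(grad):
  # count_nonzero over all elements == 0  <=>  every element is zero.
  return tf.equal(tf.math.count_nonzero(grad), 0)

print(is_all_zeros(tf.zeros([2, 3])))          # True
print(is_all_zeros(tf.constant([0.0, 1e-9])))  # False: only exact zeros count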
def boolean_mask(data, mask, name=None):
  """Applies a boolean mask to `data` without flattening the mask dimensions.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  Note that `output` preserves the mask dimensions `a1...aA`; this differs
  from `tf.boolean_mask`, which flattens those dimensions.

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:

  >>> # Aliases for True & False so data and mask line up.
  >>> T, F = (True, False)

  >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.
  ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  ...     mask=[[T, F, T], [F, F, F], [T, F, F]]).to_list()
  [[1, 3], [], [7]]

  >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([[F, F, T], [F], [T, T]])).to_list()
  [[3], [], [5, 6]]

  >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([True, False, True])).to_list()
  [[1, 2, 3], [5, 6]]
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data,
            ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged.  Keep the
        # splits that we strip off in `splits`, so we can add them back on
        # after we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask)

        # Add the ragged `splits` back to the result.
        masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
            masked_values, splits, validate=False)
        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask)
      return ragged_tensor.RaggedTensor.from_row_splits(
          masked_values, masked_splits, validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask,
          ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2:
        # Add the innermost ragged dimension.  For each innermost cell, get
        # the number of values it contains.  Then flatten that to get a list
        # of cell lengths, and convert it to splits.  Finally, combine the
        # splits and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(
            mask, axis=-1, dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
def count_nonzero(a, axis=None):
  return math_ops.count_nonzero(np_array_ops.array(a), axis)
def sequence_loss(logits,
                  targets,
                  weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  sum_over_timesteps=False,
                  sum_over_batch=False,
                  softmax_loss_function=None,
                  name=None):
  """Weighted cross-entropy loss for a sequence of logits.

  Depending on the values of `average_across_timesteps` / `sum_over_timesteps`
  and `average_across_batch` / `sum_over_batch`, the returned Tensor will have
  rank 0, 1, or 2 as these arguments reduce the cross-entropy at each target,
  which has shape `[batch_size, sequence_length]`, over their respective
  dimensions. For example, if `average_across_timesteps` is `True` and
  `average_across_batch` is `False`, then the returned Tensor will have shape
  `[batch_size]`.

  Note that `average_across_timesteps` and `sum_over_timesteps` cannot both be
  True at the same time. The same holds for `average_across_batch` and
  `sum_over_batch`.

  The recommended loss reduction in TF 2.0 has been changed to sum_over,
  instead of weighted average. Users are recommended to use
  `sum_over_timesteps` and `sum_over_batch` for reduction.

  Args:
    logits: A Tensor of shape
      `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
      The logits correspond to the prediction across all classes at each
      timestep.
    targets: A Tensor of shape `[batch_size, sequence_length]` and dtype int.
      The target represents the true class at each timestep.
    weights: A Tensor of shape `[batch_size, sequence_length]` and dtype
      float. `weights` constitutes the weighting of each prediction in the
      sequence. When using `weights` as masking, set all valid timesteps to 1
      and all padded timesteps to 0, e.g. a mask returned by
      `tf.sequence_mask`.
    average_across_timesteps: If set, sum the cost across the sequence
      dimension and divide the cost by the total label weight across
      timesteps.
    average_across_batch: If set, sum the cost across the batch dimension and
      divide the returned cost by the batch size.
    sum_over_timesteps: If set, sum the cost across the sequence dimension and
      divide by the size of the sequence. Note that any element with 0 weights
      will be excluded from size calculation.
    sum_over_batch: If set, sum the cost across the batch dimension and divide
      the total cost by the batch size. Note that any element with 0 weights
      will be excluded from size calculation.
    softmax_loss_function: Function (labels, logits) -> loss-batch to be used
      instead of the standard softmax (the default if this is None). **Note
      that to avoid confusion, it is required for the function to accept named
      arguments.**
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A float Tensor of rank 0, 1, or 2 depending on the
    `average_across_timesteps` and `average_across_batch` arguments. By
    default, it has rank 0 (scalar) and is the weighted average cross-entropy
    (log-perplexity) per symbol.

  Raises:
    ValueError: logits does not have 3 dimensions or targets does not have 2
      dimensions or weights does not have 2 dimensions.
  """
  if len(logits.get_shape()) != 3:
    raise ValueError("Logits must be a "
                     "[batch_size x sequence_length x logits] tensor")
  if len(targets.get_shape()) != 2:
    raise ValueError("Targets must be a [batch_size x sequence_length] tensor")
  if len(weights.get_shape()) != 2:
    raise ValueError("Weights must be a [batch_size x sequence_length] tensor")
  if average_across_timesteps and sum_over_timesteps:
    raise ValueError("average_across_timesteps and sum_over_timesteps cannot "
                     "be set to True at same time.")
  if average_across_batch and sum_over_batch:
    raise ValueError("average_across_batch and sum_over_batch cannot be set "
                     "to True at same time.")
  with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
    num_classes = array_ops.shape(logits)[2]
    logits_flat = array_ops.reshape(logits, [-1, num_classes])
    targets = array_ops.reshape(targets, [-1])
    if softmax_loss_function is None:
      crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=targets, logits=logits_flat)
    else:
      crossent = softmax_loss_function(labels=targets, logits=logits_flat)
    crossent *= array_ops.reshape(weights, [-1])
    if average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_size = math_ops.reduce_sum(weights)
      crossent = math_ops.div_no_nan(crossent, total_size)
    elif sum_over_timesteps and sum_over_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_count = math_ops.cast(math_ops.count_nonzero(weights),
                                  crossent.dtype)
      crossent = math_ops.div_no_nan(crossent, total_count)
    else:
      crossent = array_ops.reshape(crossent, array_ops.shape(logits)[0:2])
      if average_across_timesteps or average_across_batch:
        reduce_axis = [0] if average_across_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_size = math_ops.reduce_sum(weights, axis=reduce_axis)
        crossent = math_ops.div_no_nan(crossent, total_size)
      elif sum_over_timesteps or sum_over_batch:
        reduce_axis = [0] if sum_over_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_count = math_ops.cast(
            math_ops.count_nonzero(weights, axis=reduce_axis),
            dtype=crossent.dtype)
        crossent = math_ops.div_no_nan(crossent, total_count)
    return crossent