def _testMultipleReduceJoin(self, input_array, axis, separator=" "):
  """Checks a multi-axis reduce_join against chained single-axis joins.

  The reference value is produced by calling `string_ops.reduce_join` once
  per entry of `axis` (always with `keep_dims=True`) and then squeezing the
  reduced axes away. Correctness of the single-axis reduce_join itself is
  covered by the tests that use `_testReduceJoin`.

  Args:
    input_array: The input to test.
    axis: The indices to reduce.
    separator: The separator to use when joining.
  """
  with self.cached_session():
    joined = string_ops.reduce_join(
        inputs=input_array, axis=axis, keep_dims=False, separator=separator)
    joined_keep_dims = string_ops.reduce_join(
        inputs=input_array, axis=axis, keep_dims=True, separator=separator)

    expected = input_array
    for single_axis in axis:
      expected = string_ops.reduce_join(
          inputs=expected,
          axis=single_axis,
          keep_dims=True,
          separator=separator)
    # With nothing to reduce the loop above never runs and `expected` is
    # still the raw input, so it is converted to a constant op here
    # (presumably so that `get_shape()` and `evaluate` below work on it).
    if not axis:
      expected = constant_op.constant(expected)
    expected_squeezed = array_ops.squeeze(expected, axis=axis)

    joined_value = self.evaluate(joined)
    joined_keep_dims_value = self.evaluate(joined_keep_dims)
    expected_value = self.evaluate(expected)
    expected_squeezed_value = self.evaluate(expected_squeezed)

    # Compare the evaluated contents first, then the shapes reported by
    # get_shape() on the ops themselves.
    self.assertAllEqualUnicode(expected_value, joined_keep_dims_value)
    self.assertAllEqualUnicode(expected_squeezed_value, joined_value)
    self.assertAllEqual(expected.get_shape(), joined_keep_dims.get_shape())
    self.assertAllEqual(expected_squeezed.get_shape(), joined.get_shape())
def testDeprecatedArgs(self):
  """Checks that reduce_join accepts both the old and the new keyword names."""
  expected = constant_op.constant(["foobar"])
  words = ["foo", "bar"]

  # Old names: keep_dims and reduction_indices
  legacy_result = string_ops.reduce_join(
      words, reduction_indices=0, keep_dims=True)
  self.assertAllEqual(expected, legacy_result)

  # New names keepdims and axis.
  current_result = string_ops.reduce_join(words, axis=0, keepdims=True)
  self.assertAllEqual(expected, current_result)
def testInvalidArgsUnknownShape(self):
  """Checks that bad axes raise op errors when the input shape is unspecified.

  The input is a string placeholder created without a shape, and the errors
  are expected when the ops are evaluated with a fed value.
  """
  with self.cached_session():
    string_feed = array_ops.placeholder(dtypes.string, name="placeholder")
    too_high_op = string_ops.reduce_join(string_feed, axis=1)
    duplicate_op = string_ops.reduce_join(string_feed, axis=[-1, 1])

    # (op to evaluate, value fed to the placeholder, expected error text)
    cases = (
        (too_high_op, [""], "Invalid reduction dimension 1"),
        (duplicate_op, [[""]], "Duplicate reduction dimension 1"),
    )
    for op, fed_value, expected_message in cases:
      with self.assertRaisesOpError(expected_message):
        op.eval(feed_dict={string_feed.name: fed_value})
def testZeroDims(self):
  """Checks reduce_join on an input of shape [0, 1].

  Reducing over index 0 is expected to raise a ValueError, while reducing
  over index 1 is expected to produce a result of shape [0].
  """
  with self.test_session():
    empty_inputs = np.zeros([0, 1], dtype=str)

    # Reducing over the dimension of size 0 is rejected.
    with self.assertRaisesRegexp(ValueError, "dimension 0 with size 0"):
      string_ops.reduce_join(inputs=empty_inputs, reduction_indices=0)

    # Reducing over the other dimension is accepted.
    reduced = string_ops.reduce_join(inputs=empty_inputs, reduction_indices=1)
    reduced_shape = reduced.eval().shape
    expected_shape = [0]
    self.assertAllEqualUnicode(expected_shape, reduced_shape)
def testZeroDims(self):
  """Checks reduce_join on an input of shape [0, 1] along each axis."""
  with self.cached_session():
    empty_inputs = np.zeros([0, 1], dtype=str)

    # Reduction that drops the dim of size 0.
    dropped = string_ops.reduce_join(inputs=empty_inputs, axis=0)
    self.assertAllEqualUnicode([""], self.evaluate(dropped))

    # Reduction that keeps the dim of size 0.
    kept = string_ops.reduce_join(inputs=empty_inputs, axis=1)
    kept_shape = self.evaluate(kept).shape
    self.assertAllEqual([0], kept_shape)
def testInvalidReductionIndices(self):
  """Checks that out-of-range axes make reduce_join raise ValueError."""
  # (inputs, axis, expected error pattern)
  cases = (
      ("", 0, "Invalid reduction dim"),
      ([[""]], -3, "Invalid reduction dimension -3"),
      ([[""]], 2, "Invalid reduction dimension 2"),
      ([[""]], [0, -3], "Invalid reduction dimension -3"),
      ([[""]], [0, 2], "Invalid reduction dimension 2"),
  )
  with self.cached_session():
    for bad_inputs, bad_axis, expected_pattern in cases:
      with self.assertRaisesRegex(ValueError, expected_pattern):
        string_ops.reduce_join(inputs=bad_inputs, axis=bad_axis)
def testInvalidReductionIndices(self):
  """Checks that out-of-range axes make reduce_join raise ValueError.

  Uses `assertRaisesRegex` rather than the deprecated `assertRaisesRegexp`
  alias, which newer Python versions no longer provide.
  """
  # (inputs, axis, expected error pattern)
  cases = (
      ("", 0, "Invalid reduction dim"),
      ([[""]], -3, "Invalid reduction dimension -3"),
      ([[""]], 2, "Invalid reduction dimension 2"),
      ([[""]], [0, -3], "Invalid reduction dimension -3"),
      ([[""]], [0, 2], "Invalid reduction dimension 2"),
  )
  with self.cached_session():
    for bad_inputs, bad_axis, expected_pattern in cases:
      with self.assertRaisesRegex(ValueError, expected_pattern):
        string_ops.reduce_join(inputs=bad_inputs, axis=bad_axis)
def _testReduceJoin(self,
                    input_array,
                    truth,
                    truth_shape,
                    axis,
                    keep_dims=False,
                    separator=""):
  """Runs reduce_join once and checks both its value and its shape.

  Args:
    input_array: The string input to be joined.
    truth: An array or np.array of the expected result.
    truth_shape: An array or np.array of the expected shape.
    axis: The indices to reduce over.
    keep_dims: Whether or not to retain reduced dimensions.
    separator: The separator to use for joining.
  """
  with self.cached_session():
    joined = string_ops.reduce_join(
        inputs=input_array,
        axis=axis,
        keep_dims=keep_dims,
        separator=separator)
    joined_value = self.evaluate(joined)

    # The value check uses the evaluated array; the shape check uses the
    # shape reported by the op itself.
    self.assertAllEqualUnicode(truth, joined_value)
    self.assertAllEqual(truth_shape, joined.get_shape())
def list_files(datapath, days, match_pattern):
  """Builds a dataset of the files matching one glob pattern per day.

  Args:
    datapath: Leading path component shared by every pattern.
    days: Iterable of path components; one pattern is built for each entry
      by joining `datapath`, the entry and `match_pattern`.
    match_pattern: Trailing path component of every pattern.

  Returns:
    The result of `Dataset.from_tensor_slices` over the matching file names.
    The file-name tensor carries a control dependency on an assertion that
    at least one file matched.
  """
  with ops.name_scope("list_files"):
    per_day_patterns = [
        os.path.join(datapath, day, match_pattern) for day in days
    ]
    file_pattern = ops.convert_to_tensor(
        per_day_patterns, dtype=dtypes.string, name="file_pattern")
    matching_files = gen_io_ops.matching_files(file_pattern)

    # Raise an exception if `file_pattern` does not match any files.
    num_matches = array_ops.shape(matching_files)[0]
    condition = math_ops.greater(num_matches, 0, name="match_not_empty")
    joined_patterns = string_ops.reduce_join(file_pattern, separator=", ")
    message = math_ops.add(
        "No files matched pattern: ", joined_patterns, name="message")
    assert_not_empty = control_flow_ops.Assert(
        condition, [message], summarize=1, name="assert_not_empty")

    # Route the file list through an identity op tied to the assertion so
    # that consumers of the list depend on the check.
    with ops.control_dependencies([assert_not_empty]):
      matching_files = array_ops.identity(matching_files)

    return dataset_ops.Dataset.from_tensor_slices(matching_files)
def string_format(template, inputs, placeholder="{}", summarize=3, name=None):
  """Version of tf.strings.format that handles RaggedTensors.

  Args:
    template: Python string that is split on `placeholder`.
    inputs: A single tensor or ragged tensor, or a sequence of them. A single
      value is wrapped in a one-element list.
    placeholder: The substring of `template` that marks where an input goes.
    summarize: Forwarded to the per-input formatting calls.
    name: Optional name for the name scope.

  Returns:
    The constant for the whole template when there are no inputs; otherwise
    the result of `string_ops.reduce_join` over the interleaved template
    pieces and formatted inputs.

  Raises:
    ValueError: If the number of placeholders in `template` differs from the
      number of inputs.
  """
  if tensor_util.is_tensor(inputs) or ragged_tensor.is_ragged(inputs):
    inputs = [inputs]

  template_parts = template.split(placeholder)
  num_placeholders = len(template_parts) - 1
  if len(inputs) != num_placeholders:
    raise ValueError(
        "num placeholders in template and num inputs must match"
        ": {} vs {}".format(num_placeholders, len(inputs)))

  with ops.name_scope(name, "StringFormat", [inputs]):
    pieces = [constant_op.constant(template_parts[0])]
    # Each input is followed by the template text that comes after its
    # placeholder; the length check above guarantees the two line up.
    for value, trailing_text in zip(inputs, template_parts[1:]):
      if ragged_tensor.is_ragged(value):
        formatted = ragged_tensor_to_string(value, summarize)
      else:
        formatted = string_ops.string_format(
            "{}", [value], summarize=summarize)
      pieces.append(formatted)
      pieces.append(constant_op.constant(trailing_text))

    if len(pieces) == 1:
      return pieces[0]
    return string_ops.reduce_join(pieces)
def _ragged_tensor_to_string(string_tensor, summarize):
  """Returns a scalar string tensor with the contents of `string_tensor`.

  Rank-1 input is joined directly; higher-rank input is first mapped row by
  row through this same function.

  Args:
    string_tensor: A potentially ragged tensor with dtype=string.
    summarize: Include only the first and last `summarize` elements of each
      dimension.  If `-1` or `None`, then include all elements.

  Returns:
    A scalar string Tensor.
  """

  def _row_to_string(row):
    return _ragged_tensor_to_string(row, summarize)

  if string_tensor.shape.rank == 1:
    pieces = string_tensor
  else:
    pieces = map_fn_lib.map_fn(
        _row_to_string,
        string_tensor,
        fn_output_signature=tensor_spec.TensorSpec(None, dtypes.string))

  if summarize not in (-1, None):
    all_pieces = pieces

    def _keep_all():
      return all_pieces

    def _elide_middle():
      # Keep the leading and trailing `summarize` pieces around a "..." marker.
      return array_ops.concat(
          [all_pieces[:summarize], ["..."], all_pieces[-summarize:]], axis=0)

    pieces = control_flow_ops.cond(
        _nrows(string_tensor) <= 2 * summarize, _keep_all, _elide_middle)

  joined = string_ops.reduce_join(pieces, separator=", ")
  return "[" + joined + "]"
def _testReduceJoin(self,
                    input_array,
                    truth,
                    truth_shape,
                    axis,
                    keep_dims=False,
                    separator=""):
  """Compares the output of reduce_join to an expected value and shape.

  Args:
    input_array: The string input to be joined.
    truth: An array or np.array of the expected result.
    truth_shape: An array or np.array of the expected shape.
    axis: The indices to reduce over.
    keep_dims: Whether or not to retain reduced dimensions.
    separator: The separator to use for joining.
  """
  join_kwargs = dict(
      inputs=input_array,
      axis=axis,
      keep_dims=keep_dims,
      separator=separator)
  with self.cached_session():
    result_op = string_ops.reduce_join(**join_kwargs)
    result_value = self.evaluate(result_op)
    self.assertAllEqualUnicode(truth, result_value)
    self.assertAllEqual(truth_shape, result_op.get_shape())
def _testMultipleReduceJoin(self,
                            input_array,
                            reduction_indices,
                            separator=" "):
  """Checks a multi-index reduce_join against chained single-index joins.

  The reference value is produced by calling `string_ops.reduce_join` once
  per index (always with `keep_dims=True`) and then squeezing those
  dimensions away. When `reduction_indices` is empty, the reference reduces
  every dimension of the input, highest index first. Correctness of the
  single-dimension reduce_join is covered by the tests that use
  `_testReduceJoin`.

  Args:
    input_array: The input to test.
    reduction_indices: The indices to reduce.
    separator: The separator to use when joining.
  """
  rank = len(input_array.shape)
  reference_indices = reduction_indices or list(reversed(xrange(rank)))
  with self.test_session():
    joined = string_ops.reduce_join(
        inputs=input_array,
        reduction_indices=reduction_indices,
        keep_dims=False,
        separator=separator)
    joined_keep_dims = string_ops.reduce_join(
        inputs=input_array,
        reduction_indices=reduction_indices,
        keep_dims=True,
        separator=separator)

    expected = input_array
    for single_index in reference_indices:
      expected = string_ops.reduce_join(
          inputs=expected,
          reduction_indices=single_index,
          keep_dims=True,
          separator=separator)
    expected_squeezed = array_ops.squeeze(
        expected, squeeze_dims=reference_indices)

    joined_value = joined.eval()
    joined_keep_dims_value = joined_keep_dims.eval()
    expected_value = expected.eval()
    expected_squeezed_value = expected_squeezed.eval()

    # Compare the evaluated contents first, then the shapes reported by
    # get_shape() on the ops themselves.
    self.assertAllEqualUnicode(expected_value, joined_keep_dims_value)
    self.assertAllEqualUnicode(expected_squeezed_value, joined_value)
    self.assertAllEqual(expected.get_shape(), joined_keep_dims.get_shape())
    self.assertAllEqual(expected_squeezed.get_shape(), joined.get_shape())
def _testMultipleReduceJoin(self,
                            input_array,
                            reduction_indices,
                            separator=" "):
  """Tests reduce_join for one input and multiple reduction_indices.

  The expected result comes from nesting single-index reduce_join calls, one
  per index, each keeping the reduced dimension, followed by a squeeze of
  those dimensions. If `reduction_indices` is empty, the nested reference
  walks all dimensions of the input from the last to the first. The
  single-dimension reduce_join is verified by the tests built on
  `_testReduceJoin`.

  Args:
    input_array: The input to test.
    reduction_indices: The indices to reduce.
    separator: The separator to use when joining.
  """
  num_dims = len(input_array.shape)
  nested_indices = reduction_indices or list(reversed(xrange(num_dims)))

  # Keyword arguments shared by the two multi-index calls under test.
  shared_kwargs = dict(
      inputs=input_array,
      reduction_indices=reduction_indices,
      separator=separator)

  with self.test_session():
    actual = string_ops.reduce_join(keep_dims=False, **shared_kwargs)
    actual_keep_dims = string_ops.reduce_join(keep_dims=True, **shared_kwargs)

    reference = input_array
    for index in nested_indices:
      reference = string_ops.reduce_join(
          inputs=reference,
          reduction_indices=index,
          keep_dims=True,
          separator=separator)
    reference_squeezed = array_ops.squeeze(
        reference, squeeze_dims=nested_indices)

    actual_array = actual.eval()
    actual_keep_dims_array = actual_keep_dims.eval()
    reference_array = reference.eval()
    reference_squeezed_array = reference_squeezed.eval()

    self.assertAllEqualUnicode(reference_array, actual_keep_dims_array)
    self.assertAllEqualUnicode(reference_squeezed_array, actual_array)
    self.assertAllEqual(reference.get_shape(), actual_keep_dims.get_shape())
    self.assertAllEqual(reference_squeezed.get_shape(), actual.get_shape())
def testInvalidArgsUnknownIndices(self):
  """Checks that bad axis values fed through a placeholder raise op errors."""
  with self.cached_session():
    axis_feed = array_ops.placeholder(dtypes.int32, name="placeholder")
    reduced = string_ops.reduce_join(["test", "test2"], axis=axis_feed)

    # (axis value fed at evaluation time, expected error text)
    cases = (
        (-2, "reduction dimension -2"),
        (2, "reduction dimension 2"),
    )
    for bad_axis, expected_message in cases:
      with self.assertRaisesOpError(expected_message):
        reduced.eval(feed_dict={axis_feed.name: bad_axis})
def testUnknownShape(self):
  """Checks reduce_join on a placeholder created without a shape."""
  fed_strings = [["a"], ["b"]]
  expected_values = ["ab"]
  expected_shape = None
  with self.cached_session():
    string_feed = array_ops.placeholder(dtypes.string, name="placeholder")
    reduced = string_ops.reduce_join(string_feed, axis=0)
    result = reduced.eval(feed_dict={string_feed.name: fed_strings})
    self.assertAllEqualUnicode(expected_values, result)
    self.assertAllEqual(expected_shape, reduced.get_shape())
def testUnknownIndices(self):
  """Checks reduce_join when the axis is supplied through a placeholder."""
  words = [["this", "is", "a", "test"],
           ["please", "do", "not", "panic"]]
  expected_dim_zero = ["thisplease", "isdo", "anot", "testpanic"]
  expected_dim_one = ["thisisatest", "pleasedonotpanic"]
  expected_shape = None
  with self.cached_session():
    axis_feed = array_ops.placeholder(dtypes.int32, name="placeholder")
    reduced = string_ops.reduce_join(words, axis=axis_feed)

    # Evaluate the same op once per axis before checking either result.
    result_dim_zero, result_dim_one = [
        reduced.eval(feed_dict={axis_feed.name: [dim]}) for dim in (0, 1)
    ]
    self.assertAllEqualUnicode(expected_dim_zero, result_dim_zero)
    self.assertAllEqualUnicode(expected_dim_one, result_dim_one)
    self.assertAllEqual(expected_shape, reduced.get_shape())
def func(data):
  """Joins each window of `width` adjacent elements of `data` along `axis`.

  `name`, `width`, `axis` and `string_separator` are not parameters; they are
  read from the enclosing scope.

  Args:
    data: Value converted with
      `ragged_tensor.convert_to_tensor_or_ragged_tensor`.

  Returns:
    The result of `string_ops.reduce_join` over the window dimension of the
    stacked slices, using `string_separator` as the separator.
  """
  with ops.name_scope(name, 'NGrams', [data, width]):
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')

    # Index entries that surround the windowed axis. A non-negative axis is
    # addressed from the front, a negative one from the back via Ellipsis.
    if axis >= 0:
      leading = [slice(None)] * axis
      trailing = []
    else:
      leading = [Ellipsis]
      trailing = [slice(None)] * (-axis - 1)

    slices = []
    for start in range(width):
      # The last slice must run to the end of the axis; a stop of 0 would
      # produce an empty slice, so it is replaced by None.
      offset = start - width + 1
      stop = None if offset == 0 else offset
      idx = leading + [slice(start, stop)] + trailing
      slices.append(data[idx])

    # Stack the slices.
    stack_axis = axis + 1 if axis >= 0 else axis
    windowed_data = array_ops.stack(slices, stack_axis)

    # The window elements live on the newly stacked dimension, so that is the
    # one to join over. For negative `axis` this equals `axis`; for
    # non-negative `axis` it is `axis + 1` (reducing over `axis` itself would
    # join across window positions instead of within each window).
    return string_ops.reduce_join(
        windowed_data, axis=stack_axis, separator=string_separator)
def testInvalidReductionIndices(self):
  """Checks that invalid reduction_indices make reduce_join raise ValueError."""
  # (inputs, reduction_indices, expected error pattern)
  cases = (
      ("", 0, "Invalid reduction dim"),
      ([[""]], -3, "Invalid reduction dimension -3"),
      ([[""]], 2, "Invalid reduction dimension 2"),
      ([[""]], [0, -3], "Invalid reduction dimension -3"),
      ([[""]], [0, 2], "Invalid reduction dimension 2"),
      ([[""]], [0, 0], "Duplicate reduction index 0"),
  )
  with self.test_session():
    for bad_inputs, bad_indices, expected_pattern in cases:
      with self.assertRaisesRegexp(ValueError, expected_pattern):
        string_ops.reduce_join(inputs=bad_inputs, reduction_indices=bad_indices)
def ngrams(data,
           width,
           axis=-1,
           reduction_type=None,
           string_separator=" ",
           name=None):
  """Create a tensor of n-grams based on the input data `data`.

  Windows of `width` adjacent elements are taken from `data` along `axis`
  and each window is combined into one value using `reduction_type`. This op
  is intended to cover basic use cases; more complex combinations can be
  created using the sliding_window op.

  Args:
    data: The data to reduce.
    width: The width of the ngram window. If there is not sufficient data to
      fill out the ngram window, the resulting ngram will be empty.
    axis: The axis to create ngrams along. Note that for string join
      reductions, only axis '-1' is supported; for other reductions, any
      positive or negative axis can be used. Should be a constant.
    reduction_type: A member of the Reduction enum. Should be a constant.
      Currently supports:

      * `Reduction.SUM`: Add values in the window.
      * `Reduction.MEAN`: Average values in the window.
      * `Reduction.STRING_JOIN`: Join strings in the window.
        Note that axis must be -1 here.

    string_separator: The separator string used for `Reduction.STRING_JOIN`.
      Ignored otherwise. Must be a string constant, not a Tensor.
    name: The op name.

  Returns:
    A tensor of ngrams.

  Raises:
    InvalidArgumentError: if `reduction_type` is either None or not a
      Reduction, or if `reduction_type` is STRING_JOIN and `axis` is not -1.
  """
  with ops.name_scope(name, "NGrams", [data, width]):
    # Validate the arguments before building any ops.
    if reduction_type is None:
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be specified.")

    if not isinstance(reduction_type, Reduction):
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be a Reduction.")

    # TODO(b/122967921): Lift this restriction after ragged_reduce_join is done.
    if reduction_type is Reduction.STRING_JOIN and axis != -1:
      raise errors.InvalidArgumentError(
          None, None, "%s requires that ngrams' 'axis' parameter be -1." %
          Reduction.STRING_JOIN.name)

    windowed_data = sliding_window(data, width, axis)

    # A negative axis is passed through unchanged; a non-negative axis is
    # shifted by one for the numeric reductions below.
    reduction_axis = axis if axis < 0 else axis + 1

    # Ragged reduction ops work on both Tensor and RaggedTensor, so we can
    # use them here regardless of the type of tensor in 'windowed_data'.
    if reduction_type is Reduction.SUM:
      return math_ops.reduce_sum(windowed_data, reduction_axis)

    if reduction_type is Reduction.MEAN:
      return math_ops.reduce_mean(windowed_data, reduction_axis)

    if reduction_type is Reduction.STRING_JOIN:
      # Ragged input is joined on its flat values; dense input is joined
      # directly.
      if isinstance(data, ragged_tensor.RaggedTensor):
        return ragged_functional_ops.map_flat_values(
            string_ops.reduce_join,
            windowed_data,
            axis=axis,
            separator=string_separator)
      return string_ops.reduce_join(
          windowed_data, axis=axis, separator=string_separator)