def testRaggedMapOnStructure_RaggedOutputs(self):
    """Maps over a dict of RaggedTensors with a `fn` that returns a dict.

    Both entries of the result are declared as int32 ragged outputs with
    ragged_rank=1, and every value is expected to be incremented by one.
    """
    base = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # Flat values of `base` multiplied by 10:
    # [[10, 20, 30], [40], [50, 60, 70]]
    scaled = ragged_functional_ops.map_flat_values(mo.multiply, base, 10)

    def _add_one(row):
        return {key: row[key] + 1 for key in ('batman', 'robin')}

    declared_types = {
        key: ragged_tensor.RaggedTensorType(dtype=dtypes.int32, ragged_rank=1)
        for key in ('batman', 'robin')
    }
    result = ragged_map_ops.map_fn(
        fn=_add_one,
        elems={'batman': base, 'robin': scaled},
        infer_shape=False,
        dtype=declared_types,
    )

    self.assertRaggedEqual(result['batman'], [[2, 3, 4], [5], [6, 7, 8]])
    self.assertRaggedEqual(result['robin'],
                           [[11, 21, 31], [41], [51, 61, 71]])
def testRaggedMapOnStructure_RaggedOutputs(self):
    """Checks map_fn on a dict structure whose outputs are ragged.

    `fn` receives one row of each feature and returns a dict with the same
    keys; each output is declared as an int32 RaggedTensorType of rank 1.
    """
    ones_scale = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # [[10, 20, 30], [40], [50, 60, 70]]
    tens_scale = ragged_functional_ops.map_flat_values(
        mo.multiply, ones_scale, 10)
    inputs = {'batman': ones_scale, 'robin': tens_scale}

    def _bump(row_features):
        bumped_batman = row_features['batman'] + 1
        bumped_robin = row_features['robin'] + 1
        return {'batman': bumped_batman, 'robin': bumped_robin}

    batman_type = ragged_tensor.RaggedTensorType(
        dtype=dtypes.int32, ragged_rank=1)
    robin_type = ragged_tensor.RaggedTensorType(
        dtype=dtypes.int32, ragged_rank=1)
    mapped = ragged_map_ops.map_fn(
        fn=_bump,
        elems=inputs,
        infer_shape=False,
        dtype={'batman': batman_type, 'robin': robin_type},
    )

    self.assertRaggedEqual(mapped['batman'], [[2, 3, 4], [5], [6, 7, 8]])
    self.assertRaggedEqual(mapped['robin'],
                           [[11, 21, 31], [41], [51, 61, 71]])
def testMismatchRaggedRank(self):
    """Declaring ragged_rank=23 for a reduce_sum `fn` must raise ValueError.

    The error message is matched against a regex describing the expected
    versus the actually returned value of `fn`.
    """
    ragged_input = ragged_factory_ops.constant(
        [[[1, 2, 3]], [[4, 5], [6, 7]]])

    def _sum_over_axis0(row):
        return ragged_math_ops.reduce_sum(row, axis=0)

    message_pattern = r'(?s)Expected `fn` to return.*But it returned.*'
    with self.assertRaisesRegex(ValueError, message_pattern):
        ragged_map_ops.map_fn(
            _sum_over_axis0,
            ragged_input,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=23))
def testMismatchRaggedRank2(self):
    """Declaring ragged_rank=10 for a `from_row_starts` `fn` must fail.

    `fn` wraps each row in a RaggedTensor; the declared output type does
    not match it, so map_fn is expected to raise a ValueError whose
    message matches the regex below.
    """
    ragged_input = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

    def _wrap_row(row):
        return ragged_tensor.RaggedTensor.from_row_starts(row, [0])

    message_pattern = r'(?s)Expected `fn` to return.*But it returned.*'
    with self.assertRaisesRegex(ValueError, message_pattern):
        ragged_map_ops.map_fn(
            _wrap_row,
            ragged_input,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=10))
def testMismatchRaggedRank2(self):
    """A declared ragged_rank of 10 must be rejected with an exact message.

    `fn` turns each row into a RaggedTensor via `from_row_starts`; the
    test pins the literal ValueError text reported by map_fn.
    """
    rows = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

    def _as_single_row_ragged(values):
        return ragged_tensor.RaggedTensor.from_row_starts(values, [0])

    expected_error = (
        r'The declared ragged rank (10) mismatches the result (2)')
    with self.assertRaisesWithLiteralMatch(ValueError, expected_error):
        ragged_map_ops.map_fn(
            _as_single_row_ragged,
            rows,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=10))
def testMismatchRaggedRank(self):
    """A declared ragged_rank of 23 must be rejected with an exact message.

    `fn` reduces each element with `reduce_sum(axis=0)`; the test pins
    the literal ValueError text reported by map_fn.
    """
    nested_rows = ragged_factory_ops.constant(
        [[[1, 2, 3]], [[4, 5], [6, 7]]])

    def _column_sums(element):
        return ragged_math_ops.reduce_sum(element, axis=0)

    expected_error = (
        r'The declared ragged rank (23) mismatches the result (1)')
    with self.assertRaisesWithLiteralMatch(ValueError, expected_error):
        ragged_map_ops.map_fn(
            _column_sums,
            nested_rows,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=23))
def testMismatchRaggedRank2(self):
    """Verifies the literal error raised for a mismatched ragged rank.

    The mapped function returns a RaggedTensor built with
    `from_row_starts`, while the declared output type asks for
    ragged_rank=10.
    """
    source = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

    def _to_ragged(row):
        return ragged_tensor.RaggedTensor.from_row_starts(row, [0])

    with self.assertRaisesWithLiteralMatch(
            ValueError,
            r'The declared ragged rank (10) mismatches the result (2)'):
        ragged_map_ops.map_fn(
            _to_ragged,
            source,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=10))
def testMismatchRaggedRank(self):
    """Verifies the literal error raised for a mismatched ragged rank.

    The mapped function is `reduce_sum(axis=0)`, while the declared
    output type asks for ragged_rank=23.
    """
    source = ragged_factory_ops.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])

    def _reduce_rows(element):
        return ragged_math_ops.reduce_sum(element, axis=0)

    with self.assertRaisesWithLiteralMatch(
            ValueError,
            r'The declared ragged rank (23) mismatches the result (1)'):
        ragged_map_ops.map_fn(
            _reduce_rows,
            source,
            dtype=ragged_tensor.RaggedTensorType(
                dtype=dtypes.int64, ragged_rank=23))
def testMapOnSparseTensor(self):
    """Identity map_fn over a RaggedTensor created from a SparseTensor."""
    sparse_input = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
        values=[0, 5, 0, 4],
        dense_shape=[2, 2],
    )
    ragged_input = ragged_tensor.RaggedTensor.from_sparse(sparse_input)

    def _identity(row):
        return row

    mapped = ragged_map_ops.map_fn(_identity, ragged_input)

    # Explicit zeros stored in the sparse tensor must survive the round trip.
    self.assertRaggedEqual(mapped, [[0, 5], [0, 4]])
def testMapOnSparseTensor(self):
    """Checks that map_fn with an identity `fn` preserves a sparse-derived input."""
    coordinates = [[0, 0], [0, 1], [1, 0], [1, 1]]
    stored_values = [0, 5, 0, 4]
    from_sparse = ragged_tensor.RaggedTensor.from_sparse(
        sparse_tensor.SparseTensor(
            indices=coordinates,
            values=stored_values,
            dense_shape=[2, 2],
        ))

    unchanged = ragged_map_ops.map_fn(lambda row: row, from_sparse)

    self.assertRaggedEqual(unchanged, [[0, 5], [0, 4]])
def getTokenWord(self, text, token_starts, token_ends):
    """Extracts the token substrings of `text` given start/end offsets.

    Each mapped element is a `(text, start, length)` triple, with
    `length = token_ends - token_starts`, passed to `tf.strings.substr`.

    Args:
      text: First component mapped over; presumably one string per batch
        entry (verify against callers).
      token_starts: Start offsets of the tokens, mapped in lockstep with
        `text`.
      token_ends: End offsets of the tokens; must support subtraction with
        `token_starts`.

    Returns:
      The result of `ragged_map_ops.map_fn`, declared as a string
      `RaggedTensorType` with ragged_rank=1.
    """
    lengths = token_ends - token_starts

    def _slice_tokens(components):
        source, offset, span = components
        return tf.strings.substr(source, offset, span)

    declared_type = ragged_tensor.RaggedTensorType(
        dtype=tf.string, ragged_rank=1)
    return ragged_map_ops.map_fn(
        _slice_tokens,
        (text, token_starts, lengths),
        dtype=declared_type,
        infer_shape=False)
def break_sentences_with_offsets(self, doc):
    """Splits `doc` into sentence fragments, returns text, start & end offsets.

    Example:

                        1                  1         2         3
              012345678901234    01234567890123456789012345678901234567
        doc: 'Hello...foo bar', 'Welcome to the U.S. don't be surprised'

        fragment_text: [['Hello...', 'foo bar'],
                        ['Welcome to the U.S.', 'don't be surprised']]
        start: [[0, 8], [0, 20]]
        end: [[8, 15], [19, 38]]

    Args:
      doc: A string `Tensor` of shape [batch] with a batch of documents.
        Inputs with more than one dimension are flattened first.

    Returns:
      A tuple of (fragment_text, start, end) where:

      fragment_text: A string `RaggedTensor` of shape [batch, (num_sentences)]
        with each input broken up into its constituent sentence fragments.
      start: A int64 `RaggedTensor` of shape [batch, (num_sentences)]
        where each entry is the inclusive beginning byte offset of a sentence.
      end: A int64 `RaggedTensor` of shape [batch, (num_sentences)]
        where each entry is the exclusive ending byte offset of a sentence.
    """
    if doc.shape.ndims > 1:
        doc = ragged_tensor.RaggedTensor.from_tensor(doc)
        doc = doc.flat_values

    # Run sentence fragmenter op v2. The op also returns fragment properties
    # and terminal punctuation tokens, which this method does not use.
    fragment = gen_state_based_sentence_breaker_op.sentence_fragments_v2(doc)
    start, end, _, _, row_lengths = fragment

    # Group the flat per-fragment offsets by document.
    start, end = tuple(
        ragged_tensor.RaggedTensor.from_row_lengths(value, row_lengths)
        for value in [start, end])

    # Helper for use within map_fn (function must only take in one argument)
    def _substring(x):
        s, pos, length = x
        return string_ops.substr(s, pos, length)

    # Extract fragment text using offsets. Each document yields a vector whose
    # length is its own fragment count, so the output must be declared ragged;
    # a dense `dtypes.string` output cannot be stacked when documents contain
    # different numbers of fragments.
    fragment_text = ragged_map_ops.map_fn(
        _substring,
        (doc, start, math_ops.subtract(end, start)),
        infer_shape=False,
        dtype=ragged_tensor.RaggedTensorType(
            dtype=dtypes.string, ragged_rank=1))
    return fragment_text, start, end
def testRaggedMapOnStructure(self):
    """Maps over a dict of RaggedTensors with a `fn` returning one int32.

    For every row the two features are summed and added together, e.g.
    the first row gives (1 + 2 + 3) + (10 + 20 + 30) = 66.
    """
    base = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # [[10, 20, 30], [40], [50, 60, 70]]
    scaled = ragged_functional_ops.map_flat_values(mo.multiply, base, 10)

    def _total(row):
        batman_sum = mo.reduce_sum(row['batman'])
        robin_sum = mo.reduce_sum(row['robin'])
        return batman_sum + robin_sum

    totals = ragged_map_ops.map_fn(
        fn=_total,
        elems={'batman': base, 'robin': scaled},
        dtype=dtypes.int32,
    )

    self.assertRaggedEqual(totals, [66, 44, 198])
def testRaggedMapOnStructure(self):
    """Checks map_fn on a dict structure reduced to one int32 per row."""
    ones_scale = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # [[10, 20, 30], [40], [50, 60, 70]]
    tens_scale = ragged_functional_ops.map_flat_values(
        mo.multiply, ones_scale, 10)
    inputs = {'batman': ones_scale, 'robin': tens_scale}

    def _sum_both(row_features):
        return (mo.reduce_sum(row_features['batman'])
                + mo.reduce_sum(row_features['robin']))

    row_totals = ragged_map_ops.map_fn(
        fn=_sum_both, elems=inputs, dtype=dtypes.int32)

    # Per row: sum(batman) + sum(robin).
    self.assertAllEqual(row_totals, [66, 44, 198])
def _ragged_set_op(set_op,
                   pred_begin,
                   pred_end,
                   gold_begin,
                   gold_end,
                   pred_label=None,
                   gold_label=None):
    """Applies a tf.sets op per batch entry to predicted and gold spans.

    `set_op` is bound as the first argument of `_per_batch_set_op`, which is
    then mapped over the six span components of every batch entry.

    Args:
      set_op: A callable tf.sets operation.
      pred_begin: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        beginning indices of a prediction span.
      pred_end: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        ending indices of a prediction span.
      gold_begin: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        beginning indices of a gold labelled span.
      gold_end: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        ending indices of a gold labelled span.
      pred_label: (optional) A `RaggedTensor` of shape [batch, (num_spans)]
        containing the prediction label types. When omitted, an all-zero
        label with the same row structure as `pred_begin` is used, i.e. all
        spans share one type.
      gold_label: (optional) A `RaggedTensor` of shape [batch, (num_spans)]
        containing the gold label types. When omitted, an all-zero label
        with the same row structure as `gold_begin` is used.

    Returns:
      The int32 result of mapping the per-batch op; per the original
      contract, a 1-D Tensor with the number of elements in the result of
      `set_op` for each batch entry.
    """

    def _uniform_labels(spans):
        # Same row partitioning as `spans`, every label set to zero.
        return spans.with_flat_values(array_ops.zeros_like(spans.flat_values))

    per_entry_op = functools.partial(_per_batch_set_op, set_op)
    if pred_label is None:
        pred_label = _uniform_labels(pred_begin)
    if gold_label is None:
        gold_label = _uniform_labels(gold_begin)

    span_components = (
        pred_begin, pred_end, pred_label,
        gold_begin, gold_end, gold_label,
    )
    return ragged_map_ops.map_fn(
        per_entry_op,
        span_components,
        dtype=dtypes.int32,
        infer_shape=False)
def testZip(self):
    """Pairs every value of a ragged row with that row's index."""
    ragged_values = ragged_factory_ops.constant(
        [[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]], dtypes.int64)
    row_count = ragged_values.nrows(out_type=dtypes.int64)
    # One row id per row, shaped [nrows, 1] so each mapped element is a
    # length-1 vector.
    row_ids = array_ops.expand_dims(mo.range(row_count), axis=1)

    def _pair_with_row_id(components):
        row_id, row = components
        repeated_id = backend.tile(row_id, array_ops.shape(row))
        return array_ops.stack([repeated_id, row], axis=1)

    zipped = ragged_map_ops.map_fn(
        _pair_with_row_id,
        (row_ids, ragged_values),
        dtype=ragged_tensor.RaggedTensorType(
            dtype=dtypes.int64, ragged_rank=1),
        infer_shape=False)

    expected = [
        [[0, 10], [0, 20]],
        [[1, 30], [1, 40]],
        [[2, 50], [2, 60]],
        [[3, 70]],
        [[4, 80], [4, 90], [4, 100]],
    ]
    self.assertRaggedEqual(zipped, expected)
def testZip(self):
    """Zips each ragged row with its row index via map_fn."""
    data = ragged_factory_ops.constant(
        [[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]], dtypes.int64)
    # Row indices as int64, shaped [nrows, 1].
    indices = array_ops.expand_dims(
        mo.range(data.nrows(), dtype=dtypes.int64), axis=1)

    def _attach_index(pair):
        index, values = pair
        tiled_index = backend.tile(index, array_ops.shape(values))
        return array_ops.stack([tiled_index, values], axis=1)

    output_type = ragged_tensor.RaggedTensorType(
        dtype=dtypes.int64, ragged_rank=1)
    result = ragged_map_ops.map_fn(
        _attach_index, (indices, data), dtype=output_type, infer_shape=False)

    self.assertRaggedEqual(
        result,
        [[[0, 10], [0, 20]],
         [[1, 30], [1, 40]],
         [[2, 50], [2, 60]],
         [[3, 70]],
         [[4, 80], [4, 90], [4, 100]]])
def testRaggedMap(
    self,
    fn,
    elems,
    expected_output,
    expected_ragged_rank=None,
    result_ragged_rank=None,
    elems_ragged_rank=None,
    dtype=dtypes.int64,
    result_dtype=None,
    infer_shape=False,
):
    """Runs map_fn on ragged `elems` and compares with `expected_output`.

    Args:
      fn: Function applied to every element of `elems`.
      elems: Nested Python list converted to a RaggedTensor.
      expected_output: Nested Python list with the expected result.
      expected_ragged_rank: ragged_rank used to build the expected tensor.
      result_ragged_rank: Accepted for the test-case signature; not used
        in this body.
      elems_ragged_rank: ragged_rank used to build the input tensor.
      dtype: dtype of the input tensor.
      result_dtype: Output type forwarded to map_fn as `dtype`.
      infer_shape: Forwarded to map_fn.
    """
    ragged_elems = ragged_factory_ops.constant(
        elems, dtype=dtype, ragged_rank=elems_ragged_rank)
    actual = ragged_map_ops.map_fn(
        fn=fn,
        elems=ragged_elems,
        dtype=result_dtype,
        infer_shape=infer_shape)
    expected = ragged_factory_ops.constant(
        expected_output, ragged_rank=expected_ragged_rank)
    self.assertRaggedEqual(expected, actual)
def testBatchGather(self):
    """Gathers per-row token subsets using per-row ragged indices."""
    token_rows = ragged_factory_ops.constant([
        ['hello', '.', 'there'],
        ['merhaba'],
        ['bonjour', '.', 'ca va', '?'],
    ])
    index_rows = ragged_factory_ops.constant([[0, 2], [0], [0, 2]])

    def _gather_row(pair):
        row_tokens, row_indices = pair
        return array_ops.gather(row_tokens, row_indices)

    string_rows = ragged_tensor.RaggedTensorType(
        dtype=dtypes.string, ragged_rank=1)
    gathered = ragged_map_ops.map_fn(
        _gather_row,
        (token_rows, index_rows),
        dtype=string_rows,
        infer_shape=False)

    self.assertAllEqual(
        gathered,
        [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']])
def testRaggedMap(
    self,
    fn,
    elems,
    expected_output,
    expected_ragged_rank=None,
    result_ragged_rank=None,
    elems_ragged_rank=None,
    dtype=dtypes.int64,
    result_dtype=None,
    infer_shape=False,
):
    """Checks that mapping `fn` over `elems` yields `expected_output`.

    `elems` and `expected_output` are nested Python lists that are turned
    into RaggedTensors with the requested ragged ranks. `result_dtype` and
    `infer_shape` are passed straight through to map_fn.
    `result_ragged_rank` is part of the signature but is not read here.
    """
    input_rt = ragged_factory_ops.constant(elems, dtype, elems_ragged_rank)
    mapped_rt = ragged_map_ops.map_fn(
        fn=fn, elems=input_rt, dtype=result_dtype, infer_shape=infer_shape)

    wanted_rt = ragged_factory_ops.constant(
        expected_output, ragged_rank=expected_ragged_rank)
    self.assertRaggedEqual(wanted_rt, mapped_rt)
def testBatchGather(self):
    """Checks a batched gather implemented with map_fn over (tokens, indices)."""
    words = ragged_factory_ops.constant([['hello', '.', 'there'],
                                         ['merhaba'],
                                         ['bonjour', '.', 'ca va', '?']])
    positions = ragged_factory_ops.constant([[0, 2], [0], [0, 2]])

    def _select(row):
        row_words, row_positions = row
        return array_ops.gather(row_words, row_positions)

    selected = ragged_map_ops.map_fn(
        _select,
        (words, positions),
        dtype=ragged_tensor.RaggedTensorType(
            dtype=dtypes.string, ragged_rank=1),
        infer_shape=False)

    expected = [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']]
    self.assertRaggedEqual(selected, expected)
def get_segments(self, sentences):
    """Builds (segment_a, segment_b, label) triples for next-sentence data.

    Args:
      sentences: A `RaggedTensor` of strings w/ shape [batch, (num_sentences)].

    Returns:
      A tuple of (segment_a, segment_b, is_next_sentence) where:

      segment_a: `sentences` with the document dimension merged away, i.e.
        all the original sentences.
      segment_b: For each sentence, either the sentence produced by rolling
        its document by one position (`shift=-1`), or a sentence taken from
        `self._shuffle_fn` applied to all sentences.
      is_next_sentence: The per-sentence choice between the two, cast to
        int64: nonzero where the rolled (next) sentence was used.
    """
    roll_by_one = functools.partial(manip_ops.roll, axis=0, shift=-1)
    rolled = ragged_map_ops.map_fn(
        roll_by_one,
        sentences,
        dtype=ragged_tensor.RaggedTensorType(dtypes.string, 1),
        infer_shape=False)

    all_sentences = sentences.flat_values
    shuffled = sentences.with_flat_values(self._shuffle_fn(all_sentences))

    # A draw above the threshold keeps the true next sentence; anything else
    # gets a randomly injected one.
    draws = self._random_fn(sentences.flat_values.shape)
    keep_next = sentences.with_flat_values(
        draws > self._random_next_sentence_threshold)
    second_segment = ragged_where_op.where(keep_next, x=rolled, y=shuffled)

    # Get rid of the docs dimension.
    first_segment = sentences.merge_dims(-2, -1)
    second_segment = second_segment.merge_dims(-2, -1)
    labels = math_ops.cast(keep_next.merge_dims(-2, -1), dtypes.int64)
    return first_segment, second_segment, labels
def _ragged_substr(text_input, begin, end):
    """Extracts substrings of `text_input` at the offsets in `begin`/`end`.

    Every input string is first repeated once per offset it owns, so the
    repeated text lines up with the flat offsets; substrings are then taken
    and re-packed with the row structure of `begin`.

    Args:
      text_input: Strings to slice; a RaggedTensor is reduced to its
        `flat_values`, anything else is used as is.
      begin: Ragged start offsets. Each mapped element must itself expose
        `row_lengths()`, so this is presumably ragged with at least two
        ragged dimensions (verify against callers).
      end: Ragged end offsets, with flat values matching those of `begin`.

    Returns:
      `begin` with its flat values replaced by the extracted substrings.
    """
    if ragged_tensor.is_ragged(text_input):
        flat_text = text_input.flat_values
    else:
        flat_text = text_input

    def _repeat_per_offset(pair):
        source_text, offsets = pair
        # Total number of offsets owned by this string.
        repeat_count = math_ops.reduce_sum(offsets.row_lengths())
        return array_ops.tile([source_text], [repeat_count])

    tiled_text = ragged_map_ops.map_fn(
        _repeat_per_offset,
        (flat_text, begin),
        dtype=ragged_tensor.RaggedTensorType(
            dtype=dtypes.string, ragged_rank=1),
        infer_shape=False,
    )

    substr_lengths = math_ops.sub(
        array_ops.squeeze(end.flat_values),
        array_ops.squeeze(begin.flat_values))
    substrings = string_ops.substr_v2(
        tiled_text, array_ops.squeeze(begin.flat_values), substr_lengths)
    return begin.with_flat_values(substrings.flat_values)
def testDefaultAttrValues(self):
    """Smoke test: map_fn with only `fn`, `elems` and `dtype` supplied.

    All remaining arguments keep their defaults; the test only requires
    that the call completes.
    """
    single_row = ragged_factory_ops.constant([[7]])
    ragged_int32 = ragged_tensor.RaggedTensorType(
        dtype=dtypes.int32, ragged_rank=1)

    def _identity(row):
        return row

    ragged_map_ops.map_fn(fn=_identity, elems=single_row, dtype=ragged_int32)