def testScalar(self):
  """Expects ValueError when unicode_encode is handed a scalar input."""
  # Plain Python integer.
  with self.cached_session(), self.assertRaises(ValueError):
    ragged_string_ops.unicode_encode(72, "UTF-8")
  # Scalar produced by constant_op.constant.
  with self.cached_session(), self.assertRaises(ValueError):
    ragged_string_ops.unicode_encode(constant_op.constant(72), "UTF-8")
def testReplaceErrors(self, encoding):
  """Checks unicode_encode results when errors are replaced.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  # The same five codepoints feed every case below; the expected strings show
  # the third and fourth entries being substituted.
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)
  default_marker_bytes = u"He\U0000fffd\U0000fffdo".encode(encoding)

  encoded = ragged_string_ops.unicode_encode(codepoints, encoding, "replace")
  self.assertRaggedEqual(encoded, default_marker_bytes)

  # Test custom replacement character
  encoded = ragged_string_ops.unicode_encode(
      codepoints, encoding, "replace", 111)
  self.assertRaggedEqual(encoded, u"Heooo".encode(encoding))

  # Verify "replace" is default
  encoded = ragged_string_ops.unicode_encode(codepoints, encoding)
  self.assertRaggedEqual(encoded, default_marker_bytes)

  # Replacement_char must be within range
  out_of_range_op = ragged_string_ops.unicode_encode(
      codepoints, encoding, "replace", 1114112)
  with self.assertRaises(errors.InvalidArgumentError):
    self.evaluate(out_of_range_op)
def testVector(self, encoding):
  """Encodes rank-1 codepoint arrays and compares with str.encode output.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  cases = (
      ([72, 101, 108, 108, 111], u"Hello"),
      ([72, 101, 195, 195, 128516], u"He\xc3\xc3\U0001f604"),
      # Single character string
      ([72], u"H"),
      ([128516], u"\U0001f604"),
  )
  for codepoints, text in cases:
    encoded = ragged_string_ops.unicode_encode(
        np.array(codepoints, np.int32), encoding)
    self.assertRaggedEqual(encoded, text.encode(encoding))
def testVector(self, encoding):
  """Encodes rank-1 codepoint arrays and compares with str.encode output.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def check(codepoints, text):
    # Encode one vector and compare it with the reference bytes.
    encoded = ragged_string_ops.unicode_encode(
        np.array(codepoints, np.int32), encoding)
    self.assertAllEqual(encoded, text.encode(encoding))

  check([ord('H'), ord('e'), ord('l'), ord('l'), ord('o')], u"Hello")
  check([ord('H'), ord('e'), 0xC3, 0xC3, 0x1F604], u"He\xc3\xc3\U0001f604")

  # Single character string
  check([ord('H')], u"H")
  check([0x1F604], u"\U0001f604")
def testVector(self, encoding):
  """Evaluates unicode_encode on rank-1 inputs and checks the bytes result.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  vectors_and_texts = [
      ([72, 101, 108, 108, 111], u"Hello"),
      ([72, 101, 195, 195, 128516], u"He\xc3\xc3\U0001f604"),
      # Single character string
      ([72], u"H"),
      ([128516], u"\U0001f604"),
  ]
  for values, text in vectors_and_texts:
    wanted = text.encode(encoding)
    encode_op = ragged_string_ops.unicode_encode(
        np.array(values, np.int32), encoding)
    with self.cached_session():
      actual = encode_op.eval()
      self.assertIsInstance(actual, bytes)
      self.assertAllEqual(actual, wanted)
def testScalar(self):
  """Expects ValueError for scalar inputs, both Python int and constant."""
  # Each factory builds its scalar inside the assertRaises block, matching
  # where the original expression was evaluated.
  scalar_factories = (lambda: 72, lambda: constant_op.constant(72))
  for make_scalar in scalar_factories:
    with self.cached_session():
      with self.assertRaises(ValueError):
        ragged_string_ops.unicode_encode(make_scalar(), "UTF-8")
def testStrictErrors(self, encoding):
  """Expects InvalidArgumentError when encoding with the "strict" policy.

  Args:
    encoding: Output encoding given to unicode_encode.
  """
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)
  with self.cached_session() as session:
    with self.assertRaises(errors.InvalidArgumentError):
      strict_op = ragged_string_ops.unicode_encode(
          codepoints, encoding, "strict")
      session.run(strict_op)
def testStrictErrors(self, encoding):
  """Expects InvalidArgumentError when encoding with the "strict" policy.

  Args:
    encoding: Output encoding given to unicode_encode.
  """
  raw_values = [ord('H'), ord('e'), 0x7FFFFFFF, -1, ord('o')]
  codepoints = np.array(raw_values, np.int32)
  with self.cached_session() as session:
    with self.assertRaises(errors.InvalidArgumentError):
      encode_op = ragged_string_ops.unicode_encode(
          codepoints, encoding, "strict")
      session.run(encode_op)
def testRaggedMatrixWithMultiDimensionInnerValues(self, encoding):
  """Encodes a ragged tensor built from nested row splits over 3-D values.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  inner_values = constant_op.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
       [[102, 105, 120, 101, 100], [119, 111, 114, 100, 115]],
       [[72, 121, 112, 101, 114], [99, 117, 98, 101, 46]]])
  row_splits = [
      constant_op.constant([0, 2, 3], dtype=np.int64),
      constant_op.constant([0, 1, 1, 3], dtype=np.int64),
  ]
  ragged_input = ragged_factory_ops.from_nested_row_splits(
      inner_values, row_splits)
  wanted = [
      [[[as_bytes(u"Hello"), as_bytes(u"World")]], []],
      [[[as_bytes(u"fixed"), as_bytes(u"words")],
        [as_bytes(u"Hyper"), as_bytes(u"cube.")]]],
  ]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 2)
    self.assertAllEqual(actual.tolist(), wanted)
    # These next two assertions don't necessarily need to be here as they test
    # internal representations and we already verified the value is correct.
    self.assertAllEqual(len(actual.nested_row_splits), len(row_splits))
    self.assertEqual(encode_op.inner_values.shape.ndims,
                     inner_values.shape.ndims - 1)
def testIgnoreErrors(self, encoding):
  """Checks the "ignore" policy output against u"Heo" in `encoding`.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  raw_values = [ord('H'), ord('e'), 0x7FFFFFFF, -1, ord('o')]
  codepoints = np.array(raw_values, np.int32)
  wanted = u"Heo".encode(encoding)
  encoded = ragged_string_ops.unicode_encode(codepoints, encoding, "ignore")
  self.assertAllEqual(encoded, wanted)
def testRaggedMatrixWithMultiDimensionInnerValues(self, encoding):
  """Encodes a ragged tensor built from nested row splits over 3-D values.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  hello_world = [[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]]
  fixed_words = [[102, 105, 120, 101, 100], [119, 111, 114, 100, 115]]
  hyper_cube = [[72, 121, 112, 101, 114], [99, 117, 98, 101, 46]]
  dense_values = constant_op.constant([hello_world, fixed_words, hyper_cube])
  splits = [
      constant_op.constant([0, 2, 3], dtype=np.int64),
      constant_op.constant([0, 1, 1, 3], dtype=np.int64),
  ]
  ragged_input = ragged_factory_ops.from_nested_row_splits(
      dense_values, splits)
  expected_text = [
      [[[u"Hello", u"World"]], []],
      [[[u"fixed", u"words"], [u"Hyper", u"cube."]]],
  ]
  # Encode every leaf string while keeping the four-level nesting intact.
  wanted = [[[[word.encode(encoding) for word in words]
              for words in group]
             for group in block]
            for block in expected_text]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 2)
    self.assertAllEqual(evaluated.tolist(), wanted)
    # These next two assertions don't necessarily need to be here as they test
    # internal representations and we already verified the value is correct.
    self.assertAllEqual(len(evaluated.nested_row_splits), len(splits))
    self.assertEqual(encode_op.inner_values.shape.ndims,
                     dense_values.shape.ndims - 1)
def _whitespace_tokenize_with_offsets_encode_decode_wrapper(
    self, input_tensor):
  """Tokenizes a tensor of UTF-8 strings with rank of 1.

  The strings are decoded to codepoints, tokenized by
  `_whitespace_tokenize_codepoints_with_offsets`, and the resulting tokens are
  encoded back to UTF-8 with their codepoint offsets mapped to byte offsets.

  Args:
    input_tensor: The single dimensional Tensor to tokenize.

  Returns:
    Tuple of RaggedTensors of tokenized text and byte offsets, with shapes
    [num_strings, (num_tokens or num_offsets)].
  """
  # Decode the strings and get byte offsets
  codepoints, byte_starts = ragged_string_ops.unicode_decode_with_offsets(
      input_tensor, "UTF-8")

  # Limit offsets are the start offsets shifted left by one position, with
  # string_length(input_tensor) appended as the final limit of each row.
  following_starts = byte_starts[:, 1:]
  string_lengths = math_ops.cast(
      array_ops.expand_dims(string_ops.string_length(input_tensor), 1),
      dtypes.int64)
  byte_limits = array_ops.concat([following_starts, string_lengths], 1)

  # Tokenize
  token_codepoints, token_starts, token_limits = (
      self._whitespace_tokenize_codepoints_with_offsets(codepoints))

  # Encode the codepoints and translate the codepoint offsets to byte offsets.
  utf8_tokens = ragged_string_ops.unicode_encode(token_codepoints, "UTF-8")
  token_byte_starts = array_ops.batch_gather(byte_starts, token_starts)
  # The limit is gathered at index (codepoint limit - 1).
  last_codepoint_index = math_ops.subtract(token_limits, [1])
  token_byte_limits = array_ops.batch_gather(byte_limits,
                                             last_codepoint_index)
  return (utf8_tokens, token_byte_starts, token_byte_limits)
def testMatrix(self, encoding):
  """Encodes a 2x5 codepoint matrix and expects one string per row.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  first_row = [72, 0x1F604, 108, 108, 111]
  second_row = [87, 0x1F604, 114, 108, 100]
  matrix = np.array([first_row, second_row], np.int32)
  wanted = [
      text.encode(encoding)
      for text in (u"H\U0001f604llo", u"W\U0001f604rld")
  ]
  encoded = ragged_string_ops.unicode_encode(matrix, encoding)
  self.assertAllEqual(encoded, wanted)
def testRaggedMatrix(self, encoding):
  """Encodes a two-row ragged matrix and expects one string per row.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_rows = ragged_factory_ops.constant(
      [[72, 195, 108, 108, 111], [87, 128516, 114, 108, 100, 46]], np.int32)
  wanted = [
      text.encode(encoding) for text in (u"H\xc3llo", u"W\U0001f604rld.")
  ]
  encoded = ragged_string_ops.unicode_encode(ragged_rows, encoding)
  self.assertAllEqual(encoded, wanted)
def test4DimRaggedMatrix(self, encoding):
  """Encodes a 4-level nested ragged constant, including an empty row.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]]],
       [[[]], [[72, 121, 112, 101]]]], np.int32)
  expected_text = [[[u"Hello", u"World"]], [[u""], [u"Hype"]]]
  wanted = [[[word.encode(encoding) for word in words]
             for words in group]
            for group in expected_text]
  encoded = ragged_string_ops.unicode_encode(ragged_input, encoding)
  self.assertAllEqual(encoded, wanted)
def test3DimMatrixWithRagged2ndAnd3rdDim(self, encoding):
  """Encodes a 3-level ragged constant whose middle entry is empty.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]],
       [],
       [[0x1F604]]], np.int32)
  expected_text = [[u"Hello", u"World."], [], [u"\U0001f604"]]
  wanted = [[word.encode(encoding) for word in words]
            for words in expected_text]
  encoded = ragged_string_ops.unicode_encode(ragged_input, encoding)
  self.assertAllEqual(encoded, wanted)
def testRaggedMatrix(self, encoding):
  """Encodes a two-row ragged matrix and expects one string per row.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  short_row = [ord('H'), 0xC3, ord('l'), ord('l'), ord('o')]
  long_row = [ord('W'), 0x1F604, ord('r'), ord('l'), ord('d'), ord('.')]
  ragged_rows = ragged_factory_ops.constant([short_row, long_row], np.int32)
  wanted = [
      u"H\xc3llo".encode(encoding),
      u"W\U0001f604rld.".encode(encoding),
  ]
  encoded = ragged_string_ops.unicode_encode(ragged_rows, encoding)
  self.assertAllEqual(encoded, wanted)
def testIgnoreErrors(self, encoding):
  """Runs unicode_encode with "ignore" and expects the bytes of u"Heo".

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)
  wanted = u"Heo".encode(encoding)
  ignore_op = ragged_string_ops.unicode_encode(codepoints, encoding, "ignore")
  with self.cached_session() as session:
    actual = session.run(ignore_op)
    self.assertIsInstance(actual, bytes)
    self.assertAllEqual(actual, wanted)
def testIgnoreErrors(self, encoding):
  """Evaluates unicode_encode with "ignore" and expects the bytes of u"Heo".

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  raw_values = [72, 101, 2147483647, -1, 111]
  wanted = u"Heo".encode(encoding)
  encode_op = ragged_string_ops.unicode_encode(
      np.array(raw_values, np.int32), encoding, "ignore")
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertIsInstance(evaluated, bytes)
    self.assertAllEqual(evaluated, wanted)
def test3DimMatrix(self, encoding):
  """Encodes a dense 3x2x5 constant and expects a 3x2 grid of strings.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  dense_input = constant_op.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
       [[102, 105, 120, 101, 100], [119, 111, 114, 100, 115]],
       [[72, 121, 112, 101, 114], [99, 117, 98, 101, 46]]], np.int32)
  expected_text = [
      [u"Hello", u"World"],
      [u"fixed", u"words"],
      [u"Hyper", u"cube."],
  ]
  wanted = [[word.encode(encoding) for word in pair]
            for pair in expected_text]
  encoded = ragged_string_ops.unicode_encode(dense_input, encoding)
  self.assertAllEqual(encoded, wanted)
def testRaggedMatrix(self, encoding):
  """Evaluates unicode_encode on a two-row ragged matrix.

  Also asserts that the returned op is an `ops.Tensor`.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_rows = ragged_factory_ops.constant(
      [[72, 195, 108, 108, 111], [87, 128516, 114, 108, 100, 46]], np.int32)
  wanted = [
      text.encode(encoding) for text in (u"H\xc3llo", u"W\U0001f604rld.")
  ]
  encode_op = ragged_string_ops.unicode_encode(ragged_rows, encoding)
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertIsInstance(encode_op, ops.Tensor)
    self.assertAllEqual(evaluated, wanted)
def test3DimMatrixWithRagged2ndAnd3rdDim(self, encoding):
  """Evaluates unicode_encode on a 3-level ragged constant.

  Asserts a ragged_rank of 1 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]],
       [],
       [[128516]]], np.int32)
  expected_text = [[u"Hello", u"World."], [], [u"\U0001f604"]]
  wanted = [[word.encode(encoding) for word in words]
            for words in expected_text]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 1)
    self.assertAllEqual(evaluated.tolist(), wanted)
def test4DimRaggedMatrix(self, encoding):
  """Evaluates unicode_encode on a 4-level ragged constant.

  Asserts a ragged_rank of 2 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]]],
       [[[]], [[72, 121, 112, 101]]]], np.int32)
  expected_text = [[[u"Hello", u"World"]], [[u""], [u"Hype"]]]
  wanted = [[[word.encode(encoding) for word in words]
             for words in group]
            for group in expected_text]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 2)
    self.assertAllEqual(evaluated.tolist(), wanted)
def test3DimMatrixWithRagged3rdDim(self, encoding):
  """Encodes a 3-level ragged constant with variable-length innermost rows.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]],
       [[68, 111, 110, 39, 116], [119, 195, 114, 114, 121, 44, 32, 98, 101]],
       [[0x1F604], []]], np.int32)
  expected_text = [
      [u"Hello", u"World."],
      [u"Don't", u"w\xc3rry, be"],
      [u"\U0001f604", u""],
  ]
  wanted = [[phrase.encode(encoding) for phrase in pair]
            for pair in expected_text]
  encoded = ragged_string_ops.unicode_encode(ragged_input, encoding)
  self.assertAllEqual(encoded, wanted)
def testRaggedMatrix(self, encoding):
  """Evaluates unicode_encode on a two-row ragged matrix.

  Also asserts that the returned op is an `ops.Tensor`.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  codepoint_rows = [[72, 195, 108, 108, 111], [87, 128516, 114, 108, 100, 46]]
  ragged_rows = ragged_factory_ops.constant(codepoint_rows, np.int32)
  wanted = [as_bytes(u"H\xc3llo"), as_bytes(u"W\U0001f604rld.")]
  encode_op = ragged_string_ops.unicode_encode(ragged_rows, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertIsInstance(encode_op, ops.Tensor)
    self.assertAllEqual(actual, wanted)
def test3DimMatrixWithRagged2ndAnd3rdDim(self, encoding):
  """Evaluates unicode_encode on a 3-level ragged constant.

  Asserts a ragged_rank of 1 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  nested_codepoints = [
      [[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]],
      [],
      [[128516]],
  ]
  ragged_input = ragged_factory_ops.constant(nested_codepoints, np.int32)
  wanted = [
      [as_bytes(u"Hello"), as_bytes(u"World.")],
      [],
      [as_bytes(u"\U0001f604")],
  ]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 1)
    self.assertAllEqual(actual.tolist(), wanted)
def test4DimRaggedMatrix(self, encoding):
  """Evaluates unicode_encode on a 4-level ragged constant.

  Asserts a ragged_rank of 2 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  nested_codepoints = [
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]]],
      [[[]], [[72, 121, 112, 101]]],
  ]
  ragged_input = ragged_factory_ops.constant(nested_codepoints, np.int32)
  wanted = [
      [[as_bytes(u"Hello"), as_bytes(u"World")]],
      [[as_bytes(u"")], [as_bytes(u"Hype")]],
  ]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 2)
    self.assertAllEqual(actual.tolist(), wanted)
def test3DimMatrix(self, encoding):
  """Evaluates unicode_encode on a dense 3x2x5 constant.

  Also asserts that the returned op is an `ops.Tensor`.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  dense_input = constant_op.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
       [[102, 105, 120, 101, 100], [119, 111, 114, 100, 115]],
       [[72, 121, 112, 101, 114], [99, 117, 98, 101, 46]]], np.int32)
  expected_text = [
      [u"Hello", u"World"],
      [u"fixed", u"words"],
      [u"Hyper", u"cube."],
  ]
  wanted = [[word.encode(encoding) for word in pair]
            for pair in expected_text]
  encode_op = ragged_string_ops.unicode_encode(dense_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertIsInstance(encode_op, ops.Tensor)
    self.assertAllEqual(actual, wanted)
def testReplaceErrors(self, encoding):
  """Evaluates unicode_encode under the "replace" policy.

  Covers the default replacement, a custom replacement character, "replace"
  as the implicit default, and a replacement_char that is expected to raise
  InvalidArgumentError.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)
  default_marker_bytes = u"He\U0000fffd\U0000fffdo".encode(encoding)

  def check(encode_op, wanted):
    # Evaluate in a session and verify both the type and the content.
    with self.cached_session():
      actual = encode_op.eval()
      self.assertIsInstance(actual, bytes)
      self.assertAllEqual(actual, wanted)

  check(
      ragged_string_ops.unicode_encode(codepoints, encoding, "replace"),
      default_marker_bytes)

  # Test custom replacement character
  check(
      ragged_string_ops.unicode_encode(codepoints, encoding, "replace", 111),
      u"Heooo".encode(encoding))

  # Verify "replace" is default
  check(
      ragged_string_ops.unicode_encode(codepoints, encoding),
      default_marker_bytes)

  # Replacement_char must be within range
  out_of_range_op = ragged_string_ops.unicode_encode(
      codepoints, encoding, "replace", 1114112)
  with self.cached_session():
    with self.assertRaises(errors.InvalidArgumentError):
      out_of_range_op.eval()
def testVector(self, encoding):
  """Evaluates unicode_encode on rank-1 inputs and checks the bytes result.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def check(values, text):
    # Build the op outside the session, then evaluate and verify inside it.
    wanted = text.encode(encoding)
    encode_op = ragged_string_ops.unicode_encode(
        np.array(values, np.int32), encoding)
    with self.cached_session():
      actual = encode_op.eval()
      self.assertIsInstance(actual, bytes)
      self.assertAllEqual(actual, wanted)

  check([72, 101, 108, 108, 111], u"Hello")
  check([72, 101, 195, 195, 128516], u"He\xc3\xc3\U0001f604")

  # Single character string
  check([72], u"H")
  check([128516], u"\U0001f604")
def detokenize(self, input, name=None):  # pylint: disable=redefined-builtin
  """Detokenizes input codepoints (integers) to UTF-8 strings.

  Args:
    input: A `RaggedTensor` or `Tensor` of codepoints (ints) with a rank of at
      least 1.
    name: The name argument that is passed to the op function.

  Returns:
    A N-1 dimensional string tensor of the detokenized text.
  """
  # The caller's `name` is forwarded untouched. Previously it was overwritten
  # with None here, so the documented argument had no effect.
  # NOTE(review): the default scope name is "UnicodeCharTokenize" even though
  # this is the detokenize path; left unchanged so existing op names do not
  # shift — confirm whether that is intentional.
  with ops.name_scope(name, "UnicodeCharTokenize", [input, self]):
    input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    return ragged_string_ops.unicode_encode(input_tensor, "UTF-8")
def testReplaceErrors(self, encoding):
  """Evaluates unicode_encode under the "replace" policy.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)

  # Each entry is (extra positional arguments after `encoding`, expected text).
  passing_cases = (
      (("replace",), u"He\U0000fffd\U0000fffdo"),
      # Test custom replacement character
      (("replace", 111), u"Heooo"),
      # Verify "replace" is default
      ((), u"He\U0000fffd\U0000fffdo"),
  )
  for extra_args, text in passing_cases:
    wanted = text.encode(encoding)
    encode_op = ragged_string_ops.unicode_encode(
        codepoints, encoding, *extra_args)
    with self.cached_session():
      actual = encode_op.eval()
      self.assertIsInstance(actual, bytes)
      self.assertAllEqual(actual, wanted)

  # Replacement_char must be within range
  failing_op = ragged_string_ops.unicode_encode(
      codepoints, encoding, "replace", 1114112)
  with self.cached_session():
    with self.assertRaises(errors.InvalidArgumentError):
      failing_op.eval()
def testRequireParams(self):
  """Expects TypeError for each incomplete call to unicode_encode."""
  # No arguments at all.
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode()
  # Only the first positional argument.
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode(72)
  # Only an `encoding` keyword argument.
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode(encoding="UTF-8")
def testRequireParams(self):
  """Expects TypeError for each incomplete call to unicode_encode."""
  # The pylint pragmas stay on the call lines: the calls are wrong on purpose.
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode()  # pylint: disable=no-value-for-parameter
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode(72)  # pylint: disable=no-value-for-parameter
  with self.cached_session(), self.assertRaises(TypeError):
    ragged_string_ops.unicode_encode(encoding="UTF-8")  # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
def testRequireParams(self):
  """Expects TypeError for each incomplete call to unicode_encode."""
  # (positional args, keyword args) for every incomplete invocation.
  incomplete_calls = (
      ((), {}),
      ((72,), {}),
      ((), {"encoding": "UTF-8"}),
  )
  for args, kwargs in incomplete_calls:
    with self.cached_session():
      with self.assertRaises(TypeError):
        ragged_string_ops.unicode_encode(*args, **kwargs)
def test4DimMatrix(self, encoding):
  """Evaluates unicode_encode on a dense 3x2x1x5 constant.

  Also asserts that the returned op is an `ops.Tensor`.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  dense_input = constant_op.constant(
      [[[[72, 101, 108, 108, 111]], [[87, 111, 114, 108, 100]]],
       [[[102, 105, 120, 101, 100]], [[119, 111, 114, 100, 115]]],
       [[[72, 121, 112, 101, 114]], [[99, 117, 98, 101, 46]]]], np.int32)
  expected_text = [
      [[u"Hello"], [u"World"]],
      [[u"fixed"], [u"words"]],
      [[u"Hyper"], [u"cube."]],
  ]
  wanted = [[[word.encode(encoding) for word in cell]
             for cell in row]
            for row in expected_text]
  encode_op = ragged_string_ops.unicode_encode(dense_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertIsInstance(encode_op, ops.Tensor)
    self.assertAllEqual(actual, wanted)
def test3DimMatrixWithRagged2ndDim(self, encoding):
  """Evaluates unicode_encode on a ragged constant with 2, 1 and 3 rows.

  Asserts a ragged_rank of 1 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  ragged_input = ragged_factory_ops.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
       [[102, 105, 120, 101, 100]],
       [[72, 121, 112, 101, 114], [119, 111, 114, 100, 115],
        [99, 117, 98, 101, 46]]], np.int32)
  expected_text = [
      [u"Hello", u"World"],
      [u"fixed"],
      [u"Hyper", u"words", u"cube."],
  ]
  wanted = [[word.encode(encoding) for word in words]
            for words in expected_text]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    evaluated = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 1)
    self.assertAllEqual(evaluated.tolist(), wanted)
def testRaggedMatrixWithMultiDimensionInnerValues(self, encoding):
  """Encodes a RaggedTensor built from nested row splits over 3-D values.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  flat_values = constant_op.constant(
      [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
       [[102, 105, 120, 101, 100], [119, 111, 114, 100, 115]],
       [[72, 121, 112, 101, 114], [99, 117, 98, 101, 46]]])
  nested_splits = [
      constant_op.constant([0, 2, 3], dtype=np.int64),
      constant_op.constant([0, 1, 1, 3], dtype=np.int64),
  ]
  ragged_input = ragged_tensor.RaggedTensor.from_nested_row_splits(
      flat_values, nested_splits)
  wanted = [
      [[[as_bytes(u"Hello"), as_bytes(u"World")]], []],
      [[[as_bytes(u"fixed"), as_bytes(u"words")],
        [as_bytes(u"Hyper"), as_bytes(u"cube.")]]],
  ]
  encoded = ragged_string_ops.unicode_encode(ragged_input, encoding)
  self.assertAllEqual(encoded, wanted)
def testReplaceErrors(self, encoding):
  """Checks unicode_encode results when errors are replaced.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """
  mixed_values = [ord('H'), ord('e'), 0x7FFFFFFF, -1, ord('o')]

  def encode(values, *policy_args):
    # A fresh int32 array is built for every call, as each case did before.
    return ragged_string_ops.unicode_encode(
        np.array(values, np.int32), encoding, *policy_args)

  self.assertAllEqual(
      encode(mixed_values, "replace"),
      u"He\U0000fffd\U0000fffdo".encode(encoding))

  # Test custom replacement character
  self.assertAllEqual(
      encode(mixed_values, "replace", ord('o')),
      u"Heooo".encode(encoding))

  # Verify "replace" is default
  self.assertAllEqual(
      encode(mixed_values),
      u"He\U0000fffd\U0000fffdo".encode(encoding))

  # Verify non-default replacement with an unpaired surrogate.
  self.assertAllEqual(
      encode([0xD801], "replace", 0x41), u"A".encode(encoding))

  # Test with a noncharacter code point.
  self.assertAllEqual(
      encode([0x1FFFF], "replace", 0x41), u"A".encode(encoding))

  # Replacement_char must be within range
  out_of_range_op = encode(mixed_values, "replace", 0x110000)
  with self.assertRaises(errors.InvalidArgumentError):
    self.evaluate(out_of_range_op)
def test3DimMatrixWithRagged2ndDim(self, encoding):
  """Evaluates unicode_encode on a ragged constant with 2, 1 and 3 rows.

  Asserts a ragged_rank of 1 on the result op and compares the evaluated
  value as nested lists.

  Args:
    encoding: Output encoding given to unicode_encode and used to build the
      expected bytes.
  """

  def as_bytes(text):
    return text.encode(encoding)

  nested_codepoints = [
      [[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
      [[102, 105, 120, 101, 100]],
      [[72, 121, 112, 101, 114], [119, 111, 114, 100, 115],
       [99, 117, 98, 101, 46]],
  ]
  ragged_input = ragged_factory_ops.constant(nested_codepoints, np.int32)
  wanted = [
      [as_bytes(u"Hello"), as_bytes(u"World")],
      [as_bytes(u"fixed")],
      [as_bytes(u"Hyper"), as_bytes(u"words"), as_bytes(u"cube.")],
  ]
  encode_op = ragged_string_ops.unicode_encode(ragged_input, encoding)
  with self.cached_session():
    actual = encode_op.eval()
    self.assertEqual(encode_op.ragged_rank, 1)
    self.assertAllEqual(actual.tolist(), wanted)
def testStrictErrors(self, encoding):
  """Expects InvalidArgumentError when evaluating with the "strict" policy.

  Args:
    encoding: Output encoding given to unicode_encode.
  """
  codepoints = np.array([72, 101, 2147483647, -1, 111], np.int32)
  with self.cached_session():
    with self.assertRaises(errors.InvalidArgumentError):
      strict_op = ragged_string_ops.unicode_encode(
          codepoints, encoding, "strict")
      strict_op.eval()
def f(v):
  """Returns the result of unicode_encode applied to `v` with "UTF-8"."""
  utf8_strings = ragged_string_ops.unicode_encode(v, "UTF-8")
  return utf8_strings