def testBatchDecode(self):
  """Decodes a batch of two UTF-8 strings and checks all three outputs."""
  first_row = ["仅", "今", "年", "前"]
  second_row = ["分", "享", "介", "面", "終", "於", "迎", "來", "更", "新"]
  want_values = [codepoint(ch) for ch in first_row + second_row]
  # Row boundaries into the flat value list: 4 chars, then 10 more.
  want_row_splits = [0, 4, 14]
  # Each character is 3 bytes in UTF-8; the expected byte offsets restart at 0
  # for the second string.
  want_offsets = list(range(0, 12, 3)) + list(range(0, 30, 3))

  batch = constant_op.constant(["仅今年前", "分享介面終於迎來更新"])
  splits, values, byte_starts = gen_string_ops.unicode_decode_with_offsets(
      batch, "utf-8")

  with self.test_session():
    self.assertAllEqual(want_values, self.evaluate(values).tolist())
    self.assertAllEqual(want_row_splits, self.evaluate(splits).tolist())
    self.assertAllEqual(want_offsets, self.evaluate(byte_starts).tolist())
def testBatchDecode(self):
  """Decodes a batch of two UTF-8 strings and checks all three outputs."""
  short_chars = ["仅", "今", "年", "前"]
  long_chars = ["分", "享", "介", "面", "終", "於", "迎", "來", "更", "新"]
  expected_values = [codepoint(ch) for ch in short_chars + long_chars]
  # Row boundaries into the flat value list: 4 chars, then 10 more.
  expected_splits = [0, 4, 14]
  # Each character is 3 bytes in UTF-8; the expected byte offsets restart at 0
  # for the second string.
  expected_starts = list(range(0, 12, 3)) + list(range(0, 30, 3))

  strings = constant_op.constant(["仅今年前", "分享介面終於迎來更新"])
  splits, values, starts = gen_string_ops.unicode_decode_with_offsets(
      strings, "utf-8")

  with self.test_session():
    self.assertAllEqual(expected_values, values.eval().tolist())
    self.assertAllEqual(expected_splits, splits.eval().tolist())
    self.assertAllEqual(expected_starts, starts.eval().tolist())
def testBatchDecode(self):
  """Decodes a 4-char and a 16-char UTF-8 string in one batch."""
  first_row = ["仅", "今", "年", "前"]
  second_row = [
      "中", "国", "进", "出", "口", "银", "行", "与",
      "中", "国", "银", "行", "加", "强", "合", "作",
  ]
  want_values = [codepoint(ch) for ch in first_row + second_row]
  # Row boundaries into the flat value list: 4 chars, then 16 more.
  want_row_splits = [0, 4, 20]
  # Each character is 3 bytes in UTF-8; the expected byte offsets restart at 0
  # for the second string.
  want_offsets = list(range(0, 12, 3)) + list(range(0, 48, 3))

  batch = constant_op.constant(["仅今年前", "中国进出口银行与中国银行加强合作"])
  splits, values, byte_starts = gen_string_ops.unicode_decode_with_offsets(
      batch, "utf-8")

  with self.test_session():
    self.assertAllEqual(want_values, values.eval().tolist())
    self.assertAllEqual(want_row_splits, splits.eval().tolist())
    self.assertAllEqual(want_offsets, byte_starts.eval().tolist())
def testBatchDecode(self):
  """Decodes a 4-char and a 16-char UTF-8 string in one batch."""
  short_chars = ["仅", "今", "年", "前"]
  long_chars = [
      "中", "国", "进", "出", "口", "银", "行", "与",
      "中", "国", "银", "行", "加", "强", "合", "作",
  ]
  expected_values = [codepoint(ch) for ch in short_chars + long_chars]
  # Row boundaries into the flat value list: 4 chars, then 16 more.
  expected_splits = [0, 4, 20]
  # Each character is 3 bytes in UTF-8; the expected byte offsets restart at 0
  # for the second string.
  expected_starts = list(range(0, 12, 3)) + list(range(0, 48, 3))

  strings = constant_op.constant(["仅今年前", "中国进出口银行与中国银行加强合作"])
  splits, values, starts = gen_string_ops.unicode_decode_with_offsets(
      strings, "utf-8")

  with self.test_session():
    self.assertAllEqual(expected_values, values.eval().tolist())
    self.assertAllEqual(expected_splits, splits.eval().tolist())
    self.assertAllEqual(expected_starts, starts.eval().tolist())
def testBadReplacementChar(self):
  """Expects InvalidArgumentError for replacement_char=11141111.

  11141111 is larger than 0x10FFFF (1114111), the highest Unicode code point.
  """
  invalid_utf8 = constant_op.constant([b"\xFE"])
  _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
      invalid_utf8, "utf-8", errors="replace", replacement_char=11141111)
  # The op is built above; the error is expected only once it is evaluated.
  with self.assertRaises(errors.InvalidArgumentError), self.test_session():
    char_values.eval()
def testStrictError(self):
  """Expects InvalidArgumentError when errors="strict" meets byte 0xFE."""
  # b"\xFEED" is the byte 0xFE followed by the ASCII letters "E" and "D".
  malformed = constant_op.constant([b"\xFEED"])
  _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
      malformed, "utf-8", errors="strict")
  # The op is built above; the error is expected only once it is evaluated.
  with self.assertRaises(errors.InvalidArgumentError), self.test_session():
    char_values.eval()
def testReplaceOnError(self):
  """Expects byte 0xFE to decode to 65533 when errors="replace"."""
  # 65533 == 0xFFFD, the Unicode replacement character.
  want_values = [65533]
  invalid_utf8 = constant_op.constant([b"\xFE"])
  _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
      invalid_utf8, "utf-8", errors="replace")
  with self.test_session():
    got_values = char_values.eval().tolist()
    self.assertAllEqual(got_values, want_values)
def testStrictError(self):
  """Expects InvalidArgumentError when errors="strict" meets byte 0xFE."""
  # b"\xFEED" is the byte 0xFE followed by the ASCII letters "E" and "D".
  bad_bytes = constant_op.constant([b"\xFEED"])
  _, decoded_values, _ = gen_string_ops.unicode_decode_with_offsets(
      bad_bytes, "utf-8", errors="strict")
  # The op is built above; the error is expected only once it is evaluated.
  with self.assertRaises(errors.InvalidArgumentError), self.test_session():
    decoded_values.eval()
def testBadReplacementChar(self):
  """Expects InvalidArgumentError for replacement_char=11141111.

  11141111 is larger than 0x10FFFF (1114111), the highest Unicode code point.
  """
  bad_bytes = constant_op.constant([b"\xFE"])
  _, decoded_values, _ = gen_string_ops.unicode_decode_with_offsets(
      bad_bytes, "utf-8", errors="replace", replacement_char=11141111)
  # The op is built above; the error is expected only once it is evaluated.
  with self.assertRaises(errors.InvalidArgumentError), self.test_session():
    decoded_values.eval()
def testReplaceOnError(self):
  """Expects byte 0xFE to decode to 65533 when errors="replace"."""
  # 65533 == 0xFFFD, the Unicode replacement character.
  expected_values = [65533]
  bad_bytes = constant_op.constant([b"\xFE"])
  _, decoded_values, _ = gen_string_ops.unicode_decode_with_offsets(
      bad_bytes, "utf-8", errors="replace")
  with self.test_session():
    actual_values = decoded_values.eval().tolist()
    self.assertAllEqual(actual_values, expected_values)
def testDecodeGenOp(self,
                    doc,
                    expected_row_splits=None,
                    expected_char_values=None,
                    expected_char_to_byte_starts=None,
                    **args):
  """Checks the outputs of gen_string_ops.unicode_decode_with_offsets.

  Args:
    doc: Description of the case; not used by the test body.
    expected_row_splits: Value the op's `row_splits` output must equal.
    expected_char_values: Value the op's `char_values` output must equal.
    expected_char_to_byte_starts: Value the op's `char_to_byte_starts` output
      must equal.
    **args: Keyword arguments forwarded unchanged to the op.
  """
  decoded = gen_string_ops.unicode_decode_with_offsets(**args)
  # Pair each expectation with the name of the op output it is checked against.
  checks = (
      (expected_row_splits, "row_splits"),
      (expected_char_values, "char_values"),
      (expected_char_to_byte_starts, "char_to_byte_starts"),
  )
  for want, output_name in checks:
    self.assertAllEqual(want, getattr(decoded, output_name))
def testDecodeGenOp(self,
                    doc,
                    expected_row_splits=None,
                    expected_char_values=None,
                    expected_char_to_byte_starts=None,
                    **args):
  """Checks the outputs of gen_string_ops.unicode_decode_with_offsets.

  Args:
    doc: Description of the case; not used by the test body.
    expected_row_splits: Value the op's `row_splits` output must equal.
    expected_char_values: Value the op's `char_values` output must equal.
    expected_char_to_byte_starts: Value the op's `char_to_byte_starts` output
      must equal.
    **args: Keyword arguments forwarded unchanged to the op.
  """
  op_outputs = gen_string_ops.unicode_decode_with_offsets(**args)
  # Pair each expectation with the name of the op output it is checked against.
  expectations = (
      (expected_row_splits, "row_splits"),
      (expected_char_values, "char_values"),
      (expected_char_to_byte_starts, "char_to_byte_starts"),
  )
  for expected, field_name in expectations:
    self.assertAllEqual(expected, getattr(op_outputs, field_name))
def testIgnoreOnError(self):
  """Expects errors="ignore" to yield only the code points of "hello"."""
  want_values = [codepoint(ch) for ch in ["h", "e", "l", "l", "o"]]
  # The input is the byte 0xFE followed by the ASCII text "hello".
  noisy_input = constant_op.constant([b"\xFEhello"])
  _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
      noisy_input, "utf-8", errors="ignore")
  with self.test_session():
    got_values = char_values.eval().tolist()
    self.assertAllEqual(got_values, want_values)
def testIgnoreOnError(self):
  """Expects errors="ignore" to yield only the code points of "hello"."""
  expected_values = [codepoint(ch) for ch in ["h", "e", "l", "l", "o"]]
  # The input is the byte 0xFE followed by the ASCII text "hello".
  bad_prefix_input = constant_op.constant([b"\xFEhello"])
  _, decoded_values, _ = gen_string_ops.unicode_decode_with_offsets(
      bad_prefix_input, "utf-8", errors="ignore")
  with self.test_session():
    actual_values = decoded_values.eval().tolist()
    self.assertAllEqual(actual_values, expected_values)
def testBasicDecodeWithOffset(self):
  """Decodes one four-character UTF-8 string and checks all three outputs."""
  want_values = [codepoint(ch) for ch in ["仅", "今", "年", "前"]]
  want_row_splits = [0, 4]
  # Each of the four characters is 3 bytes long in UTF-8.
  want_starts = [0, 3, 6, 9]

  single = constant_op.constant(["仅今年前"])
  splits, values, byte_starts = gen_string_ops.unicode_decode_with_offsets(
      single, "utf-8")

  with self.test_session():
    self.assertAllEqual(want_values, values.eval().tolist())
    self.assertAllEqual(splits.eval().tolist(), want_row_splits)
    self.assertAllEqual(byte_starts.eval().tolist(), want_starts)
def testReplaceControlChars(self):
  """Expects a leading \\x02 to decode to 65533 with control replacement on."""
  # 65533 == 0xFFFD; it stands in for the \x02 control character at index 0.
  want_values = [65533] + [codepoint(ch) for ch in ["仅", "今", "年", "前"]]
  want_row_splits = [0, 5]

  with_control = constant_op.constant(["\x02仅今年前"])
  splits, values, _ = gen_string_ops.unicode_decode_with_offsets(
      with_control, "utf-8", replace_control_characters=True)

  with self.test_session():
    self.assertAllEqual(want_values, values.eval().tolist())
    self.assertAllEqual(want_row_splits, splits.eval().tolist())
def testBasicDecodeWithOffset(self):
  """Decodes one four-character UTF-8 string and checks all three outputs."""
  expected_values = [codepoint(ch) for ch in ["仅", "今", "年", "前"]]
  expected_splits = [0, 4]
  # Each of the four characters is 3 bytes long in UTF-8.
  expected_starts = [0, 3, 6, 9]

  strings = constant_op.constant(["仅今年前"])
  splits, values, starts = gen_string_ops.unicode_decode_with_offsets(
      strings, "utf-8")

  with self.test_session():
    self.assertAllEqual(expected_values, values.eval().tolist())
    self.assertAllEqual(splits.eval().tolist(), expected_splits)
    self.assertAllEqual(starts.eval().tolist(), expected_starts)
def testReplaceControlChars(self):
  """Expects a leading \\x02 to decode to 65533 with control replacement on."""
  # 65533 == 0xFFFD; it stands in for the \x02 control character at index 0.
  cjk_values = [codepoint(ch) for ch in ["仅", "今", "年", "前"]]
  expected_values = [65533] + cjk_values
  expected_splits = [0, 5]

  strings = constant_op.constant(["\x02仅今年前"])
  splits, decoded_values, _ = gen_string_ops.unicode_decode_with_offsets(
      strings, "utf-8", replace_control_characters=True)

  with self.test_session():
    self.assertAllEqual(expected_values, decoded_values.eval().tolist())
    self.assertAllEqual(expected_splits, splits.eval().tolist())
def testBadErrorPolicy(self):
  """Expects ValueError when `errors` is not a recognised policy name."""
  text = constant_op.constant(["hippopotamus"])
  with self.assertRaises(ValueError):
    # Call the op without unpacking its result: a failed tuple unpack also
    # raises ValueError, which would satisfy assertRaises even if the op had
    # accepted the bogus policy.
    gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="oranguatan")
def testBadErrorPolicy(self):
  """Expects ValueError when `errors` is not a recognised policy name."""
  text = constant_op.constant(["hippopotamus"])
  with self.assertRaises(ValueError):
    # Call the op without unpacking its result: a failed tuple unpack also
    # raises ValueError, which would satisfy assertRaises even if the op had
    # accepted the bogus policy.
    gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="oranguatan")