def testBatchDecode(self):
    """Decodes a batch of two UTF-8 strings and checks all three outputs.

    Verifies the flat list of codepoints, the row splits that delimit each
    input string, and the byte offset at which every character starts.
    """
    text = constant_op.constant(
        ["仅今年前", "分享介面終於迎來更新"])
    row_splits, utf8_text, offsets = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8")

    # `cached_session` replaces the deprecated `test_session`.
    with self.cached_session():
        expected_codepoints = [
            codepoint("仅"),
            codepoint("今"),
            codepoint("年"),
            codepoint("前"),
            codepoint("分"),
            codepoint("享"),
            codepoint("介"),
            codepoint("面"),
            codepoint("終"),
            codepoint("於"),
            codepoint("迎"),
            codepoint("來"),
            codepoint("更"),
            codepoint("新"),
        ]
        self.assertAllEqual(expected_codepoints,
                            self.evaluate(utf8_text).tolist())
        # 4 characters in the first string, 10 in the second.
        self.assertAllEqual([0, 4, 14], self.evaluate(row_splits).tolist())
        # Offsets restart at 0 for each row; each character here is 3 bytes.
        self.assertAllEqual([0, 3, 6, 9, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27],
                            self.evaluate(offsets).tolist())
    def testBatchDecode(self):
        """Decodes two UTF-8 strings in one batch and validates the outputs.

        Checks the decoded codepoints, the row splits separating the two
        inputs, and the starting byte offset of each character.
        """
        text = constant_op.constant(["仅今年前", "分享介面終於迎來更新"])
        row_splits, utf8_text, offsets = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8")

        # `test_session` is deprecated; `cached_session` is its replacement.
        # `self.evaluate` is used instead of the graph-only `Tensor.eval()`.
        with self.cached_session():
            expected_codepoints = [
                codepoint("仅"),
                codepoint("今"),
                codepoint("年"),
                codepoint("前"),
                codepoint("分"),
                codepoint("享"),
                codepoint("介"),
                codepoint("面"),
                codepoint("終"),
                codepoint("於"),
                codepoint("迎"),
                codepoint("來"),
                codepoint("更"),
                codepoint("新"),
            ]
            self.assertAllEqual(expected_codepoints,
                                self.evaluate(utf8_text).tolist())
            # Row boundaries: 4 characters, then 10 more.
            self.assertAllEqual([0, 4, 14],
                                self.evaluate(row_splits).tolist())
            # Byte offsets are relative to the start of each input string.
            self.assertAllEqual(
                [0, 3, 6, 9, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27],
                self.evaluate(offsets).tolist())
  def testBatchDecode(self):
    """Decodes a two-string UTF-8 batch and checks values, splits, offsets.

    The second string has 16 characters, so the flat output holds 20
    codepoints in total.
    """
    text = constant_op.constant(
        ["仅今年前", "中国进出口银行与中国银行加强合作"])
    row_splits, utf8_text, offsets = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8")

    # `cached_session` replaces the deprecated `test_session`, and
    # `self.evaluate` replaces the graph-only `Tensor.eval()`.
    with self.cached_session():
      expected_codepoints = [
          codepoint("仅"),
          codepoint("今"),
          codepoint("年"),
          codepoint("前"),
          codepoint("中"),
          codepoint("国"),
          codepoint("进"),
          codepoint("出"),
          codepoint("口"),
          codepoint("银"),
          codepoint("行"),
          codepoint("与"),
          codepoint("中"),
          codepoint("国"),
          codepoint("银"),
          codepoint("行"),
          codepoint("加"),
          codepoint("强"),
          codepoint("合"),
          codepoint("作"),
      ]
      self.assertAllEqual(expected_codepoints,
                          self.evaluate(utf8_text).tolist())
      self.assertAllEqual([0, 4, 20], self.evaluate(row_splits).tolist())
      # Offsets restart at 0 for the second string; each character is 3 bytes.
      self.assertAllEqual([0, 3, 6, 9, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30,
                           33, 36, 39, 42, 45],
                          self.evaluate(offsets).tolist())
Beispiel #4
0
    def testBatchDecode(self):
        """Decodes a batch of two UTF-8 strings and verifies every output.

        Asserts on the flat codepoint values (4 + 16 characters), the row
        splits, and the per-character starting byte offsets.
        """
        text = constant_op.constant(["仅今年前", "中国进出口银行与中国银行加强合作"])
        row_splits, utf8_text, offsets = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8")

        # `test_session` is deprecated in favour of `cached_session`;
        # `self.evaluate` replaces the graph-only `Tensor.eval()`.
        with self.cached_session():
            expected_codepoints = [
                codepoint("仅"),
                codepoint("今"),
                codepoint("年"),
                codepoint("前"),
                codepoint("中"),
                codepoint("国"),
                codepoint("进"),
                codepoint("出"),
                codepoint("口"),
                codepoint("银"),
                codepoint("行"),
                codepoint("与"),
                codepoint("中"),
                codepoint("国"),
                codepoint("银"),
                codepoint("行"),
                codepoint("加"),
                codepoint("强"),
                codepoint("合"),
                codepoint("作"),
            ]
            self.assertAllEqual(expected_codepoints,
                                self.evaluate(utf8_text).tolist())
            self.assertAllEqual([0, 4, 20],
                                self.evaluate(row_splits).tolist())
            # Byte offsets are counted from the start of each input string.
            expected_offsets = [
                0, 3, 6, 9, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39,
                42, 45
            ]
            self.assertAllEqual(expected_offsets,
                                self.evaluate(offsets).tolist())
Beispiel #5
0
    def testBadReplacementChar(self):
        """Expects InvalidArgumentError for replacement_char=11141111.

        11141111 is larger than the maximum Unicode codepoint 0x10FFFF
        (1114111). The error is expected when the output is evaluated, not
        when the op is constructed.
        """
        text = constant_op.constant([b"\xFE"])
        _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8", errors="replace", replacement_char=11141111)

        with self.assertRaises(errors.InvalidArgumentError):
            # `cached_session` replaces the deprecated `test_session`.
            with self.cached_session():
                self.evaluate(char_values)
Beispiel #6
0
    def testStrictError(self):
        """Expects InvalidArgumentError for input b"\\xFEED" with errors="strict".

        The input is the byte 0xFE followed by ASCII "ED". The error is
        expected when the output is evaluated, not when the op is constructed.
        """
        text = constant_op.constant([b"\xFEED"])
        _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8", errors="strict")

        with self.assertRaises(errors.InvalidArgumentError):
            # `cached_session` replaces the deprecated `test_session`.
            with self.cached_session():
                self.evaluate(char_values)
  def testReplaceOnError(self):
    """Expects input byte 0xFE to decode to 65533 (U+FFFD) with errors="replace"."""
    text = constant_op.constant([b"\xFE"])

    _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="replace")

    # `cached_session` replaces the deprecated `test_session`, and
    # `self.evaluate` replaces the graph-only `Tensor.eval()`.
    with self.cached_session():
      self.assertAllEqual(self.evaluate(utf8_text).tolist(), [65533])
  def testStrictError(self):
    """Expects InvalidArgumentError for input b"\\xFEED" under errors="strict".

    The failure is expected when the decoded values are evaluated rather than
    when the op is created.
    """
    text = constant_op.constant([b"\xFEED"])
    _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="strict")

    with self.assertRaises(errors.InvalidArgumentError):
      # `test_session` is deprecated; use `cached_session` instead.
      with self.cached_session():
        self.evaluate(char_values)
  def testBadReplacementChar(self):
    """Expects InvalidArgumentError when replacement_char is 11141111.

    That value exceeds 0x10FFFF (1114111), the largest Unicode codepoint. The
    failure is expected when the decoded values are evaluated.
    """
    text = constant_op.constant([b"\xFE"])
    _, char_values, _ = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="replace", replacement_char=11141111)

    with self.assertRaises(errors.InvalidArgumentError):
      # `test_session` is deprecated; use `cached_session` instead.
      with self.cached_session():
        self.evaluate(char_values)
Beispiel #10
0
    def testReplaceOnError(self):
        """Checks that errors="replace" yields 65533 (U+FFFD) for input b"\\xFE"."""
        text = constant_op.constant([b"\xFE"])

        _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8", errors="replace")

        # `test_session` is deprecated in favour of `cached_session`;
        # `self.evaluate` replaces the graph-only `Tensor.eval()`.
        with self.cached_session():
            self.assertAllEqual(self.evaluate(utf8_text).tolist(), [65533])
 def testDecodeGenOp(self,
                     doc,
                     expected_row_splits=None,
                     expected_char_values=None,
                     expected_char_to_byte_starts=None,
                     **args):
   """Checks the generated wrapper gen_string_ops.unicode_decode_with_offsets.

   Args:
     doc: Not read by this method; presumably a label for the test case.
     expected_row_splits: Value the `row_splits` output must equal.
     expected_char_values: Value the `char_values` output must equal.
     expected_char_to_byte_starts: Value the `char_to_byte_starts` output
       must equal.
     **args: Keyword arguments forwarded unchanged to the op.
   """
   decoded = gen_string_ops.unicode_decode_with_offsets(**args)
   # Compare each named output with its expectation, in declaration order.
   checks = (
       ("row_splits", expected_row_splits),
       ("char_values", expected_char_values),
       ("char_to_byte_starts", expected_char_to_byte_starts),
   )
   for field, expected in checks:
     self.assertAllEqual(expected, getattr(decoded, field))
Beispiel #12
0
 def testDecodeGenOp(self,
                     doc,
                     expected_row_splits=None,
                     expected_char_values=None,
                     expected_char_to_byte_starts=None,
                     **args):
   """Exercises gen_string_ops.unicode_decode_with_offsets directly.

   Args:
     doc: Unused in the body; presumably a description of the test case.
     expected_row_splits: Expected value of the `row_splits` output.
     expected_char_values: Expected value of the `char_values` output.
     expected_char_to_byte_starts: Expected value of the
       `char_to_byte_starts` output.
     **args: Keyword arguments passed straight through to the op.
   """
   op_outputs = gen_string_ops.unicode_decode_with_offsets(**args)

   actual_row_splits = op_outputs.row_splits
   self.assertAllEqual(expected_row_splits, actual_row_splits)

   actual_char_values = op_outputs.char_values
   self.assertAllEqual(expected_char_values, actual_char_values)

   actual_byte_starts = op_outputs.char_to_byte_starts
   self.assertAllEqual(expected_char_to_byte_starts, actual_byte_starts)
  def testIgnoreOnError(self):
    """Expects only the codepoints of "hello" for b"\\xFEhello" with errors="ignore"."""
    text = constant_op.constant([b"\xFEhello"])

    _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", errors="ignore")

    # `cached_session` replaces the deprecated `test_session`, and
    # `self.evaluate` replaces the graph-only `Tensor.eval()`.
    with self.cached_session():
      # The leading 0xFE byte contributes nothing to the expected output.
      self.assertAllEqual(self.evaluate(utf8_text).tolist(), [
          codepoint("h"),
          codepoint("e"),
          codepoint("l"),
          codepoint("l"),
          codepoint("o")
      ])
Beispiel #14
0
    def testIgnoreOnError(self):
        """Checks that errors="ignore" yields just "hello" for b"\\xFEhello"."""
        text = constant_op.constant([b"\xFEhello"])

        _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8", errors="ignore")

        # `test_session` is deprecated in favour of `cached_session`;
        # `self.evaluate` replaces the graph-only `Tensor.eval()`.
        with self.cached_session():
            # Only the five ASCII letters are expected; 0xFE yields no output.
            self.assertAllEqual(self.evaluate(utf8_text).tolist(), [
                codepoint("h"),
                codepoint("e"),
                codepoint("l"),
                codepoint("l"),
                codepoint("o")
            ])
  def testBasicDecodeWithOffset(self):
    """Decodes one four-character UTF-8 string and checks all three outputs."""
    text = constant_op.constant(["仅今年前"])
    row_splits, utf8_text, starts = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8")

    # `cached_session` replaces the deprecated `test_session`, and
    # `self.evaluate` replaces the graph-only `Tensor.eval()`.
    with self.cached_session():
      expected_codepoints = [
          codepoint("仅"),
          codepoint("今"),
          codepoint("年"),
          codepoint("前"),
      ]
      self.assertAllEqual(expected_codepoints,
                          self.evaluate(utf8_text).tolist())
      self.assertAllEqual(self.evaluate(row_splits).tolist(), [0, 4])
      # Each character here is 3 bytes, so starts advance in steps of 3.
      self.assertAllEqual(self.evaluate(starts).tolist(), [0, 3, 6, 9])
  def testReplaceControlChars(self):
    """Expects "\\x02" to decode to 65533 with replace_control_characters=True.

    The remaining four characters are expected to decode to their own
    codepoints, giving five values in a single row.
    """
    text = constant_op.constant(["\x02仅今年前"])
    row_splits, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
        text, "utf-8", replace_control_characters=True)

    # `cached_session` replaces the deprecated `test_session`, and
    # `self.evaluate` replaces the graph-only `Tensor.eval()`.
    with self.cached_session():
      expected_codepoints = [
          65533,  # Stands in for the "\x02" control character.
          codepoint("仅"),
          codepoint("今"),
          codepoint("年"),
          codepoint("前"),
      ]
      self.assertAllEqual(expected_codepoints,
                          self.evaluate(utf8_text).tolist())
      self.assertAllEqual([0, 5], self.evaluate(row_splits).tolist())
Beispiel #17
0
    def testBasicDecodeWithOffset(self):
        """Decodes a single UTF-8 string; checks values, splits and starts."""
        text = constant_op.constant(["仅今年前"])
        row_splits, utf8_text, starts = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8")

        # `test_session` is deprecated in favour of `cached_session`;
        # `self.evaluate` replaces the graph-only `Tensor.eval()`.
        with self.cached_session():
            expected_codepoints = [
                codepoint("仅"),
                codepoint("今"),
                codepoint("年"),
                codepoint("前"),
            ]
            self.assertAllEqual(expected_codepoints,
                                self.evaluate(utf8_text).tolist())
            self.assertAllEqual(self.evaluate(row_splits).tolist(), [0, 4])
            # Four 3-byte characters start at bytes 0, 3, 6 and 9.
            self.assertAllEqual(self.evaluate(starts).tolist(), [0, 3, 6, 9])
Beispiel #18
0
    def testReplaceControlChars(self):
        """Checks that replace_control_characters=True maps "\\x02" to 65533.

        The four characters after the control character are expected to keep
        their own codepoints, so the single row holds five values.
        """
        text = constant_op.constant(["\x02仅今年前"])
        row_splits, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
            text, "utf-8", replace_control_characters=True)

        # `test_session` is deprecated in favour of `cached_session`;
        # `self.evaluate` replaces the graph-only `Tensor.eval()`.
        with self.cached_session():
            expected_codepoints = [
                65533,  # Replacement for the "\x02" control character.
                codepoint("仅"),
                codepoint("今"),
                codepoint("年"),
                codepoint("前"),
            ]
            self.assertAllEqual(expected_codepoints,
                                self.evaluate(utf8_text).tolist())
            self.assertAllEqual([0, 5], self.evaluate(row_splits).tolist())
  def testBadErrorPolicy(self):
    """Expects ValueError when the op is built with errors="oranguatan"."""
    text = constant_op.constant(["hippopotamus"])

    with self.assertRaises(ValueError):
      # The result is deliberately not unpacked: a failed tuple unpack raises
      # ValueError too and could satisfy this assertion for the wrong reason.
      gen_string_ops.unicode_decode_with_offsets(
          text, "utf-8", errors="oranguatan")
Beispiel #20
0
    def testBadErrorPolicy(self):
        """Checks that errors="oranguatan" raises ValueError at op creation."""
        text = constant_op.constant(["hippopotamus"])

        with self.assertRaises(ValueError):
            # No unpacking here: a mismatched tuple unpack raises ValueError
            # as well, which would let the test pass spuriously.
            gen_string_ops.unicode_decode_with_offsets(
                text, "utf-8", errors="oranguatan")