예제 #1
0
  def update_mappings(self, mappings):
    """Installs `mappings` as this tokenizer's character mappings.

    Character mappings let tokenizers that honor them swap dangerous
    characters (for example newline or carriage return) for a friendlier
    symbol. Individual tokenizers are free to interpret the mappings
    differently, or to disregard them entirely.

    The supplied dictionary is validated with
    `unified_tokenizer.check_mappings` before it is stored; the dictionary
    object itself is kept, not a copy.

    Args:
      mappings: Dictionary from original string to sanitized string. Every key
        is expected to be exactly one character long.

    Raises:
      ValueError: if any key's length is not 1.
    """
    # Validate first: if this raises, `self.mappings` is never reassigned.
    unified_tokenizer.check_mappings(mappings)
    self.mappings = mappings
 def test_check_mappings_raises_as_expected(self, mappings):
     """Verifies that `check_mappings` rejects `mappings` with ValueError.

     Args:
       mappings: Mappings that are expected to be invalid.
         NOTE(review): presumably supplied by a parameterized-test decorator
         outside this view -- confirm.
     """
     # Callable form of assertRaises: invokes check_mappings(mappings) and
     # fails the test unless that call raises ValueError.
     self.assertRaises(
         ValueError, unified_tokenizer.check_mappings, mappings)
 def test_check_mappings_accepts_legitimate(self, mappings):
     """Verifies that `check_mappings` accepts `mappings` without raising.

     Args:
       mappings: Mappings that are expected to be valid.
         NOTE(review): presumably supplied by a parameterized-test decorator
         outside this view -- confirm.
     """
     # No explicit assertion: any exception raised by the call fails the test.
     unified_tokenizer.check_mappings(mappings)