def test_get_new_tokens(self):
    # Verify that a fresh TokenRegistry hands out the expected id for
    # each previously unseen token, and that looking the same tokens
    # up again returns the same ids.
    tokens = TokenRegistry()

    # Both passes must check exactly the same sequence, so split once.
    words = u"this is a test".split()

    # First, register four new tokens and make sure they get the
    # expected ids: the id for the token at position n is chr(n).
    for token_id, token in enumerate(words):
        self.assertEqual(chr(token_id), tokens.get_id(token))

    # Then, repeat the same check to make sure they aren't
    # re-registered (a re-registration would yield a different id).
    for token_id, token in enumerate(words):
        self.assertEqual(chr(token_id), tokens.get_id(token))
def test_non_unicode(self):
    # Exercise the Unicode-checking entry point of TokenRegistry:
    # get_id() is expected to reject a non-Unicode token with TypeError.
    # NOTE(review): the plain literal below is only a non-Unicode string
    # on Python 2; on Python 3 it is text -- confirm the target version.
    registry = TokenRegistry()

    self.assertRaises(TypeError, registry.get_id, "non-unicode")