Example #1
0
	def test_save_load(self):
		'''
		A token_map written to disk with save() and read back with 
		load() into a fresh TokenMap yields the same ids for the same 
		tokens, and has the same length.
		'''

		path = 'test-data/test-token-map/test-token-map.gz'

		# Expected ids start at 1, not 0.  The range is materialized as
		# a list because on Python 3 a range object never compares equal
		# to a list, which would make the assertion fail even when the
		# ids are right.  (On Python 2 the list() call is a no-op.)
		expected_ids = list(range(1, len(self.TOKENS)+1))

		token_map = TokenMap(on_unk=SILENT)
		token_map.update(self.TOKENS)
		token_map.save(path)

		token_map_copy = TokenMap(on_unk=SILENT)
		token_map_copy.load(path)
		self.assertEqual(
			token_map_copy.get_ids(self.TOKENS),
			expected_ids
		)

		# One more entry than there are tokens -- presumably the
		# reserved UNK entry; confirm against TokenMap.__len__
		self.assertEqual(len(token_map_copy), len(self.TOKENS)+1)
Example #2
0
	def test_raise_error_on_unk(self):
		'''
		A token_map constructed with on_unk=ERROR must raise KeyError
		from both get_id() and get_ids() when asked about a token that 
		was never added, instead of silently answering with 0 (the id 
		reserved for 'UNK', i.e. any unknown token).
		'''

		strict_map = TokenMap(on_unk=ERROR)
		strict_map.update(self.TOKENS)

		# Single-token lookup of an unknown token
		self.assertRaises(KeyError, strict_map.get_id, 'no-exist')

		# Multi-token lookup: one unknown token among the supplied
		# tokens is enough to raise
		self.assertRaises(
			KeyError, strict_map.get_ids, ['apple', 'no-exist']
		)
Example #3
0
	def test_token_map_plural_functions(self):
		'''
		Exercise the list-oriented TokenMap methods -- update(), 
		get_ids() and get_tokens() -- on a token_map constructed with
		on_unk=SILENT.
		'''

		token_map = TokenMap(on_unk=SILENT)

		# In these assertions, we offset the expected list of ids by
		# 1 because the 0th id in token_map is reserved for the UNK
		# token.  The range is materialized as a list because on 
		# Python 3 a range object never compares equal to a list.  
		# (On Python 2 the list() call is a no-op.)
		expected_ids = list(range(1, len(self.TOKENS)+1))

		# Ensure that update works
		ids = token_map.update(self.TOKENS)
		self.assertEqual(ids, expected_ids)

		# Ensure that get_ids works
		self.assertEqual(
			token_map.get_ids(self.TOKENS),
			expected_ids
		)

		# Ensure that get_tokens works
		self.assertEqual(
			token_map.get_tokens(expected_ids),
			self.TOKENS
		)

		# With on_unk=SILENT, asking for ids of non-existent tokens 
		# does not raise; the unknown token gets id 0 (UNK)
		self.assertEqual(
			token_map.get_ids(['apple', 'no-exist']),
			[self.TOKENS.index('apple')+1, 0]
		)

		# Asking for token at 0 returns the 'UNK' token
		self.assertEqual(
			token_map.get_tokens([3,0]),
			[self.TOKENS[3-1], 'UNK']
		)

		# Asking for token at non-existent idx raises IndexError
		with self.assertRaises(IndexError):
			token_map.get_tokens([1,99])