def test_save_load(self):
    """
    A TokenMap saved to disk and loaded into a fresh instance must
    reproduce the same token-to-id mapping, including the extra slot
    reserved for the UNK token (id 0).
    """
    token_map = TokenMap(on_unk=SILENT)
    token_map.update(self.TOKENS)
    token_map.save('test-data/test-token-map/test-token-map.gz')

    token_map_copy = TokenMap(on_unk=SILENT)
    token_map_copy.load(
        'test-data/test-token-map/test-token-map.gz'
    )

    # Ids start at 1 because id 0 is reserved for UNK.  Wrapping in
    # list() keeps the comparison valid on both Python 2 (where
    # range() already returns a list) and Python 3 (where a bare
    # range object never compares equal to a list).
    self.assertEqual(
        token_map_copy.get_ids(self.TOKENS),
        list(range(1, len(self.TOKENS)+1))
    )
    # Length includes the reserved UNK entry, hence the +1.
    self.assertEqual(len(token_map_copy), len(self.TOKENS)+1)
def test_raise_error_on_unk(self):
    """
    A TokenMap built with on_unk=ERROR must raise KeyError from both
    get_id() and get_ids() when handed a token it has never seen,
    instead of silently mapping it to the reserved UNK id (0), which
    is the default behavior.
    """
    strict_map = TokenMap(on_unk=ERROR)
    strict_map.update(self.TOKENS)

    # Single-token lookup of an unknown token raises.
    with self.assertRaises(KeyError):
        strict_map.get_id('no-exist')

    # Batch lookup raises as soon as any member is unknown, even if
    # other members of the batch are known.
    with self.assertRaises(KeyError):
        strict_map.get_ids(['apple', 'no-exist'])
def test_token_map_plural_functions(self):
    """
    Exercise the batch (plural) TokenMap operations -- update(),
    get_ids() and get_tokens() -- including UNK handling under
    on_unk=SILENT and IndexError on out-of-range ids.
    """
    token_map = TokenMap(on_unk=SILENT)

    # In these assertions, we offset the expected list of ids by
    # 1 because the 0th id in token_map is reserved for the UNK
    # token.  Expected values are wrapped in list() so the
    # comparisons hold on both Python 2 (range() returns a list)
    # and Python 3 (a bare range object never equals a list).

    # Ensure that update works
    ids = token_map.update(self.TOKENS)
    self.assertEqual(ids, list(range(1, len(self.TOKENS)+1)))

    # Ensure that get_ids works
    self.assertEqual(
        token_map.get_ids(self.TOKENS),
        list(range(1, len(self.TOKENS)+1))
    )

    # Ensure that get_tokens works
    self.assertEqual(
        token_map.get_tokens(list(range(1, len(self.TOKENS)+1))),
        self.TOKENS
    )

    # With on_unk=SILENT, asking for ids of non-existent tokens
    # returns the reserved UNK id (0) rather than raising.
    # (A KeyError only occurs under on_unk=ERROR.)
    self.assertEqual(
        token_map.get_ids(['apple', 'no-exist']),
        [self.TOKENS.index('apple')+1, 0]
    )

    # Asking for token at 0 returns the 'UNK' token
    self.assertEqual(
        token_map.get_tokens([3,0]),
        [self.TOKENS[3-1], 'UNK']
    )

    # Asking for token at non-existent idx raises IndexError
    with self.assertRaises(IndexError):
        token_map.get_tokens([1,99])