def test_errors(self):
    """Check that Vocab raises the expected errors on invalid usage.

    Three failure modes are exercised:
      * constructing with ``unk_token=None`` raises ``ValueError``;
      * ``insert_token`` at index 100 raises ``RuntimeError``;
      * ``lookup_token`` at index 100 raises ``RuntimeError``.
    """
    token_to_freq = {
        'hello': 4,
        'world': 3,
        'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5,
        'freq_too_low': 2,
    }
    sorted_by_freq_tuples = sorted(
        token_to_freq.items(), key=lambda x: x[1], reverse=True)
    c = OrderedDict(sorted_by_freq_tuples)

    # Test proper error raised when setting unk token to None
    with self.assertRaises(ValueError):
        Vocab(c, unk_token=None)

    # The vocab objects below are built OUTSIDE the assertRaises contexts so
    # that an unexpected RuntimeError from the constructor cannot satisfy the
    # assertion in place of the method actually under test.

    # Test proper error raised when setting a token out of bounds
    v = Vocab(c, min_freq=3)
    with self.assertRaises(RuntimeError):
        v.insert_token('new_token', 100)

    # Test proper error raised when looking up a token out of bounds
    v = Vocab(c)
    with self.assertRaises(RuntimeError):
        v.lookup_token(100)
def test_errors(self):
    """Check that Vocab raises the expected errors on invalid usage.

    Five failure modes are exercised:
      * constructing with ``unk_token=None`` raises ``ValueError``;
      * constructing with ``specials=['<pad>', '<bos>']`` (no ``'<unk>'``
        entry) raises ``ValueError``;
      * constructing from an ordered dict that already contains ``'<pad>'``
        while ``'<pad>'`` is also listed in ``specials`` raises
        ``ValueError``;
      * ``insert_token`` at index 100 raises ``RuntimeError``;
      * ``lookup_token`` at index 100 raises ``RuntimeError``.
    """
    token_to_freq = {
        'hello': 4,
        'world': 3,
        'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5,
        'freq_too_low': 2,
    }
    sorted_by_freq_tuples = sorted(
        token_to_freq.items(), key=lambda x: x[1], reverse=True)
    c = OrderedDict(sorted_by_freq_tuples)

    # Test proper error raised when setting unk token to None
    with self.assertRaises(ValueError):
        Vocab(c, specials=['<unk>', '<bos>'], unk_token=None)

    # Test proper error raised when specials token doesn't contain unk_token
    with self.assertRaises(ValueError):
        Vocab(c, specials=['<pad>', '<bos>'])

    # Test proper error raised when ordered_dict contains a special token.
    # The fixture is prepared outside the assertRaises context so that only
    # the Vocab construction itself is expected to raise.
    updated_token_to_freq = {
        'hello': 4,
        'world': 3,
        'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5,
        'freq_too_low': 2,
        '<pad>': 1,
    }
    updated_sorted_by_freq_tuples = sorted(
        updated_token_to_freq.items(), key=lambda x: x[1], reverse=True)
    updated_c = OrderedDict(updated_sorted_by_freq_tuples)
    with self.assertRaises(ValueError):
        Vocab(updated_c, specials=['<unk>', '<pad>', '<bos>'])

    # The vocab objects below are built OUTSIDE the assertRaises contexts so
    # that an unexpected RuntimeError from the constructor cannot satisfy the
    # assertion in place of the method actually under test.

    # Test proper error raised when setting a token out of bounds
    v = Vocab(c, min_freq=3)
    with self.assertRaises(RuntimeError):
        v.insert_token('new_token', 100)

    # Test proper error raised when looking up a token out of bounds
    v = Vocab(c)
    with self.assertRaises(RuntimeError):
        v.lookup_token(100)
def test_vocab_lookup_token(self):
    """Look up index 0 in a Vocab built with ``specials_first=False``.

    The vocab is built from three tokens with equal frequency and index 0
    is expected to map back to ``'a'``.
    """
    frequencies = {'a': 2, 'b': 2, 'c': 2}
    # Every count ties, and sorted() is stable (also with reverse=True), so
    # the resulting order is the dict's insertion order: a, b, c.
    ordered_counts = OrderedDict(
        sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)
    )
    vocab = Vocab(ordered_counts, specials_first=False)
    first_token = vocab.lookup_token(0)
    self.assertEqual(first_token, 'a')