Example #1
0
 def test_counter(self):
     """Check that ``to_tokens`` returns the token assigned to each index.

     A ``Vocab`` is built from ``self.counter`` with an explicit
     ``token_to_idx`` mapping, and indices 1 to 3 are looked up again.
     """
     index_by_token = {'一万七千多': 1, '一万七千余': 2, '一万万': 3}
     vocab = Vocab(
         counter=self.counter,
         unk_token='[UNK]',
         token_to_idx=index_by_token)
     # Expected pairs are spelled out separately from the mapping handed to
     # Vocab so the checks do not depend on what Vocab does with that dict.
     expectations = ((1, '一万七千多'), (2, '一万七千余'), (3, '一万万'))
     for index, token in expectations:
         self.check_output_equal(vocab.to_tokens(index), token)
Example #2
0
 def test_json(self):
     """Round-trip a ``Vocab`` through ``to_json`` / ``from_json``.

     Each token-to-index entry of the restored vocabulary is compared with
     the value the original vocabulary returns for the same token.
     """
     index_by_token = {'一万七千多': 1, '一万七千余': 2, '一万万': 3}
     original = Vocab(
         counter=self.counter,
         unk_token='[UNK]',
         token_to_idx=index_by_token)
     restored = Vocab.from_json(original.to_json())
     # Only entries present in the restored vocabulary are walked; the
     # original is queried per token via item access.
     for token, restored_index in restored.token_to_idx.items():
         self.check_output_equal(restored_index, original[token])
Example #3
0
 def test_to_token_excess_size(self):
     """Call ``to_tokens`` with an index equal to ``len(vocab)``.

     NOTE(review): the method name suggests this index is out of range and
     should be rejected; the expected error is not asserted in this method,
     so presumably it is declared outside it -- confirm.
     """
     index_by_token = {'一万七千多': 1, '一万七千余': 2, '一万万': 3}
     vocab = Vocab(
         counter=self.counter,
         unk_token='[UNK]',
         token_to_idx=index_by_token)
     one_past_size = len(vocab)
     vocab.to_tokens(one_past_size)
Example #4
0
 def test_sort_index_value_error3(self):
     """Construct a ``Vocab`` from a mapping that contains a negative index.

     NOTE(review): the method name suggests construction should raise
     ``ValueError``; the expectation is not asserted in this method, so
     presumably it is declared outside it -- confirm.
     """
     # '一万七千多' is deliberately mapped to -1.
     index_by_token = {'一万七千多': -1, '一万七千余': 2, '一万七千': 3}
     Vocab(
         counter=self.counter,
         unk_token='[UNK]',
         token_to_idx=index_by_token)
Example #5
0
 def test_sort_index_value_error1(self):
     """Construct a ``Vocab`` from a mapping that includes ``'IP地址'``.

     The constructed object is never used, so it is not bound to a name
     (this mirrors ``test_sort_index_value_error3``).

     NOTE(review): the method name suggests construction should raise
     ``ValueError`` for this mapping; the expectation is not asserted in
     this method, so presumably it is declared outside it -- confirm.
     """
     token_to_idx = {'一万七千多': 1, '一万七千余': 2, 'IP地址': 3}
     Vocab(
         counter=self.counter, unk_token='[UNK]', token_to_idx=token_to_idx)
Example #6
0
 def test_invalid_identifier(self):
     """Construct a ``Vocab`` with the keyword ``_special_token=''``.

     NOTE(review): the method name suggests the leading-underscore keyword
     is rejected as an invalid identifier; the expected error is not
     asserted in this method -- confirm where it is declared.
     """
     offending_kwargs = {'_special_token': ''}
     Vocab(counter=self.counter, **offending_kwargs)
Example #7
0
 def test_invalid_specail_token(self):
     """Construct a ``Vocab`` with only the keyword ``wrong_kwarg=''``.

     NOTE(review): the method name suggests this keyword is rejected as an
     invalid special-token argument; the expected error is not asserted in
     this method -- confirm where it is declared.
     """
     offending_kwargs = {'wrong_kwarg': ''}
     Vocab(**offending_kwargs)