def test_from_dataset_no_entry(self):
    """Test that ``from_dataset`` sets the no_create_entry flag correctly.

    Two datasets are built over the ``char`` field: one holding single
    letters and one holding the same letters doubled ("AA", "BB", ...).
    The doubled-letter dataset is passed as ``no_create_entry_dataset``,
    so every doubled letter is expected to be reported by
    ``_is_word_no_create_entry``.
    """
    first_code = 65  # ord('A')
    sample_count = 10

    train_set = DataSet()
    held_out_set = DataSet()
    for offset in range(sample_count):
        letters = [chr(first_code + offset)] * 6
        train_set.append(Instance(char=letters))
        # Doubled letters occur only in the no-create-entry dataset.
        doubled = [letter + letter for letter in letters]
        held_out_set.append(Instance(char=doubled))

    vocab = Vocabulary()
    vocab.from_dataset(
        train_set,
        field_name='char',
        no_create_entry_dataset=held_out_set,
    )
    vocab.index_dataset(train_set, field_name='char')

    for offset in range(sample_count):
        doubled_letter = chr(first_code + offset) * 2
        self.assertEqual(
            True, vocab._is_word_no_create_entry(doubled_letter))
def test_no_entry(self):
    """Build a vocabulary, then vary no_create_entry and check the flag.

    The steps below are order-dependent: each one mutates the vocabulary
    and the following check inspects the resulting no_create_entry state
    of the word that was just added.
    """
    corpus = [
        "FastNLP", "works", "well", "in", "most", "cases", "and",
        "scales", "well", "in", "works", "well", "in", "most", "cases",
        "scales", "well",
    ]
    vocab = Vocabulary()
    vocab.add_word_lst(corpus)

    def check_flag(word, expected):
        # Route to the matching unittest assertion for the expected state.
        actual = vocab._is_word_no_create_entry(word)
        if expected:
            self.assertTrue(actual)
        else:
            self.assertFalse(actual)

    # A word added normally is not flagged ...
    check_flag('FastNLP', False)
    # ... and the test expects it to stay unflagged when re-added with
    # no_create_entry=True.
    vocab.add_word('FastNLP', no_create_entry=True)
    check_flag('FastNLP', False)

    # A new word first seen with no_create_entry=True is flagged ...
    vocab.add_word('fastnlp', no_create_entry=True)
    check_flag('fastnlp', True)
    # ... until it is added again with no_create_entry=False.
    vocab.add_word('fastnlp', no_create_entry=False)
    check_flag('fastnlp', False)

    # Same expectations through add_word_lst and the default add_word call.
    vocab.add_word_lst(['1'] * 10, no_create_entry=True)
    check_flag('1', True)
    vocab.add_word('1')
    check_flag('1', False)