Exemple #1
0
 def test_from_dataset_no_entry(self):
     """Check that no_create_entry is set for words from the extra dataset.

     Builds a vocabulary from one dataset while passing a second dataset
     as ``no_create_entry_dataset``, then asserts that every word occurring
     only in the second dataset is reported by
     ``_is_word_no_create_entry``.
     """
     first_code_point = 65  # chr(65) == 'A'
     sample_count = 10
     train_set = DataSet()
     held_out_set = DataSet()
     for offset in range(sample_count):
         letter = chr(first_code_point + offset)
         # Training instance: one single letter repeated six times.
         train_set.append(Instance(char=[letter] * 6))
         # Held-out instance: the doubled letter, which never appears in
         # the training instances.
         held_out_set.append(Instance(char=[letter + letter] * 6))

     vocabulary = Vocabulary()
     vocabulary.from_dataset(
         train_set, field_name='char', no_create_entry_dataset=held_out_set)
     vocabulary.index_dataset(train_set, field_name='char')

     for offset in range(sample_count):
         doubled = chr(first_code_point + offset) * 2
         self.assertEqual(True, vocabulary._is_word_no_create_entry(doubled))
Exemple #2
0
    def test_no_entry(self):
        """Check that the no_create_entry flag follows later ``add_word`` calls.

        The vocabulary is built first; words are then (re-)added with
        different ``no_create_entry`` values and the result of
        ``_is_word_no_create_entry`` is asserted after each change.
        """
        words = [
            "FastNLP", "works", "well", "in", "most", "cases",
            "and", "scales", "well", "in", "works", "well",
            "in", "most", "cases", "scales", "well",
        ]
        vocabulary = Vocabulary()
        vocabulary.add_word_lst(words)
        is_no_create = vocabulary._is_word_no_create_entry

        # A word first added normally is expected to stay unflagged even
        # when it is added again with no_create_entry=True.
        self.assertFalse(is_no_create('FastNLP'))
        vocabulary.add_word('FastNLP', no_create_entry=True)
        self.assertFalse(is_no_create('FastNLP'))

        # A word first added with no_create_entry=True is expected to be
        # flagged until it is added again with no_create_entry=False.
        vocabulary.add_word('fastnlp', no_create_entry=True)
        self.assertTrue(is_no_create('fastnlp'))
        vocabulary.add_word('fastnlp', no_create_entry=False)
        self.assertFalse(is_no_create('fastnlp'))

        # Same expectation through add_word_lst, cleared by a plain
        # add_word call that does not pass no_create_entry.
        vocabulary.add_word_lst(['1'] * 10, no_create_entry=True)
        self.assertTrue(is_no_create('1'))
        vocabulary.add_word('1')
        self.assertFalse(is_no_create('1'))