Beispiel #1
0
 def test_contains(self):
     """Check membership on a vocabulary created with ``unknown=None``.

     The last token of ``text`` is expected to be present and the literal
     "~!@#" is expected to be absent. For both, the ``in`` operator and
     ``has_word`` must give the same answer.
     """
     vocab = Vocabulary(unknown=None)
     vocab.update(text)
     known_word = text[-1]
     missing_word = "~!@#"
     # assertIn/assertNotIn report both operands when they fail, unlike
     # assertTrue/assertFalse on a pre-evaluated boolean.
     self.assertIn(known_word, vocab)
     self.assertNotIn(missing_word, vocab)
     self.assertEqual(known_word in vocab, vocab.has_word(known_word))
     self.assertEqual(missing_word in vocab, vocab.has_word(missing_word))
Beispiel #2
0
    def test_index(self):
        """Check that distinct words map to distinct indices.

        Both ``vocab[word]`` and ``vocab.to_index(word)`` are evaluated for
        every distinct word of ``text``; each lookup style must produce as
        many different indices as there are words looked up.
        """
        vocab = Vocabulary()
        vocab.update(text)
        distinct_words = set(text)

        by_subscript = [vocab[word] for word in distinct_words]
        self.assertEqual(len(by_subscript), len(set(by_subscript)))

        by_method = [vocab.to_index(word) for word in distinct_words]
        self.assertEqual(len(by_method), len(set(by_method)))
Beispiel #3
0
 def test_iteration(self):
     """Check that iterating a vocabulary yields only words that were added.

     NOTE(review): the loop treats every iterated item as a bare word;
     confirm this matches ``Vocabulary.__iter__`` in the version under test.
     """
     vocab = Vocabulary()
     words = [
         "FastNLP", "works", "well", "in", "most", "cases", "and", "scales",
         "well", "in", "works", "well", "in", "most", "cases", "scales",
         "well",
     ]
     vocab.update(words)
     # Kept under its own name instead of rebinding the list to a set.
     added_words = set(words)
     for word in vocab:
         # assertIn shows the offending word and the container on failure.
         self.assertIn(word, added_words)
Beispiel #4
0
 def test_iteration(self):
     """Check the (word, index) pairs produced by iterating a vocabulary.

     The vocabulary is created with ``padding=None`` and ``unknown=None``.
     Every yielded word must be one of the added words and every yielded
     index must be smaller than ``len(vocab)``.
     """
     vocab = Vocabulary(padding=None, unknown=None)
     words = [
         "FastNLP", "works", "well", "in", "most", "cases", "and", "scales",
         "well", "in", "works", "well", "in", "most", "cases", "scales",
         "well",
     ]
     vocab.update(words)
     # Kept under its own name instead of rebinding the list to a set.
     added_words = set(words)
     for word, idx in vocab:
         # assertIn/assertLess print the compared values on failure.
         self.assertIn(word, added_words)
         self.assertLess(idx, len(vocab))
Beispiel #5
0
    def test_warning(self):
        """Track the ``rebuild`` flag on a size-limited vocabulary.

        ``max_size`` is set to the number of distinct words in ``text``. The
        flag is expected to be True after the first update, False once
        ``len(vocab)`` has been evaluated, and True again after a second
        update with eight more words.
        """
        size_limit = len(set(text))
        vocabulary = Vocabulary(max_size=size_limit)
        vocabulary.update(text)
        self.assertEqual(vocabulary.rebuild, True)
        # The len() call sits between the two differing flag checks, so it
        # must stay; its value is only printed.
        print(len(vocabulary))
        self.assertEqual(vocabulary.rebuild, False)

        extra_words = [
            "hahahha", "hhh", "vvvv", "ass", "asss", "jfweiong", "eqgfeg",
            "feqfw"
        ]
        vocabulary.update(extra_words)
        # Per the original author, this is expected to print a warning.
        self.assertEqual(vocabulary.rebuild, True)
Beispiel #6
0
    def test_additional_update(self):
        """Check the ``rebuild`` flag around adding a word after a lookup.

        The flag is expected to be False after indexing "well", True right
        after ``add`` is called with a new word, and False again once that
        word has been indexed; the word must then be in the vocabulary.
        """
        vocab = Vocabulary()
        vocab.update(text)

        # Only the lookup's effect on ``rebuild`` matters; the index itself
        # is discarded.
        _ = vocab["well"]
        self.assertEqual(vocab.rebuild, False)

        new_word = "hahaha"
        vocab.add(new_word)
        self.assertEqual(vocab.rebuild, True)

        _ = vocab[new_word]
        self.assertEqual(vocab.rebuild, False)
        # assertIn names the missing word on failure.
        self.assertIn(new_word, vocab)
Beispiel #7
0
 def test_rebuild(self):
     """Check that words added after a build keep existing indices intact.

     The strings "0".."9" are added first and each is expected at index
     ``int(word) + 2``. After "10".."12" are added with ``add_word_lst``,
     every previously observed (word, index) pair must still hold and all
     thirteen words must follow the same ``+ 2`` rule.
     """
     # presumably the offset of 2 comes from the default padding/unknown
     # entries -- TODO confirm against Vocabulary.
     vocab = Vocabulary()
     initial_words = [str(number) for number in range(10)]
     vocab.update(initial_words)
     for word in initial_words:
         self.assertEqual(int(word) + 2, vocab.to_index(word))
     # Snapshot the mapping before any further words are added.
     snapshot = [(word, index) for word, index in vocab]
     vocab.add_word_lst([str(number) for number in range(10, 13)])
     for word, index in snapshot:
         self.assertEqual(index, vocab.to_index(word))
     for number in range(13):
         self.assertEqual(number + 2, vocab.to_index(str(number)))
0
 def test_update(self):
     """Check that ``word_count`` equals ``counter`` after updating."""
     vocabulary = Vocabulary()
     vocabulary.update(text)
     observed_counts = vocabulary.word_count
     self.assertEqual(observed_counts, counter)
Beispiel #9
0
 def test_to_word(self):
     """Check that mapping words to indices and back reproduces ``text``."""
     vocabulary = Vocabulary()
     vocabulary.update(text)
     # Look up every index first, then convert each one back to a word.
     indices = [vocabulary[word] for word in text]
     round_trip = [vocabulary.to_word(index) for index in indices]
     self.assertEqual(text, round_trip)
Beispiel #10
0
 def test_len(self):
     """Check ``len`` of a vocabulary built without unknown/padding.

     The length is expected to equal the number of entries in ``counter``.
     """
     vocabulary = Vocabulary(unknown=None, padding=None)
     vocabulary.update(text)
     actual_size = len(vocabulary)
     expected_size = len(counter)
     self.assertEqual(actual_size, expected_size)