class TestPerceptronTagger(unittest.TestCase):
    """Tests for ``PerceptronTagger``: construction, training, tagging and
    model loading."""

    def setUp(self):
        """Prepare an unloaded tagger and a two-sentence sample text."""
        self.tagger = PerceptronTagger(load=False)
        self.text = ("Simple is better than complex. "
                     "Complex is better than complicated.")

    def test_init(self):
        """A tagger created with ``load=False`` is a ``BaseTagger``."""
        fresh_tagger = PerceptronTagger(load=False)
        assert_true(isinstance(fresh_tagger, BaseTagger))

    def test_train(self):
        """Training advances the model counter and registers every tag."""
        n_epochs = 5
        corpus = _read_tagged(_wsj_train)
        self.tagger.train(corpus, nr_iter=n_epochs)
        model = self.tagger.model

        # The model must have 'ticked over' once per word, per iteration.
        total_words = 0
        for words, _ in corpus:
            total_words += len(words)
        assert_equal(total_words * n_epochs, model.i)

        # The number of classes must match the number of distinct tags seen,
        # and each seen tag must be one of the model's classes.
        seen_tags = set(tag for _, tags in corpus for tag in tags)
        assert_equal(len(seen_tags), len(model.classes))
        for seen in seen_tags:
            assert_true(seen in model.classes)

    @attr("slow")
    def test_tag(self):
        """``tag`` yields one pair per token, punctuation included."""
        expected_words = [
            'Simple', 'is', 'better', 'than', 'complex', '.', 'Complex', 'is',
            'better', 'than', 'complicated', '.'
        ]
        # Built with default arguments, unlike the ``load=False`` tagger
        # from setUp.
        tagged = PerceptronTagger().tag(self.text)
        assert_equal([word for word, _ in tagged], expected_words)

    @attr("slow")
    def test_tag_textblob(self):
        """``TextBlob.tags`` works when given a ``PerceptronTagger``."""
        # Punctuation is excluded
        expected_words = [
            'Simple', 'is', 'better', 'than', 'complex', 'Complex', 'is',
            'better', 'than', 'complicated'
        ]
        blob = TextBlob(self.text, pos_tagger=PerceptronTagger())
        assert_equal([word for word, _ in blob.tags], expected_words)

    def test_loading_missing_file_raises_missing_corpus_exception(self):
        """Loading a nonexistent pickle raises ``MissingCorpusError``."""
        unloaded = PerceptronTagger(load=False)
        assert_raises(MissingCorpusError, unloaded.load, 'missing.pickle')
Esempio n. 2
0
class TestPerceptronTagger(unittest.TestCase):
    """Exercise ``PerceptronTagger`` creation, training, tagging and the
    error raised when a model file is missing."""

    def setUp(self):
        """Build the shared fixtures: sample text and an unloaded tagger."""
        sample = ("Simple is better than complex. "
                  "Complex is better than complicated.")
        self.text = sample
        self.tagger = PerceptronTagger(load=False)

    def test_init(self):
        """Constructing with ``load=False`` gives a ``BaseTagger`` instance."""
        is_base_tagger = isinstance(PerceptronTagger(load=False), BaseTagger)
        assert_true(is_base_tagger)

    def test_train(self):
        """Check the model counter and class set after training."""
        passes = 5
        tagged_sentences = _read_tagged(_wsj_train)
        self.tagger.train(tagged_sentences, nr_iter=passes)

        # Check that the model has 'ticked over' once per instance
        word_count = sum(len(words) for words, _ in tagged_sentences)
        expected_ticks = word_count * passes
        assert_equal(expected_ticks, self.tagger.model.i)

        # Check that the tagger has a class for every seen tag
        distinct_tags = set().union(*(tags for _, tags in tagged_sentences))
        known_classes = self.tagger.model.classes
        assert_equal(len(distinct_tags), len(known_classes))
        for tag in distinct_tags:
            assert_true(tag in known_classes)

    @attr("slow")
    def test_tag(self):
        """Tagging the sample text keeps every token, including periods."""
        tagger = PerceptronTagger()
        words = [word for word, _ in tagger.tag(self.text)]
        assert_equal(words, [
            'Simple', 'is', 'better', 'than', 'complex', '.',
            'Complex', 'is', 'better', 'than', 'complicated', '.',
        ])

    @attr("slow")
    def test_tag_textblob(self):
        """``TextBlob.tags`` accepts a ``PerceptronTagger`` as ``pos_tagger``."""
        tagger = PerceptronTagger()
        blob = TextBlob(self.text, pos_tagger=tagger)
        words = [word for word, _ in blob.tags]
        # Punctuation is excluded
        assert_equal(words, [
            'Simple', 'is', 'better', 'than', 'complex',
            'Complex', 'is', 'better', 'than', 'complicated',
        ])

    def test_loading_missing_file_raises_missing_corpus_exception(self):
        """``load`` on a nonexistent file raises ``MissingCorpusException``."""
        load_model = PerceptronTagger(load=False).load
        assert_raises(MissingCorpusException, load_model, 'missing.pickle')