コード例 #1
0
def test_tag_distribution(corpus):
    """Verify ``N`` and ``tag_distribution`` on a freshly trained tagger.

    A new ``UnigramTagger`` is trained on ``corpus``; afterwards its ``N``
    attribute must equal 3 and its ``tag_distribution`` must map ``'VERB'``
    to 2/3 and ``'NOUN'`` to 1/3.
    """
    tagger = unigramtagger.UnigramTagger()
    tagger.train(corpus)

    want = {
        'VERB': 2 / 3,
        'NOUN': 1 / 3,
    }

    assert tagger.N == 3
    assert tagger.tag_distribution == want
コード例 #2
0
def test_train(corpus):
    """Verify the ``model`` attribute produced by ``train``.

    After training a new ``UnigramTagger`` on ``corpus``, ``model`` must
    equal ``{'train': {'VERB': 2, 'NOUN': 1}}``.
    """
    want = {
        'train': {'VERB': 2, 'NOUN': 1},
    }

    tagger = unigramtagger.UnigramTagger()
    tagger.train(corpus)

    assert tagger.model == want
コード例 #3
0
    def test_train(self):
        """Verify the ``model`` attribute produced by ``train``.

        Trains a new ``UnigramTagger`` on the value returned by
        ``self.corpus()`` and compares ``model`` with the expected mapping.
        """
        training_data = self.corpus()
        tagger = unigramtagger.UnigramTagger()
        tagger.train(training_data)

        want = {
            'train': {'VERB': 2, 'NOUN': 1},
        }
        self.assertEqual(tagger.model, want)
コード例 #4
0
def test_load(corpus):
    """Round-trip a trained tagger through ``save`` and ``load``.

    A helper tagger is trained on ``corpus`` and saved to a temporary file.
    A second, untrained tagger then loads that file and must expose the
    expected ``model`` and ``tag_distribution``.
    """
    expected_distribution = {'VERB': 2 / 3, 'NOUN': 1 / 3}
    expected_model = {'train': Counter(VERB=2, NOUN=1)}

    # The context manager closes the temporary file even when an assertion
    # below fails, so the handle is not leaked.
    with tempfile.NamedTemporaryFile() as testfile:
        # For saving a file to load later
        _ut_saver = unigramtagger.UnigramTagger()
        _ut_saver.train(corpus)
        _ut_saver.save(testfile.name)

        ut = unigramtagger.UnigramTagger()
        ut.load(testfile.name)

        # Separate assertions so a failure reports which attribute differs.
        assert expected_model == ut.model
        assert expected_distribution == ut.tag_distribution
コード例 #5
0
    def test_tag_distribution(self):
        """Verify ``N`` and ``tag_distribution`` on a freshly trained tagger.

        Trains a new ``UnigramTagger`` on the value returned by
        ``self.corpus()``; ``N`` must then equal 3 and ``tag_distribution``
        must map ``'VERB'`` to 2/3 and ``'NOUN'`` to 1/3.
        """
        training_data = self.corpus()
        tagger = unigramtagger.UnigramTagger()
        tagger.train(training_data)

        want = {
            'VERB': 2 / 3,
            'NOUN': 1 / 3,
        }

        self.assertEqual(tagger.N, 3)
        self.assertEqual(tagger.tag_distribution, want)
コード例 #6
0
def test_tag(corpus):
    """Verify the output of ``tag`` for a single-word input.

    After training on ``corpus``, tagging ``['train']`` must return
    ``[('train', 'VERB')]``.
    """
    tagger = unigramtagger.UnigramTagger()
    tagger.train(corpus)

    result = tagger.tag(['train'])
    want = [('train', 'VERB')]

    assert want == result
コード例 #7
0
    def test_tag(self):
        """Verify the output of ``tag`` for a single-word input.

        Trains a new ``UnigramTagger`` on the value returned by
        ``self.corpus()``, then checks that tagging ``['train']`` returns
        ``[('train', 'VERB')]``.
        """
        training_data = self.corpus()
        tagger = unigramtagger.UnigramTagger()
        tagger.train(training_data)

        result = tagger.tag(['train'])
        want = [('train', 'VERB')]

        self.assertEqual(want, result)
コード例 #8
0
def test_save(corpus):
    """Verify the data that ``save`` writes for a trained tagger.

    A tagger is trained on ``corpus`` and saved to a temporary file. The
    file is then unpickled and must equal a 2-tuple containing the expected
    model mapping followed by the expected tag distribution.
    """
    expected_data = (
        {'train': Counter(VERB=2, NOUN=1)},
        {'VERB': 2 / 3, 'NOUN': 1 / 3},
    )

    ut = unigramtagger.UnigramTagger()
    ut.train(corpus)

    # The context manager closes the temporary file even if save() or
    # pickle.load() raises, so the handle is not leaked.
    with tempfile.NamedTemporaryFile() as testfile:
        ut.save(testfile.name)
        # Nothing has been read or written through this handle (save() is
        # given only the file name), so it is still positioned at the start.
        # Only data produced by this test is unpickled here.
        actual_data = pickle.load(testfile)

    assert expected_data == actual_data