def test_tag2(self):
        """Tag 'the dog barks' and check both the Viterbi table and the result.

        The transition table offers two continuations after ('D', 'N'):
        'V' with 0.8 and 'N' with 0.2, so two tag-pair entries are expected
        at the last position of the table.
        """
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        # First HMM argument is 3; presumably the n-gram order, since the
        # transition contexts are tag pairs -- confirm against HMM.
        tagger = ViterbiTagger(HMM(3, tags, transitions, emissions))

        predicted = tagger.tag('the dog barks'.split())

        # Expected table: position -> {tag pair: (log2 probability, tags)}.
        expected_pi = {
            0: {('<s>', '<s>'): (log2(1.0), [])},
            1: {('<s>', 'D'): (log2(1.0), ['D'])},
            2: {('D', 'N'): (log2(0.4), ['D', 'N'])},
            3: {
                ('N', 'V'): (log2(0.8 * 0.4 * 0.9), ['D', 'N', 'V']),
                ('N', 'N'): (log2(0.2 * 0.4 * 0.6), ['D', 'N', 'N']),
            },
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(predicted, 'D N V'.split())
Example #2
0
    def test_prob2(self):
        """Check prob/log_prob of ('the dog barks', 'D N V') on a branching model.

        The expected value is the product of the non-1.0 table entries
        along that path: transition ('D', 'N') -> 'V' (0.8), output
        'N' -> 'dog' (0.4) and output 'V' -> 'barks' (0.9).
        """
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emission_table = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        emissions = defaultdict(float, emission_table)
        model = HMM(3, tags, transitions, emissions)

        words = 'the dog barks'.split()
        tagging = 'D N V'.split()
        expected = 0.8 * 0.4 * 0.9

        prob = model.prob(words, tagging)
        self.assertAlmostEqual(prob, expected)

        log_prob = model.log_prob(words, tagging)
        self.assertAlmostEqual(log_prob, log2(expected))
Example #3
0
    def test_tag_prob2(self):
        """Check tag_prob/tag_log_prob of 'D N V' on a branching model.

        Every transition entry used by this tagging is 1.0 except
        ('D', 'N') -> 'V', which is 0.8, so 0.8 is the expected value.
        """
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        model = HMM(3, tags, transitions, emissions)

        tagging = 'D N V'.split()

        prob = model.tag_prob(tagging)
        self.assertAlmostEqual(prob, 0.8)

        log_prob = model.tag_log_prob(tagging)
        self.assertAlmostEqual(log_prob, log2(0.8))
Example #4
0
    def test_tag_prob(self):
        """Check tag_prob/tag_log_prob of 'D N V' on a single-path model.

        Each transition context in the table has exactly one successor
        with probability 1.0, so the expected tagging probability is 1.0.
        """
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {
                'D': 1.0,
            },
            ('<s>', 'D'): {
                'N': 1.0,
            },
            ('D', 'N'): {
                'V': 1.0,
            },
            ('N', 'V'): {
                '</s>': 1.0,
            },
        }
        emissions = {
            'D': {
                'the': 1.0,
            },
            'N': {
                'dog': 0.4,
                'barks': 0.6,
            },
            'V': {
                'dog': 0.1,
                'barks': 0.9,
            },
        }
        model = HMM(3, tags, transitions, emissions)

        tagging = 'D N V'.split()

        prob = model.tag_prob(tagging)
        self.assertAlmostEqual(prob, 1.0)

        log_prob = model.tag_log_prob(tagging)
        self.assertAlmostEqual(log_prob, log2(1.0))
Example #5
0
    def test_prob(self):
        """Check prob/log_prob of ('the dog barks', 'D N V') on a single-path model.

        All transition entries are 1.0 and 'D' -> 'the' is 1.0, so the
        expected value is output 'N' -> 'dog' (0.4) times output
        'V' -> 'barks' (0.9).
        """
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {
                'D': 1.0,
            },
            ('<s>', 'D'): {
                'N': 1.0,
            },
            ('D', 'N'): {
                'V': 1.0,
            },
            ('N', 'V'): {
                '</s>': 1.0,
            },
        }
        emission_table = {
            'D': {
                'the': 1.0,
            },
            'N': {
                'dog': 0.4,
                'barks': 0.6,
            },
            'V': {
                'dog': 0.1,
                'barks': 0.9,
            },
        }
        emissions = defaultdict(float, emission_table)
        model = HMM(3, tags, transitions, emissions)

        words = 'the dog barks'.split()
        tagging = 'D N V'.split()
        expected = 0.4 * 0.9

        prob = model.prob(words, tagging)
        self.assertAlmostEqual(prob, expected)

        log_prob = model.log_prob(words, tagging)
        self.assertAlmostEqual(log_prob, log2(expected))
 def setUp(self):
     """Create the shared fixture: a ViterbiTagger stored as self.tagger.

     The underlying HMM uses a transition table in which every context
     has a single successor with probability 1.0.
     """
     tags = {'D', 'N', 'V'}
     transitions = {
         ('<s>', '<s>'): {
             'D': 1.0,
         },
         ('<s>', 'D'): {
             'N': 1.0,
         },
         ('D', 'N'): {
             'V': 1.0,
         },
         ('N', 'V'): {
             '</s>': 1.0,
         },
     }
     emissions = {
         'D': {
             'the': 1.0,
         },
         'N': {
             'dog': 0.4,
             'barks': 0.6,
         },
         'V': {
             'dog': 0.1,
             'barks': 0.9,
         },
     }
     self.tagger = ViterbiTagger(HMM(3, tags, transitions, emissions))