def test_tag2(self):
        # Tags "the dog barks" and checks both the tagger's internal table
        # (tagger._pi) and the tag sequence returned by tag().
        transitions = {
            ("<s>", "<s>"): {"D": 1.0},
            ("<s>", "D"): {"N": 1.0},
            ("D", "N"): {"V": 0.8, "N": 0.2},
            ("N", "N"): {"V": 1.0},
            ("N", "V"): {"</s>": 1.0},
        }
        emissions = {
            "D": {"the": 1.0},
            "N": {"dog": 0.4, "barks": 0.6},
            "V": {"dog": 0.1, "barks": 0.9},
        }
        tagger = ViterbiTagger(HMM(3, {"D", "N", "V"}, transitions, emissions))

        predicted = tagger.tag("the dog barks".split())

        # Expected scores are log2 of products of the probabilities above.
        # The last column keeps an entry for each tag that may follow (D, N).
        score_dnv = log2(0.8 * 0.4 * 0.9)
        score_dnn = log2(0.2 * 0.4 * 0.6)
        expected_pi = {
            0: {("<s>", "<s>"): (log2(1.0), [])},
            1: {("<s>", "D"): (log2(1.0), ["D"])},
            2: {("D", "N"): (log2(0.4), ["D", "N"])},
            3: {
                ("N", "V"): (score_dnv, ["D", "N", "V"]),
                ("N", "N"): (score_dnn, ["D", "N", "N"]),
            },
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(predicted, "D N V".split())
    def test_tag2(self):
        # Tags 'the dog barks' and verifies the table stored in tagger._pi
        # as well as the tag sequence returned by tag().
        start = ('<s>', '<s>')
        transitions = {
            start: {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        model = HMM(3, {'D', 'N', 'V'}, transitions, emissions)
        tagger = ViterbiTagger(model)

        sentence = 'the dog barks'.split()
        tagged = tagger.tag(sentence)

        # Column 3 holds one entry per tag allowed after ('D', 'N'); each
        # score is log2 of a product of the probabilities defined above.
        final_column = {
            ('N', 'V'): (log2(0.8 * 0.4 * 0.9), ['D', 'N', 'V']),
            ('N', 'N'): (log2(0.2 * 0.4 * 0.6), ['D', 'N', 'N']),
        }
        expected_pi = {
            0: {start: (log2(1.0), [])},
            1: {('<s>', 'D'): (log2(1.0), ['D'])},
            2: {('D', 'N'): (log2(0.4), ['D', 'N'])},
            3: final_column,
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(tagged, 'D N V'.split())
 def setUp(self):
     # Fixture: stores on self.tagger a ViterbiTagger wrapping a small HMM
     # in which every listed transition has probability 1.0.
     transitions = {
         ('<s>', '<s>'): {'D': 1.0},
         ('<s>', 'D'): {'N': 1.0},
         ('D', 'N'): {'V': 1.0},
         ('N', 'V'): {'</s>': 1.0},
     }
     emissions = {
         'D': {'the': 1.0},
         'N': {'dog': 0.4, 'barks': 0.6},
         'V': {'dog': 0.1, 'barks': 0.9},
     }
     model = HMM(3, {'D', 'N', 'V'}, transitions, emissions)
     self.tagger = ViterbiTagger(model)
Example #4
0
    def test_viterbi_tagger(self):
        # Tags 'el gato come pescado .' with a tagger built on an MLHMM
        # estimated from self.tagged_sents, then checks tagger._pi and the
        # returned tags.
        # XXX: or directly test hmm.tag?
        tagger = ViterbiTagger(MLHMM(2, self.tagged_sents, addone=False))

        tagged = tagger.tag('el gato come pescado .'.split())

        # One row per expected column, in column order:
        # (column key, log-probability, tag path so far).
        rows = [
            (('<s>',), 0.0, []),
            # 0.5 for el/D
            (('D',), log2(0.5), ['D']),
            # 0.25 for gato/N
            (('N',), log2(0.5 * 0.25), ['D', 'N']),
            # 0.5 for V after N
            (('V',), log2(0.5 * 0.25 * 0.5), ['D', 'N', 'V']),
            # 0.25 for pescado/N
            (('N',), log2(0.5 * 0.25 * 0.5 * 0.25), ['D', 'N', 'V', 'N']),
            # 0.5 for P after N
            (
                ('P',),
                log2(0.5 * 0.25 * 0.5 * 0.25 * 0.5),
                ['D', 'N', 'V', 'N', 'P'],
            ),
        ]
        expected_pi = {
            column: {key: (score, path)}
            for column, (key, score, path) in enumerate(rows)
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(tagged, 'D N V N P'.split())
Example #5
0
    def test_tag2(self):
        # Tags 'the dog barks' and compares tagger._pi and the returned tag
        # sequence against hand-computed expectations.
        tags = {'D', 'N', 'V'}
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        tagger = ViterbiTagger(HMM(3, tags, transitions, emissions))

        words = 'the dog barks'.split()
        result = tagger.tag(words)

        # Build the expected table one column at a time.
        expected_pi = {0: {('<s>', '<s>'): (log2(1.0), [])}}
        expected_pi[1] = {('<s>', 'D'): (log2(1.0), ['D'])}
        # 0.4 matches the N -> 'dog' output probability above.
        expected_pi[2] = {('D', 'N'): (log2(0.4), ['D', 'N'])}
        # Both tags allowed after ('D', 'N') appear in the last column.
        expected_pi[3] = {
            ('N', 'V'): (log2(0.8 * 0.4 * 0.9), ['D', 'N', 'V']),
            ('N', 'N'): (log2(0.2 * 0.4 * 0.6), ['D', 'N', 'N']),
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(result, 'D N V'.split())
Example #6
0
    def test_viterbi_tagger(self):
        # Tags 'el gato come pescado .' using an MLHMM built from
        # self.tagged_sents and checks the table in tagger._pi plus the
        # returned tags.
        model = MLHMM(2, self.tagged_sents, addone=False)
        # XXX: or directly test hmm.tag?
        tagger = ViterbiTagger(model)

        result = tagger.tag('el gato come pescado .'.split())

        gold = 'D N V N P'.split()
        # Expected score of each column, in column order.
        scores = [
            0.0,
            log2(0.5),  # 0.5 for el/D
            log2(0.5 * 0.25),  # 0.25 for gato/N
            log2(0.5 * 0.25 * 0.5),  # 0.5 for V after N
            log2(0.5 * 0.25 * 0.5 * 0.25),  # 0.25 for pescado/N
            log2(0.5 * 0.25 * 0.5 * 0.25 * 0.5),  # 0.5 for P after N
        ]
        # Column 0 is keyed by ('<s>',); column k > 0 by the k-th gold tag.
        # The path stored in column k is the first k gold tags.
        keys = [('<s>',)] + [(tag,) for tag in gold]
        expected_pi = {
            k: {key: (score, gold[:k])}
            for k, (key, score) in enumerate(zip(keys, scores))
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(result, gold)
class TestViterbiInit(TestCase):
    """Tests for ViterbiTagger.init_pi and fill_column on a small HMM."""

    def setUp(self):
        # Every listed transition has probability 1.0, so the expected
        # scores in the tests below are easy to compute by hand.
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        model = HMM(3, {'D', 'N', 'V'}, transitions, emissions)
        self.tagger = ViterbiTagger(model)

    def test_fill_column_seen_token(self):
        # Filling column 1 with 'the' must create an entry for ('<s>', 'D').
        tagger = self.tagger
        tagger.init_pi()
        tagger.fill_column(1, 'the')
        table = tagger._pi

        self.assertIn(1, table)
        key = ('<s>', 'D')
        self.assertIn(key, table[1])

        # transition from (<s>, <s>) to (<s>, 'D')
        # plus probability of word 'the' given the tag 'D'
        expected_score = log2(1.0) + log2(1.0)
        self.assertAlmostEqual(table[1][key][0], expected_score)

    def test_fill_column_unseen_token(self):
        # Filling column 1 with a word no tag emits must leave it empty.
        tagger = self.tagger
        tagger.init_pi()
        tagger.fill_column(1, 'unseen')
        table = tagger._pi

        self.assertIn(1, table)
        self.assertEqual(len(table[1]), 0)