예제 #1
0
    def test_tag2(self):
        """Tag 'the dog barks' with a hand-built order-3 HMM.

        Verifies both the table left in ``tagger._pi`` after tagging and
        the tag sequence returned by ``tagger.tag``.
        """
        # Third HMM argument: tag-pair context -> {next tag: probability}.
        transitions = {
            ('<s>', '<s>'): {'D': 1.0},
            ('<s>', 'D'): {'N': 1.0},
            ('D', 'N'): {'V': 0.8, 'N': 0.2},
            ('N', 'N'): {'V': 1.0},
            ('N', 'V'): {'</s>': 1.0},
        }
        # Fourth HMM argument: tag -> {word: probability}.
        emissions = {
            'D': {'the': 1.0},
            'N': {'dog': 0.4, 'barks': 0.6},
            'V': {'dog': 0.1, 'barks': 0.9},
        }
        model = HMM(3, {'D', 'N', 'V'}, transitions, emissions)
        tagger = ViterbiTagger(model)

        sentence = 'the dog barks'.split()
        predicted = tagger.tag(sentence)

        # Expected table: position -> {tag pair: (log2 probability, tag path)}.
        start_column = {('<s>', '<s>'): (log2(1.0), [])}
        first_column = {('<s>', 'D'): (log2(1.0), ['D'])}
        second_column = {('D', 'N'): (log2(0.4), ['D', 'N'])}
        # Two tag pairs are expected at the last position: ending in V or in N.
        third_column = {
            ('N', 'V'): (log2(0.8 * 0.4 * 0.9), ['D', 'N', 'V']),
            ('N', 'N'): (log2(0.2 * 0.4 * 0.6), ['D', 'N', 'N']),
        }
        expected_pi = {
            0: start_column,
            1: first_column,
            2: second_column,
            3: third_column,
        }
        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(predicted, ['D', 'N', 'V'])
 def setUp(self):
     """Create ``self.tagger``: a ViterbiTagger over a small hand-written HMM."""
     # Third HMM argument: tag-pair context -> {next tag: probability}.
     transitions = {
         ('<s>', '<s>'): {
             'D': 1.0,
         },
         ('<s>', 'D'): {
             'N': 1.0,
         },
         ('D', 'N'): {
             'V': 1.0,
         },
         ('N', 'V'): {
             '</s>': 1.0,
         },
     }
     # Fourth HMM argument: tag -> {word: probability}.
     emissions = {
         'D': {
             'the': 1.0,
         },
         'N': {
             'dog': 0.4,
             'barks': 0.6,
         },
         'V': {
             'dog': 0.1,
             'barks': 0.9,
         },
     }
     tags = {'D', 'N', 'V'}
     self.tagger = ViterbiTagger(HMM(3, tags, transitions, emissions))
예제 #3
0
    def test_viterbi_tagger(self):
        """Tag a five-token sentence with an MLHMM built from ``self.tagged_sents``.

        Verifies both the table left in ``tagger._pi`` after tagging and
        the tag sequence returned by ``tagger.tag``.
        """
        model = MLHMM(2, self.tagged_sents, addone=False)
        # XXX: or directly test hmm.tag?
        tagger = ViterbiTagger(model)

        predicted = tagger.tag('el gato come pescado .'.split())

        # One (tag, probability factor) pair per token, in sentence order.
        steps = [
            ('D', 0.5),   # 0.5 for el/D
            ('N', 0.25),  # 0.25 for gato/N
            ('V', 0.5),   # 0.5 for V after N
            ('N', 0.25),  # 0.25 for pescado/N
            ('P', 0.5),   # 0.5 for P after N
        ]

        # Expected table: position -> {(tag,): (log2 probability, tag path)}.
        expected_pi = {0: {('<s>', ): (0.0, [])}}
        running_prob = 1.0
        path = []
        for position, (tag, factor) in enumerate(steps, start=1):
            # Multiply left to right so each product matches the written-out form.
            running_prob = running_prob * factor
            # Build a new list each step so columns do not share a path object.
            path = path + [tag]
            expected_pi[position] = {(tag, ): (log2(running_prob), path)}

        self.assertEqualPi(tagger._pi, expected_pi)

        self.assertEqual(predicted, ['D', 'N', 'V', 'N', 'P'])