def test_tag2(self):
    """Tag 'the dog barks' with a hand-built HMM that has an ambiguous step.

    After the ('D', 'N') context the transition table allows both V (0.8)
    and N (0.2), so the expected Viterbi table keeps two entries at
    position 3. Both ``tagger._pi`` and the returned tags are checked.
    """
    transitions = {
        ('<s>', '<s>'): {'D': 1.0},
        ('<s>', 'D'): {'N': 1.0},
        ('D', 'N'): {'V': 0.8, 'N': 0.2},
        ('N', 'N'): {'V': 1.0},
        ('N', 'V'): {'</s>': 1.0},
    }
    emissions = {
        'D': {'the': 1.0},
        'N': {'dog': 0.4, 'barks': 0.6},
        'V': {'dog': 0.1, 'barks': 0.9},
    }
    model = HMM(3, {'D', 'N', 'V'}, transitions, emissions)
    tagger = ViterbiTagger(model)

    sentence = 'the dog barks'.split()
    predicted = tagger.tag(sentence)

    # Expected log2 scores; the factors are the transition and emission
    # values from the tables above.
    start_score = log2(1.0)
    the_score = log2(1.0)
    dog_score = log2(0.4)
    barks_as_verb = log2(0.8 * 0.4 * 0.9)
    barks_as_noun = log2(0.2 * 0.4 * 0.6)

    expected_pi = {
        0: {('<s>', '<s>'): (start_score, [])},
        1: {('<s>', 'D'): (the_score, ['D'])},
        2: {('D', 'N'): (dog_score, ['D', 'N'])},
        3: {
            ('N', 'V'): (barks_as_verb, ['D', 'N', 'V']),
            ('N', 'N'): (barks_as_noun, ['D', 'N', 'N']),
        },
    }

    self.assertEqualPi(tagger._pi, expected_pi)
    self.assertEqual(predicted, 'D N V'.split())
def setUp(self):
    """Build ``self.tagger``: a ViterbiTagger wrapping a hand-specified HMM.

    The model uses tags D, N and V, and every transition context in the
    table has exactly one successor with probability 1.0.
    """
    emissions = {
        'D': {'the': 1.0},
        'N': {'dog': 0.4, 'barks': 0.6},
        'V': {'dog': 0.1, 'barks': 0.9},
    }
    transitions = {
        ('<s>', '<s>'): {'D': 1.0},
        ('<s>', 'D'): {'N': 1.0},
        ('D', 'N'): {'V': 1.0},
        ('N', 'V'): {'</s>': 1.0},
    }
    self.tagger = ViterbiTagger(HMM(3, {'D', 'N', 'V'}, transitions, emissions))
def test_viterbi_tagger(self):
    """Run ViterbiTagger over an MLHMM built from ``self.tagged_sents``.

    Tags 'el gato come pescado .' and compares both the tagger's ``_pi``
    table and the returned tag sequence against expected values.
    """
    model = MLHMM(2, self.tagged_sents, addone=False)
    # XXX: or directly test hmm.tag?
    tagger = ViterbiTagger(model)

    predicted = tagger.tag('el gato come pescado .'.split())

    # One (tag, probability factor) pair per position of the sentence.
    steps = [
        ('D', 0.5),   # 0.5 for el/D
        ('N', 0.25),  # 0.25 for gato/N
        ('V', 0.5),   # 0.5 for V after N
        ('N', 0.25),  # 0.25 for pescado/N
        ('P', 0.5),   # 0.5 for P after N
    ]

    # Position 0 holds only the start context, with log-probability 0.0.
    expected_pi = {0: {('<s>',): (0.0, [])}}
    running_prob = 1.0
    path = []
    for position, (tag, factor) in enumerate(steps, start=1):
        running_prob *= factor
        # Build a new list each step so entries never share a path object.
        path = path + [tag]
        expected_pi[position] = {(tag,): (log2(running_prob), path)}

    self.assertEqualPi(tagger._pi, expected_pi)
    self.assertEqual(predicted, 'D N V N P'.split())