예제 #1
0
    def test_sent_histories_1gram(self):
        # With a model built with n=1, the histories of the first tagged
        # sentence are expected to be one History per word index (0..4),
        # each with an empty tuple as its second field.
        tagger = MEMM(1, self.tagged_sents)
        actual = list(tagger.sent_histories(self.tagged_sents[0]))

        words = 'el gato come pescado .'.split()
        expected = [History(words, (), index) for index in range(5)]

        self.assertEqual(actual, expected)
예제 #2
0
    def test_sent_histories_3gram(self):
        # With a model built with n=3, each expected History combines the
        # sentence, a pair of tags and the word index; the first two pairs
        # contain the '<s>' marker.
        tagger = MEMM(3, self.tagged_sents)
        actual = list(tagger.sent_histories(self.tagged_sents[0]))

        words = 'el gato come pescado .'.split()
        tag_pairs = [
            ('<s>', '<s>'),
            ('<s>', 'D'),
            ('D', 'N'),
            ('N', 'V'),
            ('V', 'N'),
        ]
        expected = [
            History(words, pair, index)
            for index, pair in enumerate(tag_pairs)
        ]

        self.assertEqual(actual, expected)
예제 #3
0
    def test_sent_histories_3gram(self):
        # Histories produced by an n=3 model for the first tagged sentence
        # are compared against hand-written (sentence, tag pair, index)
        # triples.
        trigram_model = MEMM(3, self.tagged_sents)
        first_sentence = self.tagged_sents[0]
        produced = list(trigram_model.sent_histories(first_sentence))

        tokens = 'el gato come pescado .'.split()
        wanted = []
        # zip pairs each tag tuple with its word position 0..4.
        for position, tags in zip(
            range(5),
            (('<s>', '<s>'), ('<s>', 'D'), ('D', 'N'), ('N', 'V'), ('V', 'N')),
        ):
            wanted.append(History(tokens, tags, position))

        self.assertEqual(produced, wanted)
예제 #4
0
    def test_sent_histories_1gram(self):
        # Histories produced by an n=1 model for the first tagged sentence
        # are compared against five History objects whose second field is
        # the empty tuple and whose index runs from 0 to 4.
        unigram_model = MEMM(1, self.tagged_sents)
        first_sentence = self.tagged_sents[0]
        produced = list(unigram_model.sent_histories(first_sentence))

        tokens = 'el gato come pescado .'.split()
        wanted = []
        for position in range(5):
            wanted.append(History(tokens, (), position))

        self.assertEqual(produced, wanted)
예제 #5
0
    def test_tag(self):
        # Models built with n = 1, 2 and 3 must all assign the same tag
        # sequence to the sample sentence.
        taggers = [MEMM(order, self.tagged_sents) for order in range(1, 4)]

        words = 'el gato come pescado .'.split()
        expected_tags = 'D N V N P'.split()

        for tagger in taggers:
            predicted = tagger.tag(words)
            self.assertEqual(predicted, expected_tags)
예제 #6
0
    def test_tag_history(self):
        # For models built with n = 1, 2 and 3, tagging each history of the
        # first tagged sentence must yield the expected tag at that position.
        taggers = [MEMM(order, self.tagged_sents) for order in range(1, 4)]

        expected_tags = 'D N V N P'.split()

        for tagger in taggers:
            histories = tagger.sent_histories(self.tagged_sents[0])
            # zip stops at the shorter of the two sequences.
            for history, expected in zip(histories, expected_tags):
                predicted = tagger.tag_history(history)
                self.assertEqual(predicted, expected)
예제 #7
0
    def test_sents_tags(self):
        # sents_tags over the whole training set is expected to produce the
        # flat sequence 'D N V N P' twice.
        tagger = MEMM(3, self.tagged_sents)

        actual = list(tagger.sents_tags(self.tagged_sents))
        expected = 'D N V N P D N V N P'.split()
        self.assertEqual(actual, expected)
예제 #8
0
    def test_sents_tags(self):
        # The tags yielded by sents_tags for all tagged sentences are
        # collected into a list and compared with the expected flat list.
        trigram_model = MEMM(3, self.tagged_sents)

        produced = [tag for tag in trigram_model.sents_tags(self.tagged_sents)]
        wanted = ['D', 'N', 'V', 'N', 'P'] * 2
        self.assertEqual(produced, wanted)
예제 #9
0
def MEM_trainer(tagged_sents):
    """Build and return a MEMM from the given tagged sentences.

    NOTE(review): ``n`` and ``c`` are not parameters; they are read from an
    enclosing or module scope that is not visible here -- confirm they are
    defined before this function is called.
    """
    model = MEMM(n, tagged_sents, c)
    return model
예제 #10
0
    # Materialize the tagged sentences of the corpus.
    sents = list(corpus.tagged_sents())

    # model type selected with the -m option
    m = str(opts['-m'])
    # path of the file the model is written to (-o option)
    filename = opts['-o']

    # build the requested model
    if m == "base":
        print("Baseline Model selected")
        model = BaselineTagger(tagged_sents=sents)
    elif m == "mlhmm":
        n = int(opts['-n'])
        print("Maximum Likelihood Hidden Markov Model selected, n=%d" % n)
        model = MLHMM(n=n, tagged_sents=sents, addone=True)
    elif m == 'memm':
        n = int(opts['-n'])
        c = str(opts['-c'])
        # Reject unknown classifier names before building the model.
        if c not in ['logreg', 'nb', 'svc']:
            print("Bad classifier type, use --help option for help")
            exit()
        print("Maximum Entropy Markov Model selected, n=%d, c=%s" % (n, c))
        model = MEMM(n=n, tagged_sents=sents, classifier=c)
    else:
        print("Bad model type, use --help option for help")
        exit()

    # save it; the context manager closes the file even if pickling raises
    with open(filename, 'wb') as f:
        pickle.dump(model, f)