Example #1
0
    def test_tag(self):
        """Check the tag sequence produced for two whole sentences.

        The second sentence swaps in 'perro' and 'salame', whose expected
        tag is 'nc0s000'.
        """
        tagger = BaselineTagger(self.tagged_sents)

        # (sentence, expected tags), both whitespace-separated.
        cases = (
            ('el gato come pescado .', 'D N V N P'),
            ('el perro come salame .', 'D nc0s000 V nc0s000 P'),
        )
        for sentence, expected in cases:
            predicted = tagger.tag(sentence.split())
            self.assertEqual(predicted, expected.split())
Example #2
0
    def test_tag_word(self):
        """Check tag_word() on each word of two sentences, one at a time.

        'perro' and 'salame' in the second sentence are expected to get
        the tag 'nc0s000'.
        """
        tagger = BaselineTagger(self.tagged_sents)

        # (sentence, expected tags), aligned token by token.
        cases = (
            ('el gato come pescado .', 'D N V N P'),
            ('el perro come salame .', 'D nc0s000 V nc0s000 P'),
        )
        for sentence, tags in cases:
            for word, expected in zip(sentence.split(), tags.split()):
                self.assertEqual(expected, tagger.tag_word(word))
Example #3
0
    def test_tag(self):
        """Check that both sentences are tagged 'D N V N P'.

        The second sentence replaces 'gato'/'pescado' with
        'perro'/'salame' and is expected to receive the same tags.
        """
        tagger = BaselineTagger(self.tagged_sents)

        first_sent = 'el gato come pescado .'.split()
        first_tags = tagger.tag(first_sent)
        self.assertEqual(first_tags, 'D N V N P'.split())

        second_sent = 'el perro come salame .'.split()
        second_tags = tagger.tag(second_sent)
        self.assertEqual(second_tags, 'D N V N P'.split())
Example #4
0
    def test_tag_word(self):
        """Check tag_word() word by word; both sentences expect 'D N V N P'."""
        tagger = BaselineTagger(self.tagged_sents)

        def check(words, tags):
            # Compare each word's predicted tag with the aligned expected tag.
            for word, expected in zip(words, tags):
                self.assertEqual(expected, tagger.tag_word(word))

        check('el gato come pescado .'.split(), 'D N V N P'.split())
        check('el perro come salame .'.split(), 'D N V N P'.split())
Example #5
0
    def test_unknown(self):
        """Check which words unknown() reports as unknown.

        Eight words must be reported as not unknown; 'perro' and 'salame'
        must be reported as unknown.
        """
        tagger = BaselineTagger(self.tagged_sents)

        seen = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}
        for word in seen:
            is_unknown = tagger.unknown(word)
            self.assertFalse(is_unknown)

        unseen = {'perro', 'salame'}
        for word in unseen:
            is_unknown = tagger.unknown(word)
            self.assertTrue(is_unknown)
Example #6
0
    def test_unknown(self):
        """Check unknown() against a set of known and a set of unknown words."""
        tagger = BaselineTagger(self.tagged_sents)

        # Each entry pairs the assertion to apply with the words it covers;
        # the known words are checked first, then the unknown ones.
        expectations = (
            (self.assertFalse,
             {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}),
            (self.assertTrue, {'perro', 'salame'}),
        )
        for check, words in expectations:
            for word in words:
                check(tagger.unknown(word))
Example #7
0
    # Parse the command-line options described in the module docstring.
    opts = docopt(__doc__)

    # load the data
    print("Loading corpus data...")
    # Raw string: '\.' in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions. The runtime value of the
    # pattern is unchanged.
    files = r'CESS-CAST-(A|AA|P)/.*\.tbf\.xml'
    corpus = SimpleAncoraCorpusReader('corpus/ancora-2.0/', files)
    sents = list(corpus.tagged_sents())

    # type of model to build ("base", "mlhmm" or "memm")
    m = str(opts['-m'])
    # value of the -o option (presumably where the trained model is saved)
    filename = opts['-o']

    # Build the requested model from the tagged sentences.
    if m == "base":
        print("Baseline Model selected")
        model = BaselineTagger(tagged_sents=sents)
    elif m == "mlhmm":
        # -n is read as an integer and passed to the model as n.
        n = int(opts['-n'])
        print("Maximum Likelihood Hidden Markov Model selected, n=%d" % n)
        model = MLHMM(n=n, tagged_sents=sents, addone=True)
    elif m == 'memm':
        n = int(opts['-n'])
        c = str(opts['-c'])
        # Only these classifier names are accepted; anything else aborts.
        if c not in ['logreg', 'nb', 'svc']:
            print("Bad classifier type, use --help option for help")
            exit()
        print("Maximum Entropy Markov Model selected, n=%d, c=%s" % (n, c))
        model = MEMM(n=n, tagged_sents=sents, classifier=c)
    else:
        # Unrecognized model type: report it and stop.
        print("Bad model type, use --help option for help")
        exit()