def test_tag(self):
    """Tag full sentences; unseen words get the corpus-wide default tag.

    NOTE(review): a second ``test_tag`` appears later in this file and
    shadows this one, so unittest never runs this version — the two
    disagree on the default tag ('nc0s000' here vs 'N' there); confirm
    which expectation matches BaselineTagger's actual behavior.
    """
    baseline = BaselineTagger(self.tagged_sents)

    # All words seen in training: most-frequent tag per word.
    tagging = baseline.tag('el gato come pescado .'.split())
    self.assertEqual(tagging, 'D N V N P'.split())

    # 'perro' and 'salame' are unseen: expect the default tag.
    tagging = baseline.tag('el perro come salame .'.split())
    self.assertEqual(tagging, 'D nc0s000 V nc0s000 P'.split())
def test_tag_word(self):
    """Tag words one at a time; unseen words get the default tag.

    NOTE(review): shadowed by a later ``test_tag_word`` in this file
    (which expects 'N' for unseen words instead of 'nc0s000') — confirm
    which version is the intended one.
    """
    baseline = BaselineTagger(self.tagged_sents)

    sent = 'el gato come pescado .'.split()
    expected = 'D N V N P'.split()
    for word, tag in zip(sent, expected):
        self.assertEqual(tag, baseline.tag_word(word))

    # Sentence containing unseen words ('perro', 'salame').
    sent = 'el perro come salame .'.split()
    expected = 'D nc0s000 V nc0s000 P'.split()
    for word, tag in zip(sent, expected):
        self.assertEqual(tag, baseline.tag_word(word))
def test_tag(self):
    """Tag full sentences; unseen words expected to default to 'N'.

    NOTE(review): this duplicates and shadows an earlier ``test_tag``
    in this file that expects 'nc0s000' for unseen words — only this
    version runs under unittest; confirm the duplication is intended.
    """
    baseline = BaselineTagger(self.tagged_sents)

    # Every word appears in the training data.
    result = baseline.tag('el gato come pescado .'.split())
    self.assertEqual(result, 'D N V N P'.split())

    # 'perro' and 'salame' are out-of-vocabulary.
    result = baseline.tag('el perro come salame .'.split())
    self.assertEqual(result, 'D N V N P'.split())
def test_tag_word(self):
    """Tag words individually; unseen words expected to default to 'N'.

    NOTE(review): duplicates and shadows the earlier ``test_tag_word``
    in this file (which expects 'nc0s000' for unseen words) — confirm
    the duplication is intended.
    """
    baseline = BaselineTagger(self.tagged_sents)

    pairs = zip('el gato come pescado .'.split(), 'D N V N P'.split())
    for word, expected_tag in pairs:
        self.assertEqual(expected_tag, baseline.tag_word(word))

    # 'perro' and 'salame' are out-of-vocabulary.
    pairs = zip('el perro come salame .'.split(), 'D N V N P'.split())
    for word, expected_tag in pairs:
        self.assertEqual(expected_tag, baseline.tag_word(word))
def test_unknown(self):
    """unknown() is False for training-set words, True otherwise."""
    baseline = BaselineTagger(self.tagged_sents)

    # Words present in the training sentences.
    seen_words = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}
    for word in seen_words:
        self.assertFalse(baseline.unknown(word))

    # Words never seen during training.
    unseen_words = {'perro', 'salame'}
    for word in unseen_words:
        self.assertTrue(baseline.unknown(word))
opts = docopt(__doc__)

# Load the training data.
print("Loading corpus data...")
# Raw string: '\.' is an invalid escape sequence in a plain string literal
# (DeprecationWarning today, a SyntaxError in future Python); the runtime
# value is unchanged.
files = r'CESS-CAST-(A|AA|P)/.*\.tbf\.xml'
corpus = SimpleAncoraCorpusReader('corpus/ancora-2.0/', files)
sents = list(corpus.tagged_sents())

# Command-line options: model type and output filename.
m = str(opts['-m'])
filename = opts['-o']  # presumably used below to save the model — outside this view

# Instantiate the selected model, training on the corpus sentences.
if m == "base":
    print("Baseline Model selected")
    model = BaselineTagger(tagged_sents=sents)
elif m == "mlhmm":
    n = int(opts['-n'])
    print("Maximum Likelihood Hidden Markov Model selected, n=%d" % n)
    model = MLHMM(n=n, tagged_sents=sents, addone=True)
elif m == 'memm':
    n = int(opts['-n'])
    c = str(opts['-c'])
    if c not in ['logreg', 'nb', 'svc']:
        print("Bad classifier type, use --help option for help")
        exit()
    print("Maximum Entropy Markov Model selected, n=%d, c=%s" % (n, c))
    model = MEMM(n=n, tagged_sents=sents, classifier=c)
else:
    print("Bad model type, use --help option for help")
    exit()