コード例 #1
0
	def train(self, sentences, model_file='resources/extractor.model'):
		"""Build a fresh IOBTagger, train it on `sentences`, and save the model.

		The new tagger replaces `self.tagger` before training starts; once
		training returns, the model is written to `model_file`.

		NOTE(review): the pattern strings look like Wapiti-style feature
		templates (`%X[row offset, column]`) -- confirm against the IOBTagger
		implementation. The expected shape of `sentences` is not visible here.
		"""

		feature_patterns = [
			'*',

			# Column 1 at offsets -1, 0 and +1.
			'*:tl1=%X[-1,1]',
			'*:t=%X[0,1]',
			'*:tr1=%X[1,1]',

			# Column 2 at the current position; the first entry applies the
			# "..$" expression to it (presumably a regex match -- TODO confirm).
			'*:cp=%m[0,2,"..$"]',
			'*:c=%X[0,2]',

			# Column 2 at the current position paired with one neighbour.
			'*:c0l1=%X[-1,2]/%X[0,2]',
			'*:c0r1=%X[0,2]/%X[1,2]',

			# Column 2 at offsets -1, -2, +1 and +2.
			'*:cl1=%X[-1,2]',
			'*:cl2=%X[-2,2]',
			'*:cr1=%X[1,2]',
			'*:cr2=%X[2,2]',
		]

		self.tagger = IOBTagger(patterns=feature_patterns)

		self.tagger.train(sentences)
		self.tagger.save_model(model_file)
コード例 #2
0
ファイル: 200DadeganSents.py プロジェクト: ecnumjc/baaz
# Compare the IOB labels produced from dependency-based and chunk-based
# extractions against the gold labels, then evaluate a saved IOBTagger model.
#
# NOTE(review): `gold`, `sentences`, `chunk_trees`, `dep_trees`,
# `chunk_extractor`, `dep_extractor`, `info2iob`, `accuracy` and
# `evaluation_sents` are not defined in this section; they are expected to
# exist already when this code runs.
#dep_trees = parser.parse_sents(sentences)
dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):

    sentence = ' '.join(sentences[number])
    chunk_tree = chunk_trees[number]
    dep_tree = dep_trees[number]
    chunk_informations = list(chunk_extractor.extract(chunk_tree))
    dep_informations = list(dep_extractor.extract(dep_tree))

    # Keep only the (word, label) pair of every 4-field token.
    evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
    dep_tagged_sent = [
        (w, l)
        for w, t, c, l in info2iob(sentence, chunk_tree, dep_informations)
    ]
    chunk_tagged_sent = [
        (w, l)
        for w, t, c, l in info2iob(sentence, chunk_tree, chunk_informations)
    ]

    # Only the dependency-based length is compared with the gold length;
    # presumably the chunk-based sequence always has the same length because
    # it is built from the same sentence and chunk tree -- TODO confirm.
    if len(evaluation_sent) == len(dep_tagged_sent):
        evaluation_sents.append(evaluation_sent)
        dep_tagged_sents.append(dep_tagged_sent)
        chunk_tagged_sents.append(chunk_tagged_sent)
    else:
        # Length mismatch: leave the sentence out and dump it for inspection.
        print(chunk_tagged_sent)
        print()

# Flatten each list of sentences once, in linear time. `sum(lists, [])`
# re-copies the accumulated list on every addition (quadratic), and the gold
# side was previously flattened twice.
gold_pairs = [pair for sent in evaluation_sents for pair in sent]
dep_pairs = [pair for sent in dep_tagged_sents for pair in sent]
print('dependency accuracy: %f' % accuracy(gold_pairs, dep_pairs))
chunk_pairs = [pair for sent in chunk_tagged_sents for pair in sent]
print('chunk accuracy: %f' % accuracy(gold_pairs, chunk_pairs))

# Evaluate the saved tagger model directly on the gold sentences.
information_tagger = IOBTagger(model='informations-all.model')
print(information_tagger.evaluate(gold))