Example #1
gold = gold_IOB_sents('Resources/Dadegan-pages/001.tsv') + gold_IOB_sents(
    'Resources/Dadegan-pages/003.tsv')
sentences = []
evaluation_sents = []
for gold_sent in gold:
    sentences.append([w for w, t, c, l in gold_sent])
# Run these once (or load precomputed results); chunk_trees and dep_trees are used in the loop below.
# tokens = tagger.tag_sents(sentences)
# chunk_trees = list(chunker.parse_sents(tokens))
# dep_trees = parser.parse_sents(sentences)
dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):
    sentence = ' '.join(sentences[number])
    chunk_tree = chunk_trees[number]
    dep_tree = dep_trees[number]
    chunk_informations = list(chunk_extractor.extract(chunk_tree))
    dep_informations = list(dep_extractor.extract(dep_tree))
    evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
    dep_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, dep_informations)]
    chunk_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, chunk_informations)]
    if len(evaluation_sent) == len(dep_tagged_sent):
        evaluation_sents.append(evaluation_sent)
        dep_tagged_sents.append(dep_tagged_sent)
        chunk_tagged_sents.append(chunk_tagged_sent)
    else:
        print(chunk_tagged_sent)
        print()
Example #2
gold = gold_IOB_sents('Resources/Dadegan-pages/001.tsv') + gold_IOB_sents('Resources/Dadegan-pages/003.tsv')
sentences = []
evaluation_sents = []
for gold_sent in gold:
	sentences.append([w for w, t, c, l in gold_sent])
# Run these once (or load precomputed results); chunk_trees and dep_trees are used in the loop below.
# tokens = tagger.tag_sents(sentences)
# chunk_trees = list(chunker.parse_sents(tokens))
# dep_trees = parser.parse_sents(sentences)
dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):
	sentence = ' '.join(sentences[number])
	chunk_tree = chunk_trees[number]
	dep_tree = dep_trees[number]
	chunk_informations = list(chunk_extractor.extract(chunk_tree))
	dep_informations = list(dep_extractor.extract(dep_tree))
	evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
	dep_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, dep_informations)]
	chunk_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, chunk_informations)]
	if len(evaluation_sent) == len(dep_tagged_sent):
		evaluation_sents.append(evaluation_sent)
		dep_tagged_sents.append(dep_tagged_sent)
		chunk_tagged_sents.append(chunk_tagged_sent)
	else:
		print(chunk_tagged_sent)
		print()
print('dependency accuracy: %f' % (accuracy(sum(evaluation_sents, []), sum(dep_tagged_sents, []))))
print('chunk accuracy: %f' % (accuracy(sum(evaluation_sents, []), sum(chunk_tagged_sents, []))))

information_tagger = IOBTagger(model='informations-all.model')
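
Examples #1 and #2 rely on names defined elsewhere in the project (gold_IOB_sents, info2iob, IOBTagger) and on a tagger, chunker, parser and the two extractors being set up beforehand. A minimal setup sketch, assuming hazm's pretrained models and NLTK's accuracy function; the model paths are placeholders:

# Assumed setup for the snippets above; the model paths are placeholders, adjust them to your installation.
from hazm import POSTagger, Chunker, Lemmatizer, DependencyParser
from baaz import ChunkTreeInformationExtractor, DependencyTreeInformationExtractor
from nltk.metrics import accuracy

tagger = POSTagger(model='resources/postagger.model')
chunker = Chunker(model='resources/chunker.model')
parser = DependencyParser(tagger=tagger, lemmatizer=Lemmatizer())
chunk_extractor = ChunkTreeInformationExtractor()
dep_extractor = DependencyTreeInformationExtractor()
# gold_IOB_sents, info2iob and IOBTagger are project-specific helpers and are not shown here.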
Example #3
import codecs
from hazm import DadeganReader
from baaz import DependencyTreeInformationExtractor, ChunkTreeInformationExtractor


output = codecs.open('resources/informations.txt', 'w', encoding='utf8')
dadegan = DadeganReader('corpora/train.conll')
chunk_extractor = ChunkTreeInformationExtractor()
dependency_extractor = DependencyTreeInformationExtractor()

# For each sentence, extract information triples from both the chunk tree and the dependency tree.
for chunk_tree, dependency_tree in zip(dadegan.chunked_trees(), dadegan.trees()):
	for information in chunk_extractor.extract(chunk_tree):
		print(*information, sep=' - ', file=output)
	print(file=output)
	for information in dependency_extractor.extract(dependency_tree):
		print(*information, sep=' + ', file=output)
	print(file=output)
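
A quick way to check the result after running the script above; this is just a sketch, and it closes the output handle first so the buffered lines are flushed to resources/informations.txt:

output.close()  # flush everything written above before reading it back
with codecs.open('resources/informations.txt', encoding='utf8') as results:
    for line in list(results)[:10]:  # show the first few extracted lines
        print(line.rstrip())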