# Example 1
    def evaluate(self, dataset_name, dataset, dataset_conllu, args):
        """Predict on *dataset*, score it with the CoNLL-18 UD scorer and
        log per-metric F1 summaries under *dataset_name*.

        Returns a ``(score, metrics)`` tuple, where ``score`` is the LAS
        F1, falling back to the AllTags F1 once LAS has reached 1.0.
        """
        import io

        # Clear accumulated metric state before scoring this dataset.
        self.session.run(self.reset_metrics)

        predicted = self.predict(dataset, args)
        metrics = conll18_ud_eval.evaluate(
            dataset_conllu,
            conll18_ud_eval.load_conllu(io.StringIO(predicted)))

        feed = {self.metrics[name]: metrics[name].f1 for name in self.METRICS}
        self.session.run(self.summaries[dataset_name], feed)

        las = metrics["LAS"].f1
        score = las if las < 1 else metrics["AllTags"].f1
        return score, metrics
# Example 2
    def evaluate(self, dataset_name, dataset, dataset_conllu, args):
        """Predict on *dataset*, evaluate against the gold CoNLL-U data and
        write summary F1 values for *dataset_name*.

        Returns ``(score, metrics)``: the mean of the LAS/MLAS/BLEX F1
        scores when parsing is enabled (``args.parse``), otherwise the
        AllTags F1.
        """
        import io

        predicted = self.predict(dataset, True, args)
        metrics = conll18_ud_eval.evaluate(
            dataset_conllu,
            conll18_ud_eval.load_conllu(io.StringIO(predicted)))

        feed = {self.metrics[name]: metrics[name].f1 for name in self.METRICS}
        self.session.run(self.summaries[dataset_name], feed)

        if not args.parse:
            return metrics["AllTags"].f1, metrics
        parse_score = (metrics["LAS"].f1 + metrics["MLAS"].f1 +
                       metrics["BLEX"].f1) / 3.
        return parse_score, metrics
# Example 3
# Annotate the first 200 Penn Treebank sentences with CoreNLP and collect the
# CoNLL-U output. Accumulating chunks in a list and joining once avoids the
# quadratic cost of repeated string concatenation.
corenlp_parts = []
with CoreNLPClient(
        annotators=['tokenize', 'ssplit', 'pos', 'parse', 'depparse'],
        output_format="conllu",
        timeout=3000001,
        endpoint='http://localhost:9001') as client:
    for s in nltk.corpus.dependency_treebank.sents()[:200]:
        sent = detok.detokenize(s)
        corenlp_parts.append(client.annotate(sent))
corenlp_conll_en = "".join(corenlp_parts)

# Normalize tokens so the scorer does not mismatch them between the CoreNLP
# output and the gold data (abbreviation periods, stray mis-encoded "Â").
f_corenlp_en = io.StringIO(
    corenlp_conll_en.replace("Corp.", "Corp").replace("Conn.", "Conn").replace(
        "Â", "").replace("Ltd.", "Ltd"))
corenlp_en_eval = load_conllu(f_corenlp_en)

# The same abbreviation normalization is applied to the gold text.
f_gold_en = io.StringIO(
    gold_conll_en.replace("Corp.",
                          "Corp").replace("Conn.",
                                          "Conn").replace("Ltd.", "Ltd"))
gold_en_eval = load_conllu(f_gold_en)
corenlp_en_evaluation = evaluate(gold_en_eval, corenlp_en_eval)

print_results(
    corenlp_en_evaluation,
    "Results for Penn Treebank dataset using CoreNLP Dependency Parser")

# Spanish
spanish_dep_file = '../../dependency/UD_Spanish-AnCora-master/es_ancora-ud-test.conllu'
# Example 4
# Parse the first 200 Penn Treebank sentences with Stanza and serialize the
# result to CoNLL-U text. Fixes in this version: the outer loop variable is
# no longer shadowed by the inner sentence loop, rows are joined with
# '\t'.join instead of append-then-trim, and the output is accumulated in a
# list and joined once (repeated ``+=`` on a string is quadratic).
nlp = stanza.Pipeline(processors='tokenize,mwt,pos,lemma,depparse')
stanza_lines = []
for sent_tokens in nltk.corpus.dependency_treebank.sents()[:200]:
    sent = detok.detokenize(sent_tokens)
    doc = nlp(sent)
    # CoNLL.convert_dict yields one list of token rows per sentence; each
    # row is a list of CoNLL-U column values.
    for parsed_sent in CoNLL.convert_dict(doc.to_dict()):
        for row in parsed_sent:
            stanza_lines.append('\t'.join(row) + '\r\n')
        stanza_lines.append('\r\n')  # blank line terminates a sentence
stanza_conll_en = ''.join(stanza_lines)

f_gold_en = io.StringIO(gold_conll_en)
f_stanza_en = io.StringIO(stanza_conll_en)

gold_en_eval = load_conllu(f_gold_en)
stanza_en_eval = load_conllu(f_stanza_en)

stanza_en_evaluation = evaluate(gold_en_eval, stanza_en_eval)

print_results(stanza_en_evaluation,
              "Results for Penn Treebank dataset using Stanza Dependency Parser")

# Spanish
spanish_dep_file = '../../dependency/UD_Spanish-AnCora-master/es_ancora-ud-test.conllu'

with open(spanish_dep_file, 'r') as ancora_f:
    ancora_text = conll_text_reader(ancora_f)

# Spanish pipeline: input is already tokenized (one sentence per line).
nlp = stanza.Pipeline(processors='tokenize,mwt,pos,lemma,depparse',
                      tokenize_pretokenized=True, lang='es')
doc = nlp(ancora_text)