コード例 #1
0
ファイル: agreement.py プロジェクト: emsrc/daeso-framework
def cross_evaluate(corpus_fns, annotators=None, pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.

    @param corpus_fns: sequence of corpus filenames, one per annotator

    @keyword annotators: sequence of annotator names, parallel to
    corpus_fns; when omitted or empty, names "A1", "A2", ... are generated

    @keyword pickle_fn: if given, name of the file to which the AlignEval
    object is pickled

    @keyword words_only: passed on unchanged to read_corpora

    @return: the AlignEval object, after run_eval() has been called on it
    """
    if annotators:
        assert len(annotators) == len(corpus_fns), \
            "number of annotators must match number of corpus files"
    else:
        annotators = ["A%d" % number
                      for number in range(1, len(corpus_fns) + 1)]

    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()

    # Every ordered (true, predicted) pair is added. Pairs are filtered by
    # annotator *name*, so annotators sharing a name are never compared.
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()

    if pickle_fn:
        # context manager guarantees the pickle file is flushed and closed,
        # also when dumping raises
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)

    return align_eval
コード例 #2
0
def cross_evaluate(corpus_fns,
                   annotators=None,
                   pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.

    @param corpus_fns: sequence of corpus filenames, one per annotator

    @keyword annotators: sequence of annotator names, parallel to
    corpus_fns; when omitted or empty, names "A1", "A2", ... are generated

    @keyword pickle_fn: if given, name of the file to which the AlignEval
    object is pickled

    @keyword words_only: passed on unchanged to read_corpora

    @return: the AlignEval object, after run_eval() has been called on it
    """
    if annotators:
        assert len(annotators) == len(corpus_fns), \
            "number of annotators must match number of corpus files"
    else:
        annotators = ["A%d" % number
                      for number in range(1, len(corpus_fns) + 1)]

    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()

    # Every ordered (true, predicted) pair is added. Pairs are filtered by
    # annotator *name*, so annotators sharing a name are never compared.
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()

    if pickle_fn:
        # context manager guarantees the pickle file is flushed and closed,
        # also when dumping raises
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)

    return align_eval
コード例 #3
0
ファイル: test_baseline.py プロジェクト: danger89/daeso-dutch
 def test_word_baseline_3(self):
     # Baseline run: greedy_align_phrases is applied to a deep copy of the
     # true corpus, the copy is evaluated against the untouched original,
     # and the result is reported through write_alignment_overall().
     print("Baseline: greedy_align_phrases")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_phrases(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     evaluator.write_alignment_overall()
コード例 #4
0
ファイル: test_baseline.py プロジェクト: emsrc/daeso-dutch
 def test_word_baseline_3(self):
     # Baseline run: greedy_align_phrases is applied to a deep copy of the
     # true corpus, the copy is evaluated against the untouched original,
     # and the result is reported through write_alignment_overall().
     print("Baseline: greedy_align_phrases")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_phrases(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     evaluator.write_alignment_overall()
コード例 #5
0
ファイル: test_baseline.py プロジェクト: danger89/daeso-dutch
 def test_word_baseline_2(self):
     # Baseline run: greedy_align_words is applied to a deep copy of the
     # true corpus and the copy is evaluated against the original.
     #
     # Known limitation: the evaluation is incorrect, because the true
     # corpus also contains phrase alignments.
     print("Baseline: greedy_align_words")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_words(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     evaluator.write_alignment_overall()
コード例 #6
0
ファイル: test_baseline.py プロジェクト: emsrc/daeso-dutch
 def test_word_baseline_2(self):
     # Baseline run: greedy_align_words is applied to a deep copy of the
     # true corpus and the copy is evaluated against the original.
     #
     # Known limitation: the evaluation is incorrect, because the true
     # corpus also contains phrase alignments.
     print("Baseline: greedy_align_words")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_words(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     evaluator.write_alignment_overall()
コード例 #7
0
    def test_pickle(self):
        # Round-trips an evaluated AlignEval through pickle (protocol 2) and
        # calls write() on the restored object.
        # The same corpus object serves as both the true and predicted side.
        true_corpus = pred_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        align_eval = AlignEval()
        align_eval.add(true_corpus, pred_corpus, "corpus-1")
        align_eval.run_eval()

        # The context manager closes the temporary file even when dumping or
        # loading raises, instead of leaving the handle to the garbage
        # collector.
        with tempfile.TemporaryFile() as pickle_file:
            pickle.dump(align_eval, pickle_file, 2)
            # rewind so that load reads what dump just wrote
            pickle_file.seek(0)
            align_eval_2 = pickle.load(pickle_file)

        align_eval_2.write()
コード例 #8
0
def eval_corpora(true_corpora, pred_corpora, names, eval_fname,
                 align_eval=None, n=None):
    """
    Evaluate predicted against true parallel graph corpora.
    
    @param true_corpora: iterable of true corpora
    
    @param pred_corpora: iterable of predicted corpora
    
    @param names: iterable of labels for true/predicted pairs
    
    @param eval_fname: name of file to which evaluation output is written 
    
    @keyword align_eval: AlignEval instance to reuse; it is re-initialised
    before use. A new instance is created when omitted.
    
    @keyword n: limit evaluation to the first n true/predicted pairs. The
    limit only triggers when the running count equals n, so None (and any
    value below 1) means no limit.
    
    @return: the AlignEval instance that holds the evaluation
    """
    # Compare against None explicitly, so that a supplied evaluator is
    # always the one that gets reset and used, whatever its truth value.
    if align_eval is not None:
        assert isinstance(align_eval, AlignEval)
        # reset evaluator to prevent accidents
        align_eval.__init__()
    else:
        align_eval = AlignEval()
    
    # izip is lazy: pairs beyond the limit are never pulled from the
    # input iterables
    triples = itertools.izip(true_corpora, pred_corpora, names)

    for count, (true_corpus, pred_corpus, name) in enumerate(triples, 1):
        align_eval.add(true_corpus, pred_corpus, name)
        if count == n:
            break
        
    align_eval.run_eval()
    log.info("saving evaluation report {0}".format(eval_fname))
    makedirs(os.path.dirname(eval_fname))
    align_eval.write(eval_fname)
    return align_eval
コード例 #9
0
ファイル: eval-baselines.py プロジェクト: emsrc/daeso-dutch
run from the exp dir which contains a data subdir with the true pgc
files and an eval subdir for evaluation results
"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots

# One evaluator per baseline; each accumulates the pairs of all data parts.
eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    part_name = os.path.basename(pgc_fn)
    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    # the aligners get a deep copy, the loaded corpus serves as reference
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, part_name)

    # NOTE(review): the second aligner runs on the very same pred_corpus
    # object that was just handed to eval1 -- confirm that AlignEval.add
    # does not merely keep a reference that run_eval() reads later.
    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, part_name)

# Run each evaluation and write its report, first eval1 and then eval2.
for evaluator, report_fn in (
        (eval1, "eval/greedy_align_equals_words.txt"),
        (eval2, "eval/greedy_align_equals_words_roots.txt")):
    evaluator.run_eval()
    evaluator.write(report_fn)
コード例 #10
0
ファイル: pgc_diff.py プロジェクト: emsrc/daeso-framework
    "-r", "--relations",
    metavar="REL",
    nargs="*",
    help="limit output to given relations")

args = parser.parse_args()

# Load both corpora from the files named on the command line.
corpus1 = ParallelGraphCorpus(inf=args.corpus1)
corpus2 = ParallelGraphCorpus(inf=args.corpus2)

# Keyword options for the diff, taken one-to-one from the parsed arguments.
diff_options = dict(
    corpus_name1=args.corpus1,
    corpus_name2=args.corpus2,
    annot1=args.first_annotator,
    annot2=args.second_annotator,
    show_comments=args.with_comments,
    show_ident=args.with_ident,
    relations=args.relations)

pgc_diff(corpus1, corpus2, **diff_options)

if args.evaluate:
    # imported here, so the evaluation module is only loaded on request
    from daeso.pgc.evaluate import AlignEval

    # corpus1 is added as the true side, corpus2 as the predicted side
    align_eval = AlignEval()
    align_eval.add(corpus1, corpus2)
    align_eval.run_eval()
    align_eval.write()
    



コード例 #11
0
"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots


# One evaluator per baseline; each accumulates the pairs of all data parts.
eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    part_name = os.path.basename(pgc_fn)
    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    # the aligners get a deep copy, the loaded corpus serves as reference
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, part_name)

    # NOTE(review): the second aligner runs on the very same pred_corpus
    # object that was just handed to eval1 -- confirm that AlignEval.add
    # does not merely keep a reference that run_eval() reads later.
    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, part_name)

# Run each evaluation and write its report, first eval1 and then eval2.
for evaluator, report_fn in (
        (eval1, "eval/greedy_align_equals_words.txt"),
        (eval2, "eval/greedy_align_equals_words_roots.txt")):
    evaluator.run_eval()
    evaluator.write(report_fn)