Exemple #1
0
def cross_evaluate(corpus_fns, annotators=None, pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.

    @param corpus_fns: sequence of corpus filenames, one per annotator
        (must support len())

    @keyword annotators: sequence of annotator labels, parallel to
        corpus_fns; when omitted or empty, the labels "A1", "A2", ... are
        generated

    @keyword pickle_fn: if given, the evaluated AlignEval object is pickled
        to this filename

    @keyword words_only: passed on unchanged to read_corpora

    @return: the AlignEval instance after run_eval() has been called
    """
    if annotators:
        assert len(annotators) == len(corpus_fns)
    else:
        annotators = ["A%d" % number
                      for number in range(1, len(corpus_fns) + 1)]

    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()

    # Every ordered (true, predicted) pair of distinct annotator labels is
    # evaluated, so each pair occurs in both directions. Pairs are skipped
    # on equal *labels*, not on corpus identity.
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()

    if pickle_fn:
        # Use a context manager so the file is flushed and closed
        # deterministically, also when pickling fails.
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)

    return align_eval
Exemple #2
0
def cross_evaluate(corpus_fns,
                   annotators=None,
                   pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.

    @param corpus_fns: sequence of corpus filenames, one per annotator
        (must support len())

    @keyword annotators: sequence of annotator labels, parallel to
        corpus_fns; when omitted or empty, the labels "A1", "A2", ... are
        generated

    @keyword pickle_fn: if given, the evaluated AlignEval object is pickled
        to this filename

    @keyword words_only: passed on unchanged to read_corpora

    @return: the AlignEval instance after run_eval() has been called
    """
    if annotators:
        assert len(annotators) == len(corpus_fns)
    else:
        annotators = ["A%d" % number for number in range(1, len(corpus_fns) + 1)]

    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()

    # Every ordered (true, predicted) pair of distinct annotator labels is
    # evaluated, so each pair occurs in both directions. Pairs are skipped
    # on equal *labels*, not on corpus identity.
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()

    if pickle_fn:
        # Use a context manager so the file is flushed and closed
        # deterministically, also when pickling fails.
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)

    return align_eval
 def test_word_baseline_3(self):
     # Baseline run: greedy_align_phrases is applied to a deep copy of the
     # true corpus, and that copy is then evaluated against the original.
     print("Baseline: greedy_align_phrases")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_phrases(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     # Only the overall alignment figures are written out.
     evaluator.write_alignment_overall()
Exemple #4
0
 def test_word_baseline_3(self):
     # Baseline run: greedy_align_phrases is applied to a deep copy of the
     # true corpus, and that copy is then evaluated against the original.
     print("Baseline: greedy_align_phrases")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_phrases(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     # Only the overall alignment figures are written out.
     evaluator.write_alignment_overall()
 def test_word_baseline_2(self):
     # Baseline run: greedy_align_words is applied to a deep copy of the
     # true corpus, and that copy is then evaluated against the original.
     #
     # Known limitation: the evaluation is incorrect, because the true
     # corpus also contains phrase alignments.
     print("Baseline: greedy_align_words")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_words(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     # Only the overall alignment figures are written out.
     evaluator.write_alignment_overall()
Exemple #6
0
 def test_word_baseline_2(self):
     # Baseline run: greedy_align_words is applied to a deep copy of the
     # true corpus, and that copy is then evaluated against the original.
     #
     # Known limitation: the evaluation is incorrect, because the true
     # corpus also contains phrase alignments.
     print("Baseline: greedy_align_words")
     predicted = copy.deepcopy(self.true_pgc)
     greedy_align_words(predicted)

     evaluator = AlignEval()
     evaluator.add(self.true_pgc, predicted)
     evaluator.run_eval()
     # Only the overall alignment figures are written out.
     evaluator.write_alignment_overall()
    def test_pickle(self):
        # Round-trip check: an evaluated AlignEval must survive being pickled
        # (protocol 2) and unpickled, and the restored object must still be
        # able to write its report.
        #
        # The same corpus object is deliberately used as both the true and
        # the predicted corpus.
        true_corpus = pred_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        align_eval = AlignEval()
        align_eval.add(true_corpus, pred_corpus, "corpus-1")
        align_eval.run_eval()

        # The context manager closes the temporary file even when dumping or
        # loading raises, instead of leaving that to garbage collection.
        with tempfile.TemporaryFile() as pickle_file:
            pickle.dump(align_eval, pickle_file, 2)
            # Rewind so that load() reads from the start of the dump.
            pickle_file.seek(0)
            align_eval_2 = pickle.load(pickle_file)

        align_eval_2.write()
Exemple #8
0
def eval_corpora(true_corpora, pred_corpora, names, eval_fname,
                 align_eval=None, n=None):
    """
    Evaluate predicted against true parallel graph corpora.
    
    @param true_corpora: iterable of true corpora
    
    @param pred_corpora: iterable of predicted corpora
    
    @param names: iterable of labels for true/predicted pairs
    
    @param eval_fname: name of file to which evaluation output is written 
    
    @keyword align_eval: AlignEval instance to reuse; it is re-initialised
    before use, so anything it already holds is discarded. A new instance is
    created when this is omitted.
    
    @keyword n: limit evaluation to the first n true/predicted pairs; with
    None (or 0) all pairs are evaluated
    
    @return: the AlignEval instance that holds the evaluation results
    """
    # Compare against None explicitly: a caller-supplied evaluator must be
    # used even if it happens to evaluate as false.
    if align_eval is not None:
        assert isinstance(align_eval, AlignEval)
        # reset evaluator to prevent accidents
        align_eval.__init__()
    else:
        align_eval = AlignEval()

    triples = itertools.izip(true_corpora, pred_corpora, names)

    # Counting starts at 1, so the loop stops right after the n-th pair has
    # been added; no further items are consumed from the iterables.
    for count, (true_corpus, pred_corpus, name) in enumerate(triples, 1):
        align_eval.add(true_corpus, pred_corpus, name)
        if count == n:
            break

    align_eval.run_eval()
    log.info("saving evaluation report {0}".format(eval_fname))
    # Make sure the target directory exists before writing the report.
    makedirs(os.path.dirname(eval_fname))
    align_eval.write(eval_fname)
    return align_eval
Exemple #9
0
Run from the exp dir, which contains a data subdir with the true pgc
files and an eval subdir for the evaluation results.
"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots

# One evaluator per baseline: eval1 collects the results of
# greedy_align_equal_words, eval2 those of greedy_align_equal_words_roots.
eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    # Each corpus part is labelled with its bare filename in the reports.
    part_name = os.path.basename(pgc_fn)

    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    # The baselines work on a copy, so the true corpus stays untouched.
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, part_name)

    # NOTE(review): the second baseline runs on the very same pred_corpus
    # object that was already handed to eval1, and eval1.run_eval() is only
    # called after this loop. Whether eval1 sees the later changes depends on
    # whether AlignEval.add copies its arguments -- TODO confirm.
    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, part_name)

# Evaluate and write one report per baseline.
eval1.run_eval()
eval1.write("eval/greedy_align_equals_words.txt")

eval2.run_eval()
eval2.write("eval/greedy_align_equals_words_roots.txt")
Exemple #10
0
    "-r", "--relations",
    metavar="REL",
    nargs="*",
    help="limit output to given relations")

args = parser.parse_args()

# Load the two corpora to compare from the paths given on the command line.
corpus1 = ParallelGraphCorpus(inf=args.corpus1)
corpus2 = ParallelGraphCorpus(inf=args.corpus2)

# Collect the display options for the diff; the corpus filenames double as
# the corpus names.
diff_options = dict(
    corpus_name1=args.corpus1,
    corpus_name2=args.corpus2,
    annot1=args.first_annotator,
    annot2=args.second_annotator,
    show_comments=args.with_comments,
    show_ident=args.with_ident,
    relations=args.relations,
)
pgc_diff(corpus1, corpus2, **diff_options)

if args.evaluate:
    # Imported here so the evaluation module is only loaded when an
    # evaluation was actually requested.
    from daeso.pgc.evaluate import AlignEval

    # Evaluate the second corpus against the first and write the report
    # using write()'s default destination.
    align_eval = AlignEval()
    align_eval.add(corpus1, corpus2)
    align_eval.run_eval()
    align_eval.write()
    



"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots


# One evaluator per baseline: eval1 collects the results of
# greedy_align_equal_words, eval2 those of greedy_align_equal_words_roots.
eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    # Each corpus part is labelled with its bare filename in the reports.
    part_name = os.path.basename(pgc_fn)

    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    # The baselines work on a copy, so the true corpus stays untouched.
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, part_name)

    # NOTE(review): the second baseline runs on the very same pred_corpus
    # object that was already handed to eval1, and eval1.run_eval() is only
    # called after this loop. Whether eval1 sees the later changes depends on
    # whether AlignEval.add copies its arguments -- TODO confirm.
    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, part_name)

# Evaluate and write one report per baseline.
eval1.run_eval()
eval1.write("eval/greedy_align_equals_words.txt")

eval2.run_eval()
eval2.write("eval/greedy_align_equals_words_roots.txt")