def nfold_xaml():
    """
    Cross-language evaluation of POS classifiers over the filtered XAML corpora.

    For each language (identified by the first three characters of the file
    name), split its instances 50/50 into train/test, then compare three
    classifiers on the held-out test split:

    * one trained on all OTHER languages' instances,
    * one trained on this language's train split plus all other languages,
    * the pre-built ODIN classifier (``gc.classifier``).

    Accumulated results are printed as CSV to stdout; nothing is returned.
    """
    xaml_paths = glob("/Users/rgeorgi/Documents/code/dissertation/data/annotation/filtered/*.xml")

    lang_test = {}
    lang_train = {}
    lang_all = {}

    tagger = StanfordPOSTagger(tagger_model)

    for xaml_path in xaml_paths:
        # Language code is the first three characters of the filename.
        lang = os.path.basename(xaml_path)[:3]
        xc = xc_load(xaml_path)
        train, dev, test = split_instances(xc, train=0.5, test=0.5, dev=0.0)
        lang_train[lang] = train
        lang_all[lang] = train + test
        lang_test[lang] = test

    # Now, build our classifiers...
    all_other = POSEvalDict()
    all_all = POSEvalDict()
    all_odin = POSEvalDict()
    all_proj = POSEvalDict()

    for lang in lang_all:
        other_lang_instances = []
        # BUGFIX: copy the stored training split. The original bound
        # all_lang_instances directly to lang_train[lang], so the extend()
        # below mutated the per-language training data in place.
        all_lang_instances = list(lang_train[lang])

        for other_lang in lang_all:
            if other_lang != lang:
                other_lang_instances.extend(lang_all[other_lang])
                all_lang_instances.extend(lang_all[other_lang])

        other_lang_classifier = extract_from_instances(other_lang_instances, 'test.class', 'test.feats', '/dev/null')
        all_lang_classifier = extract_from_instances(all_lang_instances, 'all.class', 'all.feats', '/dev/null')

        test_instances = lang_test[lang]
        print(lang)

        prj_other_eval, cls_other_eval = evaluate_classifier_on_instances(test_instances, other_lang_classifier, tagger)
        prj_all_eval, cls_all_eval = evaluate_classifier_on_instances(test_instances, all_lang_classifier, tagger)
        prj_odin_eval, cls_odin_eval = evaluate_classifier_on_instances(
            test_instances,
            MalletMaxent('/Users/rgeorgi/Documents/code/dissertation/gc.classifier'),
            tagger)

        all_other += cls_other_eval
        all_all += cls_all_eval
        all_odin += cls_odin_eval
        all_proj += prj_all_eval

    print('ALL')
    print('{:.2f},{:.2f},{:.2f},{:.2f},{:.2f}'.format(all_proj.precision(),
                                                      all_proj.unaligned(),
                                                      all_other.accuracy(),
                                                      all_all.accuracy(),
                                                      all_odin.accuracy()))
    print(all_proj.error_matrix(csv=True))
def first_test(self):
    """One matching and one mismatching pair should yield 50% on every metric."""
    tally = POSEvalDict()
    for first_tag, second_tag in (('NOUN', 'NOUN'), ('NOUN', 'VERB')):
        tally.add(first_tag, second_tag)
    self.assertEqual(tally.recall(), 50.)
    self.assertEqual(tally.accuracy(), 50.)
    self.assertEqual(tally.precision(), 50.)
def second_test(self):
    """Five mixed NOUN/VERB pairs: overall 40%, with per-tag P/R as below."""
    tally = POSEvalDict()
    pairs = [
        ('NOUN', 'NOUN'),
        ('NOUN', 'VERB'),
        ('NOUN', 'VERB'),
        ('VERB', 'NOUN'),
        ('VERB', 'VERB'),
    ]
    for first_tag, second_tag in pairs:
        tally.add(first_tag, second_tag)

    self.assertAlmostEqual(tally.recall(), 40)
    self.assertAlmostEqual(tally.precision(), 40)
    self.assertAlmostEqual(tally.accuracy(), 40)
    self.assertAlmostEqual(tally.tag_recall('NOUN'), 33.3, places=1)
    self.assertAlmostEqual(tally.tag_precision('NOUN'), 50.0)
    self.assertAlmostEqual(tally.tag_recall('VERB'), 50, places=1)
    self.assertAlmostEqual(tally.tag_precision('VERB'), 33.3, places=1)
def poseval(eval_sents, gold_sents, out_f=sys.stdout, csv=True, ansi=False,
            greedy_1_to_1=False, greedy_n_to_1=False,
            matrix=False, details=False, length_limit=None):
    """
    Evaluate predicted POS-tagged sentences against gold-tagged sentences.

    :param eval_sents: Predicted sentences; each a sequence of tokens with a
                       ``label`` attribute.
    :param gold_sents: Gold sentences, parallel to ``eval_sents``.
    :param out_f: Stream the report is written to.
    :param csv: Emit CSV-formatted output via the print helper.
    :param ansi: Emit ANSI-colored output via the print helper.
    :param greedy_1_to_1: Also report after greedy 1-to-1 label remapping.
    :param greedy_n_to_1: Also report after greedy N-to-1 label remapping.
    :param matrix: Include the confusion matrix in the report.
    :param details: Append per-tag breakdown lines to the report.
    :param length_limit: Skip eval sentences longer than this, if given.
    :raises EvalException: If sentence or token counts are mismatched.
    :return: The gold-to-eval :class:`POSEvalDict`.
    """
    if len(eval_sents) != len(gold_sents):
        raise EvalException('Number of eval sents does not match number of gold sents.')

    #===========================================================================
    # Set up counters
    #===========================================================================
    c = POSEvalDict()
    d = POSEvalDict()

    # BUGFIX: the sentence index was initialized to 1 but never advanced, so
    # the token-mismatch error below always reported "sent #1".
    for i, (eval_sent, gold_sent) in enumerate(zip(eval_sents, gold_sents), start=1):
        if length_limit is not None and len(eval_sent) > length_limit:
            continue

        # Check whether the whole sentence is correct.
        sent_correct = True

        if len(eval_sent) != len(gold_sent):
            raise EvalException('Number of tokens for sent #%d is unequal' % i)

        for eval_token, gold_token in zip(eval_sent, gold_sent):
            gold_label = str(gold_token.label)
            eval_label = str(eval_token.label)

            # Kludgy way to make sure all the assigned
            # labels end up getting seen.
            c[eval_label].add(eval_label, 0)
            c[gold_label].add(eval_label, 1)
            d[eval_label].add(gold_label, 1)

            # If one of the labels does not match,
            # the sentence does not match.
            if gold_label != eval_label:
                sent_correct = False

        # NOTE(review): sent_correct is computed but never consumed anywhere in
        # this function — the "count it" step appears to have been dropped.
        # Left in place rather than guessing at the intended tally.

    #===========================================================================
    # Now, evaluate based on the gold-to-eval labels
    #===========================================================================
    eval_print_helper(out_f, 'STANDARD', matrix, c, ansi, csv)

    if greedy_1_to_1:
        c.greedy_1_to_1()
        eval_print_helper(out_f, 'GREEDY 1-to-1', matrix, c, ansi, csv)

    if greedy_n_to_1:
        c.greedy_n_to_1()
        eval_print_helper(out_f, 'GREEDY N-to-1', matrix, c, ansi, csv)

    #===========================================================================
    # If details is specified, just give slightly more detail on
    #===========================================================================
    if details:
        out_f.write('{}\n'.format(c.overall_breakdown()))
        out_f.write('{}\n'.format(c.breakdown_csv()))

    return c
def evaluate_intent(filelist, classifier_path=None, eval_alignment=None, eval_ds=None, eval_posproj=None,
                    classifier_feats=CLASS_FEATS_DEFAULT,
                    eval_tagger=None,
                    gold_tagmap=None, trans_tagmap=None,
                    outpath=None):
    """
    Given a list of files that have manual POS tags and manual alignment,
    evaluate the various INTENT methods on that file.

    :param filelist: List of paths to evaluate against.
    :type filelist: list[str]
    :param classifier_path: Path to the classifier model
    :type classifier_path: str
    :param eval_alignment: Evaluate heuristic/statistical alignment methods.
    :param eval_ds: Evaluate dependency-structure projection.
    :param eval_posproj: Evaluate POS projection.
    :param classifier_feats: Feature set used by the classifier evaluation.
    :param eval_tagger: Optional path to a tagger model to evaluate.
    :param gold_tagmap: Optional tag map applied to gold tags.
    :param trans_tagmap: Optional tag map applied to translated tags.
    :param outpath: Write the report here instead of stdout, if given.
    """
    tagger = StanfordPOSTagger(tagger_model)

    outstream = sys.stdout
    if outpath is not None:
        outstream = open(outpath, mode='w', encoding='utf-8')

    # =============================================================================
    # Set up the objects to run as "servers"
    # =============================================================================
    # BUGFIX: construct exactly one MalletMaxent. The original unconditionally
    # built the default-model instance and then immediately replaced (and
    # leaked) it whenever classifier_path was provided.
    if classifier_path is not None:
        classifier_obj = MalletMaxent(classifier_path)
    else:
        classifier_obj = MalletMaxent(classifier)

    class_matches, class_compares = 0, 0

    e_tagger = None
    if eval_tagger is not None:
        e_tagger = StanfordPOSTagger(eval_tagger)

    mas = MultAlignScorer()
    ds_plma = PerLangMethodAccuracies()
    pos_plma = PerLangMethodAccuracies()
    pos_pla = POSEvalDict()

    pos_proj_matrix = POSMatrix()
    pos_class_matrix = POSMatrix()

    # -------------------------------------------
    # If a tag map is specified, let's load it.
    # -------------------------------------------
    g_tm = TagMap(gold_tagmap) if gold_tagmap is not None else None
    t_tm = TagMap(trans_tagmap) if trans_tagmap is not None else None

    # Go through all the files in the list...
    for f in filelist:
        outstream.write('Evaluating on file: {}\n'.format(f))
        xc = xc_load(f, mode=FULL)
        lang = os.path.basename(f)

        # -------------------------------------------
        # Test the classifier if evaluation is requested.
        # -------------------------------------------
        if classifier_path is not None:
            matches, compares, acc = evaluate_classifier_on_instances(xc, classifier_obj, classifier_feats,
                                                                      pos_class_matrix, gold_tagmap=g_tm)
            outstream.write('{},{},{},{:.2f}\n'.format(lang, matches, compares, acc))
            class_matches += matches
            class_compares += compares

        # -------------------------------------------
        # Test alignment if requested.
        # -------------------------------------------
        if eval_alignment:
            mas.add_corpus('gold', INTENT_ALN_MANUAL, lang, xc)
            EVAL_LOG.log(NORM_LEVEL, "Evaluating heuristic methods...")
            evaluate_heuristic_methods_on_file(f, xc, mas, classifier_obj, tagger, lang)
            EVAL_LOG.log(NORM_LEVEL, "Evaluating statistical methods...")
            evaluate_statistic_methods_on_file(f, xc, mas, classifier_obj, tagger, lang)

        # -------------------------------------------
        # Test DS Projection if requested
        # -------------------------------------------
        if eval_ds:
            evaluate_ds_projections_on_file(lang, xc, ds_plma, outstream=outstream)
            outstream.write('{}\n'.format(ds_plma))

        # -------------------------------------------
        # Test POS Projection
        # -------------------------------------------
        if eval_posproj:
            evaluate_pos_projections_on_file(lang, xc, pos_plma, pos_proj_matrix, tagger,
                                             gold_tagmap=g_tm, trans_tagmap=t_tm, outstream=outstream)

        if e_tagger is not None:
            evaluate_lang_pos(lang, xc, e_tagger, pos_pla, gold_tagmap=g_tm, outstream=outstream)

    if eval_alignment:
        mas.eval_all(outstream=outstream)

    if eval_ds:
        outstream.write('{}\n'.format(ds_plma))

    if e_tagger is not None:
        # NOTE(review): 'lang' here is the last file's language from the loop
        # above (and is undefined for an empty filelist) — preserved as-is.
        outstream.write('{},{},{},{:.2f}\n'.format(lang, pos_pla.all_matches(),
                                                   pos_pla.fulltotal(), pos_pla.accuracy()))
        e_tagger.close()

    # Report the POS tagging accuracy...
    if classifier_path is not None:
        outstream.write("ALL...\n")
        # BUGFIX: guard against ZeroDivisionError when no tokens were compared
        # (e.g. an empty filelist).
        pct = (class_matches / class_compares * 100) if class_compares else 0.0
        outstream.write('{},{},{:.2f}\n'.format(class_matches, class_compares, pct))
        outstream.write('{}\n'.format(pos_class_matrix))

    if eval_posproj:
        outstream.write('{}\n'.format(pos_proj_matrix))

    # BUGFIX: only close the stream this function opened; the original closed
    # sys.stdout when no outpath was given.
    if outpath is not None:
        outstream.close()