Example 1
import os
from glob import glob

# NB: the project helpers used below (xc_load, split_instances, StanfordPOSTagger,
# POSEvalDict, extract_from_instances, MalletMaxent, evaluate_classifier_on_instances,
# and the tagger_model constant) are assumed to be imported at module level.

def nfold_xaml():
    xaml_paths = glob("/Users/rgeorgi/Documents/code/dissertation/data/annotation/filtered/*.xml")

    lang_test = {}
    lang_train = {}
    lang_all  = {}

    tagger = StanfordPOSTagger(tagger_model)


    for xaml_path in xaml_paths:
        # The first three characters of the filename identify the language.
        lang = os.path.basename(xaml_path)[:3]
        xc = xc_load(xaml_path)

        train, dev, test = split_instances(xc, train=0.5, test=0.5, dev=0.0)

        lang_train[lang] = train
        lang_all[lang]   = train + test
        lang_test[lang]  = test

    # Now, build our classifiers...

    all_other = POSEvalDict()
    all_all   = POSEvalDict()
    all_odin  = POSEvalDict()
    all_proj  = POSEvalDict()

    for lang in lang_all:

        other_lang_instances = []
        # Copy here; otherwise the extend() calls below would also mutate
        # lang_train[lang] in place.
        all_lang_instances = list(lang_train[lang])

        for other_lang in lang_all:
            if other_lang != lang:
                other_lang_instances.extend(lang_all[other_lang])
                all_lang_instances.extend(lang_all[other_lang])

        other_lang_classifier = extract_from_instances(other_lang_instances, 'test.class', 'test.feats', '/dev/null')
        all_lang_classifier = extract_from_instances(all_lang_instances, 'all.class', 'all.feats', '/dev/null')


        test_instances = lang_test[lang]

        print(lang)
        prj_other_eval, cls_other_eval = evaluate_classifier_on_instances(test_instances, other_lang_classifier, tagger)
        prj_all_eval, cls_all_eval = evaluate_classifier_on_instances(test_instances, all_lang_classifier, tagger)
        prj_odin_eval, cls_odin_eval = evaluate_classifier_on_instances(test_instances, MalletMaxent('/Users/rgeorgi/Documents/code/dissertation/gc.classifier'), tagger)

        all_other += cls_other_eval
        all_all   += cls_all_eval
        all_odin  += cls_odin_eval
        all_proj  += prj_all_eval

    print('ALL')
    # CSV columns: projection precision, projection unaligned rate, and classifier
    # accuracy trained on other languages / all languages / the ODIN classifier.
    print('{:.2f},{:.2f},{:.2f},{:.2f},{:.2f}'.format(all_proj.precision(), all_proj.unaligned(), all_other.accuracy(), all_all.accuracy(), all_odin.accuracy()))
    print(all_proj.error_matrix(csv=True))
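
The heart of this example is a leave-one-language-out loop: for each language, one classifier is trained only on the *other* languages, and a second on the other languages plus the target language's own training split. A minimal sketch of that pattern in isolation; the `make_classifier` and `score` callables are hypothetical stand-ins for `extract_from_instances()` and `evaluate_classifier_on_instances()`:

def leave_one_language_out(lang_train, lang_all, lang_test, make_classifier, score):
    results = {}
    for lang in lang_all:
        # Training data drawn only from the *other* languages...
        other = [inst for l in lang_all if l != lang for inst in lang_all[l]]
        # ...and the same data augmented with the target language's own train split.
        combined = list(lang_train[lang]) + other
        results[lang] = (score(make_classifier(other), lang_test[lang]),
                         score(make_classifier(combined), lang_test[lang]))
    return results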
Example 2
    def first_test(self):
        d = POSEvalDict()

        # add(gold_tag, predicted_tag): one correct NOUN, one NOUN mistagged as VERB.
        d.add('NOUN', 'NOUN')
        d.add('NOUN', 'VERB')

        # One of two tokens is correct, so all three metrics are 50%.
        self.assertEqual(d.recall(), 50.)
        self.assertEqual(d.accuracy(), 50.)
        self.assertEqual(d.precision(), 50.)
Example 3
    def second_test(self):
        d = POSEvalDict()
        # Five tokens: gold tags NOUN x3 and VERB x2, two of them tagged correctly.
        d.add('NOUN', 'NOUN')
        d.add('NOUN', 'VERB')
        d.add('NOUN', 'VERB')
        d.add('VERB', 'NOUN')
        d.add('VERB', 'VERB')

        # Two of five tokens correct: 40% overall.
        self.assertAlmostEqual(d.recall(), 40)
        self.assertAlmostEqual(d.precision(), 40)
        self.assertAlmostEqual(d.accuracy(), 40)

        # Per-tag: 1 of 3 gold NOUNs recovered, 1 of 2 predicted NOUNs correct;
        # 1 of 2 gold VERBs recovered, 1 of 3 predicted VERBs correct.
        self.assertAlmostEqual(d.tag_recall('NOUN'), 33.3, places=1)
        self.assertAlmostEqual(d.tag_precision('NOUN'), 50.0)
        self.assertAlmostEqual(d.tag_recall('VERB'), 50, places=1)
        self.assertAlmostEqual(d.tag_precision('VERB'), 33.3, places=1)
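
Taken together, these two tests pin down the arithmetic POSEvalDict must implement: `add(gold, predicted)` records one token, overall accuracy is matches over total (and coincides with overall precision/recall when every token gets exactly one tag), and the per-tag metrics are ratios over the confusion counts. A minimal sketch consistent with the asserted values; this is a hypothetical stand-in, not the project's actual implementation:

from collections import Counter

class MiniPOSEval:
    def __init__(self):
        self.counts = Counter()  # (gold_tag, predicted_tag) -> count

    def add(self, gold, predicted):
        self.counts[(gold, predicted)] += 1

    def accuracy(self):
        total = sum(self.counts.values())
        matches = sum(n for (g, p), n in self.counts.items() if g == p)
        return 100.0 * matches / total if total else 0.0

    def tag_recall(self, tag):
        # Correctly-tagged tokens over all gold tokens with this tag.
        gold_n = sum(n for (g, _), n in self.counts.items() if g == tag)
        return 100.0 * self.counts[(tag, tag)] / gold_n if gold_n else 0.0

    def tag_precision(self, tag):
        # Correctly-tagged tokens over all predictions of this tag.
        pred_n = sum(n for (_, p), n in self.counts.items() if p == tag)
        return 100.0 * self.counts[(tag, tag)] / pred_n if pred_n else 0.0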
Example 4
import sys

def poseval(eval_sents, gold_sents, out_f=sys.stdout, csv=True,
            ansi=False, greedy_1_to_1=False, greedy_n_to_1=False,
            matrix=False, details=False, length_limit=None):

    if len(eval_sents) != len(gold_sents):
        raise EvalException('Number of eval sents does not match number of gold sents.')

    #===========================================================================
    # Set up counters
    #===========================================================================
    c = POSEvalDict()  # gold label -> eval label counts
    d = POSEvalDict()  # eval label -> gold label counts (filled but unused below)

    for i, (eval_sent, gold_sent) in enumerate(zip(eval_sents, gold_sents), start=1):

        if length_limit is not None and len(eval_sent) > length_limit:
            continue

        # Check whether the whole sentence is correct.
        sent_correct = True

        if len(eval_sent) != len(gold_sent):
            raise EvalException('Number of tokens for sent #%d is unequal' % i)

        for eval_token, gold_token in zip(eval_sent, gold_sent):

            gold_label = str(gold_token.label)
            eval_label = str(eval_token.label)

            # Kludgy way to make sure all the assigned
            # labels end up getting seen.
            c[eval_label].add(eval_label, 0)

            c[gold_label].add(eval_label, 1)
            d[eval_label].add(gold_label, 1)

            # If one of the labels does not match,
            # the sentence does not match.
            if gold_label != eval_label:
                sent_correct = False

        # If the whole sentence was correct it could be counted here;
        # sentence-level accuracy is not tallied in this excerpt.


    #===========================================================================
    # Now, evaluate based on the gold-to-eval labels
    #===========================================================================
    eval_print_helper(out_f, 'STANDARD', matrix, c, ansi, csv)


    if greedy_1_to_1:
        c.greedy_1_to_1()
        eval_print_helper(out_f, 'GREEDY 1-to-1',  matrix, c, ansi, csv)

    if greedy_n_to_1:
        c.greedy_n_to_1()
        eval_print_helper(out_f, 'GREEDY N-to-1', matrix, c, ansi, csv)

    #===========================================================================
    # If details is specified, give slightly more detail on the per-tag breakdown.
    #===========================================================================
    if details:
        out_f.write('{}\n'.format(c.overall_breakdown()))
        out_f.write('{}\n'.format(c.breakdown_csv()))

    return c
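
For reference, `poseval` only reads each token's `label` attribute, so it can be exercised with any lightweight stand-in object. A hypothetical toy invocation; the `toks` helper is an assumption, not the project's token class:

from types import SimpleNamespace

def toks(*labels):
    # Minimal stand-in for the project's tokens: poseval only reads `.label`.
    return [SimpleNamespace(label=l) for l in labels]

gold_sents = [toks('NOUN', 'VERB'), toks('DET', 'NOUN')]
eval_sents = [toks('NOUN', 'NOUN'), toks('DET', 'NOUN')]

# Three of four tokens match, so the reported accuracy should be 75%.
c = poseval(eval_sents, gold_sents, details=True)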
Example 5
def evaluate_intent(filelist, classifier_path=None, eval_alignment=None, eval_ds=None, eval_posproj=None,
                    classifier_feats=CLASS_FEATS_DEFAULT,
                    eval_tagger=None,
                    gold_tagmap=None, trans_tagmap=None, outpath=None):
    """
    Given a list of files that have manual POS tags and manual alignment,
    evaluate the various INTENT methods on that file.

    :param filelist: List of paths to evaluate against.
    :type filelist: list[str]
    :param classifier_path: Path to the classifier model
    :type classifier_path: str
    :param eval_alignment: If true, evaluate the heuristic and statistical alignment methods.
    """
    tagger = StanfordPOSTagger(tagger_model)

    outstream = sys.stdout
    if outpath is not None:
        outstream = open(outpath, mode='w', encoding='utf-8')

    # =============================================================================
    # Set up the objects to run as "servers"
    # =============================================================================

    # `classifier` is assumed to be a module-level default model path;
    # an explicit classifier_path overrides it.
    classifier_obj = MalletMaxent(classifier_path if classifier_path is not None else classifier)

    class_matches, class_compares = 0, 0

    e_tagger = None
    if eval_tagger is not None:
        e_tagger = StanfordPOSTagger(eval_tagger)

    mas = MultAlignScorer()
    ds_plma = PerLangMethodAccuracies()
    pos_plma = PerLangMethodAccuracies()

    pos_pla = POSEvalDict()

    pos_proj_matrix = POSMatrix()
    pos_class_matrix = POSMatrix()

    # -------------------------------------------
    # If a tag map is specified, let's load it.
    # -------------------------------------------
    g_tm = TagMap(gold_tagmap) if gold_tagmap is not None else None
    t_tm = TagMap(trans_tagmap) if trans_tagmap is not None else None

    # Go through all the files in the list...
    for f in filelist:
        outstream.write('Evaluating on file: {}\n'.format(f))
        xc = xc_load(f, mode=FULL)
        lang = os.path.basename(f)  # the filename is used as the language identifier

        # -------------------------------------------
        # Test the classifier if evaluation is requested.
        # -------------------------------------------
        if classifier_path is not None:
            matches, compares, acc = evaluate_classifier_on_instances(xc, classifier_obj, classifier_feats,
                                                                      pos_class_matrix, gold_tagmap=g_tm)
            outstream.write('{},{},{},{:.2f}\n'.format(lang, matches, compares, acc))
            class_matches += matches
            class_compares += compares

        # -------------------------------------------
        # Test alignment if requested.
        # -------------------------------------------
        if eval_alignment:
            mas.add_corpus('gold', INTENT_ALN_MANUAL, lang, xc)
            EVAL_LOG.log(NORM_LEVEL, "Evaluating heuristic methods...")
            evaluate_heuristic_methods_on_file(f, xc, mas, classifier_obj, tagger, lang)

            EVAL_LOG.log(NORM_LEVEL, "Evaluating statistical methods...")
            evaluate_statistic_methods_on_file(f, xc, mas, classifier_obj, tagger, lang)

        # -------------------------------------------
        # Test DS Projection if requested
        # -------------------------------------------
        if eval_ds:
            evaluate_ds_projections_on_file(lang, xc, ds_plma, outstream=outstream)
            outstream.write('{}\n'.format(ds_plma))

        # -------------------------------------------
        #  Test POS Projection
        # -------------------------------------------
        if eval_posproj:
            evaluate_pos_projections_on_file(lang, xc, pos_plma, pos_proj_matrix, tagger, gold_tagmap=g_tm, trans_tagmap=t_tm, outstream=outstream)

        if e_tagger is not None:
            evaluate_lang_pos(lang, xc, e_tagger, pos_pla, gold_tagmap=g_tm, outstream=outstream)

    if eval_alignment:
        mas.eval_all(outstream=outstream)

    if eval_ds:
        outstream.write('{}\n'.format(ds_plma))

    if e_tagger is not None:
        # NB: `lang` here refers to whichever file was processed last in the loop above.
        outstream.write('{},{},{},{:.2f}\n'.format(lang, pos_pla.all_matches(), pos_pla.fulltotal(), pos_pla.accuracy()))
        e_tagger.close()

    # Report the POS tagging accuracy...
    if classifier_path is not None:
        outstream.write("ALL...\n")
        outstream.write('{},{},{:.2f}\n'.format(class_matches, class_compares, class_matches/class_compares*100))
        outstream.write('{}\n'.format(pos_class_matrix))

    if eval_posproj:
        outstream.write('{}\n'.format(pos_proj_matrix))

    # Only close the stream if we opened a file; never close sys.stdout.
    if outpath is not None:
        outstream.close()
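
A hypothetical invocation, assuming a directory of files with gold POS tags and manual alignments; all paths and option values below are illustrative, not the project's defaults:

from glob import glob

evaluate_intent(
    filelist=glob('annotation/*.xml'),        # hypothetical gold-annotated files
    classifier_path='gc.classifier',          # hypothetical Mallet model path
    eval_alignment=True,                      # run heuristic + statistical aligners
    outpath='intent_eval.csv',                # write results here instead of stdout
)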