def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):
    """Evaluate the word tagger and the stacked sentence classifier on test essays.

    Runs the per-tag word-level classifiers, builds sentence-level features
    from the tagging model, runs the per-tag sentence-level classifiers, then
    writes predictions, confidences and a metrics report to disk. The metrics
    report is also printed.

    The function reads many names that are defined outside of it (for example
    feature_transformer, tag2word_classifier, tag2sent_classifier, USE_SVM,
    the tag lists, wd_td_ys_bytag and train_wd_predictions_by_code).

    Args:
        test_essay_feats: test essays; passed to the flattening, stacking and
            prediction-writing helpers.
        out_predictions_file: path opened with "w+" for the predicted classes.
        out_predicted_margins_file: path opened with "w+" for the predicted
            confidences.
        out_metrics_file: path opened with "w+" for the metrics report.
        out_categories_file: forwarded to write_categories together with
            out_predictions_file and the literal "CB".
    """
    # --- word level ---------------------------------------------------------
    word_feats, word_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_gold_by_tag = get_wordlevel_ys_by_code(word_tags, wd_train_tags)
    word_xs = feature_transformer.transform(word_feats)

    wd_predicted_by_tag = test_classifier_per_code(word_xs, tag2word_classifier, wd_test_tags)

    print("\nRunning Sentence Model")

    # --- sentence level (stacked on top of the tagging model) ----------------
    sent_xs, sent_gold_by_tag = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats, word_xs,
        wd_gold_by_tag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

    sent_predicted_by_tag = test_classifier_per_code(
        sent_xs, tag2sent_classifier, sent_output_train_test_tags)

    # The confidence output comes from the decision function when USE_SVM is
    # set, and from the probability function otherwise.
    confidence_fn = decision_function_for_tag if USE_SVM else probability_for_tag
    sent_confidence_by_tag = test_classifier_per_code(
        sent_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=confidence_fn)

    # --- predicted classes ---------------------------------------------------
    with open(out_predictions_file, "w+") as predictions_out:
        predictions_out.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(predictions_out, sent_gold_by_tag, sent_predicted_by_tag, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)

    # --- predicted confidences -----------------------------------------------
    with open(out_predicted_margins_file, "w+") as confidences_out:
        confidences_out.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(confidences_out, sent_gold_by_tag, sent_confidence_by_tag, test_essay_feats,
                            regular_tags + sent_output_train_test_tags, output_confidence=True)

    # --- accuracy metrics ----------------------------------------------------
    mean_metrics = ResultsProcessor.compute_mean_metrics
    wd_train_metrics = mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    wd_test_metrics = mean_metrics(wd_gold_by_tag, wd_predicted_by_tag)
    # NOTE(review): the sentence-level "train" metrics are computed from the
    # same test inputs as the "test" metrics, so both columns of the SENTENCE
    # section carry identical numbers - confirm whether training-set values
    # were intended here.
    sent_train_metrics = mean_metrics(sent_gold_by_tag, sent_predicted_by_tag)
    sent_test_metrics = mean_metrics(sent_gold_by_tag, sent_predicted_by_tag)

    with open(out_metrics_file, "w+") as metrics_out:
        pad = ResultsProcessor.pad_str
        as_text = ResultsProcessor.metrics_to_string
        report = "".join([
            as_text(wd_train_metrics, wd_test_metrics,
                    "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test"))),
            as_text(sent_train_metrics, sent_test_metrics,
                    "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test"))),
        ])
        metrics_out.write(report)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print(report)
コード例 #2
0
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
else:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

""" Write out the predicted classes """
# Excerpt: writes sentence-level predictions and confidences to two files,
# then computes, writes and prints the metrics report.
# NOTE(review): every name read below is defined outside this excerpt; it
# looks like the de-indented tail of evaluate_on_test_set - confirm against
# the full source before reusing it.
with open(out_predictions_file, "w+") as f_output_file:
    # Header row with '|'-separated column names, followed by whatever
    # predictions_to_file writes for the predicted classes.
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

with open(out_predicted_margins_file, "w+") as f_output_file:
    # Same layout, but fed the decision-function/probability outputs and
    # called with output_confidence=True.
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

""" Write out the accuracy metrics """
# Word-level (tagging) metrics for the training and test data.
train_wd_metrics    = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics     = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)

# NOTE(review): train_sent_metrics is computed from the same test inputs as
# test_sent_metrics, so both values passed to the SENTENCE section below are
# identical - confirm whether training-set values were intended.
train_sent_metrics  = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics   = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    # Build the report as one string: a TAGGING section followed by a
    # SENTENCE section, each introduced by a padded "Train"/"Test" header.
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(train_wd_metrics,   test_wd_metrics,   "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)
    # The same report is echoed to stdout.
    print s
    #TODO - need to add logic here for GW
    # The categories file is not written in this variant (call disabled below).
    #write_categories(out_predictions_file, "CB", out_categories_file)

コード例 #3
0
def evaluate_on_test_set(test_essay_feats, out_predictions_file,
                         out_predicted_margins_file, out_metrics_file,
                         out_categories_file):
    """Run the tagging and sentence models on test essays and report results.

    Steps, in order: flatten the essays to word level and transform the
    features; predict per tag with the word classifiers; derive the
    sentence-level stacking features; predict per tag with the sentence
    classifiers (classes and confidences); write both outputs to files;
    compute the mean metrics, write them to a file, write the categories
    file, and print the metrics report.

    All models, tag lists and flags used here (feature_transformer,
    tag2word_classifier, tag2sent_classifier, USE_SVM, ...) are names defined
    outside this function.

    Args:
        test_essay_feats: test essays handed to the flattening, stacking and
            prediction-writing helpers.
        out_predictions_file: output path for the predicted classes.
        out_predicted_margins_file: output path for the predicted confidences.
        out_metrics_file: output path for the metrics report.
        out_categories_file: passed to write_categories along with
            out_predictions_file and "CB".
    """
    flat_feats, flat_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    word_ys_by_tag = get_wordlevel_ys_by_code(flat_tags, wd_train_tags)
    word_x = feature_transformer.transform(flat_feats)

    # Word-level tagger predictions.
    word_preds_by_tag = test_classifier_per_code(word_x, tag2word_classifier, wd_test_tags)

    print("\nRunning Sentence Model")

    # Sentence-level inputs for the stacked classifier.
    stacked = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        word_x, word_ys_by_tag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)
    sentence_x, sentence_ys_by_tag = stacked

    # Stacked classifier: predicted classes first, then confidences.
    sentence_preds_by_tag = test_classifier_per_code(
        sentence_x, tag2sent_classifier, sent_output_train_test_tags)

    if USE_SVM:
        score_fn = decision_function_for_tag
    else:
        score_fn = probability_for_tag
    sentence_scores_by_tag = test_classifier_per_code(
        sentence_x, tag2sent_classifier, sent_output_train_test_tags,
        predict_fn=score_fn)

    # Predicted classes.
    with open(out_predictions_file, "w+") as class_file:
        class_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(
            class_file, sentence_ys_by_tag, sentence_preds_by_tag,
            test_essay_feats, regular_tags + sent_output_train_test_tags)

    # Predicted confidences.
    with open(out_predicted_margins_file, "w+") as score_file:
        score_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(
            score_file, sentence_ys_by_tag, sentence_scores_by_tag,
            test_essay_feats, regular_tags + sent_output_train_test_tags,
            output_confidence=True)

    # Accuracy metrics.
    word_train_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    word_test_metrics = ResultsProcessor.compute_mean_metrics(
        word_ys_by_tag, word_preds_by_tag)
    # NOTE(review): both sentence-level metric sets below are computed from
    # the test labels and test predictions, so the "Train" and "Test" values
    # of the SENTENCE section are the same - confirm whether training-set
    # inputs were meant for the first one.
    sentence_train_metrics = ResultsProcessor.compute_mean_metrics(
        sentence_ys_by_tag, sentence_preds_by_tag)
    sentence_test_metrics = ResultsProcessor.compute_mean_metrics(
        sentence_ys_by_tag, sentence_preds_by_tag)

    with open(out_metrics_file, "w+") as report_file:
        pad = ResultsProcessor.pad_str
        tagging_section = ResultsProcessor.metrics_to_string(
            word_train_metrics, word_test_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        sentence_section = ResultsProcessor.metrics_to_string(
            sentence_train_metrics, sentence_test_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        report = "" + tagging_section + sentence_section
        report_file.write(report)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print(report)
コード例 #4
0
    )
    predictions_to_file(f_output_file,
                        sent_test_ys_bycode,
                        test_decision_functions_by_code,
                        test_essay_feats,
                        regular_tags + sent_output_train_test_tags,
                        output_confidence=True)
""" Write out the accuracy metrics """
# Excerpt: computes mean metrics, writes the report to out_metrics_file,
# writes the categories file and prints the report.
# NOTE(review): all inputs (wd_td_ys_bytag, sent_test_ys_bycode, the output
# paths, ...) are defined outside this excerpt; it looks like the de-indented
# tail of evaluate_on_test_set - confirm against the full source.
train_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_test_ys_bytag, test_wd_predictions_by_code)

# NOTE(review): train_sent_metrics and test_sent_metrics are computed from the
# same test inputs, so the two values passed to the SENTENCE section below are
# identical - confirm whether training-set values were intended.
train_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    # Build the report as one string: a TAGGING section followed by a
    # SENTENCE section, each introduced by a padded "Train"/"Test" header.
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(
        train_wd_metrics, test_wd_metrics,
        "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(
        train_sent_metrics, test_sent_metrics,
        "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)
    # Called with the predictions path, the literal "CB" and the categories
    # path while the metrics file is still open.
    write_categories(out_predictions_file, "CB", out_categories_file)
    # The same report is echoed to stdout.
    print s