def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):

    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                         sent_input_interaction_tags,
                                                                                         test_essay_feats, test_x,
                                                                                         wd_test_ys_bytag,
                                                                                         tag2word_classifier,
                                                                                         SPARSE_SENT_FEATS, LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags, output_confidence=True)
    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)
    # NOTE: both sentence-level metrics below are computed from the same
    # test-set labels and predictions; no separate train-set sentence data is
    # passed into this function.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics,
                                                "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics,
                                                "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print s
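
# `test_classifier_per_code` is a project helper whose implementation is not
# shown in these snippets. Below is a minimal sketch of what it presumably
# does, offered as an assumption only: apply one binary classifier per tag and
# collect the results keyed by tag. It assumes scikit-learn style estimators,
# and assumes that `predict_fn`, when supplied, takes the fitted per-tag model
# plus the feature matrix. The `_sketch_` name is hypothetical.
def _sketch_test_classifier_per_code(xs, tag2classifier, tags, predict_fn=None):
    predictions_by_code = {}
    for tag in tags:
        model = tag2classifier[tag]
        if predict_fn is None:
            predictions_by_code[tag] = model.predict(xs)      # hard 0/1 labels
        else:
            predictions_by_code[tag] = predict_fn(model, xs)  # e.g. margins or probabilities
    return predictions_by_code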

# Example #2

    td_sent_predictions_by_code \
        = test_classifier_per_code(sent_td_xs, tag2sent_classifier, sent_output_train_test_tags)

    vd_sent_predictions_by_code \
        = test_classifier_per_code(sent_vd_xs, tag2sent_classifier, sent_output_train_test_tags )

    merge_dictionaries(wd_td_ys_bytag, cv_wd_td_ys_by_tag)
    merge_dictionaries(wd_vd_ys_bytag, cv_wd_vd_ys_by_tag)
    merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)
    merge_dictionaries(vd_wd_predictions_by_code, cv_wd_vd_predictions_by_tag)

    merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
    merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
    merge_dictionaries(td_sent_predictions_by_code, cv_sent_td_predictions_by_tag)
    merge_dictionaries(vd_sent_predictions_by_code, cv_sent_vd_predictions_by_tag)

    predictions_to_file(f_output_file, sent_vd_ys_bycode, vd_sent_predictions_by_code, essays_VD, codes=sent_output_train_test_tags)

f_output_file.close()
# print results for each code
logger.info("Training completed")

""" Persist Results to Mongo DB """

wd_algo   = str(fn_create_wd_cls())
sent_algo = str(fn_create_sent_cls())

SUFFIX = "_CAUSE_EFFECT_LBLS"
SC_TAGGING_TD, SC_TAGGING_VD, SC_SENT_TD, SC_SENT_VD = "SC_TAGGING_TD" + SUFFIX, "SC_TAGGING_VD" + SUFFIX, "SC_SENT_TD" + SUFFIX, "SC_SENT_VD" + SUFFIX
parameters = dict(config)
parameters["extractors"] = map(lambda fn: fn.func_name, extractors)
parameters["min_feat_freq"] = MIN_FEAT_FREQ

# Example #3

essays_TD = essay_feats

# TD and VD are lists of Essay objects. The sentences are lists
# of featureextractortransformer.Word objects
print "Running Tagging Model"
""" Data Partitioning and Training """
td_feats, _ = flatten_to_wordlevel_feat_tags(essays_TD)

td_X = feature_transformer.transform(td_feats)
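
# `feature_transformer` comes from the project's feature extraction code and is
# already fitted by the time these snippets run. Purely as an analogy (an
# assumption, not the project's implementation), it plays the same role as a
# fitted sklearn DictVectorizer: a list of per-word feature dicts in, a sparse
# feature matrix out. The feature names below are made up for illustration.
from sklearn.feature_extraction import DictVectorizer

_example_word_feats = [{"word=cell": 1, "prev_word=the": 1},
                       {"word=wall": 1, "prev_word=cell": 1}]
_example_vectorizer = DictVectorizer(sparse=True)
_example_X = _example_vectorizer.fit_transform(_example_word_feats)  # 2 rows, one per word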

""" TEST Tagger """
td_wd_predictions_by_code = test_classifier_per_code(td_X, tag2word_classifier, wd_test_tags)

print "\nRunning Sentence Model"
""" SENTENCE LEVEL PREDICTIONS FROM STACKING """

# Placeholder gold labels: any tag key maps to an all-zero vector with one
# entry per word, since only predictions are needed here.
dummy_wd_td_ys_bytag = defaultdict(lambda: np.asarray([0.0] * td_X.shape[0]))
sent_td_xs, sent_td_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
    sent_input_feat_tags, sent_input_interaction_tags, essays_TD, td_X,
    dummy_wd_td_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)
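
# The stacking step turns the word-level tagger's outputs into sentence-level
# features. A minimal sketch of the idea (an assumption about what
# get_sent_feature_for_stacking_from_tagging_model computes, not its actual
# code): for each sentence, summarise every tag's per-word scores, e.g. with
# their max and mean, and let the sentence-level classifier learn from that.
import numpy as np

def _sketch_sentence_stack_features(word_scores_by_tag, sentence_word_indices, tags):
    # word_scores_by_tag: tag -> 1D array of per-word scores over the whole split
    # sentence_word_indices: positions of this sentence's words in those arrays
    feats = []
    for tag in tags:
        scores = np.asarray(word_scores_by_tag[tag])[sentence_word_indices]
        feats.append(scores.max())
        feats.append(scores.mean())
    return np.asarray(feats)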

""" Test Stack Classifier """
td_sent_predictions_by_code \
    = test_classifier_per_code(sent_td_xs, tag2sent_classifier, sent_output_train_test_tags )

merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)
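
# `merge_dictionaries` is another project helper not shown here. A plausible
# sketch, stated as an assumption: fold one cross-validation fold's per-tag
# arrays into the running `cv_*` accumulators so metrics can later be computed
# over all folds. The exact merge rule is not visible in these snippets.
def _sketch_merge_dictionaries(fold_results_by_tag, accumulator_by_tag):
    # accumulator_by_tag is assumed to be a defaultdict(list)
    for tag, values in fold_results_by_tag.items():
        accumulator_by_tag[tag].extend(list(values))
    return accumulator_by_tag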

with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_td_ys_bycode, td_sent_predictions_by_code, essays_TD, regular_tags + CAUSE_TAGS + CAUSAL_REL_TAGS)
# print results for each code
print out_predictions_file

# Example #4

    vd_sent_predictions_by_code \
        = test_classifier_per_code(sent_vd_xs, tag2sent_classifier, sent_output_train_test_tags )

    merge_dictionaries(wd_td_ys_bytag, cv_wd_td_ys_by_tag)
    merge_dictionaries(wd_vd_ys_bytag, cv_wd_vd_ys_by_tag)
    merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)
    merge_dictionaries(vd_wd_predictions_by_code, cv_wd_vd_predictions_by_tag)

    merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
    merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
    merge_dictionaries(td_sent_predictions_by_code,
                       cv_sent_td_predictions_by_tag)
    merge_dictionaries(vd_sent_predictions_by_code,
                       cv_sent_vd_predictions_by_tag)

    predictions_to_file(f_output_file, sent_vd_ys_bycode,
                        vd_sent_predictions_by_code, essays_VD)

f_output_file.close()
# print results for each code
""" Persist Results to Mongo DB """

wd_algo = str(fn_create_wd_cls())
sent_algo = str(fn_create_sent_cls())

CB_TAGGING_TD, CB_TAGGING_VD, CB_SENT_TD, CB_SENT_VD = "CB_TAGGING_TD", "CB_TAGGING_VD", "CB_SENT_TD", "CB_SENT_VD"
parameters = dict(config)
parameters["extractors"] = map(lambda fn: fn.func_name, extractors)
parameters["min_feat_freq"] = MIN_FEAT_FREQ

wd_td_objectid = processor.persist_results(CB_TAGGING_TD, cv_wd_td_ys_by_tag,
                                           cv_wd_td_predictions_by_tag,
                                           parameters, wd_algo)

# Example #5

sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
    sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats, test_x,
    wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

""" Test Stack Classifier """
test_sent_predictions_by_code \
    = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags )

if USE_SVM:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
else:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)
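
# `decision_function_for_tag` and `probability_for_tag` are project helpers not
# shown in these snippets. A hedged sketch of the distinction they presumably
# capture, assuming scikit-learn estimators and assuming each takes the fitted
# per-tag model plus the feature matrix: SVMs expose a signed margin through
# decision_function, while probabilistic classifiers expose P(tag present)
# through predict_proba.
def _sketch_decision_function_for_tag(model, xs):
    return model.decision_function(xs)      # signed distance from the separating hyperplane

def _sketch_probability_for_tag(model, xs):
    return model.predict_proba(xs)[:, 1]    # positive-class probability, assuming a binary model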

""" Write out the predicted classes """
with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)
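
# `predictions_to_file` is a project helper; only the header rows written above
# are visible here, so the sketch below is an assumption about the row format.
# It emits one pipe-delimited line per sentence matching that header:
# Essay|Sent Number|Processed Sentence|Concept Codes|Predictions.
def _sketch_write_prediction_row(f, essay_name, sent_number, sentence_text,
                                 gold_codes, predicted_codes):
    row = "|".join([essay_name,
                    str(sent_number),
                    sentence_text,
                    ",".join(sorted(gold_codes)),
                    ",".join(sorted(predicted_codes))])
    f.write(row + "\n")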

""" Write out the accuracy metrics """
train_wd_metrics    = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics     = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)

train_sent_metrics  = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics   = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
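
# ResultsProcessor.compute_mean_metrics belongs to the project's results code
# and is not shown here. As a rough sketch of the idea only (an assumption, not
# its implementation): score each tag's binary predictions against its binary
# labels and average across tags.
import numpy as np
from sklearn.metrics import f1_score

def _sketch_mean_f1(ys_by_tag, predictions_by_tag):
    scores = [f1_score(np.asarray(ys_by_tag[tag]), np.asarray(predictions_by_tag[tag]))
              for tag in ys_by_tag]
    return float(np.mean(scores)) if scores else 0.0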

with open(out_metrics_file, "w+") as f_metrics_file:
    s = ""
    pad = ResultsProcessor.pad_str

# Example #6

cv_wd_td_ys_by_tag, cv_wd_td_predictions_by_tag = defaultdict(list), defaultdict(list)

# TD and VD are lists of Essay objects. The sentences are lists
# of featureextractortransformer.Word objects
print "Running Tagging Model"
""" Data Partitioning and Training """
test_feats, _ = flatten_to_wordlevel_feat_tags(test_essay_feats)

test_x = feature_transformer.transform(test_feats)

""" TEST Tagger """
td_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)

print "\nRunning Sentence Model"
""" SENTENCE LEVEL PREDICTIONS FROM STACKING """

dummy_wd_td_ys_bytag = defaultdict(lambda : np.asarray([0.0] * test_x.shape[0]))
sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats, test_x, dummy_wd_td_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

""" Test Stack Classifier """
test_sent_predictions_by_code \
    = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags )

merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)

with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + CAUSE_TAGS + CAUSAL_REL_TAGS)
# print results for each code
print out_predictions_file

# Example #7

def evaluate_on_test_set(test_essay_feats, out_predictions_file,
                         out_predicted_margins_file, out_metrics_file,
                         out_categories_file):

    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(
        test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS,
        LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=probability_for_tag)
    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode,
                            test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
        )
        predictions_to_file(f_output_file,
                            sent_test_ys_bycode,
                            test_decision_functions_by_code,
                            test_essay_feats,
                            regular_tags + sent_output_train_test_tags,
                            output_confidence=True)
    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_test_ys_bytag, test_wd_predictions_by_code)
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(
            train_wd_metrics, test_wd_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(
            train_sent_metrics, test_sent_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print s

# Example #8

    td_sent_predictions_by_code \
        = test_classifier_per_code(sent_td_xs, tag2sent_classifier, sent_output_train_test_tags)

    vd_sent_predictions_by_code \
        = test_classifier_per_code(sent_vd_xs, tag2sent_classifier, sent_output_train_test_tags )

    merge_dictionaries(wd_td_ys_bytag, cv_wd_td_ys_by_tag)
    merge_dictionaries(wd_vd_ys_bytag, cv_wd_vd_ys_by_tag)
    merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)
    merge_dictionaries(vd_wd_predictions_by_code, cv_wd_vd_predictions_by_tag)

    merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
    merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
    merge_dictionaries(td_sent_predictions_by_code, cv_sent_td_predictions_by_tag)
    merge_dictionaries(vd_sent_predictions_by_code, cv_sent_vd_predictions_by_tag)

    predictions_to_file(f_output_file, sent_vd_ys_bycode, vd_sent_predictions_by_code, essays_VD)

f_output_file.close()
# print results for each code

""" Persist Results to Mongo DB """

wd_algo   = str(fn_create_wd_cls())
sent_algo = str(fn_create_sent_cls())

CB_TAGGING_TD, CB_TAGGING_VD, CB_SENT_TD, CB_SENT_VD = "CB_TAGGING_TD", "CB_TAGGING_VD", "CB_SENT_TD", "CB_SENT_VD"
parameters = dict(config)
parameters["extractors"] = map(lambda fn: fn.func_name, extractors)
parameters["min_feat_freq"] = MIN_FEAT_FREQ

wd_td_objectid = processor.persist_results(CB_TAGGING_TD, cv_wd_td_ys_by_tag, cv_wd_td_predictions_by_tag, parameters, wd_algo)
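
# `processor.persist_results` belongs to the project's ResultsProcessor and its
# internals are not shown here. Purely to illustrate the "Persist Results to
# Mongo DB" idea, a hedged pymongo sketch (connection details, database name
# and document fields are all hypothetical, not the project's schema):
from datetime import datetime
from pymongo import MongoClient

def _sketch_persist_results(collection_name, ys_by_tag, predictions_by_tag, parameters, algo):
    client = MongoClient("localhost", 27017)          # connection details are hypothetical
    db = client["metrics"]                            # database name is hypothetical
    doc = {
        "algorithm": algo,
        "parameters": parameters,
        "tags": sorted(ys_by_tag.keys()),
        "num_predictions": {t: len(p) for t, p in predictions_by_tag.items()},
        "created": datetime.utcnow(),
    }
    return db[collection_name].insert_one(doc).inserted_id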

# Example #9

    test_decision_functions_by_code = test_classifier_per_code(
        sent_test_xs,
        tag2sent_classifier,
        sent_output_train_test_tags,
        predict_fn=probability_for_tag)
""" TEST REPLACING ESSAYS WITH PREDICTED TAGS For PETER"""
#predicted_test_essay_feats = replace_essay_labels_with_predictions(test_essay_feats, test_x, tag2Classifier=tag2word_classifier, confidence_threshold=0.0)
""" Write out the predicted classes """
with open(out_word_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Words with Predictions\n")
    word_predictions_to_file(f_output_file, test_essay_feats, test_x,
                             wd_test_ys_bytag, tag2word_classifier)
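
# `word_predictions_to_file` is a project helper; only its header row is
# visible above. A hedged sketch of the kind of row content it presumably
# builds (the word/tag interleaving format is an assumption): each word
# followed by the tags whose word-level classifier fired on it.
def _sketch_words_with_predictions(words, predicted_tags_per_word):
    # words: list of token strings; predicted_tags_per_word: list of sets of tag names
    parts = []
    for word, tags in zip(words, predicted_tags_per_word):
        parts.append(word if not tags else "%s[%s]" % (word, ",".join(sorted(tags))))
    return " ".join(parts)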

with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write(
        "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode,
                        test_sent_predictions_by_code, test_essay_feats,
                        regular_tags + sent_output_train_test_tags)

with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write(
        "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
    )
    predictions_to_file(f_output_file,
                        sent_test_ys_bycode,
                        test_decision_functions_by_code,
                        test_essay_feats,
                        regular_tags + sent_output_train_test_tags,
                        output_confidence=True)
""" Write out the accuracy metrics """
train_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_td_ys_bytag, train_wd_predictions_by_code)

# Example #10

""" Data Partitioning and Training """
test_feats, _ = flatten_to_wordlevel_feat_tags(test_essay_feats)

test_x = feature_transformer.transform(test_feats)
""" TEST Tagger """
td_wd_predictions_by_code = test_classifier_per_code(test_x,
                                                     tag2word_classifier,
                                                     wd_test_tags)

print "\nRunning Sentence Model"
""" SENTENCE LEVEL PREDICTIONS FROM STACKING """

dummy_wd_td_ys_bytag = defaultdict(lambda: np.asarray([0.0] * test_x.shape[0]))
sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
    sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
    test_x, dummy_wd_td_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS,
    LOOK_BACK)
""" Test Stack Classifier """
test_sent_predictions_by_code \
    = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags )

merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)

with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write(
        "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode,
                        test_sent_predictions_by_code, test_essay_feats,
                        regular_tags + CAUSE_TAGS + CAUSAL_REL_TAGS)
# print results for each code
print out_predictions_file