def test_modified_annotations_metrics(self):
    # For non-partial matching, the partial overlap counts as 1 FP + 1 FN.
    correct_connective_metrics = ClassificationMetrics(5, 2, 2)
    correct_cause_span_metrics = ClassificationMetrics(2, 3, 4)
    correct_effect_span_metrics = ClassificationMetrics(4, 3, 3)
    correct_cause_jaccard = 0.6
    correct_effect_jaccard = 33 / 35.

    metrics = CausalityMetrics(self.sentences, self.modified_sentences,
                               False)
    self._test_metrics(
        metrics, correct_connective_metrics, correct_cause_span_metrics,
        correct_effect_span_metrics, correct_cause_jaccard,
        correct_effect_jaccard)

    swapped_sentences, swapped_modified = [
        self._get_sentences_with_swapped_args(s)
        for s in (self.sentences, self.modified_sentences)]
    swapped_metrics = CausalityMetrics(swapped_sentences, swapped_modified,
                                       False)
    # Swap all the correct arguments
    self._test_metrics(
        swapped_metrics, correct_connective_metrics,
        correct_effect_span_metrics, correct_cause_span_metrics,
        correct_effect_jaccard, correct_cause_jaccard)

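# Cross-check with test_same_annotations_metrics below: there are 7 gold
# connectives in total, so (tp=5, fp=2, fn=2) means two gold connectives go
# unmatched, one of which is the partial overlap scored as both an FP and
# an FN under non-partial matching.
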
def test_same_annotations_metrics(self):
    correct_connective_metrics = ClassificationMetrics(7, 0, 0)
    correct_cause_metrics = ClassificationMetrics(6, 0, 0)
    correct_effect_metrics = correct_connective_metrics

    swapped = self._get_sentences_with_swapped_args(self.sentences)
    correct_arg_metrics = [correct_cause_metrics, correct_effect_metrics]
    for sentences, arg_metrics in zip(
            [self.sentences, swapped],
            [correct_arg_metrics, list(reversed(correct_arg_metrics))]):
        metrics = CausalityMetrics(sentences, sentences, False)
        self._test_metrics(metrics, correct_connective_metrics,
                           *(arg_metrics + [1.0] * 2))

def test_add_metrics(self):
    metrics = CausalityMetrics(self.sentences, self.modified_sentences,
                               False)
    modified_metrics = deepcopy(metrics)
    modified_metrics.cause_metrics.jaccard = 0.3
    modified_metrics.effect_metrics.jaccard = 1.0
    summed_metrics = metrics + modified_metrics

    correct_connective_metrics = ClassificationMetrics(10, 4, 4)
    correct_cause_span_metrics = ClassificationMetrics(4, 6, 8)
    correct_effect_span_metrics = ClassificationMetrics(8, 6, 6)
    correct_cause_jaccard = 0.45
    correct_effect_jaccard = 34 / 35.
    self._test_metrics(
        summed_metrics, correct_connective_metrics,
        correct_cause_span_metrics, correct_effect_span_metrics,
        correct_cause_jaccard, correct_effect_jaccard)

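# Note on the expected values above: each count is exactly double its
# counterpart in test_modified_annotations_metrics (connectives go from
# (5, 2, 2) to (10, 4, 4)), while each Jaccard score is the mean of the two
# operands' scores ((0.6 + 0.3) / 2 = 0.45 and (33/35. + 1.0) / 2 = 34/35.).
# So '+' evidently sums TP/FP/FN counts and averages Jaccard indices; this
# is inferred from the test's arithmetic, not from the CausalityMetrics
# implementation itself.
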
def setUp(self):
    self.metrics = ClassificationMetrics(tp=15, fp=10, fn=5, tn=1)

# Wrap the classifier in a pipeline that first keeps only the top 1% of
# features by ANOVA F-value.
classifier = SKLPipeline([
    ('feature_selection', SelectPercentile(f_classif, 1)),
    ('classification', classifier)])
classifier.fit(features_train, labels_train)
labels_test_predicted = classifier.predict(features_test)
results.append(diff_binary_vectors(labels_test_predicted, labels_test_gold))

# support = classifier.steps[0][1].get_support(True)
# print 'Selected', len(support), 'features:'
# for index in support:
#     print ' ', feature_names[index]

print 'Results:'
print ClassificationMetrics.average(results, False)

# Visualize last round
'''
fig = plt.figure()
fig.canvas.set_window_title('All training features')
vis_features.vis_features(features_train, labels_train)

selected = classifier.steps[0][1].transform(features_train)
fig = plt.figure()
fig.canvas.set_window_title('All selected training features')
vis_features.vis_features(selected, labels_train)
'''

# steps[0][1] is the fitted SelectPercentile step; apply its selection to
# the test features.
selected = classifier.steps[0][1].transform(features_test)
for predicted_class, win_title in [(1, 'TPs/FPs'), (0, 'TNs/FNs')]:
def test_aggregate_metrics(self):
    metrics = CausalityMetrics(self.sentences, self.modified_sentences,
                               False)
    aggregated = CausalityMetrics.aggregate([metrics] * 3)
    self._test_metrics(
        aggregated, metrics.connective_metrics,
        metrics.cause_metrics.span_metrics,
        metrics.effect_metrics.span_metrics,
        metrics.cause_metrics.jaccard, metrics.effect_metrics.jaccard)

    self_metrics = CausalityMetrics(self.sentences, self.sentences, False)
    aggregated = CausalityMetrics.aggregate([metrics, self_metrics])

    correct_connective_metrics = ClassificationMetrics(6, 1, 1)
    correct_cause_span_metrics = ClassificationMetrics(4, 1.5, 2)
    correct_cause_span_metrics._precision = (1 + 2 / 5.) / 2
    correct_cause_span_metrics._recall = (1 + 2 / 6.) / 2
    correct_cause_span_metrics._f1 = (1 + f1(2 / 5., 2 / 6.)) / 2
    correct_cause_jaccard = (1.0 + 0.6) / 2.0

    correct_effect_span_metrics = ClassificationMetrics(5.5, 1.5, 1.5)
    effect_p_r_f1 = (4 / 7. + 1) / 2
    correct_effect_span_metrics._precision = effect_p_r_f1
    correct_effect_span_metrics._recall = effect_p_r_f1
    correct_effect_span_metrics._f1 = effect_p_r_f1
    correct_effect_jaccard = 34 / 35.

    self._test_metrics(
        aggregated, correct_connective_metrics, correct_cause_span_metrics,
        correct_effect_span_metrics, correct_cause_jaccard,
        correct_effect_jaccard)

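# The expected values above imply that aggregate() macro-averages: raw
# counts are averaged across runs ((2 + 6) / 2 = 4 cause TPs), and
# precision/recall/F1 are averaged directly rather than recomputed from the
# averaged counts, hence the explicit _precision/_recall/_f1 overrides on
# the expected ClassificationMetrics. Again, this is read off the test's
# arithmetic, not the implementation.
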
def complete_evaluation(self):
    return ClassificationMetrics(self.tp, self.fp, self.fn, self.tn)

def aggregate_results(self, results_list):
    return ClassificationMetrics.average(results_list)
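
# Hypothetical usage sketch (names here are illustrative): given one
# ClassificationMetrics per cross-validation fold, as produced by
# diff_binary_vectors in the pipeline snippet above, the evaluator reduces
# them to a single averaged result:
#
#     per_fold_results = [diff_binary_vectors(predicted, gold)
#                         for predicted, gold in folds]
#     print evaluator.aggregate_results(per_fold_results)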