def main(domainxml, trainingsetcsv, manifold_value, restrictionstxt):
    """Cross-validate the training set and print a metrics report.

    The restrictions text is parsed, the training set is loaded with those
    restrictions applied, and ``sampling.cross_validate`` is run over it.
    Each metric is then printed to stdout under its own heading.

    Args:
        domainxml: Accepted for interface compatibility; not used here.
        trainingsetcsv: Object exposing ``read()`` that yields the raw
            training-set contents (presumably an open CSV file -- confirm
            against the caller).
        manifold_value: Forwarded unchanged as the third argument of
            ``sampling.cross_validate`` (presumably the number of folds --
            TODO confirm).
        restrictionstxt: Restrictions input handed to
            ``dataset.restrictions_from_text``.
    """
    restrictions = dataset.restrictions_from_text(restrictionstxt)
    raw_training_set = trainingsetcsv.read()
    # NOTE(review): the meaning of the literal True flag is defined by
    # dataset.read and is not visible from here.
    cols, data = dataset.read(raw_training_set, True, restrictions)

    expected, actual, expected_hunked, actual_hunked = sampling.cross_validate(
        data, list(cols), manifold_value
    )

    # Class label treated as the "positive" class for the per-class metrics.
    positive = "Obama"

    def mean_over_hunks(metric):
        """Average ``metric`` over the paired per-hunk result lists."""
        per_hunk = (
            metric(exp, act)
            for exp, act in zip(expected_hunked, actual_hunked)
        )
        return sum(per_hunk) / len(expected_hunked)

    # Each entry pairs a heading with a thunk so that every metric is only
    # computed after its heading has been printed, one report at a time.
    report = (
        ("Overall confusion matrix:",
         lambda: sampling.confusion_matrix(expected, actual)),
        ("\nOverall recall:",
         lambda: sampling.recall(expected, actual, positive)),
        ("\nOverall precision:",
         lambda: sampling.precision(expected, actual, positive)),
        ("\nOverall pf:",
         lambda: sampling.pf(expected, actual, positive)),
        ("\nOverall f-measure:",
         lambda: sampling.f_measure(expected, actual, positive)),
        ("\nOverall accuracy:",
         lambda: sampling.accuracy(expected, actual)),
        ("\nAverage accuracy:",
         lambda: mean_over_hunks(sampling.accuracy)),
        ("\nOverall error rate:",
         lambda: sampling.error_rate(expected, actual)),
        ("\nAverage error rate:",
         lambda: mean_over_hunks(sampling.error_rate)),
    )

    for heading, compute in report:
        print(heading)
        print(compute())
def test_recall(self):
    """Check ``sampling.recall`` for class 'a' against a hand-counted value."""
    # The values in the training set
    truth = ['a', 'b', 'a', 'b', 'a', 'c']
    # The results from the classifier
    predicted = ['a', 'b', 'a', 'a', 'b', 'd']
    # As though we're asking the question "does record belong in class a?"
    target_class = 'a'

    outcome = sampling.recall(truth, predicted, target_class)

    # Hand tally for class 'a' over the six (truth, predicted) pairs:
    #   true positives  -> ('a', 'a') twice
    #   false negatives -> ('a', 'b') once
    # The remaining pairs do not enter the recall formula: ('b', 'b') is a
    # true negative, ('b', 'a') is a false positive, and ('c', 'd') is a
    # miss that involves class 'a' on neither side.
    true_positives = 2
    false_negatives = 1

    self.assertEqual(
        outcome,
        true_positives / (true_positives + false_negatives),
    )