def test_compute_recall(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_keys')
    eval_tool = EvaluationTool(legit=0)
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in load_tool.load_classifications(file_path, ';', True):
        chunk_stats = eval_tool.compute_stats(chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    labels = [1, 2]
    rec = [eval_tool.compute_recall(x, stats) for x in labels]
    rec_sklearn = list(
        recall_score(y_true=trues, y_pred=preds, labels=labels, average=None))
    assert rec == rec_sklearn
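
# The test above compares EvaluationTool.compute_recall against sklearn's
# recall_score with average=None. A minimal reference sketch of the quantity
# being checked, assuming per-label recall is derived from the accumulated
# counts as TP / (TP + FN); the helper name below is illustrative and not
# part of the project's API:
def _reference_recall(label, stats):
    tp = stats[label]['TP']
    fn = stats[label]['FN']
    # Labels that never appear as a true class have undefined recall.
    return float('nan') if tp + fn == 0 else tp / (tp + fn)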
def test_compute_recall_unbalanced(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_unbalanced')
    eval_tool = EvaluationTool()
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in load_tool.load_classifications(file_path, ';'):
        chunk_stats = eval_tool.compute_stats(chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    rec = [eval_tool.compute_recall(x, stats) for x in eval_tool.labels]
    assert np.isnan(rec[3])
def test_compute_relaxed_stats(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_relax')
    eval_tool = EvaluationTool(legit=0)
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(set))
    trues = pd.Series()
    preds = pd.Series()
    metadata = pd.DataFrame()
    for chunk in load_tool.load_classifications(file_path, ';', True):
        chunk_stats = eval_tool.compute_stats_for_agg('user', chunk, True)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        metadata = metadata.append(chunk[2])
        for k, v in chunk_stats.items():
            stats[k]['FP'] = stats[k]['FP'] | v['FP']
            stats[k]['FN'] = stats[k]['FN'] | v['FN']
            stats[k]['TP'] = stats[k]['TP'] | v['TP']
    stats = eval_tool.aggregate_stats(stats)
    expected_stats = {
        0: {'TP': 1, 'FP': 1, 'FN': 1},
        1: {'TP': 7, 'FP': 0, 'FN': 2},
        2: {'TP': 1, 'FP': 0, 'FN': 0},
        3: {'TP': 2, 'FP': 1, 'FN': 3},
    }
    assert stats == expected_stats
def test_get_avg_rec_nans_true(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_unbalanced')
    eval_tool = EvaluationTool()
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in load_tool.load_classifications(file_path, ';'):
        chunk_stats = eval_tool.compute_stats(chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    rec = eval_tool.get_avg_recall(stats=stats, nan=True)
    # TODO: Think of a better assert
    assert np.allclose(rec, 0.242857)
def test_get_labels_with_prec_above(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_keys')
    e_tool = EvaluationTool()
    l_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in l_tool.load_classifications(file_path, ';', True):
        chunk_stats = e_tool.compute_stats(chunk)
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    prec = [e_tool.compute_precision(x, stats) for x in e_tool.labels]
    threshold = 0.3
    precs_above_threshold = e_tool.get_labels_with_prec_above_thres(
        threshold, e_tool.labels, stats)
    expected = [0, 1]
    assert expected == precs_above_threshold
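
# get_labels_with_prec_above_thres is exercised above with a 0.3 threshold.
# A minimal sketch of the filtering the test expects, assuming per-label
# precision is TP / (TP + FP); this helper is illustrative only and not the
# project's API:
def _labels_above_precision(threshold, labels, stats):
    selected = []
    for label in labels:
        tp = stats[label]['TP']
        fp = stats[label]['FP']
        precision = 0.0 if tp + fp == 0 else tp / (tp + fp)
        if precision > threshold:
            selected.append(label)
    return selected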
def test_get_stats_counts_one_label(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_one_label')
    eval_tool = EvaluationTool()
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(set))
    trues = pd.Series()
    preds = pd.Series()
    metadata = pd.DataFrame()
    for chunk in load_tool.load_classifications(file_path, ';', True):
        chunk_stats = eval_tool.compute_stats_for_agg('user', chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        metadata = metadata.append(chunk[2])
        for k, v in chunk_stats.items():
            stats[k]['FP'] = stats[k]['FP'] | v['FP']
            stats[k]['FN'] = stats[k]['FN'] | v['FN']
            stats[k]['TP'] = stats[k]['TP'] | v['TP']
    stats = eval_tool.aggregate_stats(stats)
    expected_counts = {'TP': 1, 'FP': 0, 'FN': 0}
    counts = eval_tool.get_stats_counts(1, stats)
    assert expected_counts == counts
def test_get_avg_recall(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_strings')
    eval_tool = EvaluationTool()
    load_tool = LoadingTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in load_tool.load_classifications(file_path, ';'):
        chunk_stats = eval_tool.compute_stats(chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    rec = eval_tool.get_avg_recall(stats=stats)
    rec_avg_sklearn = recall_score(
        y_true=trues,
        y_pred=preds,
        labels=eval_tool.labels,
        average='macro')
    assert np.allclose(rec, rec_avg_sklearn)
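
# The test above compares get_avg_recall against sklearn's macro-averaged
# recall, i.e. the unweighted mean of per-label recalls. A minimal sketch
# under that assumption (illustrative only, not the tested implementation):
def _macro_recall(labels, stats):
    recalls = []
    for label in labels:
        tp = stats[label]['TP']
        fn = stats[label]['FN']
        recalls.append(0.0 if tp + fn == 0 else tp / (tp + fn))
    return sum(recalls) / len(recalls)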
def test_compute_stats(self):
    file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_keys')
    eval_tool = EvaluationTool(legit=0)
    load_tool = LoadingTool()
    result = defaultdict(lambda: defaultdict(int))
    for chunk in load_tool.load_classifications(file_path, ';', True):
        chunk_stats = eval_tool.compute_stats(chunk)
        for label in chunk_stats:
            result[label]['FP'] += chunk_stats[label]['FP']
            result[label]['FN'] += chunk_stats[label]['FN']
            result[label]['TP'] += chunk_stats[label]['TP']
    expected = defaultdict(lambda: defaultdict(int))
    expected[0]['TP'] = 1
    expected[0]['FP'] = 2
    expected[0]['FN'] = 4
    expected[1]['TP'] = 5
    expected[1]['FP'] = 3
    expected[1]['FN'] = 2
    expected[2]['TP'] = 1
    expected[2]['FP'] = 3
    expected[2]['FN'] = 2
    assert result == expected
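
# The count-based tests above all repeat the same per-chunk accumulation of
# FP/FN/TP totals. A hypothetical helper capturing that pattern, shown only
# to document what the loops do (not part of the tested code):
def _accumulate_counts(stats, chunk_stats):
    for label in chunk_stats:
        stats[label]['FP'] += chunk_stats[label]['FP']
        stats[label]['FN'] += chunk_stats[label]['FN']
        stats[label]['TP'] += chunk_stats[label]['TP']
    return stats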
def test_decision_tree(self):
    tree = DecisionTree(
        max_features='sqrt', min_samples_split=2, random_state=0)
    sktree = DecisionTreeClassifier(
        criterion='entropy',
        min_samples_split=2,
        max_features='sqrt',
        random_state=0)
    data = pd.read_csv(
        os.path.join(ROOT_DIR, 'datasets', 'letter'), header=None)
    X = data[data.columns[1:]]
    y = data[data.columns[0]]
    data = None
    X.rename(columns=lambda x: x - 1, inplace=True)
    y = y.apply(lambda x: ord(x))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.98, random_state=0)
    tree.fit(X_train, y_train)
    sktree.fit(X_train, y_train)
    tree_output_file = os.path.join(ROOT_DIR, 'outputs/tree.test')
    sktree_output_file = os.path.join(ROOT_DIR, 'outputs/sktree.test')
    if os.path.isfile(tree_output_file):
        os.remove(tree_output_file)
    if os.path.isfile(sktree_output_file):
        os.remove(sktree_output_file)
    tree_clas_tool = ClassificationTool(tree)
    sktree_clas_tool = ClassificationTool(sktree)
    tree_clas_tool.save_predictions(
        (X_test, y_test), tree_output_file, None, False, legit=None)
    sktree_clas_tool.save_predictions(
        (X_test, y_test), sktree_output_file, None, False, legit=None)
    loading_tool = LoadingTool()
    eval_tool = EvaluationTool()
    stats = defaultdict(lambda: defaultdict(int))
    trues = pd.Series()
    preds = pd.Series()
    for chunk in loading_tool.load_classifications(tree_output_file, ';'):
        chunk_stats = eval_tool.compute_stats(chunk)
        trues = trues.append(chunk[0])
        preds = preds.append(chunk[1])
        for label in chunk_stats:
            stats[label]['FP'] += chunk_stats[label]['FP']
            stats[label]['FN'] += chunk_stats[label]['FN']
            stats[label]['TP'] += chunk_stats[label]['TP']
    prec = eval_tool.get_avg_precision(stats=stats)
    skstats = defaultdict(lambda: defaultdict(int))
    sktrues = pd.Series()
    skpreds = pd.Series()
    for chunk in loading_tool.load_classifications(sktree_output_file, ';'):
        chunk_stats = eval_tool.compute_stats(chunk)
        sktrues = sktrues.append(chunk[0])
        skpreds = skpreds.append(chunk[1])
        for label in chunk_stats:
            skstats[label]['FP'] += chunk_stats[label]['FP']
            skstats[label]['FN'] += chunk_stats[label]['FN']
            skstats[label]['TP'] += chunk_stats[label]['TP']
    skprec = eval_tool.get_avg_precision(stats=skstats)
    assert math.isclose(prec, skprec, abs_tol=0.02)