예제 #1
0
    def test_compute_relaxed_stats(self):
        """Check the aggregated TP/FP/FN stats for the ``example_relax`` dataset.

        Each chunk yielded by ``load_classifications`` is passed to
        ``compute_stats_for_agg``; the resulting per-key ``FP``/``FN``/``TP``
        sets are unioned across chunks and handed to ``aggregate_stats``,
        whose output must equal the hard-coded expectation below.
        """
        file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_relax')
        eval_tool = EvaluationTool(legit=0)
        load_tool = LoadingTool()
        stats = defaultdict(lambda: defaultdict(set))

        # Only the stats are asserted on, so the raw chunk columns are not
        # accumulated (the Series.append / DataFrame.append calls formerly
        # used for that no longer exist in pandas >= 2.0).
        # NOTE(review): the trailing ``True`` given to compute_stats_for_agg
        # is presumably the "relaxed" switch -- confirm against its signature.
        for chunk in load_tool.load_classifications(file_path, ';', True):
            chunk_stats = eval_tool.compute_stats_for_agg('user', chunk, True)
            for k, v in chunk_stats.items():
                for outcome in ('FP', 'FN', 'TP'):
                    stats[k][outcome] = stats[k][outcome] | v[outcome]
        stats = eval_tool.aggregate_stats(stats)

        # Keys 0-3 are presumably class labels -- verify against the dataset.
        expected_stats = {
            0: {
                'TP': 1,
                'FP': 1,
                'FN': 1
            },
            1: {
                'TP': 7,
                'FP': 0,
                'FN': 2
            },
            2: {
                'TP': 1,
                'FP': 0,
                'FN': 0
            },
            3: {
                'TP': 2,
                'FP': 1,
                'FN': 3
            }
        }
        assert stats == expected_stats
예제 #2
0
    def test_get_stats_counts_one_label(self):
        """Check ``get_stats_counts`` on the ``example_one_label`` dataset.

        Each chunk yielded by ``load_classifications`` is passed to
        ``compute_stats_for_agg``; the resulting per-key ``FP``/``FN``/``TP``
        sets are unioned across chunks and handed to ``aggregate_stats``.
        ``get_stats_counts(1, stats)`` must then return one TP and no FP/FN.
        """
        file_path = os.path.join(ROOT_DIR, 'datasets/tests/example_one_label')
        eval_tool = EvaluationTool()
        load_tool = LoadingTool()
        stats = defaultdict(lambda: defaultdict(set))

        # Only the stats are asserted on, so the raw chunk columns are not
        # accumulated (the Series.append / DataFrame.append calls formerly
        # used for that no longer exist in pandas >= 2.0).
        for chunk in load_tool.load_classifications(file_path, ';', True):
            chunk_stats = eval_tool.compute_stats_for_agg('user', chunk)
            for k, v in chunk_stats.items():
                for outcome in ('FP', 'FN', 'TP'):
                    stats[k][outcome] = stats[k][outcome] | v[outcome]
        stats = eval_tool.aggregate_stats(stats)

        expected_counts = {'TP': 1, 'FP': 0, 'FN': 0}
        # The first argument (1) is presumably the label being queried --
        # confirm against get_stats_counts.
        counts = eval_tool.get_stats_counts(1, stats)
        assert expected_counts == counts