Beispiel #1
0
def evaluate_real_datasets():
    """Benchmark the detectors on the real datasets over ``RUNS`` seeded runs.

    Every file matching ``data/raw/*/labeled/train/*`` is wrapped in a
    ``RealPickledDataset`` and evaluated together with ``KDDCup``. For each
    randomly drawn seed a fresh ``Evaluator`` is run, its plots are produced
    and its benchmark table is collected. Afterwards the per-run tables are
    averaged per ``(dataset, algorithm)`` pair, exported under the name
    ``'run_real_datasets'`` and summarised as boxplots.

    Relies on the module-level names ``RUNS`` and ``detectors``.
    """
    REAL_DATASET_GROUP_PATH = 'data/raw/'
    real_dataset_groups = glob.glob(REAL_DATASET_GROUP_PATH + '*')
    seeds = np.random.randint(np.iinfo(np.uint32).max,
                              size=RUNS,
                              dtype=np.uint32)

    # Slot 0 is only a placeholder here: it is replaced by a KDDCup instance
    # built from the current seed at the start of every run below.
    datasets = [KDDCup(seed=1)]
    for real_dataset_group in real_dataset_groups:
        for data_set_path in glob.glob(real_dataset_group +
                                       '/labeled/train/*'):
            # Dataset name is the file name without its '.pkl' marker.
            data_set_name = data_set_path.split('/')[-1].replace('.pkl', '')
            datasets.append(RealPickledDataset(data_set_name, data_set_path))

    # Collect the per-run tables and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    # NOTE(review): assumes evaluator.benchmarks() returns a DataFrame.
    run_results = []
    for seed in seeds:
        datasets[0] = KDDCup(seed)
        evaluator = Evaluator(datasets, detectors, seed=seed)
        evaluator.evaluate()
        result = evaluator.benchmarks()
        evaluator.plot_roc_curves()
        evaluator.plot_threshold_comparison()
        evaluator.plot_scores()
        run_results.append(result)

    # pd.concat rejects an empty list; fall back to the empty frame the
    # original accumulator started from.
    if run_results:
        results = pd.concat(run_results, ignore_index=True)
    else:
        results = pd.DataFrame()

    avg_results = results.groupby(['dataset', 'algorithm'],
                                  as_index=False).mean()
    # The evaluator of the last run is reused to export the averaged results.
    evaluator.set_benchmark_results(avg_results)
    evaluator.export_results('run_real_datasets')
    evaluator.create_boxplots(runs=RUNS, data=results, detectorwise=False)
    evaluator.create_boxplots(runs=RUNS, data=results, detectorwise=True)
Beispiel #2
0
def evaluate_real_datasets(folder_name=None, skill=None, anomaly_region=None):
    """Evaluate the detectors on one ``KittingExp`` dataset and log the result.

    For each seed a ``KittingExp`` dataset is built from ``folder_name`` and
    ``skill``, an ``Evaluator`` is run over the module-level ``detectors``,
    the benchmark result is written as text to ``./reports/logs/`` and the
    ROC curves are saved.

    Args:
        folder_name: Passed to ``KittingExp`` and used as the report file
            name prefix.
        skill: Passed to ``KittingExp`` and ``save_roc_curves``. It is
            formatted with ``%d`` in the report file name, so it must be a
            number even though the default is ``None``.
        anomaly_region: Optional suffix appended to the report file name.
    """
    # Only a single fixed seed is evaluated; the loop is kept so that more
    # seeds can be added without restructuring the function.
    seeds = [0]
    for seed in seeds:
        datasets = [KittingExp(seed, folder_name=folder_name, skill=skill)]
        evaluator = Evaluator(datasets, detectors, seed=seed)
        evaluator.evaluate()
        result = evaluator.benchmarks()

        if anomaly_region is None:
            report_path = ('./reports/logs/%s_result_skill_%d.txt'
                           % (folder_name, skill))
        else:
            report_path = ('./reports/logs/%s_result_skill_%d_%s.txt'
                           % (folder_name, skill, anomaly_region))

        # The context manager closes the file even if the write fails.
        with open(report_path, 'w') as report_file:
            report_file.write(str(result))

        evaluator.save_roc_curves(skill=skill)