예제 #1
0
def load_or_create_datasets(window_ms=100,
                            stride_ms=100,
                            imp_split=True,
                            dos_type='original',
                            force_create=False,
                            verbose=False,
                            in_parallel=True):
    """Fetch the training and validation sets for one argument combination, building them on demand.

    When both dataset files are already on disk (and ``force_create`` is not set) they are
    loaded together with the stored feature durations. Otherwise the datasets are generated
    via ``get_mixed_training_validation`` and written out before being returned.

    :param window_ms: the window size (int ms).
    :param stride_ms: the step size (int ms).
    :param imp_split: a flag indicating whether the impersonation dataset has split labels.
    :param dos_type: a string indicating the DoS dataset used ('modified', 'original').
    :param force_create: a flag forcing regeneration even when the dataset files exist.
    :param verbose: a flag forwarded to the loaders/generator to control console output.
    :param in_parallel: a flag forwarded to the generator to request parallel feature calculation.
    :return: a 3-tuple of (training set, validation set, feature durations).
    """
    # These four settings are shared by every path lookup and CSV write below.
    dataset_key = (window_ms, stride_ms, imp_split, dos_type)

    train_csv, _ = __get_dataset_path(*dataset_key, 'training')
    valid_csv, _ = __get_dataset_path(*dataset_key, 'validation')
    durations_csv, durations_dir = __get_dataset_path(*dataset_key,
                                                      'validation_time')

    already_saved = os.path.exists(train_csv) and os.path.exists(valid_csv)

    if already_saved and not force_create:
        # Both dataset files are present: reuse what is on disk.
        return (
            datareader_csv.load_datapoints(train_csv, verbose=verbose),
            datareader_csv.load_datapoints(valid_csv, verbose=verbose),
            datareader_csv.load_feature_durations(durations_csv),
        )

    # Nothing usable on disk (or regeneration was requested): build, then persist.
    train_points, valid_points, durations = get_mixed_training_validation(
        window_ms,
        stride_ms,
        imp_split,
        dos_type,
        verbose=verbose,
        in_parallel=in_parallel)

    for points, label in ((train_points, 'training'),
                          (valid_points, 'validation')):
        __write_datapoints_csv(points, *dataset_key, label)
    datawriter_csv.save_feature_durations(durations, durations_csv,
                                          durations_dir)

    return train_points, valid_points, durations
예제 #2
0
def get_result_feature_breakdown(result: Result, type='validation'):
    """
    Loads the stored feature durations that correspond to a specific Result.

    The CSV path is derived from the result's ``imp_split``, ``dos_type``, ``window_ms``
    and ``stride_ms`` attributes together with the requested ``type``.

    :param result: The result whose settings select the feature-time file
    :param type: The type of feature times to load. Valid values are 'validation' and 'test'
    :return: Whatever ``datareader_csv.load_feature_durations`` yields for that file
             (described by the original author as a dict mapping feature labels to feature times)
    """
    # Split and full impersonation datasets live in separate sub-folders.
    if result.imp_split:
        split_folder = "imp_split"
    else:
        split_folder = "imp_full"

    file_stem = f"mixed_{type}_time_{result.window_ms}ms_{result.stride_ms}ms"
    folder = f"data/feature/{split_folder}/{result.dos_type}/"

    csv_path = "".join((folder, file_stem, ".csv"))
    return datareader_csv.load_feature_durations(csv_path)
예제 #3
0
                            result.imp_split, result.dos_type, result.model,
                            result.baseline, result.subset, True)
        for result in best_validation_results
    ]

    plot_types = ['f1', 'fpr', 'fnr', 'recall', 'precision', 'accuracy']
    metrics_types = ['macro', 'normal', 'impersonation', 'dos', 'fuzzy']

    for plot_type in plot_types:
        for metrics_type in metrics_types:
            plot_barchart_results(best_test_results, plot_type, metrics_type)
    plot_barchart_results(best_test_results, 'model_time')
    plot_barchart_feature_results(best_test_results)

    durations_path = f"data\\feature\\{conf.imp_type}\\{conf.dos_type}\\mixed_validation_time_100ms_100ms.csv"
    feature_times = datareader_csv.load_feature_durations(durations_path)
    del feature_times['time_ms']
    del feature_times['class_label']
    plot_feature_barcharts(feature_times)

    feature_results = metrics.filter_results(validation_results)
    plot_features_f1s(feature_results, datapoint_features[0:1], 1, 1)
    plot_features_f1s(feature_results, datapoint_features[1:], 3, 3)

    plot_features_f1s(feature_results,
                      datapoint_features,
                      5,
                      2,
                      plot_type='include')
    plot_features_f1s(feature_results,
                      datapoint_features,