def load_or_create_datasets(window_ms=100, stride_ms=100, imp_split=True, dos_type='original',
                            force_create=False, verbose=False, in_parallel=True):
    """Return the training and validation sets for the given argument combination.

    If the datasets are not already on disk (or force_create is set), they are
    generated and saved as a side effect before being returned.

    :param window_ms: the window size (int ms).
    :param stride_ms: the step size (int ms).
    :param imp_split: a flag indicating whether the impersonation dataset has split labels.
    :param dos_type: a string indicating the DoS dataset used ('modified', 'original').
    :param force_create: a flag indicating whether the dataset should be generated, even if it exists.
    :param verbose: a flag indicating how much progress should be output to console.
    :param in_parallel: a flag indicating whether features should be calculated in parallel.
    :return: a list of training DataPoints, a list of validation DataPoints,
        a dictionary of feature durations.
    """
    train_path, _ = __get_dataset_path(window_ms, stride_ms, imp_split, dos_type, 'training')
    valid_path, _ = __get_dataset_path(window_ms, stride_ms, imp_split, dos_type, 'validation')
    durations_path, out_dir = __get_dataset_path(window_ms, stride_ms, imp_split, dos_type, 'validation_time')

    # Reuse the cached CSV files when both splits exist and regeneration was not requested.
    cached = os.path.exists(train_path) and os.path.exists(valid_path)
    if cached and not force_create:
        train_points = datareader_csv.load_datapoints(train_path, verbose=verbose)
        valid_points = datareader_csv.load_datapoints(valid_path, verbose=verbose)
        durations = datareader_csv.load_feature_durations(durations_path)
    else:
        # Build the datasets from scratch, then persist them for future calls.
        train_points, valid_points, durations = get_mixed_training_validation(
            window_ms, stride_ms, imp_split, dos_type,
            verbose=verbose, in_parallel=in_parallel)

        __write_datapoints_csv(train_points, window_ms, stride_ms, imp_split, dos_type, 'training')
        __write_datapoints_csv(valid_points, window_ms, stride_ms, imp_split, dos_type, 'validation')
        datawriter_csv.save_feature_durations(durations, durations_path, out_dir)

    return train_points, valid_points, durations
def get_result_feature_breakdown(result: Result, type='validation'):
    """Load the dict mapping feature labels to feature times for a specific Result.

    :param result: the Result whose feature times should be loaded.
    :param type: the type of feature times to load. Valid values are 'validation' and 'test'.
    :return: a dict mapping feature labels to feature durations.
    """
    # The impersonation folder name depends on whether the labels were split.
    if result.imp_split:
        imp_folder = "imp_split"
    else:
        imp_folder = "imp_full"

    filename = f"mixed_{type}_time_{result.window_ms}ms_{result.stride_ms}ms.csv"
    path = f"data/feature/{imp_folder}/{result.dos_type}/{filename}"

    return datareader_csv.load_feature_durations(path)
result.imp_split, result.dos_type, result.model, result.baseline, result.subset, True) for result in best_validation_results ] plot_types = ['f1', 'fpr', 'fnr', 'recall', 'precision', 'accuracy'] metrics_types = ['macro', 'normal', 'impersonation', 'dos', 'fuzzy'] for plot_type in plot_types: for metrics_type in metrics_types: plot_barchart_results(best_test_results, plot_type, metrics_type) plot_barchart_results(best_test_results, 'model_time') plot_barchart_feature_results(best_test_results) durations_path = f"data\\feature\\{conf.imp_type}\\{conf.dos_type}\\mixed_validation_time_100ms_100ms.csv" feature_times = datareader_csv.load_feature_durations(durations_path) del feature_times['time_ms'] del feature_times['class_label'] plot_feature_barcharts(feature_times) feature_results = metrics.filter_results(validation_results) plot_features_f1s(feature_results, datapoint_features[0:1], 1, 1) plot_features_f1s(feature_results, datapoint_features[1:], 3, 3) plot_features_f1s(feature_results, datapoint_features, 5, 2, plot_type='include') plot_features_f1s(feature_results, datapoint_features,