def set_features_columns_options():
    """
    Publish the columns of one test CSV file as the feature selection options.

    The file that is inspected is the first entry of the first attack
    directory of the first flight route found under the configured test data
    path. Columns listed in COLUMNS_TO_REMOVE (meta-data such as index or
    flight id) are dropped; the remaining names are stored, in file order, in
    InputSettings.FEATURES_COLUMNS_OPTIONS.

    An empty listing at any of the three levels makes the ``[0]`` lookup fail
    (IndexError for a list), exactly as before.

    :return: None
    """

    test_data_path = InputSettings.get_test_data_path()

    flight_route = get_subdirectories(test_data_path)[0]
    flight_dir = os.path.join(test_data_path, flight_route)
    attack = get_subdirectories(flight_dir)[0]

    attack_dir = f'{test_data_path}/{flight_route}/{attack}'
    flight_csv = os.listdir(attack_dir)[0]

    # Only the header is needed here, so do not parse any data row
    df_test = pd.read_csv(f'{attack_dir}/{flight_csv}', nrows=0)

    # Cleaning meta-data - keep every column that is not marked for removal
    InputSettings.FEATURES_COLUMNS_OPTIONS = [
        column for column in df_test.columns
        if column not in COLUMNS_TO_REMOVE
    ]
def rename_files(directory_path):
    """
    Add the "sensors_" prefix to the name of every file in the input tree

    The tree is walked as <directory_path>/<flight route>/<attack>/<file> and
    each entry listed at the deepest level is renamed in place.

    :param directory_path: the path of the directory in the input
    :return: None
    """

    for route_name in get_subdirectories(directory_path):
        route_path = os.path.join(directory_path, route_name)

        for attack_name in get_subdirectories(route_path):
            attack_path = os.path.join(route_path, attack_name)
            file_names = os.listdir(
                f'{directory_path}/{route_name}/{attack_name}')

            for file_name in file_names:
                os.rename(os.path.join(attack_path, file_name),
                          os.path.join(attack_path, "sensors_" + file_name))
def move_test_files_to_target_path(source_directory, target_directory):
    """
    Transfer the sensors file of every source sub directory to the target path

    The i-th sub directory (in the order returned by get_subdirectories) has
    its sensors file passed to move_file_to_target_path under the new name
    sensors_<i>.csv.

    :param source_directory: source path
    :param target_directory: target path
    :return: None
    """

    for position, route_name in enumerate(get_subdirectories(source_directory)):
        route_path = os.path.join(source_directory, route_name)
        original_name = get_sensors_file(route_path)
        new_name = "sensors_{0}.csv".format(position)

        move_file_to_target_path(route_path, target_directory, original_name,
                                 new_name)
def init_models():
    """
    Prepare the shared state for a models run - suitable for both flows

    Reads the run parameters from ModelsExecution, initializes the evaluation
    metrics data which is presented in the results table and registers the
    flight routes (sub directories of the test data path) in InputSettings.

    :return: similarity score, test data path, results path, new model running indicator
    """

    parameters = ModelsExecution.get_parameters()
    similarity_score, test_data_path, results_path, new_model_running = parameters

    InputSettings.init_results_metrics_data()

    # The flight routes are the sub directories of the test data set
    InputSettings.set_flight_routes(get_subdirectories(test_data_path))

    return similarity_score, test_data_path, results_path, new_model_running
def move_train_files_to_target_path(source_directory, target_directory,
                                    train_folder_name):
    """
    Transfer the sensors file of every source sub directory to its own route folder

    The i-th sub directory gets the folder
    <target_directory>/<train_folder_name>Route_<i>, and its sensors file is
    passed to move_file_to_target_path under the name without_anom.csv.

    :param source_directory: source path
    :param target_directory: target path
    :param train_folder_name: the name of the route
    :return: None
    """

    for position, route_name in enumerate(get_subdirectories(source_directory)):
        route_folder = train_folder_name + "Route_" + str(position)
        route_path = os.path.join(source_directory, route_name)
        original_name = get_sensors_file(route_path)

        route_target = f'{target_directory}/{route_folder}'
        create_directories(route_target)

        move_file_to_target_path(route_path, route_target, original_name,
                                 "without_anom.csv")
def move_test_files_to_target_path(source_directory, target_directory,
                                   test_folder_name, attack_name):
    """
    Transfer the sensors file of every source sub directory to the attack folder

    All files end up in <target_directory>/<test_folder_name>/<attack_name>,
    the i-th one under the name sensors_<i>.csv. Both folder levels are
    requested through create_directories on every iteration, so nothing is
    created when the source has no sub directories.

    :param source_directory: source path
    :param target_directory: target path
    :param test_folder_name: the name of the route
    :param attack_name: the name of the attack
    :return: None
    """

    for position, route_name in enumerate(get_subdirectories(source_directory)):
        route_path = os.path.join(source_directory, route_name)
        original_name = get_sensors_file(route_path)

        route_target = os.path.join(target_directory, test_folder_name)
        create_directories(route_target)

        attack_target = os.path.join(route_target, attack_name)
        create_directories(attack_target)

        move_file_to_target_path(route_path, attack_target, original_name,
                                 "sensors_{0}.csv".format(position))
def execute_creation(source_directory, target_directory):
    """
    Generate the attacked test sets for every train route

    The source is walked as <source_directory>/<flight>/<route>/<route>.csv.
    For the i-th route of a flight the folder
    <target_directory>/<flight>_Route_<i> is created and four test sets
    (Mixed, Velocity, Height, Constant - in this order) are produced from the
    route's train file. The spoofed way point of each test set is drawn
    independently with random.randint from the last three way point indexes.

    :param source_directory: directory that contains the train routes
    :param target_directory: target path that the test files will be save there
    :return: None
    """

    # (file name, attack) of each generated test set, in generation order
    attack_variants = (
        ("Mixed_Attack", "Mixed"),
        ("Velocity_Attack", "Velocity"),
        ("Height_Attack", "Height"),
        ("Constant_Attack", "Constant"),
    )

    for flight in get_subdirectories(source_directory):
        flight_path = os.path.join(source_directory, flight)

        for position, route in enumerate(get_subdirectories(flight_path)):
            route_folder = str(flight) + "_Route_" + str(position)
            route_path = os.path.join(flight_path, route)

            # create the route folder in the target directory
            route_target = f'{target_directory}/{route_folder}'
            create_directories(route_target)

            train_file_df = pd.read_csv(f'{route_path}/{route}.csv')
            way_points_count = len(train_file_df["NameOfWpt"]) - 4

            for file_name, attack in attack_variants:
                create_test_set(
                    num_of_way_points=way_points_count,
                    directory_path=route_target,
                    file_name=file_name,
                    spoofed_way_point=random.randint(way_points_count - 3,
                                                     way_points_count - 1),
                    attack=attack,
                    source_df=train_file_df)
def run_model(training_data_path, test_data_path, results_path,
              similarity_score, save_model, new_model_running, algorithm_path,
              threshold, features_list, target_features_list,
              train_scaler_path, target_scaler_path, event):
    """
    Run SVR model process

    For every flight route of the test data set and every chosen similarity
    function the predictions are executed and the tpr / fpr / acc / delay
    scores are written to
    <results_path>/svr/<current time>/<similarity>/<flight route>/<flight route>_<metric>.csv.
    Afterwards report_results is called once per similarity function.

    :param training_data_path: train data set directory path
    :param test_data_path: test data set directory path
    :param results_path: results directory path
    :param similarity_score: chosen similarity functions
    :param save_model: indicator whether the user want to save the model or not
    :param new_model_running: indicator whether we are in new model creation flow or not
    :param algorithm_path: path of existing algorithm (pickle file, load model flow only)
    :param threshold: saved threshold for load model flow (replaced by the
        configured one in the new model creation flow)
    :param features_list: saved chosen features for load model flow
    :param target_features_list: all the features in the test data set for the target
    :param train_scaler_path: path of existing input train scaler (pickle file, load model flow only)
    :param target_scaler_path: path of existing input target scaler (pickle file, load model flow only)
    :param event: running state flag
    :return: None - the results are written to csv files and reported through report_results
    """

    # Choose between new model creation flow and load existing model flow
    if new_model_running:
        kernel, gamma, epsilon, threshold, window_size = get_svr_new_model_parameters()
    else:
        # NOTE: unpickling can execute arbitrary code - these paths must only
        # point to files which were saved by a trusted run.
        # The files are opened with context managers so the handles are
        # closed as soon as the objects are loaded.
        with open(algorithm_path, 'rb') as model_file:
            svr_model = pickle.load(model_file)
        with open(train_scaler_path, 'rb') as scaler_file:
            X_train_scaler = pickle.load(scaler_file)
        with open(target_scaler_path, 'rb') as scaler_file:
            Y_train_scaler = pickle.load(scaler_file)
        window_size = svr_model.n_prev
        # There is no train data in the load model flow
        X_train = None
        Y_train = None

    FLIGHT_ROUTES = get_subdirectories(test_data_path)

    current_time = get_current_time()
    current_time_path = os.path.join(str(results_path), 'svr', str(current_time))
    create_directories(current_time_path)

    # Create sub directories for each similarity function
    for similarity in similarity_score:
        create_directories(os.path.join(current_time_path, str(similarity)))

    # Train the model for each flight route
    for flight_route in FLIGHT_ROUTES:

        # Execute training for new model flow
        if new_model_running:
            svr_model, X_train_scaler, Y_train_scaler, X_train, Y_train = execute_train(
                flight_route,
                training_data_path=training_data_path,
                kernel=kernel,
                gamma=gamma,
                epsilon=epsilon,
                features_list=features_list,
                window_size=window_size,
                target_features_list=target_features_list,
                event=event)

        # Get results for each similarity function
        for similarity in similarity_score:
            current_results_path = os.path.join(current_time_path,
                                                str(similarity),
                                                str(flight_route))
            create_directories(current_results_path)

            tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attacks_duration = execute_predict(
                flight_route,
                test_data_path=test_data_path,
                similarity_score=similarity,
                threshold=threshold,
                svr_model=svr_model,
                X_train_scaler=X_train_scaler,
                results_path=current_results_path,
                add_plots=True,
                run_new_model=new_model_running,
                X_train=X_train,
                features_list=features_list,
                target_features_list=target_features_list,
                save_model=save_model,
                Y_train_scaler=Y_train_scaler,
                Y_train=Y_train,
                window_size=window_size,
                event=event)

            # One csv file per metric, named after the flight route
            metrics = (
                (tpr_scores, '_tpr.csv'),
                (fpr_scores, '_fpr.csv'),
                (acc_scores, '_acc.csv'),
                (delay_scores, '_delay.csv'),
            )
            for scores, suffix in metrics:
                scores_path = os.path.join(current_results_path,
                                           str(flight_route) + suffix)
                pd.DataFrame(scores).to_csv(scores_path, index=False)

    algorithm_name = "SVR"

    # Report results for training data to csv files
    # NOTE(review): routes_duration / attacks_duration are the values of the
    # last execute_predict call only, and they are undefined when there is no
    # flight route - confirm this is what report_results expects.
    for similarity in similarity_score:
        report_similarity_path = os.path.join(current_time_path, str(similarity))
        report_results(report_similarity_path,
                       test_data_path,
                       FLIGHT_ROUTES,
                       algorithm_name,
                       similarity,
                       routes_duration,
                       attacks_duration)
def execute_predict(flight_route,
                    test_data_path=None,
                    similarity_score=None,
                    threshold=None,
                    svr_model=None,
                    X_train_scaler=None,
                    results_path=None,
                    add_plots=True,
                    run_new_model=False,
                    X_train=None,
                    features_list=None,
                    target_features_list=None,
                    save_model=False,
                    Y_train_scaler=None,
                    Y_train=None,
                    window_size=None,
                    event=None):
    """
    Execute predictions function for a specific flight route

    Every file of every attack directory of the route is cleaned, normalized
    with the train scalers and predicted by the SVR model. The anomaly score
    of each prediction is compared with the threshold, and the resulting
    method scores are collected per attack directory name.

    :param flight_route: current flight route we should predict on
    :param test_data_path: the path of test data directory
    :param similarity_score: similarity function
    :param threshold: threshold from the train (recomputed by predict_train_set
        in the new model creation flow)
    :param svr_model: SVR model
    :param X_train_scaler: normalization train input scalar
    :param results_path: the path of results directory
    :param add_plots: indicator whether to add plots or not
    :param run_new_model: indicator whether current flow is new model creation or not
    :param X_train: train input data frame
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param save_model: indicator whether the user want to save the model or not
    :param Y_train_scaler: normalization train target scalar
    :param Y_train: train target data frame
    :param window_size: window size for each instance in training
    :param event: running state flag - event.wait() is called before each
        step; when it is None (the default) no waiting is done
    :return: tpr scores, fpr scores, acc scores, delay scores, routes duration, attack duration
    """

    tpr_scores = defaultdict(list)
    fpr_scores = defaultdict(list)
    acc_scores = defaultdict(list)
    delay_scores = defaultdict(list)
    routes_duration = defaultdict(list)
    attack_duration = defaultdict(list)

    # Set a threshold in new model creation flow
    if run_new_model:
        # The default event (None) has no wait(), so it is skipped
        if event is not None:
            event.wait()
        threshold = predict_train_set(svr_model,
                                      X_train,
                                      save_model,
                                      add_plots,
                                      threshold,
                                      features_list,
                                      target_features_list,
                                      results_path,
                                      flight_route,
                                      similarity_score,
                                      X_train_scaler,
                                      Y_train,
                                      Y_train_scaler)

    flight_dir = os.path.join(test_data_path, flight_route)
    attacks = get_subdirectories(flight_dir)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    attacks_figures_results_path = os.path.join(figures_results_path, "Attacks")
    create_directories(attacks_figures_results_path)

    # Iterate over all attacks in order to find anomalies
    for attack in attacks:
        if event is not None:
            event.wait()

        # The figures folder is named after the part before the first "_"
        attack_name = attack.split("_")[0]

        current_attack_figures_results_path = os.path.join(
            attacks_figures_results_path, attack_name)
        create_directories(current_attack_figures_results_path)

        attacks_path = os.path.join(str(test_data_path), str(flight_route),
                                    str(attack))

        for flight_csv in os.listdir(attacks_path):
            flight_attack_path = os.path.join(attacks_path, str(flight_csv))
            df_test_source = pd.read_csv(flight_attack_path)

            Y_test_labels = df_test_source[[ATTACK_COLUMN]].values
            Y_test_labels_preprocessed = svr_model._preprocess(
                Y_test_labels, Y_test_labels)[1]

            # Number of records in the file, reported as the route duration
            attack_time = len(Y_test_labels)

            input_df_test = df_test_source[features_list]
            target_df_test = df_test_source[target_features_list]

            # Step 1 : Clean test data set
            input_clean_df_test = clean_data(input_df_test)
            target_clean_df_test = clean_data(target_df_test)

            # Step 2: Normalize the data with the scalers of the train
            X_test = X_train_scaler.transform(input_clean_df_test)
            Y_test = Y_train_scaler.transform(target_clean_df_test)
            Y_test_preprocessed = svr_model._preprocess(Y_test, Y_test)[1]

            X_pred = svr_model.predict(X_test)
            assert len(Y_test_preprocessed) == len(X_pred)

            scores_test = [
                anomaly_score(Y_test_preprocessed[i], pred, similarity_score)
                for i, pred in enumerate(X_pred)
            ]

            # Add reconstruction error scatter if plots indicator is true
            if event is not None:
                event.wait()
            if add_plots:
                plot_reconstruction_error_scatter(
                    scores=scores_test,
                    labels=Y_test_labels_preprocessed,
                    threshold=threshold,
                    plot_dir=current_attack_figures_results_path,
                    title=f'Outlier Score Testing for {flight_csv} in {flight_route}({attack})')

                # NOTE(review): the per feature plots are assumed to belong
                # to the plots indicator branch - confirm
                for i, target_feature in enumerate(target_features_list):
                    title = "Test performance of SVR for " + target_feature + " feature in " + flight_csv
                    plot_prediction_performance(
                        Y_train=Y_test_preprocessed[:, i],
                        X_pred=X_pred[:, i],
                        results_path=current_attack_figures_results_path,
                        title=title)

            # A record is an anomaly when its score reaches the threshold
            predictions = [1 if x >= threshold else 0 for x in scores_test]

            attack_start, attack_end = get_attack_boundaries(
                df_test_source[ATTACK_COLUMN])

            method_scores = get_method_scores(predictions,
                                              attack_start,
                                              attack_end,
                                              add_window_size=True,
                                              window_size=window_size)

            tpr_scores[attack].append(method_scores[0])
            fpr_scores[attack].append(method_scores[1])
            acc_scores[attack].append(method_scores[2])
            delay_scores[attack].append(method_scores[3])
            routes_duration[attack].append(attack_time)
            attack_duration[attack].append(method_scores[4])

    return tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attack_duration