def run_lstm_performance_plot(file_path, result_path):
    """
    Fit an LSTM model on 'without_anom.csv' and plot its training performance per target feature
    :param file_path: directory which holds the 'without_anom.csv' file
    :param result_path: path handed to plot_prediction_performance as the results path
    :return: None
    """
    window_size = 2
    features_list = ['Time', 'Route Index', 'GPS Distance', 'Longitude']
    target_features_list = ['CINR1 OMNI', 'Radio Distance', 'Barometer Altitude']

    df_train = pd.read_csv(f'{file_path}/without_anom.csv')
    raw_inputs = df_train[features_list]
    raw_targets = df_train[target_features_list]

    # Step 1 : Clean the input columns and the target columns
    input_df_train = clean_data(raw_inputs)
    target_df_train = clean_data(raw_targets)

    # Step 2: Normalize with the "min_max" scaler, then build the windowed LSTM samples
    # (the fitted scalers are returned as well but are not needed for plotting)
    X_train, _ = normalize_data(data=input_df_train, scaler="min_max")
    X_train_preprocessed = get_training_data_lstm(X_train, window_size)
    Y_train, _ = normalize_data(data=target_df_train, scaler="min_max")
    Y_train_preprocessed = get_training_data_lstm(Y_train, window_size)

    # Fixed model configuration used for this performance plot
    model_params = {
        'timesteps': window_size,
        'input_features': input_df_train.shape[1],
        'target_features': target_df_train.shape[1],
        'encoding_dimension': 8,
        'activation': 'relu',
        'loss': 'mean_squared_error',
        'optimizer': 'Adam',
    }
    lstm = get_lstm_autoencoder_model(**model_params)

    # The fit history is read here but nothing below consumes it
    training_history = lstm.fit(X_train_preprocessed,
                                Y_train_preprocessed,
                                epochs=5,
                                verbose=0).history

    X_pred = lstm.predict(X_train_preprocessed, verbose=0)

    # multi_mean output is indexed as [:, feature] below
    # NOTE(review): presumably it averages over the window axis - confirm in multi_mean
    mean_y_train = multi_mean(Y_train_preprocessed)
    mean_x_pred = multi_mean(X_pred)
    assert mean_y_train.shape == mean_x_pred.shape

    # One figure per target feature
    for column, target_feature in enumerate(target_features_list):
        plot_prediction_performance(
            Y_train=mean_y_train[:, column],
            X_pred=mean_x_pred[:, column],
            results_path=result_path,
            title=f"Training performance of LSTM for {target_feature}",
            y_label="Sensor's Mean Value")
def execute_predict(flight_route,
                    test_data_path=None,
                    similarity_score=None,
                    threshold=None,
                    svr_model=None,
                    X_train_scaler=None,
                    results_path=None,
                    add_plots=True,
                    run_new_model=False,
                    X_train=None,
                    features_list=None,
                    target_features_list=None,
                    save_model=False,
                    Y_train_scaler=None,
                    Y_train=None,
                    window_size=None,
                    event=None):
    """
    Execute predictions function for a specific flight route
    :param flight_route: current flight route we should train on
    :param test_data_path: the path of test data directory
    :param similarity_score: similarity function
    :param threshold: threshold from the train
    :param svr_model: SVR model
    :param X_train_scaler: normalization train input scalar
    :param results_path: the path of results directory
    :param add_plots: indicator whether to add plots or not
    :param run_new_model: indicator whether current flow is new model creation or not
    :param X_train: train input data frame
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param save_model: indicator whether the user want to save the model or not
    :param Y_train_scaler: normalization train target scalar
    :param Y_train: train target data frame
    :param window_size: window size for each instance in training
    :param event: running state flag; when given, its wait() is called before the train-set
                  prediction, at the start of every attack and before plotting.
                  None (the default) skips the waiting entirely
    :return: tpr scores, fpr scores, acc scores, delay scores, routes duration, attack duration
             (each one is a defaultdict(list) keyed by the attack directory name)
    """

    def wait_until_running():
        # `event` defaults to None, so it must not be dereferenced unconditionally
        if event is not None:
            event.wait()

    tpr_scores = defaultdict(list)
    fpr_scores = defaultdict(list)
    acc_scores = defaultdict(list)
    delay_scores = defaultdict(list)
    routes_duration = defaultdict(list)
    attack_duration = defaultdict(list)

    # Set a threshold in new model creation flow
    if run_new_model:
        wait_until_running()
        threshold = predict_train_set(svr_model,
                                      X_train,
                                      save_model,
                                      add_plots,
                                      threshold,
                                      features_list,
                                      target_features_list,
                                      results_path,
                                      flight_route,
                                      similarity_score,
                                      X_train_scaler,
                                      Y_train,
                                      Y_train_scaler)

    flight_dir = os.path.join(test_data_path, flight_route)
    attacks = get_subdirectories(flight_dir)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    attacks_figures_results_path = os.path.join(figures_results_path, "Attacks")
    create_directories(attacks_figures_results_path)

    # Iterate over all attacks in order to find anomalies
    for attack in attacks:
        wait_until_running()

        # Figures are grouped by the part of the directory name before the first "_"
        attack_name = attack.split("_")[0]
        current_attack_figures_results_path = os.path.join(attacks_figures_results_path,
                                                           attack_name)
        create_directories(current_attack_figures_results_path)

        attacks_path = os.path.join(flight_dir, str(attack))
        for flight_csv in os.listdir(attacks_path):
            flight_attack_path = os.path.join(attacks_path, str(flight_csv))
            df_test_source = pd.read_csv(flight_attack_path)

            Y_test_labels = df_test_source[[ATTACK_COLUMN]].values
            # Labels go through the model's own preprocessing; element [1] of the result is used
            # NOTE(review): presumably this aligns labels with the predictions - confirm
            Y_test_labels_preprocessed = svr_model._preprocess(Y_test_labels,
                                                               Y_test_labels)[1]

            # Number of rows in the flight file; reported under routes_duration
            attack_time = len(Y_test_labels)

            input_df_test = df_test_source[features_list]
            target_df_test = df_test_source[target_features_list]

            # Step 1 : Clean test data set
            input_clean_df_test = clean_data(input_df_test)
            target_clean_df_test = clean_data(target_df_test)

            # Step 2: Normalize the data with the scalers which were fitted on the train set
            X_test = X_train_scaler.transform(input_clean_df_test)
            Y_test = Y_train_scaler.transform(target_clean_df_test)
            Y_test_preprocessed = svr_model._preprocess(Y_test, Y_test)[1]

            X_pred = svr_model.predict(X_test)
            assert len(Y_test_preprocessed) == len(X_pred)

            scores_test = [
                anomaly_score(Y_test_preprocessed[i], pred, similarity_score)
                for i, pred in enumerate(X_pred)
            ]

            # Add reconstruction error scatter if plots indicator is true
            wait_until_running()
            if add_plots:
                plot_reconstruction_error_scatter(
                    scores=scores_test,
                    labels=Y_test_labels_preprocessed,
                    threshold=threshold,
                    plot_dir=current_attack_figures_results_path,
                    title=f'Outlier Score Testing for {flight_csv} in {flight_route}({attack})')

                # NOTE(review): the per-feature plots are kept under the add_plots guard,
                # mirroring predict_train_set - confirm this matches the intended layout
                for i, target_feature in enumerate(target_features_list):
                    title = f"Test performance of SVR for {target_feature} feature in {flight_csv}"
                    plot_prediction_performance(
                        Y_train=Y_test_preprocessed[:, i],
                        X_pred=X_pred[:, i],
                        results_path=current_attack_figures_results_path,
                        title=title)

            # A record is flagged as anomalous when its score reaches the threshold
            predictions = [1 if score >= threshold else 0 for score in scores_test]

            attack_start, attack_end = get_attack_boundaries(df_test_source[ATTACK_COLUMN])

            method_scores = get_method_scores(predictions,
                                              attack_start,
                                              attack_end,
                                              add_window_size=True,
                                              window_size=window_size)

            tpr_scores[attack].append(method_scores[0])
            fpr_scores[attack].append(method_scores[1])
            acc_scores[attack].append(method_scores[2])
            delay_scores[attack].append(method_scores[3])
            routes_duration[attack].append(attack_time)
            attack_duration[attack].append(method_scores[4])

    return tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attack_duration
def predict_train_set(svr_model,
                      X_train,
                      save_model,
                      add_plots,
                      threshold,
                      features_list,
                      target_features_list,
                      results_path,
                      flight_route,
                      similarity_score,
                      X_train_scaler,
                      Y_train,
                      Y_train_scaler):
    """
    Score the train data set with the SVR model, derive the threshold and optionally
    plot the training performance and persist the model artifacts
    :param svr_model: SVR model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: value forwarded to get_threshold together with the train scores
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """
    # NOTE(review): this file, as visible here, defines predict_train_set twice (an LSTM
    # variant follows); in a single module the later definition wins - confirm which one
    # callers are expected to reach
    X_pred = svr_model.predict(X_train)
    Y_train_preprocessed = svr_model._preprocess(Y_train, Y_train)[1]
    assert len(Y_train_preprocessed) == len(X_pred)

    # One anomaly score per predicted record
    scores_train = [
        anomaly_score(Y_train_preprocessed[row], prediction, similarity_score)
        for row, prediction in enumerate(X_pred)
    ]

    # choose threshold for which <MODEL_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(scores_train, threshold)

    # The "Figures" directory is created even when no plots are requested
    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    if add_plots:
        train_figures_results_path = os.path.join(figures_results_path, "Train")
        create_directories(train_figures_results_path)

        for column, target_feature in enumerate(target_features_list):
            plot_prediction_performance(
                Y_train=Y_train_preprocessed[:, column],
                X_pred=X_pred[:, column],
                results_path=train_figures_results_path,
                title=f"Training performance of SVR for {target_feature} in {flight_route}")

    # Save created model if the indicator is true
    if save_model:
        data = {
            'features': features_list,
            'target_features': target_features_list,
            'threshold': threshold,
            'params': get_svr_parameters_dictionary(),
        }

        model_results_path = os.path.join(results_path, "model_data")
        create_directories(model_results_path)

        # Model metadata goes to a JSON file next to the pickled artifacts
        model_data_path = os.path.join(str(model_results_path), 'model_data.json')
        with open(model_data_path, 'w') as outfile:
            json.dump(data, outfile)

        # Model first, then the input scaler, then the target scaler
        artifacts = (
            ("_model.pkl", svr_model),
            ("_train_scaler.pkl", X_train_scaler),
            ("_target_scaler.pkl", Y_train_scaler),
        )
        for suffix, artifact in artifacts:
            artifact_path = os.path.join(model_results_path, flight_route + suffix)
            with open(artifact_path, 'wb') as file:
                pickle.dump(artifact, file)

    return threshold
def predict_train_set(lstm,
                      X_train,
                      save_model,
                      add_plots,
                      threshold,
                      features_list,
                      target_features_list,
                      results_path,
                      flight_route,
                      similarity_score,
                      X_train_scaler,
                      Y_train,
                      Y_train_scaler):
    """
    Score the train data set with the LSTM model, derive the threshold and optionally
    plot the training performance and persist the model artifacts
    :param lstm: LSTM model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: value forwarded to get_threshold together with the train scores
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """
    # NOTE(review): this file, as visible here, also defines an SVR variant named
    # predict_train_set earlier; in a single module this later definition replaces it -
    # confirm that is intended
    X_pred = lstm.predict(X_train, verbose=0)

    # One anomaly score per predicted window, compared against the matching target entry
    scores_train = [
        anomaly_score_multi(Y_train[row], prediction, similarity_score)
        for row, prediction in enumerate(X_pred)
    ]

    # choose threshold for which <LSTM_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(scores_train, threshold)

    # The "Figures" directory is created even when no plots are requested
    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    if add_plots:
        train_figures_results_path = os.path.join(figures_results_path, "Train")
        create_directories(train_figures_results_path)

        # multi_mean output is indexed as [:, feature] below
        # NOTE(review): presumably it averages over the window axis - confirm in multi_mean
        mean_x_actual = multi_mean(Y_train)
        mean_x_pred = multi_mean(X_pred)
        assert mean_x_actual.shape == mean_x_pred.shape

        for column, target_feature in enumerate(target_features_list):
            plot_prediction_performance(
                Y_train=mean_x_actual[:, column],
                X_pred=mean_x_pred[:, column],
                results_path=train_figures_results_path,
                title=f"Training performance of LSTM for {target_feature} in {flight_route}",
                y_label="Sensor's Mean Value")

    # Save created model if the indicator is true
    if save_model:
        data = {
            'features': features_list,
            'target_features': target_features_list,
            'threshold': threshold,
            'params': get_lstm_parameters_dictionary(),
        }

        model_results_path = os.path.join(results_path, "model_data")
        create_directories(model_results_path)

        # Model metadata goes to a JSON file next to the saved artifacts
        model_data_path = os.path.join(str(model_results_path), 'model_data.json')
        with open(model_data_path, 'w') as outfile:
            json.dump(data, outfile)

        # The network itself is stored through the model's own save() as an .h5 file
        lstm_model_path = os.path.join(str(model_results_path), str(flight_route) + '.h5')
        lstm.save(lstm_model_path)

        # Input scaler first, then the target scaler
        scalers = (
            ("_train_scaler.pkl", X_train_scaler),
            ("_target_scaler.pkl", Y_train_scaler),
        )
        for suffix, scaler in scalers:
            scaler_path = os.path.join(model_results_path, flight_route + suffix)
            with open(scaler_path, 'wb') as file:
                pickle.dump(scaler, file)

    return threshold