Ejemplo n.º 1
0
def run_lstm_performance_plot(file_path, result_path):
    """
    Fit an LSTM autoencoder on the anomaly-free data set and plot its training fit.

    Loads ``without_anom.csv`` from ``file_path``, cleans and normalizes a fixed set of
    input and target columns, trains the model for 5 epochs and produces one
    prediction-performance plot per target feature.
    :param file_path: directory containing without_anom.csv
    :param result_path: forwarded to plot_prediction_performance as results_path
    :return: None
    """
    input_columns = ['Time', 'Route Index', 'GPS Distance', 'Longitude']
    target_columns = ['CINR1 OMNI', 'Radio Distance', 'Barometer Altitude']
    timesteps = 2

    train_frame = pd.read_csv(f'{file_path}/without_anom.csv')
    raw_inputs = train_frame[input_columns]
    raw_targets = train_frame[target_columns]

    # Step 1: clean both halves of the training set
    cleaned_inputs = clean_data(raw_inputs)
    cleaned_targets = clean_data(raw_targets)

    # Step 2: normalize, then reshape into windows of `timesteps` rows.
    # The fitted scalers are returned by normalize_data but are not needed here.
    scaled_inputs, _input_scaler = normalize_data(data=cleaned_inputs,
                                                  scaler="min_max")
    windowed_inputs = get_training_data_lstm(scaled_inputs, timesteps)

    scaled_targets, _target_scaler = normalize_data(data=cleaned_targets,
                                                    scaler="min_max")
    windowed_targets = get_training_data_lstm(scaled_targets, timesteps)

    # Build the autoencoder; feature counts come from the cleaned frames
    model = get_lstm_autoencoder_model(
        timesteps=timesteps,
        input_features=cleaned_inputs.shape[1],
        target_features=cleaned_targets.shape[1],
        encoding_dimension=8,
        activation='relu',
        loss='mean_squared_error',
        optimizer='Adam',
    )

    # The training history is fetched as in the original flow but is not used further
    training_history = model.fit(windowed_inputs,
                                 windowed_targets,
                                 epochs=5,
                                 verbose=0).history

    predicted_windows = model.predict(windowed_inputs, verbose=0)

    # Reduce actual and predicted windows with multi_mean so they can be plotted per feature
    actual_means = multi_mean(windowed_targets)
    predicted_means = multi_mean(predicted_windows)

    assert actual_means.shape == predicted_means.shape

    for column_index, feature_name in enumerate(target_columns):
        plot_prediction_performance(
            Y_train=actual_means[:, column_index],
            X_pred=predicted_means[:, column_index],
            results_path=result_path,
            title=f"Training performance of LSTM for {feature_name}",
            y_label="Sensor's Mean Value",
        )
Ejemplo n.º 2
0
def execute_predict(flight_route,
                    test_data_path=None,
                    similarity_score=None,
                    threshold=None,
                    svr_model=None,
                    X_train_scaler=None,
                    results_path=None,
                    add_plots=True,
                    run_new_model=False,
                    X_train=None,
                    features_list=None,
                    target_features_list=None,
                    save_model=False,
                    Y_train_scaler=None,
                    Y_train=None,
                    window_size=None,
                    event=None):
    """
    Execute predictions function for a specific flight route

    Every CSV file found under <test_data_path>/<flight_route>/<attack>/ is cleaned,
    scaled with the train scalers, scored against the SVR predictions and evaluated
    with get_method_scores.
    :param flight_route: current flight route we should train on
    :param test_data_path: the path of test data directory
    :param similarity_score: similarity function
    :param threshold: threshold from the train; recomputed from the train set when
                      run_new_model is true
    :param svr_model: SVR model
    :param X_train_scaler: normalization train input scalar
    :param results_path: the path of results directory
    :param add_plots: indicator whether to add plots or not
    :param run_new_model: indicator whether current flow is new model creation or not
    :param X_train: train input data frame
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param save_model: indicator whether the user want to save the model or not
    :param Y_train_scaler: normalization train target scalar
    :param Y_train: train target data frame
    :param window_size: window size for each instance in training
    :param event: running state flag; its wait() is called at checkpoints
                  (presumably a threading.Event). May be None, in which case the
                  function never blocks.
    :return: tpr scores, fpr scores, acc scores, delay scores, routes duration, attack duration
             (each a defaultdict(list) keyed by attack directory name, one entry per CSV)
    """

    def wait_if_event_given():
        # `event` defaults to None; calling wait() on it unconditionally would raise
        # AttributeError for callers that do not pass a running-state flag.
        if event is not None:
            event.wait()

    tpr_scores = defaultdict(list)
    fpr_scores = defaultdict(list)
    acc_scores = defaultdict(list)
    delay_scores = defaultdict(list)
    routes_duration = defaultdict(list)
    attack_duration = defaultdict(list)

    # Set a threshold in new model creation flow
    if run_new_model:
        wait_if_event_given()
        threshold = predict_train_set(svr_model, X_train, save_model,
                                      add_plots, threshold, features_list,
                                      target_features_list, results_path,
                                      flight_route, similarity_score,
                                      X_train_scaler, Y_train, Y_train_scaler)

    flight_dir = os.path.join(test_data_path, flight_route)
    attacks = get_subdirectories(flight_dir)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    attacks_figures_results_path = os.path.join(figures_results_path,
                                                "Attacks")
    create_directories(attacks_figures_results_path)

    # Iterate over all attacks in order to find anomalies
    for attack in attacks:
        wait_if_event_given()

        # Figures are grouped by the part of the directory name before the first "_";
        # split() returns the whole name when there is no underscore.
        attack_name = attack.split("_")[0]

        current_attack_figures_results_path = os.path.join(
            attacks_figures_results_path, attack_name)
        create_directories(current_attack_figures_results_path)

        attacks_path = os.path.join(flight_dir, attack)
        for flight_csv in os.listdir(attacks_path):
            flight_attack_path = os.path.join(attacks_path, flight_csv)
            df_test_source = pd.read_csv(flight_attack_path)

            Y_test_labels = df_test_source[[ATTACK_COLUMN]].values
            Y_test_labels_preprocessed = svr_model._preprocess(
                Y_test_labels, Y_test_labels)[1]

            # Route duration is measured as the number of rows in the test file
            attack_time = len(Y_test_labels)

            input_df_test = df_test_source[features_list]
            target_df_test = df_test_source[target_features_list]

            # Step 1 : Clean test data set
            input_clean_df_test = clean_data(input_df_test)
            target_clean_df_test = clean_data(target_df_test)

            # Step 2: Normalize the data with the scalers fitted on the train set
            X_test = X_train_scaler.transform(input_clean_df_test)
            Y_test = Y_train_scaler.transform(target_clean_df_test)

            Y_test_preprocessed = svr_model._preprocess(Y_test, Y_test)[1]

            X_pred = svr_model.predict(X_test)
            assert len(Y_test_preprocessed) == len(X_pred)

            scores_test = [
                anomaly_score(Y_test_preprocessed[i], pred, similarity_score)
                for i, pred in enumerate(X_pred)
            ]

            # Add reconstruction error scatter if plots indicator is true
            wait_if_event_given()
            if add_plots:
                plot_reconstruction_error_scatter(
                    scores=scores_test,
                    labels=Y_test_labels_preprocessed,
                    threshold=threshold,
                    plot_dir=current_attack_figures_results_path,
                    title=
                    f'Outlier Score Testing for {flight_csv} in {flight_route}({attack})'
                )

                for i, target_feature in enumerate(target_features_list):
                    title = f"Test performance of SVR for {target_feature} feature in {flight_csv}"
                    plot_prediction_performance(
                        Y_train=Y_test_preprocessed[:, i],
                        X_pred=X_pred[:, i],
                        results_path=current_attack_figures_results_path,
                        title=title)

            # A record is flagged as anomalous when its score reaches the threshold
            predictions = [1 if x >= threshold else 0 for x in scores_test]

            attack_start, attack_end = get_attack_boundaries(
                df_test_source[ATTACK_COLUMN])

            method_scores = get_method_scores(predictions,
                                              attack_start,
                                              attack_end,
                                              add_window_size=True,
                                              window_size=window_size)

            tpr_scores[attack].append(method_scores[0])
            fpr_scores[attack].append(method_scores[1])
            acc_scores[attack].append(method_scores[2])
            delay_scores[attack].append(method_scores[3])
            routes_duration[attack].append(attack_time)
            attack_duration[attack].append(method_scores[4])

    return tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attack_duration
Ejemplo n.º 3
0
def predict_train_set(svr_model, X_train, save_model, add_plots, threshold,
                      features_list, target_features_list, results_path,
                      flight_route, similarity_score, X_train_scaler, Y_train,
                      Y_train_scaler):
    """
    Score the SVR model on its own training data and derive the anomaly threshold

    Optionally plots the per-feature training fit and persists the model, both
    scalers and a JSON description of the run under results_path.
    :param svr_model: SVR model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: threshold setting handed to get_threshold together with the train scores
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """

    predicted = svr_model.predict(X_train)

    # The second element returned by _preprocess is the target data aligned with the predictions
    actual = svr_model._preprocess(Y_train, Y_train)[1]
    assert len(actual) == len(predicted)

    train_scores = [
        anomaly_score(actual[row], row_prediction, similarity_score)
        for row, row_prediction in enumerate(predicted)
    ]

    # choose threshold for which <MODEL_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(train_scores, threshold)

    # The Figures directory is created even when no plots are requested
    figures_dir = os.path.join(results_path, "Figures")
    create_directories(figures_dir)

    if add_plots:
        train_figures_dir = os.path.join(figures_dir, "Train")
        create_directories(train_figures_dir)

        for column, feature_name in enumerate(target_features_list):
            plot_prediction_performance(
                Y_train=actual[:, column],
                X_pred=predicted[:, column],
                results_path=train_figures_dir,
                title="Training performance of SVR for " + feature_name + " in " + flight_route)

    # Save created model if the indicator is true
    if save_model:
        model_description = {
            'features': features_list,
            'target_features': target_features_list,
            'threshold': threshold,
            'params': get_svr_parameters_dictionary(),
        }

        model_dir = os.path.join(results_path, "model_data")
        create_directories(model_dir)

        description_path = os.path.join(str(model_dir), 'model_data.json')
        with open(description_path, 'w') as description_file:
            json.dump(model_description, description_file)

        # Model first, then input scaler, then target scaler - one pickle file each
        pickled_artifacts = (
            ("_model.pkl", svr_model),
            ("_train_scaler.pkl", X_train_scaler),
            ("_target_scaler.pkl", Y_train_scaler),
        )
        for file_suffix, artifact in pickled_artifacts:
            artifact_path = os.path.join(model_dir, flight_route + file_suffix)
            with open(artifact_path, 'wb') as artifact_file:
                pickle.dump(artifact, artifact_file)

    return threshold
def predict_train_set(lstm, X_train, save_model, add_plots, threshold,
                      features_list, target_features_list, results_path,
                      flight_route, similarity_score, X_train_scaler, Y_train,
                      Y_train_scaler):
    """
    Score the LSTM model on its own training data and derive the anomaly threshold

    Optionally plots the per-feature training fit and persists the model (.h5),
    both scalers (pickle) and a JSON description of the run under results_path.
    :param lstm: LSTM model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: threshold setting handed to get_threshold together with the train scores
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """

    predicted = lstm.predict(X_train, verbose=0)

    # Y_train is indexed by position, so it must hold at least as many entries as the predictions
    train_scores = [
        anomaly_score_multi(Y_train[position], window_prediction, similarity_score)
        for position, window_prediction in enumerate(predicted)
    ]

    # choose threshold for which <LSTM_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(train_scores, threshold)

    # The Figures directory is created even when no plots are requested
    figures_dir = os.path.join(results_path, "Figures")
    create_directories(figures_dir)

    if add_plots:
        train_figures_dir = os.path.join(figures_dir, "Train")
        create_directories(train_figures_dir)

        # Reduce actual and predicted data with multi_mean before plotting per feature
        actual_means = multi_mean(Y_train)
        predicted_means = multi_mean(predicted)

        assert actual_means.shape == predicted_means.shape

        for column, feature_name in enumerate(target_features_list):
            plot_prediction_performance(
                Y_train=actual_means[:, column],
                X_pred=predicted_means[:, column],
                results_path=train_figures_dir,
                title="Training performance of LSTM for " + feature_name + " in " + flight_route,
                y_label="Sensor's Mean Value")

    # Save created model if the indicator is true
    if save_model:
        model_description = {
            'features': features_list,
            'target_features': target_features_list,
            'threshold': threshold,
            'params': get_lstm_parameters_dictionary(),
        }

        model_dir = os.path.join(results_path, "model_data")
        create_directories(model_dir)

        description_path = os.path.join(str(model_dir), 'model_data.json')
        with open(description_path, 'w') as description_file:
            json.dump(model_description, description_file)

        # The network itself is stored through its own save() method
        network_path = os.path.join(str(model_dir), str(flight_route) + '.h5')
        lstm.save(network_path)

        # Input scaler first, then target scaler - one pickle file each
        pickled_scalers = (
            ("_train_scaler.pkl", X_train_scaler),
            ("_target_scaler.pkl", Y_train_scaler),
        )
        for file_suffix, scaler in pickled_scalers:
            scaler_path = os.path.join(model_dir, flight_route + file_suffix)
            with open(scaler_path, 'wb') as scaler_file:
                pickle.dump(scaler, scaler_file)

    return threshold