Example #1
def execute_train(flight_route,
                  training_data_path=None,
                  n_estimators=None,
                  criterion=None,
                  max_features=None,
                  random_state=None,
                  features_list=None,
                  window_size=1,
                  target_features_list=None,
                  event=None):
    """
    Execute train function for a specific flight route
    :param flight_route: current flight route we should train on
    :param training_data_path: the path of training data directory
    :param n_estimators: n estimators value
    :param criterion: criterion variable
    :param max_features: max amount of features
    :param random_state: random state value
    :param features_list: the list of features which the user chose for the train
    :param window_size: window size for each instance in training
    :param target_features_list: the list of features which the user chose for the target
    :param event: running state flag
    :return: random forest model, normalization input train scalar,normalization input target scalar, X_train data frame,Y_train data frame
    """

    without_anomaly_path = os.path.join(str(training_data_path),
                                        str(flight_route),
                                        'without_anom.csv')
    df_train = pd.read_csv(without_anomaly_path)

    input_df_train = df_train[features_list]
    target_df_train = df_train[target_features_list]

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data
    X_train, X_train_scaler = normalize_data(data=input_df_train,
                                             scaler="min_max")

    Y_train, Y_train_scaler = normalize_data(
        data=target_df_train,  # target data
        scaler="min_max")

    # Get the model configured with the user's parameters
    random_forest_model = get_random_forest_model(n_estimators=n_estimators,
                                                  criterion=criterion,
                                                  max_features=max_features,
                                                  random_state=random_state)

    tsr = TimeSeriesRegressor(random_forest_model, n_prev=window_size)

    event.wait()

    tsr.fit(X_train, Y_train)

    return tsr, X_train_scaler, Y_train_scaler, X_train, Y_train
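
The event parameter is a synchronization gate: event.wait() blocks until the caller signals that training may start. A minimal usage sketch, assuming the function above is importable; every path and column name below is a placeholder:

import threading

run_event = threading.Event()
run_event.set()  # signal immediately so execute_train does not block at event.wait()

model, x_scaler, y_scaler, X_train, Y_train = execute_train(
    flight_route='route_1',                    # placeholder route name
    training_data_path='data/train',           # placeholder directory
    n_estimators=100,
    criterion='mse',
    max_features='auto',
    random_state=42,
    features_list=['Time', 'Longitude'],       # placeholder input columns
    window_size=2,
    target_features_list=['Radio Distance'],   # placeholder target column
    event=run_event)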
Example #2
def execute_train(flight_route,
                  training_data_path=None,
                  hidden_layer_sizes=None,
                  activation=None,
                  solver=None,
                  alpha=None,
                  random_state=None,
                  features_list=None,
                  window_size=1,
                  target_features_list=None,
                  event=None):
    """
    Execute train function for a specific flight route
    :param flight_route: current flight route we should train on
    :param training_data_path: the path of training data directory
    :param hidden_layer_sizes: The ith element represents the number of neurons in the ith hidden layer.
    :param activation: Activation function for the hidden layer.
    :param solver: The solver for weight optimization.
    :param alpha: L2 penalty (regularization term) parameter.
    :param random_state: If int, random_state is the seed used by the random number generator
    :param features_list: the list of features which the user chose for the train
    :param window_size: window size for each instance in training
    :param target_features_list: the list of features which the user chose for the target
    :param event: running state flag
    :return: MLP model, input normalization scaler, target normalization scaler, X_train data frame, Y_train data frame
    """

    without_anomaly_path = os.path.join(str(training_data_path),
                                        str(flight_route),
                                        'without_anom.csv')
    df_train = pd.read_csv(without_anomaly_path)

    input_df_train = df_train[features_list]
    target_df_train = df_train[target_features_list]

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data
    X_train, X_train_scaler = normalize_data(data=input_df_train,
                                             scaler="min_max")

    Y_train, Y_train_scaler = normalize_data(data=target_df_train,  # target data
                                             scaler="min_max")

    # Get the model configured with the user's parameters
    mlp_model = get_mlp_model(hidden_layer_sizes=hidden_layer_sizes,
                              activation=activation,
                              solver=solver,
                              alpha=alpha,
                              random_state=random_state)

    tsr = TimeSeriesRegressor(mlp_model, n_prev=window_size)

    event.wait()

    tsr.fit(X_train, Y_train)

    return tsr, X_train_scaler, Y_train_scaler, X_train, Y_train
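
The parameter names and their docstrings mirror scikit-learn's MLPRegressor, so get_mlp_model is presumably a thin factory around it. A sketch under that assumption, not the project's actual code:

from sklearn.neural_network import MLPRegressor

def get_mlp_model(hidden_layer_sizes, activation, solver, alpha, random_state):
    # assumed implementation: forward the user's choices to MLPRegressor
    return MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                        activation=activation,
                        solver=solver,
                        alpha=alpha,
                        random_state=random_state)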
Example #3
def run_lstm_performance_plot(file_path, result_path):
    df_train = pd.read_csv(f'{file_path}/without_anom.csv')
    features_list = ['Time', 'Route Index', 'GPS Distance', 'Longitude']

    target_features_list = [
        'CINR1 OMNI', 'Radio Distance', 'Barometer Altitude'
    ]

    input_df_train = df_train[features_list]
    target_df_train = df_train[target_features_list]

    window_size = 2

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data
    X_train, X_train_scaler = normalize_data(data=input_df_train,
                                             scaler="min_max")
    X_train_preprocessed = get_training_data_lstm(X_train, window_size)

    Y_train, Y_train_scaler = normalize_data(
        data=target_df_train,  # target data
        scaler="min_max")
    Y_train_preprocessed = get_training_data_lstm(Y_train, window_size)

    # Get the model configured with the user's parameters
    lstm = get_lstm_autoencoder_model(timesteps=window_size,
                                      input_features=input_df_train.shape[1],
                                      target_features=target_df_train.shape[1],
                                      encoding_dimension=8,
                                      activation='relu',
                                      loss='mean_squared_error',
                                      optimizer='Adam')
    history = lstm.fit(X_train_preprocessed,
                       Y_train_preprocessed,
                       epochs=5,
                       verbose=0).history

    X_pred = lstm.predict(X_train_preprocessed, verbose=0)

    mean_y_train = multi_mean(Y_train_preprocessed)
    mean_x_pred = multi_mean(X_pred)

    assert mean_y_train.shape == mean_x_pred.shape

    for i, target_feature in enumerate(target_features_list):
        title = "Training performance of LSTM for " + target_feature
        plot_prediction_performance(Y_train=mean_y_train[:, i],
                                    X_pred=mean_x_pred[:, i],
                                    results_path=result_path,
                                    title=title,
                                    y_label="Sensor's Mean Value")
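
multi_mean collapses each 3-D windowed tensor of shape (samples, window_size, features) to one mean row per sample, which is what makes the per-feature indexing mean_y_train[:, i] above work. A plausible sketch of such a helper, assuming it averages over the timestep axis:

import numpy as np

def multi_mean(data3d):
    # (samples, timesteps, features) -> (samples, features)
    return np.asarray(data3d).mean(axis=1)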
Example #4
def execute_train(flight_route,
                  training_data_path=None,
                  kernel=None,
                  gamma=None,
                  epsilon=None,
                  features_list=None,
                  window_size=1,
                  target_features_list=None,
                  event=None):
    """
    Execute train function for a specific flight route
    :param flight_route: current flight route we should train on
    :param training_data_path: the path of training data directory
    :param kernel: kernel string value
    :param gamma: gamma string value
    :param epsilon: epsilon float value
    :param features_list: the list of features which the user chose for the train
    :param window_size: window size for each instance in training
    :param target_features_list: the list of features which the user chose for the target
    :param event: running state flag
    :return: SVR model, input normalization scaler, target normalization scaler, X_train data frame, Y_train data frame
    """

    without_anomaly_path = os.path.join(str(training_data_path),
                                        str(flight_route),
                                        'without_anom.csv')
    df_train = pd.read_csv(without_anomaly_path)

    input_df_train = df_train[features_list]
    target_df_train = df_train[target_features_list]

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data
    X_train, X_train_scaler = normalize_data(data=input_df_train,
                                             scaler="min_max")

    Y_train, Y_train_scaler = normalize_data(
        data=target_df_train,  # target data
        scaler="min_max")

    # Get the model configured with the user's parameters
    svr_model = get_svr_model(kernel=kernel, gamma=gamma, epsilon=epsilon)

    tsr = TimeSeriesRegressor(svr_model, n_prev=window_size)

    event.wait()

    tsr.fit(X_train, Y_train)

    return tsr, X_train_scaler, Y_train_scaler, X_train, Y_train
Example #5
def window_size_parameter_tuning_sklearn(ml_model, train_path, input_features,
                                         target_features, scaler,
                                         max_look_back):
    """
    Tune window size parameter over a constant range to get the optimal window size
    :param ml_model: machine learning model
    :param train_path: train set path
    :param input_features: input features list
    :param target_features: target features list
    :param scaler: scaler string
    :param max_look_back: upper bound; window sizes 1 .. max_look_back - 1 are scanned
    :return: test MSEs, one row per window size and one column per target feature
    """

    df = pd.read_csv(train_path)

    original_input_df = df[input_features]

    input_df, X_train_scaler = normalize_data(data=original_input_df,
                                              scaler=scaler)

    original_target_df = df[target_features]

    target_df, Y_train_scaler = normalize_data(data=original_target_df,
                                               scaler=scaler)

    x_train, x_test, y_train, y_test = train_test_split(input_df,
                                                        target_df,
                                                        shuffle=False)

    n_prevs = range(1, max_look_back)
    mses_test = np.empty((len(n_prevs), safe_shape(y_test, 1)))

    for i, n_prev in enumerate(n_prevs):
        tsr = TimeSeriesRegressor(ml_model, n_prev=n_prev)
        tsr.fit(x_train, y_train)
        # one row per window size: MSE per target feature; the first n_prev
        # rows of y_test have no full window, so they are skipped
        mses_test[i, :] = mse(tsr.predict(x_test), y_test[n_prev:])

    return mses_test
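
The returned matrix holds one row per candidate window size and one column per target feature, so choosing the overall best window is a small aggregation step. For example:

import numpy as np

# assume mses_test came from the function above, e.g.:
# mses_test = window_size_parameter_tuning_sklearn(model, path, ins, outs, 'min_max', 10)
mses_test = np.random.rand(9, 3)  # stand-in with shape (max_look_back - 1, n_targets)

# average across target features, then pick the lowest-MSE window size
best_window = 1 + int(np.argmin(mses_test.mean(axis=1)))  # window sizes start at 1
print(f'best window size: {best_window}')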
Example #6
def model_tuning(file_path, input_features, target_features, window_size,
                 scaler, results_path, model_name):
    """
    model's tuning process by using GridSearchCV
    :param file_path: data file path
    :param input_features: the list of features which the user chose for the train
    :param target_features: the list of features which the user chose for the target
    :param window_size: window size variable
    :param scaler: scaler name
    :param results_path: results path
    :param model_name: model name
    :return: best model params, best model score
    """

    df_train = pd.read_csv(file_path)

    input_df_train = df_train[input_features]
    target_df_train = df_train[target_features]

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data

    X = normalize_data(data=input_df_train, scaler=scaler)[0]

    Y = normalize_data(data=target_df_train, scaler=scaler)[0]

    # keep temporal order: the windowed regressor builds samples from consecutive rows
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, shuffle=False)

    model = get_model(model_name)

    model_grid_params = get_model_params(model_name)

    tsr = TimeSeriesRegressor(model, n_prev=window_size)

    grid_search = GridSearchCV(tsr, model_grid_params)

    grid_search.fit(X_train, Y_train)
    # consumed by the plotting block below (currently commented out)
    prediction = grid_search.predict(X_test)

    plot_title = "Optimized Time Series " + model_name + " model"
    print(str(model_name) + " " + str(grid_search.best_params_))
    current_time = get_current_time()
    file_name = str(current_time) + "-" + str(model_name) + "-model_data.json"
    data = {
        'model': model_name,
        'input_features': input_features,
        'target_features': target_features,
        'params': grid_search.best_params_,
        'score': grid_search.best_score_,
    }

    file_path = os.path.join(str(results_path), str(file_name))
    with open(file_path, 'w') as outfile:
        json.dump(data, outfile)

    # Y_test_preprocessed = tsr._preprocess(X_test, Y_test)[1]
    #
    # for i, target_feature in enumerate(target_features):
    #     title = "Grid search test performance of " + model_name + " for window size: " + \
    #             str(window_size) + " and " + target_feature + " feature"
    #     plot_prediction_performance(Y_train=Y_test_preprocessed[:, i],
    #                                 X_pred=prediction[:, i],
    #                                 results_path=results_path,
    #                                 title=title)

    return data['params'], data['score']
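
GridSearchCV can only clone and re-parameterize TimeSeriesRegressor because the class follows the scikit-learn estimator protocol: get_params/set_params (inherited from BaseEstimator) plus fit/predict. A minimal sketch of a compatible sliding-window wrapper, illustrating the protocol rather than reproducing the project's actual class:

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class WindowedRegressor(BaseEstimator, RegressorMixin):
    """Each sample is the previous n_prev rows of X, flattened."""

    def __init__(self, estimator=None, n_prev=1):
        self.estimator = estimator
        self.n_prev = n_prev

    def _windows(self, X):
        X = np.asarray(X)
        # rows t - n_prev .. t - 1 form the flat feature vector for target row t
        return np.array([X[t - self.n_prev:t].ravel()
                         for t in range(self.n_prev, len(X))])

    def fit(self, X, y):
        self.estimator.fit(self._windows(X), np.asarray(y)[self.n_prev:])
        return self

    def predict(self, X):
        return self.estimator.predict(self._windows(X))

Because the wrapper exposes n_prev and the nested estimator as constructor arguments, a grid can tune both at once, e.g. {'n_prev': [1, 2, 3], 'estimator__n_estimators': [50, 100]}.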
Example #7
def execute_train(flight_route,
                  training_data_path=None,
                  results_path=None,
                  window_size=None,
                  encoding_dimension=None,
                  activation=None,
                  loss=None,
                  optimizer=None,
                  add_plots=True,
                  features_list=None,
                  epochs=10,
                  target_features_list=None,
                  event=None):
    """
    Execute train function for a specific flight route
    :param flight_route: current flight route we should train on
    :param training_data_path: the path of training data directory
    :param results_path: the path of results directory
    :param window_size: window size variable
    :param encoding_dimension: encoding dimension variable
    :param activation: activation function
    :param loss: loss function
    :param optimizer: optimizer
    :param add_plots: indicator whether to add plots or not
    :param features_list: the list of features which the user chose
    :param epochs: number of epochs chosen by the user
    :param target_features_list: the list of features which the user chose for the target
    :param event: running state flag
    :return: LSTM model, input normalization scaler, target normalization scaler, X_train preprocessed data, Y_train preprocessed data
    """

    without_anomaly_path = os.path.join(str(training_data_path),
                                        str(flight_route),
                                        'without_anom.csv')
    df_train = pd.read_csv(without_anomaly_path)

    input_df_train = df_train[features_list]
    target_df_train = df_train[target_features_list]

    # Step 1: Clean the train data set
    input_df_train = clean_data(input_df_train)

    target_df_train = clean_data(target_df_train)

    # Step 2: Normalize the data
    X_train, X_train_scaler = normalize_data(data=input_df_train,
                                             scaler="min_max")
    X_train_preprocessed = get_training_data_lstm(X_train, window_size)

    Y_train, Y_train_scaler = normalize_data(
        data=target_df_train,  # target data
        scaler="min_max")
    Y_train_preprocessed = get_training_data_lstm(Y_train, window_size)

    # Get the model configured with the user's parameters
    lstm = get_lstm_autoencoder_model(timesteps=window_size,
                                      input_features=input_df_train.shape[1],
                                      target_features=target_df_train.shape[1],
                                      encoding_dimension=encoding_dimension,
                                      activation=activation,
                                      loss=loss,
                                      optimizer=optimizer)

    event.wait()

    history = lstm.fit(X_train_preprocessed,
                       Y_train_preprocessed,
                       epochs=epochs,
                       verbose=0).history

    # Add plots if the indicator is true
    if add_plots:
        plot(history['loss'],
             ylabel='loss',
             xlabel='epoch',
             title=f'{flight_route} Epoch Loss',
             plot_dir=results_path)

    return lstm, X_train_scaler, Y_train_scaler, X_train_preprocessed, Y_train_preprocessed
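
get_training_data_lstm reshapes the 2-D normalized frame into the 3-D (samples, timesteps, features) tensor that Keras LSTM layers expect. A plausible sketch, assuming a simple sliding window (the project's helper may differ in edge handling):

import numpy as np

def get_training_data_lstm(data, window_size):
    # (rows, features) -> (rows - window_size + 1, window_size, features)
    data = np.asarray(data)
    return np.array([data[t:t + window_size]
                     for t in range(len(data) - window_size + 1)])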
Example #8
def model_tuning(file_path, input_features, target_features, window_size,
                 scaler, results_path):
    """
    model's tuning process using a manual grid search over LSTM configurations
    :param file_path: data file path
    :param input_features: the list of features which the user chose for the train
    :param target_features: the list of features which the user chose for the target
    :param window_size: window size variable
    :param scaler: scaler name
    :param results_path: results path
    :return: best model params, best model score
    """

    df_train = pd.read_csv(file_path)

    input_df_train = df_train[input_features]
    target_df_train = df_train[target_features]

    X = normalize_data(data=input_df_train, scaler=scaler)[0]

    Y = normalize_data(data=target_df_train, scaler=scaler)[0]

    # keep temporal order: the LSTM windows are built from consecutive rows
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, shuffle=False)

    assert len(X_train) == len(Y_train)
    assert len(X_test) == len(Y_test)

    X_train_preprocessed = get_training_data_lstm(X_train, window_size)
    X_test_preprocessed = get_training_data_lstm(X_test, window_size)

    Y_train_preprocessed = get_training_data_lstm(Y_train, window_size)
    Y_test_preprocessed = get_training_data_lstm(Y_test, window_size)

    params_configurations = get_lstm_params_configurations()

    total_scores = dict()

    for config in params_configurations:
        encoding_dimension, activation, loss, optimizer, epochs = config

        lstm_model = get_lstm_autoencoder_model(
            timesteps=window_size,
            input_features=input_df_train.shape[1],
            target_features=target_df_train.shape[1],
            encoding_dimension=encoding_dimension,
            activation=activation,
            loss=loss,
            optimizer=optimizer)
        lstm_model.fit(X_train_preprocessed,
                       Y_train_preprocessed,
                       epochs=epochs,
                       verbose=0)

        X_test_pred = lstm_model.predict(X_test_preprocessed)

        scores = []
        for i, pred in enumerate(X_test_pred):
            scores.append(
                anomaly_score_multi(Y_test_preprocessed[i], pred, 'MSE'))

        total_scores[str(config)] = mean(scores)

    # pick the configuration with the lowest mean anomaly score
    best_config, best_score = min(total_scores.items(), key=lambda item: item[1])
    print(best_config)
    print(best_score)

    current_time = get_current_time()
    file_name = str(current_time) + "-LSTM-model_data.json"
    data = {
        'model': 'LSTM',
        'input_features': input_features,
        'target_features': target_features,
        'window_size': window_size,
        'params': best_config,
        'score': best_score,
    }

    file_path = os.path.join(str(results_path), str(file_name))
    with open(file_path, 'w') as outfile:
        json.dump(data, outfile)

    return data['params'], data['score']
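
Because the winning configuration is persisted as str(config), reading it back means parsing a tuple literal rather than a JSON object. A hypothetical reload sketch (the file path is a placeholder):

import ast
import json

saved_json_path = 'results/2020-01-01-LSTM-model_data.json'  # placeholder path

with open(saved_json_path) as infile:
    tuned = json.load(infile)

# tuned['params'] looks like "(8, 'relu', 'mse', 'adam', 10)"
encoding_dimension, activation, loss, optimizer, epochs = ast.literal_eval(tuned['params'])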
Example #9
def window_size_parameter_tuning_keras(train_path, input_features,
                                       target_features, scaler, max_look_back):
    """
    Tune window size parameter over a constant range to get the optimal window size - lstm
    :param train_path: train set path
    :param input_features: input features list
    :param target_features: target features list
    :param scaler: scaler string
    :param max_look_back: upper bound; window sizes 1 .. max_look_back - 1 are scanned
    :return: test MSEs, one row per window size and one column per target feature
    """

    df = pd.read_csv(train_path)

    original_input_df = df[input_features]

    input_df, X_train_scaler = normalize_data(data=original_input_df,
                                              scaler=scaler)

    original_target_df = df[target_features]

    target_df, Y_train_scaler = normalize_data(data=original_target_df,
                                               scaler=scaler)

    x_train, x_test, y_train, y_test = train_test_split(input_df,
                                                        target_df,
                                                        shuffle=False)

    n_prevs = range(1, max_look_back)
    mses_test = np.empty((len(n_prevs), safe_shape(y_test, 1)))

    for i, n_prev in enumerate(n_prevs):
        X_train_preprocessed = get_training_data_lstm(x_train, n_prev)
        Y_train_preprocessed = get_training_data_lstm(y_train, n_prev)
        X_test_preprocessed = get_training_data_lstm(x_test, n_prev)
        y_test_preprocessed = get_training_data_lstm(y_test, n_prev)

        lstm = get_lstm_autoencoder_model(
            timesteps=n_prev,
            input_features=original_input_df.shape[1],
            target_features=original_target_df.shape[1],
            encoding_dimension=10,
            activation='relu',
            loss='mse',
            optimizer='adam')

        lstm.fit(X_train_preprocessed,
                 Y_train_preprocessed,
                 epochs=10,
                 verbose=0)

        predicted = lstm.predict(X_test_preprocessed)
        actual = y_test_preprocessed

        assert predicted.shape == actual.shape

        # per-target-feature MSE for this window size
        mses_test[i, :] = multi_mse(predicted, actual)

    return mses_test