Beispiel #1
0
def ahmad_health_indicator_ahmad_classifier_ffnn_rul_prediction():
    """Train an FFNN on the Ahmad health indicator and plot test-set RUL.

    Steps: load the learning set, derive the Ahmad health indicator, merge
    the labels back in, determine and plot first prediction times (FPTs),
    fit the FFNN, and finally plot predicted vs. real RUL for the FPT-cut
    test set.
    """
    # Load features and separate the label frames from them.
    feature_frames = read_feature_dfs(LEARNING_SET)
    label_frames = pop_labels(feature_frames)
    indicator_frames = construct_ahmad_health_indicator(feature_frames)

    # Re-attach each bearing's labels to its health indicator by index.
    labelled_frames = []
    for i in range(len(label_frames)):
        labelled_frames.append(
            pd.merge(indicator_frames[i],
                     label_frames[i],
                     left_index=True,
                     right_index=True))

    # NOTE(review): the cut frames returned here are discarded and the model
    # below is fitted on the uncut frames - confirm this is intended.
    _, fpts = cut_fpts(labelled_frames,
                       fpt_method=ahmad_et_al_2019,
                       signal_key='ahmad_health_indicator')
    plot_fpts(fpts,
              df_list=labelled_frames,
              classification_indicator='ahmad_health_indicator')

    # Fit the regression network on all bearings at once.
    learning_frame = concat_dfs(labelled_frames)
    rul_targets = learning_frame.pop('RUL')
    ffnn, _ = fit_ffnn(learning_frame,
                       rul_targets,
                       dropout=True,
                       epochs=30,
                       hidden_layers=3,
                       hidden_units=128)

    # Evaluate visually on the FPT-cut test set.
    test_frames = read_feature_dfs(FULL_TEST_SET)
    test_frames, _ = cut_fpts(test_frames)
    test_labels = pop_labels(test_frames)
    plot_rul_comparisons(test_frames,
                         label_data=test_labels,
                         prediction_model=ffnn)
def all_features_pca_combination_no_classifier_ffnn_rul_prediction():
    """Train an FFNN on PCA-embedded features and plot RUL comparisons.

    All features of the learning set are concatenated, embedded with PCA and
    used to fit an FFNN. Predicted vs. real RUL is then plotted for every
    frame of the learning set and of the full test set, each transformed
    with the same fitted PCA.
    """
    # Read training data
    feature_list = ALL_FEATURES
    training_data = read_feature_dfs(data_set_sub_set=LEARNING_SET, features=feature_list)
    learning_set = concat_dfs(training_data)
    # Split the regression target off the concatenated frame.
    training_labels = learning_set.pop('RUL')

    # Fit the PCA embedding on the label-free training data.
    model_training_data, pca = pca_embedded_data_frame(learning_set, verbose=False)

    model_training_data = pd.DataFrame(model_training_data)
    ffnn, training_history = fit_ffnn(model_training_data, training_labels, epochs=60)  # TODO dropout=True

    plot_trainings_history(training_history)

    # Visualize predicted RUL in comparison to real RUL of training set
    training_label_data = pop_labels(training_data)
    # The progress bar now names the set that is actually being transformed.
    transformed_training_set = [
        pd.DataFrame(pca.transform(X=df))
        for df in tqdm(training_data, desc="Transforming training set.", position=0, leave=True)
    ]
    plot_rul_comparisons(transformed_training_set, label_data=training_label_data, prediction_model=ffnn)

    # Visualize predicted RUL in comparison to real RUL of validation set
    comparison_set = read_feature_dfs(data_set_sub_set=FULL_TEST_SET, features=feature_list)
    label_data = pop_labels(comparison_set)
    transformed_comparison_set = [
        pd.DataFrame(pca.transform(X=df))
        for df in tqdm(comparison_set, desc="Transforming validation set.", position=0, leave=True)
    ]

    plot_rul_comparisons(transformed_comparison_set, label_data=label_data, prediction_model=ffnn)
Beispiel #3
0
def isomap_features_no_classifier_ffnn_rul_prediction(training_data,
                                                      comparison_set):
    """Train an FFNN on Isomap-embedded features and plot RUL comparisons.

    :param training_data: data frames that are concatenated for training;
        the concatenated frame must contain an ``'RUL'`` column.
    :param comparison_set: data frames to evaluate on. ``pop_labels`` is
        applied to this argument directly, so it presumably loses its labels
        in place - verify against ``pop_labels``.
    """
    # Build one training frame and split off the regression target.
    merged_training_frame = concat_dfs(training_data)
    rul_targets = merged_training_frame.pop('RUL')

    # Fit the Isomap embedding on the label-free training data.
    embedded_training_data, isomap = isomap_embedded_data_frame(
        merged_training_frame, verbose=False)

    ffnn, training_history = fit_ffnn(embedded_training_data,
                                      rul_targets,
                                      epochs=100)
    plot_trainings_history(training_history)

    # Separate labels, then project each comparison frame with the fitted
    # Isomap embedding.
    label_data = pop_labels(comparison_set)
    embedded_comparison_set = []
    for df in comparison_set:
        embedded_comparison_set.append(pd.DataFrame(isomap.transform(X=df)))

    # Visualize predicted RUL in comparison to real RUL.
    plot_rul_comparisons(embedded_comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
Beispiel #4
0
def all_features_and_autoencoder_li_2019_classifier_ffnn_rul_prediction():
    """Two-stage RUL prediction: FPT cut, PCA embedding, FFNN regression.

    The learning set is cut at its first prediction times, concatenated,
    embedded with PCA and used to fit an FFNN. The FPT-cut test set is then
    transformed with the same PCA and predicted vs. real RUL is plotted.

    NOTE(review): the function name mentions an autoencoder, but the
    embedding applied here is ``pca_embedded_data_frame`` - confirm naming.
    """
    # Stage 1: load the learning features and cut them at the FPTs.
    learning_frames = read_feature_dfs(LEARNING_SET, FEATURES_CSV_NAME)
    cut_learning_frames, _ = cut_fpts(learning_frames)

    # Build the training matrix: concatenate, split off target, embed.
    merged_bearings = concat_dfs(cut_learning_frames)
    rul_targets = merged_bearings.pop('RUL')
    merged_bearings, pca = pca_embedded_data_frame(merged_bearings)

    # Stage 2: RUL regression with an FFNN.
    # NOTE(review): fit_ffnn is unpacked here as (history, model); other
    # callers unpack it as (model, history) - confirm which order the
    # helper in this code base actually returns.
    trainings_history, ffnn = fit_ffnn(X=merged_bearings,
                                       y=rul_targets,
                                       dropout=True,
                                       epochs=150)
    plot_trainings_history(trainings_history)

    # Prepare the test set the same way: cut, remove labels, apply PCA.
    test_frames = read_feature_dfs(FULL_TEST_SET, FEATURES_CSV_NAME)
    test_frames, _ = cut_fpts(test_frames)
    test_labels = pop_labels(test_frames)
    embedded_test_frames = []
    for df in test_frames:
        embedded_test_frames.append(pd.DataFrame(pca.transform(df)))

    # Visualize predicted RUL in comparison to real RUL.
    plot_rul_comparisons(embedded_test_frames,
                         label_data=test_labels,
                         prediction_model=ffnn)
def pca_features_no_classifier_ffnn_rul_prediction(training_data,
                                                   comparison_set):
    """Train an FFNN on PCA-embedded features and plot RUL comparisons.

    :param training_data: data frames used for training; the concatenated
        frame must contain an ``'RUL'`` column. ``pop_labels`` is applied to
        this argument directly, so it presumably loses its labels in place -
        verify against ``pop_labels``.
    :param comparison_set: data frames used for validation; ``pop_labels``
        is applied to this argument as well.
    """
    # Concatenate the training frames and split off the regression target.
    pca_training_data = concat_dfs(training_data)
    labels = pca_training_data.pop('RUL')
    model_training_data, pca = pca_embedded_data_frame(pca_training_data,
                                                       n_components=300,
                                                       verbose=False)

    model_training_data = pd.DataFrame(model_training_data)
    ffnn, training_history = fit_ffnn(model_training_data,
                                      labels,
                                      epochs=60,
                                      dropout=True)

    plot_trainings_history(training_history)

    # Visualize predicted RUL in comparison to real RUL of training set
    training_label_data = pop_labels(training_data)
    # Project every training frame with the fitted PCA.
    transformed_training_set = [
        pd.DataFrame(pca.transform(X=df))
        for df in tqdm(training_data,
                       desc="Transforming training set.",
                       position=0,
                       leave=True)
    ]
    plot_rul_comparisons(transformed_training_set,
                         label_data=training_label_data,
                         prediction_model=ffnn)

    # Visualize predicted RUL in comparison to real RUL of validation set
    label_data = pop_labels(comparison_set)
    # Project every validation frame with the same fitted PCA.
    transformed_comparison_set = [
        pd.DataFrame(pca.transform(X=df))
        for df in tqdm(comparison_set,
                       desc="Transforming validation set.",
                       position=0,
                       leave=True)
    ]

    plot_rul_comparisons(transformed_comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
Beispiel #6
0
def all_features_no_classifier_lstm_rul_prediction():
    """Fit an LSTM on the learning-set features and plot its training history."""
    # The frames are passed to the LSTM as a list; they are not concatenated.
    learning_frames = read_feature_dfs(LEARNING_SET)
    learning_labels = pop_labels(learning_frames)

    # Only the history is used afterwards; the fitted model is discarded.
    _, history = fit_lstm(learning_frames, learning_labels, dropout=True)
    plot_trainings_history(trainings_history=history)
Beispiel #7
0
def statistical_features_li_2019_classifier_ffnn_rul_prediction():
    """Two-stage RUL prediction on basic statistical features with an FFNN.

    The learning set is cut at its first prediction times and used to fit an
    FFNN. Predicted vs. real RUL is plotted for the cut learning set and for
    the FPT-cut test set.
    """
    # Stage 1: load the statistical features and cut them at the FPTs.
    learning_frames = read_dfs(LEARNING_SET,
                               FEATURES_CSV_NAME,
                               features=BASIC_STATISTICAL_FEATURES)
    cut_dfs, _ = cut_fpts(learning_frames)

    # Concatenate the cut frames and split off the regression target.
    merged_bearings = concat_dfs(cut_dfs)
    rul_targets = merged_bearings.pop('RUL')

    # Stage 2: RUL regression with an FFNN.
    # NOTE(review): fit_ffnn is unpacked here as (history, model); other
    # callers unpack it as (model, history) - confirm which order the
    # helper in this code base actually returns.
    trainings_history, ffnn = fit_ffnn(X=merged_bearings,
                                       y=rul_targets,
                                       dropout=True,
                                       epochs=100,
                                       hidden_layers=3,
                                       hidden_units=1024)
    plot_trainings_history(trainings_history)

    # Predicted vs. real RUL on the (cut) training frames.
    training_label_data = pop_labels(cut_dfs)
    plot_rul_comparisons(cut_dfs,
                         label_data=training_label_data,
                         prediction_model=ffnn)

    # Predicted vs. real RUL on the FPT-cut test frames.
    test_frames = read_dfs(FULL_TEST_SET,
                           FEATURES_CSV_NAME,
                           features=BASIC_STATISTICAL_FEATURES)
    test_frames, _ = cut_fpts(test_frames)
    test_label_data = pop_labels(test_frames)
    plot_rul_comparisons(test_frames,
                         label_data=test_label_data,
                         prediction_model=ffnn)
Beispiel #8
0
def eval_isomap(encoding_sizes=(5, 15, 25, 35, 45, 55)):
    """Train and score one Isomap-embedding FFNN per encoding size.

    For every size in ``encoding_sizes`` an ``EmbeddingFeaturesFNNN`` with a
    fresh ``IsomapEmbedding`` is trained on the learning set and scored on
    the full test set with RMSE and the correlation coefficient. The
    collected metrics are passed to ``save_latex_aggregated_table``.

    :param encoding_sizes: encoding sizes to evaluate; the default
        reproduces the six sizes that were previously hard-coded.
    """
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)

    # One model per encoding size; each gets its own embedding instance.
    isomap_models = [
        EmbeddingFeaturesFNNN(name="Isomap combined %d" % encoding_size,
                              embedding_method=IsomapEmbedding(),
                              encoding_size=encoding_size,
                              data_set_type=DataSetType.computed)
        for encoding_size in encoding_sizes
    ]

    metrics_dict = {}
    for isomap_model in isomap_models:
        # No validation data is handed to training; the test set is only
        # used for the metric computation below.
        isomap_model.train(training_data,
                           training_labels,
                           validation_data=None,
                           validation_labels=None)
        metrics_dict[isomap_model.name] = isomap_model.compute_metrics(
            df_dict=validation_dict,
            labels=validation_labels,
            metrics_list=[rmse, correlation_coefficient])
    save_latex_aggregated_table(metrics_dict, None)
Beispiel #9
0
def spectra_features_no_classifier_cnn_rul_prediction(train: bool = True):
    """Train (or load) a CNN on frequency spectra and plot test-set RUL.

    :param train: if True, fit a CNN on the learning-set spectra, plot its
        training history and save it under ``keras_models/spectra_none_cnn``;
        if False, load the previously saved model from that path instead.
    """
    model_path: Path = Path('keras_models').joinpath('spectra_none_cnn')
    n_rows: int = 129
    n_cols: int = 21
    spectra_shape: tuple = (n_rows, n_cols)
    # Same shape with an additional trailing axis of size 1 for the CNN.
    input_shape: tuple = (n_rows, n_cols, 1)

    if train:
        # Read in training data
        read_spectra_dfs = read_feature_dfs(LEARNING_SET, SPECTRA_CSV_NAME)
        # NOTE(review): with ignore_index=True the keys should not appear in
        # the resulting index - confirm whether `keys` is still needed.
        spectra_dfs = pd.concat(
            read_spectra_dfs,
            ignore_index=True,
            keys=['Bearing' + str(x) for x in range(0, len(read_spectra_dfs))])
        labels = spectra_dfs.pop('RUL')

        # Reformat flattened spectra: every row becomes one
        # (n_rows, n_cols) matrix. A single reshape replaces the per-row
        # Python loop and keeps the trailing shape for empty input.
        spectra_dfs = spectra_dfs.to_numpy().reshape((-1,) + spectra_shape)

        # Train and save CNN
        trainings_history, cnn = fit_cnn(spectra_dfs,
                                         labels,
                                         input_shape=input_shape,
                                         epochs=20)

        # Visualize training history
        plot_trainings_history(trainings_history)

        cnn.save(model_path)
    else:
        # Load pre-trained CNN model
        cnn = keras.models.load_model(model_path)

    # Visualize predicted RUL in comparison to real RUL of the test set
    comparison_set = read_feature_dfs(FULL_TEST_SET, SPECTRA_CSV_NAME)
    label_data = pop_labels(comparison_set)
    reshaped_comparison_set = reformat_flattened_data(comparison_set,
                                                      n_rows=n_rows,
                                                      n_cols=n_cols)
    plot_rul_comparisons(reshaped_comparison_set, label_data, cnn)
def train_pca():
    """Train a PCA-embedding FFNN, score it on the test set and plot RUL.

    The model is trained on the learning set, scored on the full test set
    with RMSE and the correlation coefficient, its RUL predictions are
    visualized, and the metrics are passed to
    ``save_latex_aggregated_table``.
    """
    # NOTE(review): the data is read with csv_name=RAW_CSV_NAME while the
    # model is declared as DataSetType.computed - confirm this combination.
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET, csv_name=RAW_CSV_NAME))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET, csv_name=RAW_CSV_NAME)
    validation_labels = pop_labels(validation_dict)

    # The model uses PCAEmbedding, so it is labelled as PCA (it was
    # previously labelled "Isomap combined", which mislabelled the metrics).
    computed_features_pca_combiner_ffnn = EmbeddingFeaturesFNNN(name="PCA combined",
                                                                embedding_method=PCAEmbedding(),
                                                                encoding_size=5,
                                                                data_set_type=DataSetType.computed)

    # No validation data is handed to training; the test set is only used
    # for metrics and visualization below.
    computed_features_pca_combiner_ffnn.train(training_data, training_labels, validation_data=None,
                                              validation_labels=None)
    metrics_dict = {computed_features_pca_combiner_ffnn.name: computed_features_pca_combiner_ffnn.compute_metrics(
        df_dict=validation_dict, labels=validation_labels,
        metrics_list=[rmse, correlation_coefficient])}
    computed_features_pca_combiner_ffnn.visualize_rul(df_dict=validation_dict,
                                                      label_data=validation_labels, experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
def train_svr():
    """Train an SVR on the entropy features, score it and plot its RUL.

    The model is trained on the learning set, scored on the full test set
    with RMSE and the correlation coefficient (stored under the key
    ``"Entropy Poly"``), its RUL predictions are visualized, and the metrics
    are passed to ``save_latex_aggregated_table``.
    """
    # Learning set as one training frame plus labels.
    learning_dict = read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET)
    training_data, training_labels = df_dict_to_df_dataframe(learning_dict)

    # Test set stays a dict; its labels are split off separately.
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)

    svr_model = ComputedFeaturesFFNN(name="SVR", feature_list=ENTROPY_FEATURES)
    svr_model.train_svr(training_data, training_labels)

    # Score with the SVR regressor rather than the network.
    svr_metrics = svr_model.compute_metrics(
        df_dict=validation_dict,
        labels=validation_labels,
        metrics_list=[rmse, correlation_coefficient],
        use_svr=True)
    metrics_dict = {"Entropy Poly": svr_metrics}

    svr_model.visualize_rul(df_dict=validation_dict,
                            label_data=validation_labels,
                            use_svr=True,
                            experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
def _experiment_name(model_group, uses_classifier, use_svr, use_gpr,
                     use_poly_reg):
    """Return ``<group>_<true|false>_<SVR|GPR|MLR|ANN>`` for one model group."""
    classifier_tag = "_true" if uses_classifier else "_false"
    if use_svr:
        regressor_tag = "_SVR"
    elif use_gpr:
        regressor_tag = "_GPR"
    elif use_poly_reg:
        regressor_tag = "_MLR"
    else:
        regressor_tag = "_ANN"
    return model_group + classifier_tag + regressor_tag


def do_eval(model_dict: Dict[str, Sequence[DegradationModel]],
            health_stage_classifier: HealthStageClassifier = None,
            use_svr: bool = False,
            use_gpr: bool = False,
            use_poly_reg: bool = False):
    """Train and evaluate every model group on the matching data set type.

    Raw, computed-feature and spectra data are read for the learning set and
    the full test set. If a health stage classifier is given, all data is
    cut with it first and the resulting first prediction times are written
    as JSON to ``logs/first_prediction_times/<classifier name>``. Each model
    is then trained, scored with RMSE and the correlation coefficient,
    visualized, and the metrics of each group are stored with
    ``store_metrics_dict``.

    :param model_dict: maps a model group name to the models of that group.
    :param health_stage_classifier: optional classifier used to cut the data
        at the first prediction time; ``None`` disables cutting.
    :param use_svr: train and evaluate with the models' SVR regressor.
    :param use_gpr: train and evaluate with the models' GPR regressor.
    :param use_poly_reg: train and evaluate with polynomial regression.
    :raises AssertionError: if more than one of ``use_svr``, ``use_gpr`` and
        ``use_poly_reg`` is set.
    """
    # The regressor flags are mutually exclusive. The previous check only
    # rejected the case where all three were set at once.
    assert sum(map(bool, (use_svr, use_gpr, use_poly_reg))) <= 1, \
        "At most one of use_svr, use_gpr and use_poly_reg may be set."

    # Read evaluation data
    raw_metric_data = read_raw_dfs_as_dict(FULL_TEST_SET)
    feature_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET)
    spectra_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET, csv_name=SPECTRA_CSV_NAME)

    # Read Raw Data. The validation frames are built from deep copies so the
    # metric dicts above stay untouched until their labels are popped below.
    raw_training_data, raw_training_labels = df_dict_to_df_dataframe(
        read_raw_dfs_as_dict(LEARNING_SET))
    raw_validation_data, raw_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(raw_metric_data))

    # Read Computed Feature Data
    feature_training_data, feature_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    feature_validation_data, feature_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(feature_metric_data))

    # Read Frequency Spectra Data
    spectra_training_dict, spectra_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET,
                                 csv_name=SPECTRA_CSV_NAME))
    spectra_validation_dict, spectra_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(spectra_metric_data))

    # Per data set type: ((training data, labels), (validation data, labels)).
    training_data_dict: Dict[DataSetType, Sequence] = {
        DataSetType.raw: ((raw_training_data, raw_training_labels),
                          (raw_validation_data, raw_validation_labels)),
        DataSetType.computed:
        ((feature_training_data, feature_training_labels),
         (feature_validation_data, feature_validation_labels)),
        DataSetType.spectra:
        ((spectra_training_dict, spectra_training_labels),
         (spectra_validation_dict, spectra_validation_labels))
    }

    # Format validation data
    raw_metric_labels = pop_labels(raw_metric_data)
    feature_metric_labels = pop_labels(feature_metric_data)
    spectra_metric_labels = pop_labels(spectra_metric_data)

    # Per data set type: (data dict, label dict) used for metric computation.
    validation_metric_data: Dict[DataSetType, Sequence] = {
        DataSetType.raw: (raw_metric_data, raw_metric_labels),
        DataSetType.computed: (feature_metric_data, feature_metric_labels),
        DataSetType.spectra: (spectra_metric_data, spectra_metric_labels)
    }

    # Cut dfs according to health_stage_classifier
    if health_stage_classifier is not None:
        # The computed-feature frames are passed as the third argument for
        # every data set type.
        for key, ((training_data, training_labels),
                  (validation_data, validation_labels)) in list(
                      training_data_dict.items()):
            training_data_dict[key] = [
                health_stage_classifier.cut_FPTs_of_dataframe(
                    training_data, training_labels, feature_training_data),
                health_stage_classifier.cut_FPTs_of_dataframe(
                    validation_data, validation_labels,
                    feature_validation_data),
            ]

        fpt_dict = {}
        for key, (data, labels) in list(validation_metric_data.items()):
            cut_data, cut_labels, fpts = health_stage_classifier.cut_FPTs_of_dataframe_dict(
                data, labels, feature_validation_data)
            validation_metric_data[key] = (cut_data, cut_labels)
            fpt_dict[str(key)] = fpts

        # Persist the first prediction times of this classifier.
        fpt_path = Path("logs").joinpath("first_prediction_times")
        fpt_path.mkdir(parents=True, exist_ok=True)
        with open(fpt_path.joinpath(health_stage_classifier.name),
                  'w') as file:
            json.dump(fpt_dict, file, indent=4)

    # Evaluate Models
    for model_group, model_list in tqdm(model_dict.items(),
                                        desc="Evaluating model groups"):
        experiment_name = _experiment_name(
            model_group, health_stage_classifier is not None, use_svr,
            use_gpr, use_poly_reg)

        # Train Models
        for model in tqdm(model_list,
                          desc="Training models for model group %s" %
                          experiment_name):
            (training_data,
             training_labels), (validation_data,
                                validation_labels) = training_data_dict.get(
                                    model.get_data_set_type())
            if use_svr:
                model.train_svr(training_data=training_data,
                                training_labels=training_labels)
            elif use_gpr:
                model.train_gpr(training_data=training_data,
                                training_labels=training_labels)
            elif use_poly_reg:
                model.train_poly_reg(training_data=training_data,
                                     training_labels=training_labels,
                                     memory_path=MEMORY_CACHE_PATH)
            else:
                # Only the network training receives validation data.
                model.train(training_data=training_data,
                            training_labels=training_labels,
                            validation_data=validation_data,
                            validation_labels=validation_labels)

        metric_data = {}
        # Evaluate Models
        for model in tqdm(model_list,
                          desc="Evaluating models for model group %s" %
                          experiment_name,
                          position=0):
            model_metric_data, model_metric_labels = validation_metric_data.get(
                model.get_data_set_type())
            metric_data[model.get_name()] = model.compute_metrics(
                df_dict=model_metric_data,
                labels=model_metric_labels,
                metrics_list=[rmse, correlation_coefficient],
                use_svr=use_svr,
                use_gpr=use_gpr,
                use_poly_reg=use_poly_reg)

            model.visualize_rul(model_metric_data,
                                model_metric_labels,
                                experiment_name=experiment_name,
                                use_svr=use_svr,
                                use_gpr=use_gpr,
                                use_poly_reg=use_poly_reg)
        store_metrics_dict(dict=metric_data, experiment_name=experiment_name)