def ahmad_health_indicator_ahmad_classifier_ffnn_rul_prediction():
    # Read training data and construct the Ahmad et al. health indicator
    training_set = read_feature_dfs(LEARNING_SET)
    training_labels = pop_labels(training_set)
    training_set = construct_ahmad_health_indicator(training_set)
    # Re-attach the RUL labels to each bearing data frame
    training_set = [
        pd.merge(training_set[i],
                 training_labels[i],
                 left_index=True,
                 right_index=True) for i in range(len(training_labels))
    ]
    # Determine first prediction times (FPTs) with the Ahmad et al. 2019 method
    cut_dfs, fpts = cut_fpts(training_set,
                             fpt_method=ahmad_et_al_2019,
                             signal_key='ahmad_health_indicator')
    plot_fpts(fpts,
              df_list=training_set,
              classification_indicator='ahmad_health_indicator')
    # Concatenate training data and train the FFNN
    training_set = concat_dfs(training_set)
    training_labels = training_set.pop('RUL')
    ffnn, training_history = fit_ffnn(training_set,
                                      training_labels,
                                      dropout=True,
                                      epochs=30,
                                      hidden_layers=3,
                                      hidden_units=128)
    # Visualize predicted RUL in comparison to real RUL on the test set
    comparison_set = read_feature_dfs(FULL_TEST_SET)
    comparison_set, first_prediction_times = cut_fpts(comparison_set)
    # Remove label
    label_data = pop_labels(comparison_set)
    plot_rul_comparisons(comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def all_features_pca_combination_no_classifier_ffnn_rul_prediction():
    # Read training data
    feature_list = ALL_FEATURES
    training_data = read_feature_dfs(data_set_sub_set=LEARNING_SET,
                                     features=feature_list)
    learning_set = concat_dfs(training_data)
    # Remove labels
    training_labels = learning_set.pop('RUL')
    # Embed the features via PCA and train the FFNN
    model_training_data, pca = pca_embedded_data_frame(learning_set,
                                                       verbose=False)
    model_training_data = pd.DataFrame(model_training_data)
    ffnn, training_history = fit_ffnn(model_training_data,
                                      training_labels,
                                      epochs=60)  # TODO dropout=True
    plot_trainings_history(training_history)

    # Visualize predicted RUL in comparison to real RUL of training set
    # Remove label
    training_label_data = pop_labels(training_data)
    # Apply PCA
    transformed_training_set = []
    for df in tqdm(training_data,
                   desc="Transforming training set.",
                   position=0,
                   leave=True):
        transformed_training_set += [pd.DataFrame(pca.transform(X=df))]
    plot_rul_comparisons(transformed_training_set,
                         label_data=training_label_data,
                         prediction_model=ffnn)

    # Visualize predicted RUL in comparison to real RUL of validation set
    comparison_set = read_feature_dfs(data_set_sub_set=FULL_TEST_SET,
                                      features=feature_list)
    # Remove label
    label_data = pop_labels(comparison_set)
    # Apply PCA
    transformed_comparison_set = []
    for df in tqdm(comparison_set,
                   desc="Transforming validation set.",
                   position=0,
                   leave=True):
        transformed_comparison_set += [pd.DataFrame(pca.transform(X=df))]
    plot_rul_comparisons(transformed_comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def isomap_features_no_classifier_ffnn_rul_prediction(training_data,
                                                      comparison_set):
    # Training data is provided by the caller
    isomap_training_data = training_data
    # Remove labels
    isomap_training_data = concat_dfs(isomap_training_data)
    labels = isomap_training_data.pop('RUL')
    # Embed the features via Isomap and train the FFNN
    model_training_data, isomap = isomap_embedded_data_frame(
        isomap_training_data, verbose=False)
    ffnn, training_history = fit_ffnn(model_training_data, labels, epochs=100)
    plot_trainings_history(training_history)

    # Visualize predicted RUL in comparison to real RUL
    # comparison_set = read_raw_dfs(FULL_TEST_SET)
    # Remove label
    label_data = pop_labels(comparison_set)
    # Apply the Isomap embedding
    comparison_set = [
        pd.DataFrame(isomap.transform(X=df)) for df in comparison_set
    ]
    plot_rul_comparisons(comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def all_features_and_autoencoder_li_2019_classifier_ffnn_rul_prediction():
    # Input features: statistical features
    learning_feature_df_list = read_feature_dfs(LEARNING_SET, FEATURES_CSV_NAME)
    # Two-stage approach: first prediction times after Lei et al. 2019
    cut_dfs, first_prediction_times = cut_fpts(learning_feature_df_list)
    # Visualize FPTs
    # plot_fpts(first_prediction_times, learning_feature_df_list, 'root_mean_square')
    # Concatenate training data
    all_bearings = concat_dfs(cut_dfs)
    labels = all_bearings.pop('RUL')
    # Apply PCA
    all_bearings, pca = pca_embedded_data_frame(all_bearings)
    # RUL prediction: FFNN
    trainings_history, ffnn = fit_ffnn(X=all_bearings,
                                       y=labels,
                                       dropout=True,
                                       epochs=150)
    # Visualize training history and later validation history
    plot_trainings_history(trainings_history)
    # Visualize predicted RUL in comparison to real RUL
    comparison_set = read_feature_dfs(FULL_TEST_SET, FEATURES_CSV_NAME)
    comparison_set, first_prediction_times = cut_fpts(comparison_set)
    # Remove label
    label_data = pop_labels(comparison_set)
    # Apply PCA
    comparison_set = [pd.DataFrame(pca.transform(df)) for df in comparison_set]
    plot_rul_comparisons(comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def pca_features_no_classifier_ffnn_rul_prediction(training_data,
                                                   comparison_set):
    # Training data is provided by the caller
    # Remove labels
    pca_training_data = concat_dfs(training_data)
    labels = pca_training_data.pop('RUL')
    # Embed the features via PCA and train the FFNN
    model_training_data, pca = pca_embedded_data_frame(pca_training_data,
                                                       n_components=300,
                                                       verbose=False)
    model_training_data = pd.DataFrame(model_training_data)
    ffnn, training_history = fit_ffnn(model_training_data,
                                      labels,
                                      epochs=60,
                                      dropout=True)
    plot_trainings_history(training_history)

    # Visualize predicted RUL in comparison to real RUL of training set
    # Remove label
    training_label_data = pop_labels(training_data)
    # Apply PCA
    transformed_training_set = []
    for df in tqdm(training_data,
                   desc="Transforming training set.",
                   position=0,
                   leave=True):
        transformed_training_set += [pd.DataFrame(pca.transform(X=df))]
    plot_rul_comparisons(transformed_training_set,
                         label_data=training_label_data,
                         prediction_model=ffnn)

    # Visualize predicted RUL in comparison to real RUL of validation set
    # Remove label
    label_data = pop_labels(comparison_set)
    # Apply PCA
    transformed_comparison_set = []
    for df in tqdm(comparison_set,
                   desc="Transforming validation set.",
                   position=0,
                   leave=True):
        transformed_comparison_set += [pd.DataFrame(pca.transform(X=df))]
    plot_rul_comparisons(transformed_comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def all_features_no_classifier_lstm_rul_prediction():
    trainings_data = read_feature_dfs(LEARNING_SET)
    # trainings_data = concat_dfs(trainings_data)
    trainings_labels = pop_labels(trainings_data)
    lstm, trainings_history = fit_lstm(trainings_data,
                                       trainings_labels,
                                       dropout=True)
    plot_trainings_history(trainings_history=trainings_history)
def statistical_features_li_2019_classifier_ffnn_rul_prediction():
    # Input features: statistical features
    learning_feature_df_list = read_dfs(LEARNING_SET,
                                        FEATURES_CSV_NAME,
                                        features=BASIC_STATISTICAL_FEATURES)
    # Two-stage approach: first prediction times after Lei et al. 2019
    cut_dfs, first_prediction_times = cut_fpts(learning_feature_df_list)
    # Visualize FPTs
    # plot_fpts(first_prediction_times, learning_feature_df_list, 'root_mean_square')
    # Concatenate training data
    all_bearings = concat_dfs(cut_dfs)
    labels = all_bearings.pop('RUL')
    # RUL prediction: FFNN
    trainings_history, ffnn = fit_ffnn(X=all_bearings,
                                       y=labels,
                                       dropout=True,
                                       epochs=100,
                                       hidden_layers=3,
                                       hidden_units=1024)
    # Visualize training history and later validation history
    plot_trainings_history(trainings_history)
    # Visualize predicted RUL in comparison to real RUL on the training set
    label_data = pop_labels(cut_dfs)
    plot_rul_comparisons(cut_dfs, label_data=label_data, prediction_model=ffnn)
    # Visualize predicted RUL in comparison to real RUL on the test set
    comparison_set = read_dfs(FULL_TEST_SET,
                              FEATURES_CSV_NAME,
                              features=BASIC_STATISTICAL_FEATURES)
    comparison_set, first_prediction_times = cut_fpts(comparison_set)
    # Remove label
    label_data = pop_labels(comparison_set)
    plot_rul_comparisons(comparison_set,
                         label_data=label_data,
                         prediction_model=ffnn)
def eval_isomap():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)
    # Isomap-embedded FFNN models with increasing encoding sizes
    isomap_models = [
        EmbeddingFeaturesFNNN(name="Isomap combined %d" % encoding_size,
                              embedding_method=IsomapEmbedding(),
                              encoding_size=encoding_size,
                              data_set_type=DataSetType.computed)
        for encoding_size in (5, 15, 25, 35, 45, 55)
    ]
    metrics_dict = {}
    for isomap_model in isomap_models:
        # print("Currently evaluating: ", isomap_model.name)
        isomap_model.train(training_data,
                           training_labels,
                           validation_data=None,
                           validation_labels=None)
        metrics_dict[isomap_model.name] = isomap_model.compute_metrics(
            df_dict=validation_dict,
            labels=validation_labels,
            metrics_list=[rmse, correlation_coefficient])
    save_latex_aggregated_table(metrics_dict, None)
def spectra_features_no_classifier_cnn_rul_prediction(train: bool = True):
    model_path: Path = Path('keras_models').joinpath('spectra_none_cnn')
    n_rows: int = 129
    n_cols: int = 21
    spectra_shape: tuple = (n_rows, n_cols)
    input_shape: tuple = (n_rows, n_cols, 1)
    if train:
        # Read in training data
        read_spectra_dfs = read_feature_dfs(LEARNING_SET, SPECTRA_CSV_NAME)
        spectra_dfs = pd.concat(
            read_spectra_dfs,
            ignore_index=True,
            keys=['Bearing' + str(x) for x in range(len(read_spectra_dfs))])
        labels = spectra_dfs.pop('RUL')
        # Reformat flattened spectra into (n_rows, n_cols) arrays
        spectra_dfs = spectra_dfs.to_numpy()
        spectra_dfs = np.array(
            [df.reshape(spectra_shape) for df in spectra_dfs])
        # Train and save CNN
        trainings_history, cnn = fit_cnn(spectra_dfs,
                                         labels,
                                         input_shape=input_shape,
                                         epochs=20)
        # Visualize training history
        plot_trainings_history(trainings_history)
        cnn.save(model_path)
    else:
        # Load pre-trained CNN model
        cnn = keras.models.load_model(model_path)

    # Visualize predicted RUL in comparison to real RUL on the test set
    comparison_set = read_feature_dfs(FULL_TEST_SET, SPECTRA_CSV_NAME)
    label_data = pop_labels(comparison_set)
    reshaped_comparison_set = reformat_flattened_data(comparison_set,
                                                      n_rows=n_rows,
                                                      n_cols=n_cols)
    plot_rul_comparisons(reshaped_comparison_set, label_data, cnn)
def train_pca():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET,
                                 csv_name=RAW_CSV_NAME))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET,
                                               csv_name=RAW_CSV_NAME)
    validation_labels = pop_labels(validation_dict)
    computed_features_pca_combiner_ffnn = EmbeddingFeaturesFNNN(
        name="PCA combined",
        embedding_method=PCAEmbedding(),
        encoding_size=5,
        data_set_type=DataSetType.computed)
    computed_features_pca_combiner_ffnn.train(training_data,
                                              training_labels,
                                              validation_data=None,
                                              validation_labels=None)
    metrics_dict = {
        computed_features_pca_combiner_ffnn.name:
        computed_features_pca_combiner_ffnn.compute_metrics(
            df_dict=validation_dict,
            labels=validation_labels,
            metrics_list=[rmse, correlation_coefficient])
    }
    computed_features_pca_combiner_ffnn.visualize_rul(
        df_dict=validation_dict,
        label_data=validation_labels,
        experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
def train_svr():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)
    svr_model = ComputedFeaturesFFNN(name="SVR", feature_list=ENTROPY_FEATURES)
    svr_model.train_svr(training_data, training_labels)
    metrics_dict = {
        "Entropy Poly":
        svr_model.compute_metrics(df_dict=validation_dict,
                                  labels=validation_labels,
                                  metrics_list=[rmse, correlation_coefficient],
                                  use_svr=True)
    }
    svr_model.visualize_rul(df_dict=validation_dict,
                            label_data=validation_labels,
                            use_svr=True,
                            experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
def do_eval(model_dict: Dict[str, Sequence[DegradationModel]],
            health_stage_classifier: HealthStageClassifier = None,
            use_svr: bool = False,
            use_gpr: bool = False,
            use_poly_reg: bool = False):
    # At most one of the alternative regression back-ends may be selected
    assert sum([use_svr, use_gpr, use_poly_reg]) <= 1

    # Read evaluation data
    raw_metric_data = read_raw_dfs_as_dict(FULL_TEST_SET)
    feature_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET)
    spectra_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET, csv_name=SPECTRA_CSV_NAME)

    # Read raw data
    raw_training_data, raw_training_labels = df_dict_to_df_dataframe(
        read_raw_dfs_as_dict(LEARNING_SET))
    raw_validation_data, raw_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(raw_metric_data))
    # Read computed feature data
    feature_training_data, feature_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    feature_validation_data, feature_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(feature_metric_data))
    # Read frequency spectra data
    spectra_training_dict, spectra_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET,
                                 csv_name=SPECTRA_CSV_NAME))
    spectra_validation_dict, spectra_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(spectra_metric_data))

    # ((training data, training labels), (validation data, validation labels)) per data set type
    training_data_dict: Dict[DataSetType, tuple] = {
        DataSetType.raw: ((raw_training_data, raw_training_labels),
                          (raw_validation_data, raw_validation_labels)),
        DataSetType.computed:
        ((feature_training_data, feature_training_labels),
         (feature_validation_data, feature_validation_labels)),
        DataSetType.spectra: ((spectra_training_dict, spectra_training_labels),
                              (spectra_validation_dict,
                               spectra_validation_labels))
    }

    # Format validation data: (data frame dict, label dict) per data set type
    raw_metric_labels = pop_labels(raw_metric_data)
    feature_metric_labels = pop_labels(feature_metric_data)
    spectra_metric_labels = pop_labels(spectra_metric_data)
    validation_metric_data: Dict[DataSetType, tuple] = {
        DataSetType.raw: (raw_metric_data, raw_metric_labels),
        DataSetType.computed: (feature_metric_data, feature_metric_labels),
        DataSetType.spectra: (spectra_metric_data, spectra_metric_labels)
    }

    # Cut data frames at their first prediction times according to the health stage classifier
    if health_stage_classifier is not None:
        for key in training_data_dict.keys():
            training_data_frames = training_data_dict.get(key)
            new_datasets = []
            (training_data, training_labels), (validation_data, validation_labels) = training_data_frames
            new_datasets += [(health_stage_classifier.cut_FPTs_of_dataframe(
                training_data, training_labels, feature_training_data))]
            new_datasets += [(health_stage_classifier.cut_FPTs_of_dataframe(
                validation_data, validation_labels, feature_validation_data))]
            training_data_dict[key] = new_datasets

        fpt_dict = {}
        for key, (data, labels) in validation_metric_data.items():
            cut_data, cut_labels, fpts = health_stage_classifier.cut_FPTs_of_dataframe_dict(
                data, labels, feature_validation_data)
            validation_metric_data[key] = (cut_data, cut_labels)
            fpt_dict[str(key)] = fpts

        # Persist the first prediction times for later inspection
        fpt_path = Path("logs").joinpath("first_prediction_times")
        if not os.path.exists(fpt_path):
            Path(fpt_path).mkdir(parents=True, exist_ok=True)
        with open(fpt_path.joinpath(health_stage_classifier.name), 'w') as file:
            json.dump(fpt_dict, file, indent=4)

    # Evaluate model groups
    for model_group in tqdm(model_dict.keys(), desc="Evaluating model groups"):
        experiment_name = model_group
        if health_stage_classifier is not None:
            experiment_name += "_true"
        else:
            experiment_name += "_false"
        if use_svr:
            experiment_name += "_SVR"
        elif use_gpr:
            experiment_name += "_GPR"
        elif use_poly_reg:
            experiment_name += "_MLR"
        else:
            experiment_name += "_ANN"
        model_list = model_dict.get(model_group)

        # Train models
        for model in tqdm(model_list,
                          desc="Training models for model group %s" % experiment_name):
            (training_data, training_labels), (validation_data, validation_labels) = training_data_dict.get(
                model.get_data_set_type())
            if use_svr:
                model.train_svr(training_data=training_data,
                                training_labels=training_labels)
            elif use_gpr:
                model.train_gpr(training_data=training_data,
                                training_labels=training_labels)
            elif use_poly_reg:
                model.train_poly_reg(training_data=training_data,
                                     training_labels=training_labels,
                                     memory_path=MEMORY_CACHE_PATH)
            else:
                trainings_history = model.train(
                    training_data=training_data,
                    training_labels=training_labels,
                    validation_data=validation_data,
                    validation_labels=validation_labels)

        metric_data = {}
        # Evaluate models
        for model in tqdm(model_list,
                          desc="Evaluating models for model group %s" % experiment_name,
                          position=0):
            model_metric_data, model_metric_labels = validation_metric_data.get(
                model.get_data_set_type())
            metric_data[model.get_name()] = model.compute_metrics(
                df_dict=model_metric_data,
                labels=model_metric_labels,
                metrics_list=[rmse, correlation_coefficient],
                use_svr=use_svr,
                use_gpr=use_gpr,
                use_poly_reg=use_poly_reg)
            model.visualize_rul(model_metric_data,
                                model_metric_labels,
                                experiment_name=experiment_name,
                                use_svr=use_svr,
                                use_gpr=use_gpr,
                                use_poly_reg=use_poly_reg)
        store_metrics_dict(dict=metric_data, experiment_name=experiment_name)