from sklearn.feature_selection import RFE
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn_pandas import DataFrameMapper

from dataprocessor import ProcessData, Filter

# Load the training and testing frames through the project's data helpers.
training_frame = ProcessData.trainData()
testing_frame = ProcessData.testData()

# Remove anomalies in the training frame based on percentile. The columns
# named in rm_columns are excluded from the set handed to the filter.
# NOTE(review): 0.01 / 0.99 presumably mean the 1st and 99th percentiles, and
# column_err_threshold's exact meaning is defined in dataprocessor -- confirm.
all_columns = list(training_frame.columns)
rm_columns = ['UnitNumber', 'Time', 'Setting1', 'Setting2', 'Setting3', 'RUL']
filter_columns = [name for name in all_columns if name not in rm_columns]
training_frame = Filter.filterDataPercentile(
    panda_frame=training_frame,
    columns=filter_columns,
    lower_percentile=0.01,
    upper_percentile=0.99,
    column_err_threshold=2,
)

# Delete 'UnitNumber' and 'Time' in place: first from the training frame,
# then from the testing frame.
for _frame in (training_frame, testing_frame):
    for _column in ('UnitNumber', 'Time'):
        del _frame[_column]

# training_columns holds every remaining training column except the response.
response_column = 'RUL'
training_columns = list(training_frame.columns)
training_columns.remove(response_column)
# Load training data frame
pData = ProcessData.trainData()

# Select columns: start from every column of the loaded frame and strip the
# ones that must not be passed to the percentile filter. list.remove raises
# ValueError if a name is absent, so a missing column fails loudly here.
selected_columns = list(pData.columns)
for _excluded in ('UnitNumber', 'Time', 'RUL',
                  'Setting1', 'Setting2', 'Setting3'):
    selected_columns.remove(_excluded)

# Filtered data frame
# NOTE(review): 0.01 / 0.99 presumably mean the 1st and 99th percentiles, and
# column_err_threshold's exact meaning is defined in dataprocessor -- confirm.
df = Filter.filterDataPercentile(
    panda_frame=pData,
    columns=selected_columns,
    lower_percentile=0.01,
    upper_percentile=0.99,
    column_err_threshold=1,
)

# Feature engineering: the same two options are enabled for the training and
# the testing frame.
data_frame = ProcessData.trainDataToFrame(
    df,
    moving_k_closest_average=True,
    standard_deviation=True,
)
testing_frame = ProcessData.testData(
    moving_k_closest_average=True,
    standard_deviation=True,
)

# Create h2o frames; the training frame's column names are set explicitly
# from the source data frame.
hData = h2o.H2OFrame(data_frame)
hData.set_names(list(data_frame.columns))
hTesting = h2o.H2OFrame(testing_frame)