# 'RUL' is the response column (see response_column below), so it is taken
# out of the list of feature names handed to the feature-engineering step.
selected_features.remove('RUL')

# Remove the 'Setting*' names from the feature list when both frames
# contain a column of that name.
# NOTE(review): the guard tests membership in the frames, while the removal
# targets selected_features; list.remove raises ValueError if the name is
# not in that list -- confirm selected_features always contains them.
rm_columns = ['Setting1', 'Setting2', 'Setting3']
for col in rm_columns:
    if col not in training_frame or col not in testing_frame:
        continue
    selected_features.remove(col)

# Keyword arguments common to the training and testing conversion calls.
_engineering_options = {
    'selected_column_names': selected_features,
    'moving_k_closest_average': True,
    'standard_deviation': True,
}

training_frame = ProcessData.trainDataToFrame(
    training_frame=training_frame,
    probability_distribution=True,
    **_engineering_options)
testing_frame = ProcessData.testDataToFrame(
    testing_frame=testing_frame,
    probability_from_file=True,
    **_engineering_options)

# Delete 'UnitNumber' and 'Time' from the training frame first, then from
# the testing frame.
for _frame in (training_frame, testing_frame):
    for _dropped in ('UnitNumber', 'Time'):
        del _frame[_dropped]

# Predictor names: every remaining training column except the response.
response_column = 'RUL'
training_columns = list(training_frame.columns)
training_columns.remove(response_column)
# Remove four sensor columns from the training data before feature
# engineering; none of them appears in the `columns` list below.
for _sensor in ('Sensor10', 'Sensor16', 'Sensor18', 'Sensor19'):
    del p_filter[_sensor]

# Feature engineering process
columns = [
    'Sensor14', 'Sensor9', 'Sensor11', 'Sensor12', 'Sensor13',
    'Sensor7', 'Sensor4', 'Sensor8', 'Sensor20', 'Sensor21',
    'Sensor15', 'Sensor6', 'Sensor2', 'Sensor17', 'Sensor3',
]
p_featured_train = ProcessData.trainDataToFrame(
    training_frame=p_filter,
    selected_column_names=columns,
    probability_distribution=True)
p_featured_test = ProcessData.testDataToFrame(
    testing_frame=p_test,
    selected_column_names=columns,
    probability_from_file=True)

# Build H2O frames from the engineered frames and give them the same
# column names.
h_filter = h2o.H2OFrame(p_featured_train)
h_filter.set_names(list(p_featured_train.columns))
h_test = h2o.H2OFrame(p_featured_test)
h_test.set_names(list(p_featured_test.columns))

# Predictor names: every column of the engineered training frame except
# 'UnitNumber', 'Time' and the response 'RUL'.
training_columns = list(p_featured_train.columns)
for _excluded in ('UnitNumber', 'Time', 'RUL'):
    training_columns.remove(_excluded)

model = H2ODeepLearningEstimator(variable_importances=True)
# Train on training_columns rather than the raw `columns` list: the list
# computed above was otherwise unused, and any columns added by the
# feature-engineering step would never reach the model.
# NOTE(review): assumes trainDataToFrame adds engineered columns that are
# meant to be predictors -- confirm against ProcessData.
model.train(x=training_columns, y='RUL', training_frame=h_filter, nfolds=10)