def calculateError(actual, predict):
    """Return an asymmetric exponential error summed over all predictions.

    Both arguments are first converted with
    ``DataFrameParser.h2oToNumpyArray``. For every index ``i`` of ``actual``
    the difference ``d = predict[i] - actual[i]`` contributes:

    * ``exp(d / 10)`` when the prediction is too high (``d > 0``),
    * ``exp(-d / 13)`` when the prediction is too low (``d < 0``),
    * nothing when ``d == 0``.

    Over-predictions are therefore penalised more steeply than
    under-predictions of the same size.

    :param actual: frame holding the true values.
    :param predict: frame holding the predicted values; must have at least
        as many entries as ``actual``.
    :return: the accumulated error (``0`` when every prediction is exact).
    """
    actual = DataFrameParser.h2oToNumpyArray(actual)
    predict = DataFrameParser.h2oToNumpyArray(predict)

    error = 0
    for i, actual_value in enumerate(actual):
        d = predict[i] - actual_value
        if d > 0:
            error += math.exp(d / 10.0)
        elif d < 0:
            # The exponent must be positive here: exp(d / 13) with d < 0
            # shrinks toward 0 as the miss grows, which would reward worse
            # under-predictions with a smaller error.
            error += math.exp(-d / 13.0)
    # NOTE(review): this resembles the PHM08 scoring function, which also
    # subtracts 1 from each term; confirm whether that offset is wanted.
    return error
# Train a random forest classifier on the 'BIN' label using the filtered
# training data, then evaluate it on the test frame.

# Every column except the identifiers and the two target columns is a feature.
training_columns = list(p_filter.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')
training_columns.remove('BIN')

# Convert the label to a factor (categorical) column in both frames.
h_filter['BIN'] = h_filter['BIN'].asfactor()
h_test['BIN'] = h_test['BIN'].asfactor()

# Export the exact frames used for training/testing (overwriting old files).
h2o.export_file(frame=h_test, path='test_sid.csv', force=True)
h2o.export_file(frame=h_filter, path='train_sid.csv', force=True)

# 'ntrees' was previously misspelled as 'ntress', so the requested number of
# trees was never applied to the estimator.
model = H2ORandomForestEstimator(nbins=250, ntrees=100, max_depth=50, nfolds=10)
model.train(x=training_columns, y='BIN', training_frame=h_filter)

# Compare predicted and actual labels on the test frame.
predict = model.predict(test_data=h_test)
predict = DataFrameParser.h2oToList(predict['predict'])
actual = DataFrameParser.h2oToList(h_test['BIN'])

Measures.confusion_matrix(actual, predict)
print(predict)
print(actual)

# Download the trained model as a POJO (plus the supporting jar).
h2o.download_pojo(model=model, path="/home/wso2123/PycharmProjects/FeatureProcessor/", get_jar=True)
# Fit a gradient boosting model that predicts 'RUL' and chart its residuals
# on the test frame.

response_column = 'RUL'

# Features are all training columns except the identifiers and the response.
training_columns = list(pTrain.columns)
for _column in ('UnitNumber', 'Time', response_column):
    training_columns.remove(_column)

print("OK")

model = H2OGradientBoostingEstimator(
    distribution='poisson',
    histogram_type='QuantilesGlobal',
    fold_assignment='auto',
)
# Alternative estimator kept for reference:
#model = H2ORandomForestEstimator(ntrees=50, max_depth=20, nbins=100, seed=12345)

model.train(
    x=training_columns,
    y=response_column,
    training_frame=hTrain,
    validation_frame=hValidate,
)

# Report the model's performance metrics on the held-out test frame.
print(model.model_performance(test_data=hTest))

# Convert predictions and ground truth to numpy arrays for plotting.
predict = DataFrameParser.h2oToNumpyArray(model.predict(test_data=hTest))
actual = DataFrameParser.h2oToNumpyArray(hTest[response_column])

Chart.residual_histogram(actual, predict)
Chart.residual_vs_estimated(actual, predict)
Chart.acutal_and_predict(actual, predict)
def function():
    """Filter training rows with an autoencoder, then train a 'BIN' classifier.

    Steps, in order:

    1. Load the training and test data through ``ProcessData``.
    2. Train an ``H2OAutoEncoderEstimator`` on the training features
       (everything except the targets, identifiers and settings).
    3. Compute the reconstruction error of the training frame and hand it,
       together with a threshold of 97% of the maximum error, to
       ``Filter.filterDataAutoEncoder``.
    4. Drop a fixed set of columns from the filtered data and train an
       ``H2ODeepLearningEstimator`` with 10-fold cross-validation on 'BIN'.
    5. Predict on the test frame, pass actual and predicted labels to
       ``Measures.confusion_matrix`` and print both lists.

    Returns nothing; all results are reported through prints and
    ``Measures.confusion_matrix``.
    """
    # AutoEncoder anomaly removal process
    p_train = ProcessData.trainData(moving_median_centered_average=True, standard_deviation=True,
                                    probability_distribution=True, bin_classification=True)
    p_test = ProcessData.testData(moving_median_centered_average=True, standard_deviation=True,
                                  probability_from_file=True, bin_classification=True)

    # Converting to h2o frame
    h_train = h2o.H2OFrame(p_train)
    h_train.set_names(list(p_train.columns))

    # Define autoencoder
    anomaly_model = H2OAutoEncoderEstimator(activation="Rectifier", hidden=[25, 12, 25],
                                            sparse=True, l1=1e-4, epochs=100)

    # Select relevant features
    anomaly_train_columns = list(p_train.columns)
    print(anomaly_train_columns)
    for excluded in ('RUL', 'BIN', 'UnitNumber', 'Time', 'Setting1', 'Setting2', 'Setting3'):
        anomaly_train_columns.remove(excluded)

    # Train model
    anomaly_model.train(x=anomaly_train_columns, training_frame=h_train)

    # Get reconstruction error
    reconstruction_error = anomaly_model.anomaly(test_data=h_train, per_feature=False)
    error_str = reconstruction_error.get_frame_data()
    # The first line and the last split element are skipped (presumably the
    # header and the empty string after the trailing newline - confirm).
    err_list = np.array([float(value) for value in error_str.split("\n")[1:-1]])

    # Threshold
    threshold = np.amax(err_list) * 0.97

    print("Max Reconstruction Error       :", reconstruction_error.max())
    print("Threshold Reconstruction Error :", threshold)

    # Filter anomalies based on reconstruction error
    p_filter = Filter.filterDataAutoEncoder(panda_frame=p_train, reconstruction_error=err_list,
                                            threshold=threshold)

    # Drop features
    for dropped in ('Setting3', 'Sensor1', 'Sensor5', 'Sensor10', 'Sensor16', 'Sensor18', 'Sensor19'):
        del p_filter[dropped]

    h_filter = h2o.H2OFrame(p_filter)
    h_filter.set_names(list(p_filter.columns))

    # The test frame is built once, here; an earlier unused copy of the same
    # unchanged data has been removed.
    h_test = h2o.H2OFrame(p_test)
    h_test.set_names(list(p_test.columns))

    training_columns = list(p_filter.columns)
    for excluded in ('UnitNumber', 'Time', 'RUL', 'BIN'):
        training_columns.remove(excluded)

    # Treat the label as categorical in both frames.
    h_filter['BIN'] = h_filter['BIN'].asfactor()
    h_test['BIN'] = h_test['BIN'].asfactor()

    # nfolds is an estimator parameter, so it is set on the constructor
    # rather than passed to train().
    model = H2ODeepLearningEstimator(variable_importances=True, nfolds=10)
    model.train(x=training_columns, y='BIN', training_frame=h_filter)

    predict = model.predict(test_data=h_test)
    predict = DataFrameParser.h2oToList(predict['predict'])
    actual = DataFrameParser.h2oToList(h_test['BIN'])

    Measures.confusion_matrix(actual, predict)
    print(predict)
    print(actual)