from h2o.estimators.gbm import H2OGradientBoostingEstimator
from parser import DataFrameParser
from presenting import Chart

# Initialize the h2o server; the H2OFrame conversions below happen after this call
h2o.init()

# Read the training, validation and testing CSV files into pandas frames
pTrain = pd.read_csv("hTrainMy.csv")
pValidate = pd.read_csv("hValidateMy.csv")
pTest = pd.read_csv("hTestingMy.csv")

# Columns passed to the filter: every training column except the ones listed here
all_columns = list(pTrain.columns)
removing_columns = ['UnitNumber', 'Time', 'RUL', 'Setting1', 'Setting2', 'Setting3']
selected_columns = [name for name in all_columns if name not in removing_columns]

# Filter the training frame with the "iqr" removal method on the selected columns
filtered = Filter.filterData(
    panda_frame=pTrain,
    columns=selected_columns,
    removal_method="iqr",
    threshold=3,
)

# Convert each pandas frame to an H2OFrame and re-apply the pandas column names
hTrain = h2o.H2OFrame(filtered)
hTrain.set_names(list(pTrain.columns))

hValidate = h2o.H2OFrame(pValidate)
hValidate.set_names(list(pValidate.columns))

hTest = h2o.H2OFrame(pTest)
hTest.set_names(list(pTest.columns))

# Training columns: all columns of the training frame minus the three names below.
# list.remove raises ValueError if one of them is absent.
training_columns = list(pTrain.columns)
for dropped in ('UnitNumber', 'Time', 'RUL'):
    training_columns.remove(dropped)
# Load the training and testing CSV files into pandas frames
p_train = pd.read_csv('Training.csv')
p_test = pd.read_csv('Testing.csv')

# Every training column except the ones listed in removing_columns
all_columns = list(p_train.columns)
removing_columns = ['UnitNumber', 'Time', 'RUL', 'Setting1', 'Setting2', 'Setting3']
selected_columns = [x for x in all_columns if x not in removing_columns]

# Filter training dataset
# NOTE(review): selected_columns is computed above, but an empty list is passed
# as `columns` here — confirm whether selected_columns was intended.
p_noise_filtered = Filter.filterData(
    panda_frame=p_train,
    columns=[],
    removal_method="iqr",
    threshold=3,
)

# Training columns are all columns minus the three listed; 'RUL' is the response
removing_columns = ['UnitNumber', 'Time', 'RUL']
training_columns = [x for x in all_columns if x not in removing_columns]
response_column = 'RUL'

# Set mapper: training columns first, response column last, no transformers
df_mapper = DataFrameMapper([(training_columns, None), (response_column, None)])

# Pandas to sklearn: fit the mapper on the filtered training frame only, then
# apply the already-fitted mapper to the test frame so the test data never
# takes part in fitting.
train = df_mapper.fit_transform(p_noise_filtered)
test = df_mapper.transform(p_test)

# Number of columns in the mapped training data, read from the shape so it
# does not depend on a first row existing.
column_count = train.shape[1]