예제 #1
0
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from parser import DataFrameParser
from presenting import Chart

# Initialize the h2o server; this has to happen before any H2OFrame is created
h2o.init()

# Read the training / validation / testing sets into pandas frames
pTrain = pd.read_csv("hTrainMy.csv")
pValidate = pd.read_csv("hValidateMy.csv")
pTest = pd.read_csv("hTestingMy.csv")

# Columns passed to the filter: every training column that is not listed
# in removing_columns
all_columns = list(pTrain.columns)
removing_columns = ['UnitNumber', 'Time', 'RUL', 'Setting1', 'Setting2', 'Setting3']
selected_columns = [name for name in all_columns if name not in removing_columns]

# Run the IQR-based filter (threshold 3) over the selected training columns
filtered = Filter.filterData(
    panda_frame=pTrain,
    columns=selected_columns,
    removal_method="iqr",
    threshold=3,
)

# Convert each pandas frame into an H2OFrame and re-apply the column names
# of the pandas frame it belongs to
hTrain = h2o.H2OFrame(filtered)
train_names = list(pTrain.columns)
hTrain.set_names(train_names)

hValidate = h2o.H2OFrame(pValidate)
validate_names = list(pValidate.columns)
hValidate.set_names(validate_names)

hTest = h2o.H2OFrame(pTest)
test_names = list(pTest.columns)
hTest.set_names(test_names)

# Training columns: all columns of the training frame minus the three names
# below (list.remove raises ValueError if one of them is missing)
training_columns = list(pTrain.columns)
for excluded in ('UnitNumber', 'Time', 'RUL'):
    training_columns.remove(excluded)
예제 #2
0





# Load the training and testing sets as pandas frames
p_train = pd.read_csv('Training.csv')
p_test = pd.read_csv('Testing.csv')

all_columns = list(p_train.columns)
removing_columns = ['UnitNumber', 'Time', 'RUL', 'Setting1', 'Setting2', 'Setting3']
selected_columns = [x for x in all_columns if x not in removing_columns]

# Filter training dataset
# NOTE(review): selected_columns is computed above but an empty list is passed
# as `columns` here - confirm whether columns=selected_columns was intended.
p_noise_filtered = Filter.filterData(panda_frame=p_train, columns=[], removal_method="iqr", threshold=3)

# Model inputs: every column except the three names below; 'RUL' is the response
removing_columns = ['UnitNumber', 'Time', 'RUL']
training_columns = [x for x in all_columns if x not in removing_columns]
response_column = 'RUL'

# Set mapper: both entries use None as the transformer; the response column
# is listed after the training columns
df_mapper = DataFrameMapper([(training_columns, None), (response_column, None)])

# Pandas to sklearn
train = df_mapper.fit_transform(p_noise_filtered)
# Reuse the mapper fitted on the training data; the test set must never be
# used to (re)fit it, otherwise any real transformer would learn from test data
test = df_mapper.transform(p_test)

# [row : column]
# Read the column count from the array shape so an empty (zero-row) training
# set does not raise IndexError the way indexing row 0 would
column_count = train.shape[1]