del training_frame[feature]
        del testing_frame[feature]

# Feature selection: start from every training column, then strip the
# identifier / target columns that must not be fed to the model.
selected_features = list(training_frame.columns)
for meta_column in ('UnitNumber', 'Time', 'RUL'):
    selected_features.remove(meta_column)

# Operational-setting columns are dropped only when BOTH frames carry them,
# so train and test keep an identical feature set.
rm_columns = ['Setting1', 'Setting2', 'Setting3']
for col in rm_columns:
    if col in training_frame and col in testing_frame:
        selected_features.remove(col)

# Feature engineering on the selected columns (k-closest moving average,
# rolling standard deviation, probability features).
training_frame = ProcessData.trainDataToFrame(
    training_frame=training_frame,
    selected_column_names=selected_features,
    moving_k_closest_average=True,
    standard_deviation=True,
    probability_distribution=True)
testing_frame = ProcessData.testDataToFrame(
    testing_frame=testing_frame,
    selected_column_names=selected_features,
    moving_k_closest_average=True,
    standard_deviation=True,
    probability_from_file=True)

# Training data columns: drop the identifier columns before modelling.
for id_column in ('UnitNumber', 'Time'):
    del training_frame[id_column]

# Testing columns
del testing_frame['UnitNumber']
# =========================== Example 2 ===========================
    series = pData[column]
    anomaly = Test.iqr(series, threshold=5)
    anomaly_series.extend(anomaly)

# De-duplicate anomaly row indexes and sort them.
# BUG FIX: the original called .sort() first and THEN converted to a set,
# which threw the ordering away (sets are unordered); sorted(set(...))
# de-duplicates AND leaves the indexes ascending.
anomaly_series = sorted(set(anomaly_series))
print(anomaly_series)        # print() form, consistent with the rest of the file
print(len(anomaly_series))

# Remove anomalies (drop itself is order-insensitive; sorting only makes
# the printout above readable)
df = pData.drop(pData.index[anomaly_series])

# Feature engineering: k-closest moving average + rolling standard deviation
data_frame = ProcessData.trainDataToFrame(df,
                                          moving_k_closest_average=True,
                                          standard_deviation=True)
testing_frame = ProcessData.testData(moving_k_closest_average=True,
                                     standard_deviation=True)

# Create h2o frame
hData = h2o.H2OFrame(data_frame)
hData.set_names(list(data_frame.columns))

hTesting = h2o.H2OFrame(testing_frame)
hTesting.set_names(list(testing_frame.columns))

# Split data into training and validation (80/20)
hTrain, hValidate = hData.split_frame(ratios=[0.8])

h2o.export_file(hTrain, "hTrainMy.csv", force=True)
# =========================== Example 3 ===========================
        filtered_train = filtered_train.append(df1, ignore_index=True)
        count += 1
    Progress.printProgress(iteration=(i + 1),
                           total=hTrain.nrow,
                           decimals=1,
                           prefix="Progress",
                           suffix="Complete")

# Report the effect of the row filter.
# FIX: converted Python 2 print statements to print() calls — the file's
# other fragments (see the Example 4 section) already use Python 3 syntax.
print(filtered_train)
print("Original Size :", hTrain.nrow)
print("Filtered Size :", len(filtered_train))
print("Removed Rows  :", (hTrain.nrow - len(filtered_train)))

# Feature Engineering on the filtered rows
pTrain = ProcessData.trainDataToFrame(filtered_train,
                                      moving_k_closest_average=True,
                                      standard_deviation=True,
                                      probability_distribution=True)
pTest = ProcessData.testData(moving_k_closest_average=True,
                             standard_deviation=True,
                             probability_from_file=True)

# Convert pandas to h2o frame - for model training
hValidate = h2o.H2OFrame(pValidate)
hValidate.set_names(list(pValidate.columns))

hTrain = h2o.H2OFrame(pTrain)
hTrain.set_names(list(pTrain.columns))

hTest = h2o.H2OFrame(pTest)
hTest.set_names(list(pTest.columns))
# =========================== Example 4 ===========================
# Keep rows whose reconstruction error is within 2*IQR of the 75th
# percentile.  FIX: the original appended one row at a time with
# DataFrame.append (O(n^2), deprecated and removed in pandas 2.0); we now
# collect the surviving indexes and take them with a single .iloc call,
# which also preserves the original column dtypes (row-wise append of a
# Series upcasts them).
keep_rows = []
count = 0
for i in trange(hTrain.nrow):
    if abs(err_list[i] - q75) < 2 * iqr:
        keep_rows.append(i)
        count += 1
# reset_index(drop=True) reproduces the original ignore_index=True numbering.
filtered_train = pTrain.iloc[keep_rows, :].reset_index(drop=True)

print(filtered_train)
print("Original Size :", hTrain.nrow)
print("Filtered Size :", len(filtered_train))
print("Removed Rows  :", (hTrain.nrow - len(filtered_train)))

# Feature Engineering
pTrain = ProcessData.trainDataToFrame(filtered_train,
                                      moving_average=True,
                                      standard_deviation=True)
pTest = ProcessData.testData(moving_average=True, standard_deviation=True)

# Convert pandas to h2o frame - for model training
hData = h2o.H2OFrame(pTrain)
hData.set_names(list(pTrain.columns))

hTrain, hValidate = hData.split_frame(ratios=[_validation_ratio_2])

hTest = h2o.H2OFrame(pTest)
hTest.set_names(list(pTest.columns))

# Save filtered frames
h2o.export_file(hTrain, "FilterTrain.csv", force=True)
h2o.export_file(hValidate, "FilterValidate.csv", force=True)
# =========================== Example 5 ===========================
# Keep only the sustained columns in both train and test frames.
for column in all_columns:
    if column not in sustain:
        del train[column]
        del test[column]

# BUG FIX: the original wrote `training_columns = sustain`, aliasing the
# list, so the remove() calls below silently mutated `sustain` as well —
# any later user of `sustain` (e.g. the commented-out filterData call)
# would see the pruned list.  Copy it instead.
training_columns = list(sustain)
training_columns.remove('UnitNumber')
training_columns.remove('RUL')
training_columns.remove('Time')

#filter_train = Process.filterData(panda_frame=train, columns=sustain, removal_method='iqr', threshold=4)
filter_train = train

# Feature engineering: k-closest moving average + rolling standard deviation.
feature_engineered_train = ProcessData.trainDataToFrame(
    training_frame=filter_train,
    moving_k_closest_average=True,
    standard_deviation=True)
# NOTE(review): the *test* frame is also run through trainDataToFrame
# (with rul=True) rather than testDataToFrame — presumably deliberate so
# the test set gains an RUL column; confirm against ProcessData.
feature_engineered_test = ProcessData.trainDataToFrame(
    training_frame=test,
    moving_k_closest_average=True,
    standard_deviation=True,
    rul=True)

# Convert to H2O frames for model training.
h_train = h2o.H2OFrame(feature_engineered_train)

h_train.set_names(list(feature_engineered_train.columns))

h_test = h2o.H2OFrame(feature_engineered_test)
h_test.set_names(list(feature_engineered_test.columns))

model = H2ODeepLearningEstimator(epochs=100,
    series = pData[column]
    anomaly = Test.iqr(series, threshold=3)
    anomaly_series.extend(anomaly)

# De-duplicate anomaly row indexes and sort them.
# BUG FIX: the original called .sort() first and THEN converted to a set,
# which threw the ordering away (sets are unordered); sorted(set(...))
# de-duplicates AND leaves the indexes ascending.
anomaly_series = sorted(set(anomaly_series))
print(anomaly_series)        # print() form, consistent with the rest of the file
print(len(anomaly_series))

# Remove anomalies (drop itself is order-insensitive; sorting only makes
# the printout above readable)
df = pData.drop(pData.index[anomaly_series])

# Feature engineering: moving average, standard deviation and probability
# features
data_frame = ProcessData.trainDataToFrame(df,
                                          moving_k_closest_average=True,
                                          standard_deviation=True,
                                          probability_distribution=True)
testing_frame = ProcessData.testData(moving_k_closest_average=True,
                                     standard_deviation=True,
                                     probability_from_file=True)

# Create h2o frame
hData = h2o.H2OFrame(data_frame)
hData.set_names(list(data_frame.columns))

hTesting = h2o.H2OFrame(testing_frame)
hTesting.set_names(list(testing_frame.columns))

# Split data into training and validation (80/20)
hTrain, hValidate = hData.split_frame(ratios=[0.8])
# =========================== Example 7 ===========================
# Drop the constant / uninformative sensor and setting columns from the
# filtered training frame.
for unused_column in ('Setting3', 'Sensor1', 'Sensor5', 'Sensor10',
                      'Sensor16', 'Sensor18', 'Sensor19'):
    del p_filter[unused_column]

# Feature engineering process: probability features over the informative
# sensor columns only.
columns = [
    'Sensor14', 'Sensor9', 'Sensor11', 'Sensor12', 'Sensor13', 'Sensor7',
    'Sensor4', 'Sensor8', 'Sensor20', 'Sensor21', 'Sensor15', 'Sensor6',
    'Sensor2', 'Sensor17', 'Sensor3'
]
p_featured_train = ProcessData.trainDataToFrame(training_frame=p_filter,
                                                selected_column_names=columns,
                                                probability_distribution=True)
p_featured_test = ProcessData.testDataToFrame(testing_frame=p_test,
                                              selected_column_names=columns,
                                              probability_from_file=True)

# Wrap the engineered pandas frames as H2O frames, keeping column names.
h_filter = h2o.H2OFrame(p_featured_train)
h_filter.set_names(list(p_featured_train.columns))

h_test = h2o.H2OFrame(p_featured_test)
h_test.set_names(list(p_featured_test.columns))

# Training columns = every engineered column minus identifier / target.
training_columns = list(p_featured_train.columns)
for meta_column in ('UnitNumber', 'Time', 'RUL'):
    training_columns.remove(meta_column)