Esempio n. 1
0
# Join the sensor readings from one week earlier as extra features.
# One week is 7 days * 24 hours * 12 samples/hour (5-minute sampling);
# the shift is reduced by the samples already covered by the prediction
# horizon (TIME_DIFFERENCE / SAMPLE_FREQUENCY) — presumably so the
# lagged column lines up with the prediction target; verify against bm.
data_prev = data.shift(7 * 24 * 12 - int(TIME_DIFFERENCE / SAMPLE_FREQUENCY))
data = bm.merge_two_data(data_prev, data)

# Change missing values (recorded as 0) to NaN.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded weekday / daypart columns to the data.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)

# Prepare the sets.
features = len(data.columns)
x_features = features * TIME_STEP

reframed = bm.series_to_supervised(data, TIME_INTERVAL, TIME_DIFFERENCE,
                                   SAMPLE_FREQUENCY)
# Scale the speed values; keep the fitted scaler for possible reuse.
data['Scaled'], sc = bm.scale_data(data)

# Drop the speed column which includes real speed values (the scaled
# values will be used instead).
data.drop(['Speed'], axis='columns', inplace=True)

# Join the readings from one and two weeks earlier as extra features
# (one week = 7 days * 24 hours * 12 samples/hour at 5-minute sampling).
data_prev = data.shift(7 * 24 * 12)
data_prev_2 = data.shift(2 * 7 * 24 * 12)
data_prev = bm.merge_two_data(data_prev_2, data_prev)
data = bm.merge_two_data(data_prev, data)

# Change missing values (recorded as 0) to NaN.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded weekday / daypart columns to the data.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)

# Prepare the sets.
features = len(data.columns)
x_features = features * TIME_STEP
reframed = bm.series_to_supervised(data, TIME_INTERVAL, TIME_DIFFERENCE,
Esempio n. 3
0
# Read the main sensor file and scale its speed values; keep the fitted
# scaler so the other sensors can be scaled consistently.
data = bm.read_data(FILE_NAME)
data['Scaled'], sc = bm.scale_data(data)

# Drop the speed column which includes real speed values (the scaled
# values will be used instead).
data.drop(['Speed'], axis='columns', inplace=True)

# Merging another sensor's data into the main one, reusing the scaler
# `sc` so all sensors share the same scale.
data_2 = bm.read_data(SECOND_FILE)
data_2['Scaled_2'], sc = bm.scale_data(data_2, sc)
data_2.drop(['Speed'], axis='columns', inplace=True)

data_3 = bm.read_data(THIRD_FILE)
data_3['Scaled_3'], sc = bm.scale_data(data_3, sc)
data_3.drop(['Speed'], axis='columns', inplace=True)
data_2 = bm.merge_two_data(data_2, data_3)

data = bm.merge_two_data(data_2, data)

# Join the readings from one week earlier as extra features
# (7 days * 24 hours * 12 samples/hour at 5-minute sampling).
data_prev = data.shift(7 * 24 * 12)
data = bm.merge_two_data(data_prev, data)

# Change missing values (recorded as 0) to NaN.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded weekday / daypart columns to the data.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)
Esempio n. 4
0
    # NOTE(review): tail of a per-file loop — its header is outside this view.
    # Baseline estimate: the mean of all but the last column of `reframed`
    # (presumably the lagged speed features) for each row.
    est = np.mean(reframed.values[:, :-1], axis=1)
    # The last column holds the target (actual) value.
    result = reframed.values[:, -1]
    mape = bm.mean_absolute_percentage_error(result, est)
    mapes[file_name] = mape

# Collect the MAPEs computed above (keyed by file name) into a DataFrame.
mapes_20 = pd.DataFrame.from_dict(data=mapes,
                                  orient='index',
                                  columns=['mape_20'])

mapes = {}

# Repeat the evaluation for every sensor file with a 30-minute interval.
time_interval = 30  # loop-invariant, hoisted out of the loop
for file_name, file_path in FILES.items():
    data = bm.read_data(file_path)
    data['Scaled'], sc = bm.scale_data(data)
    # Drop the real speed values (scaled values are used instead) and
    # change missing values (recorded as 0) to NaN.
    data.drop(['Speed'], axis='columns', inplace=True)
    data.replace(0, np.nan, inplace=True)

    # Reframe as supervised data: one-week horizon (7 * 24 * 60 minutes)
    # at 5-minute sampling; evaluate on May only.
    reframed = bm.series_to_supervised(data, time_interval, 7 * 24 * 60, 5)
    reframed = reframed[reframed.index.month == 5]
    # Baseline estimate: mean of the lag columns; last column is the target.
    est = np.mean(reframed.values[:, :-1], axis=1)
    result = reframed.values[:, -1]
    mape = bm.mean_absolute_percentage_error(result, est)
    mapes[file_name] = mape

mapes_30 = pd.DataFrame.from_dict(data=mapes,
                                  orient='index',
                                  columns=['mape_30'])

# Combine both interval results and write them out.
mapes = bm.merge_two_data(mapes_20, mapes_30)
mapes.to_csv("mapes.csv")