# --- Pipeline variant: single sensor, lag shifted by the prediction horizon ---
# NOTE(review): this fragment was collapsed onto one physical line in the file,
# which turned ALL of it into a comment (dead code). Reconstructed below; the
# lines kept as comments were already disabled in the original.

# Optional: merge two additional sensors into the main frame (disabled).
#data_2 = bm.read_data(SECOND_FILE)
#data_2['Scaled_2'], sc = bm.scale_data(data_2, sc)
#data_2.drop(['Speed'], axis='columns', inplace=True)
#data_3 = bm.read_data(THIRD_FILE)
#data_3['Scaled_3'], sc = bm.scale_data(data_3, sc)
#data_3.drop(['Speed'], axis='columns', inplace=True)
#data_2 = bm.merge_two_data(data_2, data_3)
#data = bm.merge_two_data(data_2, data)

# Add previous-week data as extra features: 7*24*12 rows = one week at a
# 5-minute sampling rate, pulled back by the prediction horizon
# (TIME_DIFFERENCE / SAMPLE_FREQUENCY samples) so the lag aligns with the target.
data_prev = data.shift(7 * 24 * 12 - int(TIME_DIFFERENCE / SAMPLE_FREQUENCY))
#data_prev_2 = data.shift(2 * 7 * 24 * 12)
#data_prev = bm.merge_two_data(data_prev_2, data_prev)
data = bm.merge_two_data(data_prev, data)

# Change missing values recorded as 0 to NaN so they are excluded downstream.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded calendar features.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)
#data = bm.join_minute_one_hot(data)

# Prepare the supervised-learning frame: each row holds TIME_STEP past
# observations of every feature plus the future target.
features = len(data.columns)
x_features = features * TIME_STEP
reframed = bm.series_to_supervised(data, TIME_INTERVAL, TIME_DIFFERENCE, SAMPLE_FREQUENCY)
# --- Pipeline variant: single sensor, one-week AND two-week lag features ---
# NOTE(review): this fragment was collapsed onto one physical line in the file,
# which turned ALL of it into a comment (dead code). Reconstructed below.

#data = pd.concat([bm.read_data("preprocessed_471_2016.csv"), data])

# Scale the speed values; keep the fitted scaler `sc` for reuse.
data['Scaled'], sc = bm.scale_data(data)
# Drop the Speed column which holds real speed values (scaled values are used instead).
data.drop(['Speed'], axis='columns', inplace=True)

# Optional: merge another sensor's data into the main frame (disabled).
#data_2 = bm.read_data("preprocessed_470.csv")
#data_2['Scaled_2'], sc = bm.scale_data(data_2, sc)
#data_2.drop(['Speed'], axis='columns', inplace=True)
#data = bm.merge_two_data(data_2, data)

# Add previous-week and two-weeks-ago data as extra features
# (7*24*12 rows = one week at a 5-minute sampling rate).
data_prev = data.shift(7 * 24 * 12)
data_prev_2 = data.shift(2 * 7 * 24 * 12)
data_prev = bm.merge_two_data(data_prev_2, data_prev)
data = bm.merge_two_data(data_prev, data)

# Change missing values recorded as 0 to NaN so they are excluded downstream.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded calendar features.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)
#data = bm.join_minute_one_hot(data)

# Prepare the supervised-learning frame.
features = len(data.columns)
x_features = features * TIME_STEP
# NOTE(review): the original call was truncated after the third argument;
# the fourth argument is inferred from the identical call in the sibling
# variant of this script — confirm against the intended version.
reframed = bm.series_to_supervised(data, TIME_INTERVAL, TIME_DIFFERENCE,
                                   SAMPLE_FREQUENCY)
# --- Pipeline variant: three sensors merged, one-week lag features ---
# NOTE(review): this fragment was collapsed onto one physical line in the file,
# so only the first statement executed and everything after the first '#'
# (scaling, sensor merge, lags, one-hots) was accidentally dead. Reconstructed.

data = bm.read_data(FILE_NAME)
#data = pd.concat([bm.read_data("preprocessed_471_2016.csv"), data])

# Scale the speed values; keep the fitted scaler `sc` so the other sensors
# are scaled with the same transform.
data['Scaled'], sc = bm.scale_data(data)
# Drop the Speed column which holds real speed values (scaled values are used instead).
data.drop(['Speed'], axis='columns', inplace=True)

# Merge two additional sensors' data into the main frame, reusing scaler `sc`.
data_2 = bm.read_data(SECOND_FILE)
data_2['Scaled_2'], sc = bm.scale_data(data_2, sc)
data_2.drop(['Speed'], axis='columns', inplace=True)
data_3 = bm.read_data(THIRD_FILE)
data_3['Scaled_3'], sc = bm.scale_data(data_3, sc)
data_3.drop(['Speed'], axis='columns', inplace=True)
data_2 = bm.merge_two_data(data_2, data_3)
data = bm.merge_two_data(data_2, data)

# Add previous-week data as extra features
# (7*24*12 rows = one week at a 5-minute sampling rate).
data_prev = data.shift(7 * 24 * 12)
#data_prev_2 = data.shift(2 * 7 * 24 * 12)
#data_prev = bm.merge_two_data(data_prev_2, data_prev)
data = bm.merge_two_data(data_prev, data)

# Change missing values recorded as 0 to NaN so they are excluded downstream.
data.replace(0, np.nan, inplace=True)

# Add one-hot encoded calendar features.
data = bm.join_weekday_one_hot(data)
data = bm.join_daypart_one_hot(data)
# --- Naive-baseline MAPE evaluation: averaged past values as the forecast ---
# NOTE(review): this fragment was collapsed onto one physical line (a syntax
# error as written). Reconstructed below. The first five statements look like
# the TAIL of a preceding per-file loop (the mape_20 pass) whose header is
# outside this view — confirm against the full file before running top-level.

# Estimate = mean of the past observations (all columns but the last);
# target = the actual future value (last column).
est = np.mean(reframed.values[:, :-1], axis=1)
result = reframed.values[:, -1]
mape = bm.mean_absolute_percentage_error(result, est)
mapes[file_name] = mape
mapes_20 = pd.DataFrame.from_dict(data=mapes, orient='index', columns=['mape_20'])

# Second pass: 30-minute history window, 1-week-ahead target, 5-minute samples.
mapes = {}
time_interval = 30  # minutes of history used as the estimate (loop-invariant)
for file_name, file_path in FILES.items():
    data = bm.read_data(file_path)
    data['Scaled'], sc = bm.scale_data(data)
    data.drop(['Speed'], axis='columns', inplace=True)
    data.replace(0, np.nan, inplace=True)
    # horizon = 7*24*60 minutes (one week), sampled every 5 minutes
    reframed = bm.series_to_supervised(data, time_interval, 7 * 24 * 60, 5)
    # Evaluate on May only.
    reframed = reframed[reframed.index.month == 5]
    est = np.mean(reframed.values[:, :-1], axis=1)
    result = reframed.values[:, -1]
    mapes[file_name] = bm.mean_absolute_percentage_error(result, est)
mapes_30 = pd.DataFrame.from_dict(data=mapes, orient='index', columns=['mape_30'])

# Join both passes side by side and persist.
mapes = bm.merge_two_data(mapes_20, mapes_30)
mapes.to_csv("mapes.csv")