def graph_simple_imputer():
    """
    Plot a window of the humidity series before and after simple imputation.

    Rows ``-750:-250`` of the Zch_Stampfenbachstrasse humidity column are
    passed through ``impute_simple_imputer`` and drawn together with the
    untouched values. The figure is saved as ``simple_imputer.png``.
    """
    raw: pd.DataFrame = get_time_series(get_engine(), "zurich", "Zch_Stampfenbachstrasse")
    window = raw[["date", "Zch_Stampfenbachstrasse.Humidity"]][-750:-250]

    original = window.set_index("date")
    imputed = impute_simple_imputer(original, False)

    # Rename the single value column of each frame so the join yields two
    # distinguishable series (imputed first, original second).
    original.columns = ["Humidity (Original)"]
    imputed.columns = ["Humidity (Imputed)"]
    combined = imputed.join(original)

    sns.set(rc={"figure.figsize": (2, 1)})
    sns.set_theme(style="darkgrid")
    grid = sns.relplot(
        data=combined,
        kind="line",
        dashes=["", ""],
        legend=False,
        aspect=2,
    )
    plt.setp(grid.ax.lines, linewidth=2)
    grid.set(
        xlabel="Time",
        ylabel="Humidity [%]",
        title="Simple Imputer (Humidity Data)",
    )

    # Hide the x-axis ticks and tick labels entirely.
    grid.ax.xaxis.set_major_locator(plt.NullLocator())
    grid.ax.xaxis.set_major_formatter(plt.NullFormatter())

    # Label order follows the column order of the joined frame.
    plt.legend(loc="lower center", labels=["Imputed", "Original"])
    grid.tight_layout()
    grid.savefig("simple_imputer.png")
def graph_moving_average():
    """
    Plot a window of the humidity series with and without the moving average.

    Rows ``-750:-250`` of the Zch_Stampfenbachstrasse humidity column are
    imputed with ``impute_simple_imputer`` and then passed through
    ``moving_average``; both series are drawn in one figure, which is saved
    as ``moving_average.png``.
    """
    raw: pd.DataFrame = get_time_series(get_engine(), "zurich", "Zch_Stampfenbachstrasse")
    window = raw[["date", "Zch_Stampfenbachstrasse.Humidity"]][-750:-250]
    window.set_index("date", inplace=True)

    imputed = impute_simple_imputer(window, False)
    smoothed = moving_average(imputed, False)

    # Rename the single value column of each frame so the join yields two
    # distinguishable series (original first, smoothed second).
    imputed.columns = ["Humidity (Original)"]
    smoothed.columns = ["Humidity (Smoothed)"]
    combined = imputed.join(smoothed)

    sns.set(rc={"figure.figsize": (2, 1)})
    sns.set_theme(style="darkgrid")
    grid = sns.relplot(
        data=combined,
        kind="line",
        dashes=["", ""],
        legend=False,
        aspect=2,
    )
    plt.setp(grid.ax.lines, linewidth=2)
    grid.set(
        xlabel="Time",
        ylabel="Humidity [%]",
        title="Moving Average (Humidity Data)",
    )

    # Hide the x-axis ticks and tick labels entirely.
    grid.ax.xaxis.set_major_locator(plt.NullLocator())
    grid.ax.xaxis.set_major_formatter(plt.NullFormatter())

    # Label order follows the column order of the joined frame.
    plt.legend(loc="lower center", labels=["Original", "Smoothed"])
    grid.fig.autofmt_xdate()
    grid.tight_layout()
    grid.savefig("moving_average.png")
async def main():
    """
    Main function of the application.

    Fetches sensor data through ``InfluxSensorData``, imputes and smooths it,
    and draws regression joint plots for four pairs of measurements.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()  # never read in this function; construction kept as in the original
    config = default_config()

    logger.info("start predicting new time")
    # Override the Influx settings before the client is created.
    config["influx"]["drops"] = '["pm1", "pm4.0", "result", "table", "_time"]'
    config["influx"]["limit"] = "10000"

    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data()
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input

    logger.info(f"data len {len(avg_data)}")

    sns.set_theme(style="darkgrid")

    # Keyword arguments common to every joint plot.
    shared = dict(data=avg_data, kind="reg", truncate=False, color="m", height=7)

    # Only the PM2.5/PM10 plot fixes its axis limits.
    sns.jointplot(x="pm2.5", y="pm10", xlim=(0, 40), ylim=(0, 40), **shared)

    remaining_pairs = (
        ("temperature", "humidity"),
        ("humidity", "pm10"),
        ("temperature", "pm10"),
    )
    for x_column, y_column in remaining_pairs:
        sns.jointplot(x=x_column, y=y_column, **shared)
def graph_model_exp_smoothing():
    """
    Draw the train / test / prediction graph for exponential smoothing.

    ``from_excel`` and ``fh`` are not defined here; they are read from the
    surrounding scope. When ``from_excel`` is truthy the three series come
    from ``get_data_from_excel``; otherwise a model is trained on rows
    ``-1100:-900`` of the Zch_Stampfenbachstrasse series and asked for ``fh``
    forecast steps.
    """
    if from_excel:
        y_train, y_test, y_pred = get_data_from_excel("ExpSmoothing.PM10")
    else:
        series: pd.DataFrame = get_time_series(
            get_engine(), "zurich", "Zch_Stampfenbachstrasse"
        )[-1100:-900]
        series.drop(columns=["date", "Zch_Stampfenbachstrasse.PM2.5"], inplace=True)

        imputed = impute_simple_imputer(series, False)
        smoothed = moving_average(imputed, False)

        y, x = expsmoothing.transform_data(smoothed, False)
        y_train, y_test, x_train, x_test = temporal_train_test_split(y, x, test_size=fh)

        fitted = expsmoothing.train_model_expSmooting(y_train, x_train, False)
        # Forecast horizon: the steps 1..fh.
        horizon = np.linspace(1, fh, fh)
        y_pred = fitted.predict(X=x_test, fh=horizon)

    # Both data sources end in the same plot call.
    write_model_graph(y_train, y_test, y_pred, "Exponential Smoothing")
async def main():
    """
    Main function of the application.

    Loads sensor data through ``InfluxSensorData``, prepares it (fringe chop,
    imputation, moving average), trains every model concurrently, prints the
    test-set PM10 forecasts of all models side by side and then exits the
    process with status 0.

    :return: Nothing. The function does not return normally: it ends in
        ``exit(0)``.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    # df_timeseries_complete = load_dataset("zurich_adapter", config)
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        df_timeseries_complete = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
            "_time": "date"
        })

    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row

    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models; gather() returns the results in the order of `models`,
    # which the positional indexing below relies on.
    trained_models = await gather(
        *[to_thread(train_model, model=model, data=df_train_val) for model in models]
    )

    # Stores if not existing. Does NOT OVERWRITE!!!
    # A plain loop: the call is made for its side effect, not to build a list.
    for trained in trained_models:
        trained.model.store(trained.config)

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]

    # Collect every model's PM10 forecast next to the test data.
    # (Named `predictions`, not `all`, so the builtin is not shadowed.)
    predictions: DataFrame = df_test.copy()
    predictions["Arima.PM10"] = forecast_test[0].values
    predictions["AutoArima.PM10"] = forecast_test[1].values
    predictions["ExpSmoothing.PM10"] = forecast_test[2].values
    predictions["LSTM.PM10"] = forecast_test[3]['Live.PM10_Pred']
    predictions["LSTMSeq.PM10"] = forecast_test[4]['Live.PM10_Pred']
    # predictions.to_csv(PROJECT_DIR / 'pm10_predictions.csv')
    print(predictions)

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")
    exit(0)

    # NOTE(review): everything below is unreachable because of the exit(0)
    # above. It is kept unchanged so the live-forecast path can be re-enabled
    # by removing the exit; confirm whether it is still wanted.
    logger.info("start predicting new time")
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature"
        })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input

    logger.debug("Forecasting")
    forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
    logger.info(forecast_list)

    forecast_dict = {
        "arima": forecast_list[0],
        "autoarima": forecast_list[1],
        "expsmoothing": forecast_list[2],
        "lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
        "lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")]
    }
    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)

    # Average the models' forecasts per row and keep the first 50 rows.
    forecast = forecast.mean(axis=1).head(n=50)
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=forecast)
async def main():
    """
    Main function of the application.

    Loads the ``zurich_adapter`` dataset, prepares it (fringe chop,
    imputation, moving average), trains every model concurrently and prints
    the test-set forecasts. It then fetches live sensor data through
    ``InfluxSensorData``, averages the forecasts of all models and plots the
    averaged forecast together with the latest 50 values of the remaining
    live column.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)
    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row

    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models; gather() returns the results in the order of `models`,
    # which the positional indexing of the forecasts below relies on.
    trained_models = await gather(
        *[to_thread(train_model, model=model, data=df_train_val) for model in models]
    )

    # Stores if not existing. Does NOT OVERWRITE!!!
    # A plain loop: the call is made for its side effect, not to build a list.
    for trained in trained_models:
        trained.model.store(trained.config)

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]
    print(forecast_test)
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    logger.info("start predicting new time")
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature"
        })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input

    logger.debug("Forecasting")
    forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
    logger.info(forecast_list)

    # Indices follow the order of `models`: 0 arima, 1 autoarima,
    # 2 expsmoothing, 3 lstm, 4 lstm_seq. The LSTM entries previously read
    # items 0 and 1, i.e. the ARIMA forecasts, under the LSTM keys.
    forecast_dict = {
        "arima": forecast_list[0],
        "autoarima": forecast_list[1],
        "expsmoothing": forecast_list[2],
        "lstm": forecast_list[3]["Live.PM10_Pred"],
        "lstm_seq": forecast_list[4]["Live.PM10_Pred"]
    }
    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)

    # Average the models' forecasts per row and keep the first 50 rows.
    forecast = forecast.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    # Second query: fewer rows and more dropped columns than the first one.
    config["influx"]["limit"] = "150"
    config["influx"]["drops"] = (
        '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    )
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)

    # Re-index from 0 so the live values share the x positions 0..len-1.
    data.index = range(len(data))
    data = data.iloc[:, 0]
    print(f"data {data}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])

    # Logged last so the messages describe the real end of the run.
    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")