def train_model(model: ModelHolder, data: pd.DataFrame):
    """
    Run the holder's trainer and attach the result to the holder.

    :param model: Holder providing ``trainer``, ``name`` and ``config``; its
        ``model`` attribute is overwritten with the trainer's return value.
    :param data: Training data handed to the trainer unchanged.
    :return: The same holder instance that was passed in.
    """
    fit = model.trainer
    fitted = fit(model.name, model.config, data)
    model.model = fitted
    return model
async def main():
    """
    Main function of the application.

    Fetches the sensor time series through ``InfluxSensorData``, cleans and
    smooths it, trains all models concurrently via ``gather``/``to_thread``,
    stores them, prints the test-split forecasts next to the test data and
    then terminates the process with exit status 0.

    NOTE(review): a second ``async def main()`` appears later in this file and
    rebinds the name, so this definition is shadowed once the module is
    loaded -- confirm which version is meant to stay.

    :return: Nothing.
    :raises SystemExit: Always, with code 0, once the test forecasts have been
        printed.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    # df_timeseries_complete = load_dataset("zurich_adapter", config)
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        df_timeseries_complete = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
            "_time": "date",
        })
    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row

    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal. The order matters: forecasts are
    # looked up by position further down.
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        # NOTE(review): "expsmooting" is spelled differently from the
        # "expsmoothing" forecast key below; left untouched because the name is
        # passed to the trainer -- confirm before renaming.
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config),
    ]

    # Train the models
    trained_models = await gather(*[
        to_thread(train_model, model=model, data=df_train_val) for model in models
    ])
    for trained in trained_models:
        trained.model.store(trained.config)  # Stores if not existing. Does NOT OVERWRITE!!!

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]
    # Named ``predictions`` (not ``all``) so the builtin is not shadowed.
    predictions: DataFrame = df_test.copy()
    predictions["Arima.PM10"] = forecast_test[0].values
    predictions["AutoArima.PM10"] = forecast_test[1].values
    predictions["ExpSmoothing.PM10"] = forecast_test[2].values
    predictions["LSTM.PM10"] = forecast_test[3]['Live.PM10_Pred']
    predictions["LSTMSeq.PM10"] = forecast_test[4]['Live.PM10_Pred']
    # predictions.to_csv(PROJECT_DIR / 'pm10_predictions.csv')
    print(predictions)

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")
    # ``exit()`` is a helper injected by ``site`` for interactive use; raising
    # SystemExit directly ends the program the same way without relying on it.
    raise SystemExit(0)

    # NOTE(review): everything below is unreachable because of the early exit
    # above. It is kept unchanged so live forecasting can be re-enabled by
    # removing that exit.
    logger.info("start predicting new time")
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
        })
    imputed_data = impute_simple_imputer(data)  # Impute
    avg_data = moving_average(imputed_data)  # Average input
    logger.debug("Forecasting")
    forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
    logger.info(forecast_list)
    forecast_dict = {
        "arima": forecast_list[0],
        "autoarima": forecast_list[1],
        "expsmoothing": forecast_list[2],
        "lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
        "lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")],
    }
    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)
    # Average the individual model forecasts row by row.
    forecast = forecast.mean(axis=1).head(n=50)
    logger.info(f"Forcasting finished with forecast value\n {forecast}")
    sns.set_theme(style="darkgrid")
    sns.lineplot(data=forecast)
async def main():
    """
    Main function of the application.

    Loads the training dataset with ``load_dataset``, cleans and smooths it,
    trains all models concurrently via ``gather``/``to_thread``, stores them
    and prints the test-split forecasts. It then fetches fresh sensor data
    through ``InfluxSensorData``, forecasts with every model, averages the
    forecasts row by row and plots the average next to the most recent
    measurements.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)
    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row

    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal. The order matters: forecasts are
    # looked up by position further down.
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        # NOTE(review): "expsmooting" is spelled differently from the
        # "expsmoothing" forecast key below; left untouched because the name is
        # passed to the trainer -- confirm before renaming.
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config),
    ]

    # Train the models
    trained_models = await gather(*[
        to_thread(train_model, model=model, data=df_train_val) for model in models
    ])
    for trained in trained_models:
        trained.model.store(trained.config)  # Stores if not existing. Does NOT OVERWRITE!!!

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]
    print(forecast_test)
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    # NOTE(review): these two messages are emitted although the live
    # forecasting below still runs afterwards -- consider moving them to the
    # end of the function.
    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")

    logger.info("start predicting new time")
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
        })
    imputed_data = impute_simple_imputer(data)  # Impute
    avg_data = moving_average(imputed_data)  # Average input
    logger.debug("Forecasting")
    forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
    logger.info(forecast_list)

    # forecast_list follows the order of ``models``, so the LSTM results are
    # items 3 and 4. Items 0 and 1 are the ARIMA/AutoARIMA forecasts and must
    # not be reused for the LSTM entries.
    lstm_forecast = forecast_list[3]
    lstm_seq_forecast = forecast_list[4]
    forecast_dict = {
        "arima": forecast_list[0],
        "autoarima": forecast_list[1],
        "expsmoothing": forecast_list[2],
        "lstm": lstm_forecast.iloc[:, lstm_forecast.columns.get_loc("Live.PM10_Pred")],
        "lstm_seq": lstm_seq_forecast.iloc[:, lstm_seq_forecast.columns.get_loc("Live.PM10_Pred")],
    }
    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)
    # Average the individual model forecasts row by row.
    forecast = forecast.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    # Re-query the sensor store with adjusted "limit"/"drops" settings to get
    # the latest measurements for comparison with the forecast.
    config["influx"]["limit"] = "150"
    config["influx"]["drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
    # Re-index from 0 so the measurements get a plain positional index.
    data.index = range(len(data))
    data = data.iloc[:, 0]  # keep only the first column
    print(f"data {data}")
    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])