Example #1
def train_model(model: ModelHolder, data: pd.DataFrame):
    # Train (or load) the model via the holder's trainer and attach the result to the holder.
    model.model = model.trainer(model.name, model.config, data)
    return model
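Example #1 assumes a small ModelHolder container that carries a model's name, its train-or-load callable, its configuration, and, once train_model() has run, the fitted model itself. A minimal sketch of such a class, written here as a plain dataclass (hypothetical; the project's actual definition may differ):

from dataclasses import dataclass
from typing import Any, Callable, Optional


@dataclass
class ModelHolder:
    # Bundles everything needed to train one forecaster.
    name: str                     # key the trainer uses for storing/loading
    trainer: Callable[..., Any]   # called as trainer(name, config, data)
    config: Any                   # dict-like configuration, e.g. from default_config()
    model: Optional[Any] = None   # populated by train_model()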
Example #2
async def main():
	"""
	Main function of the application.
	:return: Nothing.
	"""
	print_header()
	timer_main = Timer()

	config = default_config()

	# read and prepare dataset for training
	# df_timeseries_complete = load_dataset("zurich_adapter", config)
	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		df_timeseries_complete = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature",
			"_time": "date"
		})

	df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop off the improperly filled leading rows
	imputed_timeseries = impute_simple_imputer(df_timeseries)
	smooth_timeseries = moving_average(imputed_timeseries)
	smooth_timeseries.dropna(inplace=True)  # Drop any rows that still contain empty cells
	# Split training/testing data in 80%/20%
	df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

	# Define all models at our disposal
	models = [
		ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
		ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
		ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
		ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
		ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
	]

	# Train the models
	trained_models = await gather(*[to_thread(train_model, model=model, data=df_train_val) for model in models])
	for model in trained_models:
		model.model.store(model.config)  # Stores only if not already present; does NOT overwrite.

	# Test the generalization performance of our models
	forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]

	results: DataFrame = df_test.copy()
	results["Arima.PM10"] = forecast_test[0].values
	results["AutoArima.PM10"] = forecast_test[1].values
	results["ExpSmoothing.PM10"] = forecast_test[2].values
	results["LSTM.PM10"] = forecast_test[3]["Live.PM10_Pred"]
	results["LSTMSeq.PM10"] = forecast_test[4]["Live.PM10_Pred"]
	# results.to_csv(PROJECT_DIR / 'pm10_predictions.csv')

	print(results)
	logger.info(f"Script completed in {timer_main}.")
	logger.info("Terminating gracefully...")
	exit(0)

	logger.info("start predicting new time")

	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		data = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature"
		})
		imputed_data = impute_simple_imputer(data) # Impute
		avg_data = moving_average(imputed_data) # Average input
		logger.debug("Forecasting")
		forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models] # Make predictions

		logger.info(forecast_list)
		forecast_dict = {
			"arima": forecast_list[0],
			"autoarima": forecast_list[1],
			"expsmoothing": forecast_list[2],
			"lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
			"lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")]
		}

		forecast = pd.DataFrame(data=forecast_dict)
		logger.debug(forecast)
		forecast = forecast.mean(axis=1).head(n=50)
		logger.info(f"Forecasting finished with forecast value\n {forecast}")

		sns.set_theme(style="darkgrid")
		sns.lineplot(data=forecast)
Example #3
async def main():
    """
	Main function of the application.
	:return: Nothing.
	"""
    print_header()
    timer_main = Timer()

    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)

    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop off the improperly filled leading rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Drop any rows that still contain empty cells
    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries,
                                                      test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima",
                    trainer=train_or_load_autoARIMA,
                    config=config),
        ModelHolder(name="expsmooting",
                    trainer=train_or_load_expSmoothing,
                    config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models
    trained_models = await gather(*[
        to_thread(train_model, model=model, data=df_train_val)
        for model in models
    ])
    for model in trained_models:
        model.model.store(model.config)  # Stores only if not already present; does NOT overwrite.

    # Test the generalization performance of our models
    forecast_test = [
        model.model.predict(x=df_test, fh=5) for model in trained_models
    ]

    print(forecast_test)

    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")

    logger.info("start predicting new time")

    forecast_dict = {
        "arima": pd.Series(dtype=float),
        "autoarima": pd.Series(dtype=float),
        "expsmoothing": pd.Series(dtype=float),
        "lstm": pd.Series(dtype=float),
        "lstm_seq": pd.Series(dtype=float)
    }
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(
            columns={
                "humidity": "Live.Humidity",
                "pm10": "Live.PM10",
                "temperature": "Live.Temperature"
            })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.debug("Forecasting")
        forecast_list = [
            model.model.predict(x=avg_data, fh=5) for model in trained_models
        ]  # Make predictions

        logger.info(forecast_list)
        forecast_dict = {
            "arima": forecast_list[0],
            "autoarima": forecast_list[1],
            "expsmoothing": forecast_list[2],
            "lstm": forecast_list[3]["Live.PM10_Pred"],
            "lstm_seq": forecast_list[4]["Live.PM10_Pred"]
        }

    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)
    forecast = forecast.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    config["influx"]["limit"] = "150"
    config["influx"][
        "drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
        data.index = range(len(data))
        data = data.iloc[:, 0]
        print(f"data {data}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])
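Both main() variants are coroutines (they await gather() and to_thread(), so an event loop and Python 3.9+ are required), and the seaborn plot only appears once matplotlib renders it. A minimal entry-point sketch, assuming matplotlib is available as seaborn's plotting backend:

import asyncio

import matplotlib.pyplot as plt

if __name__ == "__main__":
    asyncio.run(main())  # drive the coroutine to completion
    plt.show()           # assumption: display the seaborn line plot drawn inside main()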