Python impute_simple_imputer 예제들, preprocessing.imputing.impute_simple_imputer Python 예제들

예제 #1

0

파일 보기

def graph_simple_imputer():
    """
    Plot a humidity window before and after simple imputation.

    Loads the "Zch_Stampfenbachstrasse" series of the "zurich" dataset, keeps
    the humidity rows between position -750 and -250, passes them through
    ``impute_simple_imputer`` and draws the imputed and the original curve
    into one line chart. The chart is written to ``simple_imputer.png``.
    """
    full_series: pd.DataFrame = get_time_series(
        get_engine(), "zurich", "Zch_Stampfenbachstrasse")
    humidity_window = full_series[
        ["date", "Zch_Stampfenbachstrasse.Humidity"]][-750:-250]
    original = humidity_window.set_index("date")
    imputed = impute_simple_imputer(original, False)

    # Short, readable column names for both frames.
    original.columns = ["Humidity (Original)"]
    imputed.columns = ["Humidity (Imputed)"]

    # Imputed column first, so the line order matches the legend labels below.
    combined = imputed.join(original)

    sns.set(rc={"figure.figsize": (2, 1)})
    sns.set_theme(style="darkgrid")
    grid = sns.relplot(
        data=combined,
        kind="line",
        dashes=["", ""],  # empty dash spec: both series are drawn solid
        legend=False,
        aspect=2,
    )
    axis = grid.ax
    plt.setp(axis.lines, linewidth=2)
    grid.set(
        xlabel="Time",
        ylabel="Humidity [%]",
        title="Simple Imputer (Humidity Data)",
    )

    # Suppress ticks and tick labels on the time axis.
    axis.xaxis.set_major_locator(plt.NullLocator())
    axis.xaxis.set_major_formatter(plt.NullFormatter())

    plt.legend(loc="lower center", labels=["Imputed", "Original"])
    grid.tight_layout()
    grid.savefig("simple_imputer.png")

예제 #2

0

파일 보기

def graph_moving_average():
    """
    Plot a humidity window with and without moving-average smoothing.

    Loads the "Zch_Stampfenbachstrasse" series of the "zurich" dataset, keeps
    the humidity rows between position -750 and -250, imputes them with
    ``impute_simple_imputer`` and smooths the result with ``moving_average``.
    Both curves are drawn into one line chart that is written to
    ``moving_average.png``. The curve labelled "Original" is the imputed
    series, i.e. the input of the smoothing step.
    """
    full_series: pd.DataFrame = get_time_series(
        get_engine(), "zurich", "Zch_Stampfenbachstrasse")
    humidity_window = full_series[
        ["date", "Zch_Stampfenbachstrasse.Humidity"]][-750:-250]
    humidity_window.set_index("date", inplace=True)
    imputed = impute_simple_imputer(humidity_window, False)
    smoothed = moving_average(imputed, False)

    # Short, readable column names for both frames.
    imputed.columns = ["Humidity (Original)"]
    smoothed.columns = ["Humidity (Smoothed)"]

    # Unsmoothed column first, so the line order matches the legend labels.
    combined = imputed.join(smoothed)

    sns.set(rc={"figure.figsize": (2, 1)})
    sns.set_theme(style="darkgrid")
    grid = sns.relplot(
        data=combined,
        kind="line",
        dashes=["", ""],  # empty dash spec: both series are drawn solid
        legend=False,
        aspect=2,
    )
    axis = grid.ax
    plt.setp(axis.lines, linewidth=2)
    grid.set(
        xlabel="Time",
        ylabel="Humidity [%]",
        title="Moving Average (Humidity Data)",
    )

    # Suppress ticks and tick labels on the time axis.
    axis.xaxis.set_major_locator(plt.NullLocator())
    axis.xaxis.set_major_formatter(plt.NullFormatter())

    plt.legend(loc="lower center", labels=["Original", "Smoothed"])
    grid.fig.autofmt_xdate()
    grid.tight_layout()
    grid.savefig("moving_average.png")

예제 #3

0

파일 보기

파일: graph_input.py 프로젝트: gilleswaeber/dstses

async def main():
	"""
	Draw joint regression plots of the live sensor measurements.

	Overrides the ``drops`` and ``limit`` entries of the ``influx`` config
	section, fetches the data through ``InfluxSensorData``, imputes and
	smooths it, and creates four seaborn joint plots: pm2.5/pm10,
	temperature/humidity, humidity/pm10 and temperature/pm10.

	:return: Nothing.
	"""
	print_header()
	timer_main = Timer()

	config = default_config()

	logger.info("start predicting new time")

	influx_settings = config["influx"]
	influx_settings["drops"] = '["pm1", "pm4.0", "result", "table", "_time"]'
	influx_settings["limit"] = "10000"

	with InfluxSensorData(config=config, name="influx") as client:
		# Fetch from the server, then impute and average the input.
		raw_data = client.get_data()
		avg_data = moving_average(impute_simple_imputer(raw_data))
		logger.info(f"data len {len(avg_data)}")

		sns.set_theme(style="darkgrid")

		# Options common to every joint plot.
		shared = dict(data=avg_data, kind="reg", truncate=False, color="m", height=7)

		# Only the particulate-matter plot pins both axes to the 0..40 range.
		sns.jointplot(x="pm2.5", y="pm10", xlim=(0, 40), ylim=(0, 40), **shared)

		remaining_pairs = (
			("temperature", "humidity"),
			("humidity", "pm10"),
			("temperature", "pm10"),
		)
		for x_name, y_name in remaining_pairs:
			sns.jointplot(x=x_name, y=y_name, **shared)

예제 #4

0

파일 보기

def graph_model_exp_smoothing():
	"""
	Draw the train/test/prediction graph of the exponential smoothing model.

	If the global ``from_excel`` is truthy, the three series come from
	``get_data_from_excel("ExpSmoothing.PM10")``. Otherwise rows -1100 to -900
	of the "Zch_Stampfenbachstrasse" series are imputed, smoothed and
	transformed, a model is trained on the training split and asked to
	predict the test split, whose size is given by the global ``fh``.
	"""
	if from_excel:
		y_train, y_test, y_pred = get_data_from_excel("ExpSmoothing.PM10")
	else:
		window: pd.DataFrame = get_time_series(get_engine(), "zurich", "Zch_Stampfenbachstrasse")[-1100:-900]
		window.drop(columns=["date", "Zch_Stampfenbachstrasse.PM2.5"], inplace=True)
		smoothed = moving_average(impute_simple_imputer(window, False), False)
		y, x = expsmoothing.transform_data(smoothed, False)

		y_train, y_test, x_train, x_test = temporal_train_test_split(y, x, test_size=fh)
		fitted = expsmoothing.train_model_expSmooting(y_train, x_train, False)
		# Horizon 1..fh: one forecast step per row of the test split.
		y_pred = fitted.predict(X=x_test, fh=np.linspace(1, fh, fh))

	# Both branches end in the same graph call.
	write_model_graph(y_train, y_test, y_pred, "Exponential Smoothing")

예제 #5

0

파일 보기

파일: model_main.py 프로젝트: gilleswaeber/dstses

async def main():
	"""
	Train every forecasting model on the live sensor data and print the test forecasts.

	The data is fetched through ``InfluxSensorData``, renamed to the ``Live.*``
	column names, trimmed with ``chop_first_fringe``, imputed, smoothed and
	split 80%/20% in temporal order. All models are trained concurrently in
	worker threads, stored, and asked for a forecast (``fh=5``) on the test
	split. The forecasts are appended as columns to a copy of the test split,
	which is printed.

	:return: Nothing. The function does not return normally.
	:raises SystemExit: always, with status 0, once the results are printed.
	"""
	print_header()
	timer_main = Timer()

	config = default_config()

	# read and prepare dataset for training
	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		df_timeseries_complete = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature",
			"_time": "date"
		})

	df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
	imputed_timeseries = impute_simple_imputer(df_timeseries)
	smooth_timeseries = moving_average(imputed_timeseries)
	smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row
	# Split training/testing data in 80%/20%
	df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

	# Define all models at our disposal
	models = [
		ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
		ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
		ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
		ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
		ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
	]

	# Train the models (one worker thread per model; results keep the order of `models`)
	trained_models = await gather(*[to_thread(train_model, model=model, data=df_train_val) for model in models])
	# Stores if not existing. Does NOT OVERWRITE!!!
	for trained in trained_models:
		trained.model.store(trained.config)

	# Test the generalization performance of our models
	forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]

	# The positions used below follow the order of `models` above.
	predictions: DataFrame = df_test.copy()
	predictions["Arima.PM10"] = forecast_test[0].values
	predictions["AutoArima.PM10"] = forecast_test[1].values
	predictions["ExpSmoothing.PM10"] = forecast_test[2].values
	predictions["LSTM.PM10"] = (forecast_test[3]['Live.PM10_Pred'])
	predictions["LSTMSeq.PM10"] = (forecast_test[4]['Live.PM10_Pred'])

	print(predictions)
	logger.info(f"Script completed in {timer_main}.")
	logger.info("Terminating gracefully...")
	# Raise SystemExit directly: the `exit` helper is injected by the `site`
	# module for interactive use and is not guaranteed to exist in programs.
	raise SystemExit(0)


def _plot_live_forecast(config, trained_models):
	"""
	Forecast from freshly fetched live data and plot the averaged prediction.

	This is the code that used to sit behind the unconditional exit of
	``main()`` and therefore never ran. Its logic is unchanged. ``main()``
	does not call it.

	:param config: Configuration handed to ``InfluxSensorData``.
	:param trained_models: Trained model holders, in the order arima, autoarima,
		expsmoothing, lstm, lstm_seq.
	:return: Nothing.
	"""
	logger.info("start predicting new time")

	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		data = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature"
		})
		imputed_data = impute_simple_imputer(data)  # Impute
		avg_data = moving_average(imputed_data)  # Average input
		logger.debug("Forecasting")
		forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions

		logger.info(forecast_list)
		forecast_dict = {
			"arima": forecast_list[0],
			"autoarima": forecast_list[1],
			"expsmoothing": forecast_list[2],
			"lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
			"lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")]
		}

		forecast = pd.DataFrame(data=forecast_dict)
		logger.debug(forecast)
		# Average over all models, keep the first 50 rows.
		forecast = forecast.mean(axis=1).head(n=50)
		logger.info(f"Forcasting finished with forecast value\n {forecast}")

		sns.set_theme(style="darkgrid")
		sns.lineplot(data=forecast)

예제 #6

0

파일 보기

파일: graph_result_to_forecast.py 프로젝트: gilleswaeber/dstses

async def main():
    """
    Train the forecasting models and plot their averaged live forecast next to measured data.

    The "zurich_adapter" dataset is trimmed, imputed, smoothed and split
    80%/20% in temporal order. All models are trained concurrently in worker
    threads, stored, and evaluated on the test split (``fh=5``). Afterwards
    live data is fetched through ``InfluxSensorData``, forecast by every
    model, averaged per row and limited to 50 rows. Finally the last 50 rows
    of the first remaining live column are fetched and drawn together with
    the forecast in one seaborn line plot.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()

    config = default_config()

    # Training data: load, chop the improperly filled leading rows, impute, smooth.
    raw_timeseries = load_dataset("zurich_adapter", config)
    trimmed_timeseries = chop_first_fringe(raw_timeseries)
    smooth_timeseries = moving_average(impute_simple_imputer(trimmed_timeseries))
    # Make sure there really is no empty cell anymore, else drop row
    smooth_timeseries.dropna(inplace=True)

    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Name and trainer of every model at our disposal.
    model_specs = (
        ("arima", train_or_load_ARIMA),
        ("autoarima", train_or_load_autoARIMA),
        ("expsmooting", train_or_load_expSmoothing),
        ("lstm", train_or_load_LSTM),
        ("lstm_seq", train_or_load_LSTM),
    )
    models = [
        ModelHolder(name=model_name, trainer=trainer, config=config)
        for model_name, trainer in model_specs
    ]

    # Train the models, one worker thread each.
    training_jobs = [
        to_thread(train_model, model=holder, data=df_train_val)
        for holder in models
    ]
    trained_models = await gather(*training_jobs)

    # Stores if not existing. Does NOT OVERWRITE!!!
    for trained in trained_models:
        trained.model.store(trained.config)

    # Test the generalization performance of our models
    forecast_test = [
        trained.model.predict(x=df_test, fh=5) for trained in trained_models
    ]
    print(forecast_test)

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")

    logger.info("start predicting new time")

    # Placeholder entries; replaced inside the `with` block below.
    forecast_dict = {
        "arima": pd.Series(),
        "autoarima": pd.Series(),
        "expsmoothing": pd.Series(),
        "lstm": pd.Series(),
        "lstm_seq": pd.Series(),
    }
    with InfluxSensorData(config=config, name="influx") as client:
        live_columns = {
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
        }
        # Load the data from the server, then impute and average it.
        live_data = client.get_data().rename(columns=live_columns)
        avg_data = moving_average(impute_simple_imputer(live_data))
        logger.debug("Forecasting")
        forecast_list = [
            trained.model.predict(x=avg_data, fh=5)
            for trained in trained_models
        ]

        logger.info(forecast_list)
        # NOTE(review): the two LSTM entries read positions 0 and 1 of
        # forecast_list (presumably the arima/autoarima results, going by the
        # model order) where positions 3 and 4 were used before. Kept exactly
        # as found; confirm whether this is intended.
        forecast_dict = {
            "arima": forecast_list[0],
            "autoarima": forecast_list[1],
            "expsmoothing": forecast_list[2],
            # was item 3
            "lstm": forecast_list[0].iloc[
                :, forecast_list[0].columns.get_loc("Live.PM10_Pred")],
            # was item 4
            "lstm_seq": forecast_list[1].iloc[
                :, forecast_list[1].columns.get_loc("Live.PM10_Pred")],
        }

    forecast_frame = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast_frame)
    # Average over all models, keep the first 50 rows.
    forecast = forecast_frame.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    # Second query with different limit and drops settings.
    influx_settings = config["influx"]
    influx_settings["limit"] = "150"
    influx_settings["drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
        # Re-number from 0 so the rows share the x positions 0..len-1.
        data.index = range(len(data))
        data = data.iloc[:, 0]
        print(f"data {data}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])