def create_test_pipeline(modules):
    """Assemble the test pipeline around the two given regressor modules.

    :param modules: Pair that is unpacked as ``(regressor_svr, regressor_lin_reg)``.
    :return: The pipeline containing both conditional regressor steps and the
        RMSE step.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline works on batches of one hour.
    test_pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR step: its condition is the negation of is_daytime, so it is meant to
    # be called whenever it is not daytime.
    svr_forecast = svr_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear regression step: meant to be called whenever it is daytime.
    lin_reg_forecast = lin_reg_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # Root mean squared error (RMSE) of both regressor outputs against the
    # true values; a line plot callback and a CSV callback are attached.
    RmseCalculator()(
        y_hat=(svr_forecast, lin_reg_forecast),
        y=test_pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return test_pipeline
def create_test_pipeline(modules):
    """Assemble the test pipeline with a rolling RMSE around the two given regressors.

    :param modules: Pair that is unpacked as ``(regressor_svr, regressor_lin_reg)``.
    :return: The pipeline containing both conditional regressor steps and the
        rolling RMSE step.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline works on batches of one hour.
    test_pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR step: its condition is the negation of is_daytime, so it is meant to
    # be called whenever it is not daytime.
    svr_forecast = svr_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear regression step: meant to be called whenever it is daytime.
    lin_reg_forecast = lin_reg_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # TODO: what kind of RMSE has to be used here?
    #  * Rolling would not work, since the complete RMSE should be calculated
    #    for each time point.
    #  * Summaries do not work, since summaries are only executed once.
    #  Is the current solution useful?
    #  Possible solution: window_size=-1 means that the window spans from the
    #  start until the current point in time. In that case, the online learning
    #  has to be built in such a way that modules only calculate data for the
    #  desired/requested time steps.

    # RMSE of both regressor outputs against the true values over a window of
    # one day ("d"); a line plot callback and a CSV callback are attached.
    RollingRMSE(window_size=1, window_size_unit="d")(
        y_hat=(svr_forecast, lin_reg_forecast),
        y=test_pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return test_pipeline
def pipe(params):
    """Build a Keras pipeline for ``params``, train it and evaluate it.

    Reads ``train``, ``test`` and ``start`` from outside this function; they
    are not defined here.

    :param params: Handed unchanged to ``get_keras_model``.
    :return: Dict with ``loss`` (the ``RmseCalculator`` result of the test run
        as float), ``status`` (``STATUS_OK``) and ``eval_time``
        (``time.time() - start``).
    """
    model = get_keras_model(params)

    pipeline = Pipeline(path="../results")

    # Fill gaps in the raw series, then scale it.
    imputer = LinearInterpolater(method='nearest', dim='time', name='imputer_power')
    imputed_power = imputer(x=pipeline['load_power_statistics'])

    power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
    scaled_power = power_scaler(x=imputed_power)

    # Lagged copies of the scaled series serve as model inputs.
    lag_one = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
    lag_two = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

    keras_module = KerasWrapper(
        model,
        fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
        compile_kwargs={'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']},
    )
    keras_forecast = keras_module(
        ClockShift_Lag1=lag_one,
        ClockShift_Lag2=lag_two,
        target=scaled_power,
    )

    # Reuse the scaler in transform mode with the inverse transform so the
    # forecast is back on the scale of the input data.
    rescaled_forecast = power_scaler(
        x=keras_forecast,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('prediction')],
    )

    # RMSE of the rescaled forecast against the raw series, with a CSV callback.
    rmse_step = RmseCalculator()(
        keras_model=rescaled_forecast,
        y=pipeline['load_power_statistics'],
        callbacks=[CSVCallback('RMSE')],
    )

    pipeline.train(train)
    test_result = pipeline.test(test)

    loss = float(test_result['RmseCalculator'].values)
    return {
        "loss": loss,
        "status": STATUS_OK,
        "eval_time": time.time() - start,
    }
# Modules that are shared between the train pipeline and the second pipeline.
# NOTE(review): both regressors are registered under the same name
# "Regression" - confirm that this is intended.
regressor_lin_reg = SKLearnWrapper(
    module=LinearRegression(fit_intercept=True),
    name="Regression",
)
regressor_svr = SKLearnWrapper(module=SVR(), name="Regression")
power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")

# Build a train pipeline. In this pipeline, each step processes all data at once.
train_pipeline = Pipeline(path="../results/train")

# Create the preprocessing pipeline and feed it from the train pipeline's input.
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(
    scaler_power=train_pipeline["load_power_statistics"],
)

# Add the regressors to the train pipeline.
regressor_lin_reg(
    ClockShift=preprocessing_pipeline["ClockShift"],
    ClockShift_1=preprocessing_pipeline["ClockShift_1"],
    target=train_pipeline["load_power_statistics"],
    callbacks=[LinePlotCallback('LinearRegression')],
)
regressor_svr(
    ClockShift=preprocessing_pipeline["ClockShift"],
    ClockShift_1=preprocessing_pipeline["ClockShift_1"],
    target=train_pipeline["load_power_statistics"],
    callbacks=[LinePlotCallback('SVR')],
)

print("Start training")
train_pipeline.train(data)
print("Training finished")

# Create a second pipeline. Necessary, since this pipeline has additional
# steps in contrast to the train pipeline.
pipeline = Pipeline(path="../results")

# Build the preprocessing pipeline again, this time fed from the second pipeline.
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(
    scaler_power=pipeline["load_power_statistics"],
)
# Lagged versions (lag 1 and lag 2) of the scaled series, used as regression inputs.
shift_power_statistics = ClockShift(lag=1, name="ClockShift_Lag1")(
    x=scale_power_statistics,
)
shift_power_statistics2 = ClockShift(lag=2, name="ClockShift_Lag2")(
    x=scale_power_statistics,
)

# Linear regression that uses the lagged values and the calendar input to
# predict the current value.
# NOTE: SKLearnWrapper has to collect all **kwargs itself and fit them against
# target. It is also possible to implement a join/collect class.
regressor_power_statistics = SKLearnWrapper(
    module=LinearRegression(fit_intercept=True),
)(
    power_lag1=shift_power_statistics,
    power_lag2=shift_power_statistics2,
    calendar=calendar,
    target=scale_power_statistics,
    callbacks=[LinePlotCallback('linear_regression')],
)

# Rescale the predictions back to the original scale by running the scaler in
# transform mode with the inverse transform.
inverse_power_scale = power_scaler(
    x=regressor_power_statistics,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback('rescale')],
)

# Root mean squared error (RMSE) between the rescaled linear regression output
# and the true values.
# NOTE(review): no CSV callback is attached to this step, so nothing is
# written to a csv file here.
rmse = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete so we can run it and explore the results
lag=1, name="ClockShift_Lag1")(x=scale_power_statistics) shift_power_statistics2 = ClockShift( lag=2, name="ClockShift_Lag2")(x=scale_power_statistics) keras_wrapper = KerasWrapper(keras_model, fit_kwargs={"batch_size": 8, "epochs": 1}, compile_kwargs={"loss": "mse", "optimizer": "Adam", "metrics": ["mse"]}) \ (ClockShift_Lag1=shift_power_statistics, ClockShift_Lag2=shift_power_statistics2, target=scale_power_statistics) inverse_power_scale_dl = power_scaler( x=keras_wrapper, computation_mode=ComputationMode.Transform, use_inverse_transform=True, callbacks=[LinePlotCallback("prediction")]) rmse_dl = RMSE()(keras_model=inverse_power_scale_dl, y=pipeline["load_power_statistics"]) # Now, the pipeline is complete # so we can load data and train the model data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") pipeline.train(data) pipeline.to_folder("../results/pipe_keras")
# Wrap the PyTorch model and connect it to the two lagged inputs and the target.
pytorch_wrapper = PyTorchWrapper(
    model,
    fit_kwargs={"batch_size": 8, "epochs": 1},
    optimizer=optimizer,
    loss_fn=torch.nn.MSELoss(reduction='sum'),
)(
    power_lag1=shift_power_statistics,
    power_lag2=shift_power_statistics2,
    target=scale_power_statistics,
)

# Run the scaler in transform mode with the inverse transform to rescale the
# forecast.
inverse_power_scale = power_scaler(
    x=pytorch_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback('forecast')],
)

# RMSE between the rescaled forecast and the raw series.
rmse_dl = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete,
# so we can load the data, train the model and store the pipeline.
data = pd.read_csv(
    "../data/getting_started_data.csv",
    index_col="time",
    parse_dates=["time"],
    infer_datetime_format=True,
    sep=",",
)

pipeline.train(data)
pipeline.to_folder("./pipe_pytorch")
shift_power_statistics = ClockShift(lag=1, name="ClockShift_Lag1" )(x=scale_power_statistics) shift_power_statistics2 = ClockShift(lag=2, name="ClockShift_Lag2" )(x=scale_power_statistics) # Create a statsmodel that uses the lagged values to predict the current value regressor_power_statistics = SmTimeSeriesModelWrapper( module=ARIMA, module_kwargs={ "order": (2, 0, 0) } )( power_lag1=shift_power_statistics, power_lag2=shift_power_statistics2, calendar=cal_features, target=scale_power_statistics, callbacks=[LinePlotCallback('ARIMA')], ) # Rescale the predictions to be on the original time scale inverse_power_scale = power_scaler( x=regressor_power_statistics, computation_mode=ComputationMode.Transform, use_inverse_transform=True, callbacks=[LinePlotCallback('rescale')] ) # Calculate the root mean squared error (RMSE) between the linear regression and the true values, save it as csv file rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"], callbacks=[CSVCallback('RMSE')]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv",
def custom_multiplication(x: xr.Dataset):
    """Multiply the given dataset by 1000.

    :param x: Dataset to scale.
    :return: ``x * 1000``.
    """
    return x * 1000


# The main section is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule and add the module to the
    # pipeline, with a CSV callback and a line plot callback attached.
    multiplication = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Now, the pipeline is complete so we can run it and explore the results.
    # Load the data and start the pipeline.
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        index_col="time",
        parse_dates=["time"],
        infer_datetime_format=True,
    )
    pipeline.train(df)

    # Generate a plot of the pipeline showing the flow of data through
    # different modules
    pipeline.draw()
    plt.show()
sampled_humidity = Sampler(HORIZON)(x=pipeline["Humidity"]) sampled_temperature = Sampler(HORIZON)(x=pipeline["Temperature"]) sampled_profile_moving = Sampler(HORIZON)(x=profile_moving) sampled_trend = Sampler(HORIZON)(x=trend) target = Sampler(HORIZON)(x=pipeline["BldgX"]) prediction_moving = ProfileNeuralNetwork(offset=24 * 7 * 11, epochs=1000)( historical_input=sampled_difference, calendar=sampled_calendar, temperature=sampled_temperature, humidity=sampled_humidity, profile=sampled_profile_moving, trend=sampled_trend, target=target, callbacks=[LinePlotCallback("PNN")]) rmse = RmseCalculator(offset=11 * 168)(pnn_moving=prediction_moving, moving_pred=sampled_profile_moving, y=target, callbacks=[CSVCallback('RMSE')]) rmse_cleaned = RmseCalculator(name="RMSE_cleaned", offset=11 * 168)( pnn_moving=prediction_moving, moving_pred=sampled_profile_moving, y=target, callbacks=[CSVCallback('RMSE')]) data = pd.read_csv("data/data.csv", index_col="time", parse_dates=["time"],
from pywatts.callbacks import CSVCallback, LinePlotCallback
# All modules required for the pipeline are imported
from pywatts.wrapper import FunctionModule


def custom_multiplication(x: xr.Dataset):
    """Return the given dataset multiplied by 1000.

    :param x: Dataset to scale.
    :return: ``x * 1000``.
    """
    return x * 1000


# The main section is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Put the custom function into a FunctionModule and add the module to the
    # pipeline, with a CSV callback and a line plot callback attached.
    multiplication = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Now, the pipeline is complete so we can run it and explore the results.
    # Load the data and start the pipeline.
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        index_col="time",
        parse_dates=["time"],
        infer_datetime_format=True,
    )
    pipeline.train(df)

    # Generate a plot of the pipeline showing the flow of data through
    # different modules
    pipeline.draw()
    plt.show()