def create_test_pipeline(modules):
    """Assemble the test pipeline that picks one of two regressors per batch.

    :param modules: Pair ``(regressor_svr, regressor_lin_reg)`` of regressor modules.
                    Both are added in transform mode, so they are presumably
                    already fitted -- confirm against the caller.
    :return: The assembled pipeline.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline is configured with a batch of one hour.
    test_pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR branch: its condition holds whenever is_daytime reports False.
    svr_forecast = svr_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear-regression branch: its condition holds whenever is_daytime reports True.
    lin_reg_forecast = lin_reg_module(
        ClockShift=test_pipeline["ClockShift"],
        ClockShift_1=test_pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # RMSE of both branches against the true values; a line-plot and a CSV
    # callback are attached to the result.
    rmse_module = RmseCalculator()
    rmse_module(
        y_hat=(svr_forecast, lin_reg_forecast),
        y=test_pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return test_pipeline
Example #2
0
def create_test_pipeline(modules):
    """Assemble the test pipeline with a rolling RMSE over two conditional regressors.

    :param modules: Pair ``(regressor_svr, regressor_lin_reg)`` of regressor modules.
                    Both are added in transform mode, so they are presumably
                    already fitted -- confirm against the caller.
    :return: The assembled pipeline.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline is configured with a batch of one hour.
    test_pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR branch: its condition holds whenever is_daytime reports False.
    svr_forecast = svr_module(ClockShift=test_pipeline["ClockShift"],
                              ClockShift_1=test_pipeline["ClockShift_1"],
                              condition=lambda x, y: not is_daytime(x, y),
                              computation_mode=ComputationMode.Transform,
                              callbacks=[LinePlotCallback('SVR')])

    # Linear-regression branch: its condition holds whenever is_daytime reports True.
    lin_reg_forecast = lin_reg_module(ClockShift=test_pipeline["ClockShift"],
                                      ClockShift_1=test_pipeline["ClockShift_1"],
                                      condition=lambda x, y: is_daytime(x, y),
                                      computation_mode=ComputationMode.Transform,
                                      callbacks=[LinePlotCallback('LinearRegression')])

    # TODO what kind of RMSE has to be used here?
    #   * Rolling would not work, since the complete RMSE should be calculated for each time point.
    #   * Summaries do not work, since summaries are only executed once.
    #   Is the current solution useful?
    #   Possible solution: window_size=-1 means that the window spans from the start until the
    #                      current point in time. In that case, the online learning has to be built
    #                      in such a way that modules only calculate data for the requested time steps.

    # Rolling RMSE (window of 1 "d") of both branches against the true values;
    # a line-plot and a CSV callback are attached to the result.
    rolling_rmse = RollingRMSE(window_size=1, window_size_unit="d")
    rolling_rmse(y_hat=(svr_forecast, lin_reg_forecast),
                 y=test_pipeline["load_power_statistics"],
                 callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')])

    return test_pipeline
Example #3
0
    def pipe(params):
        """Build, train and evaluate a Keras forecasting pipeline for one parameter set.

        Reads ``train``, ``test`` and ``start`` from the enclosing scope.
        NOTE(review): the returned dict looks like a hyperopt objective result -- confirm.

        :param params: Parameters handed to ``get_keras_model`` to build the network.
        :return: Dict with the test RMSE as ``loss``, ``STATUS_OK`` as ``status`` and the
                 seconds elapsed since ``start`` as ``eval_time``.
        """
        model = get_keras_model(params)

        pipeline = Pipeline(path="../results")

        # Interpolate the raw load series (method 'nearest' along 'time').
        imputer = LinearInterpolater(method='nearest', dim='time', name='imputer_power')
        imputed_power = imputer(x=pipeline['load_power_statistics'])

        # The same scaler instance is reused below for the inverse transform.
        power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
        scaled_power = power_scaler(x=imputed_power)

        # Lagged copies of the scaled series serve as network inputs.
        lag_one = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
        lag_two = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

        network = KerasWrapper(
            model,
            fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
            compile_kwargs={'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']},
        )
        prediction = network(
            ClockShift_Lag1=lag_one,
            ClockShift_Lag2=lag_two,
            target=scaled_power,
        )

        # Map the network output back through the scaler's inverse transform.
        rescaled_prediction = power_scaler(
            x=prediction,
            computation_mode=ComputationMode.Transform,
            use_inverse_transform=True,
            callbacks=[LinePlotCallback('prediction')],
        )

        # The RMSE step is only registered here; its value is read from the
        # test result below under the key 'RmseCalculator'.
        RmseCalculator()(
            keras_model=rescaled_prediction,
            y=pipeline['load_power_statistics'],
            callbacks=[CSVCallback('RMSE')],
        )

        pipeline.train(train)
        test_result = pipeline.test(test)

        return {
            "loss": float(test_result['RmseCalculator'].values),
            "status": STATUS_OK,
            "eval_time": time.time() - start,
        }
    regressor_lin_reg = SKLearnWrapper(module=LinearRegression(fit_intercept=True), name="Regression")
    regressor_svr = SKLearnWrapper(module=SVR(), name="Regression")
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")

    # Build a train pipeline. In this pipeline, each step processes all data at once.
    train_pipeline = Pipeline(path="../results/train")

    # Create preprocessing pipeline for the preprocessing steps
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(scaler_power=train_pipeline["load_power_statistics"])

    # Add the regressors to the train pipeline
    regressor_lin_reg(ClockShift=preprocessing_pipeline["ClockShift"],
                      ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                      target=train_pipeline["load_power_statistics"],
                      callbacks=[LinePlotCallback('LinearRegression')])
    regressor_svr(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  target=train_pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('SVR')])

    print("Start training")
    train_pipeline.train(data)
    print("Training finished")

    # Create a second pipeline. Necessary, since this pipeline has additional steps in contrast to the train pipeline.
    pipeline = Pipeline(path="../results")

    # Get preprocessing pipeline
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])
Example #5
0
    # Create lagged time series to later be used in the regression
    shift_power_statistics = ClockShift(
        lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
    shift_power_statistics2 = ClockShift(
        lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

    # Create a linear regression that uses the lagged values to predict the current value
    # NOTE: SKLearnWrapper has to collect all **kwargs itself and fit it against target.
    #       It is also possible to implement a join/collect class
    regressor_power_statistics = SKLearnWrapper(module=LinearRegression(
        fit_intercept=True))(
            power_lag1=shift_power_statistics,
            power_lag2=shift_power_statistics2,
            calendar=calendar,
            target=scale_power_statistics,
            callbacks=[LinePlotCallback('linear_regression')],
        )

    # Rescale the predictions to be on the original time scale
    inverse_power_scale = power_scaler(
        x=regressor_power_statistics,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('rescale')])

    # Calculate the root mean squared error (RMSE) between the linear regression and the true values
    # save it as csv file
    rmse = RMSE()(y_hat=inverse_power_scale,
                  y=pipeline["load_power_statistics"])

    # Now, the pipeline is complete so we can run it and explore the results
Example #6
0
        lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
    shift_power_statistics2 = ClockShift(
        lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

    keras_wrapper = KerasWrapper(keras_model,
                                 fit_kwargs={"batch_size": 8, "epochs": 1},
                                 compile_kwargs={"loss": "mse", "optimizer": "Adam", "metrics": ["mse"]}) \
        (ClockShift_Lag1=shift_power_statistics,
         ClockShift_Lag2=shift_power_statistics2,
         target=scale_power_statistics)

    inverse_power_scale_dl = power_scaler(
        x=keras_wrapper,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback("prediction")])

    rmse_dl = RMSE()(keras_model=inverse_power_scale_dl,
                     y=pipeline["load_power_statistics"])

    # Now, the pipeline is complete
    # so we can load data and train the model
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")

    pipeline.train(data)
    pipeline.to_folder("../results/pipe_keras")
Example #7
0
    pytorch_wrapper = PyTorchWrapper(model,
                                     fit_kwargs={"batch_size": 8, "epochs": 1},
                                     optimizer=optimizer,
                                     loss_fn=torch.nn.MSELoss(reduction='sum'))\
                      (
                        power_lag1=shift_power_statistics,
                        power_lag2=shift_power_statistics2,
                        target=scale_power_statistics
                      )

    inverse_power_scale = power_scaler(
        x=pytorch_wrapper,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('forecast')])

    rmse_dl = RMSE()(y_hat=inverse_power_scale,
                     y=pipeline["load_power_statistics"])

    # Now, the pipeline is complete
    # so we can load data and train the model
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")

    pipeline.train(data)
    pipeline.to_folder("./pipe_pytorch")
Example #8
0
    shift_power_statistics = ClockShift(lag=1, name="ClockShift_Lag1"
                                        )(x=scale_power_statistics)
    shift_power_statistics2 = ClockShift(lag=2, name="ClockShift_Lag2"
                                         )(x=scale_power_statistics)

    # Create a statsmodel that uses the lagged values to predict the current value
    regressor_power_statistics = SmTimeSeriesModelWrapper(
        module=ARIMA,
        module_kwargs={
            "order": (2, 0, 0)
        }
    )(
        power_lag1=shift_power_statistics,
        power_lag2=shift_power_statistics2,
        calendar=cal_features,
        target=scale_power_statistics, callbacks=[LinePlotCallback('ARIMA')],
    )

    # Rescale the predictions to be on the original time scale
    inverse_power_scale = power_scaler(
        x=regressor_power_statistics, computation_mode=ComputationMode.Transform,
        use_inverse_transform=True, callbacks=[LinePlotCallback('rescale')]
    )

    # Calculate the root mean squared error (RMSE) between the rescaled ARIMA predictions and the true values, save it as csv file
    rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"],
                            callbacks=[CSVCallback('RMSE')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    data = pd.read_csv("../data/getting_started_data.csv",
Example #9
0
def custom_multiplication(x: xr.Dataset):
    """Multiply the given dataset by 1000.

    :param x: Dataset to scale.
    :return: The result of ``x * 1000``.
    """
    return x * 1000


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Set up the pipeline; "../results" is the path handed to it.
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule, then attach it to the
    # pipeline's "load_power_statistics" input with CSV and line-plot callbacks.
    multiplication = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Load the example data, using the parsed "time" column as index.
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        parse_dates=["time"],
        infer_datetime_format=True,
        index_col="time",
    )

    # The pipeline is complete, so run it on the loaded data.
    pipeline.train(df)

    # Draw the pipeline and display the resulting figure.
    pipeline.draw()
    plt.show()
Example #10
0
    sampled_humidity = Sampler(HORIZON)(x=pipeline["Humidity"])
    sampled_temperature = Sampler(HORIZON)(x=pipeline["Temperature"])
    sampled_profile_moving = Sampler(HORIZON)(x=profile_moving)
    sampled_trend = Sampler(HORIZON)(x=trend)

    target = Sampler(HORIZON)(x=pipeline["BldgX"])

    prediction_moving = ProfileNeuralNetwork(offset=24 * 7 * 11, epochs=1000)(
        historical_input=sampled_difference,
        calendar=sampled_calendar,
        temperature=sampled_temperature,
        humidity=sampled_humidity,
        profile=sampled_profile_moving,
        trend=sampled_trend,
        target=target,
        callbacks=[LinePlotCallback("PNN")])

    rmse = RmseCalculator(offset=11 * 168)(pnn_moving=prediction_moving,
                                           moving_pred=sampled_profile_moving,
                                           y=target,
                                           callbacks=[CSVCallback('RMSE')])

    rmse_cleaned = RmseCalculator(name="RMSE_cleaned", offset=11 * 168)(
        pnn_moving=prediction_moving,
        moving_pred=sampled_profile_moving,
        y=target,
        callbacks=[CSVCallback('RMSE')])

    data = pd.read_csv("data/data.csv",
                       index_col="time",
                       parse_dates=["time"],
from pywatts.callbacks import CSVCallback, LinePlotCallback

# All modules required for the pipeline are imported
from pywatts.wrapper import FunctionModule


def custom_multiplication(x: xr.Dataset):
    """Multiply the given dataset by 1000.

    :param x: Dataset to scale.
    :return: The result of ``x * 1000``.
    """
    return x * 1000


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Set up the pipeline; "../results" is the path handed to it.
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule, then attach it to the
    # pipeline's "load_power_statistics" input with CSV and line-plot callbacks.
    multiplication = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Read the example data with the parsed "time" column as index.
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        parse_dates=["time"],
        infer_datetime_format=True,
        index_col="time",
    )

    # With the pipeline complete, run it on the loaded data.
    pipeline.train(df)

    # Draw the flow of data through the modules and show the figure.
    pipeline.draw()
    plt.show()