예제 #1
0
    def test_create_and_run_simple_pipeline(self):
        """Build an imputer/scaler/regression pipeline and run train and test on it.

        The pipeline interpolates the ``load_power_statistics`` and
        ``price_day_ahead`` columns, scales the interpolated price, and feeds
        it into a linear regression with two targets. An RMSE module is
        attached to each target. The test makes no assertions on the results;
        it passes as long as building, training and testing raise nothing.
        """
        pipeline = Pipeline()
        imputer_power_statistics = LinearInterpolater(method="nearest", dim="time",
                                                      name="imputer_power")(x=pipeline["load_power_statistics"])
        imputer_price = LinearInterpolater(method="nearest", dim="time",
                                           name="imputer_price")(x=pipeline["price_day_ahead"])
        scaler = SKLearnWrapper(StandardScaler())(x=imputer_price)
        lin_regression = SKLearnWrapper(LinearRegression())(x=scaler,
                                                            target1=imputer_price,
                                                            target2=imputer_power_statistics)

        # One RMSE per regression target, each compared with its interpolated input.
        RMSE(name="Load")(y=imputer_power_statistics, pred=lin_regression["target2"])
        RMSE(name="Price")(y=imputer_price, pred=lin_regression["target1"])

        # ``infer_datetime_format`` is intentionally not passed: it is deprecated
        # since pandas 2.0, where it has no effect and only triggers a warning.
        data = pd.read_csv(f"{FIXTURE_DIR}/getting_started_data.csv", index_col="time", sep=",",
                           parse_dates=["time"])

        # The first 6000 rows are held out for testing; the remainder is used for training.
        train = data[6000:]
        test = data[:6000]
        pipeline.train(train)
        pipeline.test(test)
예제 #2
0
class TestLinearInterpolater(unittest.TestCase):
    """Unit tests for the parameter handling and ``transform`` of ``LinearInterpolater``."""

    def setUp(self) -> None:
        """Create a default-constructed interpolater for the test."""
        self.linear_interpolater = LinearInterpolater()

    def tearDown(self) -> None:
        """Drop the reference to the interpolater."""
        self.linear_interpolater = None

    def test_get_params(self):
        """A default-constructed instance reports the default parameters."""
        default_params = {
            "method": "linear",
            "dim": "time",
            "fill_value": "extrapolate",
        }
        self.assertEqual(self.linear_interpolater.get_params(), default_params)

    def test_set_params(self):
        """``set_params`` replaces the parameters reported by ``get_params``."""
        default_params = {
            "method": "linear",
            "dim": "time",
            "fill_value": "extrapolate",
        }
        changed_params = {
            "method": "index",
            "dim": "location",
            "fill_value": "inside",
        }
        interpolater = self.linear_interpolater

        # Starting point: the defaults.
        self.assertEqual(interpolater.get_params(), default_params)

        interpolater.set_params(**changed_params)
        self.assertEqual(interpolater.get_params(), changed_params)

        # Put the defaults back on the instance before the test ends.
        interpolater.set_params(**default_params)

    def test_transform(self):
        """Gaps at the start, middle and end of a series are filled to 1..5."""
        timestamps = pd.to_datetime([
            '2015-06-03 00:00:00', '2015-06-03 01:00:00',
            '2015-06-03 02:00:00', '2015-06-03 03:00:00', '2015-06-03 04:00:00'
        ])
        # Each variable has NaN in a different position: interior, leading,
        # trailing, and a run of three in the middle.
        series_with_gaps = {
            "test": [1, 2, np.nan, 4, 5],
            "test2": [np.nan, 2, 3, 4, 5],
            "test3": [1, 2, 3, 4, np.nan],
            "test4": [1, np.nan, np.nan, np.nan, 5],
        }

        input_variables = {
            name: ("time", xr.DataArray(values).data)
            for name, values in series_with_gaps.items()
        }
        input_variables["time"] = timestamps
        test_data = xr.Dataset(input_variables)

        test_result = self.linear_interpolater.transform(test_data)

        # Every variable is expected to come back as the complete ramp.
        expected_variables = {
            name: ("time", xr.DataArray([1., 2., 3., 4., 5.]).data)
            for name in series_with_gaps
        }
        expected_variables["time"] = timestamps
        expected_result = xr.Dataset(expected_variables)

        xr.testing.assert_equal(test_result, expected_result)
예제 #3
0
    def test_run_reloaded_simple_pipeline(self):
        """Save a pipeline to a folder, reload it, and run train and test on the copy.

        The pipeline interpolates two columns, scales the interpolated price
        and feeds it into a two-target linear regression. It is written to
        ``./pipe1`` and read back with ``Pipeline.from_folder``. Only the
        reloaded pipeline is trained and tested. The test makes no assertions
        on the results; it passes as long as nothing raises.
        """
        pipeline = Pipeline()

        imputer_power_statistics = LinearInterpolater(method="nearest", dim="time",
                                                      name="imputer_power")(x=pipeline["load_power_statistics"])
        imputer_price = LinearInterpolater(method="nearest", dim="time",
                                           name="imputer_price")(x=pipeline["price_day_ahead"])
        scaler = SKLearnWrapper(StandardScaler())(x=imputer_price)
        SKLearnWrapper(LinearRegression())(x=scaler, target1=imputer_price, target2=imputer_power_statistics)

        pipeline.to_folder("./pipe1")
        # NOTE(review): the reason for this one-second pause is not visible
        # here; presumably it separates the save from the reload. Confirm.
        sleep(1)

        pipeline2 = Pipeline.from_folder("./pipe1")

        # ``infer_datetime_format`` is intentionally not passed: it is deprecated
        # since pandas 2.0, where it has no effect and only triggers a warning.
        data = pd.read_csv(f"{FIXTURE_DIR}/getting_started_data.csv", index_col="time", sep=",",
                           parse_dates=["time"])

        # The first 6000 rows are held out for testing; the remainder is used for training.
        train = data[6000:]
        test = data[:6000]
        pipeline2.train(train)
        pipeline2.test(test)
예제 #4
0
def create_preprocessing_pipeline(power_scaler):
    """Assemble the preprocessing pipeline: interpolate, scale, then add two lags.

    Args:
        power_scaler: Module that is called as ``power_scaler(x=...)`` on the
            interpolated series to produce the scaled series.

    Returns:
        The ``Pipeline`` created with path ``"../results/preprocessing"``.
        It is only assembled here, not trained or run.
    """
    pipeline = Pipeline(path="../results/preprocessing")

    # Handle missing values by interpolating along "time" (method "nearest").
    gap_filler = LinearInterpolater(method="nearest", dim="time",
                                    name="imputer_power")
    imputed_power = gap_filler(x=pipeline["scaler_power"])

    # Scale the interpolated series with the scaler supplied by the caller.
    scaled_power = power_scaler(x=imputed_power)

    # Lagged copies (lag 1, then lag 2) to later be used in the regression.
    for lag in (1, 2):
        ClockShift(lag=lag)(x=scaled_power)

    return pipeline
예제 #5
0
    def pipe(params):
        """Build, train and test a Keras forecasting pipeline for one parameter set.

        Args:
            params: Passed unchanged to ``get_keras_model`` to obtain the model.

        Returns:
            A dict with ``"loss"`` (the ``'RmseCalculator'`` entry of the test
            result as a float), ``"status"`` (``STATUS_OK``) and
            ``"eval_time"`` (``time.time()`` minus the enclosing ``start``).
        """
        keras_model = get_keras_model(params)

        pipeline = Pipeline(path="../results")

        # Interpolate the raw load series, then scale it.
        gap_filler = LinearInterpolater(method='nearest', dim='time',
                                        name='imputer_power')
        imputed_power = gap_filler(x=pipeline['load_power_statistics'])

        power_scaler = SKLearnWrapper(module=StandardScaler(),
                                      name='scaler_power')
        scaled_power = power_scaler(x=imputed_power)

        # Two lagged copies of the scaled series serve as model inputs.
        lag_one = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
        lag_two = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

        network = KerasWrapper(
            keras_model,
            fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
            compile_kwargs={'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']})
        prediction = network(ClockShift_Lag1=lag_one,
                             ClockShift_Lag2=lag_two,
                             target=scaled_power)

        # Reuse the same scaler in transform mode with the inverse transform
        # enabled, applied to the network output.
        unscaled_prediction = power_scaler(
            x=prediction,
            computation_mode=ComputationMode.Transform,
            use_inverse_transform=True,
            callbacks=[LinePlotCallback('prediction')])

        # The RMSE step is attached for its effect on the pipeline; its handle
        # is not needed afterwards.
        rmse_module = RmseCalculator()
        rmse_module(keras_model=unscaled_prediction,
                    y=pipeline['load_power_statistics'],
                    callbacks=[CSVCallback('RMSE')])

        # ``train``, ``test`` and ``start`` come from the enclosing scope.
        pipeline.train(train)
        result = pipeline.test(test)

        loss = float(result['RmseCalculator'].values)
        elapsed = time.time() - start
        return {
            "loss": loss,
            "status": STATUS_OK,
            "eval_time": elapsed
        }
예제 #6
0
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Extract dummy calendar features, using holidays from Germany
    # NOTE: CalendarExtraction can't return multiple features.
    calendar = CalendarExtraction(continent="Europe",
                                  country="Germany",
                                  features=[
                                      CalendarFeature.month,
                                      CalendarFeature.weekday,
                                      CalendarFeature.weekend
                                  ])(x=pipeline["load_power_statistics"])

    # Deal with missing values by interpolating along the time dimension (method="nearest")
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time",
        name="imputer_power")(x=pipeline["load_power_statistics"])

    # Scale the data using a standard SKLearn scaler
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")
    scale_power_statistics = power_scaler(x=imputer_power_statistics)

    # Create lagged time series to later be used in the regression
    shift_power_statistics = ClockShift(
        lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
    shift_power_statistics2 = ClockShift(
        lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

    # Create a linear regression that uses the lagged values to predict the current value
    # NOTE: SKLearnWrapper has to collect all **kwargs itself and fit it against target.
    #       It is also possible to implement a join/collect class
예제 #7
0
 def setUp(self) -> None:
     """Create a default-constructed ``LinearInterpolater`` and store it on the test instance."""
     interpolater = LinearInterpolater()
     self.linear_interpolater = interpolater