def test_create_and_run_simple_pipeline(self):
    """Build a small interpolation/regression pipeline and run train and test.

    This is a smoke test: it makes no assertions on the results and passes
    as long as building, training and testing the pipeline raise nothing.
    """
    pipeline = Pipeline()

    # One interpolation step per input column (method="nearest" along "time").
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time", name="imputer_power"
    )(x=pipeline["load_power_statistics"])
    imputer_price = LinearInterpolater(
        method="nearest", dim="time", name="imputer_price"
    )(x=pipeline["price_day_ahead"])

    # The scaled price is the regression input; both interpolated series
    # are passed as targets.
    scaler = SKLearnWrapper(StandardScaler())(x=imputer_price)
    lin_regression = SKLearnWrapper(LinearRegression())(
        x=scaler,
        target1=imputer_price,
        target2=imputer_power_statistics,
    )

    # One RMSE step per regression target.
    RMSE(name="Load")(y=imputer_power_statistics, pred=lin_regression["target2"])
    RMSE(name="Price")(y=imputer_price, pred=lin_regression["target1"])

    # ``infer_datetime_format`` is intentionally not passed: it is deprecated
    # since pandas 2.0, where it has no effect and only triggers a warning.
    data = pd.read_csv(
        f"{FIXTURE_DIR}/getting_started_data.csv",
        index_col="time",
        sep=",",
        parse_dates=["time"],
    )

    # Rows from position 6000 onwards are used for training, the first
    # 6000 rows for testing.
    train = data[6000:]
    test = data[:6000]

    pipeline.train(train)
    pipeline.test(test)
class TestLinearInterpolater(unittest.TestCase):
    """Tests for the parameters and ``transform`` of ``LinearInterpolater``."""

    def setUp(self) -> None:
        """Create a default-constructed interpolater for each test."""
        self.linear_interpolater = LinearInterpolater()

    def tearDown(self) -> None:
        """Drop the reference to the interpolater after each test."""
        self.linear_interpolater = None

    def test_get_params(self):
        """A default-constructed interpolater reports the expected parameters."""
        expected_params = {
            "method": "linear",
            "dim": "time",
            "fill_value": "extrapolate",
        }
        self.assertEqual(self.linear_interpolater.get_params(), expected_params)

    def test_set_params(self):
        """``set_params`` changes what ``get_params`` reports."""
        interpolater = self.linear_interpolater
        default_params = {
            "method": "linear",
            "dim": "time",
            "fill_value": "extrapolate",
        }
        changed_params = {
            "method": "index",
            "dim": "location",
            "fill_value": "inside",
        }

        self.assertEqual(interpolater.get_params(), default_params)

        interpolater.set_params(**changed_params)
        self.assertEqual(interpolater.get_params(), changed_params)

        # Put the initial values back on the instance.
        interpolater.set_params(**default_params)

    def test_transform(self):
        """NaN entries at the start, middle and end of a series are filled."""
        timestamps = pd.to_datetime([
            '2015-06-03 00:00:00',
            '2015-06-03 01:00:00',
            '2015-06-03 02:00:00',
            '2015-06-03 03:00:00',
            '2015-06-03 04:00:00',
        ])

        # Each column has its gap(s) in a different position.
        columns_with_gaps = {
            "test": [1, 2, np.nan, 4, 5],
            "test2": [np.nan, 2, 3, 4, 5],
            "test3": [1, 2, 3, 4, np.nan],
            "test4": [1, np.nan, np.nan, np.nan, 5],
        }
        input_variables = {
            name: ("time", xr.DataArray(values).data)
            for name, values in columns_with_gaps.items()
        }
        input_variables["time"] = timestamps
        test_data = xr.Dataset(input_variables)

        test_result = self.linear_interpolater.transform(test_data)

        # Every column is expected to come back as the full 1..5 ramp.
        expected_variables = {
            name: ("time", xr.DataArray([1., 2., 3., 4., 5.]).data)
            for name in columns_with_gaps
        }
        expected_variables["time"] = timestamps
        expected_result = xr.Dataset(expected_variables)

        xr.testing.assert_equal(test_result, expected_result)
def test_run_reloaded_simple_pipeline(self):
    """Save a pipeline to a folder, reload it, and run train and test on the copy.

    This is a smoke test: it makes no assertions on the results and passes
    as long as saving, reloading, training and testing raise nothing.
    """
    pipeline = Pipeline()

    # One interpolation step per input column (method="nearest" along "time").
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time", name="imputer_power"
    )(x=pipeline["load_power_statistics"])
    imputer_price = LinearInterpolater(
        method="nearest", dim="time", name="imputer_price"
    )(x=pipeline["price_day_ahead"])

    # The scaled price is the regression input; both interpolated series
    # are passed as targets.
    scaler = SKLearnWrapper(StandardScaler())(x=imputer_price)
    SKLearnWrapper(LinearRegression())(
        x=scaler,
        target1=imputer_price,
        target2=imputer_power_statistics,
    )

    # Round trip through "./pipe1", which is relative to the current
    # working directory and is not removed by this test.
    pipeline.to_folder("./pipe1")
    # NOTE(review): presumably gives the written files time to settle before
    # they are read back - confirm whether this pause is still needed.
    sleep(1)
    pipeline2 = Pipeline.from_folder("./pipe1")

    # ``infer_datetime_format`` is intentionally not passed: it is deprecated
    # since pandas 2.0, where it has no effect and only triggers a warning.
    data = pd.read_csv(
        f"{FIXTURE_DIR}/getting_started_data.csv",
        index_col="time",
        sep=",",
        parse_dates=["time"],
    )

    # Rows from position 6000 onwards are used for training, the first
    # 6000 rows for testing.
    train = data[6000:]
    test = data[:6000]

    # Only the reloaded pipeline is exercised.
    pipeline2.train(train)
    pipeline2.test(test)
def create_preprocessing_pipeline(power_scaler):
    """Assemble the preprocessing pipeline: interpolate, scale, then lag.

    :param power_scaler: step that is called as ``power_scaler(x=...)`` on
        the interpolated series to scale it.
    :return: the pipeline, created with ``path="../results/preprocessing"``.
    """
    preprocessing = Pipeline(path="../results/preprocessing")

    # Handle missing values by interpolating along "time" (method="nearest").
    interpolater = LinearInterpolater(method="nearest", dim="time", name="imputer_power")
    imputed_power = interpolater(x=preprocessing["scaler_power"])

    # Scale the interpolated data with the scaler handed in by the caller.
    scaled_power = power_scaler(x=imputed_power)

    # Lagged copies (lag 1 and lag 2) of the scaled series, to be used
    # later in the regression.
    for lag in (1, 2):
        ClockShift(lag=lag)(x=scaled_power)

    return preprocessing
def pipe(params):
    """Build, train and test a Keras-based pipeline and report its RMSE.

    ``train``, ``test`` and ``start`` are not defined in this function; they
    are looked up in the enclosing scope when the function runs.

    :param params: forwarded unchanged to ``get_keras_model``.
    :return: dict with the keys ``"loss"`` (the ``RmseCalculator`` result as
        a float), ``"status"`` (``STATUS_OK``) and ``"eval_time"``
        (``time.time() - start``).
    """
    keras_model = get_keras_model(params)

    pipeline = Pipeline(path="../results")

    # Interpolate the raw load series (method="nearest" along "time").
    interpolater = LinearInterpolater(method="nearest", dim="time", name="imputer_power")
    imputer_power_statistics = interpolater(x=pipeline["load_power_statistics"])

    # The scaler object is kept so it can be applied again further down
    # with ``use_inverse_transform=True``.
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")
    scale_power_statistics = power_scaler(x=imputer_power_statistics)

    # Two lagged copies of the scaled series serve as the model inputs.
    lag_one = ClockShift(lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
    lag_two = ClockShift(lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

    keras_step = KerasWrapper(
        keras_model,
        fit_kwargs={"batch_size": 32, "epochs": 100, "verbose": 0},
        compile_kwargs={"loss": "mse", "optimizer": "Adam", "metrics": ["mse"]},
    )
    keras_wrapper = keras_step(
        ClockShift_Lag1=lag_one,
        ClockShift_Lag2=lag_two,
        target=scale_power_statistics,
    )

    # Apply the scaler to the model output in transform mode with the
    # inverse transform enabled, and attach a line-plot callback.
    inverse_power_scale_dl = power_scaler(
        x=keras_wrapper,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback("prediction")],
    )

    # RMSE step with a CSV callback; the returned handle is not used
    # further in this function.
    rmse_dl = RmseCalculator()(
        keras_model=inverse_power_scale_dl,
        y=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("RMSE")],
    )

    pipeline.train(train)
    result = pipeline.test(test)

    loss = float(result["RmseCalculator"].values)
    return {
        "loss": loss,
        "status": STATUS_OK,
        "eval_time": time.time() - start,
    }
# Set up the pipeline ("../results" is passed as its path).
pipeline = Pipeline(path="../results")

# Extract dummy calendar features, using holidays from Germany.
# NOTE: CalendarExtraction can't return multiple features.
calendar_extraction = CalendarExtraction(
    continent="Europe",
    country="Germany",
    features=[
        CalendarFeature.month,
        CalendarFeature.weekday,
        CalendarFeature.weekend,
    ],
)
calendar = calendar_extraction(x=pipeline["load_power_statistics"])

# Deal with missing values by interpolating along "time" (method="nearest").
power_interpolater = LinearInterpolater(method="nearest", dim="time", name="imputer_power")
imputer_power_statistics = power_interpolater(x=pipeline["load_power_statistics"])

# Scale the data using a standard SKLearn scaler.
power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")
scale_power_statistics = power_scaler(x=imputer_power_statistics)

# Create lagged time series to be used later in the regression.
shift_power_statistics = ClockShift(lag=1, name="ClockShift_Lag1")(
    x=scale_power_statistics
)
shift_power_statistics2 = ClockShift(lag=2, name="ClockShift_Lag2")(
    x=scale_power_statistics
)

# Create a linear regression that uses the lagged values to predict the current value.
# NOTE: SKLearnWrapper has to collect all **kwargs itself and fit them against the target.
#       It is also possible to implement a join/collect class.
def setUp(self) -> None:
    """Create a default-constructed ``LinearInterpolater`` for each test."""
    interpolater = LinearInterpolater()
    self.linear_interpolater = interpolater