Ejemplo n.º 1
0
    def test_create_and_run_simple_pipeline(self):
        """Build a two-target regression pipeline and run train and test on CSV data."""
        pipeline = Pipeline()

        # Interpolate both input columns (method="nearest" along "time").
        power_interpolater = LinearInterpolater(method="nearest",
                                                dim="time",
                                                name="imputer_power")
        imputed_power = power_interpolater(
            x=pipeline["load_power_statistics"])
        price_interpolater = LinearInterpolater(method="nearest",
                                                dim="time",
                                                name="imputer_price")
        imputed_price = price_interpolater(x=pipeline["price_day_ahead"])

        # The scaled price is the regression input; the unscaled interpolated
        # series are the two targets.
        scaled_price = SKLearnWrapper(StandardScaler())(x=imputed_price)
        regression = SKLearnWrapper(LinearRegression())(
            x=scaled_price, target1=imputed_price, target2=imputed_power)

        # One RmseCalculator per regression target.
        RmseCalculator(name="Load")(y=imputed_power,
                                    pred=regression["target2"])
        RmseCalculator(name="Price")(y=imputed_price,
                                     pred=regression["target1"])

        frame = pd.read_csv("data/getting_started_data.csv",
                            sep=",",
                            index_col="time",
                            parse_dates=["time"],
                            infer_datetime_format=True)
        # Rows from position 6000 onwards are used for training, the first
        # 6000 rows for testing.
        split_at = 6000
        train_rows = frame[split_at:]
        test_rows = frame[:split_at]
        pipeline.train(train_rows)
        pipeline.test(test_rows)
Ejemplo n.º 2
0
    def test_fit_regression_multiple_datavariables(self):
        """Fit a wrapped LinearRegression on two inputs and predict one time step."""

        def series(values, index):
            # Every array in this test is one-dimensional along "time".
            return xr.DataArray(values, dims=["time"], coords={'time': index})

        fit_index = pd.date_range('2000-01-01', freq='24H', periods=7)
        predict_index = pd.date_range('2000-01-08', freq='24H', periods=1)

        bar = series([2, 2, 2, 2, 3, 3, 3], fit_index)
        foo = series([4, 4, 4, 4, 6, 6, 6], fit_index)
        target = series([6, 6, 6, 6, 9, 9, 9], fit_index)

        regressor = LinearRegression()
        wrapper = SKLearnWrapper(module=regressor)
        # Before fit() the estimator must not expose coef_.
        self.assertNotIn("coef_", regressor.__dir__())

        wrapper.fit(bar=bar, foo=foo, target=target)
        prediction = wrapper.transform(bar=series([2], predict_index),
                                       foo=series([4], predict_index))

        # The inputs (2, 4) also occur in the fit data with target 6.
        self.assertAlmostEqual(prediction["target"].values[0, 0], 6.0)
        self.assertEqual(prediction["target"].shape, (1, 1))
Ejemplo n.º 3
0
    def test_to_folder(self, mock_file, json_mock, fm_mock):
        """Check which files to_folder opens and what is passed to json_mock.dump."""
        scaler = SKLearnWrapper(StandardScaler())(input=self.pipeline["input"])
        SKLearnWrapper(LinearRegression())(x=scaler)

        scaler_path = os.path.join('test_pipeline', 'StandardScaler.pickle')
        regression_path = os.path.join('test_pipeline',
                                       'LinearRegression.pickle')
        json_path = os.path.join('test_pipeline', 'pipeline.json')

        # The object returned by fm_mock hands out the three paths, in this
        # order, on successive get_path calls.
        path_provider = MagicMock()
        path_provider.get_path.side_effect = [
            scaler_path,
            regression_path,
            json_path,
        ]
        fm_mock.return_value = path_provider

        self.pipeline.to_folder("test_pipeline")

        expected_opens = [
            call(scaler_path, 'wb'),
            call(regression_path, 'wb'),
            call(json_path, 'w'),
        ]
        mock_file.assert_has_calls(expected_opens, any_order=True)

        # The dumped object is read from the keyword argument "obj" of the
        # most recent dump call and compared field by field.
        _, dump_kwargs = json_mock.dump.call_args
        for field in ("id", "name", "modules", "steps"):
            assert dump_kwargs["obj"][field] == pipeline_json[field]
Ejemplo n.º 4
0
    def test_add_module_with_inputs(self):
        """Feed two scaled pipeline columns into one regression and count the steps."""
        first_scaled = SKLearnWrapper(StandardScaler())(x=self.pipeline["x"])
        second_scaled = SKLearnWrapper(
            StandardScaler())(x=self.pipeline["test1"])
        regression = SKLearnWrapper(LinearRegression())
        regression(input_1=first_scaled, input_2=second_scaled)

        # Five steps are expected; presumably the three modules plus one
        # start step per pipeline column ("x" and "test1") -- TODO confirm.
        expected_steps = 5
        self.assertEqual(expected_steps, len(self.pipeline.id_to_step))
Ejemplo n.º 5
0
    def test_transform_multiple_output(self):
        """Fit a wrapped MultiOutputRegressor on two targets and predict one step.

        Both targets are constant (2 and 3), and the test expects each
        prediction to equal its target's constant with shape (1, 1).
        """
        lin_reg = LinearRegression()
        multi_regressor = MultiOutputRegressor(lin_reg)
        wrapper = SKLearnWrapper(module=multi_regressor)
        time = pd.date_range('2000-01-01', freq='24H', periods=5)
        time2 = pd.date_range('2000-01-08', freq='24H', periods=1)

        bar = xr.DataArray([1, 2, 3, 4, 5],
                           dims=["time"],
                           coords={'time': time})
        # Single sample used as the "bar" input for the prediction.
        foo = xr.DataArray([1], dims=["time"], coords={'time': time2})
        target = xr.DataArray([2, 2, 2, 2, 2],
                              dims=["time"],
                              coords={'time': time})
        target2 = xr.DataArray([3, 3, 3, 3, 3],
                               dims=["time"],
                               coords={'time': time})

        wrapper.fit(bar=bar, target1=target, target2=target2)

        result = wrapper.transform(bar=foo)
        # Check the shape first so the scalar indexing below is known to be
        # valid.
        self.assertEqual(result["target1"].shape, (1, 1))
        self.assertEqual(result["target2"].shape, (1, 1))
        # Index down to the scalar. values[0] would still be a length-1
        # array; assertAlmostEqual passes the difference to round(), which
        # is not defined for arrays, so a tiny float deviation would raise
        # a TypeError instead of being tolerated.
        self.assertAlmostEqual(result["target1"].values[0, 0], 2.0)
        self.assertAlmostEqual(result["target2"].values[0, 0], 3.0)
Ejemplo n.º 6
0
    def test_run_reloaded_simple_pipeline(self):
        """Save a pipeline with to_folder, reload it with from_folder and run it."""
        stored = Pipeline()

        power_interpolater = LinearInterpolater(method="nearest",
                                                dim="time",
                                                name="imputer_power")
        imputed_power = power_interpolater(x=stored["load_power_statistics"])
        price_interpolater = LinearInterpolater(method="nearest",
                                                dim="time",
                                                name="imputer_price")
        imputed_price = price_interpolater(x=stored["price_day_ahead"])
        scaled_price = SKLearnWrapper(StandardScaler())(x=imputed_price)
        regression = SKLearnWrapper(LinearRegression())
        regression(x=scaled_price,
                   target1=imputed_price,
                   target2=imputed_power)

        folder = "./pipe1"
        stored.to_folder(folder)
        # NOTE(review): the reason for the one-second pause is not visible
        # from this test -- confirm before removing it.
        sleep(1)

        # Only the reloaded pipeline is trained and tested below.
        reloaded = Pipeline.from_folder(folder)

        frame = pd.read_csv("data/getting_started_data.csv",
                            sep=",",
                            index_col="time",
                            parse_dates=["time"],
                            infer_datetime_format=True)
        # Rows from position 6000 onwards are used for training, the first
        # 6000 rows for testing.
        split_at = 6000
        train_rows = frame[split_at:]
        test_rows = frame[:split_at]
        reloaded.train(train_rows)
        reloaded.test(test_rows)
Ejemplo n.º 7
0
    def test_fit_TransformerMixin(self):
        """Fitting the wrapper must fit the wrapped StandardScaler instance."""
        standard_scaler = StandardScaler()
        wrapped = SKLearnWrapper(module=standard_scaler)
        # The test expects mean_ to be absent before fit and present after.
        self.assertNotIn("mean_", standard_scaler.__dir__())

        samples = xr.DataArray([1, 2, 3, 4, 5])
        wrapped.fit(test=samples)

        self.assertIn("mean_", standard_scaler.__dir__())
        self.assertIsNotNone(standard_scaler.mean_)
Ejemplo n.º 8
0
    def test_fit_RegressorMixin(self):
        """Fitting the wrapper must fit the wrapped LinearRegression instance."""
        regressor = LinearRegression()
        wrapped = SKLearnWrapper(module=regressor)
        # The test expects coef_ to be absent before fit and present after.
        self.assertNotIn("coef_", regressor.__dir__())

        features = xr.DataArray([1, 2, 3, 4, 5])
        constant_target = xr.DataArray([2, 2, 2, 2, 2])
        wrapped.fit(test=features, target=constant_target)

        self.assertIn("coef_", regressor.__dir__())
        self.assertIsNotNone(regressor.coef_)
Ejemplo n.º 9
0
    def test_transform_RegressorMixin(self):
        """Fit a wrapped SVR on a constant target and predict a single time step."""
        wrapper = SKLearnWrapper(module=SVR())
        predict_index = pd.date_range('2000-01-08', freq='24H', periods=1)
        bar = xr.DataArray([1],
                           dims=["time"],
                           coords={'time': predict_index})

        # The fit data is passed without explicit dims or coords.
        features = xr.DataArray([1, 2, 3, 4, 5])
        constant_target = xr.DataArray([2, 2, 2, 2, 2])
        wrapper.fit(test=features, target=constant_target)

        prediction = wrapper.transform(bar=bar)
        # The target is 2 for every fit sample, and so is the expected output.
        assert prediction["target"].values[0] == 2.0
        self.assertEqual(prediction["target"].shape, (1, 1))
Ejemplo n.º 10
0
    def test_multiple_same_module(self):
        """Reuse one module instance for several steps and train the pipeline.

        The same SKLearnWrapper and the same MissingValueDetector are each
        called twice; the test expects seven steps in total but only two
        distinct modules behind the Step instances.
        """
        reg_module = SKLearnWrapper(module=LinearRegression())
        reg_one = reg_module(x=self.pipeline["test"],
                             target=self.pipeline["target"])
        reg_two = reg_module(x=self.pipeline["test2"],
                             target=self.pipeline["target"])
        detector = MissingValueDetector()
        detector(dataset=reg_one)
        detector(dataset=reg_two)

        # Three start steps (test, test2, target), two regressors, two detectors
        self.assertEqual(7, len(self.pipeline.id_to_step))
        # Collect the distinct modules of all Step instances. Membership is
        # tested with `in` on a list, so modules need not be hashable.
        modules = []
        for element in self.pipeline.id_to_step.values():
            if isinstance(element, Step) and element.module not in modules:
                modules.append(element.module)
        # One sklearn wrapper, one missing value detector
        self.assertEqual(2, len(modules))

        # Training must work with the shared module instances.
        self.pipeline.train(
            pd.DataFrame(
                {
                    "test": [1, 2, 2, 3, 4],
                    "test2": [2, 2, 2, 2, 2],
                    "target": [2, 2, 4, 4, -5]
                },
                index=pd.DatetimeIndex(
                    pd.date_range('2000-01-01', freq='24H', periods=5))))
Ejemplo n.º 11
0
    def test_horizon_greater_one_regression(self):
        """Train a regression whose first target has a "horizon" dimension of size two."""
        estimator = LinearRegression()

        regression = SKLearnWrapper(estimator)(
            foo=self.pipeline["foo"],
            target=self.pipeline["target"],
            target2=self.pipeline["target2"])
        RmseCalculator()(y=self.pipeline["target"],
                         prediction=regression["target"])

        index = pd.date_range('2000-01-01', freq='24H', periods=5)

        def along_time(values):
            # One-dimensional helper for the arrays without a horizon.
            return xr.DataArray(values, dims=["time"], coords={'time': index})

        foo = along_time([1, 2, 3, 4, 5])
        # Two values per time step, one for each horizon.
        target = xr.DataArray([[2, 3], [2, 4], [2, 5], [2, 6], [2, 7]],
                              dims=["time", "horizon"],
                              coords={
                                  'time': index,
                                  "horizon": [1, 2]
                              })
        target2 = along_time([3, 3, 3, 3, 3])

        dataset = xr.Dataset({
            'foo': foo,
            "target": target,
            "target2": target2
        })

        summary = self.pipeline.train(dataset)
        self.assertAlmostEqual(summary["RmseCalculator"].values[0, 0], 0.0)
Ejemplo n.º 12
0
 def test_add_pipeline_without_index(self):
     """Passing the pipeline itself, not pipeline[<column>], as input must raise."""
     expected_message = "Adding a pipeline as input might be ambigious. Specifiy the desired column of your dataset by using pipeline[<column_name>]"

     # The input dataframe may hold several columns, so the bare pipeline is
     # expected to be rejected as an input.
     with self.assertRaises(Exception) as raised:
         SKLearnWrapper(StandardScaler())(x=self.pipeline)

     self.assertEqual(expected_message, str(raised.exception))
Ejemplo n.º 13
0
    def test_fit_ClassifierMixin(self):
        """Fit a wrapped SVC on five labelled samples and classify one new sample."""
        wrapper = SKLearnWrapper(module=SVC())

        def series(values, index):
            # Every array in this test is one-dimensional along "time".
            return xr.DataArray(values, dims=["time"], coords={'time': index})

        fit_index = pd.date_range('2000-01-01', freq='24H', periods=5)
        predict_index = pd.date_range('2000-01-08', freq='24H', periods=1)

        bar = series([1, 2, 3, 4, 5], fit_index)
        foo = series([1], predict_index)
        labels = series([0, 0, 1, 1, 1], fit_index)

        wrapper.fit(bar=bar, target=labels)

        prediction = wrapper.transform(bar=foo)
        # The input value 1 carries label 0 in the fit data.
        assert prediction["target"].values[0] == 0
        self.assertEqual(prediction["target"].shape, (1, 1))
Ejemplo n.º 14
0
    def test_DensityMixin(self):
        """Fit a wrapped GaussianMixture and compare its output for two inputs."""
        mixture = GaussianMixture(n_components=2)
        wrapper = SKLearnWrapper(module=mixture)

        fit_index = pd.date_range('2000-01-01', freq='24H', periods=10)
        single_index = pd.date_range('2000-01-08', freq='24H', periods=1)

        training = xr.DataArray([2, 4, 5, 4, 3, 2, 1, 5, 5, 5],
                                dims=["time"],
                                coords={'time': fit_index})
        wrapper.fit(bar=training)

        sample_five, sample_two = (xr.DataArray([value],
                                                dims=["time"],
                                                coords={'time': single_index})
                                   for value in (5, 2))

        output_five = wrapper.transform(bar=sample_five)
        output_two = wrapper.transform(bar=sample_two)

        # The two different inputs must not yield the same first value.
        assert output_five.values[0] != output_two.values[0]

        for output in (output_five, output_two):
            self.assertEqual(output.shape, (1, ))
Ejemplo n.º 15
0
    def test_add_pipeline_to_pipeline_and_save(self, open_mock, json_mock,
                                               fm_mock):
        """Nest a sub-pipeline in self.pipeline and expect two dump calls on save."""
        inner = Pipeline()

        # NOTE(review): the detector reads inner["regressor"], while the inner
        # pipeline is called below with the keyword "regression" -- confirm
        # whether the differing names are intended.
        missing_values = MissingValueDetector()
        missing_values(dataset=inner["regressor"])

        wrapped_regression = SKLearnWrapper(LinearRegression())
        regression_step = wrapped_regression(x=self.pipeline["test"])
        inner(regression=regression_step)

        self.pipeline.to_folder(path="path")

        # Presumably one dump per pipeline (outer and nested) -- TODO confirm.
        dump_calls = json_mock.dump.call_count
        self.assertEqual(dump_calls, 2)
Ejemplo n.º 16
0
    def test_fit_ClusterMixin(self):
        """Fit a wrapped KMeans with two clusters and transform two single points."""
        clusterer = KMeans(n_clusters=2)
        wrapper = SKLearnWrapper(module=clusterer)

        fit_index = pd.date_range('2000-01-01', freq='24H', periods=10)
        single_index = pd.date_range('2000-01-08', freq='24H', periods=1)

        training = xr.DataArray([2, 4, 5, 4, 2, 2, 1, 5, 5, 5],
                                dims=["time"],
                                coords={'time': fit_index})
        point_five, point_two = (xr.DataArray([value],
                                              dims=["time"],
                                              coords={'time': single_index})
                                 for value in (5, 2))

        wrapper.fit(bar=training)

        # Note that the points are passed as "foo" although fit used "bar".
        output_five = wrapper.transform(foo=point_five)
        output_two = wrapper.transform(foo=point_two)

        # Assert that both tested datapoints are in different clusters
        argmax_five = output_five.values[0].argmax()
        argmax_two = output_two.values[0].argmax()
        assert argmax_five != argmax_two

        # One row per sample, one column per cluster (n_clusters=2).
        for output in (output_five, output_two):
            self.assertEqual(output.shape, (1, 2))
Ejemplo n.º 17
0
    def test_add_pipeline_to_pipeline_and_train(self, fm_mock):
        """Train a pipeline containing a sub-pipeline and check every step's mode."""
        inner = Pipeline()

        missing_values = MissingValueDetector()
        missing_values(dataset=inner["regression"])

        wrapped_regression = SKLearnWrapper(LinearRegression(),
                                            name="regression")
        regression_step = wrapped_regression(x=self.pipeline["test"],
                                             target=self.pipeline["target"])
        # The regression output is handed to the sub-pipeline as "regression".
        inner(regression=regression_step)

        timestamps = pd.to_datetime(
            ['2015-06-03 00:00:00', '2015-06-03 01:00:00'])
        training_frame = pd.DataFrame({
            "test": [24, 24],
            "target": [12, 24]
        },
                                      index=timestamps)
        self.pipeline.train(training_frame)

        # After train() every step of the outer pipeline must be in
        # FitTransform mode.
        for pipeline_step in self.pipeline.id_to_step.values():
            assert pipeline_step.computation_mode == ComputationMode.FitTransform
Ejemplo n.º 18
0
 def test_add_only_module(self):
     """Adding one module with one pipeline input must yield two steps."""
     regression = SKLearnWrapper(LinearRegression())
     regression(x=self.pipeline["input"])

     # Two steps are expected: the added module plus the start step.
     step_count = len(self.pipeline.id_to_step)
     self.assertEqual(step_count, 2)
Ejemplo n.º 19
0
    def test_add_module_with_one_input_without_a_list(self):
        """Chain a scaler and a regression on one pipeline column and count the steps."""
        scaled = SKLearnWrapper(StandardScaler())(input=self.pipeline["test"])
        regression = SKLearnWrapper(LinearRegression())
        regression(input=scaled)

        # Three steps are expected; presumably the two modules plus the start
        # step for "test" -- TODO confirm.
        expected_steps = 3
        self.assertEqual(expected_steps, len(self.pipeline.id_to_step))
Ejemplo n.º 20
0
 def test_add_with_target(self):
     """Adding a module with an input and a target column must yield three steps."""
     regression = SKLearnWrapper(LinearRegression())
     regression(input=self.pipeline["input"],
                target=self.pipeline["target"])

     # Presumably the module plus one start step each for "input" and
     # "target" -- TODO confirm.
     expected_steps = 3
     self.assertEqual(expected_steps, len(self.pipeline.id_to_step))
Ejemplo n.º 21
0
 def test_set_params(self):
     """set_params on the wrapper must change the wrapped scaler's parameters."""
     wrapped_scaler = StandardScaler()
     wrapper = SKLearnWrapper(module=wrapped_scaler)

     with_mean_before = wrapped_scaler.get_params()["with_mean"]
     self.assertEqual(with_mean_before, True)

     wrapper.set_params(with_mean=False)

     # The change is read back from the scaler itself, not from the wrapper.
     with_mean_after = wrapped_scaler.get_params()["with_mean"]
     self.assertEqual(with_mean_after, False)
Ejemplo n.º 22
0
 def test_get_params(self):
     """The wrapper must report the same parameters as the wrapped scaler."""
     wrapped_scaler = StandardScaler()
     wrapper_params = SKLearnWrapper(module=wrapped_scaler).get_params()
     self.assertEqual(wrapper_params, wrapped_scaler.get_params())
Ejemplo n.º 23
0
 def test_add_input_as_positional(self):
     """Add a module to the pipeline; the test passes if nothing is raised."""
     # NOTE(review): despite the test name, the input is passed by keyword
     # (x=...) and nothing is asserted. The original remark here was that
     # this should fail with a better error message -- confirm what is
     # meant to be checked.
     regression = SKLearnWrapper(LinearRegression())
     regression(x=self.pipeline["input"])
Ejemplo n.º 24
0
 def test_add_module_which_is_not_in_a_list(self):
     """Chain two regressions on one pipeline column and count the steps."""
     first_regression = SKLearnWrapper(LinearRegression())
     first_output = first_regression(input=self.pipeline["input"])
     second_regression = SKLearnWrapper(LinearRegression())
     second_regression(x=first_output)

     # Three steps are expected; presumably the two modules plus the start
     # step for "input" -- TODO confirm.
     step_count = len(self.pipeline.id_to_step)
     self.assertEqual(step_count, 3)