def test_create_and_run_simple_pipeline(self):
    pipeline = Pipeline()

    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time",
        name="imputer_power")(x=pipeline["load_power_statistics"])
    imputer_price = LinearInterpolater(
        method="nearest", dim="time",
        name="imputer_price")(x=pipeline["price_day_ahead"])
    scaler = SKLearnWrapper(StandardScaler())(x=imputer_price)
    lin_regression = SKLearnWrapper(LinearRegression())(
        x=scaler, target1=imputer_price, target2=imputer_power_statistics)

    RmseCalculator(name="Load")(y=imputer_power_statistics,
                                pred=lin_regression["target2"])
    RmseCalculator(name="Price")(y=imputer_price,
                                 pred=lin_regression["target1"])

    data = pd.read_csv("data/getting_started_data.csv",
                       index_col="time",
                       sep=",",
                       parse_dates=["time"],
                       infer_datetime_format=True)
    train = data[6000:]
    test = data[:6000]

    pipeline.train(train)
    pipeline.test(test)
def pipe(params):
    keras_model = get_keras_model(params)

    pipeline = Pipeline(path="../results")

    imputer_power_statistics = LinearInterpolater(
        method='nearest', dim='time',
        name='imputer_power')(x=pipeline['load_power_statistics'])

    power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
    scale_power_statistics = power_scaler(x=imputer_power_statistics)

    shift_power_statistics = ClockShift(
        lag=1, name='ClockShift_Lag1')(x=scale_power_statistics)
    shift_power_statistics2 = ClockShift(
        lag=2, name='ClockShift_Lag2')(x=scale_power_statistics)

    keras_wrapper = KerasWrapper(keras_model,
                                 fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
                                 compile_kwargs={'loss': 'mse',
                                                 'optimizer': 'Adam',
                                                 'metrics': ['mse']}) \
        (ClockShift_Lag1=shift_power_statistics,
         ClockShift_Lag2=shift_power_statistics2,
         target=scale_power_statistics)

    inverse_power_scale_dl = power_scaler(
        x=keras_wrapper,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('prediction')])

    rmse_dl = RmseCalculator()(keras_model=inverse_power_scale_dl,
                               y=pipeline['load_power_statistics'],
                               callbacks=[CSVCallback('RMSE')])

    pipeline.train(train)
    result = pipeline.test(test)

    return {
        "loss": float(result['RmseCalculator'].values),
        "status": STATUS_OK,
        "eval_time": time.time() - start
    }
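# The objective above follows the hyperopt convention ("loss", STATUS_OK,
# "eval_time"), so it can be handed to hyperopt's fmin. The snippet below is
# only a hedged sketch of that wiring: the search space (units, activation) is
# hypothetical, and `start`, `train`, and `test` are assumed to be defined in
# the surrounding script, just as in the objective itself.
from hyperopt import Trials, fmin, hp, tpe

space = {
    "units": hp.choice("units", [16, 32, 64]),                  # hypothetical hyperparameter
    "activation": hp.choice("activation", ["relu", "tanh"]),    # hypothetical hyperparameter
}
trials = Trials()
best_params = fmin(fn=pipe, space=space, algo=tpe.suggest,
                   max_evals=10, trials=trials)
print("Best hyperparameters found:", best_params)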
# Create the pipeline that contains the preprocessing steps
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(scaler_power=train_pipeline["load_power_statistics"])

# Add the regressors to the train pipeline
regressor_lin_reg(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  target=train_pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('LinearRegression')])
regressor_svr(ClockShift=preprocessing_pipeline["ClockShift"],
              ClockShift_1=preprocessing_pipeline["ClockShift_1"],
              target=train_pipeline["load_power_statistics"],
              callbacks=[LinePlotCallback('SVR')])

print("Start training")
train_pipeline.train(data)
print("Training finished")

# Create a second pipeline. This is necessary because this pipeline contains
# additional steps compared to the train pipeline.
pipeline = Pipeline(path="../results")

# Get the preprocessing pipeline
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

# Get the test pipeline; the arguments are the modules from the training pipeline that should be reused
test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])
test_pipeline(ClockShift=preprocessing_pipeline["ClockShift"],
              ClockShift_1=preprocessing_pipeline["ClockShift_1"],
              load_power_statistics=pipeline["load_power_statistics"],
class TestPipeline(unittest.TestCase):

    @patch("pywatts.core.pipeline.FileManager")
    def setUp(self, fm_mock) -> None:
        self.fm_mock = fm_mock()
        self.pipeline = Pipeline()

    def tearDown(self) -> None:
        self.pipeline = None

    def test_add_input_as_positional(self):
        # Should fail with a better error message
        SKLearnWrapper(LinearRegression())(x=self.pipeline["input"])

    def test_add_only_module(self):
        SKLearnWrapper(LinearRegression())(x=self.pipeline["input"])
        # One module plus the start step
        self.assertEqual(len(self.pipeline.id_to_step), 2)

    def test_add_module_which_is_not_in_a_list(self):
        wrapper = SKLearnWrapper(
            LinearRegression())(input=self.pipeline["input"])
        SKLearnWrapper(LinearRegression())(x=wrapper)
        # Two modules plus the start step
        self.assertEqual(len(self.pipeline.id_to_step), 3)

    def test_add_pipeline_without_index(self):
        # This should raise an exception since the pipeline might get multiple columns in the input dataframe
        with self.assertRaises(Exception) as context:
            SKLearnWrapper(StandardScaler())(
                x=self.pipeline)  # This should fail
        self.assertEqual(
            "Adding a pipeline as input might be ambigious. Specifiy the desired column of your dataset by using pipeline[<column_name>]",
            str(context.exception))

    def test_add_module_with_inputs(self):
        scaler1 = SKLearnWrapper(StandardScaler())(x=self.pipeline["x"])
        scaler2 = SKLearnWrapper(StandardScaler())(x=self.pipeline["test1"])
        SKLearnWrapper(LinearRegression())(input_1=scaler1, input_2=scaler2)
        # Three modules plus two start steps
        self.assertEqual(5, len(self.pipeline.id_to_step))

    def test_add_module_with_one_input_without_a_list(self):
        scaler = SKLearnWrapper(StandardScaler())(input=self.pipeline["test"])
        SKLearnWrapper(LinearRegression())(input=scaler)
        # Two modules plus the start step
        self.assertEqual(3, len(self.pipeline.id_to_step))

    @patch('pywatts.core.pipeline.FileManager')
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    def test_to_folder(self, mock_file, json_mock, fm_mock):
        scaler = SKLearnWrapper(StandardScaler())(input=self.pipeline["input"])
        SKLearnWrapper(LinearRegression())(x=scaler)

        fm_mock_object = MagicMock()
        fm_mock.return_value = fm_mock_object
        fm_mock_object.get_path.side_effect = [
            os.path.join('test_pipeline', 'StandardScaler.pickle'),
            os.path.join('test_pipeline', 'LinearRegression.pickle'),
            os.path.join('test_pipeline', 'pipeline.json'),
        ]

        self.pipeline.to_folder("test_pipeline")

        calls_open = [
            call(os.path.join('test_pipeline', 'StandardScaler.pickle'), 'wb'),
            call(os.path.join('test_pipeline', 'LinearRegression.pickle'), 'wb'),
            call(os.path.join('test_pipeline', 'pipeline.json'), 'w')
        ]
        mock_file.assert_has_calls(calls_open, any_order=True)

        args, kwargs = json_mock.dump.call_args
        assert kwargs["obj"]["id"] == pipeline_json["id"]
        assert kwargs["obj"]["name"] == pipeline_json["name"]
        assert kwargs["obj"]["modules"] == pipeline_json["modules"]
        assert kwargs["obj"]["steps"] == pipeline_json["steps"]

    @patch('pywatts.core.pipeline.FileManager')
    @patch('pywatts.modules.sklearn_wrapper.pickle')
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    @patch('pywatts.core.pipeline.os.path.isdir')
    def test_from_folder(self, isdir_mock, mock_file, json_mock, pickle_mock, fm_mock):
        scaler = StandardScaler()
        linear_regression = LinearRegression()
        isdir_mock.return_value = True
        json_mock.load.return_value = pipeline_json
        pickle_mock.load.side_effect = [scaler, linear_regression]

        pipeline = Pipeline.from_folder("test_pipeline")

        calls_open = [
            call(os.path.join("test_pipeline", "StandardScaler.pickle"), "rb"),
            call(os.path.join("test_pipeline", "LinearRegression.pickle"), "rb"),
            call(os.path.join("test_pipeline", "pipeline.json"), "r")
        ]
        mock_file.assert_has_calls(calls_open, any_order=True)
        json_mock.load.assert_called_once()
        assert pickle_mock.load.call_count == 2
        isdir_mock.assert_called_once()
        self.assertEqual(3, len(pipeline.id_to_step))

    def test_module_naming_conflict(self):
        # This test should check that modules with the same name do not lead to an error
        # What should this test?
        # self.fail()
        pass

    def test_add_with_target(self):
        SKLearnWrapper(LinearRegression())(input=self.pipeline["input"],
                                           target=self.pipeline["target"])
        self.assertEqual(3, len(self.pipeline.id_to_step))

    def test_multiple_same_module(self):
        reg_module = SKLearnWrapper(module=LinearRegression())
        reg_one = reg_module(x=self.pipeline["test"],
                             target=self.pipeline["target"])
        reg_two = reg_module(x=self.pipeline["test2"],
                             target=self.pipeline["target"])
        detector = MissingValueDetector()
        detector(dataset=reg_one)
        detector(dataset=reg_two)

        # Three start steps (test, test2, target), two regressors, and two detectors
        self.assertEqual(7, len(self.pipeline.id_to_step))
        modules = []
        for element in self.pipeline.id_to_step.values():
            if isinstance(element, Step) and not element.module in modules:
                modules.append(element.module)
        # One SKLearn wrapper and one missing value detector
        self.assertEqual(2, len(modules))

        self.pipeline.train(
            pd.DataFrame(
                {
                    "test": [1, 2, 2, 3, 4],
                    "test2": [2, 2, 2, 2, 2],
                    "target": [2, 2, 4, 4, -5]
                },
                index=pd.DatetimeIndex(
                    pd.date_range('2000-01-01', freq='24H', periods=5))))

    @patch('pywatts.core.pipeline.Pipeline._create_summary')
    @patch('pywatts.core.pipeline.FileManager')
    def test_add_pipeline_to_pipeline_and_train(self, fm_mock, create_summary_mock):
        sub_pipeline = Pipeline()
        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["regression"])

        regressor = SKLearnWrapper(LinearRegression(), name="regression")(
            x=self.pipeline["test"], target=self.pipeline["target"])
        sub_pipeline(regression=regressor)

        summary_formatter_mock = MagicMock()
        self.pipeline.train(pd.DataFrame({
            "test": [24, 24],
            "target": [12, 24]
        }, index=pd.to_datetime([
            '2015-06-03 00:00:00', '2015-06-03 01:00:00'
        ])), summary_formatter=summary_formatter_mock)

        for step in self.pipeline.id_to_step.values():
            assert step.current_run_setting.computation_mode == ComputationMode.FitTransform
        create_summary_mock.assert_has_calls(
            [call(summary_formatter_mock), call(summary_formatter_mock)])

    @patch('pywatts.core.pipeline.FileManager')
    def test_add_pipeline_to_pipeline_and_test(self, fm_mock):
        # Add some steps to the pipeline
        # Assert that the computation is set to fit_transform if the ComputationMode was default
        step = MagicMock()
        step.computation_mode = ComputationMode.Default
        step.finished = False

        time = pd.date_range('2000-01-01', freq='24H', periods=7)
        ds = xr.Dataset({'foo': ('time', [2, 3, 4, 5, 6, 7, 8]), 'time': time})

        subpipeline = Pipeline()
        subpipeline.add(module=step)

        # BUG: In step_factory.py -> create_step the file_manager of the pipeline is accessed
        # and the pipeline is None...
        # subpipeline(self.pipeline)
        # self.pipeline.test(ds)

        # step.set_computation_mode.assert_called_once_with(ComputationMode.Transform)
        # step.reset.assert_called_once()

    @patch("pywatts.core.pipeline.FileManager")
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    def test_add_pipeline_to_pipeline_and_save(self, open_mock, json_mock, fm_mock):
        sub_pipeline = Pipeline()
        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["regressor"])

        regressor = SKLearnWrapper(LinearRegression())(x=self.pipeline["test"])
        sub_pipeline(regression=regressor)

        self.pipeline.to_folder(path="path")

        self.assertEqual(json_mock.dump.call_count, 2)

    def create_summary_in_subpipelines(self):
        assert False

    @patch('pywatts.core.pipeline.FileManager')
    def test__collect_batch_results_naming_conflict(self, fm_mock):
        step_one = MagicMock()
        step_one.name = "step"
        step_two = MagicMock()
        step_two.name = "step"
        result_step_one = MagicMock()
        result_step_two = MagicMock()
        merged_result = {"step": result_step_one, "step_1": result_step_two}
        step_one.get_result.return_value = {"step": result_step_one}
        step_two.get_result.return_value = {"step_1": result_step_two}

        result = self.pipeline._collect_results([step_one, step_two])

        # Assert that the steps are correctly called
        step_one.get_result.assert_called_once_with(None, None, return_all=True)
        step_two.get_result.assert_called_once_with(None, None, return_all=True)
        # Assert that the return value is correct
        self.assertEqual(merged_result, result)

    @patch("pywatts.core.pipeline.FileManager")
    def test_get_params(self, fm_mock):
        result = Pipeline(batch=pd.Timedelta("1h")).get_params()
        self.assertEqual(result, {"batch": pd.Timedelta("1h")})

    def test_set_params(self):
        self.pipeline.set_params(batch=pd.Timedelta("2h"))
        self.assertEqual(self.pipeline.get_params(),
                         {"batch": pd.Timedelta("2h")})

    def test__collect_batch_results(self):
        step_one = MagicMock()
        step_one.name = "step_one"
        step_two = MagicMock()
        step_two.name = "step_two"
        result_step_one = MagicMock()
        result_step_two = MagicMock()
        merged_result = {
            "step_one": result_step_one,
            "step_two": result_step_two
        }
        step_one.get_result.return_value = {"step_one": result_step_one}
        step_two.get_result.return_value = {"step_two": result_step_two}

        result = self.pipeline._collect_results([step_one, step_two])

        # Assert that the steps are correctly called
        step_one.get_result.assert_called_once_with(None, None, return_all=True)
        step_two.get_result.assert_called_once_with(None, None, return_all=True)
        # Assert that the return value is correct
        self.assertEqual(merged_result, result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batched_pipeline(self, concat_mock, fm_mock):
        # Add some steps to the pipeline
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2],
                          dims=["time"], coords={'time': time})

        # Assert that the computation is set to transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.run_setting = RunSetting(ComputationMode.Default)
        first_step.finished = False
        first_step.further_elements.side_effect = [
            True, True, True, True, False
        ]
        first_step.get_result.return_value = {"one": da}

        self.pipeline.set_params(pd.Timedelta("24h"))
        self.pipeline.add(module=first_step)
        data = pd.DataFrame({
            "test": [1, 2, 2, 3],
            "test2": [2, 2, 2, 2]
        }, index=pd.DatetimeIndex(
            pd.date_range('2000-01-01', freq='24H', periods=4)))

        self.pipeline.test(data)

        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)

        calls = [
            call(pd.Timestamp('2000-01-01 00:00:00', freq='24H'),
                 pd.Timestamp('2000-01-02 00:00:00', freq='24H'),
                 return_all=True),
            call(pd.Timestamp('2000-01-02 00:00:00', freq='24H'),
                 pd.Timestamp('2000-01-03 00:00:00', freq='24H'),
                 return_all=True),
            call(pd.Timestamp('2000-01-03 00:00:00', freq='24H'),
                 pd.Timestamp('2000-01-04 00:00:00', freq='24H'),
                 return_all=True),
            call(pd.Timestamp('2000-01-04 00:00:00', freq='24H'),
                 pd.Timestamp('2000-01-05 00:00:00', freq='24H'),
                 return_all=True),
        ]
        first_step.get_result.assert_has_calls(calls, any_order=True)
        self.assertEqual(concat_mock.call_count, 3)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batch_2H_transform(self, concat_mock, fm_mock):
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2],
                          dims=["time"], coords={'time': time})

        pipeline = Pipeline(batch=pd.Timedelta("2h"))
        step_one = MagicMock()
        step_one.get_result.return_value = {"step": da}
        step_one.name = "step"
        result_mock = MagicMock()
        concat_mock.return_value = result_mock
        pipeline.start_steps["foo"] = StartStep("foo"), None
        pipeline.start_steps["foo"][0].last = False
        step_one.further_elements.side_effect = [True, True, True, True, False]
        pipeline.add(module=step_one, input_ids=[1])

        result = pipeline.transform(foo=da)

        self.assertEqual(concat_mock.call_count, 3)
        self.assertEqual(step_one.get_result.call_count, 4)
        self.assertEqual(step_one.further_elements.call_count, 5)
        self.assertEqual({"step": result_mock}, result)

    @patch('pywatts.core.pipeline.FileManager')
    @patch("pywatts.core.pipeline._get_time_indexes", return_value=["time"])
    def test_transform_pipeline(self, get_time_indexes_mock, fm_mock):
        input_mock = MagicMock()
        input_mock.indexes = {"time": ["20.12.2020"]}
        step_two = MagicMock()
        result_mock = MagicMock()
        step_two.name = "mock"
        step_two.get_result.return_value = {"mock": result_mock}
        self.pipeline.add(module=step_two, input_ids=[1])

        result = self.pipeline.transform(x=input_mock)

        step_two.get_result.assert_called_once_with("20.12.2020", None,
                                                    return_all=True)
        get_time_indexes_mock.assert_called_once_with({"x": input_mock})
        self.assertEqual({"mock": result_mock}, result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.Pipeline.from_folder")
    def test_load(self, from_folder_mock, fm_mock):
        created_pipeline = MagicMock()
        from_folder_mock.return_value = created_pipeline

        pipeline = Pipeline.load({
            'name': 'Pipeline',
            'class': 'Pipeline',
            'module': 'pywatts.core.pipeline',
            'pipeline_path': 'save_path'
        })

        from_folder_mock.assert_called_once_with("save_path")
        self.assertEqual(created_pipeline, pipeline)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.Pipeline.to_folder")
    @patch("pywatts.core.pipeline.os")
    def test_save(self, os_mock, to_folder_mock, fm_mock):
        os_mock.path.join.return_value = "save_path"
        os_mock.path.isdir.return_value = False
        sub_pipeline = Pipeline(batch=pd.Timedelta("1h"))
        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["test"])
        fm_mock = MagicMock()
        fm_mock.basic_path = "path_to_save"

        result = sub_pipeline.save(fm_mock)

        to_folder_mock.assert_called_once_with("save_path")
        os_mock.path.join.assert_called_once_with("path_to_save", "Pipeline")
        self.assertEqual(
            {
                'name': 'Pipeline',
                'class': 'Pipeline',
                'module': 'pywatts.core.pipeline',
                'params': {
                    'batch': '0 days 01:00:00'
                },
                'pipeline_path': 'save_path'
            }, result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batch_1_transform(self, concat_mock, fm_mock):
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2],
                          dims=["time"], coords={'time': time})

        pipeline = Pipeline(batch=pd.Timedelta("1h"))
        step_one = MagicMock()
        step_one.get_result.return_value = {"step": da}
        step_one.name = "step"
        result_mock = MagicMock()
        concat_mock.return_value = result_mock
        pipeline.start_steps["foo"] = StartStep("foo"), None
        pipeline.start_steps["foo"][0].last = False
        step_one.further_elements.side_effect = [
            True, True, True, True, True, True, True, False
        ]
        pipeline.add(module=step_one, input_ids=[1])

        result = pipeline.transform(foo=da)

        self.assertEqual(concat_mock.call_count, 6)
        self.assertEqual(step_one.get_result.call_count, 7)
        self.assertEqual(step_one.further_elements.call_count, 8)
        self.assertEqual({"step": result_mock}, result)

    @patch('pywatts.core.pipeline.FileManager')
    def test_test(self, fm_mock):
        # Add some steps to the pipeline
        # Assert that the computation is set to transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.computation_mode = ComputationMode.Default
        first_step.finished = False
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2],
                          dims=["time"], coords={'time': time})
        first_step.get_result.return_value = {"first": da}

        second_step = MagicMock()
        second_step.computation_mode = ComputationMode.Train
        second_step.finished = False
        second_step.get_result.return_value = {"Second": da}

        self.pipeline.add(module=first_step)
        self.pipeline.add(module=second_step)

        self.pipeline.test(
            pd.DataFrame({
                "test": [1, 2, 2, 3, 4],
                "test2": [2, 2, 2, 2, 2]
            }, index=pd.DatetimeIndex(
                pd.date_range('2000-01-01', freq='24H', periods=5))))

        first_step.get_result.assert_called_once_with(pd.Timestamp(
            '2000-01-01 00:00:00', freq='24H'), None, return_all=True)
        second_step.get_result.assert_called_once_with(pd.Timestamp(
            '2000-01-01 00:00:00', freq='24H'), None, return_all=True)
        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)
        second_step.set_run_setting.assert_called_once()
        self.assertEqual(
            second_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)
        first_step.reset.assert_called_once()
        second_step.reset.assert_called_once()

    @patch('pywatts.core.pipeline.FileManager')
    def test_train(self, fmmock):
        # Add some steps to the pipeline
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2],
                          dims=["time"], coords={'time': time})

        # Assert that the computation is set to fit_transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.computation_mode = ComputationMode.Default
        first_step.finished = False
        first_step.get_result.return_value = {"first": da}

        second_step = MagicMock()
        second_step.computation_mode = ComputationMode.Train
        second_step.finished = False
        second_step.get_result.return_value = {"second": da}

        self.pipeline.add(module=first_step)
        self.pipeline.add(module=second_step)
        data = pd.DataFrame({
            "test": [1, 2, 2, 3, 4],
            "test2": [2, 2, 2, 2, 2]
        }, index=pd.DatetimeIndex(
            pd.date_range('2000-01-01', freq='24H', periods=5)))

        result, summary = self.pipeline.train(data, summary=True)

        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.FitTransform)
        second_step.set_run_setting.assert_called_once()
        self.assertEqual(
            second_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.FitTransform)
        first_step.get_result.assert_called_once_with(pd.Timestamp(
            '2000-01-01 00:00:00', freq='24H'), None, return_all=True)
        second_step.get_result.assert_called_once_with(pd.Timestamp(
            '2000-01-01 00:00:00', freq='24H'), None, return_all=True)
        first_step.reset.assert_called_once()
        second_step.reset.assert_called_once()
        xr.testing.assert_equal(result["second"], da)

    @patch("builtins.open", new_callable=mock_open)
    def test_horizon_greater_one_regression_inclusive_summary_file(
            self, open_mock):
        lin_reg = LinearRegression()
        self.fm_mock.get_path.return_value = "summary_path"
        multi_regressor = SKLearnWrapper(lin_reg)(
            foo=self.pipeline["foo"],
            target=self.pipeline["target"],
            target2=self.pipeline["target2"])
        RMSE()(y=self.pipeline["target"],
               prediction=multi_regressor["target"])

        time = pd.date_range('2000-01-01', freq='24H', periods=5)
        foo = xr.DataArray([1, 2, 3, 4, 5], dims=["time"],
                           coords={'time': time})
        target = xr.DataArray([[2, 3], [2, 4], [2, 5], [2, 6], [2, 7]],
                              dims=["time", "horizon"],
                              coords={
                                  'time': time,
                                  "horizon": [1, 2]
                              })
        target2 = xr.DataArray([3, 3, 3, 3, 3], dims=["time"],
                               coords={'time': time})
        ds = xr.Dataset({'foo': foo, "target": target, "target2": target2})

        result, summary = self.pipeline.train(ds, summary=True)

        self.assertTrue("Training Time" in summary)
        self.assertTrue("RMSE" in summary)
        self.fm_mock.get_path.assert_called_once_with("summary.md")
        open_mock().__enter__.return_value.write.assert_called_once_with(
            summary)
        self.assertTrue("target" in result.keys())
    callbacks=[LinePlotCallback('rescale')])

# Calculate the root mean squared error (RMSE) between the linear regression
# and the true values, and save it as a CSV file
rmse = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete, so we can run it and explore the results
# Start the pipeline
data = pd.read_csv("../data/getting_started_data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True,
                   sep=",")

train = data.iloc[:6000, :]
pipeline.train(data=train)

test = data.iloc[6000:, :]
data = pipeline.test(data=test)

# Save the pipeline to a folder
pipeline.to_folder("./pipe_getting_started")

print("Execute second pipeline")

# Load the pipeline as a new instance
pipeline2 = Pipeline.from_folder("./pipe_getting_started",
                                 file_manager_path="../pipeline2_results")
# WARNING
# Sometimes from_folder uses unpickling for loading modules. Note that this is not safe.
# Consequently, load only pipelines you trust with from_folder.
# For more details about pickling see https://docs.python.org/3/library/pickle.html
    lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
shift_power_statistics2 = ClockShift(
    lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

keras_wrapper = KerasWrapper(keras_model,
                             fit_kwargs={"batch_size": 8, "epochs": 1},
                             compile_kwargs={"loss": "mse",
                                             "optimizer": "Adam",
                                             "metrics": ["mse"]}) \
    (ClockShift_Lag1=shift_power_statistics,
     ClockShift_Lag2=shift_power_statistics2,
     target=scale_power_statistics)

inverse_power_scale_dl = power_scaler(
    x=keras_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback("prediction")])

rmse_dl = RMSE()(keras_model=inverse_power_scale_dl,
                 y=pipeline["load_power_statistics"])

# Now, the pipeline is complete,
# so we can load data and train the model
data = pd.read_csv("../data/getting_started_data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True,
                   sep=",")

pipeline.train(data)
pipeline.to_folder("../results/pipe_keras")
def custom_multiplication(x: xr.Dataset):
    # Multiply the given dataset by 1000
    return x * 1000


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule and add the module to the pipeline
    function_module = FunctionModule(
        custom_multiplication, name="Multiplication")(
            x=pipeline["load_power_statistics"],
            callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")])

    # Now, the pipeline is complete, so we can run it and explore the results
    # Start the pipeline
    df = pd.read_csv("../data/getting_started_data.csv",
                     parse_dates=["time"],
                     infer_datetime_format=True,
                     index_col="time")
    pipeline.train(df)

    # Generate a plot of the pipeline showing the flow of data through the different modules
    pipeline.draw()
    plt.show()
prediction_moving = ProfileNeuralNetwork(offset=24 * 7 * 11, epochs=1000)(
    historical_input=sampled_difference,
    calendar=sampled_calendar,
    temperature=sampled_temperature,
    humidity=sampled_humidity,
    profile=sampled_profile_moving,
    trend=sampled_trend,
    target=target,
    callbacks=[LinePlotCallback("PNN")])

rmse = RmseCalculator(offset=11 * 168)(pnn_moving=prediction_moving,
                                       moving_pred=sampled_profile_moving,
                                       y=target,
                                       callbacks=[CSVCallback('RMSE')])
rmse_cleaned = RmseCalculator(name="RMSE_cleaned", offset=11 * 168)(
    pnn_moving=prediction_moving,
    moving_pred=sampled_profile_moving,
    y=target,
    callbacks=[CSVCallback('RMSE')])

data = pd.read_csv("data/data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True)

result_train = pipeline.train(data[:"05.18.2015"])
result_test = pipeline.test(data["05.18.2015":])
print("Finished")