def test_run_reloaded_simple_pipeline(self):
    """Store a small pipeline, reload it from disk and run the reloaded copy.

    The pipeline consists of two imputer steps, a scaler and a regression
    with two targets. It is written to ``./pipe1``, read back with
    ``Pipeline.from_folder`` and the reloaded instance is trained and tested
    on the getting-started CSV. The test passes if none of this raises.
    """
    original = Pipeline()

    # One imputer step per raw input column.
    power_filled = LinearInterpolater(method="nearest", dim="time", name="imputer_power")(
        x=original["load_power_statistics"])
    price_filled = LinearInterpolater(method="nearest", dim="time", name="imputer_price")(
        x=original["price_day_ahead"])

    scaled_price = SKLearnWrapper(StandardScaler())(x=price_filled)
    SKLearnWrapper(LinearRegression())(x=scaled_price,
                                       target1=price_filled,
                                       target2=power_filled)

    # Round trip through the file system.
    storage_dir = "./pipe1"
    original.to_folder(storage_dir)
    sleep(1)
    reloaded = Pipeline.from_folder(storage_dir)

    frame = pd.read_csv("data/getting_started_data.csv",
                        index_col="time",
                        sep=",",
                        parse_dates=["time"],
                        infer_datetime_format=True)

    # Rows from 6000 onwards are used for training, the first 6000 for testing.
    reloaded.train(frame[6000:])
    reloaded.test(frame[:6000])
# Fit the training pipeline on the data loaded earlier in this script.
print("Start training")
train_pipeline.train(data)
print("Training finished")

# Create a second pipeline. This is necessary because it has additional steps compared to the train pipeline.
pipeline = Pipeline(path="../results")

# Get the preprocessing pipeline and attach it to the raw power column
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

# Get the test pipeline; the arguments are the modules from the training pipeline that should be reused
test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])
test_pipeline(ClockShift=preprocessing_pipeline["ClockShift"],
              ClockShift_1=preprocessing_pipeline["ClockShift_1"],
              load_power_statistics=pipeline["load_power_statistics"],
              callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')])

# Now the pipeline is complete, so we can run it and explore the results
# Start the pipeline
print("Start testing")
result = pipeline.test(test)

# Store the pipeline, replace it with the instance reloaded from that folder,
# and run the test data through the reloaded pipeline as well.
pipeline.to_folder("stored_day_and_night")
pipeline = Pipeline.from_folder("stored_day_and_night")
print("Testing finished")
result2 = pipeline.test(test)
print("FINISHED")
class TestPipeline(unittest.TestCase):
    """Unit tests for constructing, storing, loading and running a ``Pipeline``.

    ``setUp`` creates ``self.pipeline`` while ``FileManager`` is patched and
    keeps the resulting file-manager mock in ``self.fm_mock``. Most tests use
    ``MagicMock`` steps so that only the pipeline's orchestration is checked.
    """

    @patch("pywatts.core.pipeline.FileManager")
    def setUp(self, fm_mock) -> None:
        """Create a fresh pipeline; ``FileManager`` is patched only during construction."""
        self.fm_mock = fm_mock()
        self.pipeline = Pipeline()

    def tearDown(self) -> None:
        """Drop the reference to the pipeline created in ``setUp``."""
        self.pipeline = None

    def test_add_input_as_positional(self):
        """Adding a module with a keyword input must not raise (no further assertion)."""
        # Should fail with a better error message
        SKLearnWrapper(LinearRegression())(x=self.pipeline["input"])

    def test_add_only_module(self):
        """A single module yields two entries in ``id_to_step``."""
        SKLearnWrapper(LinearRegression())(x=self.pipeline["input"])
        # One module node plus the start step
        self.assertEqual(len(self.pipeline.id_to_step), 2)

    def test_add_module_which_is_not_in_a_list(self):
        """Chaining two modules yields three entries in ``id_to_step``."""
        wrapper = SKLearnWrapper(LinearRegression())(input=self.pipeline["input"])
        SKLearnWrapper(LinearRegression())(x=wrapper)
        # Two module nodes plus the start step
        self.assertEqual(len(self.pipeline.id_to_step), 3)

    def test_add_pipeline_without_index(self):
        """Passing the pipeline itself (without a column key) as input must raise."""
        # This should raise an exception since the pipeline might get multiple
        # columns in the input dataframe
        with self.assertRaises(Exception) as context:
            SKLearnWrapper(StandardScaler())(x=self.pipeline)  # This should fail
        # Checked after the ``with`` block: inside it the line would be unreachable.
        self.assertEqual(
            "Adding a pipeline as input might be ambigious. "
            "Specifiy the desired column of your dataset by using pipeline[<column_name>]",
            str(context.exception))

    def test_add_module_with_inputs(self):
        """Two scalers on different columns feeding one regressor yield five steps."""
        scaler1 = SKLearnWrapper(StandardScaler())(x=self.pipeline["x"])
        scaler2 = SKLearnWrapper(StandardScaler())(x=self.pipeline["test1"])
        SKLearnWrapper(LinearRegression())(input_1=scaler1, input_2=scaler2)

        # Three modules plus start step and one collect step
        self.assertEqual(5, len(self.pipeline.id_to_step))

    def test_add_module_with_one_input_without_a_list(self):
        """A scaler followed by a regressor yields three steps."""
        scaler = SKLearnWrapper(StandardScaler())(input=self.pipeline["test"])
        SKLearnWrapper(LinearRegression())(input=scaler)

        # Two modules plus the start step
        self.assertEqual(3, len(self.pipeline.id_to_step))

    @patch('pywatts.core.pipeline.FileManager')
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    def test_to_folder(self, mock_file, json_mock, fm_mock):
        """``to_folder`` opens the expected files and dumps the expected JSON."""
        scaler = SKLearnWrapper(StandardScaler())(input=self.pipeline["input"])
        SKLearnWrapper(LinearRegression())(x=scaler)
        fm_mock_object = MagicMock()
        fm_mock.return_value = fm_mock_object
        fm_mock_object.get_path.side_effect = [
            os.path.join('test_pipeline', 'StandardScaler.pickle'),
            os.path.join('test_pipeline', 'LinearRegression.pickle'),
            os.path.join('test_pipeline', 'pipeline.json'),
        ]

        self.pipeline.to_folder("test_pipeline")

        calls_open = [
            call(os.path.join('test_pipeline', 'StandardScaler.pickle'), 'wb'),
            call(os.path.join('test_pipeline', 'LinearRegression.pickle'), 'wb'),
            call(os.path.join('test_pipeline', 'pipeline.json'), 'w'),
        ]
        mock_file.assert_has_calls(calls_open, any_order=True)

        # Only the keyword arguments of the dump call are of interest.
        _, kwargs = json_mock.dump.call_args
        for key in ("id", "name", "modules", "steps"):
            self.assertEqual(kwargs["obj"][key], pipeline_json[key])

    @patch('pywatts.core.pipeline.FileManager')
    @patch('pywatts.modules.sklearn_wrapper.pickle')
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    @patch('pywatts.core.pipeline.os.path.isdir')
    def test_from_folder(self, isdir_mock, mock_file, json_mock, pickle_mock, fm_mock):
        """``from_folder`` reads the JSON and both pickles and rebuilds three steps."""
        scaler = StandardScaler()
        linear_regression = LinearRegression()

        isdir_mock.return_value = True
        json_mock.load.return_value = pipeline_json

        pickle_mock.load.side_effect = [scaler, linear_regression]

        pipeline = Pipeline.from_folder("test_pipeline")
        calls_open = [
            call(os.path.join("test_pipeline", "StandardScaler.pickle"), "rb"),
            call(os.path.join("test_pipeline", "LinearRegression.pickle"), "rb"),
            call(os.path.join("test_pipeline", "pipeline.json"), "r"),
        ]

        mock_file.assert_has_calls(calls_open, any_order=True)

        json_mock.load.assert_called_once()
        self.assertEqual(pickle_mock.load.call_count, 2)

        isdir_mock.assert_called_once()
        self.assertEqual(3, len(pipeline.id_to_step))

    def test_module_naming_conflict(self):
        """Placeholder; currently asserts nothing."""
        # This test should check that modules with the same name do not lead to an error
        # What should this test?
        # self.fail()
        pass

    def test_add_with_target(self):
        """One module with an input and a target column yields three steps."""
        SKLearnWrapper(LinearRegression())(input=self.pipeline["input"],
                                           target=self.pipeline["target"])
        self.assertEqual(3, len(self.pipeline.id_to_step))

    def test_multiple_same_module(self):
        """Re-using module instances creates separate steps that share the module."""
        reg_module = SKLearnWrapper(module=LinearRegression())
        reg_one = reg_module(x=self.pipeline["test"], target=self.pipeline["target"])
        reg_two = reg_module(x=self.pipeline["test2"], target=self.pipeline["target"])
        detector = MissingValueDetector()
        detector(dataset=reg_one)
        detector(dataset=reg_two)

        # Three start steps (test, test2, target), two regressors, two detectors
        self.assertEqual(7, len(self.pipeline.id_to_step))

        # Collect the distinct module objects referenced by the ``Step`` entries.
        modules = []
        for element in self.pipeline.id_to_step.values():
            if isinstance(element, Step) and element.module not in modules:
                modules.append(element.module)

        # One sklearn wrapper, one missing value detector
        self.assertEqual(2, len(modules))

        self.pipeline.train(
            pd.DataFrame(
                {
                    "test": [1, 2, 2, 3, 4],
                    "test2": [2, 2, 2, 2, 2],
                    "target": [2, 2, 4, 4, -5],
                },
                index=pd.DatetimeIndex(pd.date_range('2000-01-01', freq='24H', periods=5))))

    @patch('pywatts.core.pipeline.Pipeline._create_summary')
    @patch('pywatts.core.pipeline.FileManager')
    def test_add_pipeline_to_pipeline_and_train(self, fm_mock, create_summary_mock):
        """Training with a nested pipeline sets FitTransform and creates two summaries."""
        sub_pipeline = Pipeline()

        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["regression"])

        regressor = SKLearnWrapper(LinearRegression(), name="regression")(
            x=self.pipeline["test"], target=self.pipeline["target"])
        sub_pipeline(regression=regressor)

        summary_formatter_mock = MagicMock()

        self.pipeline.train(
            pd.DataFrame(
                {"test": [24, 24], "target": [12, 24]},
                index=pd.to_datetime(['2015-06-03 00:00:00', '2015-06-03 01:00:00'])),
            summary_formatter=summary_formatter_mock)

        for step in self.pipeline.id_to_step.values():
            self.assertEqual(step.current_run_setting.computation_mode,
                             ComputationMode.FitTransform)

        create_summary_mock.assert_has_calls(
            [call(summary_formatter_mock), call(summary_formatter_mock)])

    @patch('pywatts.core.pipeline.FileManager')
    def test_add_pipeline_to_pipeline_and_test(self, fm_mock):
        """Only builds a sub-pipeline; the actual checks are disabled (see BUG note)."""
        # Add some steps to the pipeline

        # Assert that the computation is set to fit_transform if the ComputationMode was default
        step = MagicMock()
        step.computation_mode = ComputationMode.Default
        step.finished = False
        time = pd.date_range('2000-01-01', freq='24H', periods=7)

        ds = xr.Dataset({'foo': ('time', [2, 3, 4, 5, 6, 7, 8]), 'time': time})

        subpipeline = Pipeline()
        subpipeline.add(module=step)

        # BUG: In step_factory.py -> create_step the file_manager of the pipeline is accessed
        # and the pipeline is None...
        # subpipeline(self.pipeline)

        # self.pipeline.test(ds)

        # step.set_computation_mode.assert_called_once_with(ComputationMode.Transform)

        # step.reset.assert_called_once()

    @patch("pywatts.core.pipeline.FileManager")
    @patch('pywatts.core.pipeline.json')
    @patch("builtins.open", new_callable=mock_open)
    def test_add_pipeline_to_pipeline_and_save(self, open_mock, json_mock, fm_mock):
        """Storing a pipeline that contains a sub-pipeline dumps JSON twice."""
        sub_pipeline = Pipeline()

        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["regressor"])

        regressor = SKLearnWrapper(LinearRegression())(x=self.pipeline["test"])
        sub_pipeline(regression=regressor)

        self.pipeline.to_folder(path="path")

        self.assertEqual(json_mock.dump.call_count, 2)

    def create_summary_in_subpipelines(self):
        # NOTE(review): the name lacks the ``test`` prefix, so unittest's default
        # discovery does not run this placeholder.
        assert False

    @patch('pywatts.core.pipeline.FileManager')
    def test__collect_batch_results_naming_conflict(self, fm_mock):
        """``_collect_results`` merges the result dicts of two steps named alike."""
        step_one = MagicMock()
        step_one.name = "step"
        step_two = MagicMock()
        step_two.name = "step"
        result_step_one = MagicMock()
        result_step_two = MagicMock()
        merged_result = {"step": result_step_one, "step_1": result_step_two}

        step_one.get_result.return_value = {"step": result_step_one}
        step_two.get_result.return_value = {"step_1": result_step_two}

        result = self.pipeline._collect_results([step_one, step_two])

        # Assert that steps are correctly called.
        step_one.get_result.assert_called_once_with(None, None, return_all=True)
        step_two.get_result.assert_called_once_with(None, None, return_all=True)

        # Assert return value is correct
        self.assertEqual(merged_result, result)

    @patch("pywatts.core.pipeline.FileManager")
    def test_get_params(self, fm_mock):
        """``get_params`` returns the batch size passed to the constructor."""
        result = Pipeline(batch=pd.Timedelta("1h")).get_params()
        self.assertEqual(result, {"batch": pd.Timedelta("1h")})

    def test_set_params(self):
        """``set_params`` updates the batch size reported by ``get_params``."""
        self.pipeline.set_params(batch=pd.Timedelta("2h"))
        self.assertEqual(self.pipeline.get_params(), {"batch": pd.Timedelta("2h")})

    def test__collect_batch_results(self):
        """``_collect_results`` merges the result dicts of two distinct steps."""
        step_one = MagicMock()
        step_one.name = "step_one"
        step_two = MagicMock()
        step_two.name = "step_two"
        result_step_one = MagicMock()
        result_step_two = MagicMock()
        merged_result = {
            "step_one": result_step_one,
            "step_two": result_step_two,
        }

        step_one.get_result.return_value = {"step_one": result_step_one}
        step_two.get_result.return_value = {"step_two": result_step_two}

        result = self.pipeline._collect_results([step_one, step_two])

        # Assert that steps are correctly called.
        step_one.get_result.assert_called_once_with(None, None, return_all=True)
        step_two.get_result.assert_called_once_with(None, None, return_all=True)

        # Assert return value is correct
        self.assertEqual(merged_result, result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batched_pipeline(self, concat_mock, fm_mock):
        """With a 24h batch, ``test`` requests one result per day and concatenates them."""
        # Add some steps to the pipeline
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"], coords={'time': time})

        # Assert that the computation is set to fit_transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.run_setting = RunSetting(ComputationMode.Default)
        first_step.finished = False
        first_step.further_elements.side_effect = [True, True, True, True, False]

        first_step.get_result.return_value = {"one": da}
        self.pipeline.set_params(pd.Timedelta("24h"))
        self.pipeline.add(module=first_step)

        data = pd.DataFrame(
            {"test": [1, 2, 2, 3], "test2": [2, 2, 2, 2]},
            index=pd.DatetimeIndex(pd.date_range('2000-01-01', freq='24H', periods=4)))
        self.pipeline.test(data)

        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)

        # ``pd.Timestamp`` no longer accepts ``freq`` (removed in pandas 2.0);
        # timestamp equality ignores it, so the expectation is unchanged.
        calls = [
            call(pd.Timestamp('2000-01-01 00:00:00'),
                 pd.Timestamp('2000-01-02 00:00:00'),
                 return_all=True),
            call(pd.Timestamp('2000-01-02 00:00:00'),
                 pd.Timestamp('2000-01-03 00:00:00'),
                 return_all=True),
            call(pd.Timestamp('2000-01-03 00:00:00'),
                 pd.Timestamp('2000-01-04 00:00:00'),
                 return_all=True),
            call(pd.Timestamp('2000-01-04 00:00:00'),
                 pd.Timestamp('2000-01-05 00:00:00'),
                 return_all=True),
        ]
        first_step.get_result.assert_has_calls(calls, any_order=True)
        self.assertEqual(concat_mock.call_count, 3)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batch_2H_transform(self, concat_mock, fm_mock):
        """``transform`` with a 2h batch fetches four results and concatenates three times."""
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"], coords={'time': time})
        pipeline = Pipeline(batch=pd.Timedelta("2h"))
        step_one = MagicMock()
        step_one.get_result.return_value = {"step": da}
        step_one.name = "step"
        result_mock = MagicMock()
        concat_mock.return_value = result_mock
        pipeline.start_steps["foo"] = StartStep("foo"), None
        pipeline.start_steps["foo"][0].last = False
        step_one.further_elements.side_effect = [True, True, True, True, False]
        pipeline.add(module=step_one, input_ids=[1])

        result = pipeline.transform(foo=da)

        self.assertEqual(concat_mock.call_count, 3)
        self.assertEqual(step_one.get_result.call_count, 4)
        self.assertEqual(step_one.further_elements.call_count, 5)
        self.assertEqual({"step": result_mock}, result)

    @patch('pywatts.core.pipeline.FileManager')
    @patch("pywatts.core.pipeline._get_time_indexes", return_value=["time"])
    def test_transform_pipeline(self, get_time_indexes_mock, fm_mock):
        """``transform`` without batching asks the step once, starting at the first index."""
        input_mock = MagicMock()
        input_mock.indexes = {"time": ["20.12.2020"]}
        step_two = MagicMock()
        result_mock = MagicMock()
        step_two.name = "mock"
        step_two.get_result.return_value = {"mock": result_mock}
        self.pipeline.add(module=step_two, input_ids=[1])

        result = self.pipeline.transform(x=input_mock)

        step_two.get_result.assert_called_once_with("20.12.2020", None, return_all=True)
        get_time_indexes_mock.assert_called_once_with({"x": input_mock})
        self.assertEqual({"mock": result_mock}, result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.Pipeline.from_folder")
    def test_load(self, from_folder_mock, fm_mock):
        """``Pipeline.load`` delegates to ``from_folder`` with the stored path."""
        created_pipeline = MagicMock()
        from_folder_mock.return_value = created_pipeline
        pipeline = Pipeline.load({
            'name': 'Pipeline',
            'class': 'Pipeline',
            'module': 'pywatts.core.pipeline',
            'pipeline_path': 'save_path',
        })

        from_folder_mock.assert_called_once_with("save_path")
        self.assertEqual(created_pipeline, pipeline)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.Pipeline.to_folder")
    @patch("pywatts.core.pipeline.os")
    def test_save(self, os_mock, to_folder_mock, fm_mock):
        """``save`` stores the pipeline below the file manager's path and describes it."""
        os_mock.path.join.return_value = "save_path"
        os_mock.path.isdir.return_value = False
        sub_pipeline = Pipeline(batch=pd.Timedelta("1h"))
        detector = MissingValueDetector()
        detector(dataset=sub_pipeline["test"])
        # From here on a plain MagicMock stands in for the file manager.
        fm_mock = MagicMock()
        fm_mock.basic_path = "path_to_save"

        result = sub_pipeline.save(fm_mock)

        to_folder_mock.assert_called_once_with("save_path")
        os_mock.path.join.assert_called_once_with("path_to_save", "Pipeline")
        self.assertEqual(
            {
                'name': 'Pipeline',
                'class': 'Pipeline',
                'module': 'pywatts.core.pipeline',
                'params': {'batch': '0 days 01:00:00'},
                'pipeline_path': 'save_path',
            },
            result)

    @patch("pywatts.core.pipeline.FileManager")
    @patch("pywatts.core.pipeline.xr.concat")
    def test_batch_1_transform(self, concat_mock, fm_mock):
        """``transform`` with a 1h batch fetches seven results and concatenates six times."""
        time = pd.date_range('2000-01-01', freq='1H', periods=7)
        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"], coords={'time': time})
        pipeline = Pipeline(batch=pd.Timedelta("1h"))
        step_one = MagicMock()
        step_one.get_result.return_value = {"step": da}
        step_one.name = "step"
        result_mock = MagicMock()
        concat_mock.return_value = result_mock
        pipeline.start_steps["foo"] = StartStep("foo"), None
        pipeline.start_steps["foo"][0].last = False
        step_one.further_elements.side_effect = [
            True, True, True, True, True, True, True, False
        ]
        pipeline.add(module=step_one, input_ids=[1])

        result = pipeline.transform(foo=da)

        self.assertEqual(concat_mock.call_count, 6)
        self.assertEqual(step_one.get_result.call_count, 7)
        self.assertEqual(step_one.further_elements.call_count, 8)
        self.assertEqual({"step": result_mock}, result)

    @patch('pywatts.core.pipeline.FileManager')
    def test_test(self, fm_mock):
        """``test`` puts every step into Transform mode, fetches results and resets."""
        # Add some steps to the pipeline

        # Assert that the computation is set to fit_transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.computation_mode = ComputationMode.Default
        first_step.finished = False
        time = pd.date_range('2000-01-01', freq='1H', periods=7)

        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"], coords={'time': time})

        first_step.get_result.return_value = {"first": da}
        second_step = MagicMock()
        second_step.computation_mode = ComputationMode.Train
        second_step.finished = False
        second_step.get_result.return_value = {"Second": da}

        self.pipeline.add(module=first_step)
        self.pipeline.add(module=second_step)

        self.pipeline.test(
            pd.DataFrame(
                {"test": [1, 2, 2, 3, 4], "test2": [2, 2, 2, 2, 2]},
                index=pd.DatetimeIndex(pd.date_range('2000-01-01', freq='24H', periods=5))))

        # ``freq`` is not passed to ``pd.Timestamp`` (removed in pandas 2.0);
        # equality of timestamps does not depend on it.
        expected_start = pd.Timestamp('2000-01-01 00:00:00')
        first_step.get_result.assert_called_once_with(expected_start, None, return_all=True)
        second_step.get_result.assert_called_once_with(expected_start, None, return_all=True)

        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)
        second_step.set_run_setting.assert_called_once()
        self.assertEqual(
            second_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.Transform)

        first_step.reset.assert_called_once()
        second_step.reset.assert_called_once()

    @patch('pywatts.core.pipeline.FileManager')
    def test_train(self, fmmock):
        """``train`` puts every step into FitTransform mode and returns the step results."""
        # Add some steps to the pipeline
        time = pd.date_range('2000-01-01', freq='1H', periods=7)

        da = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"], coords={'time': time})

        # Assert that the computation is set to fit_transform if the ComputationMode was default
        first_step = MagicMock()
        first_step.computation_mode = ComputationMode.Default
        first_step.finished = False
        first_step.get_result.return_value = {"first": da}

        second_step = MagicMock()
        second_step.computation_mode = ComputationMode.Train
        second_step.finished = False
        second_step.get_result.return_value = {"second": da}

        self.pipeline.add(module=first_step)
        self.pipeline.add(module=second_step)

        data = pd.DataFrame(
            {"test": [1, 2, 2, 3, 4], "test2": [2, 2, 2, 2, 2]},
            index=pd.DatetimeIndex(pd.date_range('2000-01-01', freq='24H', periods=5)))
        result, summary = self.pipeline.train(data, summary=True)

        first_step.set_run_setting.assert_called_once()
        self.assertEqual(
            first_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.FitTransform)
        second_step.set_run_setting.assert_called_once()
        self.assertEqual(
            second_step.set_run_setting.call_args[0][0].computation_mode,
            ComputationMode.FitTransform)

        # ``freq`` is not passed to ``pd.Timestamp`` (removed in pandas 2.0);
        # equality of timestamps does not depend on it.
        expected_start = pd.Timestamp('2000-01-01 00:00:00')
        first_step.get_result.assert_called_once_with(expected_start, None, return_all=True)
        second_step.get_result.assert_called_once_with(expected_start, None, return_all=True)

        first_step.reset.assert_called_once()
        second_step.reset.assert_called_once()
        xr.testing.assert_equal(result["second"], da)

    @patch("builtins.open", new_callable=mock_open)
    def test_horizon_greater_one_regression_inclusive_summary_file(self, open_mock):
        """Training with a two-horizon target writes the summary to ``summary.md``."""
        lin_reg = LinearRegression()
        self.fm_mock.get_path.return_value = "summary_path"

        multi_regressor = SKLearnWrapper(lin_reg)(foo=self.pipeline["foo"],
                                                  target=self.pipeline["target"],
                                                  target2=self.pipeline["target2"])
        RMSE()(y=self.pipeline["target"], prediction=multi_regressor["target"])

        time = pd.date_range('2000-01-01', freq='24H', periods=5)

        foo = xr.DataArray([1, 2, 3, 4, 5], dims=["time"], coords={'time': time})
        target = xr.DataArray([[2, 3], [2, 4], [2, 5], [2, 6], [2, 7]],
                              dims=["time", "horizon"],
                              coords={'time': time, "horizon": [1, 2]})
        target2 = xr.DataArray([3, 3, 3, 3, 3], dims=["time"], coords={'time': time})

        ds = xr.Dataset({'foo': foo, "target": target, "target2": target2})

        result, summary = self.pipeline.train(ds, summary=True)

        self.assertTrue("Training Time" in summary)
        self.assertTrue("RMSE" in summary)

        self.fm_mock.get_path.assert_called_once_with("summary.md")
        open_mock().__enter__.return_value.write.assert_called_once_with(summary)

        self.assertTrue("target" in result.keys())
# save it as csv file rmse = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test) # Save the pipeline to a folder pipeline.to_folder("./pipe_getting_started") print("Execute second pipeline") # Load the pipeline as a new instance pipeline2 = Pipeline.from_folder("./pipe_getting_started", file_manager_path="../pipeline2_results") # WARNING # Sometimes from_folder use unpickle for loading modules. Note that this is not safe. # Consequently, load only pipelines you trust with from_folder. # For more details about pickling see https://docs.python.org/3/library/pickle.html result = pipeline2.test(test) print("Finished")
lag=1, name="ClockShift_Lag1")(x=scale_power_statistics)
shift_power_statistics2 = ClockShift(
    lag=2, name="ClockShift_Lag2")(x=scale_power_statistics)

# Wrap the Keras model; the two lagged series are the inputs and the scaled series is the target.
keras_wrapper = KerasWrapper(keras_model,
                             fit_kwargs={"batch_size": 8, "epochs": 1},
                             compile_kwargs={"loss": "mse", "optimizer": "Adam", "metrics": ["mse"]}) \
    (ClockShift_Lag1=shift_power_statistics,
     ClockShift_Lag2=shift_power_statistics2,
     target=scale_power_statistics)

# Re-use the power scaler in transform-only mode with its inverse transform on the model output.
inverse_power_scale_dl = power_scaler(
    x=keras_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback("prediction")])

rmse_dl = RMSE()(keras_model=inverse_power_scale_dl, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete
# so we can load data and train the model
data = pd.read_csv("../data/getting_started_data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True,
                   sep=",")

pipeline.train(data)
# Store the trained pipeline.
pipeline.to_folder("../results/pipe_keras")
optimizer=optimizer, loss_fn=torch.nn.MSELoss(reduction='sum'))\
    (
    power_lag1=shift_power_statistics,
    power_lag2=shift_power_statistics2,
    target=scale_power_statistics
)

# Re-use the power scaler in transform-only mode with its inverse transform on the model output.
inverse_power_scale = power_scaler(
    x=pytorch_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback('forecast')])

rmse_dl = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete
# so we can load data and train the model
data = pd.read_csv("../data/getting_started_data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True,
                   sep=",")

pipeline.train(data)
# Store the pipeline, load it as a second instance and train that instance on the same data.
pipeline.to_folder("./pipe_pytorch")

pipeline2 = Pipeline.from_folder("./pipe_pytorch")
pipeline2.train(data)
# Calculate the root mean squared error (RMSE) between the linear regression and the true values, save it as csv file rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"], callbacks=[CSVCallback('RMSE')]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test) # Save the pipeline to a folder pipeline.to_folder("./pipe_statsmodel") print("Execute second pipeline") # Load the pipeline as a new instance pipeline2 = Pipeline.from_folder("./pipe_statsmodel", file_manager_path="../pipeline2_results/statsmodel") # WARNING # Sometimes from_folder use unpickle for loading modules. Note that this is not safe. # Consequently, load only pipelines you trust with from_folder. # For more details about pickling see https://docs.python.org/3/library/pickle.html result = pipeline2.test(test) print("Finished")