def test_from_folder(self, isdir_mock, mock_file, json_mock, pickle_mock, fm_mock):
    """Pipeline.from_folder must open every persisted artifact and rebuild all steps.

    The folder check, file opens, JSON parsing, and unpickling are all mocked
    out; the assertions verify which files are read and with which modes.
    """
    dummy_scaler = StandardScaler()
    dummy_regression = LinearRegression()

    # Pretend the folder exists and stub what json/pickle hand back.
    isdir_mock.return_value = True
    json_mock.load.return_value = pipeline_json
    pickle_mock.load.side_effect = [dummy_scaler, dummy_regression]

    pipeline = Pipeline.from_folder("test_pipeline")

    # Each stored module is opened binary; the pipeline description as text.
    expected_open_calls = [
        call(os.path.join("test_pipeline", filename), mode)
        for filename, mode in (
            ("StandardScaler.pickle", "rb"),
            ("LinearRegression.pickle", "rb"),
            ("pipeline.json", "r"),
        )
    ]
    mock_file.assert_has_calls(expected_open_calls, any_order=True)

    json_mock.load.assert_called_once()
    assert pickle_mock.load.call_count == 2
    isdir_mock.assert_called_once()
    # Reconstructed pipeline should contain all three steps from pipeline_json.
    self.assertEqual(3, len(pipeline.id_to_step))
def test_run_reloaded_simple_pipeline(self):
    """Round-trip a small pipeline through to_folder/from_folder, then train and test it."""
    original = Pipeline()

    # Build the graph: impute both raw inputs, scale the price, and fit a
    # linear regression against both imputed series as targets.
    power_imputed = LinearInterpolater(
        method="nearest", dim="time", name="imputer_power")(x=original["load_power_statistics"])
    price_imputed = LinearInterpolater(
        method="nearest", dim="time", name="imputer_price")(x=original["price_day_ahead"])
    price_scaled = SKLearnWrapper(StandardScaler())(x=price_imputed)
    SKLearnWrapper(LinearRegression())(x=price_scaled,
                                       target1=price_imputed,
                                       target2=power_imputed)

    original.to_folder("./pipe1")
    sleep(1)  # give the filesystem a moment before reading the folder back
    reloaded = Pipeline.from_folder("./pipe1")

    frame = pd.read_csv("data/getting_started_data.csv",
                        index_col="time",
                        sep=",",
                        parse_dates=["time"],
                        infer_datetime_format=True)
    # NOTE(review): rows after 6000 train, rows before 6000 test — the reverse
    # of the other examples in this project; confirm this split is intended.
    train_split = frame[6000:]
    test_split = frame[:6000]
    reloaded.train(train_split)
    reloaded.test(test_split)
# NOTE(review): this fragment relies on names defined earlier in the script
# (train_pipeline, data, test, power_scaler, regressor_lin_reg, regressor_svr,
# create_preprocessing_pipeline, create_test_pipeline) — confirm against the full file.
print("Start training")
train_pipeline.train(data)
print("Training finished")

# Create a second pipeline. Necessary, since this pipeline has additional
# steps in contrast to the train pipeline.
pipeline = Pipeline(path="../results")

# Get preprocessing pipeline and wire it to the raw power input.
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

# Get the test pipeline; the arguments are the modules from the training
# pipeline, which should be reused.
test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])
test_pipeline(ClockShift=preprocessing_pipeline["ClockShift"],
              ClockShift_1=preprocessing_pipeline["ClockShift_1"],
              load_power_statistics=pipeline["load_power_statistics"],
              callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')])

# Now, the pipeline is complete so we can run it and explore the results.
# Start the pipeline.
print("Start testing")
result = pipeline.test(test)

# Persist the finished pipeline, reload it, and run the test set again to
# verify the stored pipeline round-trips.
pipeline.to_folder("stored_day_and_night")
pipeline = Pipeline.from_folder("stored_day_and_night")
print("Testing finished")
result2 = pipeline.test(test)
print("FINISHED")
# save it as csv file rmse = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test) # Save the pipeline to a folder pipeline.to_folder("./pipe_getting_started") print("Execute second pipeline") # Load the pipeline as a new instance pipeline2 = Pipeline.from_folder("./pipe_getting_started", file_manager_path="../pipeline2_results") # WARNING # Sometimes from_folder use unpickle for loading modules. Note that this is not safe. # Consequently, load only pipelines you trust with from_folder. # For more details about pickling see https://docs.python.org/3/library/pickle.html result = pipeline2.test(test) print("Finished")
optimizer=optimizer, loss_fn=torch.nn.MSELoss(reduction='sum'))\
(
    power_lag1=shift_power_statistics,
    power_lag2=shift_power_statistics2,
    target=scale_power_statistics
)
# NOTE(review): the lines above continue a wrapper call whose opening is not
# visible in this fragment; its result is presumably bound to pytorch_wrapper.

# Invert the scaling on the network's output to get back to original units.
inverse_power_scale = power_scaler(
    x=pytorch_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback('forecast')])

# RMSE between the rescaled deep-learning forecast and the true load.
rmse_dl = RMSE()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"])

# Now, the pipeline is complete,
# so we can load data and train the model.
data = pd.read_csv("../data/getting_started_data.csv",
                   index_col="time",
                   parse_dates=["time"],
                   infer_datetime_format=True,
                   sep=",")
pipeline.train(data)

# Persist, reload, and retrain to verify the stored pipeline round-trips.
pipeline.to_folder("./pipe_pytorch")
pipeline2 = Pipeline.from_folder("./pipe_pytorch")
pipeline2.train(data)
# Calculate the root mean squared error (RMSE) between the linear regression and the true values, save it as csv file rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"], callbacks=[CSVCallback('RMSE')]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test) # Save the pipeline to a folder pipeline.to_folder("./pipe_statsmodel") print("Execute second pipeline") # Load the pipeline as a new instance pipeline2 = Pipeline.from_folder("./pipe_statsmodel", file_manager_path="../pipeline2_results/statsmodel") # WARNING # Sometimes from_folder use unpickle for loading modules. Note that this is not safe. # Consequently, load only pipelines you trust with from_folder. # For more details about pickling see https://docs.python.org/3/library/pickle.html result = pipeline2.test(test) print("Finished")