예제 #1
0
    def test_from_folder(self, isdir_mock, mock_file, json_mock, pickle_mock,
                         fm_mock):
        """Check that ``Pipeline.from_folder`` rebuilds a pipeline from a folder.

        The mock arguments are injected from outside this method (presumably
        by patch decorators that are not visible here -- confirm). The test
        configures them so that the folder check succeeds, the JSON load
        yields ``pipeline_json`` and the two pickle loads yield a scaler and
        a regressor. It then verifies which files were opened, how often the
        loaders were used, and that the resulting pipeline holds three steps.
        """
        folder = "test_pipeline"
        # Objects returned by the successive mocked ``pickle.load`` calls.
        unpickled_modules = [StandardScaler(), LinearRegression()]

        isdir_mock.return_value = True
        json_mock.load.return_value = pipeline_json
        pickle_mock.load.side_effect = unpickled_modules

        pipeline = Pipeline.from_folder(folder)

        # Every file expected to be opened, as (file name, open mode).
        expected_opens = [
            call(os.path.join(folder, file_name), mode)
            for file_name, mode in (
                ("StandardScaler.pickle", "rb"),
                ("LinearRegression.pickle", "rb"),
                ("pipeline.json", "r"),
            )
        ]
        # The order in which the files are opened is not part of the check.
        mock_file.assert_has_calls(expected_opens, any_order=True)

        json_mock.load.assert_called_once()
        assert pickle_mock.load.call_count == 2

        isdir_mock.assert_called_once()
        self.assertEqual(3, len(pipeline.id_to_step))
예제 #2
0
    def test_run_reloaded_simple_pipeline(self):
        """Store a small pipeline, reload it, then train and test the reload.

        The pipeline consists of two interpolation steps, a wrapped
        ``StandardScaler`` and a wrapped ``LinearRegression``. It is written
        to ``./pipe1`` and read back with ``Pipeline.from_folder``; the
        reloaded instance is then trained and tested on the getting-started
        CSV data. The test makes no explicit assertions -- it passes when no
        step raises.
        """
        original = Pipeline()

        # Both interpolaters share the same settings and differ only in name
        # and in the pipeline entry they read from.
        power_interpolater = LinearInterpolater(
            method="nearest", dim="time", name="imputer_power")
        imputed_power = power_interpolater(
            x=original["load_power_statistics"])

        price_interpolater = LinearInterpolater(
            method="nearest", dim="time", name="imputer_price")
        imputed_price = price_interpolater(x=original["price_day_ahead"])

        scaling_step = SKLearnWrapper(StandardScaler())
        scaled_price = scaling_step(x=imputed_price)

        regression_step = SKLearnWrapper(LinearRegression())
        regression_step(x=scaled_price,
                        target1=imputed_price,
                        target2=imputed_power_statistics
                        if False else imputed_power)

        original.to_folder("./pipe1")
        # NOTE(review): the pause presumably keeps storing and reloading
        # apart in time (e.g. for timestamp-based output folders) -- confirm.
        sleep(1)

        reloaded = Pipeline.from_folder("./pipe1")

        data = pd.read_csv("data/getting_started_data.csv",
                           sep=",",
                           index_col="time",
                           parse_dates=["time"],
                           infer_datetime_format=True)
        # The first 6000 rows are used for testing, the remainder for training.
        test, train = data[:6000], data[6000:]
        reloaded.train(train)
        reloaded.test(test)
예제 #3
0
    print("Start training")
    train_pipeline.train(data)
    print("Training finished")

    # Create a second pipeline. This is necessary because it has additional steps compared to the training pipeline.
    pipeline = Pipeline(path="../results")

    # Get preprocessing pipeline
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

    # Get the test pipeline. The arguments are the modules from the training pipeline that should be reused.
    test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])

    test_pipeline(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  load_power_statistics=pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    print("Start testing")
    result = pipeline.test(test)

    pipeline.to_folder("stored_day_and_night")
    pipeline = Pipeline.from_folder("stored_day_and_night")
    print("Testing finished")
    result2 = pipeline.test(test)

    print("FINISHED")
예제 #4
0
    # save it as csv file
    rmse = RMSE()(y_hat=inverse_power_scale,
                  y=pipeline["load_power_statistics"])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")
    train = data.iloc[:6000, :]
    pipeline.train(data=train)

    test = data.iloc[6000:, :]
    data = pipeline.test(data=test)

    # Save the pipeline to a folder
    pipeline.to_folder("./pipe_getting_started")

    print("Execute second pipeline")
    # Load the pipeline as a new instance
    pipeline2 = Pipeline.from_folder("./pipe_getting_started",
                                     file_manager_path="../pipeline2_results")
    #       WARNING
    #       from_folder sometimes uses pickle to load modules. Note that unpickling is not safe.
    #       Consequently, only use from_folder to load pipelines you trust.
    #       For more details about pickling see https://docs.python.org/3/library/pickle.html
    result = pipeline2.test(test)
    print("Finished")
예제 #5
0
                                     optimizer=optimizer,
                                     loss_fn=torch.nn.MSELoss(reduction='sum'))\
                      (
                        power_lag1=shift_power_statistics,
                        power_lag2=shift_power_statistics2,
                        target=scale_power_statistics
                      )

    inverse_power_scale = power_scaler(
        x=pytorch_wrapper,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('forecast')])

    rmse_dl = RMSE()(y_hat=inverse_power_scale,
                     y=pipeline["load_power_statistics"])

    # Now, the pipeline is complete
    # so we can load data and train the model
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")

    pipeline.train(data)
    pipeline.to_folder("./pipe_pytorch")

    pipeline2 = Pipeline.from_folder("./pipe_pytorch")
    pipeline2.train(data)
예제 #6
0
    # Calculate the root mean squared error (RMSE) between the linear regression and the true values, save it as csv file
    rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"],
                            callbacks=[CSVCallback('RMSE')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")
    train = data.iloc[:6000, :]
    pipeline.train(data=train)

    test = data.iloc[6000:, :]
    data = pipeline.test(data=test)

    # Save the pipeline to a folder
    pipeline.to_folder("./pipe_statsmodel")

    print("Execute second pipeline")
    # Load the pipeline as a new instance
    pipeline2 = Pipeline.from_folder("./pipe_statsmodel", file_manager_path="../pipeline2_results/statsmodel")
    #       WARNING
    #       from_folder sometimes uses pickle to load modules. Note that unpickling is not safe.
    #       Consequently, only use from_folder to load pipelines you trust.
    #       For more details about pickling see https://docs.python.org/3/library/pickle.html
    result = pipeline2.test(test)
    print("Finished")