Python Pipeline Beispiele

Programmiersprache: Python

Namespace / Paketname: preprocessy.pipelines

Klasse / Typ: Pipeline

Beispiele auf hotexamples.com: 7

Python Pipeline – 7 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse preprocessy.pipelines.Pipeline, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Pipeline(7)

process(5)

remove(3)

add(1)

Häufig verwendete Methoden

Pipeline (7)

process (5)

remove (3)

add (1)

Beispiel #1

Datei anzeigen

Datei: test_base.py Projekt: pal-16/preprocessy

    def test_pipeline_arguments(self):
        """Check that ``Pipeline`` rejects a series of invalid argument sets.

        Every case pairs the exception type the test expects with the keyword
        arguments handed to the ``Pipeline`` constructor. The cases are tried
        in order and the first one that does not raise fails the test.
        """
        dataset_path = "./datasets/configs/dataset.csv"
        invalid_cases = [
            # No arguments at all.
            (ArgumentsError, {}),
            # Steps supplied without a ``df_path``.
            (
                ArgumentsError,
                {"steps": [custom_read, times_two, squared, split]},
            ),
            # One step given as a string, params given as a list.
            (
                TypeError,
                {
                    "df_path": dataset_path,
                    "steps": [custom_read, "times_two", squared, split],
                    "params": ["hello"],
                },
            ),
            # All steps are the imported names, params still a list.
            (
                TypeError,
                {
                    "df_path": dataset_path,
                    "steps": [custom_read, times_two, squared, split],
                    "params": ["hello"],
                },
            ),
            # ``custom_reader`` given as a string.
            (
                TypeError,
                {
                    "df_path": dataset_path,
                    "steps": [times_two, squared, split],
                    "params": {"col_1": "A"},
                    "custom_reader": "custom_read",
                },
            ),
        ]

        for expected_error, pipeline_kwargs in invalid_cases:
            with pytest.raises(expected_error):
                Pipeline(**pipeline_kwargs)

Beispiel #2

Datei anzeigen

Datei: test_base.py Projekt: pal-16/preprocessy

    def test_pipeline_with_custom_reader(self):
        """Run a pipeline that is given ``custom_read`` as its ``custom_reader``.

        A 99-row CSV with columns ``A`` and ``B`` is written to disk, the
        pipeline is processed, and the resulting ``params`` entries are
        compared against the ``df_copy`` entry.
        """
        csv_path = "./datasets/configs/dataset.csv"
        source = pd.DataFrame(
            {"A": np.arange(1, 100), "B": np.arange(1, 100)}
        )
        source.to_csv(csv_path, index=False)

        pipeline = Pipeline(
            df_path=csv_path,
            steps=[times_two, squared, split],
            params={
                "col_1": "A",
                "col_2": "B",
                "test_size": 0.2,
                "df": csv_path,
            },
            custom_reader=custom_read,
        )
        pipeline.process()

        # Column A is expected to be doubled relative to the stored copy.
        doubled = pipeline.params["df"].loc[69, "A"]
        doubled_base = pipeline.params["df_copy"].loc[69, "A"]
        assert doubled == doubled_base * 2

        # Column B is expected to be squared relative to the stored copy.
        squared_value = pipeline.params["df"].loc[42, "B"]
        squared_base = pipeline.params["df_copy"].loc[42, "B"]
        assert squared_value == squared_base**2

        # The test expects 80 rows in the training part.
        train_rows = len(pipeline.params["X_train"])
        assert train_rows == 80

Beispiel #3

Datei anzeigen

Datei: test_base.py Projekt: pal-16/preprocessy

    def test_pipeline_with_default_reader(self):
        """Run a pipeline without a ``custom_reader`` and check its params keys.

        After processing, ``pipeline.params`` must contain the keys ``df``,
        ``summary`` and ``stats``.
        """
        csv_path = "./datasets/configs/dataset.csv"
        frame = pd.DataFrame(
            {"A": np.arange(1, 100), "B": np.arange(1, 100)}
        )
        frame.to_csv(csv_path, index=False)

        pipeline = Pipeline(
            df_path=csv_path,
            steps=[times_two, squared, split],
            params={"col_1": "A", "col_2": "B", "test_size": 0.2},
        )
        pipeline.process()

        for expected_key in ("df", "summary", "stats"):
            assert expected_key in pipeline.params.keys()

Beispiel #4

Datei anzeigen

def test_pipeline_arguments(error, df_path, steps, config_file, params,
                            custom_reader):
    """Expect ``Pipeline`` construction with the given arguments to raise.

    All arguments are forwarded to ``Pipeline`` as keyword arguments of the
    same name; ``error`` is the exception type ``pytest.raises`` waits for.
    NOTE(review): the values presumably come from a parametrize decorator
    that is not part of this excerpt - confirm against the original file.
    """
    pipeline_kwargs = {
        "df_path": df_path,
        "steps": steps,
        "config_file": config_file,
        "params": params,
        "custom_reader": custom_reader,
    }

    with pytest.raises(error):
        Pipeline(**pipeline_kwargs)

Beispiel #5

Datei anzeigen

Datei: test_base.py Projekt: pal-16/preprocessy

 def test_remove(self):
     """Check ``Pipeline.remove`` by dropping the ``split`` step.

     The pipeline is processed once with all three steps, then ``split`` is
     removed by name and the pipeline is processed again. Afterwards the
     ``df`` entry must have as many rows as the frame written to disk.
     """
     csv_path = "./datasets/configs/dataset.csv"
     df = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
     df.to_csv(csv_path, index=False)

     pipeline = Pipeline(
         df_path=csv_path,
         steps=[times_two, squared, split],
         params={"col_1": "A", "col_2": "B", "test_size": 0.2},
     )

     # First run, with the split step still in place.
     pipeline.process()
     train_rows = len(pipeline.params["X_train"])
     assert train_rows == 80

     # Second run, after the split step has been removed.
     pipeline.remove("split")
     pipeline.process()
     processed_rows = pipeline.params["df"].shape[0]
     assert processed_rows == df.shape[0]

Beispiel #6

Datei anzeigen

def test_config():
    """Run a pipeline whose parameters come from a saved config file.

    The parameters are stored with ``save_config`` and handed to ``Pipeline``
    through ``config_file``. The pipeline is processed, the ``split`` step is
    removed, and it is processed again; the ``df`` and ``df_copy`` entries
    must then have the same number of rows.
    """
    csv_path = "./datasets/configs/dataset.csv"
    config_path = "./datasets/configs/pipeline_config.json"

    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv(csv_path, index=False)

    save_config(
        config_path,
        {
            "df": csv_path,
            "col_1": "A",
            "col_2": "B",
            "test_size": 0.2,
        },
    )

    pipeline = Pipeline(
        df_path=csv_path,
        steps=[times_two, squared, split],
        config_file=config_path,
        custom_reader=custom_read,
    )

    # First run, with the split step still in place.
    pipeline.process()
    train_rows = len(pipeline.params["X_train"])
    assert train_rows == 80

    # Second run, after the split step has been removed.
    pipeline.remove("split")
    pipeline.process()
    processed_rows = pipeline.params["df"].shape[0]
    copied_rows = pipeline.params["df_copy"].shape[0]
    assert processed_rows == copied_rows

Beispiel #7

Datei anzeigen

Datei: test_base.py Projekt: pal-16/preprocessy

 def test_add(self):
     """Check ``Pipeline.add`` with both the ``before`` and ``after`` keywords.

     The pipeline starts with ``times_two`` and ``split``. ``squared`` is
     then added before ``times_two``, removed again, and added after
     ``read_file``. After each insertion the value at row 42 of column ``A``
     must equal the source value squared and then doubled.
     """
     csv_path = "./datasets/configs/dataset.csv"
     df = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
     df.to_csv(csv_path, index=False)

     pipeline = Pipeline(
         df_path=csv_path,
         steps=[times_two, split],
         params={"col_1": "A", "test_size": 0.2},
     )

     # Baseline: only the doubling step affects column A.
     pipeline.process()
     assert pipeline.params["df"].loc[42, "A"] == df.loc[42, "A"] * 2

     # Add ``squared`` in front of ``times_two``.
     pipeline.add(squared, {"col_2": "A"}, before="times_two")
     pipeline.process()
     observed = pipeline.params["df"].loc[42, "A"]
     source_value = df.loc[42, "A"]
     assert observed == (source_value**2) * 2

     # Remove it and add it again, this time after ``read_file``.
     pipeline.remove("squared")
     pipeline.add(squared, {"col_2": "A"}, after="read_file")
     pipeline.process()
     observed = pipeline.params["df"].loc[42, "A"]
     source_value = df.loc[42, "A"]
     assert observed == (source_value**2) * 2