Code example #1
0
def test_config():
    """Keys listed in ``config_drop_keys`` must be absent after a config round-trip."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    pipeline_params = {
        "col_1": "A",
        "col_2": "B",
        "test_size": 0.2,
        "X_train": frame,
    }
    config_path = "./datasets/configs/pipeline_config.json"

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        params=pipeline_params,
        custom_reader=custom_read,
    )
    pipe.process()

    # Ask the pipeline to exclude this custom key when saving the config.
    pipe.config_drop_keys.append("train_df_copy")
    pipe.save_config(config_path)

    # Rebuild the pipeline purely from the saved config file.
    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        config_file=config_path,
        custom_reader=custom_read,
    )

    assert "X_train" not in pipe.params
    assert "train_df_copy" not in pipe.params
Code example #2
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_pipeline_with_default_reader():
    """Without a custom reader, the pipeline should load the CSV on its own."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    step_params = {"col_1": "A", "col_2": "B", "test_size": 0.2}

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        params=step_params,
    )
    pipe.process()

    # The built-in reader must have populated the training dataframe.
    assert "train_df" in pipe.params
Code example #3
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_pipeline_arguments(error, train_df_path, steps, config_file, params,
                            custom_reader):
    """The constructor must raise *error* for each invalid argument combination."""
    constructor_kwargs = {
        "train_df_path": train_df_path,
        "steps": steps,
        "config_file": config_file,
        "params": params,
        "custom_reader": custom_reader,
    }
    with pytest.raises(error):
        BasePipeline(**constructor_kwargs)
Code example #4
0
def test_duplicate_param():
    """Adding a step whose params collide with existing keys must raise ValueError."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, split],
        params={"col_1": "A", "col_2": "B", "test_size": 0.2},
    )

    # "col_2" is already registered on the pipeline, so re-adding it is an error.
    with pytest.raises(ValueError):
        pipe.add(squared, {"col_2": "A"}, before="times_two")
Code example #5
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_pipeline_with_custom_reader():
    """A custom reader should feed the steps and keep an untouched copy of the data."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    reader_params = {"col_1": "A", "col_2": "B", "test_size": 0.2}

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        params=reader_params,
        custom_reader=custom_read,
    )
    pipe.process()

    processed = pipe.params["train_df"]
    untouched = pipe.params["train_df_copy"]

    # Column A was doubled and column B was squared relative to the copy.
    assert processed.loc[69, "A"] == untouched.loc[69, "A"] * 2
    assert processed.loc[42, "B"] == untouched.loc[42, "B"] ** 2

    # test_size=0.2 over 99 rows leaves 80 rows in the training split.
    assert len(pipe.params["X_train"]) == 80
Code example #6
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_remove():
    """Removing the split step should leave the full dataframe unpartitioned."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        params={"col_1": "A", "col_2": "B", "test_size": 0.2},
    )
    pipe.process()
    assert len(pipe.params["X_train"]) == 80

    # Drop the split step and re-run: the full frame should survive intact.
    pipe.remove("split")
    pipe.process()
    assert pipe.params["train_df"].shape[0] == frame.shape[0]
Code example #7
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_config():
    """A pipeline built from a saved config file should run like a params-built one."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    config_path = "./datasets/configs/pipeline_config.json"
    save_config(config_path, {"col_1": "A", "col_2": "B", "test_size": 0.2})

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, squared, split],
        config_file=config_path,
        custom_reader=custom_read,
    )
    pipe.process()
    assert len(pipe.params["X_train"]) == 80

    # Without the split step the frame and its copy keep the same row count.
    pipe.remove("split")
    pipe.process()
    assert (pipe.params["train_df"].shape[0]
            == pipe.params["train_df_copy"].shape[0])
Code example #8
0
def test_add_without_params():
    """A step added without its own params should fall back to the pipeline params."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, split],
        params={"col_1": "A", "col_2": "B", "test_size": 0.2},
    )
    pipe.add(squared, before="times_two")
    pipe.process()

    # squared picked up "col_2" ("B") from the pipeline-level params.
    assert pipe.params["train_df"].loc[42, "B"] == frame.loc[42, "B"] ** 2
Code example #9
0
File: test_base.py — Project: RiyaGupta99/preprocessy
def test_add():
    """Steps inserted before/after named steps must run in the requested order."""
    frame = pd.DataFrame({"A": np.arange(1, 100), "B": np.arange(1, 100)})
    frame.to_csv("./datasets/configs/dataset.csv", index=False)

    pipe = BasePipeline(
        train_df_path="./datasets/configs/dataset.csv",
        steps=[times_two, split],
        params={"col_1": "A", "test_size": 0.2},
    )
    pipe.process()
    assert pipe.params["train_df"].loc[42, "A"] == frame.loc[42, "A"] * 2

    # Insert squared before times_two: value is squared first, then doubled.
    pipe.add(squared, {"col_2": "A"}, before="times_two")
    pipe.process()
    expected = (frame.loc[42, "A"] ** 2) * 2
    assert pipe.params["train_df"].loc[42, "A"] == expected

    # Re-inserting right after the reader step yields the same composition.
    pipe.remove("squared")
    pipe.add(squared, {"col_2": "A"}, after="read_file")
    pipe.process()
    assert pipe.params["train_df"].loc[42, "A"] == expected