Example #1
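This example assumes Ray AIR-era imports along the following lines; the module paths follow Ray 2.x and are assumptions, prepare_data() is defined elsewhere in the source script (assumed to return train, validation, and test Datasets), and the cuML import is optional:

from sklearn.ensemble import RandomForestClassifier

from ray.air.result import Result
from ray.data.preprocessors import Chain, OrdinalEncoder, StandardScaler
from ray.train.sklearn import SklearnTrainer

try:
    # Optional GPU-backed estimator; stays None when cuML is not installed.
    from cuml.ensemble import RandomForestClassifier as cuMLRandomForestClassifier
except ImportError:
    cuMLRandomForestClassifier = None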
def train_sklearn(num_cpus: int, use_gpu: bool = False) -> Result:
    if use_gpu and not cuMLRandomForestClassifier:
        raise RuntimeError(
            "cuML must be installed for GPU-enabled sklearn estimators.")

    train_dataset, valid_dataset, _ = prepare_data()

    # Scale a couple of the numeric feature columns.
    columns_to_scale = ["mean radius", "mean texture"]
    preprocessor = Chain(OrdinalEncoder(["categorical_column"]),
                         StandardScaler(columns=columns_to_scale))

    if use_gpu:
        trainer_resources = {"CPU": 1, "GPU": 1}
        estimator = cuMLRandomForestClassifier()
    else:
        trainer_resources = {"CPU": num_cpus}
        estimator = RandomForestClassifier()

    trainer = SklearnTrainer(
        estimator=estimator,
        label_column="target",
        datasets={
            "train": train_dataset,
            "valid": valid_dataset
        },
        preprocessor=preprocessor,
        cv=5,
        scaling_config={
            "trainer_resources": trainer_resources,
        },
    )
    result = trainer.fit()
    print(result.metrics)

    return result
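In the source script this function is driven from a small command-line entry point; a minimal invocation sketch (the argument values here are illustrative only):

if __name__ == "__main__":
    train_sklearn(num_cpus=2, use_gpu=False)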
Example #2
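This excerpt opens partway through the train-loop factory. The __main__ block below assumes roughly the following imports; DummyTrainer is Ray AIR's ingest-debugging trainer, and its module path here is an assumption based on Ray 2.x:

import ray
from ray.air.util.check_ingest import DummyTrainer
from ray.data.preprocessors import BatchMapper, Chain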
            )

            if rank == 0:
                print("Ingest stats from rank=0:\n\n{}".format(data_shard.stats()))

        return train_loop_per_worker


if __name__ == "__main__":
    # Generate a synthetic dataset of ~10GiB of int64 tensor data. The dataset is
    # sharded into 100 blocks (parallelism=100).
    dataset = ray.data.range_tensor(50000, shape=(80, 80, 4), parallelism=100)

    # An example preprocessor chain that just scales all values by 4.0 in two stages.
    preprocessor = Chain(
        BatchMapper(lambda df: df * 2),
        BatchMapper(lambda df: df * 2),
    )

    # Set up and run a dummy trainer that reads the dataset and prints ingest stats.
    trainer = DummyTrainer(
        scaling_config={"num_workers": 1, "use_gpu": False},
        datasets={"train": dataset},
        preprocessor=preprocessor,
        runtime_seconds=30,  # Stop after this amount of time, or once 1 epoch is read.
        prefetch_blocks=1,  # Number of blocks to prefetch when reading data.
        batch_size=None,
    )
    trainer.fit()

    # Print memory stats (you can also use "ray memory --stats-only" to monitor this
    # during the run).
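A hedged sketch of how those stats can then be printed (memory_summary is a private Ray helper, so its location and signature are assumptions and may change between versions):

    try:
        print(
            "Memory stats at end of ingest:\n\n{}".format(
                ray._private.internal_api.memory_summary(stats_only=True)
            )
        )
    except Exception:
        print("Error getting Ray memory stats.")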
Example #3
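The test assumes the usual pandas and Ray preprocessor imports:

import pandas as pd

import ray
from ray.data.preprocessors import (
    Chain,
    LabelEncoder,
    SimpleImputer,
    StandardScaler,
)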
def test_chain():
    """Tests basic Chain functionality."""
    col_a = [-1, -1, 1, 1]
    col_b = [1, 1, 1, None]
    col_c = ["sunday", "monday", "tuesday", "tuesday"]
    in_df = pd.DataFrame.from_dict({"A": col_a, "B": col_b, "C": col_c})
    ds = ray.data.from_pandas(in_df)

    imputer = SimpleImputer(["B"])
    scaler = StandardScaler(["A", "B"])
    encoder = LabelEncoder("C")
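    # Chain fits the preprocessors in order, each on the output of the previous
    # one. Column "B" has zero variance, so the scaler maps it to all zeros and
    # the imputer (fit after the scaler) learns mean(B) == 0.0.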
    chain = Chain(scaler, imputer, encoder)

    # Fit data.
    chain.fit(ds)
    assert imputer.stats_ == {
        "mean(B)": 0.0,
    }
    assert scaler.stats_ == {
        "mean(A)": 0.0,
        "mean(B)": 1.0,
        "std(A)": 1.0,
        "std(B)": 0.0,
    }
    assert encoder.stats_ == {
        "unique_values(C)": {
            "monday": 0,
            "sunday": 1,
            "tuesday": 2
        }
    }

    # Transform data.
    transformed = chain.transform(ds)
    out_df = transformed.to_pandas()

    processed_col_a = [-1.0, -1.0, 1.0, 1.0]
    processed_col_b = [0.0, 0.0, 0.0, 0.0]
    processed_col_c = [1, 0, 2, 2]
    expected_df = pd.DataFrame.from_dict({
        "A": processed_col_a,
        "B": processed_col_b,
        "C": processed_col_c
    })

    assert out_df.equals(expected_df)

    # Transform batch.
    pred_col_a = [1, 2, None]
    pred_col_b = [0, None, 2]
    pred_col_c = ["monday", "tuesday", "wednesday"]
    pred_in_df = pd.DataFrame.from_dict({
        "A": pred_col_a,
        "B": pred_col_b,
        "C": pred_col_c
    })

    pred_out_df = chain.transform_batch(pred_in_df)
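    # "A" keeps its missing value because the imputer only covers "B"; the label
    # "wednesday" was unseen during fit, so the encoder maps it to None.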

    pred_processed_col_a = [1, 2, None]
    pred_processed_col_b = [-1.0, 0.0, 1.0]
    pred_processed_col_c = [0, 2, None]
    pred_expected_df = pd.DataFrame.from_dict({
        "A": pred_processed_col_a,
        "B": pred_processed_col_b,
        "C": pred_processed_col_c,
    })

    assert pred_out_df.equals(pred_expected_df)