Ejemplo n.º 1
0
def test_separate_gpu_stage(shutdown_only):
    """Check the stage layout reported with and without a separate GPU stage.

    Prediction is run twice on a 10-row range table, first with
    ``separate_gpu_stage=True`` and then with ``separate_gpu_stage=False``.
    Each run must report the expected first-stage label, a second
    ``map_batches`` stage, and a maximum ``value`` of 36.0.
    """
    ray.init(num_gpus=1)
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    # (separate_gpu_stage flag, label expected for the first stage)
    scenarios = (
        (True, "Stage 1 read->map_batches:"),
        (False, "Stage 1 read:"),
    )
    for separate_stage, first_stage_label in scenarios:
        ds = batch_predictor.predict(
            ray.data.range_table(10),
            num_gpus_per_worker=1,
            separate_gpu_stage=separate_stage,
            allow_gpu=True,
        )
        stats = ds.stats()
        assert first_stage_label in stats, stats
        assert "Stage 2 map_batches:" in stats, stats
        assert ds.max("value") == 36.0, ds
Ejemplo n.º 2
0
def test_get_and_set_preprocessor():
    """Verify that the preprocessor can be read back and replaced.

    The predictor is built with ``DummyPreprocessor(1)``. After the getter
    and the prediction output are checked, the preprocessor is swapped for
    ``DummyPreprocessor(2)`` and both checks are repeated.
    """
    initial_preprocessor = DummyPreprocessor(1)
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: initial_preprocessor}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)
    assert batch_predictor.get_preprocessor() == initial_preprocessor

    test_dataset = ray.data.range(4)

    def predicted_values():
        # Predict on the shared dataset and flatten the output to a list.
        output_ds = batch_predictor.predict(test_dataset)
        return output_ds.to_pandas().to_numpy().squeeze().tolist()

    assert predicted_values() == [0.0, 2.0, 4.0, 6.0]

    replacement_preprocessor = DummyPreprocessor(2)
    batch_predictor.set_preprocessor(replacement_preprocessor)
    assert batch_predictor.get_preprocessor() == replacement_preprocessor

    assert predicted_values() == [0.0, 4.0, 8.0, 12.0]
Ejemplo n.º 3
0
def test_batch_prediction():
    """Exercise plain and pipelined batch prediction with the dummy predictor.

    The first half predicts on ``ray.data.range(4)``, checks that the stats
    mention a fused ``read->map_batches`` stage, and checks the output.
    The second half runs ``predict_pipelined`` with two blocks per window and
    checks the first window only.
    """
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    ds = batch_predictor.predict(ray.data.range(4))
    # Check fusion occurred.
    assert "read->map_batches" in ds.stats(), ds.stats()
    plain_output = ds.to_pandas().to_numpy().squeeze().tolist()
    assert plain_output == [0.0, 4.0, 8.0, 12.0]

    pipeline = batch_predictor.predict_pipelined(
        ray.data.from_items([1.0, 2.0, 3.0, 4.0]), blocks_per_window=2
    )
    first_window = next(pipeline.iter_datasets())
    assert first_window.to_pandas().to_numpy().squeeze().tolist() == [4.0, 8.0]
Ejemplo n.º 4
0
def test_tensorflow_predictor_no_training():
    """Predict with a freshly built model that has not been trained.

    The model from ``build_model`` is converted into a checkpoint, loaded
    into a ``TensorflowPredictor``-backed ``BatchPredictor``, and run on a
    three-row dataset; exactly three predictions must be returned.
    """
    checkpoint = to_air_checkpoint(build_model())
    batch_predictor = BatchPredictor.from_checkpoint(
        checkpoint,
        TensorflowPredictor,
        model_definition=build_model,
    )
    predictions = batch_predictor.predict(ray.data.range(3))
    assert predictions.count() == 3
Ejemplo n.º 5
0
def run_xgboost_prediction(model_path: str, data_path: str):
    """Load an XGBoost model and run batch prediction on a Parquet dataset.

    Args:
        model_path: Path handed to ``xgb.Booster.load_model``.
        data_path: Path handed to ``data.read_parquet``.

    Returns:
        The result of ``BatchPredictor.predict`` on the dataset with its
        ``"labels"`` column dropped.
    """
    booster = xgb.Booster()
    booster.load_model(model_path)
    dataset = data.read_parquet(data_path)

    checkpoint = XGBoostCheckpoint.from_model(".", booster)
    predictor = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)

    # The label column is not a model input, so it is removed before predicting.
    features_only = dataset.drop_columns(["labels"])
    return predictor.predict(features_only)
Ejemplo n.º 6
0
def test_sklearn_predictor_no_training():
    """Predict from a checkpoint created directly from an estimator.

    The module-level ``model`` is written to a temporary directory as a
    ``SklearnCheckpoint``; predicting on ``dummy_data`` must yield three rows.
    """
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        checkpoint = SklearnCheckpoint.from_estimator(
            estimator=model, path=checkpoint_dir
        )
        batch_predictor = BatchPredictor.from_checkpoint(
            checkpoint, SklearnPredictor
        )

        frame = pd.DataFrame(dummy_data, columns=["A", "B"])
        predictions = batch_predictor.predict(ray.data.from_pandas(frame))

        assert len(predictions.to_pandas()) == 3
Ejemplo n.º 7
0
def predict_linear(result: Result):
    """Run batch prediction on ten random inputs using a trained checkpoint.

    Args:
        result: Training result whose ``checkpoint`` is loaded into a
            ``TorchPredictor``-backed ``BatchPredictor``.

    Returns:
        The dataset returned by ``BatchPredictor.predict``.
    """
    batch_predictor = BatchPredictor.from_checkpoint(
        result.checkpoint, TorchPredictor
    )

    # The dict literal must close before the ``for`` clause so the list
    # comprehension yields ten single-key records. With the brace after the
    # ``for`` clause, the expression is a dict comprehension wrapped in a
    # one-element list, i.e. a single-row dataset.
    items = [{"x": random.uniform(0, 1)} for _ in range(10)]
    prediction_dataset = ray.data.from_items(items)

    predictions = batch_predictor.predict(prediction_dataset, dtype=torch.float)

    return predictions
Ejemplo n.º 8
0
def predict_linear(result: Result) -> Dataset:
    """Predict on ten random inputs with a TensorFlow checkpoint and show them.

    Args:
        result: Training result whose ``checkpoint`` is loaded into a
            ``TensorflowPredictor``-backed ``BatchPredictor``.

    Returns:
        The prediction dataset, after it has been printed with ``show()``.
    """
    batch_predictor = BatchPredictor.from_checkpoint(
        result.checkpoint,
        TensorflowPredictor,
        model_definition=build_model,
    )

    # Ten single-feature records, each drawn uniformly from [0, 1).
    samples = []
    for _ in range(10):
        samples.append({"x": np.random.uniform(0, 1)})

    predictions = batch_predictor.predict(
        ray.data.from_items(samples), dtype=tf.float32
    )

    print("PREDICTIONS")
    predictions.show()

    return predictions
Ejemplo n.º 9
0
def test_batch_prediction_with_set_cpus(ray_start_4_cpus):
    """Run sklearn batch prediction with explicit CPU settings.

    The module-level ``model`` is pickled under ``MODEL_KEY`` in a temporary
    directory, loaded as a directory checkpoint, and used to predict with
    ``num_cpus_per_worker=2`` and ``num_estimator_cpus=2``. The test passes
    if the call completes without raising.
    """
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        model_file = os.path.join(checkpoint_dir, MODEL_KEY)
        with open(model_file, "wb") as handle:
            cpickle.dump(model, handle)

        batch_predictor = BatchPredictor.from_checkpoint(
            Checkpoint.from_directory(checkpoint_dir), SklearnPredictor
        )

        frame = pd.DataFrame(dummy_data, columns=["A", "B"])
        batch_predictor.predict(
            ray.data.from_pandas(frame),
            num_cpus_per_worker=2,
            num_estimator_cpus=2,
        )
Ejemplo n.º 10
0
def test_e2e(ray_start_4_cpus, save_strategy):
    """Train, resume from the checkpoint, then batch-predict end to end.

    A first trainer runs for 4 epochs with the parametrized ``save_strategy``.
    A second trainer resumes from that checkpoint with ``epochs`` set to 5.
    The resumed checkpoint is then used for text-generation prediction on
    ``prompts``, which must yield three rows.
    """
    ray_train = ray.data.from_pandas(train_df)
    ray_validation = ray.data.from_pandas(validation_df)
    scaling_config = ScalingConfig(num_workers=2, use_gpu=False)

    def make_trainer(init_config, **extra_kwargs):
        # Both trainers share everything except the init config and the
        # optional checkpoint to resume from.
        return HuggingFaceTrainer(
            trainer_init_per_worker=train_function,
            trainer_init_config=init_config,
            scaling_config=scaling_config,
            datasets={"train": ray_train, "evaluation": ray_validation},
            **extra_kwargs,
        )

    first_run = make_trainer(
        {"epochs": 4, "save_strategy": save_strategy}
    ).fit()

    assert first_run.metrics["epoch"] == 4
    assert first_run.metrics["training_iteration"] == 4
    assert first_run.checkpoint

    # this will train for 1 epoch: 5 - 4 = 1
    resumed_run = make_trainer(
        {"epochs": 5},
        resume_from_checkpoint=first_run.checkpoint,
    ).fit()

    assert resumed_run.metrics["epoch"] == 5
    assert resumed_run.metrics["training_iteration"] == 1
    assert resumed_run.checkpoint

    predictor = BatchPredictor.from_checkpoint(
        resumed_run.checkpoint,
        HuggingFacePredictor,
        task="text-generation",
        tokenizer=AutoTokenizer.from_pretrained(tokenizer_checkpoint),
    )

    predictions = predictor.predict(ray.data.from_pandas(prompts))
    assert predictions.count() == 3
Ejemplo n.º 11
0
def test_batch_prediction_feature_cols():
    """Predict using only the column selected via ``feature_columns``.

    The input has columns ``a`` and ``b``; only ``a`` is passed as a feature
    column and the flattened output must equal ``[4.0, 8.0, 12.0]``.
    """
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    test_dataset = ray.data.from_pandas(frame)

    predictions = batch_predictor.predict(test_dataset, feature_columns=["a"])
    flattened = predictions.to_pandas().to_numpy().squeeze().tolist()
    assert flattened == [4.0, 8.0, 12.0]
Ejemplo n.º 12
0
def test_automatic_enable_gpu_from_num_gpus_per_worker(shutdown_only):
    """Requesting GPUs per worker should turn on GPU use in the predictor.

    Per the original test's description, ``num_gpus_per_worker > 0`` in
    ``predict()`` makes the underlying predictor get created with GPU use
    enabled. Here that is observed as a ``ValueError`` with the message
    "DummyPredictor does not support GPU prediction".
    """
    ray.init(num_gpus=1)

    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)
    test_dataset = ray.data.range_table(4)

    expected_error = "DummyPredictor does not support GPU prediction"
    with pytest.raises(ValueError, match=expected_error):
        batch_predictor.predict(test_dataset, num_gpus_per_worker=1)
Ejemplo n.º 13
0
def main(data_size_gb: int):
    """Time GPU batch prediction over an image dataset and record the metrics.

    Reads ``{data_size_gb}G-image-data-synthetic-raw`` from S3, predicts with
    a pretrained ResNet-18 using one GPU per worker, and writes the elapsed
    time and derived throughput as JSON. The output path comes from the
    ``TEST_OUTPUT_JSON`` environment variable and defaults to
    ``/tmp/release_test_out.json``. The results are also printed.

    Args:
        data_size_gb: Size in GB; selects the dataset and feeds the
            throughput calculation.
    """
    data_url = f"s3://air-example-data-2/{data_size_gb}G-image-data-synthetic-raw"
    print(
        f"Running GPU batch prediction with {data_size_gb}GB data from {data_url}"
    )

    # The timer covers dataset creation, model loading and prediction.
    started_at = time.time()
    dataset = ray.data.read_datasource(ImageFolderDatasource(), paths=[data_url])

    checkpoint = TorchCheckpoint.from_model(
        model=resnet18(pretrained=True),
        preprocessor=BatchMapper(preprocess),
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, TorchPredictor)
    batch_predictor.predict(
        dataset, num_gpus_per_worker=1, feature_columns=["image"]
    )
    total_time_s = round(time.time() - started_at, 2)

    # For structured output integration with internal tooling
    # NOTE(review): "throughout_MB_s" looks like a typo for "throughput", but
    # the key is emitted as-is because consumers may depend on it - confirm.
    latency_metric = {
        "perf_metric_name": "total_time_s",
        "perf_metric_value": total_time_s,
        "perf_metric_type": "LATENCY",
    }
    throughput_metric = {
        "perf_metric_name": "throughout_MB_s",
        "perf_metric_value": (data_size_gb * 1024 / total_time_s),
        "perf_metric_type": "THROUGHPUT",
    }
    results = {
        "data_size_gb": data_size_gb,
        "perf_metrics": [latency_metric, throughput_metric],
    }

    output_path = os.environ.get("TEST_OUTPUT_JSON", "/tmp/release_test_out.json")
    with open(output_path, "wt") as out_file:
        json.dump(results, out_file)

    print(results)
Ejemplo n.º 14
0
def test_batch_prediction_fs():
    """Batch prediction with ``DummyPredictorFS`` and a minimum worker count.

    A 128-item dataset repartitioned into 8 blocks is predicted with
    ``min_scoring_workers=4``; the output must match the expected values in
    order.
    """
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictorFS)

    repeated_inputs = [1.0, 2.0, 3.0, 4.0] * 32
    test_dataset = ray.data.from_items(repeated_inputs).repartition(8)

    predictions = batch_predictor.predict(test_dataset, min_scoring_workers=4)
    flattened = predictions.to_pandas().to_numpy().squeeze().tolist()
    assert flattened == [4.0, 8.0, 12.0, 16.0] * 32
Ejemplo n.º 15
0
def test_batch_prediction_keep_cols():
    """Check that ``keep_columns`` carries input columns into the output.

    With ``a`` as the feature column and ``b`` as a kept column, the output
    must contain exactly ``a`` (predicted values) and ``b`` (unchanged);
    column ``c`` must not appear.
    """
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    predictions = batch_predictor.predict(
        ray.data.from_pandas(frame),
        feature_columns=["a"],
        keep_columns=["b"],
    )
    output_df = predictions.to_pandas()

    assert set(output_df.columns) == {"a", "b"}

    assert output_df["a"].tolist() == [4.0, 8.0, 12.0]
    assert output_df["b"].tolist() == [4, 5, 6]
Ejemplo n.º 16
0
# Configure TensorFlow training. `train_func`, `config`, `num_workers`,
# `use_gpu` and `dataset` are defined outside this excerpt.
trainer = TensorflowTrainer(
    train_loop_per_worker=train_func,
    train_loop_config=config,
    scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
    datasets={"train": dataset},
)
# Run training; the result exposes the metrics printed here and the checkpoint
# consumed by the batch-prediction section below.
result = trainer.fit()
print(result.metrics)
# __air_tf_train_end__

# __air_tf_batchpred_start__
import numpy as np

from ray.train.batch_predictor import BatchPredictor
from ray.train.tensorflow import TensorflowPredictor

# Build a batch predictor from the training checkpoint; `build_model` is
# passed as the model definition.
batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint,
                                                 TensorflowPredictor,
                                                 model_definition=build_model)

# Ten single-feature records, each with a random "x" value.
items = [{"x": np.random.uniform(0, 1)} for _ in range(10)]
prediction_dataset = ray.data.from_items(items)

predictions = batch_predictor.predict(prediction_dataset, dtype=tf.float32)

print("PREDICTIONS")
predictions.show()

# __air_tf_batchpred_end__
Ejemplo n.º 17
0

def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Apply the torchvision image transform to every row of ``df["image"]``.

    User PyTorch code to transform the images. Pandas is used as the
    intermediate format to hold the images, as a shorthand for a Python
    dictionary.

    Args:
        df: Batch with an ``"image"`` column whose entries expose
            ``to_numpy()``.

    Returns:
        The same DataFrame, with ``"image"`` replaced in place by a
        ``TensorArray`` of the transformed images.
    """
    steps = [
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    image_transform = transforms.Compose(steps)

    transformed_images = [
        image_transform(image.to_numpy()) for image in df["image"]
    ]
    df["image"] = TensorArray(transformed_images)
    return df


# Location of the 1GB synthetic image dataset read below.
data_url = "s3://anonymous@air-example-data-2/1G-image-data-synthetic-raw"
print(f"Running GPU batch prediction with 1GB data from {data_url}")
dataset = ray.data.read_datasource(ImageFolderDatasource(), paths=[data_url])

model = resnet18(pretrained=True)

# Wrap the `preprocess` function in a BatchMapper and bundle it with the model
# in one checkpoint.
preprocessor = BatchMapper(preprocess)
ckpt = TorchCheckpoint.from_model(model=model, preprocessor=preprocessor)

# Predict on the "image" column only.
predictor = BatchPredictor.from_checkpoint(ckpt, TorchPredictor)
predictor.predict(dataset, feature_columns=["image"])
Ejemplo n.º 18
0
        [
            keras.Input(shape=(input_features,)),
            layers.Dense(16, activation="relu"),
            layers.Dense(16, activation="relu"),
            layers.Dense(1, activation="sigmoid"),
        ]
    )


dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
# Every column except "target" is used as a feature (presumably "target" is
# the label column - confirm against the dataset).
all_features: List[str] = dataset.schema().names
all_features.remove("target")
num_features = len(all_features)

prep = Concatenator(dtype=np.float32)

# Bundle a model sized for `num_features` inputs with the preprocessor.
checkpoint = TensorflowCheckpoint.from_model(
    model=create_model(num_features), preprocessor=prep
)
# You can also fetch a checkpoint from a Trainer
# checkpoint = trainer.fit().checkpoint

# The model definition is a zero-argument callable that builds a model with
# the same feature count as the one stored in the checkpoint.
batch_predictor = BatchPredictor.from_checkpoint(
    checkpoint, TensorflowPredictor, model_definition=lambda: create_model(num_features)
)

predicted_probabilities = batch_predictor.predict(dataset, feature_columns=all_features)
predicted_probabilities.show()
# {'predictions': array([1.], dtype=float32)}
# {'predictions': array([0.], dtype=float32)}
def main(
    model_checkpoint="gpt2",
    tokenizer_checkpoint="sgugger/gpt2-like-tokenizer",
    dataset_name="wikitext-2-raw-v1",
    dataset_path="wikitext",
    num_epochs=5,
    num_workers=2,
    use_gpu=False,
    smoke_test=False,
):
    """Train a causal language model with HuggingFaceTrainer and generate text.

    The dataset is loaded and tokenized in a Ray remote task, grouped into
    fixed-size blocks, and used to train a model built from
    ``model_checkpoint``'s config. The resulting checkpoint is then used for
    text-generation batch prediction on a single prompt, and the result is
    printed.

    Args:
        model_checkpoint: Name passed to ``AutoConfig.from_pretrained``.
        tokenizer_checkpoint: Name passed to ``AutoTokenizer.from_pretrained``.
        dataset_name: Second argument to ``load_dataset``.
        dataset_path: First argument to ``load_dataset``.
        num_epochs: Value used for ``num_train_epochs``.
        num_workers: Number of workers in the ``ScalingConfig``.
        use_gpu: Passed to the ``ScalingConfig``; also sets
            ``num_gpus_per_worker`` (0 or 1) for prediction.
        smoke_test: If true, limits training data to 16 rows and validation
            data to 8 rows.
    """
    block_size = 128

    # Uncomment the following if the maximum length the model was
    # pretrained with can fit in your memory.
    # block_size = tokenizer.model_max_length

    # Run this as a remote function to avoid downloading on the driver
    @ray.remote
    def get_dataset():
        datasets = load_dataset(dataset_path, dataset_name)
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

        def tokenize_function(examples):
            return tokenizer(examples["text"])

        # Tokenize in batches and drop the raw "text" column.
        tokenized_datasets = datasets.map(tokenize_function,
                                          batched=True,
                                          num_proc=1,
                                          remove_columns=["text"])

        def group_texts(examples):
            # Concatenate all texts.
            concatenated_examples = {
                k: sum(examples[k], [])
                for k in examples.keys()
            }
            total_length = len(concatenated_examples[list(examples.keys())[0]])
            # We drop the small remainder. We could add padding if the model supported
            # it instead of this drop. You can customize this part to your needs.
            total_length = (total_length // block_size) * block_size
            # Split by chunks of max_len.
            result = {
                k: [
                    t[i:i + block_size]
                    for i in range(0, total_length, block_size)
                ]
                for k, t in concatenated_examples.items()
            }
            # Labels are a copy of the input ids.
            result["labels"] = result["input_ids"].copy()
            return result

        lm_datasets = tokenized_datasets.map(
            group_texts,
            batched=True,
            batch_size=1000,
            num_proc=1,
        )
        ray_train = ray.data.from_huggingface(lm_datasets["train"])
        ray_validation = ray.data.from_huggingface(lm_datasets["validation"])
        return ray_train, ray_validation

    ray_train, ray_validation = ray.get(get_dataset.remote())

    def train_function(train_dataset, eval_dataset=None, **config):
        # Builds and returns a transformers Trainer. `config` is accepted but
        # not read here; the epoch count comes from the enclosing `num_epochs`.
        model_config = AutoConfig.from_pretrained(model_checkpoint)
        model = AutoModelForCausalLM.from_config(model_config)
        print("Initializing TrainingArguments...")
        # The checkpoints will be moved to Ray Tune results
        # directory automatically
        training_dir = tempfile.mkdtemp()
        training_args = TrainingArguments(
            training_dir,
            evaluation_strategy="epoch",
            num_train_epochs=num_epochs,
            learning_rate=2e-5,
            weight_decay=0.01,
            disable_tqdm=True,
            save_strategy="epoch",
            # Required to avoid an exception
            no_cuda=not torch.cuda.is_available(),
        )
        print("Initializing Trainer...")
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
        )
        print("Trainer initialized! Starting training...")
        return trainer

    # Keep smoke-test runs small.
    if smoke_test:
        ray_train = ray_train.limit(16)
        ray_validation = ray_validation.limit(8)

    trainer = HuggingFaceTrainer(
        trainer_init_per_worker=train_function,
        scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
        datasets={
            "train": ray_train,
            "evaluation": ray_validation
        },
    )
    results = trainer.fit()
    print(results.metrics)

    # Generate text for one prompt using the trained checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)
    prompt = ["My text: Complete me..."]
    predictor = BatchPredictor.from_checkpoint(
        results.checkpoint,
        HuggingFacePredictor,
        task="text-generation",
        tokenizer=tokenizer,
    )
    data = ray.data.from_pandas(pd.DataFrame(prompt, columns=["prompt"]))
    # int(use_gpu) requests one GPU per worker when use_gpu is true, else zero.
    prediction = predictor.predict(data, num_gpus_per_worker=int(use_gpu))

    print(f"Generated text for prompt '{prompt}': '{prediction.take(1)}'")
Ejemplo n.º 20
0
        "use_gpu": use_gpu,
    },
    label_column="target",
    params=params,
    datasets={
        "train": train_dataset,
        "valid": valid_dataset
    },
    preprocessor=preprocessor,
    num_boost_round=20,
)
# Run training; the result exposes the metrics printed here and the checkpoint
# consumed by the batch-prediction section below.
result = trainer.fit()
print(result.metrics)
# __air_xgb_train_end__

# __air_xgb_batchpred_start__
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostPredictor

batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint,
                                                 XGBoostPredictor)

# Default prediction on the test dataset (defined outside this excerpt).
predicted_probabilities = batch_predictor.predict(test_dataset)
print("PREDICTED PROBABILITIES")
predicted_probabilities.show()

# Second pass with `pred_contribs=True`; the script labels this output as
# SHAP values.
shap_values = batch_predictor.predict(test_dataset, pred_contribs=True)
print("SHAP VALUES")
shap_values.show()
# __air_xgb_batchpred_end__
Ejemplo n.º 21
0
# Tune the trainer over `param_space` with 5 samples, minimizing `metric`.
# `trainer`, `param_space` and `metric` are defined outside this excerpt.
tuner = Tuner(
    trainer,
    param_space=param_space,
    tune_config=TuneConfig(num_samples=5, metric=metric, mode="min"),
)
# Execute tuning.
result_grid = tuner.fit()

# Fetch the best result.
best_result = result_grid.get_best_result()
print("Best Result:", best_result)
# Best Result: Result(metrics={'loss': 0.278409322102863, ...})
# __air_tune_generic_end__

# __air_pytorch_batchpred_start__
from ray.train.batch_predictor import BatchPredictor
from ray.train.torch import TorchPredictor

# You can also create a checkpoint from a trained model using
# `TorchCheckpoint.from_model`.
checkpoint = best_result.checkpoint

# A model instance built by `create_model(num_features)` is passed to the
# predictor alongside the checkpoint.
batch_predictor = BatchPredictor.from_checkpoint(
    checkpoint, TorchPredictor, model=create_model(num_features))

predicted_probabilities = batch_predictor.predict(test_dataset)
predicted_probabilities.show()
# {'predictions': array([1.], dtype=float32)}
# {'predictions': array([0.], dtype=float32)}
# __air_pytorch_batchpred_end__