def test_separate_gpu_stage(shutdown_only):
    """Check the stages reported by ``stats()`` for both ``separate_gpu_stage`` settings.

    For each setting the test predicts over ``ray.data.range_table(10)`` with one
    GPU per worker, asserts the expected "Stage 1" label, asserts that a
    "Stage 2 map_batches:" entry is present, and asserts that the maximum of
    the ``value`` column equals 36.0.
    """
    ray.init(num_gpus=1)
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    # (separate_gpu_stage flag, label expected for the first stage)
    scenarios = (
        (True, "Stage 1 read->map_batches:"),
        (False, "Stage 1 read:"),
    )
    for separate_stage, first_stage_label in scenarios:
        ds = batch_predictor.predict(
            ray.data.range_table(10),
            num_gpus_per_worker=1,
            separate_gpu_stage=separate_stage,
            allow_gpu=True,
        )
        stats = ds.stats()
        assert first_stage_label in stats, stats
        assert "Stage 2 map_batches:" in stats, stats
        assert ds.max("value") == 36.0, ds
def test_get_and_set_preprocessor():
    """Verify that the preprocessor can be read back and replaced.

    Predictions over ``ray.data.range(4)`` are checked once with the
    preprocessor stored in the checkpoint and once more after
    ``set_preprocessor`` installs a different one.
    """
    initial_preprocessor = DummyPreprocessor(1)
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: initial_preprocessor}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)
    assert batch_predictor.get_preprocessor() == initial_preprocessor

    test_dataset = ray.data.range(4)

    def predicted_values():
        # Flatten the prediction dataset into a plain Python list.
        output_ds = batch_predictor.predict(test_dataset)
        return output_ds.to_pandas().to_numpy().squeeze().tolist()

    assert predicted_values() == [0.0, 2.0, 4.0, 6.0]

    replacement_preprocessor = DummyPreprocessor(2)
    batch_predictor.set_preprocessor(replacement_preprocessor)
    assert batch_predictor.get_preprocessor() == replacement_preprocessor

    assert predicted_values() == [0.0, 4.0, 8.0, 12.0]
def test_batch_prediction():
    """Exercise ``predict`` and ``predict_pipelined`` on small datasets."""
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    range_dataset = ray.data.range(4)
    ds = batch_predictor.predict(range_dataset)
    # Check fusion occurred.
    assert "read->map_batches" in ds.stats(), ds.stats()
    predicted = ds.to_pandas().to_numpy().squeeze().tolist()
    assert predicted == [0.0, 4.0, 8.0, 12.0]

    items_dataset = ray.data.from_items([1.0, 2.0, 3.0, 4.0])
    pipeline = batch_predictor.predict_pipelined(items_dataset, blocks_per_window=2)
    # Only the first dataset yielded by the pipeline is inspected.
    first_window = next(pipeline.iter_datasets())
    first_window_values = first_window.to_pandas().to_numpy().squeeze().tolist()
    assert first_window_values == [4.0, 8.0]
def test_tensorflow_predictor_no_training():
    """Predict with a checkpoint made directly from ``build_model()`` (no trainer run)."""
    checkpoint = to_air_checkpoint(build_model())
    batch_predictor = BatchPredictor.from_checkpoint(
        checkpoint,
        TensorflowPredictor,
        model_definition=build_model,
    )
    predictions = batch_predictor.predict(ray.data.range(3))
    # One prediction is expected per input row.
    assert predictions.count() == 3
def run_xgboost_prediction(model_path: str, data_path: str):
    """Load a saved XGBoost model and batch-predict over a Parquet dataset.

    Args:
        model_path: Path passed to ``xgb.Booster.load_model``.
        data_path: Path passed to ``data.read_parquet``.

    Returns:
        The result of ``BatchPredictor.predict`` on the dataset with its
        ``labels`` column dropped.
    """
    booster = xgb.Booster()
    booster.load_model(model_path)

    dataset = data.read_parquet(data_path)

    # NOTE(review): "." is passed as the first positional argument and the
    # booster as the second -- confirm this matches the signature of
    # XGBoostCheckpoint.from_model in the Ray version in use.
    checkpoint = XGBoostCheckpoint.from_model(".", booster)
    predictor = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)

    features = dataset.drop_columns(["labels"])
    return predictor.predict(features)
def test_sklearn_predictor_no_training():
    """Predict with a checkpoint built from the module-level ``model`` estimator."""
    with tempfile.TemporaryDirectory() as tmpdir:
        checkpoint = SklearnCheckpoint.from_estimator(estimator=model, path=tmpdir)
        batch_predictor = BatchPredictor.from_checkpoint(checkpoint, SklearnPredictor)

        frame = pd.DataFrame(dummy_data, columns=["A", "B"])
        test_dataset = ray.data.from_pandas(frame)

        predictions = batch_predictor.predict(test_dataset)
        prediction_frame = predictions.to_pandas()
        assert len(prediction_frame) == 3
def predict_linear(result: Result):
    """Run batch prediction with the Torch checkpoint held by ``result``.

    Builds a ``BatchPredictor`` from ``result.checkpoint`` using
    ``TorchPredictor``, creates ten rows of the form
    ``{"x": random.uniform(0, 1)}`` and predicts on them with
    ``dtype=torch.float``.

    Args:
        result: Object whose ``checkpoint`` attribute is passed to
            ``BatchPredictor.from_checkpoint``.

    Returns:
        Whatever ``BatchPredictor.predict`` returns for the generated rows.
    """
    batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, TorchPredictor)

    # One dict per row. The ``for`` clause must sit outside the braces: placed
    # inside them it forms a dict comprehension, which collapses to a single
    # {"x": ...} dict and therefore a one-row dataset instead of ten rows.
    items = [{"x": random.uniform(0, 1)} for _ in range(10)]
    prediction_dataset = ray.data.from_items(items)

    predictions = batch_predictor.predict(prediction_dataset, dtype=torch.float)

    return predictions
def predict_linear(result: Result) -> Dataset:
    """Batch-predict on ten random rows with the TensorFlow checkpoint in ``result``.

    The predictions are printed under a "PREDICTIONS" header via ``show()``
    and then returned.
    """
    predictor = BatchPredictor.from_checkpoint(
        result.checkpoint,
        TensorflowPredictor,
        model_definition=build_model,
    )

    # Ten single-column rows, each drawn with np.random.uniform(0, 1).
    rows = []
    for _ in range(10):
        rows.append({"x": np.random.uniform(0, 1)})

    predictions = predictor.predict(ray.data.from_items(rows), dtype=tf.float32)

    print("PREDICTIONS")
    predictions.show()

    return predictions
def test_batch_prediction_with_set_cpus(ray_start_4_cpus):
    """Run prediction with explicit ``num_cpus_per_worker`` and ``num_estimator_cpus``.

    The module-level ``model`` is pickled into a temporary directory under
    ``MODEL_KEY`` and loaded back through ``Checkpoint.from_directory``. The
    test makes no assertion on the output; it only requires the call to finish.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        model_file = os.path.join(tmpdir, MODEL_KEY)
        with open(model_file, "wb") as f:
            cpickle.dump(model, f)

        checkpoint = Checkpoint.from_directory(tmpdir)
        batch_predictor = BatchPredictor.from_checkpoint(checkpoint, SklearnPredictor)

        frame = pd.DataFrame(dummy_data, columns=["A", "B"])
        test_dataset = ray.data.from_pandas(frame)

        batch_predictor.predict(
            test_dataset,
            num_cpus_per_worker=2,
            num_estimator_cpus=2,
        )
def test_e2e(ray_start_4_cpus, save_strategy):
    """Train, resume from the checkpoint, then batch-predict with the result.

    The first trainer is configured for 4 epochs. The second is configured
    for 5 epochs and resumes from the first checkpoint, so it is expected to
    report a single training iteration. The final checkpoint is used for
    text generation over ``prompts``.
    """
    ray_train = ray.data.from_pandas(train_df)
    ray_validation = ray.data.from_pandas(validation_df)
    scaling_config = ScalingConfig(num_workers=2, use_gpu=False)

    def make_datasets():
        # Each trainer gets its own dict object.
        return {"train": ray_train, "evaluation": ray_validation}

    initial_trainer = HuggingFaceTrainer(
        trainer_init_per_worker=train_function,
        trainer_init_config={"epochs": 4, "save_strategy": save_strategy},
        scaling_config=scaling_config,
        datasets=make_datasets(),
    )
    result = initial_trainer.fit()

    assert result.metrics["epoch"] == 4
    assert result.metrics["training_iteration"] == 4
    assert result.checkpoint

    resumed_trainer = HuggingFaceTrainer(
        trainer_init_per_worker=train_function,
        # this will train for 1 epoch: 5 - 4 = 1
        trainer_init_config={"epochs": 5},
        scaling_config=scaling_config,
        datasets=make_datasets(),
        resume_from_checkpoint=result.checkpoint,
    )
    result2 = resumed_trainer.fit()

    assert result2.metrics["epoch"] == 5
    assert result2.metrics["training_iteration"] == 1
    assert result2.checkpoint

    predictor = BatchPredictor.from_checkpoint(
        result2.checkpoint,
        HuggingFacePredictor,
        task="text-generation",
        tokenizer=AutoTokenizer.from_pretrained(tokenizer_checkpoint),
    )

    prompt_dataset = ray.data.from_pandas(prompts)
    predictions = predictor.predict(prompt_dataset)

    assert predictions.count() == 3
def test_batch_prediction_feature_cols():
    """Predict using only column ``a`` of a two-column dataset."""
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    test_dataset = ray.data.from_pandas(frame)

    predictions = batch_predictor.predict(test_dataset, feature_columns=["a"])
    observed = predictions.to_pandas().to_numpy().squeeze().tolist()
    assert observed == [4.0, 8.0, 12.0]
def test_automatic_enable_gpu_from_num_gpus_per_worker(shutdown_only):
    """
    Requesting ``num_gpus_per_worker > 0`` in ``predict()`` should turn on GPU
    use when the underlying Predictor is created. ``DummyPredictor`` is
    expected to reject that with a ``ValueError``, which this test relies on
    to observe the behaviour.
    """
    ray.init(num_gpus=1)

    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)
    test_dataset = ray.data.range_table(4)

    expected_error = "DummyPredictor does not support GPU prediction"
    with pytest.raises(ValueError, match=expected_error):
        batch_predictor.predict(test_dataset, num_gpus_per_worker=1)
def main(data_size_gb: int):
    """Run GPU batch prediction over the synthetic image dataset and record timing.

    Args:
        data_size_gb: Size label of the dataset. It selects the S3 folder and
            is used to compute the throughput metric.

    Writes a JSON document with the data size and two perf metrics to the
    path in the ``TEST_OUTPUT_JSON`` environment variable (falling back to
    ``/tmp/release_test_out.json``) and prints the same dictionary.
    """
    data_url = f"s3://air-example-data-2/{data_size_gb}G-image-data-synthetic-raw"
    print(
        f"Running GPU batch prediction with {data_size_gb}GB data from {data_url}"
    )

    # The timed region covers dataset creation, model/checkpoint setup and prediction.
    start = time.time()
    dataset = ray.data.read_datasource(ImageFolderDatasource(), paths=[data_url])

    model = resnet18(pretrained=True)
    preprocessor = BatchMapper(preprocess)
    ckpt = TorchCheckpoint.from_model(model=model, preprocessor=preprocessor)

    predictor = BatchPredictor.from_checkpoint(ckpt, TorchPredictor)
    predictor.predict(dataset, num_gpus_per_worker=1, feature_columns=["image"])
    total_time_s = round(time.time() - start, 2)

    latency_metric = {
        "perf_metric_name": "total_time_s",
        "perf_metric_value": total_time_s,
        "perf_metric_type": "LATENCY",
    }
    # NOTE(review): "throughout_MB_s" looks like a typo for "throughput", but
    # the name is emitted for external tooling, so it is left untouched here.
    throughput_metric = {
        "perf_metric_name": "throughout_MB_s",
        "perf_metric_value": (data_size_gb * 1024 / total_time_s),
        "perf_metric_type": "THROUGHPUT",
    }

    # For structured output integration with internal tooling
    results = {
        "data_size_gb": data_size_gb,
        "perf_metrics": [latency_metric, throughput_metric],
    }

    output_path = os.environ.get("TEST_OUTPUT_JSON", "/tmp/release_test_out.json")
    with open(output_path, "wt") as f:
        json.dump(results, f)

    print(results)
def test_batch_prediction_fs():
    """Predict with ``DummyPredictorFS`` over an 8-block dataset using ``min_scoring_workers=4``."""
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictorFS)

    # 128 items, repartitioned into 8 blocks.
    test_dataset = ray.data.from_items([1.0, 2.0, 3.0, 4.0] * 32).repartition(8)

    predictions = batch_predictor.predict(test_dataset, min_scoring_workers=4)
    observed = predictions.to_pandas().to_numpy().squeeze().tolist()
    assert observed == [4.0, 8.0, 12.0, 16.0] * 32
def test_batch_prediction_keep_cols():
    """Check that ``keep_columns`` carries an input column through to the output.

    Column ``a`` is the feature column and ``b`` is kept. The output is
    expected to contain exactly ``a`` and ``b``, with ``b`` unchanged.
    """
    checkpoint = Checkpoint.from_dict(
        {"factor": 2.0, PREPROCESSOR_KEY: DummyPreprocessor()}
    )
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    test_dataset = ray.data.from_pandas(frame)

    predictions = batch_predictor.predict(
        test_dataset,
        feature_columns=["a"],
        keep_columns=["b"],
    )
    output_df = predictions.to_pandas()

    assert set(output_df.columns) == {"a", "b"}
    assert output_df["a"].tolist() == [4.0, 8.0, 12.0]
    assert output_df["b"].tolist() == [4, 5, 6]
# Configure the TensorFlow trainer with the per-worker training loop, its
# config, the scaling settings and the "train" dataset, then run it and print
# the metrics of the returned result.
trainer = TensorflowTrainer(
    train_loop_per_worker=train_func,
    train_loop_config=config,
    scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
    datasets={"train": dataset},
)
result = trainer.fit()
print(result.metrics)
# __air_tf_train_end__

# __air_tf_batchpred_start__
import numpy as np

from ray.train.batch_predictor import BatchPredictor
from ray.train.tensorflow import TensorflowPredictor

# Build a batch predictor from the checkpoint produced by the training run above.
batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, TensorflowPredictor, model_definition=build_model)

# Ten single-column rows, each holding one np.random.uniform(0, 1) sample.
items = [{"x": np.random.uniform(0, 1)} for _ in range(10)]
prediction_dataset = ray.data.from_items(items)

predictions = batch_predictor.predict(prediction_dataset, dtype=tf.float32)

print("PREDICTIONS")
predictions.show()
# __air_tf_batchpred_end__
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the user's torchvision transforms to every entry of ``df["image"]``.

    A pandas DataFrame is used here as a convenient stand-in for a Python
    dictionary of images. The ``image`` column is overwritten in place with a
    ``TensorArray`` of the transformed images, and the same DataFrame is
    returned.
    """
    image_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    transformed_images = []
    for image in df["image"]:
        transformed_images.append(image_transform(image.to_numpy()))

    df["image"] = TensorArray(transformed_images)
    return df


data_url = "s3://anonymous@air-example-data-2/1G-image-data-synthetic-raw"
print(f"Running GPU batch prediction with 1GB data from {data_url}")
dataset = ray.data.read_datasource(ImageFolderDatasource(), paths=[data_url])

# Bundle a pretrained ResNet-18 with the preprocessing step into one checkpoint.
model = resnet18(pretrained=True)
preprocessor = BatchMapper(preprocess)
ckpt = TorchCheckpoint.from_model(model=model, preprocessor=preprocessor)

# Predict over the dataset's "image" column.
predictor = BatchPredictor.from_checkpoint(ckpt, TorchPredictor)
predictor.predict(dataset, feature_columns=["image"])
# NOTE(review): the call that this layer list is passed to opens before the
# visible part of the file; presumably it is the body of `create_model` -- confirm.
[
    keras.Input(shape=(input_features,)),
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
]
)

# Load the CSV dataset and use every column except "target" as a feature.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")

all_features: List[str] = dataset.schema().names
all_features.remove("target")
num_features = len(all_features)

# Store a freshly created model together with a float32 Concatenator
# preprocessor in a checkpoint.
prep = Concatenator(dtype=np.float32)

checkpoint = TensorflowCheckpoint.from_model(
    model=create_model(num_features), preprocessor=prep
)
# You can also fetch a checkpoint from a Trainer
# checkpoint = trainer.fit().checkpoint

# The model definition is passed as a zero-argument callable that builds the
# model with the same number of input features.
batch_predictor = BatchPredictor.from_checkpoint(
    checkpoint, TensorflowPredictor, model_definition=lambda: create_model(num_features)
)

predicted_probabilities = batch_predictor.predict(dataset, feature_columns=all_features)
predicted_probabilities.show()
# {'predictions': array([1.], dtype=float32)}
# {'predictions': array([0.], dtype=float32)}
def main(
    model_checkpoint="gpt2",
    tokenizer_checkpoint="sgugger/gpt2-like-tokenizer",
    dataset_name="wikitext-2-raw-v1",
    dataset_path="wikitext",
    num_epochs=5,
    num_workers=2,
    use_gpu=False,
    smoke_test=False,
):
    """Train a causal language model with ``HuggingFaceTrainer`` and generate text.

    The dataset is loaded and tokenized inside a Ray remote function, grouped
    into fixed-size blocks of 128 tokens, converted to Ray datasets, and used
    to fit a ``HuggingFaceTrainer``. The resulting checkpoint is then used with
    ``BatchPredictor`` to run a "text-generation" task on a single prompt.

    Args:
        model_checkpoint: Name passed to ``AutoConfig.from_pretrained``.
        tokenizer_checkpoint: Name passed to ``AutoTokenizer.from_pretrained``.
        dataset_name: Second argument to ``load_dataset``.
        dataset_path: First argument to ``load_dataset``.
        num_epochs: Value for ``TrainingArguments.num_train_epochs``.
        num_workers: Number of workers in the ``ScalingConfig``.
        use_gpu: Passed to ``ScalingConfig``; also decides whether prediction
            requests one GPU per worker.
        smoke_test: If true, limit the train/validation datasets to 16 and 8
            rows respectively.
    """
    block_size = 128

    # Uncomment the following if the maximum length the model was
    # pretrained with can fit in your memory.
    # block_size = tokenizer.model_max_length

    # Run this as a remote function to avoid downloading on the driver
    @ray.remote
    def get_dataset():
        # Imported here so the block carries its own dependency.
        import itertools

        datasets = load_dataset(dataset_path, dataset_name)
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

        def tokenize_function(examples):
            return tokenizer(examples["text"])

        tokenized_datasets = datasets.map(
            tokenize_function, batched=True, num_proc=1, remove_columns=["text"]
        )

        def group_texts(examples):
            # Concatenate all texts. chain.from_iterable flattens in linear
            # time; ``sum(lists, [])`` copies the accumulator on every step
            # and is quadratic in the number of tokens.
            concatenated_examples = {
                k: list(itertools.chain.from_iterable(examples[k]))
                for k in examples.keys()
            }
            total_length = len(concatenated_examples[list(examples.keys())[0]])
            # We drop the small remainder. We could add padding if the model supported
            # it instead of this drop. You can customize this part to your needs.
            total_length = (total_length // block_size) * block_size
            # Split by chunks of max_len.
            result = {
                k: [
                    t[i:i + block_size]
                    for i in range(0, total_length, block_size)
                ]
                for k, t in concatenated_examples.items()
            }
            # Labels are a copy of the input ids.
            result["labels"] = result["input_ids"].copy()
            return result

        lm_datasets = tokenized_datasets.map(
            group_texts,
            batched=True,
            batch_size=1000,
            num_proc=1,
        )
        ray_train = ray.data.from_huggingface(lm_datasets["train"])
        ray_validation = ray.data.from_huggingface(lm_datasets["validation"])
        return ray_train, ray_validation

    ray_train, ray_validation = ray.get(get_dataset.remote())

    def train_function(train_dataset, eval_dataset=None, **config):
        model_config = AutoConfig.from_pretrained(model_checkpoint)
        model = AutoModelForCausalLM.from_config(model_config)

        print("Initializing TrainingArguments...")
        # The checkpoints will be moved to Ray Tune results
        # directory automatically
        training_dir = tempfile.mkdtemp()
        training_args = TrainingArguments(
            training_dir,
            evaluation_strategy="epoch",
            num_train_epochs=num_epochs,
            learning_rate=2e-5,
            weight_decay=0.01,
            disable_tqdm=True,
            save_strategy="epoch",
            # Required to avoid an exception
            no_cuda=not torch.cuda.is_available(),
        )
        print("Initializing Trainer...")
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
        )
        print("Trainer initialized! Starting training...")
        return trainer

    if smoke_test:
        ray_train = ray_train.limit(16)
        ray_validation = ray_validation.limit(8)

    trainer = HuggingFaceTrainer(
        trainer_init_per_worker=train_function,
        scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
        datasets={
            "train": ray_train,
            "evaluation": ray_validation
        },
    )
    results = trainer.fit()
    print(results.metrics)

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)
    prompt = ["My text: Complete me..."]
    predictor = BatchPredictor.from_checkpoint(
        results.checkpoint,
        HuggingFacePredictor,
        task="text-generation",
        tokenizer=tokenizer,
    )

    data = ray.data.from_pandas(pd.DataFrame(prompt, columns=["prompt"]))
    # int(use_gpu) requests one GPU per worker when use_gpu is true, none otherwise.
    prediction = predictor.predict(data, num_gpus_per_worker=int(use_gpu))

    print(f"Generated text for prompt '{prompt}': '{prediction.take(1)}'")
"use_gpu": use_gpu, }, label_column="target", params=params, datasets={ "train": train_dataset, "valid": valid_dataset }, preprocessor=preprocessor, num_boost_round=20, ) result = trainer.fit() print(result.metrics) # __air_xgb_train_end__ # __air_xgb_batchpred_start__ from ray.train.batch_predictor import BatchPredictor from ray.train.xgboost import XGBoostPredictor batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, XGBoostPredictor) predicted_probabilities = batch_predictor.predict(test_dataset) print("PREDICTED PROBABILITIES") predicted_probabilities.show() shap_values = batch_predictor.predict(test_dataset, pred_contribs=True) print("SHAP VALUES") shap_values.show() # __air_xgb_batchpred_end__
tuner = Tuner( trainer, param_space=param_space, tune_config=TuneConfig(num_samples=5, metric=metric, mode="min"), ) # Execute tuning. result_grid = tuner.fit() # Fetch the best result. best_result = result_grid.get_best_result() print("Best Result:", best_result) # Best Result: Result(metrics={'loss': 0.278409322102863, ...}) # __air_tune_generic_end__ # __air_pytorch_batchpred_start__ from ray.train.batch_predictor import BatchPredictor from ray.train.torch import TorchPredictor # You can also create a checkpoint from a trained model using # `TorchCheckpoint.from_model`. checkpoint = best_result.checkpoint batch_predictor = BatchPredictor.from_checkpoint( checkpoint, TorchPredictor, model=create_model(num_features)) predicted_probabilities = batch_predictor.predict(test_dataset) predicted_probabilities.show() # {'predictions': array([1.], dtype=float32)} # {'predictions': array([0.], dtype=float32)} # __air_pytorch_batchpred_end__