def test_preprocessor_in_checkpoint(ray_start_4_cpus, tmpdir):
    """A fitted preprocessor must survive a checkpoint dict/directory round-trip."""

    class DummyPreprocessor(Preprocessor):
        def __init__(self):
            super().__init__()
            self.is_same = True

        def fit(self, dataset):
            self.fitted_ = True

        def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
            return df

    trainer = LightGBMTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets={
            TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
            "valid": ray.data.from_pandas(test_df),
        },
        preprocessor=DummyPreprocessor(),
    )
    result = trainer.fit()

    # Round-trip through a dict and then a fresh directory to simulate the
    # checkpoint being moved somewhere else.
    relocated = Checkpoint.from_dict(result.checkpoint.to_dict())
    resume_from = Checkpoint.from_directory(relocated.to_directory(tmpdir))

    model, preprocessor = load_checkpoint(resume_from)
    assert get_num_trees(model) == 10
    assert preprocessor.is_same
    assert preprocessor.fitted_
def _convert_directory_checkpoint_to_sync_if_needed(
        self, checkpoint: Checkpoint) -> Checkpoint:
    """Replace the directory checkpoint with a node ip & path dict checkpoint.

    This dict checkpoint will be used to sync the directory.
    If we were to use a directory checkpoint directly, it would
    get deepcopied & serialized unnecessarily.
    """
    with checkpoint.as_directory() as checkpoint_path:
        # Load checkpoint from path.
        checkpoint_path = Path(checkpoint_path).expanduser().absolute()
        if not checkpoint_path.joinpath(TUNE_CHECKPOINT_ID).exists():
            # If the ID file is missing, we assume that this is already
            # a sync checkpoint
            dict_checkpoint = checkpoint.to_dict()
            if (NODE_IP_KEY not in dict_checkpoint
                    or CHECKPOINT_PATH_ON_NODE_KEY not in dict_checkpoint):
                raise ValueError(
                    "Wrong checkpoint format. Ensure the checkpoint is a "
                    "result of `HuggingFaceTrainer`.")
            return checkpoint
        # The ID file exists: read the Tune checkpoint id so it can be
        # carried along in the sync dict.
        with open(checkpoint_path.joinpath(TUNE_CHECKPOINT_ID), "r") as f:
            tune_checkpoint_id = int(f.read())

        return Checkpoint.from_dict({
            NODE_IP_KEY: get_node_ip_address(),
            CHECKPOINT_PATH_ON_NODE_KEY: str(checkpoint_path),
            TUNE_CHECKPOINT_ID: tune_checkpoint_id,
        })
def test_resume_from_checkpoint(ray_start_4_cpus, tmpdir):
    """Resuming from a (relocated) checkpoint continues boosting.

    Trains 5 rounds, moves the checkpoint to a new directory, resumes for
    5 more rounds, and checks the final model has 10 trees.
    """
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = LightGBMTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        num_boost_round=5,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
    )
    result = trainer.fit()
    checkpoint = result.checkpoint
    model, _ = load_checkpoint(checkpoint)
    assert get_num_trees(model) == 5

    # Move checkpoint to a different directory.
    checkpoint_dict = result.checkpoint.to_dict()
    checkpoint = Checkpoint.from_dict(checkpoint_dict)
    checkpoint_path = checkpoint.to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(checkpoint_path)

    trainer = LightGBMTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        num_boost_round=5,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
        resume_from_checkpoint=resume_from,
    )
    result = trainer.fit()
    checkpoint = result.checkpoint
    # Fixed misleading local name: this is a LightGBM model, not XGBoost
    # (the variable was presumably copy-pasted from the XGBoost test).
    model, _ = load_checkpoint(checkpoint)
    assert get_num_trees(model) == 10
def testDataCheckpointSerde(self):
    """Dict checkpoints serialize with their internal representation and data."""
    # Data checkpoints keep the same internal representation, including
    # their data.
    checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
    representation = checkpoint.get_internal_representation()
    self._testCheckpointSerde(checkpoint, *representation)
def _prepare_dict_checkpoint(self) -> Checkpoint:
    """Build a dict-backed checkpoint from fixture data and sanity-check it."""
    checkpoint = Checkpoint.from_dict(self.checkpoint_dict_data)
    self.assertIsInstance(checkpoint, Checkpoint)
    self.assertTrue(checkpoint._data_dict)
    expected_metric = self.checkpoint_dict_data["metric"]
    self.assertEqual(checkpoint._data_dict["metric"], expected_metric)
    return checkpoint
def testLocalCheckpointSerde(self):
    """Directory checkpoints become bytes (a pickled dict) when serialized."""
    # Compare against the source dict checkpoint's representation, since
    # local checkpoints currently serialize to the same pickled dict.
    source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
    with source_checkpoint.as_directory() as tmpdir:
        directory_checkpoint = Checkpoint.from_directory(tmpdir)
        self._testCheckpointSerde(
            directory_checkpoint,
            *source_checkpoint.get_internal_representation())
def test_predict_from_checkpoint_no_preprocessor(model):
    """A checkpoint containing only the model still yields a working predictor."""
    predictor = TorchPredictor.from_checkpoint(
        Checkpoint.from_dict({MODEL_KEY: model}))

    predictions = predictor.predict(np.array([[1], [2], [3]]))

    assert len(predictions) == 3
    assert predictions.to_numpy().flatten().tolist() == [2, 4, 6]
def test_init(model, preprocessor):
    """Direct construction and from_checkpoint produce equivalent predictors."""
    predictor = TorchPredictor(model=model, preprocessor=preprocessor)

    checkpoint_data = {MODEL_KEY: model, PREPROCESSOR_KEY: preprocessor}
    checkpoint_predictor = TorchPredictor.from_checkpoint(
        Checkpoint.from_dict(checkpoint_data))

    assert checkpoint_predictor.model == predictor.model
    assert checkpoint_predictor.preprocessor == predictor.preprocessor
def testObjRefCheckpointSerde(self):
    """Object-ref checkpoints keep the obj ref as their own representation."""
    # Obj ref checkpoints are dict checkpoints put into the Ray object
    # store; unlike local/bytes checkpoints they have a distinct data
    # representation (the obj ref itself), so compare against that.
    source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
    checkpoint = Checkpoint.from_object_ref(source_checkpoint.to_object_ref())
    self._testCheckpointSerde(
        checkpoint, *checkpoint.get_internal_representation())
def testBytesCheckpointSerde(self):
    """Bytes checkpoints round-trip through pickled dict data."""
    # A bytes checkpoint is just a dict checkpoint built from pickled
    # data, so the source dict checkpoint's representation is the oracle.
    source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
    restored = Checkpoint.from_bytes(source_checkpoint.to_bytes())
    self._testCheckpointSerde(
        restored, *source_checkpoint.get_internal_representation())
def testLocalCheckpointSerde(self):
    """Directory checkpoints serialize to bytes (currently a pickled dict)."""
    source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
    tmpdir = source_checkpoint.to_directory()
    # Remove the materialized directory once the test finishes.
    self.addCleanup(shutil.rmtree, tmpdir)

    checkpoint = Checkpoint.from_directory(tmpdir)
    self._testCheckpointSerde(
        checkpoint, *source_checkpoint.get_internal_representation())
def test_predict_array():
    """Predicting on a plain numpy batch returns one value per row."""
    predictor = TensorflowPredictor.from_checkpoint(
        Checkpoint.from_dict({MODEL_KEY: weights}), build_model)

    predictions = predictor.predict(np.array([[1], [2], [3]]))

    assert len(predictions) == 3
    assert predictions.to_numpy().flatten().tolist() == [1, 2, 3]
def write_checkpoint(self, checkpoint: Dict):
    """Persist ``checkpoint`` (plus the preprocessor) via Tune."""
    self.add_tune_checkpoint_id(checkpoint)
    # Bundle the preprocessor so it is restored together with the model.
    checkpoint[PREPROCESSOR_KEY] = self.preprocessor
    air_checkpoint = Checkpoint.from_dict(checkpoint)

    # If inside a Tune Trainable, then checkpoint with Tune.
    with tune.checkpoint_dir(step=self._latest_checkpoint_id) as checkpoint_dir:
        air_checkpoint.to_directory(path=checkpoint_dir)
def test_dict_checkpoint_dict(self):
    """Test conversion from dict to dict checkpoint and back."""
    checkpoint = self._prepare_dict_checkpoint()

    # Export to a plain dict...
    data_dict = checkpoint.to_dict()
    self.assertIsInstance(data_dict, dict)

    # ...and rebuild a checkpoint from it.
    rebuilt = Checkpoint.from_dict(data_dict)
    self.assertTrue(rebuilt._data_dict)
    self._assert_dict_checkpoint(rebuilt)
def test_init():
    """Direct construction and from_checkpoint produce equivalent predictors."""
    preprocessor = DummyPreprocessor()
    predictor = TensorflowPredictor(
        model_definition=build_model,
        preprocessor=preprocessor,
        model_weights=weights)

    checkpoint_data = {MODEL_KEY: weights, PREPROCESSOR_KEY: preprocessor}
    checkpoint_predictor = TensorflowPredictor.from_checkpoint(
        Checkpoint.from_dict(checkpoint_data), build_model)

    assert checkpoint_predictor.model_definition == predictor.model_definition
    assert checkpoint_predictor.model_weights == predictor.model_weights
    assert checkpoint_predictor.preprocessor == predictor.preprocessor
def test_batch_prediction():
    """Batch prediction applies the predictor to every dataset row."""
    batch_predictor = BatchPredictor.from_checkpoint(
        Checkpoint.from_dict({"factor": 2.0}), DummyPredictor
    )

    test_dataset = ray.data.from_items([1.0, 2.0, 3.0, 4.0])
    predicted = (
        batch_predictor.predict(test_dataset)
        .to_pandas()
        .to_numpy()
        .squeeze()
        .tolist()
    )
    # DummyPredictor scales each input by the checkpointed factor.
    assert predicted == [2.0, 4.0, 6.0, 8.0]
def create_checkpoint(preprocessor: Optional[Preprocessor] = None,
                      config: Optional[dict] = None) -> Checkpoint:
    """Train a dummy RL trainable once and return its state as a dict checkpoint."""
    rl_trainer = RLTrainer(
        algorithm=_DummyTrainer,
        config=config or {},
        preprocessor=preprocessor,
    )
    rl_trainable = rl_trainer.as_trainable()()

    with tempfile.TemporaryDirectory() as checkpoint_dir:
        checkpoint_file = rl_trainable.save(checkpoint_dir)
        checkpoint_path = TrainableUtil.find_checkpoint_dir(checkpoint_file)
        # Snapshot the directory contents into a dict before the temp
        # directory is deleted.
        checkpoint_data = Checkpoint.from_directory(checkpoint_path).to_dict()
        return Checkpoint.from_dict(checkpoint_data)
def test_init():
    """Predictor restored from a directory checkpoint matches a direct one."""
    preprocessor = DummyPreprocessor()
    preprocessor.attr = 1
    predictor = LightGBMPredictor(model=model, preprocessor=preprocessor)

    with tempfile.TemporaryDirectory() as tmpdir:
        # This somewhat convoluted procedure is the same as in the
        # Trainers. The reason for saving model to disk instead
        # of directly to the dict as bytes is due to all callbacks
        # following save to disk logic. GBDT models are small
        # enough that IO should not be an issue.
        model.save_model(os.path.join(tmpdir, MODEL_KEY))
        Checkpoint.from_dict({PREPROCESSOR_KEY: preprocessor}).to_directory(
            path=tmpdir)

        checkpoint_predictor = LightGBMPredictor.from_checkpoint(
            Checkpoint.from_directory(tmpdir))

        assert get_num_trees(checkpoint_predictor.model) == get_num_trees(
            predictor.model)
        assert checkpoint_predictor.preprocessor.attr == (
            predictor.preprocessor.attr)
def test_preprocessor_in_checkpoint(ray_start_4_cpus, tmpdir):
    """A fitted preprocessor must survive a checkpoint dict/directory round-trip."""

    class DummyPreprocessor(Preprocessor):
        def __init__(self):
            super().__init__()
            self.is_same = True

        def fit(self, dataset):
            self.fitted_ = True

        def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
            return df

    trainer = SklearnTrainer(
        estimator=RandomForestClassifier(),
        scaling_config=scale_config,
        label_column="target",
        datasets={
            TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
            "valid": ray.data.from_pandas(test_df),
        },
        preprocessor=DummyPreprocessor(),
    )
    result = trainer.fit()

    # Round-trip through a dict and then a fresh directory to simulate the
    # checkpoint being moved somewhere else.
    relocated = Checkpoint.from_dict(result.checkpoint.to_dict())
    resume_from = Checkpoint.from_directory(relocated.to_directory(tmpdir))

    model, preprocessor = load_from_checkpoint(resume_from)
    assert hasattr(model, "feature_importances_")
    assert preprocessor.is_same
    assert preprocessor.fitted_