def from_checkpoint(
    cls,
    checkpoint: Checkpoint,
    *,
    pipeline: Optional[Type[Pipeline]] = None,
    **pipeline_kwargs,
) -> "HuggingFacePredictor":
    """Instantiate the predictor from a Checkpoint.

    The checkpoint is expected to be a result of ``HuggingFaceTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and preprocessor from.
            It is expected to be from the result of a ``HuggingFaceTrainer``
            run.
        pipeline: A ``transformers.pipelines.Pipeline`` class to use.
            If not specified, will use the ``pipeline`` abstraction wrapper.
        **pipeline_kwargs: Any kwargs to pass to the pipeline initialization.
            If ``pipeline`` is None, this must contain the 'task' argument.
            Cannot contain 'model'.
    """
    if not pipeline and "task" not in pipeline_kwargs:
        raise ValueError(
            "If `pipeline` is not specified, 'task' must be passed as a kwarg."
        )
    pipeline = pipeline or pipeline_factory
    with checkpoint.as_directory() as checkpoint_path:
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
        pipeline = pipeline(model=checkpoint_path, **pipeline_kwargs)
    return HuggingFacePredictor(
        pipeline=pipeline,
        preprocessor=preprocessor,
    )
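# Hedged usage sketch for the constructor above: `result` is assumed to be the
# Result returned by a ``HuggingFaceTrainer.fit()`` run; the task name, column
# name, and input text are illustrative placeholders, not values from the source.
import pandas as pd

predictor = HuggingFacePredictor.from_checkpoint(
    result.checkpoint,  # assumed: checkpoint produced by HuggingFaceTrainer
    task="text-classification",  # required since no `pipeline` class is given
)
predictions = predictor.predict(pd.DataFrame({"text": ["example input"]}))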
def load_checkpoint(
    checkpoint: Checkpoint,
    model: Union[Type[transformers.modeling_utils.PreTrainedModel], torch.nn.Module],
    tokenizer: Optional[Type[transformers.PreTrainedTokenizer]] = None,
    *,
    tokenizer_kwargs: Optional[Dict[str, Any]] = None,
    **pretrained_model_kwargs,
) -> Tuple[
    Union[transformers.modeling_utils.PreTrainedModel, torch.nn.Module],
    transformers.training_args.TrainingArguments,
    Optional[transformers.PreTrainedTokenizer],
    Optional[Preprocessor],
]:
    """Load a Checkpoint from ``HuggingFaceTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and preprocessor from.
            It is expected to be from the result of a ``HuggingFaceTrainer``
            run.
        model: Either a ``transformers.PreTrainedModel`` class
            (e.g. ``AutoModelForCausalLM``), or a PyTorch model to load the
            weights into. This should be the same model used for training.
        tokenizer: A ``transformers.PreTrainedTokenizer`` class to load the
            model tokenizer into. If not specified, the tokenizer will not be
            loaded. Will throw an exception if specified but no tokenizer was
            found in the checkpoint.
        tokenizer_kwargs: Dict of kwargs to pass to the
            ``tokenizer.from_pretrained`` call. Ignored if ``tokenizer`` is
            None.
        **pretrained_model_kwargs: Kwargs to pass to the
            ``model.from_pretrained`` call. Ignored if ``model`` is not a
            ``transformers.PreTrainedModel`` class.

    Returns:
        The model, ``TrainingArguments``, tokenizer and AIR preprocessor
        contained within. Those can be used to initialize a
        ``transformers.Trainer`` object locally.
    """
    tokenizer_kwargs = tokenizer_kwargs or {}
    with checkpoint.as_directory() as checkpoint_path:
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
        if isinstance(model, torch.nn.Module):
            state_dict = torch.load(
                os.path.join(checkpoint_path, WEIGHTS_NAME), map_location="cpu"
            )
            model = load_torch_model(saved_model=state_dict, model_definition=model)
        else:
            model = model.from_pretrained(checkpoint_path, **pretrained_model_kwargs)
        if tokenizer:
            tokenizer = tokenizer.from_pretrained(checkpoint_path, **tokenizer_kwargs)
        training_args_path = os.path.join(checkpoint_path, TRAINING_ARGS_NAME)
        if os.path.exists(training_args_path):
            with open(training_args_path, "rb") as f:
                training_args = torch.load(f, map_location="cpu")
        else:
            training_args = None
    return model, training_args, tokenizer, preprocessor
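# Hedged usage sketch: assumes `result` is the Result of a ``HuggingFaceTrainer``
# run that also saved a tokenizer; ``AutoModelForCausalLM`` stands in for whatever
# model class was actually used during training.
from transformers import AutoModelForCausalLM, AutoTokenizer

model, training_args, tokenizer, preprocessor = load_checkpoint(
    result.checkpoint,  # assumed: checkpoint produced by HuggingFaceTrainer
    AutoModelForCausalLM,
    tokenizer=AutoTokenizer,
)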
def load_checkpoint(
    checkpoint: Checkpoint,
    env: Optional[EnvType] = None,
) -> Tuple[Policy, Optional[Preprocessor]]:
    """Load a Checkpoint from ``RLTrainer``.

    Args:
        checkpoint: The checkpoint to load the policy and preprocessor from.
            It is expected to be from the result of an ``RLTrainer`` run.
        env: Optional environment to instantiate the trainer with. If not
            given, it is parsed from the saved trainer configuration instead.

    Returns:
        The policy and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        trainer_class_path = os.path.join(checkpoint_path, RL_TRAINER_CLASS_FILE)
        config_path = os.path.join(checkpoint_path, RL_CONFIG_FILE)

        if not os.path.exists(trainer_class_path):
            raise ValueError(
                f"RLPredictor only works with checkpoints created by "
                f"RLTrainer. The checkpoint you specified is missing the "
                f"`{RL_TRAINER_CLASS_FILE}` file."
            )

        if not os.path.exists(config_path):
            raise ValueError(
                f"RLPredictor only works with checkpoints created by "
                f"RLTrainer. The checkpoint you specified is missing the "
                f"`{RL_CONFIG_FILE}` file."
            )

        with open(trainer_class_path, "rb") as fp:
            trainer_cls = cpickle.load(fp)

        with open(config_path, "rb") as fp:
            config = cpickle.load(fp)

        checkpoint_data_path = None
        for file in os.listdir(checkpoint_path):
            if file.startswith("checkpoint") and not file.endswith(".tune_metadata"):
                checkpoint_data_path = os.path.join(checkpoint_path, file)

        if not checkpoint_data_path:
            raise ValueError(
                f"Could not find checkpoint data in RLlib checkpoint. "
                f"Found files: {list(os.listdir(checkpoint_path))}"
            )

        preprocessor = load_preprocessor_from_dir(checkpoint_path)

        config.get("evaluation_config", {}).pop("in_evaluation", None)
        trainer = trainer_cls(config=config, env=env)
        trainer.restore(checkpoint_data_path)
        policy = trainer.get_policy()

        return policy, preprocessor
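# Hedged usage sketch: assumes `result` comes from an ``RLTrainer`` run on
# "CartPole-v1"; the environment name and zero observation are illustrative only.
import numpy as np

policy, preprocessor = load_checkpoint(result.checkpoint, env="CartPole-v1")
obs = np.zeros(4, dtype=np.float32)  # placeholder observation for CartPole
action, _, _ = policy.compute_single_action(obs)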
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[BaseEstimator, Optional[Preprocessor]]:
    """Load a Checkpoint from ``SklearnTrainer``.

    Args:
        checkpoint: The checkpoint to load the estimator and preprocessor
            from. It is expected to be from the result of a ``SklearnTrainer``
            run.

    Returns:
        The estimator and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        estimator_path = os.path.join(checkpoint_path, MODEL_KEY)
        with open(estimator_path, "rb") as f:
            estimator = cpickle.load(f)
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
    return estimator, preprocessor
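# Hedged usage sketch: assumes `result` is the Result of a ``SklearnTrainer`` run
# and `df` is a pandas DataFrame with the same feature columns used for training;
# the preprocessor is applied only if one was saved in the checkpoint.
estimator, preprocessor = load_checkpoint(result.checkpoint)
batch = preprocessor.transform_batch(df) if preprocessor else df
predictions = estimator.predict(batch)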
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[xgboost.Booster, Optional[Preprocessor]]:
    """Load a Checkpoint from ``XGBoostTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and preprocessor from.
            It is expected to be from the result of an ``XGBoostTrainer`` run.

    Returns:
        The model and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        xgb_model = xgboost.Booster()
        xgb_model.load_model(os.path.join(checkpoint_path, MODEL_KEY))
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
    return xgb_model, preprocessor
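# Hedged usage sketch: assumes `result` is the Result of an ``XGBoostTrainer`` run
# and `df` is a pandas DataFrame of features matching the training data; the
# booster expects a ``DMatrix`` for prediction.
import xgboost

xgb_model, preprocessor = load_checkpoint(result.checkpoint)
batch = preprocessor.transform_batch(df) if preprocessor else df
predictions = xgb_model.predict(xgboost.DMatrix(batch))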
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[lightgbm.Booster, Optional[Preprocessor]]:
    """Load a Checkpoint from ``LightGBMTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and preprocessor from.
            It is expected to be from the result of a ``LightGBMTrainer`` run.

    Returns:
        The model and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        lgbm_model = lightgbm.Booster(
            model_file=os.path.join(checkpoint_path, MODEL_KEY)
        )
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
    return lgbm_model, preprocessor
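# Hedged usage sketch: assumes `result` is the Result of a ``LightGBMTrainer`` run
# and `df` is a pandas DataFrame of features matching the training data;
# ``lightgbm.Booster.predict`` accepts a DataFrame directly.
lgbm_model, preprocessor = load_checkpoint(result.checkpoint)
batch = preprocessor.transform_batch(df) if preprocessor else df
predictions = lgbm_model.predict(batch)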