Example #1
    @classmethod
    def from_checkpoint(
        cls,
        checkpoint: Checkpoint,
        *,
        pipeline: Optional[Type[Pipeline]] = None,
        **pipeline_kwargs,
    ) -> "HuggingFacePredictor":
        """Instantiate the predictor from a Checkpoint.

        The checkpoint is expected to be the result of a ``HuggingFaceTrainer`` run.

        Args:
            checkpoint: The checkpoint to load the model and
                preprocessor from. It is expected to be from the result of a
                ``HuggingFaceTrainer`` run.
            pipeline: A ``transformers.pipelines.Pipeline`` class to use.
                If not specified, will use the ``pipeline`` abstraction
                wrapper.
            **pipeline_kwargs: Any kwargs to pass to the pipeline
                initialization. If ``pipeline`` is None, this must contain
                the 'task' argument. Cannot contain 'model'.
        """
        if not pipeline and "task" not in pipeline_kwargs:
            raise ValueError(
                "If `pipeline` is not specified, 'task' must be passed as a kwarg."
            )
        pipeline = pipeline or pipeline_factory
        with checkpoint.as_directory() as checkpoint_path:
            preprocessor = load_preprocessor_from_dir(checkpoint_path)
            pipeline = pipeline(model=checkpoint_path, **pipeline_kwargs)
        return HuggingFacePredictor(
            pipeline=pipeline,
            preprocessor=preprocessor,
        )
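
Usage sketch (hypothetical): the import path, the ``result`` object, and ``batch_df`` below are
assumptions, not part of the snippet above; ``result`` stands for the output of a prior
``HuggingFaceTrainer.fit()`` call.

# Import path may differ between Ray versions; treat it as an assumption.
from ray.train.huggingface import HuggingFacePredictor

predictor = HuggingFacePredictor.from_checkpoint(
    result.checkpoint,           # checkpoint from a HuggingFaceTrainer run (assumed)
    task="text-classification",  # required because no explicit `pipeline` class is given
)
predictions = predictor.predict(batch_df)  # `batch_df`: an assumed pandas DataFrame of inputs
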
Example #2
def load_checkpoint(
    checkpoint: Checkpoint,
    model: Union[Type[transformers.modeling_utils.PreTrainedModel],
                 torch.nn.Module],
    tokenizer: Optional[Type[transformers.PreTrainedTokenizer]] = None,
    *,
    tokenizer_kwargs: Optional[Dict[str, Any]] = None,
    **pretrained_model_kwargs,
) -> Tuple[Union[transformers.modeling_utils.PreTrainedModel, torch.nn.Module],
           transformers.training_args.TrainingArguments,
           Optional[transformers.PreTrainedTokenizer],
           Optional["Preprocessor"], ]:
    """Load a Checkpoint from ``HuggingFaceTrainer``.


    Args:
        checkpoint: The checkpoint to load the model and
            preprocessor from. It is expected to be from the result of a
            ``HuggingFaceTrainer`` run.
        model: Either a ``transformers.PreTrainedModel`` class
            (e.g. ``AutoModelForCausalLM``) or a PyTorch model to load the
            weights into. This should be the same model that was used for
            training.
        tokenizer: A ``transformers.PreTrainedTokenizer`` class to load
            the model tokenizer with. If not specified, the tokenizer will
            not be loaded. An exception is raised if a tokenizer class is
            specified but no tokenizer was found in the checkpoint.
        tokenizer_kwargs: Dict of kwargs to pass to the
            ``tokenizer.from_pretrained`` call. Ignored if ``tokenizer`` is None.
        **pretrained_model_kwargs: Kwargs to pass to the ``model.from_pretrained``
            call. Ignored if ``model`` is not a ``transformers.PreTrainedModel``
            class.

    Returns:
        The model, ``TrainingArguments``, tokenizer and AIR preprocessor
        contained within. Those can be used to initialize a ``transformers.Trainer``
        object locally.
    """
    tokenizer_kwargs = tokenizer_kwargs or {}
    with checkpoint.as_directory() as checkpoint_path:
        preprocessor = load_preprocessor_from_dir(checkpoint_path)
        if isinstance(model, torch.nn.Module):
            state_dict = torch.load(os.path.join(checkpoint_path,
                                                 WEIGHTS_NAME),
                                    map_location="cpu")
            model = load_torch_model(saved_model=state_dict,
                                     model_definition=model)
        else:
            model = model.from_pretrained(checkpoint_path,
                                          **pretrained_model_kwargs)
        if tokenizer:
            tokenizer = tokenizer.from_pretrained(checkpoint_path,
                                                  **tokenizer_kwargs)
        training_args_path = os.path.join(checkpoint_path, TRAINING_ARGS_NAME)
        if os.path.exists(training_args_path):
            with open(training_args_path, "rb") as f:
                training_args = torch.load(f, map_location="cpu")
        else:
            training_args = None
    return model, training_args, tokenizer, preprocessor
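
Usage sketch (hypothetical): ``result`` below is assumed to come from a prior
``HuggingFaceTrainer.fit()`` call and is not part of the snippet above.

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

model, training_args, tokenizer, preprocessor = load_checkpoint(
    result.checkpoint,
    AutoModelForCausalLM,    # the same model class that was used for training (assumed)
    tokenizer=AutoTokenizer,
)
# The returned pieces can be used to build a local Trainer, as the docstring notes.
trainer = transformers.Trainer(model=model, args=training_args, tokenizer=tokenizer)
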
Example #3
def load_checkpoint(
    checkpoint: Checkpoint,
    env: Optional[EnvType] = None,
) -> Tuple[Policy, Optional["Preprocessor"]]:
    """Load a Checkpoint from ``RLTrainer``.

    Args:
        checkpoint: The checkpoint to load the policy and
            preprocessor from. It is expected to be from the result of a
            ``RLTrainer`` run.
        env: Optional environment to instantiate the trainer with. If not given,
            it is parsed from the saved trainer configuration instead.

    Returns:
        The policy and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        trainer_class_path = os.path.join(checkpoint_path,
                                          RL_TRAINER_CLASS_FILE)
        config_path = os.path.join(checkpoint_path, RL_CONFIG_FILE)

        if not os.path.exists(trainer_class_path):
            raise ValueError(
                f"RLPredictor only works with checkpoints created by "
                f"RLTrainer. The checkpoint you specified is missing the "
                f"`{RL_TRAINER_CLASS_FILE}` file.")

        if not os.path.exists(config_path):
            raise ValueError(
                f"RLPredictor only works with checkpoints created by "
                f"RLTrainer. The checkpoint you specified is missing the "
                f"`{RL_CONFIG_FILE}` file.")

        with open(trainer_class_path, "rb") as fp:
            trainer_cls = cpickle.load(fp)

        with open(config_path, "rb") as fp:
            config = cpickle.load(fp)

        checkpoint_data_path = None
        for file in os.listdir(checkpoint_path):
            if file.startswith(
                    "checkpoint") and not file.endswith(".tune_metadata"):
                checkpoint_data_path = os.path.join(checkpoint_path, file)

        if not checkpoint_data_path:
            raise ValueError(
                f"Could not find checkpoint data in RLlib checkpoint. "
                f"Found files: {list(os.listdir(checkpoint_path))}")

        preprocessor = load_preprocessor_from_dir(checkpoint_path)

        config.get("evaluation_config", {}).pop("in_evaluation", None)
        trainer = trainer_cls(config=config, env=env)
        trainer.restore(checkpoint_data_path)

        policy = trainer.get_policy()
        return policy, preprocessor
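
Usage sketch (hypothetical): ``result`` is assumed to come from an ``RLTrainer`` run, and the
observation shape below is an assumption for ``CartPole-v1``.

import numpy as np

policy, preprocessor = load_checkpoint(result.checkpoint, env="CartPole-v1")

obs_batch = np.zeros((1, 4), dtype=np.float32)  # dummy batch of one observation (assumed shape)
if preprocessor is not None:
    obs_batch = preprocessor.transform_batch(obs_batch)  # apply the AIR preprocessor first, if any
actions, _, _ = policy.compute_actions(obs_batch)
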
Example #4
    def get_preprocessor(self) -> Optional["Preprocessor"]:
        """Return the saved preprocessor, if one exists."""

        # The preprocessor will either be stored in an in-memory dict or
        # written to storage. In either case, it will use the PREPROCESSOR_KEY key.

        # First try converting to dictionary.
        checkpoint_dict = self.to_dict()
        preprocessor = checkpoint_dict.get(PREPROCESSOR_KEY, None)

        if preprocessor is None:
            # Fallback to reading from directory.
            with self.as_directory() as checkpoint_path:
                preprocessor = load_preprocessor_from_dir(checkpoint_path)

        return preprocessor
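
Usage sketch (hypothetical): ``result`` and ``raw_batch`` are assumptions standing in for the
output of a prior ``Trainer.fit()`` call and a batch of raw input data.

preprocessor = result.checkpoint.get_preprocessor()
if preprocessor is not None:
    transformed = preprocessor.transform_batch(raw_batch)  # same transform that was fit in training
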
Example #5
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[xgboost.Booster, Optional["Preprocessor"]]:
    """Load a Checkpoint from ``XGBoostTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and
            preprocessor from. It is expected to be from the result of a
            ``XGBoostTrainer`` run.

    Returns:
        The model and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        xgb_model = xgboost.Booster()
        xgb_model.load_model(os.path.join(checkpoint_path, MODEL_KEY))
        preprocessor = load_preprocessor_from_dir(checkpoint_path)

    return xgb_model, preprocessor
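
Usage sketch (hypothetical): ``result`` and ``features_df`` are assumptions; ``features_df``
stands for a pandas DataFrame of feature columns.

import xgboost

booster, preprocessor = load_checkpoint(result.checkpoint)
if preprocessor is not None:
    features_df = preprocessor.transform_batch(features_df)
predictions = booster.predict(xgboost.DMatrix(features_df))
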
Example #6
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[lightgbm.Booster, Optional["Preprocessor"]]:
    """Load a Checkpoint from ``LightGBMTrainer``.

    Args:
        checkpoint: The checkpoint to load the model and
            preprocessor from. It is expected to be from the result of a
            ``LightGBMTrainer`` run.

    Returns:
        The model and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        lgbm_model = lightgbm.Booster(
            model_file=os.path.join(checkpoint_path, MODEL_KEY))
        preprocessor = load_preprocessor_from_dir(checkpoint_path)

    return lgbm_model, preprocessor
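
Usage sketch (hypothetical): same assumptions as above for ``result`` and ``features_df``.

booster, preprocessor = load_checkpoint(result.checkpoint)
if preprocessor is not None:
    features_df = preprocessor.transform_batch(features_df)
predictions = booster.predict(features_df)  # lightgbm.Booster.predict accepts a DataFrame directly
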
Example #7
def load_checkpoint(
    checkpoint: Checkpoint,
) -> Tuple[BaseEstimator, Optional["Preprocessor"]]:
    """Load a Checkpoint from ``SklearnTrainer``.

    Args:
        checkpoint: The checkpoint to load the estimator and
            preprocessor from. It is expected to be from the result of a
            ``SklearnTrainer`` run.

    Returns:
        The estimator and AIR preprocessor contained within.
    """
    with checkpoint.as_directory() as checkpoint_path:
        estimator_path = os.path.join(checkpoint_path, MODEL_KEY)
        with open(estimator_path, "rb") as f:
            estimator = cpickle.load(f)
        preprocessor = load_preprocessor_from_dir(checkpoint_path)

    return estimator, preprocessor
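
Usage sketch (hypothetical): same assumptions as above for ``result`` and ``features_df``.

estimator, preprocessor = load_checkpoint(result.checkpoint)
if preprocessor is not None:
    features_df = preprocessor.transform_batch(features_df)
predictions = estimator.predict(features_df)
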