Exemple #1
0
    def load(model_dir):
        """Load a pretrained model from disk and wrap it in a `LudwigModel`.

        # Inputs

        :param model_dir: (string) path to the directory containing the model.
               If the model was trained by the `train` or `experiment` command,
               the model is in `results_dir/experiment_dir/model`.

        # Return

        :return: (LudwigModel) a LudwigModel object whose `model` and
                 `train_set_metadata` attributes have been populated.

        # Example usage

        ```python
        ludwig_model = LudwigModel.load(model_dir)
        ```

        """
        # Restore the trained model together with the definition it was
        # built from.
        restored_model, definition = load_model_and_definition(model_dir)

        wrapper = LudwigModel(definition)
        wrapper.model = restored_model

        # The training set metadata file is looked up inside `model_dir`.
        metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
        wrapper.train_set_metadata = load_metadata(metadata_path)
        return wrapper
Exemple #2
0
    def from_directory(
        cls: "InferenceModule",
        directory: str,
        device: Optional[TorchDevice] = None,
    ):
        """Build an InferenceModule from a saved model directory.

        The inference stages are initialized from `directory` on `device`
        (falling back to `DEVICE` when no device is given). The config and the
        training set metadata are loaded only if their files exist in
        `directory`; otherwise `None` is passed for them.
        """
        if device is None:
            logging.info(f'No device specified. Loading using device "{DEVICE}".')
            device = DEVICE

        stages = _init_inference_stages_from_directory(directory, device=device)

        # Optional artifact: model hyperparameters / config.
        config = None
        config_path = os.path.join(directory, MODEL_HYPERPARAMETERS_FILE_NAME)
        if os.path.exists(config_path):
            config = load_json(config_path)

        # Optional artifact: training set metadata.
        training_set_metadata = None
        metadata_path = os.path.join(directory, TRAIN_SET_METADATA_FILE_NAME)
        if os.path.exists(metadata_path):
            training_set_metadata = load_metadata(metadata_path)

        preprocessor = stages[PREPROCESSOR]
        predictor = stages[PREDICTOR]
        postprocessor = stages[POSTPROCESSOR]
        return cls(
            preprocessor,
            predictor,
            postprocessor,
            config=config,
            training_set_metadata=training_set_metadata,
        )
Exemple #3
0
    def initialize_model(self,
                         train_set_metadata=None,
                         train_set_metadata_json=None,
                         gpus=None,
                         gpu_fraction=1,
                         random_seed=default_random_seed,
                         debug=False,
                         **kwargs):
        """This function initializes a model. It is needed for performing
        online learning, so it has to be called before `train_online`.
        `train` initializes the model under the hood, so there is no need to
        call this function if you don't use `train_online`.

        # Inputs

        :param train_set_metadata: (dict) it contains metadata information for
               the input and output features the model is going to be trained
               on. It's the same content of the metadata json file that is
               created while training.
        :param train_set_metadata_json: (string) path to the JSON metadata file
               created while training. It contains metadata information for the
               input and output features the model is going to be trained on.
               When given, it takes precedence over `train_set_metadata`.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses the
               same syntax of CUDA_VISIBLE_DEVICES)
        :param gpu_fraction: (float, default: `1`) fraction of GPU memory to
               initialize the process with
        :param random_seed: (int, default: `default_random_seed`) a random seed
               that is going to be used anywhere there is a call to a random
               number generator: data splitting, parameter initialization and
               training set shuffling
        :param debug: (bool, default: `False`) enables debugging mode
        :param kwargs: accepted for call compatibility; not used by this
               function.

        # Raises

        :raises ValueError: if both `train_set_metadata` and
                `train_set_metadata_json` are `None`.
        """

        if train_set_metadata is None and train_set_metadata_json is None:
            raise ValueError(
                'train_set_metadata or train_set_metadata_json must not be '
                'None.')
        if train_set_metadata_json is not None:
            # The JSON file overrides any dict passed in `train_set_metadata`.
            train_set_metadata = load_metadata(train_set_metadata_json)

        # update model definition with metadata properties
        update_model_definition_with_metadata(self.model_definition,
                                              train_set_metadata)

        # build model
        model = Model(self.model_definition['input_features'],
                      self.model_definition['output_features'],
                      self.model_definition['combiner'],
                      self.model_definition['training'],
                      self.model_definition['preprocessing'],
                      random_seed=random_seed,
                      debug=debug)
        model.initialize_session(gpus=gpus, gpu_fraction=gpu_fraction)

        # set parameters
        self.model = model
        self.train_set_metadata = train_set_metadata
Exemple #4
0
    def load(model_dir, gpus=None, gpu_memory_limit=None,
             allow_parallel_threads=True):
        """Load a pretrained model from disk and wrap it in a `LudwigModel`.

        # Inputs

        :param model_dir: (string) path to the directory containing the model.
               If the model was trained by the `train` or `experiment` command,
               the model is in `results_dir/experiment_dir/model`.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses the
               same syntax of CUDA_VISIBLE_DEVICES)
        :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
               allocate per GPU device.
        :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
               to use multithreading parallelism to improve performance at the
               cost of determinism.

        # Return

        :return: (LudwigModel) a LudwigModel object whose `model` and
                 `train_set_metadata` attributes have been populated.

        # Example usage

        ```python
        ludwig_model = LudwigModel.load(model_dir)
        ```

        """
        # Restore the trained model and its definition, forwarding the
        # hardware/threading options unchanged.
        restored_model, definition = load_model_and_definition(
            model_dir,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
        )

        wrapper = LudwigModel(definition)
        wrapper.model = restored_model

        # The training set metadata file is looked up inside `model_dir`.
        metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
        wrapper.train_set_metadata = load_metadata(metadata_path)
        return wrapper
Exemple #5
0
    def load(model_dir, logging_level=logging.ERROR):
        """Load a pretrained model from disk and wrap it in a `LudwigModel`.

        The root logger level is set to `logging_level` as a side effect, and
        progress bars are disabled for `logging.WARNING`, `logging.ERROR` and
        `logging.CRITICAL`.

        # Inputs

        :param model_dir: (string) path to the directory containing the model.
               If the model was trained by the `train` or `experiment` command,
               the model is in `results_dir/experiment_dir/model`.
        :param logging_level: (int, default: `logging.ERROR`) logging level to
               use for logging. Use logging constants like `logging.DEBUG`,
               `logging.INFO` and `logging.ERROR`. By default only errors will
               be printed.

        # Return

        :return: (LudwigModel) a LudwigModel object whose `model` and
                 `train_set_metadata` attributes have been populated.

        # Example usage

        ```python
        ludwig_model = LudwigModel.load(model_dir)
        ```

        """
        root_logger = logging.getLogger()
        root_logger.setLevel(logging_level)

        # Levels at which progress bars would only be noise.
        quiet_levels = {logging.WARNING, logging.ERROR, logging.CRITICAL}
        if logging_level in quiet_levels:
            set_disable_progressbar(True)

        restored_model, definition = load_model_and_definition(model_dir)

        wrapper = LudwigModel(definition)
        wrapper.model = restored_model

        # The training set metadata file is looked up inside `model_dir`.
        metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
        wrapper.train_set_metadata = load_metadata(metadata_path)
        return wrapper
Exemple #6
0
    def initialize_model(self,
                         train_set_metadata=None,
                         train_set_metadata_json=None,
                         gpus=None,
                         gpu_memory_limit=None,
                         allow_parallel_threads=True,
                         random_seed=default_random_seed,
                         debug=False,
                         **kwargs):
        """This function initializes a model. It is needed for performing
        online learning, so it has to be called before `train_online`.
        `train` initializes the model under the hood, so there is no need to
        call this function if you don't use `train_online`.

        # Inputs

        :param train_set_metadata: (dict) it contains metadata information for
               the input and output features the model is going to be trained
               on. It's the same content of the metadata json file that is
               created while training.
        :param train_set_metadata_json: (string) path to the JSON metadata file
               created while training. It contains metadata information for the
               input and output features the model is going to be trained on.
               When given, it takes precedence over `train_set_metadata`.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses the
               same syntax of CUDA_VISIBLE_DEVICES)
        :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
               allocate per GPU device.
        :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
               to use multithreading parallelism to improve performance at the
               cost of determinism.
        :param random_seed: (int, default: `default_random_seed`) a random seed
               that is going to be used anywhere there is a call to a random
               number generator: data splitting, parameter initialization and
               training set shuffling
        :param debug: (bool, default: `False`) enables debugging mode
        :param kwargs: accepted for call compatibility; not used by this
               function.

        # Raises

        :raises ValueError: if both `train_set_metadata` and
                `train_set_metadata_json` are `None`.
        """

        if train_set_metadata is None and train_set_metadata_json is None:
            raise ValueError(
                'train_set_metadata or train_set_metadata_json must not be '
                'None.')
        if train_set_metadata_json is not None:
            # The JSON file overrides any dict passed in `train_set_metadata`.
            train_set_metadata = load_metadata(train_set_metadata_json)

        # update model definition with metadata properties
        update_model_definition_with_metadata(self.model_definition,
                                              train_set_metadata)

        # build model
        model = Trainer(self.model_definition['input_features'],
                        self.model_definition['output_features'],
                        self.model_definition['combiner'],
                        self.model_definition[TRAINING],
                        self.model_definition['preprocessing'],
                        gpus=gpus,
                        gpu_memory_limit=gpu_memory_limit,
                        allow_parallel_threads=allow_parallel_threads,
                        random_seed=random_seed,
                        debug=debug)

        # set parameters
        self.model = model
        self.train_set_metadata = train_set_metadata
Exemple #7
0
    def load(model_dir,
             logging_level=logging.ERROR,
             use_horovod=None,
             gpus=None,
             gpu_memory_limit=None,
             allow_parallel_threads=True):
        """Load a pretrained model from disk and wrap it in a `LudwigModel`.

        The model definition and the training set metadata are read through
        `broadcast_return`, using the Horovod handle returned by
        `configure_horovod(use_horovod)`.

        # Inputs

        :param model_dir: (string) path to the directory containing the model.
               If the model was trained by the `train` or `experiment` command,
               the model is in `results_dir/experiment_dir/model`.
        :param logging_level: Log level that will be sent to stderr.
        :param use_horovod: (bool) use Horovod for distributed training. Will be
               set automatically if `horovodrun` is used to launch the training
               script.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses the
               same syntax of CUDA_VISIBLE_DEVICES)
        :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
               allocate per GPU device.
        :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
               to use multithreading parallelism to improve performance at the
               cost of determinism.

        # Return

        :return: (LudwigModel) a LudwigModel object

        # Example usage

        ```python
        ludwig_model = LudwigModel.load(model_dir)
        ```

        """

        def _read_definition():
            # Model hyperparameters are stored as JSON inside `model_dir`.
            return load_json(
                os.path.join(model_dir, MODEL_HYPERPARAMETERS_FILE_NAME))

        def _read_metadata():
            # Training set metadata is stored inside `model_dir` as well.
            return load_metadata(
                os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME))

        horovod = configure_horovod(use_horovod)
        definition = broadcast_return(_read_definition, horovod)

        # initialize the wrapper object from the definition
        wrapper = LudwigModel(
            definition,
            logging_level=logging_level,
            use_horovod=use_horovod,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
        )

        # build the model from the definition, then restore its weights
        wrapper.model = LudwigModel.create_model(definition)
        wrapper.load_weights(model_dir)

        # attach the training set metadata
        wrapper.training_set_metadata = broadcast_return(_read_metadata,
                                                         horovod)

        return wrapper