def load(model_dir):
    """Restore a pretrained LudwigModel from a saved model directory.

    # Inputs

    :param model_dir: (string) path to the directory containing the model.
           If the model was trained by the `train` or `experiment` command,
           the model is in `results_dir/experiment_dir/model`.

    # Return

    :return: (LudwigModel) a LudwigModel object

    # Example usage

    ```python
    ludwig_model = LudwigModel.load(model_dir)
    ```
    """
    # Rebuild the wrapper around the persisted model and its definition,
    # then attach the training-set metadata stored alongside them.
    metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
    model, model_definition = load_model_and_definition(model_dir)

    loaded = LudwigModel(model_definition)
    loaded.model = model
    loaded.train_set_metadata = load_metadata(metadata_path)
    return loaded
def from_directory(
    cls: "InferenceModule",
    directory: str,
    device: Optional[TorchDevice] = None,
):
    """Create an InferenceModule from a directory containing a model, config,
    and training set metadata.

    The config JSON and training-set metadata JSON are optional; when a file
    is absent the corresponding argument passed to the constructor is None.
    """
    # Fall back to the module-level default device when none was requested.
    if device is None:
        logging.info(f'No device specified. Loading using device "{DEVICE}".')
        device = DEVICE

    stage_to_module = _init_inference_stages_from_directory(directory, device=device)

    config = None
    config_path = os.path.join(directory, MODEL_HYPERPARAMETERS_FILE_NAME)
    if os.path.exists(config_path):
        config = load_json(config_path)

    training_set_metadata = None
    metadata_path = os.path.join(directory, TRAIN_SET_METADATA_FILE_NAME)
    if os.path.exists(metadata_path):
        training_set_metadata = load_metadata(metadata_path)

    return cls(
        stage_to_module[PREPROCESSOR],
        stage_to_module[PREDICTOR],
        stage_to_module[POSTPROCESSOR],
        config=config,
        training_set_metadata=training_set_metadata,
    )
def initialize_model(self,
                     train_set_metadata=None,
                     train_set_metadata_json=None,
                     gpus=None,
                     gpu_fraction=1,
                     random_seed=default_random_seed,
                     debug=False,
                     **kwargs):
    """This function initializes a model. It is needed for performing online
    learning, so it has to be called before `train_online`.
    `train` initializes the model under the hood, so there is no need to
    call this function if you don't use `train_online`.

    # Inputs

    :param train_set_metadata: (dict) it contains metadata information for
           the input and output features the model is going to be trained
           on. It's the same content of the metadata json file that is
           created while training.
    :param train_set_metadata_json: (string) path to the JSON metadata file
           created while training. it contains metadata information for
           the input and output features the model is going to be trained on
    :param gpus: (string, default: `None`) list of GPUs to use (it uses the
           same syntax of CUDA_VISIBLE_DEVICES)
    :param gpu_fraction: (float, default `1.0`) fraction of GPU memory to
           initialize the process with
    :param random_seed: (int, default: `42`) a random seed that is going to
           be used anywhere there is a call to a random number generator:
           data splitting, parameter initialization and training set
           shuffling
    :param debug: (bool, default: `False`) enables debugging mode

    :raises ValueError: if neither `train_set_metadata` nor
            `train_set_metadata_json` is provided.
    """
    if train_set_metadata is None and train_set_metadata_json is None:
        # Fixed grammar of the error message ("must not None" -> "must not be None").
        raise ValueError(
            'train_set_metadata or train_set_metadata_json must not be None.'
        )
    if train_set_metadata_json is not None:
        train_set_metadata = load_metadata(train_set_metadata_json)

    # update model definition with metadata properties
    update_model_definition_with_metadata(
        self.model_definition,
        train_set_metadata
    )

    # build model
    model = Model(
        self.model_definition['input_features'],
        self.model_definition['output_features'],
        self.model_definition['combiner'],
        self.model_definition['training'],
        self.model_definition['preprocessing'],
        random_seed=random_seed,
        debug=debug
    )
    model.initialize_session(gpus=gpus,
                             gpu_fraction=gpu_fraction)

    # set parameters
    self.model = model
    self.train_set_metadata = train_set_metadata
def load(model_dir,
         gpus=None,
         gpu_memory_limit=None,
         allow_parallel_threads=True):
    """Restore a pretrained LudwigModel from a saved model directory.

    # Inputs

    :param model_dir: (string) path to the directory containing the model.
           If the model was trained by the `train` or `experiment` command,
           the model is in `results_dir/experiment_dir/model`.
    :param gpus: (string, default: `None`) list of GPUs to use (it uses the
           same syntax of CUDA_VISIBLE_DEVICES)
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
           allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
           to use multithreading parallelism to improve performance at the
           cost of determinism.

    # Return

    :return: (LudwigModel) a LudwigModel object

    # Example usage

    ```python
    ludwig_model = LudwigModel.load(model_dir)
    ```
    """
    # Recreate the model with the requested device configuration, then
    # attach the training-set metadata persisted next to the weights.
    model, model_definition = load_model_and_definition(
        model_dir,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
    )

    loaded = LudwigModel(model_definition)
    loaded.model = model

    metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
    loaded.train_set_metadata = load_metadata(metadata_path)
    return loaded
def load(model_dir, logging_level=logging.ERROR):
    """Restore a pretrained LudwigModel from a saved model directory.

    # Inputs

    :param model_dir: (string) path to the directory containing the model.
           If the model was trained by the `train` or `experiment` command,
           the model is in `results_dir/experiment_dir/model`.
    :param logging_level: (int, default: `logging.ERROR`) logging level to
           use for logging. Use logging constants like `logging.DEBUG`,
           `logging.INFO` and `logging.ERROR`. By default only errors will
           be printed.

    # Return

    :return: (LudwigModel) a LudwigModel object

    # Example usage

    ```python
    ludwig_model = LudwigModel.load(model_dir)
    ```
    """
    logging.getLogger().setLevel(logging_level)
    # Progress bars are only noise at WARNING and above, so turn them off.
    quiet_levels = {logging.WARNING, logging.ERROR, logging.CRITICAL}
    if logging_level in quiet_levels:
        set_disable_progressbar(True)

    model, model_definition = load_model_and_definition(model_dir)

    loaded = LudwigModel(model_definition)
    loaded.model = model

    metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
    loaded.train_set_metadata = load_metadata(metadata_path)
    return loaded
def initialize_model(self,
                     train_set_metadata=None,
                     train_set_metadata_json=None,
                     gpus=None,
                     gpu_memory_limit=None,
                     allow_parallel_threads=True,
                     random_seed=default_random_seed,
                     debug=False,
                     **kwargs):
    """This function initializes a model. It is needed for performing online
    learning, so it has to be called before `train_online`.
    `train` initializes the model under the hood, so there is no need to
    call this function if you don't use `train_online`.

    # Inputs

    :param train_set_metadata: (dict) it contains metadata information for
           the input and output features the model is going to be trained
           on. It's the same content of the metadata json file that is
           created while training.
    :param train_set_metadata_json: (string) path to the JSON metadata file
           created while training. it contains metadata information for
           the input and output features the model is going to be trained on
    :param gpus: (string, default: `None`) list of GPUs to use (it uses the
           same syntax of CUDA_VISIBLE_DEVICES)
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
           allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
           to use multithreading parallelism to improve performance at the
           cost of determinism.
    :param random_seed: (int, default: `42`) a random seed that is going to
           be used anywhere there is a call to a random number generator:
           data splitting, parameter initialization and training set
           shuffling
    :param debug: (bool, default: `False`) enables debugging mode

    :raises ValueError: if neither `train_set_metadata` nor
            `train_set_metadata_json` is provided.
    """
    if train_set_metadata is None and train_set_metadata_json is None:
        # Fixed grammar of the error message ("must not None" -> "must not be None").
        raise ValueError(
            'train_set_metadata or train_set_metadata_json must not be None.'
        )
    if train_set_metadata_json is not None:
        train_set_metadata = load_metadata(train_set_metadata_json)

    # update model definition with metadata properties
    update_model_definition_with_metadata(
        self.model_definition,
        train_set_metadata
    )

    # build model
    model = Trainer(
        self.model_definition['input_features'],
        self.model_definition['output_features'],
        self.model_definition['combiner'],
        self.model_definition[TRAINING],
        self.model_definition['preprocessing'],
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        random_seed=random_seed,
        debug=debug
    )

    # set parameters
    self.model = model
    self.train_set_metadata = train_set_metadata
def load(model_dir,
         logging_level=logging.ERROR,
         use_horovod=None,
         gpus=None,
         gpu_memory_limit=None,
         allow_parallel_threads=True):
    """Restore a pretrained LudwigModel from a saved model directory.

    # Inputs

    :param logging_level: Log level that will be sent to stderr.
    :param use_horovod: (bool) use Horovod for distributed training. Will be
           set automatically if `horovodrun` is used to launch the training
           script.
    :param model_dir: (string) path to the directory containing the model.
           If the model was trained by the `train` or `experiment` command,
           the model is in `results_dir/experiment_dir/model`.
    :param gpus: (string, default: `None`) list of GPUs to use (it uses the
           same syntax of CUDA_VISIBLE_DEVICES)
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
           allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
           to use multithreading parallelism to improve performance at the
           cost of determinism.

    # Return

    :return: (LudwigModel) a LudwigModel object

    # Example usage

    ```python
    ludwig_model = LudwigModel.load(model_dir)
    ```
    """
    horovod = configure_horovod(use_horovod)

    # Under Horovod only one worker reads from disk; the result is
    # broadcast to the other workers via broadcast_return.
    definition_path = os.path.join(model_dir, MODEL_HYPERPARAMETERS_FILE_NAME)
    model_definition = broadcast_return(
        lambda: load_json(definition_path),
        horovod
    )

    # initialize model
    ludwig_model = LudwigModel(
        model_definition,
        logging_level=logging_level,
        use_horovod=use_horovod,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
    )

    # generate model from definition
    ludwig_model.model = LudwigModel.create_model(model_definition)

    # load model weights
    ludwig_model.load_weights(model_dir)

    # load train set metadata
    metadata_path = os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME)
    ludwig_model.training_set_metadata = broadcast_return(
        lambda: load_metadata(metadata_path),
        horovod
    )

    return ludwig_model