Beispiel #1
0
    def initialize_model(self,
                         train_set_metadata=None,
                         train_set_metadata_json=None,
                         gpus=None,
                         gpu_fraction=1,
                         random_seed=default_random_seed,
                         debug=False,
                         **kwargs):
        """Build the model and initialize its session for online learning.

        This has to be called before `train_online`. `train` initializes the
        model under the hood, so there is no need to call this function if
        you don't use `train_online`.

        On success the built model is stored in `self.model` and the metadata
        that was used is stored in `self.train_set_metadata`.

        # Inputs

        :param train_set_metadata: (dict) metadata information for the input
               and output features the model is going to be trained on. It
               has the same content as the metadata JSON file that is created
               while training.
        :param train_set_metadata_json: (string) path to the JSON metadata
               file created while training. When it is provided, the
               metadata loaded from it replaces `train_set_metadata`.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses
               the same syntax of CUDA_VISIBLE_DEVICES)
        :param gpu_fraction: (float, default: `1`) fraction of GPU memory to
               initialize the process with
        :param random_seed: (int, default: `default_random_seed`) a random
               seed that is going to be used anywhere there is a call to a
               random number generator: data splitting, parameter
               initialization and training set shuffling
        :param debug: (bool, default: `False`) enables debugging mode
        :param kwargs: additional keyword arguments are accepted but not
               used by this function

        # Raises

        :raises ValueError: if both `train_set_metadata` and
                `train_set_metadata_json` are `None`
        """

        if train_set_metadata is None and train_set_metadata_json is None:
            raise ValueError(
                'train_set_metadata or train_set_metadata_json must not be '
                'None.')
        # a metadata file, when given, takes precedence over the dict
        if train_set_metadata_json is not None:
            train_set_metadata = load_metadata(train_set_metadata_json)

        # update model definition with metadata properties
        # (the return value is not used, so the update is presumably applied
        # to self.model_definition in place -- TODO confirm)
        update_model_definition_with_metadata(self.model_definition,
                                              train_set_metadata)

        # build model
        model = Model(self.model_definition['input_features'],
                      self.model_definition['output_features'],
                      self.model_definition['combiner'],
                      self.model_definition['training'],
                      self.model_definition['preprocessing'],
                      random_seed=random_seed,
                      debug=debug)
        model.initialize_session(gpus=gpus, gpu_fraction=gpu_fraction)

        # set parameters only after the model was built and its session was
        # initialized, so a failure above leaves the instance untouched
        self.model = model
        self.train_set_metadata = train_set_metadata
Beispiel #2
0
def train(training_set,
          validation_set,
          test_set,
          model_definition,
          save_path='model',
          model_load_path=None,
          resume=False,
          skip_save_model=False,
          skip_save_progress=False,
          skip_save_log=False,
          gpus=None,
          gpu_fraction=1.0,
          use_horovod=False,
          random_seed=default_random_seed,
          debug=False):
    """Obtain a model (loaded or freshly built) and train it.

    When `model_load_path` is given the model is loaded from that path,
    otherwise a new model is built from `model_definition`. In both cases
    the `train_model` contrib command is invoked before training starts and
    the entries of `model_definition['training']` are forwarded to the
    model's `train` method as keyword arguments.

    :param training_set: Dataset containing training data.
    :type training_set: Dataset
    :param validation_set: Dataset containing validation data.
    :type validation_set: Dataset
    :param test_set: Dataset containing test data.
    :type test_set: Dataset
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param save_path: The path to save the model to.
    :type save_path: filepath (str)
    :param model_load_path: If this is specified the loaded model will be
           used as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param resume: Forwarded unchanged to the model's `train` method.
    :type resume: Boolean
    :param skip_save_model: Disables saving model weights and
           hyperparameters each time the model improves. By default Ludwig
           saves model weights after each epoch the validation measure
           improves, but if the model is really big that can be time
           consuming. If you do not want to keep the weights and just want
           to find out what performance a model can get with a set of
           hyperparameters, use this parameter to skip it, but the model
           will not be loadable later on.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving progress each epoch. By
           default Ludwig saves weights and stats after each epoch for
           enabling resuming of training, but if the model is really big
           that can be time consuming and will use twice as much space. Use
           this parameter to skip it, but training cannot be resumed later
           on.
    :type skip_save_progress: Boolean
    :param skip_save_log: Disables saving TensorBoard logs. By default
           Ludwig saves logs for the TensorBoard, but if it is not needed
           turning it off can slightly increase the overall speed.
    :type skip_save_log: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Passed to the `Model` constructor; only used when a
           new model is built, i.e. when `model_load_path` is `None`.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks. Only used
           when a new model is built.
    :type debug: Boolean
    :returns: A pair made of the model and the value returned by the
              model's `train` method.
    :rtype: tuple
    """
    if model_load_path is None:
        # Nothing to load: build a fresh model out of the definition
        if is_on_master():
            print_boxed('BUILDING MODEL', print_fun=logger.debug)

        model = Model(
            model_definition['input_features'],
            model_definition['output_features'],
            model_definition['combiner'],
            model_definition['training'],
            model_definition['preprocessing'],
            use_horovod=use_horovod,
            random_seed=random_seed,
            debug=debug
        )
    else:
        # Start from the model stored at model_load_path
        if is_on_master():
            print_boxed('LOADING MODEL')
            loading_message = 'Loading model: {}\n'.format(model_load_path)
            logger.info(loading_message)
        # only the first item of the returned pair is needed here
        model, _ignored = load_model_and_definition(model_load_path)

    contrib_command("train_model", model, model_definition, model_load_path)

    # Announce and run the actual training
    if is_on_master():
        print_boxed('TRAINING')
    training_result = model.train(
        training_set,
        validation_set=validation_set,
        test_set=test_set,
        save_path=save_path,
        resume=resume,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        gpus=gpus,
        gpu_fraction=gpu_fraction,
        random_seed=random_seed,
        **model_definition['training']
    )
    return model, training_result
Beispiel #3
0
def train(training_set,
          validation_set,
          test_set,
          model_definition,
          save_path='model',
          model_load_path=None,
          resume=False,
          skip_save_progress_weights=False,
          gpus=None,
          gpu_fraction=1.0,
          use_horovod=False,
          random_seed=default_random_seed,
          debug=False):
    """Obtain a model (loaded or freshly built) and train it.

    When `model_load_path` is given the model is loaded from that path,
    otherwise a new model is built from `model_definition`. The entries of
    `model_definition['training']` are forwarded to the model's `train`
    method as keyword arguments.

    :param training_set: Dataset containing training data.
    :type training_set: Dataset
    :param validation_set: Dataset containing validation data.
    :type validation_set: Dataset
    :param test_set: Dataset containing test data.
    :type test_set: Dataset
    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
    :type model_definition: Dictionary
    :param save_path: The path to save the model to.
    :type save_path: filepath (str)
    :param model_load_path: If this is specified the loaded model will be
           used as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param resume: Forwarded unchanged to the model's `train` method.
    :type resume: Boolean
    :param skip_save_progress_weights: Skips saving the weights at the end
           of each epoch. If this is true, training cannot be resumed from
           exactly the state at the end of the previous epoch.
    :type skip_save_progress_weights: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at
           the beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Passed to the `Model` constructor; only used when a
           new model is built, i.e. when `model_load_path` is `None`.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization,
           splits and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks. Only used
           when a new model is built.
    :type debug: Boolean
    :returns: A pair made of the model and the value returned by the
              model's `train` method.
    :rtype: tuple
    """
    if model_load_path is None:
        # Nothing to load: build a fresh model out of the definition
        if is_on_master():
            print_boxed('BUILDING MODEL', print_fun=logging.debug)
        model = Model(
            model_definition['input_features'],
            model_definition['output_features'],
            model_definition['combiner'],
            model_definition['training'],
            model_definition['preprocessing'],
            use_horovod=use_horovod,
            random_seed=random_seed,
            debug=debug
        )
    else:
        # Start from the model stored at model_load_path
        if is_on_master():
            print_boxed('LOADING MODEL')
            loading_message = 'Loading model: {}\n'.format(model_load_path)
            logging.info(loading_message)
        # only the first item of the returned pair is needed here
        model, _ignored = load_model_and_definition(model_load_path)

    # Announce and run the actual training
    if is_on_master():
        print_boxed('TRAINING')
    training_result = model.train(
        training_set,
        validation_set=validation_set,
        test_set=test_set,
        save_path=save_path,
        resume=resume,
        skip_save_progress_weights=skip_save_progress_weights,
        gpus=gpus,
        gpu_fraction=gpu_fraction,
        random_seed=random_seed,
        **model_definition['training']
    )
    return model, training_result