def setup_model_scaffolding(raw_df, input_features, output_features):
    """Yield a ``(model, batcher)`` pair built on top of ``raw_df``.

    A minimal config holding only the given input and output features is
    used to instantiate a ``LudwigModel`` and to preprocess ``raw_df``.
    The resulting metadata is attached to the model and merged into its
    config before the underlying model object is created.

    This is a generator function: the pair is yielded while the batcher
    context is open, and the batcher is released when the generator is
    resumed or closed.
    NOTE(review): presumably used through ``contextlib.contextmanager``;
    the decorator is not visible here -- confirm.

    :param raw_df: raw data passed to preprocessing as ``training_set``.
    :param input_features: input feature definitions for the config.
    :param output_features: output feature definitions for the config.
    """
    # minimal configuration: features only
    scaffold_config = dict(
        input_features=input_features,
        output_features=output_features,
    )

    # bare model plus preprocessed data, without writing processed input
    ludwig_model = LudwigModel(scaffold_config)
    processed_set, _, _, metadata = preprocess_for_training(
        scaffold_config,
        training_set=raw_df,
        skip_save_processed_input=True,
    )

    # wire the metadata into the model before building the actual network
    ludwig_model.training_set_metadata = metadata
    update_config_with_metadata(ludwig_model.config, metadata)
    ludwig_model.model = ludwig_model.create_model(ludwig_model.config)

    # hand out a batcher over the synthetic data
    with processed_set.initialize_batcher() as data_batcher:
        yield ludwig_model, data_batcher
def test_experiment_dataset_formats(data_format):
    """Check that train/evaluate/predict accept the given dataset format.

    The primary focus of this test is to determine whether exceptions are
    raised for different data set formats and ``in_memory`` settings.

    :param data_format: name of the dataset format under test. ``'hdf5'``
        is produced via ``preprocess_for_training``; every other value is
        delegated to ``create_data_set_to_use``.
    """
    input_features = [
        numerical_feature(),
        category_feature()
    ]
    output_features = [
        category_feature(),
        numerical_feature()
    ]

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'preprocessing': {},
        'training': {'epochs': 2}
    }

    # create temporary name for train and test data sets
    csv_filename = 'train_' + uuid.uuid4().hex[:10].upper() + '.csv'

    # setup training data format to test
    raw_data = generate_data(input_features, output_features, csv_filename)

    # From here on the CSV exists on disk; make sure it is removed even if
    # training, evaluation or prediction raises, so that a failing run
    # does not leave temporary files behind.
    try:
        training_set_metadata = None

        if data_format == 'hdf5':
            # hdf5 format: preprocess first and train from the hdf5 file
            training_set, _, _, training_set_metadata = \
                preprocess_for_training(
                    config,
                    dataset=raw_data
                )
            dataset_to_use = training_set.data_hdf5_fp
        else:
            dataset_to_use = create_data_set_to_use(data_format, raw_data)

        # define Ludwig model
        model = LudwigModel(
            config=config
        )
        model.train(
            dataset=dataset_to_use,
            training_set_metadata=training_set_metadata,
            random_seed=default_random_seed
        )

        # run functions with the specified data format
        model.evaluate(dataset=dataset_to_use)
        model.predict(dataset=dataset_to_use)
    finally:
        # Delete the temporary data created
        delete_temporary_data(csv_filename)
def test_regularizers(
    input_features,
    output_features,
):
    """Compare the training loss across regularization types.

    For each of ``None``, ``"l1"``, ``"l2"`` and ``"l1_l2"`` a model is
    trained for two epochs on the same generated data, and the training
    loss of one preprocessed batch is recorded. The test then asserts that
    the unregularized loss is the smallest, that every regularized loss is
    strictly larger, and that ``l1 + l2 - none`` is close to ``l1_l2``.
    """
    # fix all random number generators before generating any data
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    random.seed(0)

    csv_path = generate_data(input_features, output_features, num_examples=BATCH_SIZE)
    data_df = read_csv(csv_path)

    reg_lambda = 0.1

    def _batch_to_tensors(features, batch_data):
        # one device tensor per feature, keyed by feature name
        return {
            feat.feature_name: torch.from_numpy(batch_data[feat.proc_column]).to(DEVICE)
            for feat in features.values()
        }

    regularizer_losses = []
    for regularization_type in (None, "l1", "l2", "l1_l2"):
        trainer_section = {
            "epochs": 2,
            "regularization_type": regularization_type,
            "regularization_lambda": reg_lambda,
        }
        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {"type": "concat", "output_size": 14},
            TRAINER: trainer_section,
        }

        backend = LocalTestBackend()
        model = LudwigModel(config, backend=backend)

        # grab a single preprocessed batch before training starts
        processed_data_df, _, _, _ = preprocess_for_training(config, data_df, backend=backend)
        with processed_data_df.initialize_batcher(batch_size=BATCH_SIZE) as batcher:
            batch = batcher.next_batch()

        _, _, _ = model.train(
            training_set=data_df,
            skip_save_processed_input=True,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
        )

        inputs = _batch_to_tensors(model.model.input_features, batch)
        targets = _batch_to_tensors(model.model.output_features, batch)
        predictions = model.model((inputs, targets))

        loss, _ = model.model.train_loss(targets, predictions, regularization_type, reg_lambda)
        regularizer_losses.append(loss)

    unregularized = regularizer_losses[0]

    # Regularizer_type=None has lowest regularizer loss
    assert min(regularizer_losses) == unregularized

    # l1, l2 and l1_l2 should be greater than zero
    is_larger = [t - unregularized > 0.0 for t in regularizer_losses[1:]]
    assert torch.all(torch.tensor(is_larger))

    # using default setting l1 + l2 == l1_l2 losses
    combined = regularizer_losses[1] + regularizer_losses[2] - unregularized
    assert torch.isclose(combined, regularizer_losses[3], rtol=0.1)
def train_online(self, dataset, training_set_metadata=None,
                 data_format='auto', random_seed=default_random_seed,
                 debug=False):
    """Performs one epoch of training of the model on `dataset`.

    The dataset is preprocessed on every call (without saving the
    processed input). The model and the online trainer are created lazily
    on the first call and reused afterwards.

    :param dataset: (string, dict, DataFrame) source containing the
        training dataset.
    :param training_set_metadata: (string, dict) metadata JSON file or
        loaded metadata. Intermediate preprocess structure containing the
        mappings of the input CSV created the first time a CSV file is
        used in the same directory with the same name and a '.json'
        extension. Falls back to `self.training_set_metadata` when falsy.
    :param data_format: (string, default: `'auto'`) format to interpret
        data sources. Will be inferred automatically if not specified.
    :param random_seed: (int, default: `default_random_seed`) a random
        seed that is going to be used anywhere there is a call to a random
        number generator: data splitting, parameter initialization and
        training set shuffling
    :param debug: (bool, default: `False`) enables debugging mode

    :return: None. `self.training_set_metadata`, `self.model` and
        `self._online_trainer` are populated if they were not set yet.
    """
    training_set_metadata = (
        training_set_metadata or self.training_set_metadata
    )
    training_dataset, _, _, training_set_metadata = preprocess_for_training(
        self.model_definition,
        training_set=dataset,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        skip_save_processed_input=True,
        preprocessing_params=self.model_definition[PREPROCESSING],
        random_seed=random_seed
    )

    # keep the metadata of the first dataset seen by this model
    if not self.training_set_metadata:
        self.training_set_metadata = training_set_metadata

    # build the model lazily, once the metadata is known
    if not self.model:
        update_model_definition_with_metadata(
            self.model_definition,
            training_set_metadata
        )
        self.model = LudwigModel.create_model(
            self.model_definition,
            random_seed=random_seed
        )

    if not self._online_trainer:
        # The keyword was previously misspelled as `horoovd`, so the
        # Horovod handle never reached the trainer under its real name.
        self._online_trainer = Trainer(
            **self.model_definition[TRAINING],
            random_seed=random_seed,
            horovod=self._horovod,
            debug=debug
        )

    self._online_trainer.train_online(
        self.model,
        training_dataset,
    )
def test_experiment_dataset_formats(data_format, csv_filename):
    """Run train, evaluate and predict on data stored as ``data_format``.

    The point of the test is to surface exceptions raised for the different
    dataset formats and ``in_memory`` settings; no metric is checked.

    :param data_format: dataset format under test; ``"hdf5"`` is produced
        through ``preprocess_for_training``, anything else through
        ``create_data_set_to_use``.
    :param csv_filename: path where the generated CSV data is written.
    """
    in_feats = [number_feature(), category_feature()]
    out_feats = [category_feature(), number_feature()]

    combiner_section = {"type": "concat", "output_size": 14}
    config = {
        "input_features": in_feats,
        "output_features": out_feats,
        "combiner": combiner_section,
        "preprocessing": {},
        TRAINER: {"epochs": 2},
    }

    # generate the raw data, then convert it to the format under test
    raw_data = generate_data(in_feats, out_feats, csv_filename)

    training_set_metadata = None
    if data_format != "hdf5":
        dataset_to_use = create_data_set_to_use(data_format, raw_data)
    else:
        # hdf5 is obtained by preprocessing; keep the metadata for training
        processed_set, _, _, training_set_metadata = preprocess_for_training(
            config, dataset=raw_data)
        dataset_to_use = processed_set.data_hdf5_fp

    # build the model and exercise it on the chosen format
    ludwig_model = LudwigModel(config=config)
    ludwig_model.train(
        dataset=dataset_to_use,
        training_set_metadata=training_set_metadata,
        random_seed=default_random_seed,
    )

    ludwig_model.evaluate(dataset=dataset_to_use)
    ludwig_model.predict(dataset=dataset_to_use)
def full_train(
        model_definition,
        model_definition_file=None,
        data_df=None,
        data_train_df=None,
        data_validation_df=None,
        data_test_df=None,
        data_csv=None,
        data_train_csv=None,
        data_validation_csv=None,
        data_test_csv=None,
        data_hdf5=None,
        data_train_hdf5=None,
        data_validation_hdf5=None,
        data_test_hdf5=None,
        train_set_metadata_json=None,
        experiment_name='experiment',
        model_name='run',
        model_load_path=None,
        model_resume_path=None,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=False,
        output_directory='results',
        should_close_session=True,
        gpus=None,
        gpu_fraction=1.0,
        use_horovod=False,
        random_seed=42,
        debug=False,
        **kwargs
):
    """*full_train* defines the entire training procedure used by Ludwig's
    internals. Requires most of the parameters that are taken into the model.
    Builds a full ludwig model and performs the training.

    The steps are: merge the model definition with defaults, resolve the
    experiment directory, save and log the experiment description,
    preprocess the data, update the model definition with the resulting
    metadata, train, save the training statistics and log the best
    validation epoch. Saving and logging happen only where
    `is_on_master()` is true.

    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
           Ignored if `model_definition_file` is provided.
    :type model_definition: Dictionary
    :param model_definition_file: The file that specifies the model
           definition. It is a yaml file. When provided it takes precedence
           over `model_definition`.
    :type model_definition_file: filepath (str)
    :param data_df: Full dataset, forwarded to `preprocess_for_training`
           (presumably a DataFrame -- confirm).
    :param data_train_df: Training data, forwarded to
           `preprocess_for_training`.
    :param data_validation_df: Validation data, forwarded to
           `preprocess_for_training`.
    :param data_test_df: Test data, forwarded to `preprocess_for_training`.
    :param data_csv: A CSV file containing the input data which is used to
           train, validate and test a model. The CSV either contains a
           split column or will be split.
    :type data_csv: filepath (str)
    :param data_train_csv: A CSV file containing the input data which is
           used to train a model.
    :type data_train_csv: filepath (str)
    :param data_validation_csv: A CSV file containing the input data which
           is used to validate a model.
    :type data_validation_csv: filepath (str)
    :param data_test_csv: A CSV file containing the input data which is
           used to test a model.
    :type data_test_csv: filepath (str)
    :param data_hdf5: If the dataset is in the hdf5 format, this is used
           instead of the csv file.
    :type data_hdf5: filepath (str)
    :param data_train_hdf5: If the training set is in the hdf5 format, this
           is used instead of the csv file.
    :type data_train_hdf5: filepath (str)
    :param data_validation_hdf5: If the validation set is in the hdf5
           format, this is used instead of the csv file.
    :type data_validation_hdf5: filepath (str)
    :param data_test_hdf5: If the test set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_test_hdf5: filepath (str)
    :param train_set_metadata_json: If the dataset is in hdf5 format, this
           is the associated json file containing metadata. Overridden by
           the metadata file inside `model_load_path` when that is given.
    :type train_set_metadata_json: filepath (str)
    :param experiment_name: The name for the experiment.
    :type experiment_name: Str
    :param model_name: Name of the model that is being used.
    :type model_name: Str
    :param model_load_path: If this is specified the loaded model will be
           used as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param model_resume_path: Resumes training of the model from the path
           specified. The difference with model_load_path is that also
           training statistics like the current epoch and the loss and
           performance so far are also resumed, effectively continuing a
           previously interrupted training process. If the path does not
           exist, training starts from scratch.
    :type model_resume_path: filepath (str)
    :param skip_save_model: Disables saving model weights and
           hyperparameters each time the model improves. By default Ludwig
           saves model weights after each epoch the validation measure
           improves, but if the model is really big that can be time
           consuming. If you do not want to keep the weights and just find
           out what performance a model can get with a set of
           hyperparameters, use this parameter to skip it, but the model
           will not be loadable later on. When set, the train set metadata
           is not saved to the model directory either.
    :type skip_save_model: Boolean
    :param skip_save_progress: Disables saving progress each epoch. By
           default Ludwig saves weights and stats after each epoch for
           enabling resuming of training, but if the model is really big
           that can be time consuming and will use twice as much space. Use
           this parameter to skip it, but training cannot be resumed later
           on.
    :type skip_save_progress: Boolean
    :param skip_save_processed_input: If a CSV dataset is provided it is
           preprocessed and then saved as an hdf5 and json to avoid running
           the preprocessing again. If this parameter is True, the hdf5 and
           json files are not saved.
    :type skip_save_processed_input: Boolean
    :param skip_save_log: Disables saving TensorBoard logs. By default
           Ludwig saves logs for the TensorBoard, but if it is not needed
           turning it off can slightly increase the overall speed.
    :type skip_save_log: Boolean
    :param output_directory: The directory that will contain the training
           statistics, the saved model and the training progress files.
    :type output_directory: filepath (str)
    :param should_close_session: If true, `model.close_session()` is called
           once training has finished.
    :type should_close_session: Boolean
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at the
           beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Forwarded to `train`.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization, splits
           and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :param kwargs: Accepted for call compatibility; not used in this
           function.
    :returns: A tuple `(model, preprocessed_data, experiment_dir_name,
           train_stats, model_definition)`, where `preprocessed_data` is the
           `(training_set, validation_set, test_set, train_set_metadata)`
           tuple returned by preprocessing, `train_stats` is a dict with
           the keys `'train'`, `'validation'` and `'test'`, and
           `model_definition` is the definition merged with defaults and
           updated with metadata.
    :rtype: Tuple
    """
    # set input features defaults
    if model_definition_file is not None:
        with open(model_definition_file, 'r') as def_file:
            model_definition = merge_with_defaults(yaml.safe_load(def_file))
    else:
        model_definition = merge_with_defaults(model_definition)

    # setup directories and file names
    experiment_dir_name = None
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            experiment_dir_name = model_resume_path
        else:
            if is_on_master():
                logger.info('Model resume path does not exists, '
                            'starting training from scratch')
            # fall back to a fresh run, handled by the next block
            model_resume_path = None

    if model_resume_path is None:
        if is_on_master():
            experiment_dir_name = get_experiment_dir_name(
                output_directory, experiment_name, model_name)
        else:
            # non-master processes do not get a dedicated experiment dir
            experiment_dir_name = '.'

    # if model_load_path is not None, load its train_set_metadata
    if model_load_path is not None:
        train_set_metadata_json = os.path.join(model_load_path,
                                               TRAIN_SET_METADATA_FILE_NAME)

    description_fn, training_stats_fn, model_dir = get_file_names(
        experiment_dir_name)

    # save description
    description = get_experiment_description(
        model_definition,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        metadata_json=train_set_metadata_json,
        random_seed=random_seed)
    if is_on_master():
        save_json(description_fn, description)
        # print description
        logger.info('Experiment name: {}'.format(experiment_name))
        logger.info('Model name: {}'.format(model_name))
        logger.info('Output path: {}'.format(experiment_dir_name))
        logger.info('\n')
        for key, value in description.items():
            logger.info('{}: {}'.format(key, pformat(value, indent=4)))
        logger.info('\n')

    # preprocess
    preprocessed_data = preprocess_for_training(
        model_definition,
        data_df=data_df,
        data_train_df=data_train_df,
        data_validation_df=data_validation_df,
        data_test_df=data_test_df,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=model_definition['preprocessing'],
        random_seed=random_seed)

    (training_set, validation_set, test_set,
     train_set_metadata) = preprocessed_data

    if is_on_master():
        logger.info('Training set: {0}'.format(training_set.size))
        if validation_set is not None:
            logger.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logger.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(model_definition,
                                          train_set_metadata)

    if is_on_master():
        if not skip_save_model:
            # save train set metadata
            os.makedirs(model_dir, exist_ok=True)
            save_json(os.path.join(model_dir, TRAIN_SET_METADATA_FILE_NAME),
                      train_set_metadata)

    # run the experiment
    model, result = train(training_set=training_set,
                          validation_set=validation_set,
                          test_set=test_set,
                          model_definition=model_definition,
                          save_path=model_dir,
                          model_load_path=model_load_path,
                          resume=model_resume_path is not None,
                          skip_save_model=skip_save_model,
                          skip_save_progress=skip_save_progress,
                          skip_save_log=skip_save_log,
                          gpus=gpus,
                          gpu_fraction=gpu_fraction,
                          use_horovod=use_horovod,
                          random_seed=random_seed,
                          debug=debug)

    # `result` holds the statistics for the train, validation and test sets
    train_trainset_stats, train_valisest_stats, train_testset_stats = result
    train_stats = {
        'train': train_trainset_stats,
        'validation': train_valisest_stats,
        'test': train_testset_stats
    }

    if should_close_session:
        model.close_session()

    if is_on_master():
        # save training and test statistics
        save_json(training_stats_fn, train_stats)

    # grab the results of the model with highest validation test performance
    validation_field = model_definition['training']['validation_field']
    validation_measure = model_definition['training']['validation_measure']
    validation_field_result = train_valisest_stats[validation_field]

    # max or min, depending on the validation measure
    best_function = get_best_function(validation_measure)

    # results of the model with highest validation test performance
    if is_on_master() and validation_set is not None:
        # pairs are (epoch index, measure value); compare on the value
        epoch_best_vali_measure, best_vali_measure = best_function(
            enumerate(validation_field_result[validation_measure]),
            key=lambda pair: pair[1])
        # epochs are reported 1-based
        logger.info(
            'Best validation model epoch: {0}'.format(
                epoch_best_vali_measure + 1))
        logger.info(
            'Best validation model {0} on validation set {1}: {2}'.format(
                validation_measure, validation_field, best_vali_measure))

        if test_set is not None:
            best_vali_measure_epoch_test_measure = train_testset_stats[
                validation_field][validation_measure][epoch_best_vali_measure]
            logger.info(
                'Best validation model {0} on test set {1}: {2}'.format(
                    validation_measure, validation_field,
                    best_vali_measure_epoch_test_measure))
        # NOTE(review): these two messages are emitted only on this branch,
        # i.e. not when there is no validation set -- confirm intended.
        logger.info('\nFinished: {0}_{1}'.format(experiment_name, model_name))
        logger.info('Saved to: {0}'.format(experiment_dir_name))

    contrib_command("train_save", experiment_dir_name)

    return (model, preprocessed_data, experiment_dir_name, train_stats,
            model_definition)
def train(
        self,
        data_df=None,
        data_train_df=None,
        data_validation_df=None,
        data_test_df=None,
        data_csv=None,
        data_train_csv=None,
        data_validation_csv=None,
        data_test_csv=None,
        data_hdf5=None,
        data_train_hdf5=None,
        data_validation_hdf5=None,
        data_test_hdf5=None,
        train_set_metadata_json=None,
        dataset_type='generic',
        model_name='run',
        model_load_path=None,
        model_resume_path=None,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=False,
        output_directory='results',
        gpus=None,
        gpu_fraction=1.0,
        random_seed=42,
        logging_level=logging.ERROR,
        debug=False,
        **kwargs
):
    """This function is used to perform a full training of the model on the
       specified dataset.

    # Inputs

    :param data_df: (DataFrame) dataframe containing data. If it has a split
           column, it will be used for splitting (0: train, 1: validation,
           2: test), otherwise the dataset will be randomly split
    :param data_train_df: (DataFrame) dataframe containing training data
    :param data_validation_df: (DataFrame) dataframe containing validation
           data
    :param data_test_df: (DataFrame) dataframe containing test data
    :param data_csv: (string) input data CSV file. If it has a split column,
           it will be used for splitting (0: train, 1: validation, 2: test),
           otherwise the dataset will be randomly split
    :param data_train_csv: (string) input train data CSV file
    :param data_validation_csv: (string) input validation data CSV file
    :param data_test_csv: (string) input test data CSV file
    :param data_hdf5: (string) input data HDF5 file. It is an intermediate
           preprocess version of the input CSV created the first time a CSV
           file is used in the same directory with the same name and a hdf5
           extension
    :param data_train_hdf5: (string) input train data HDF5 file. It is an
           intermediate preprocess version of the input CSV created the
           first time a CSV file is used in the same directory with the same
           name and a hdf5 extension
    :param data_validation_hdf5: (string) input validation data HDF5 file.
           It is an intermediate preprocess version of the input CSV created
           the first time a CSV file is used in the same directory with the
           same name and a hdf5 extension
    :param data_test_hdf5: (string) input test data HDF5 file. It is an
           intermediate preprocess version of the input CSV created the
           first time a CSV file is used in the same directory with the same
           name and a hdf5 extension
    :param train_set_metadata_json: (string) input metadata JSON file. It is
           an intermediate preprocess file containing the mappings of the
           input CSV created the first time a CSV file is used in the same
           directory with the same name and a json extension
    :param dataset_type: (string, default: `'generic'`) determines the type
           of preprocessing that will be applied to the data. Only `generic`
           is available at the moment
    :param model_name: (string) a name for the model, used for the save
           directory
    :param model_load_path: (string) path of a pretrained model to load as
           initialization
    :param model_resume_path: (string) path of the model directory to
           resume training of. If the path does not exist, training starts
           from scratch
    :param skip_save_model: (bool, default: `False`) disables saving model
           weights and hyperparameters each time the model improves. By
           default Ludwig saves model weights after each epoch the
           validation measure improves, but if the model is really big that
           can be time consuming. If you do not want to keep the weights and
           just find out what performance a model can get with a set of
           hyperparameters, use this parameter to skip it, but the model
           will not be loadable later on.
    :param skip_save_progress: (bool, default: `False`) disables saving
           progress each epoch. By default Ludwig saves weights and stats
           after each epoch for enabling resuming of training, but if the
           model is really big that can be time consuming and will use
           twice as much space. Use this parameter to skip it, but training
           cannot be resumed later on.
    :param skip_save_log: (bool, default: `False`) disables saving
           TensorBoard logs. By default Ludwig saves logs for the
           TensorBoard, but if it is not needed turning it off can slightly
           increase the overall speed.
    :param skip_save_processed_input: (bool, default: `False`) skips saving
           intermediate HDF5 and JSON files. When data is provided through
           `data_df` or `data_train_df`, the processed input is never saved,
           regardless of this value.
    :param output_directory: (string, default: `'results'`) directory that
           contains the results
    :param gpus: (string, default: `None`) list of GPUs to use (it uses the
           same syntax of CUDA_VISIBLE_DEVICES)
    :param gpu_fraction: (float, default `1.0`) fraction of gpu memory to
           initialize the process with
    :param random_seed: (int, default `42`) a random seed that is going to
           be used anywhere there is a call to a random number generator:
           data splitting, parameter initialization and training set
           shuffling
    :param debug: (bool, default: `False`) enables debugging mode
    :param logging_level: (int, default: `logging.ERROR`) logging level to
           use for logging. Use logging constants like `logging.DEBUG`,
           `logging.INFO` and `logging.ERROR`. By default only errors will
           be printed. The level is set on the root logger, and the
           progress bar is disabled for `logging.WARNING` and above.
    :param kwargs: accepted for call compatibility; not used in this
           method.

    There are three ways to provide data: by dataframes using the `_df`
    parameters, by CSV using the `_csv` parameters and by HDF5 and JSON,
    using `_hdf5` and `_json` parameters.
    The DataFrame approach uses data previously obtained and put in a
    dataframe, the CSV approach loads data from a CSV file, while HDF5 and
    JSON load previously preprocessed HDF5 and JSON files (they are saved in
    the same directory of the CSV they are obtained from).
    For all three approaches either a full dataset can be provided (which
    will be split randomly according to the split probabilities defined in
    the model definition, by default 70% training, 10% validation and 20%
    test) or, if it contains a split column, it will be split according to
    that column (interpreting 0 as training, 1 as validation and 2 as test).
    Alternatively separated dataframes / CSV / HDF5 files can be provided
    for each split.

    During training the model and statistics will be saved in a directory
    `[output_dir]/[experiment_name]_[model_name]_n` where all variables are
    resolved to user specified ones and `n` is an increasing number
    starting from 0 used to differentiate different runs. Note that this
    method passes an empty experiment name to `get_experiment_dir_name`.

    On completion the trained model and the train set metadata are stored
    on the instance as `self.model` and `self.train_set_metadata`.

    # Return

    :return: (dict) a dictionary with the keys `'train'`, `'validation'`
             and `'test'` containing the training statistics computed on
             each of those sets (loss and measures values for each output
             feature and each epoch).
    """
    logging.getLogger().setLevel(logging_level)
    if logging_level in {logging.WARNING, logging.ERROR, logging.CRITICAL}:
        set_disable_progressbar(True)

    # setup directories and file names
    experiment_dir_name = None
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            experiment_dir_name = model_resume_path
        else:
            logging.info('Model resume path does not exists,'
                         ' starting training from scratch')
            # fall back to a fresh run, handled by the next block
            model_resume_path = None

    if model_resume_path is None:
        experiment_dir_name = get_experiment_dir_name(
            output_directory, '', model_name)

    description_fn, training_stats_fn, model_dir = get_file_names(
        experiment_dir_name)

    # save description
    description = get_experiment_description(
        self.model_definition,
        dataset_type,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        metadata_json=train_set_metadata_json,
        random_seed=random_seed)

    save_json(description_fn, description)

    # print description
    logging.info('Model name: {}'.format(model_name))
    logging.info('Output path: {}'.format(experiment_dir_name))
    logging.info('\n')
    for key, value in description.items():
        logging.info('{0}: {1}'.format(key, pformat(value, indent=4)))
    logging.info('\n')

    # preprocess
    if data_df is not None or data_train_df is not None:
        # in-memory dataframes: processed input is never saved
        (training_set, validation_set, test_set,
         train_set_metadata) = preprocess_for_training(
            self.model_definition,
            dataset_type,
            data_df=data_df,
            data_train_df=data_train_df,
            data_validation_df=data_validation_df,
            data_test_df=data_test_df,
            train_set_metadata_json=train_set_metadata_json,
            skip_save_processed_input=True,
            preprocessing_params=self.model_definition['preprocessing'],
            random_seed=random_seed)
    else:
        # file based input (CSV or HDF5)
        (training_set, validation_set, test_set,
         train_set_metadata) = preprocess_for_training(
            self.model_definition,
            dataset_type,
            data_csv=data_csv,
            data_train_csv=data_train_csv,
            data_validation_csv=data_validation_csv,
            data_test_csv=data_test_csv,
            data_hdf5=data_hdf5,
            data_train_hdf5=data_train_hdf5,
            data_validation_hdf5=data_validation_hdf5,
            data_test_hdf5=data_test_hdf5,
            train_set_metadata_json=train_set_metadata_json,
            skip_save_processed_input=skip_save_processed_input,
            preprocessing_params=self.model_definition['preprocessing'],
            random_seed=random_seed)

    logging.info('Training set: {0}'.format(training_set.size))
    if validation_set is not None:
        logging.info('Validation set: {0}'.format(validation_set.size))
    if test_set is not None:
        logging.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(self.model_definition,
                                          train_set_metadata)

    if not skip_save_model:
        # save train set metadata alongside the model
        os.makedirs(model_dir, exist_ok=True)
        train_set_metadata_path = os.path.join(
            model_dir, TRAIN_SET_METADATA_FILE_NAME)
        save_json(train_set_metadata_path, train_set_metadata)

    # run the experiment
    # (the bare name `train` resolves at module level, not to this method)
    model, result = train(training_set=training_set,
                          validation_set=validation_set,
                          test_set=test_set,
                          model_definition=self.model_definition,
                          save_path=model_dir,
                          model_load_path=model_load_path,
                          resume=model_resume_path is not None,
                          skip_save_model=skip_save_model,
                          skip_save_progress=skip_save_progress,
                          skip_save_log=skip_save_log,
                          gpus=gpus,
                          gpu_fraction=gpu_fraction,
                          random_seed=random_seed,
                          debug=debug)

    # `result` holds the statistics for the train, validation and test sets
    train_trainset_stats, train_valisest_stats, train_testset_stats = result
    train_stats = {
        'train': train_trainset_stats,
        'validation': train_valisest_stats,
        'test': train_testset_stats
    }

    # save training and test statistics
    save_json(training_stats_fn, train_stats)

    # grab the results of the model with highest validation test performance
    md_training = self.model_definition['training']
    validation_field = md_training['validation_field']
    validation_measure = md_training['validation_measure']
    validation_field_result = train_valisest_stats[validation_field]

    best_function = get_best_function(validation_measure)

    # print results of the model with highest validation test performance
    if validation_set is not None:
        # max or min depending on the measure
        # pairs are (epoch index, measure value); compare on the value
        epoch_best_vali_measure, best_vali_measure = best_function(
            enumerate(validation_field_result[validation_measure]),
            key=lambda pair: pair[1])
        # epochs are reported 1-based
        logging.info('Best validation model epoch: {0}'.format(
            epoch_best_vali_measure + 1))
        logging.info(
            'Best validation model {0} on validation set {1}: {2}'.format(
                validation_measure, validation_field, best_vali_measure))

        if test_set is not None:
            best_vali_measure_epoch_test_measure = train_testset_stats[
                validation_field][validation_measure][
                epoch_best_vali_measure]
            logging.info(
                'Best validation model {0} on test set {1}: {2}'.format(
                    validation_measure, validation_field,
                    best_vali_measure_epoch_test_measure))

    logging.info('Finished: {0}'.format(model_name))
    logging.info('Saved to {0}:'.format(experiment_dir_name))

    # set parameters
    self.model = model
    self.train_set_metadata = train_set_metadata

    return train_stats
def test_experiment_image_dataset(train_format, train_in_memory,
                                  test_format, test_in_memory):
    """Train and evaluate an image model across dataset formats.

    The primary focus of this test is to determine whether exceptions are
    raised for different data set formats and ``in_memory`` settings.

    Generated images are written to ``<cwd>/generated_images`` and the CSV
    files to the current directory; all of them are removed when the test
    finishes, whether it passes or fails.

    :param train_format: dataset format used for training.
    :param train_in_memory: ``in_memory`` preprocessing flag for training.
    :param test_format: dataset format used for evaluation and prediction.
    :param test_in_memory: ``in_memory`` preprocessing flag for testing.
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='stacked_cnn',
                      preprocessing={
                          'in_memory': True,
                          'height': 12,
                          'width': 12,
                          'num_channels': 3,
                          'num_processes': 5
                      },
                      fc_size=16,
                      num_filters=8),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
    ]

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'preprocessing': {},
        'training': {
            'epochs': 2
        }
    }

    # create temporary name for train and test data sets
    train_csv_filename = 'train_' + uuid.uuid4().hex[:10].upper() + '.csv'
    test_csv_filename = 'test_' + uuid.uuid4().hex[:10].upper() + '.csv'

    # Everything below writes to disk. Cleanup lives in `finally` so that a
    # failing run does not leave images or CSV files in the working
    # directory, where they could interfere with later runs.
    try:
        # setup training data format to test
        train_data = generate_data(input_features, output_features,
                                   train_csv_filename)
        config['input_features'][0]['preprocessing']['in_memory'] \
            = train_in_memory
        training_set_metadata = None

        if train_format == 'hdf5':
            # hdf5 format
            train_set, _, _, training_set_metadata = \
                preprocess_for_training(
                    config,
                    dataset=train_data)
            train_dataset_to_use = train_set.data_hdf5_fp
        else:
            train_dataset_to_use = create_data_set_to_use(train_format,
                                                          train_data)

        # define Ludwig model
        model = LudwigModel(config=config, )
        model.train(dataset=train_dataset_to_use,
                    training_set_metadata=training_set_metadata)

        model.config['input_features'][0]['preprocessing']['in_memory'] \
            = test_in_memory

        # setup test data format to test
        test_data = generate_data(input_features, output_features,
                                  test_csv_filename)

        if test_format == 'hdf5':
            # hdf5 format
            # create hdf5 data set; only the dataset itself is needed, the
            # metadata returned alongside it is discarded
            _, test_set, _, _ = preprocess_for_training(
                model.config,
                dataset=test_data)
            test_dataset_to_use = test_set.data_hdf5_fp
        else:
            test_dataset_to_use = create_data_set_to_use(test_format,
                                                         test_data)

        # run functions with the specified data format
        model.evaluate(dataset=test_dataset_to_use)
        model.predict(dataset=test_dataset_to_use)
    finally:
        # Delete the temporary data created. Cleanup is tolerant of
        # artifacts that were never produced (failure before generation),
        # so a cleanup error cannot mask the original test failure.
        shutil.rmtree(image_dest_folder, ignore_errors=True)
        for csv_path in (train_csv_filename, test_csv_filename):
            if os.path.exists(csv_path):
                delete_temporary_data(csv_path)
def test_experiment_image_dataset(train_format, train_in_memory,
                                  test_format, test_in_memory, tmpdir):
    """Train on one image dataset format, then evaluate/predict on another.

    All generated images and CSV files are placed under ``tmpdir``.

    :param train_format: data format of the training data; ``"hdf5"`` is
        produced here through ``preprocess_for_training``, any other value
        is handed to ``create_data_set_to_use``.
    :param train_in_memory: ``in_memory`` preprocessing value used while
        training.
    :param test_format: data format of the evaluation data (handled like
        ``train_format``).
    :param test_in_memory: ``in_memory`` preprocessing value used while
        evaluating and predicting.
    :param tmpdir: directory that receives every generated artifact.
    """
    # Image Inputs
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    image_preprocessing = {
        "in_memory": True,
        "height": 12,
        "width": 12,
        "num_channels": 3,
        "num_processes": 5,
    }
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="stacked_cnn",
            preprocessing=image_preprocessing,
            output_size=16,
            num_filters=8,
        ),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
    ]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        "preprocessing": {},
        TRAINER: {"epochs": 2},
    }

    # unique file names for the train and test data sets
    train_csv_filename = os.path.join(
        tmpdir, "train_{}.csv".format(uuid.uuid4().hex[:10].upper())
    )
    test_csv_filename = os.path.join(
        tmpdir, "test_{}.csv".format(uuid.uuid4().hex[:10].upper())
    )

    # ---- training side ----
    train_data = generate_data(input_features, output_features,
                               train_csv_filename)
    config["input_features"][0]["preprocessing"]["in_memory"] = train_in_memory
    training_set_metadata = None

    backend = LocalTestBackend()
    if train_format != "hdf5":
        train_dataset_to_use = create_data_set_to_use(train_format, train_data)
    else:
        # hdf5 format: preprocess up front and train from the cached file
        preprocessed_train = preprocess_for_training(
            config,
            dataset=train_data,
            backend=backend,
        )
        train_set, _, _, training_set_metadata = preprocessed_train
        train_dataset_to_use = train_set.data_hdf5_fp

    # define Ludwig model
    model = LudwigModel(config=config, backend=backend)
    model.train(
        dataset=train_dataset_to_use,
        training_set_metadata=training_set_metadata,
    )

    # ---- evaluation side ----
    model.config["input_features"][0]["preprocessing"]["in_memory"] = \
        test_in_memory

    test_data = generate_data(input_features, output_features,
                              test_csv_filename)

    if test_format != "hdf5":
        test_dataset_to_use = create_data_set_to_use(test_format, test_data)
    else:
        # hdf5 format: create the hdf5 data set from the generated CSV
        preprocessed_test = preprocess_for_training(
            model.config,
            dataset=test_data,
            backend=backend,
        )
        _, test_set, _, _unused_metadata = preprocessed_test
        test_dataset_to_use = test_set.data_hdf5_fp

    # run functions with the specified data format
    model.evaluate(dataset=test_dataset_to_use)
    model.predict(dataset=test_dataset_to_use)
def test_decoder(test_case):
    """Check the regularization losses reported by an output decoder.

    The decoder is built once per regularizer setting (``None``, ``'l1'``,
    ``'l2'``, ``'l1_l2'``) and run on synthetic combiner outputs. The test
    asserts that no regularizer gives a zero loss, that every regularizer
    gives a positive loss, and that the ``l1`` and ``l2`` losses add up to
    the ``l1_l2`` loss.

    :param test_case: object providing ``syn_data`` (feature generator with
        its args/kwargs), ``regularizer_parm_names`` and
        ``XCoder_other_parms``.
    """
    # reproducible synthetic data set
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

    # create the single synthetic feature under test
    syn_data = test_case.syn_data
    feature = syn_data.feature_generator(
        *syn_data.feature_generator_args,
        **syn_data.feature_generator_kwargs
    )
    features = [feature]
    feature_name = feature['name']

    # first generated row carries the column name, the rest the values
    generated_rows = list(build_synthetic_dataset(BATCH_SIZE, features))
    column_values = [row[0] for row in generated_rows[1:]]
    df = pd.DataFrame({generated_rows[0][0]: column_values})

    # create synthetic combiner layer (rank 2 first, then rank 3, so the
    # random tensors are drawn in a fixed order)
    combiner_outputs_rank2 = {
        'combiner_output': tf.random.normal(
            [BATCH_SIZE, HIDDEN_SIZE],
            dtype=tf.float32
        )
    }
    combiner_outputs_rank3 = {
        'combiner_output': tf.random.normal(
            [BATCH_SIZE, SEQ_SIZE, HIDDEN_SIZE],
            dtype=tf.float32
        ),
        'encoder_output_state': tf.random.normal(
            [BATCH_SIZE, HIDDEN_SIZE],
            dtype=tf.float32
        ),
        'lengths': tf.convert_to_tensor(
            np.array(BATCH_SIZE * [SEQ_SIZE]),
            dtype=tf.int32
        )
    }

    # minimal config sufficient to create output feature
    config = {'input_features': [], 'output_features': features}
    preprocessed = preprocess_for_training(
        config,
        training_set=df,
        skip_save_processed_input=True,
        random_seed=RANDOM_SEED
    )
    training_set, _, _, training_set_metadata = preprocessed

    # run through each type of regularizer
    regularizer_losses = []
    for regularizer in [None, 'l1', 'l2', 'l1_l2']:
        # start with clean slate and make reproducible
        tf.keras.backend.clear_session()
        np.random.seed(RANDOM_SEED)
        tf.random.set_seed(RANDOM_SEED)

        # every regularizer parameter gets the current regularizer; the
        # test case's other keyword parameters are applied on top
        x_coder_kwargs = dict.fromkeys(
            test_case.regularizer_parm_names, regularizer
        )
        x_coder_kwargs.update(test_case.XCoder_other_parms)
        feature.update(x_coder_kwargs)

        if feature['type'] not in SEQUENCE_TYPES:
            combiner_outputs = combiner_outputs_rank2
        else:
            vocab_size = training_set_metadata[feature_name]['vocab_size']
            feature['num_classes'] = vocab_size + 1
            training_set.dataset[feature_name] = \
                training_set.dataset[feature_name].astype(np.int32)
            combiner_outputs = combiner_outputs_rank3

        output_def_obj = build_single_output(feature, None, None)

        targets = training_set.dataset[feature_name]
        if len(targets.shape) == 1:
            targets = targets.reshape(-1, 1)

        decoder_inputs = ((combiner_outputs, None), targets)
        output_def_obj(decoder_inputs, training=True, mask=None)

        regularizer_losses.append(
            tf.reduce_sum(output_def_obj.decoder_obj.losses)
        )

    loss_none, loss_l1, loss_l2, loss_l1_l2 = regularizer_losses

    # check loss regularization loss values
    # None should be zero
    assert loss_none == 0

    # l1, l2 and l1_l2 should be greater than zero
    assert np.all([loss > 0.0 for loss in (loss_l1, loss_l2, loss_l1_l2)])

    # using default setting l1 + l2 == l1_l2 losses
    assert np.isclose(
        loss_l1.numpy() + loss_l2.numpy(),
        loss_l1_l2.numpy()
    )
def test_encoder(test_case):
    """Check the regularization losses reported by an input encoder.

    The encoder is built once per regularizer setting (``None``, ``'l1'``,
    ``'l2'``, ``'l1_l2'``) and run on synthetic inputs. The test asserts
    that no regularizer gives a zero loss, that every regularizer gives a
    positive loss, and that the ``l1`` and ``l2`` losses add up to the
    ``l1_l2`` loss.

    ``IMAGE_DIR`` is recreated before the test and removed afterwards, also
    when the test fails.

    :param test_case: object providing ``syn_data`` (feature generator with
        its args/kwargs), ``regularizer_parm_names`` and
        ``XCoder_other_parms``.
    """
    # set up required directories for images if needed
    shutil.rmtree(IMAGE_DIR, ignore_errors=True)
    os.mkdir(IMAGE_DIR)

    try:
        # reproducible synthetic data set
        np.random.seed(RANDOM_SEED)
        tf.random.set_seed(RANDOM_SEED)

        # create synthetic data for the test
        features = [
            test_case.syn_data.feature_generator(
                *test_case.syn_data.feature_generator_args,
                **test_case.syn_data.feature_generator_kwargs
            )
        ]
        feature_name = features[0]['name']
        data_generator = build_synthetic_dataset(BATCH_SIZE, features)
        data_list = list(data_generator)
        # first generated row carries the column name, the rest the values
        raw_data = [x[0] for x in data_list[1:]]
        df = pd.DataFrame({data_list[0][0]: raw_data})

        # minimal config sufficient to create the input feature
        config = {'input_features': features, 'output_features': []}
        training_set, _, _, training_set_metadata = preprocess_for_training(
            config,
            training_set=df,
            skip_save_processed_input=True,
            random_seed=RANDOM_SEED
        )

        # run through each type of regularizer for the encoder
        regularizer_losses = []
        for regularizer in [None, 'l1', 'l2', 'l1_l2']:
            # start with clean slate and make reproducible
            tf.keras.backend.clear_session()
            np.random.seed(RANDOM_SEED)
            tf.random.set_seed(RANDOM_SEED)

            # setup kwarg for regularizer parms
            x_coder_kwargs = dict(
                zip(test_case.regularizer_parm_names,
                    len(test_case.regularizer_parm_names) * [regularizer])
            )

            # combine other other keyword parameters
            x_coder_kwargs.update(test_case.XCoder_other_parms)
            features[0].update(x_coder_kwargs)

            # shim code to support sequence/sequence like features
            if features[0]['type'] in SEQUENCE_TYPES.union(
                    {'category', 'set'}):
                features[0]['vocab'] = training_set_metadata[feature_name][
                    'idx2str']
                training_set.dataset[feature_name] = \
                    training_set.dataset[feature_name].astype(np.int32)

            input_def_obj = build_single_input(features[0], None)

            inputs = training_set.dataset[feature_name]
            # make sure we are at least rank 2 tensor
            if len(inputs.shape) == 1:
                inputs = inputs.reshape(-1, 1)

            # special handling for image feature
            if features[0]['type'] == 'image':
                inputs = tf.cast(inputs, tf.float32) / 255

            input_def_obj.encoder_obj(inputs)
            regularizer_loss = tf.reduce_sum(input_def_obj.encoder_obj.losses)
            regularizer_losses.append(regularizer_loss)

        # check loss regularization loss values
        # None should be zero
        assert regularizer_losses[0] == 0

        # l1, l2 and l1_l2 should be greater than zero
        assert np.all([t > 0.0 for t in regularizer_losses[1:]])

        # using default setting l1 + l2 == l1_l2 losses
        assert np.isclose(
            regularizer_losses[1].numpy() + regularizer_losses[2].numpy(),
            regularizer_losses[3].numpy()
        )
    finally:
        # cleanup, also when an assertion or the encoder call fails
        shutil.rmtree(IMAGE_DIR, ignore_errors=True)
def get_trainingset_metadata(config, dataset):
    """Preprocess ``dataset`` and return only the training set metadata.

    :param config: configuration dictionary; its ``PREPROCESSING`` entry is
        forwarded as ``preprocessing_params``.
    :param dataset: data source handed to ``preprocess_for_training``.
    :return: the fourth element of the tuple returned by
        ``preprocess_for_training``.
    """
    preprocessed = preprocess_for_training(
        config,
        dataset=dataset,
        preprocessing_params=config[PREPROCESSING],
    )
    # the three data splits are not needed, only the metadata
    _train, _validation, _test, metadata = preprocessed
    return metadata
def train(
        self,
        dataset=None,
        training_set=None,
        validation_set=None,
        test_set=None,
        training_set_metadata=None,
        data_format=None,
        experiment_name='api_experiment',
        model_name='run',
        model_resume_path=None,
        skip_save_training_description=False,
        skip_save_training_statistics=False,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=False,
        output_directory='results',
        random_seed=default_random_seed,
        debug=False,
        **kwargs
):
    """This function is used to perform a full training of the model on the
    specified dataset.

    # Inputs

    :param dataset: (string, dict, DataFrame) source containing the entire
        dataset. If it has a split column, it will be used for splitting
        (0: train, 1: validation, 2: test), otherwise the dataset will be
        randomly split.
    :param training_set: (string, dict, DataFrame) source containing
        training data.
    :param validation_set: (string, dict, DataFrame) source containing
        validation data.
    :param test_set: (string, dict, DataFrame) source containing test data.
    :param training_set_metadata: (string, dict) metadata JSON file or
        loaded metadata. Intermediate preprocess structure containing the
        mappings of the input CSV created the first time a CSV file is used
        in the same directory with the same name and a '.json' extension.
    :param data_format: (string) format to interpret data sources. Will be
        inferred automatically if not specified.
    :param experiment_name: (string) a name for the experiment, used for
        the save directory.
    :param model_name: (string) a name for the model, used for the save
        directory.
    :param model_resume_path: (string) path of the model directory to
        resume training of. If the path does not exist, training starts
        from scratch.
    :param skip_save_training_description: (bool, default: `False`)
        disables saving the description JSON file.
    :param skip_save_training_statistics: (bool, default: `False`) disables
        saving training statistics JSON file.
    :param skip_save_model: (bool, default: `False`) disables saving model
        weights and hyperparameters each time the model improves. By
        default Ludwig saves model weights after each epoch the validation
        metric improves, but if the model is really big that can be time
        consuming. If you do not want to keep the weights and just find out
        what performance a model can get with a set of hyperparameters, use
        this parameter to skip it, but the model will not be loadable later
        on.
    :param skip_save_progress: (bool, default: `False`) disables saving
        progress each epoch. By default Ludwig saves weights and stats
        after each epoch for enabling resuming of training, but if the
        model is really big that can be time consuming and will use twice
        as much space. Use this parameter to skip it, but training cannot
        be resumed later on.
    :param skip_save_log: (bool, default: `False`) disables saving
        TensorBoard logs. By default Ludwig saves logs for the TensorBoard,
        but if it is not needed turning it off can slightly increase the
        overall speed.
    :param skip_save_processed_input: (bool, default: `False`) skips saving
        intermediate HDF5 and JSON files.
    :param output_directory: (string, default: `'results'`) directory that
        contains the results.
    :param random_seed: (int, default: `42`) a random seed that is going to
        be used anywhere there is a call to a random number generator: data
        splitting, parameter initialization and training set shuffling.
    :param debug: (bool, default: `False`) enables debugging mode.
    :param kwargs: accepted for compatibility; not used by this method.

    Data can be provided either as a full dataset through `dataset` or as
    separate splits through `training_set`, `validation_set` and
    `test_set`. A full dataset will be split randomly according to the
    split probabilities defined in the model definition (by default 70%
    training, 10% validation and 20% test) or, if it contains a split
    column, it will be split according to that column (interpreting 0 as
    training, 1 as validation and 2 as test).

    During training the model and statistics will be saved in a directory
    `[output_dir]/[experiment_name]_[model_name]_n` where all variables are
    resolved to user specified ones and `n` is an increasing number
    starting from 0 used to differentiate different runs.

    # Return

    :return: (tuple) a 3-tuple containing:
        - a dictionary of training statistics with one entry each for the
          training, validation and test sets,
        - the preprocessed data as returned by `preprocess_for_training`:
          `(training_set, validation_set, test_set, training_set_metadata)`,
        - the output directory used for this run.
    """
    # setup directories and file names
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            output_directory = model_resume_path
        else:
            if is_on_master():
                logger.info(
                    'Model resume path does not exists, '
                    'starting training from scratch'
                )
            model_resume_path = None

    if model_resume_path is None:
        if is_on_master():
            output_directory = get_output_directory(
                output_directory,
                experiment_name,
                model_name
            )
        else:
            output_directory = None

    # if we are skipping all saving,
    # there is no need to create a directory that will remain empty
    should_create_output_directory = not (
            skip_save_training_description and
            skip_save_training_statistics and
            skip_save_model and
            skip_save_progress and
            skip_save_log and
            skip_save_processed_input
    )

    description_fn = training_stats_fn = model_dir = None
    if is_on_master():
        if should_create_output_directory:
            if not os.path.exists(output_directory):
                os.makedirs(output_directory, exist_ok=True)
        description_fn, training_stats_fn, model_dir = get_file_names(
            output_directory)

    # save description
    if is_on_master():
        description = get_experiment_description(
            self.model_definition,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            random_seed=random_seed
        )
        if not skip_save_training_description:
            save_json(description_fn, description)
        # print description
        logger.info('Experiment name: {}'.format(experiment_name))
        logger.info('Model name: {}'.format(model_name))
        logger.info('Output directory: {}'.format(output_directory))
        logger.info('\n')
        for key, value in description.items():
            logger.info('{}: {}'.format(key, pformat(value, indent=4)))
        logger.info('\n')

    # preprocess
    preprocessed_data = preprocess_for_training(
        self.model_definition,
        dataset=dataset,
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=self.model_definition[PREPROCESSING],
        random_seed=random_seed
    )

    # from here on the split arguments refer to the preprocessed data
    (training_set,
     validation_set,
     test_set,
     training_set_metadata) = preprocessed_data
    self.training_set_metadata = training_set_metadata

    if is_on_master():
        logger.info('Training set: {0}'.format(training_set.size))
        if validation_set is not None:
            logger.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logger.info('Test set: {0}'.format(test_set.size))

    if is_on_master():
        if not skip_save_model:
            # save train set metadata
            os.makedirs(model_dir, exist_ok=True)
            save_json(
                os.path.join(
                    model_dir,
                    TRAIN_SET_METADATA_FILE_NAME
                ),
                training_set_metadata
            )

    contrib_command("train_init", experiment_directory=output_directory,
                    experiment_name=experiment_name, model_name=model_name,
                    output_directory=output_directory,
                    resume=model_resume_path is not None)

    # Build model if not provided
    # if it was provided it means it was already loaded
    if not self.model:
        if is_on_master():
            print_boxed('MODEL', print_fun=logger.debug)
        # update model definition with metadata properties
        update_model_definition_with_metadata(
            self.model_definition,
            training_set_metadata
        )
        self.model = LudwigModel.create_model(self.model_definition,
                                              random_seed=random_seed)

    # init trainer
    # NOTE: the keyword was previously misspelled as `horoovd`, so the
    # horovod handle was not passed under the intended name.
    trainer = Trainer(
        **self.model_definition[TRAINING],
        resume=model_resume_path is not None,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        random_seed=random_seed,
        horovod=self._horovod,
        debug=debug
    )

    contrib_command("train_model", self.model, self.model_definition,
                    self.model_definition_fp)

    # train model
    if is_on_master():
        print_boxed('TRAINING')
        if not skip_save_model:
            self.save_model_definition(model_dir)

    train_stats = trainer.train(
        self.model,
        training_set,
        validation_set=validation_set,
        test_set=test_set,
        save_path=model_dir,
    )

    train_trainset_stats, train_valiset_stats, train_testset_stats = train_stats
    train_stats = {
        TRAINING: train_trainset_stats,
        VALIDATION: train_valiset_stats,
        TEST: train_testset_stats
    }

    # save training statistics
    if is_on_master():
        if not skip_save_training_statistics:
            save_json(training_stats_fn, train_stats)

    # grab the results of the model with highest validation test performance
    validation_field = trainer.validation_field
    validation_metric = trainer.validation_metric
    validation_field_result = train_valiset_stats[validation_field]

    best_function = get_best_function(validation_metric)

    # results of the model with highest validation test performance
    if is_on_master() and validation_set is not None:
        epoch_best_vali_metric, best_vali_metric = best_function(
            enumerate(validation_field_result[validation_metric]),
            key=lambda pair: pair[1]
        )
        logger.info(
            'Best validation model epoch: {0}'.format(
                epoch_best_vali_metric + 1)
        )
        logger.info(
            'Best validation model {0} on validation set {1}: {2}'.format(
                validation_metric, validation_field, best_vali_metric
            ))
        if test_set is not None:
            best_vali_metric_epoch_test_metric = train_testset_stats[
                validation_field][validation_metric][
                epoch_best_vali_metric]

            logger.info(
                'Best validation model {0} on test set {1}: {2}'.format(
                    validation_metric,
                    validation_field,
                    best_vali_metric_epoch_test_metric
                )
            )
        logger.info(
            '\nFinished: {0}_{1}'.format(experiment_name, model_name))
        logger.info('Saved to: {0}'.format(output_directory))

    contrib_command("train_save", output_directory)

    self.training_set_metadata = training_set_metadata

    if not skip_save_model:
        # Load the best weights from saved checkpoint
        self.load_weights(model_dir)

    return train_stats, preprocessed_data, output_directory
def experiment(
        model_definition,
        model_definition_file=None,
        data_csv=None,
        data_train_csv=None,
        data_validation_csv=None,
        data_test_csv=None,
        data_hdf5=None,
        data_train_hdf5=None,
        data_validation_hdf5=None,
        data_test_hdf5=None,
        train_set_metadata_json=None,
        experiment_name='experiment',
        model_name='run',
        model_load_path=None,
        model_resume_path=None,
        skip_save_progress_weights=False,
        skip_save_processed_input=False,
        skip_save_unprocessed_output=False,
        output_directory='results',
        gpus=None,
        gpu_fraction=1.0,
        use_horovod=False,
        random_seed=default_random_seed,
        debug=False,
        **kwargs
):
    """Trains a model on a dataset's training and validation splits and
    uses it to predict on the test split.
    It saves the trained model and the statistics of training and testing.

    :param model_definition: Model definition which defines the different
           parameters of the model, features, preprocessing and training.
           Ignored when `model_definition_file` is given.
    :type model_definition: Dictionary
    :param model_definition_file: The file that specifies the model
           definition. It is a yaml file.
    :type model_definition_file: filepath (str)
    :param data_csv: A CSV file containing the input data which is used to
           train, validate and test a model. The CSV either contains a
           split column or will be split.
    :type data_csv: filepath (str)
    :param data_train_csv: A CSV file containing the input data which is
           used to train a model.
    :type data_train_csv: filepath (str)
    :param data_validation_csv: A CSV file containing the input data which
           is used to validate a model.
    :type data_validation_csv: filepath (str)
    :param data_test_csv: A CSV file containing the input data which is
           used to test a model.
    :type data_test_csv: filepath (str)
    :param data_hdf5: If the dataset is in the hdf5 format, this is used
           instead of the csv file.
    :type data_hdf5: filepath (str)
    :param data_train_hdf5: If the training set is in the hdf5 format, this
           is used instead of the csv file.
    :type data_train_hdf5: filepath (str)
    :param data_validation_hdf5: If the validation set is in the hdf5
           format, this is used instead of the csv file.
    :type data_validation_hdf5: filepath (str)
    :param data_test_hdf5: If the test set is in the hdf5 format, this is
           used instead of the csv file.
    :type data_test_hdf5: filepath (str)
    :param train_set_metadata_json: If the dataset is in hdf5 format, this
           is the associated json file containing metadata.
    :type train_set_metadata_json: filepath (str)
    :param experiment_name: The name for the experiment.
    :type experiment_name: Str
    :param model_name: Name of the model that is being used.
    :type model_name: Str
    :param model_load_path: If this is specified the loaded model will be
           used as initialization (useful for transfer learning).
    :type model_load_path: filepath (str)
    :param model_resume_path: Resumes training of the model from the path
           specified. The difference with model_load_path is that also
           training statistics like the current epoch and the loss and
           performance so far are also resumed effectively continuing a
           previously interrupted training process. If the path does not
           exist, training starts from scratch.
    :type model_resume_path: filepath (str)
    :param skip_save_progress_weights: Skips saving the weights at the end
           of each epoch. If this is true, training cannot be resumed from
           exactly the state at the end of the previous epoch.
    :type skip_save_progress_weights: Boolean
    :param skip_save_processed_input: If a CSV dataset is provided it is
           preprocessed and then saved as an hdf5 and json to avoid running
           the preprocessing again. If this parameter is True, the hdf5 and
           json file are not saved.
    :type skip_save_processed_input: Boolean
    :param skip_save_unprocessed_output: By default predictions and their
           probabilities are saved in both raw unprocessed numpy files
           containing tensors and as postprocessed CSV files (one for each
           output feature). If this parameter is True, only the CSV ones
           are saved and the numpy ones are skipped.
    :type skip_save_unprocessed_output: Boolean
    :param output_directory: The directory that will contain the training
           statistics, the saved model and the training progress files.
    :type output_directory: filepath (str)
    :param gpus: List of GPUs that are available for training.
    :type gpus: List
    :param gpu_fraction: Fraction of the memory of each GPU to use at the
           beginning of the training. The memory may grow elastically.
    :type gpu_fraction: Float
    :param use_horovod: Forwarded unchanged to `train`.
    :type use_horovod: Boolean
    :param random_seed: Random seed used for weights initialization, splits
           and any other random function.
    :type random_seed: Integer
    :param debug: If true turns on tfdbg with inf_or_nan checks.
    :type debug: Boolean
    :param kwargs: accepted for compatibility; not used by this function.
    :return: The experiment directory name (path where results are saved).
    """
    # set input features defaults
    if model_definition_file is not None:
        with open(model_definition_file, 'r') as def_file:
            # safe_load: a model definition is plain data, so there is no
            # need for (unsafe) arbitrary object construction; calling
            # yaml.load() without a Loader is also rejected by recent
            # PyYAML versions.
            model_definition = merge_with_defaults(yaml.safe_load(def_file))
    else:
        model_definition = merge_with_defaults(model_definition)

    # setup directories and file names
    experiment_dir_name = None
    if model_resume_path is not None:
        if os.path.exists(model_resume_path):
            experiment_dir_name = model_resume_path
        else:
            if is_on_master():
                logging.info(
                    'Model resume path does not exists, '
                    'starting training from scratch'
                )
            model_resume_path = None

    if model_resume_path is None:
        if is_on_master():
            experiment_dir_name = get_experiment_dir_name(
                output_directory,
                experiment_name,
                model_name
            )
        else:
            experiment_dir_name = '/'
    description_fn, training_stats_fn, model_dir = get_file_names(
        experiment_dir_name
    )

    # save description
    description = get_experiment_description(
        model_definition,
        data_csv,
        data_train_csv,
        data_validation_csv,
        data_test_csv,
        data_hdf5,
        data_train_hdf5,
        data_validation_hdf5,
        data_test_hdf5,
        train_set_metadata_json,
        random_seed
    )
    if is_on_master():
        save_json(description_fn, description)
        # print description
        logging.info('Experiment name: {}'.format(experiment_name))
        logging.info('Model name: {}'.format(model_name))
        logging.info('Output path: {}'.format(experiment_dir_name))
        logging.info('')
        for key, value in description.items():
            logging.info('{}: {}'.format(key, pformat(value, indent=4)))
        logging.info('')

    # preprocess
    (
        training_set,
        validation_set,
        test_set,
        train_set_metadata
    ) = preprocess_for_training(
        model_definition,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        train_set_metadata_json=train_set_metadata_json,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=model_definition[
            'preprocessing'],
        random_seed=random_seed
    )
    if is_on_master():
        logging.info('Training set: {0}'.format(training_set.size))
        if validation_set is not None:
            logging.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logging.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(model_definition, train_set_metadata)

    # run the experiment
    model, training_results = train(
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        model_definition=model_definition,
        save_path=model_dir,
        model_load_path=model_load_path,
        resume=model_resume_path is not None,
        skip_save_progress_weights=skip_save_progress_weights,
        gpus=gpus,
        gpu_fraction=gpu_fraction,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug
    )
    (
        train_trainset_stats,
        train_valisest_stats,
        train_testset_stats
    ) = training_results

    if is_on_master():
        # save train set metadata
        save_json(
            os.path.join(
                model_dir,
                TRAIN_SET_METADATA_FILE_NAME
            ),
            train_set_metadata
        )

    # grab the results of the model with highest validation test performance
    validation_field = model_definition['training']['validation_field']
    validation_measure = model_definition['training']['validation_measure']
    validation_field_result = train_valisest_stats[validation_field]

    best_function = get_best_function(validation_measure)

    # print results of the model with highest validation test performance
    if is_on_master():
        if validation_set is not None:
            # max or min depending on the measure
            epoch_best_vali_measure, best_vali_measure = best_function(
                enumerate(validation_field_result[validation_measure]),
                key=lambda pair: pair[1]
            )
            logging.info('Best validation model epoch: {0}'.format(
                epoch_best_vali_measure + 1)
            )
            logging.info(
                'Best validation model {0} on validation set {1}: {2}'.format(
                    validation_measure,
                    validation_field,
                    best_vali_measure)
            )

            if test_set is not None:
                best_vali_measure_epoch_test_measure = train_testset_stats[
                    validation_field
                ][validation_measure][epoch_best_vali_measure]
                logging.info(
                    'Best validation model {0} on test set {1}: {2}'.format(
                        validation_measure,
                        validation_field,
                        best_vali_measure_epoch_test_measure
                    )
                )

    # save training statistics
    if is_on_master():
        save_json(
            training_stats_fn,
            {
                'train': train_trainset_stats,
                'validation': train_valisest_stats,
                'test': train_testset_stats
            }
        )

    if test_set is not None:
        # predict
        test_results = predict(
            test_set,
            train_set_metadata,
            model,
            model_definition,
            model_definition['training']['batch_size'],
            only_predictions=False,
            gpus=gpus,
            gpu_fraction=gpu_fraction,
            debug=debug
        )
        # postprocess; only the master writes the unprocessed output
        postprocessed_output = postprocess(
            test_results,
            model_definition['output_features'],
            train_set_metadata,
            experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master()
        )

        if is_on_master():
            print_prediction_results(test_results)

            save_prediction_outputs(postprocessed_output, experiment_dir_name)
            save_prediction_statistics(test_results, experiment_dir_name)

    model.close_session()

    if is_on_master():
        logging.info('\nFinished: {0}_{1}'.format(
            experiment_name, model_name))
        logging.info('Saved to: {}'.format(experiment_dir_name))

    return experiment_dir_name