def setUp(self):
    """Create ``self.trainer``: a ``"gan-test"`` trainer built from inline params."""
    super().setUp()

    # The same value is used for the discriminator's input_dim and for the
    # batch size of both iterators.
    sample_size = 500

    # Each helper returns a *fresh* dict so no nested config object is shared
    # between two keys of the final configuration.
    def sampling_reader(sampler):
        return {"type": "sampling", "sampler": sampler}

    def basic_iterator():
        return {"type": "basic", "batch_size": sample_size}

    def sgd():
        return {"type": "sgd", "lr": 0.1}

    config = {
        "trainer": {"type": "gan-test"},
        "data_reader": sampling_reader({"type": "normal", "mean": 4.0, "stdev": 1.25}),
        "noise_reader": sampling_reader({"type": "uniform"}),
        "generator": {
            "type": "generator-test",
            "input_dim": 1,
            "hidden_dim": 5,
            "output_dim": 1,
        },
        "discriminator": {
            "type": "discriminator-test",
            "input_dim": sample_size,
            "hidden_dim": 10,
        },
        "iterator": basic_iterator(),
        "noise_iterator": basic_iterator(),
        "generator_optimizer": sgd(),
        "discriminator_optimizer": sgd(),
        "num_epochs": 5,
        "batches_per_epoch": 2,
    }

    self.trainer = TrainerBase.from_params(Params(config), self.TEST_DIR)
def setUp(self):
    """Create ``self.trainer``: a ``"multi-task-test"`` trainer over three fixture files."""
    super().setUp()

    data_dir = self.FIXTURES_ROOT / 'data'

    # (task name, field name handed to that task's reader, fixture file name)
    tasks = [
        ("a", "field_a", 'babi.txt'),
        ("b", "field_b", 'conll2000.txt'),
        ("c", "field_c", 'conll2003.txt'),
    ]

    readers = {
        task: {"type": "multi-task-test", "field_name": field_name}
        for task, field_name, _ in tasks
    }
    file_paths = {task: data_dir / file_name for task, _, file_name in tasks}

    config = {
        "model": {"type": "multi-task-test"},
        "iterator": {"type": "homogeneous-batch"},
        "mingler": {"type": "round-robin"},
        "optimizer": {"type": "sgd", "lr": 0.01},
        "train_dataset_readers": readers,
        "train_file_paths": file_paths,
        "trainer": {"type": "multi-task-test"},
    }

    self.trainer = TrainerBase.from_params(Params(config), self.TEST_DIR)
params.to_file(serialize_config_file) dist.barrier() params = ConstParams.from_file(serialize_config_file) log_dir = os.path.join(serialization_dir, str(dist.get_rank())) os.makedirs(log_dir, exist_ok=True) stdout_handler = prepare_global_logging(log_dir, file_friendly_logging=False) prepare_environment(params) cuda_device = params.trainer.get('cuda_device', -1) check_for_gpu(cuda_device) trainer_type = params.trainer.type trainer = TrainerBase.from_params(params, serialization_dir, recover) params_cnt, params_trainable_cnt = count_parameters(trainer.model) print("all params cnt: ", params_cnt) print("all trainable params cnt: ", params_trainable_cnt) metrics = trainer.train() cleanup_global_logging(stdout_handler) if is_master_rank: archive_model(serialization_dir, files_to_archive=params.files_to_archive) dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.

    Raises
    ------
    ValueError
        If ``evaluate_on_test`` is set in ``params`` while the configured trainer type is
        not ``"default"``.
    KeyboardInterrupt
        Re-raised after attempting to archive the current best weights, if training is
        interrupted by the user.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir, file_friendly_logging)
    prepare_environment(params)

    # Peek at the raw dict (no pop) so the trainer section is still intact when the
    # trainer is built below.  The ``{}`` default keeps a config without a "trainer"
    # section from failing here with an AttributeError on ``None``.
    cuda_device = params.params.get('trainer', {}).get('cuda_device', -1)
    check_for_gpu(cuda_device)

    # Persist the configuration before any key is popped from ``params``.
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params,  # pylint: disable=no-member
                                           serialization_dir,
                                           recover,
                                           cache_directory,
                                           cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        # No test-set evaluation on this path; bind both names so neither is left undefined.
        evaluation_iterator = evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logger.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="")

        # Test metrics are merged into the training metrics under a "test_" prefix.
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
"type": "discriminator-test", "input_dim": sample_size, "hidden_dim": 10, "preprocessing": "moments" }, "iterator": { "type": "basic", "batch_size": sample_size }, "noise_iterator": { "type": "basic", "batch_size": sample_size }, "generator_optimizer": { "type": "sgd", "lr": 0.1 }, "discriminator_optimizer": { "type": "sgd", "lr": 0.1 }, "num_epochs": 1000, "batches_per_epoch": 2 }) import tempfile serialization_dir_ = tempfile.mkdtemp() trainer_ = TrainerBase.from_params(params_, serialization_dir_) metrics_ = trainer_.train() print(metrics_)
def _train_worker(
        process_rank: int,
        params: Params,
        serialization_dir: str,
        file_friendly_logging: bool = False,
        recover: bool = False,
        cache_directory: str = None,
        cache_prefix: str = None,
        include_package: List[str] = None,
        node_rank: int = 0,
        master_addr: str = "127.0.0.1",
        master_port: int = 29500,
        world_size: int = 1,
        distributed_device_ids: List[str] = None,
) -> Optional[Model]:
    """
    Helper to train the configured model/experiment. In distributed mode, this is spawned as a
    worker process. In a single GPU experiment, this returns the ``Model`` object and in distributed
    training, nothing is returned.

    # Parameters

    process_rank : ``int``
        The process index that is initialized using the GPU device id.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    include_package : ``List[str]``, optional
        In distributed mode, since this function would have been spawned as a separate process,
        the extra imports need to be done again. NOTE: This does not have any effect in single
        GPU training.
    node_rank : ``int``, optional
        Rank of the node
    master_addr : ``str``, optional (default="127.0.0.1")
        Host part of the ``tcp://`` URL used as ``init_method`` when the process group is
        initialized. Only used when ``world_size > 1``.
    master_port : ``int``, optional (default=29500)
        Port part of the ``tcp://`` URL used as ``init_method`` when the process group is
        initialized. Only used when ``world_size > 1``.
    world_size : ``int``, optional
        The number of processes involved in distributed training.
    distributed_device_ids : ``List[str]``, optional
        Device ids used on this node. Its length is taken as the number of processes per node
        and it is indexed with ``process_rank`` to pick this worker's GPU. Must be provided
        when ``world_size > 1``; it is not read otherwise.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights. ``None`` in distributed mode.

    # Raises

    ValueError
        If ``evaluate_on_test`` is set in ``params`` while the configured trainer type is
        not ``"default"``.
    """
    prepare_global_logging(serialization_dir,
                           file_friendly_logging,
                           rank=process_rank,
                           world_size=world_size)
    prepare_environment(params)

    distributed = world_size > 1

    # not using `allennlp.common.util.is_master` as the process group is yet to be initialized
    master = process_rank == 0

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    if distributed:
        # Since the worker is spawned and not forked, the extra imports
        # need to be done again.
        if include_package is not None:
            for package_name in include_package:
                import_submodules(package_name)

        num_procs_per_node = len(distributed_device_ids)
        # The Unique identifier of the worker process among all the processes in the
        # distributed training group is computed here. This is used while initializing
        # the process group using `init_process_group`
        global_rank = node_rank * num_procs_per_node + process_rank

        # In distributed training, the configured device is always going to be a list.
        # The corresponding gpu id for the particular worker is obtained by picking the id
        # from the device list with the rank as index
        gpu_id = distributed_device_ids[process_rank]  # type: ignore

        # Till now, "cuda_device" might not be set in the trainer params.
        # But a worker trainer needs to only know about its specific GPU id.
        params["trainer"]["cuda_device"] = gpu_id
        params["trainer"]["world_size"] = world_size
        params["trainer"]["distributed"] = True

        torch.cuda.set_device(gpu_id)
        dist.init_process_group(
            backend="nccl",
            init_method=f"tcp://{master_addr}:{master_port}",
            world_size=world_size,
            rank=global_rank,
        )
        logger.info(f"Process group of world size {world_size} initialized "
                    f"for distributed training in worker {global_rank}")

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params, serialization_dir, recover,
                                           cache_directory, cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator,
        )

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        # No test-set evaluation on this path; bind both names so neither is left undefined.
        evaluation_iterator = evaluation_dataset = None

    params.assert_empty("base train command")

    try:
        if distributed:  # let the setup get ready for all the workers
            dist.barrier()

        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        # Only the rank-0 worker writes the archive.
        if master and os.path.exists(
                os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logger.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluation and the metrics dump are done by the rank-0 worker only.
    if master:
        if evaluation_dataset and evaluate_on_test:
            logger.info("The model will be evaluated using the best epoch weights.")
            test_metrics = evaluate(
                trainer.model,
                evaluation_dataset,
                evaluation_iterator,
                cuda_device=trainer.cuda_device,
                # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
                batch_weight_key="",
            )

            # Test metrics are merged into the training metrics under a "test_" prefix.
            for key, value in test_metrics.items():
                metrics["test_" + key] = value

        elif evaluation_dataset:
            logger.info(
                "To evaluate on the test set after training, pass the "
                "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
            )

        dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    if not distributed:
        return trainer.model

    return None  # to make mypy happy
def test_trainer_from_base_class_params(self):
    """``TrainerBase.from_params`` can be driven by the simple-tagger experiment config."""
    config_path = self.FIXTURES_ROOT / "simple_tagger" / "experiment.json"
    experiment_params = Params.from_file(config_path)

    # No assertion on the result: the check is that construction through the
    # base class completes without raising.
    TrainerBase.from_params(experiment_params, self.TEST_DIR)
num_epochs=num_epochs, shuffle=shuffle, serialization_dir=serialization_dir, cuda_device=cuda_device, callbacks=callbacks, distributed=distributed, rank=rank, world_size=world_size, ) class GanCallbackTrainerTest(ModelTestCase): def test_gan_can_train(self): params = config(batches_per_epoch=2, num_epochs=2) train_model(params, self.TEST_DIR) if __name__ == "__main__": # Run it yourself, it's fun! # # python -m allennlp.tests.training.gan_callback_trainer_test # serialization_dir = tempfile.mkdtemp() params = config() trainer = TrainerBase.from_params(params=params, serialization_dir=serialization_dir) metrics = trainer.train() print(metrics)
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                debate_mode: List[str] = ('f',),
                judge_filename: str = None,
                update_judge: bool = False,
                eval_mode: bool = False,
                reward_method: str = None,
                detach_value_head: bool = False,
                breakpoint_level: int = 0,
                search_outputs_path: str = None,
                accumulation_steps: int = 1,
                multi_gpu: bool = False,
                choice_mode: str = None,
                qa_loss_weight: float = 0.,
                influence_reward: bool = False,
                theory_of_mind: bool = False,
                num_pred_rounds: int = -1,
                x_order_prob: float = 0.,
                require_action: bool = False,
                single_shot: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    debate_mode : ``List[str]``, optional (default=``('f',)``)
        List of debate turns (e.g. aa, ar, rr, Ar) => capitalization implies search agent.
        A bare string is treated as a single turn.
    judge_filename : ``str``, optional (default=None)
        Path to judge config or pre-trained judge model. If config, judge trained during debate.
        Necessary parameter if running in debate mode.
    update_judge : ``bool``, optional (default=False)
        Boolean whether or not to update Judge model during debate training.
    eval_mode : ``bool``, optional (default=False)
        Boolean whether or not to run in eval-only mode, on test data. Does not update/train any
        of the models.  In this mode no model archive is written and metrics go to
        ``metrics.eval.d=<turns joined by '-'>.json`` instead of ``metrics.json``.
    reward_method : ``str``, optional (default=None)
        Choice of reward function (RL) or loss function (Supervised Learning) for training debate
        agents
    detach_value_head : ``bool``, optional (default=False)
        Boolean whether or not to detatch value function gradient updates from the policy network.
        This prevents value function gradients from affecting policy network parameters.
    breakpoint_level : ``int`` optional (default=0)
        Debugging option to set breakpoint sensitivity (0 - no breakpoints).
    search_outputs_path : ``str`` optional (default=None)
        Path to file with search predictions for each agent - necessary for supervised training
    accumulation_steps : ``int`` (default=1)
        Number of gradient steps to accumulate over before performing an update. Poor-man's
        batching for instances where number of examples per batch is small (limited GPU memory)
    multi_gpu : ``bool`` (default=False)
        Boolean whether or not to run models/training in model parallel mode. Requires specifying
        GPU allocations for trainer, judge, and debaters in the training config file (see
        training_config/bidaf.race.size=0.5.gpu=2.jsonnet for example usage).
    choice_mode : ``str``, optional (default=None)
        Forwarded unchanged to ``Trainer.from_params``.
    qa_loss_weight : ``float``, optional (default=0.)
        Forwarded to ``TrainerPieces.from_params``.  A warning is issued when it is positive
        but no turn in ``debate_mode`` contains ``'l'`` or ``'w'``.
    influence_reward : ``bool``, optional (default=False)
        Forwarded unchanged to ``TrainerPieces.from_params``.
    theory_of_mind : ``bool``, optional (default=False)
        Forwarded unchanged to ``TrainerPieces.from_params``.
    num_pred_rounds : ``int``, optional (default=-1)
        Forwarded unchanged to ``Trainer.from_params``.  Must be left at ``-1`` when
        ``single_shot`` is set.
    x_order_prob : ``float``, optional (default=0.)
        Forwarded unchanged to ``Trainer.from_params``.
    require_action : ``bool``, optional (default=False)
        Forwarded unchanged to ``Trainer.from_params``.
    single_shot : ``bool``, optional (default=False)
        Forwarded to ``Trainer.from_params``.  Only supported together with ``eval_mode``.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    assert (not single_shot) or eval_mode, \
        'Using single shot prediction outside eval_mode not yet supported.'
    assert (not single_shot) or (num_pred_rounds == -1), \
        'Using single shot prediction for a specific number of rounds is not yet supported.'

    # Normalise to a real list of turns.  The previous default, ``('f')``, was just the
    # string 'f' (no trailing comma), so a str rather than a list was forwarded to the
    # dataset reader and trainer; any bare string would also be iterated per character.
    debate_mode = [debate_mode] if isinstance(debate_mode, str) else list(debate_mode)

    # Get number of debate turns, and assert that not performing judge-only training
    num_no_qa_turns = sum(('l' in debate_turn) or ('w' in debate_turn)
                          for debate_turn in debate_mode)
    if (qa_loss_weight > 0) and (num_no_qa_turns == 0):
        warnings.warn(
            'Unused argument qa_loss_weight in debate mode ' + str(debate_mode) +
            '. If this was unintentional, please remove the -q flag.',
            UserWarning)
    # True when none of these turn characters occurs anywhere in debate_mode.
    not_using_trained_debater = set('ablwⅰⅱⅲⅳ').isdisjoint(''.join(debate_mode))
    if (judge_filename is not None) and not_using_trained_debater:
        warnings.warn(
            'Unnecessary to have debaters in debate mode ' + str(debate_mode) +
            '. If this was unintentional, please remove the -j flag.',
            UserWarning)

    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Check that all Desired CUDA Devices exist => trainer => cuda_devices should contain list of required devices
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    # Build Allocation Dictionary (to be passed to all future functions)
    if multi_gpu:
        gpu_allocations, allocation_dict = params.params.pop('gpu_allocations', {}), {}
        assert len(gpu_allocations) == 3, 'Must set gpu_allocations in config if multi-gpu'
        for k in ['debate', 'judge', 'trainer']:
            assert gpu_allocations[k] in cuda_device, \
                "Desired GPU not available... current: %s" % str(cuda_device)
            allocation_dict[k] = gpu_allocations[k]
    else:
        allocation_dict = {}

    # Persist the configuration before any further key is popped from ``params``.
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        params['dataset_reader']['debate_mode'] = debate_mode  # If debate_mode requires sample duplicates
        pieces = TrainerPieces.from_params(params,
                                           serialization_dir,
                                           cuda_device,
                                           recover,
                                           judge_filename=judge_filename,
                                           update_judge=update_judge,
                                           eval_mode=eval_mode,
                                           reward_method=reward_method,
                                           detach_value_head=detach_value_head,
                                           allocation_dict=allocation_dict,
                                           qa_loss_weight=qa_loss_weight,
                                           influence_reward=influence_reward,
                                           theory_of_mind=theory_of_mind)  # pylint: disable=no-member
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            debate_mode=debate_mode,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator,
            eval_mode=eval_mode,
            breakpoint_level=breakpoint_level,
            search_outputs_path=search_outputs_path,
            accumulation_steps=accumulation_steps,
            allocation_dict=allocation_dict,
            choice_mode=choice_mode,
            num_pred_rounds=num_pred_rounds,
            x_order_prob=x_order_prob,
            require_action=require_action,
            single_shot=single_shot)
        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        assert (len(debate_mode) == 1) and (debate_mode[0] == 'f'), \
            'TrainerBase untested for debate training.'
        trainer = TrainerBase.from_params(params, serialization_dir, recover)
        evaluation_iterator = evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)) and not eval_mode:
            logger.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            batch_weight_key="")

        # Test metrics are merged into the training metrics under a "test_" prefix.
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    if not eval_mode:
        archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)
    else:
        # Eval-only runs get a metrics file named after the debate turns and no archive.
        dump_metrics(os.path.join(serialization_dir,
                                  "metrics.eval.d=" + '-'.join(debate_mode) + ".json"),
                     metrics,
                     log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                del_models: bool = False,
                del_vocab: bool = False,
                convert: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    del_models : ``bool``, optional (default=False)
        If ``True``, we will delete existing models and logs if they already exist: every
        regular file directly inside ``serialization_dir`` whose name does not end in
        ``config.yaml``, plus the ``log`` sub-directory.
    del_vocab : ``bool``, optional (default=False)
        If ``True``, we will delete existing vocabulary (the ``vocabulary`` sub-directory)
        if it already exists.
    convert : ``bool``, optional (default=False)
        If ``True``, no training is done: the freshly built trainer saves a checkpoint for
        epoch 0, a model archive is created, and the process exits with status 0.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.

    Raises
    ------
    SystemExit
        In ``convert`` mode, once the archive has been created.
    KeyboardInterrupt, RuntimeError
        Re-raised from training after an attempt to archive the current weights.
    """
    # Peek at the raw dict (no pop).  The ``{}`` default keeps a config without a
    # "trainer" section from failing here with an AttributeError on ``None``.
    cuda_device = params.params.get('trainer', {}).get('cuda_device', -1)
    if cuda_device >= 0:
        check_for_gpu(cuda_device)
        torch.cuda.set_device(cuda_device)

    # Sometimes we might change the config a bit but still want to continue training
    # if recover:
    #     create_serialization_dir(
    #         params, serialization_dir, recover, del_models)

    if del_models:
        for path in glob(f'{serialization_dir}/*'):
            if os.path.isfile(path) and not path.endswith('config.yaml'):
                os.remove(path)
        log_path = f'{serialization_dir}/log'
        if os.path.isdir(log_path):
            shutil.rmtree(log_path)

    if del_vocab:
        vocab_path = f'{serialization_dir}/vocabulary'
        if os.path.isdir(vocab_path):
            shutil.rmtree(vocab_path)

    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Unconditional device check kept from the original flow; ``params`` has not been
    # modified since ``cuda_device`` was read above, so the value is reused as-is.
    check_for_gpu(cuda_device)

    # Persist the configuration before any key is popped from ``params``.
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    # Only the TrainerPieces-based branches provide a batch weight key; bind a default so
    # the name is defined on every path (it is unused when there is no test dataset).
    batch_weight_key = ""

    if trainer_type == 'default':
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.corpus.train,
            validation_data=pieces.corpus.valid,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)
        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.corpus.test
        batch_weight_key = pieces.batch_weight_key

    elif trainer_type == 'trainer_fp16_single':
        # Drop the "type" key before the trainer section is handed on to the pieces/trainer.
        params.get("trainer").pop('type')
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member
        trainer = TrainerF16SingleTask.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            files_to_archive=params.files_to_archive,
            iterator=pieces.iterator,
            train_data=pieces.corpus.train,
            validation_data=pieces.corpus.valid,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)
        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.corpus.test
        batch_weight_key = pieces.batch_weight_key

    else:
        trainer = TrainerBase.from_params(params, serialization_dir, recover)
        # TODO(joelgrus): handle evaluation in the general case
        evaluation_iterator = evaluation_dataset = None

    params.assert_empty('base train command')

    if convert:
        logger.info('In conversion mode.')
        trainer._save_checkpoint(epoch=0)  # pylint: disable=protected-access
        create_model_archive(serialization_dir, params)
        sys.exit(0)

    try:
        metrics = trainer.train()
    except (KeyboardInterrupt, RuntimeError):
        # if we have completed an epoch, try to create a model archive.
        logger.info("Training stopped. Attempting to create "
                    "a model archive using the current best epoch weights.")
        create_model_archive(serialization_dir, params)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key=batch_weight_key)

        # Test metrics are merged into the training metrics under a "test_" prefix.
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model