def from_params(cls,  # type: ignore
                params: Params,
                serialization_dir: str,
                recover: bool = False):
    # pylint: disable=arguments-differ
    pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member
    return NoOpTrainer(serialization_dir, pieces.model)
def from_params(cls,  # type: ignore
                params: Params,
                serialization_dir: str,
                recover: bool = False):
    # pylint: disable=arguments-differ
    typ3 = params.get("trainer", {}).pop("type", "default")

    if typ3 == "default":
        # Special logic to keep old from_params behavior.
        from allennlp.training.trainer import Trainer, TrainerPieces

        pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member
        return Trainer.from_params(model=pieces.model,
                                   serialization_dir=serialization_dir,
                                   iterator=pieces.iterator,
                                   train_data=pieces.train_dataset,
                                   validation_data=pieces.validation_dataset,
                                   params=pieces.params,
                                   validation_iterator=pieces.validation_iterator)
    else:
        klass = TrainerBase.by_name(typ3)
        # Explicit check to prevent recursion.
        is_overriden = klass.from_params.__func__ != TrainerBase.from_params.__func__  # type: ignore
        assert is_overriden, f"Class {klass.__name__} must override `from_params`."
        return klass.from_params(params, serialization_dir, recover)
def from_params(cls,  # type: ignore
                params: Params,
                serialization_dir: str,
                recover: bool = False):
    # pylint: disable=arguments-differ
    typ3 = params.get("trainer", {}).pop("type", "default")

    if typ3 == "default":
        # Special logic to keep old from_params behavior.
        from allennlp.training.trainer import Trainer, TrainerPieces

        pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member
        return Trainer.from_params(model=pieces.model,
                                   serialization_dir=serialization_dir,
                                   iterator=pieces.iterator,
                                   train_data=pieces.train_dataset,
                                   validation_data=pieces.validation_dataset,
                                   params=pieces.params,
                                   validation_iterator=pieces.validation_iterator)
    else:
        return TrainerBase.by_name(typ3).from_params(params, serialization_dir, recover)
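The non-default branch above resolves the trainer class by its registered name and expects that class to provide its own `from_params`. Below is a minimal sketch of such a registration, assuming the AllenNLP 0.8/0.9-era module layout used by these snippets; the registered name "my_custom" and the placeholder training loop are hypothetical, for illustration only.

from typing import Any, Dict

from allennlp.common import Params
from allennlp.training.trainer_base import TrainerBase


@TrainerBase.register("my_custom")  # hypothetical name, referenced as "trainer": {"type": "my_custom"} in a config
class MyCustomTrainer(TrainerBase):
    def train(self) -> Dict[str, Any]:
        # Placeholder training loop; a real trainer would fit the model and return its metrics here.
        return {}

    @classmethod
    def from_params(cls,  # type: ignore
                    params: Params,
                    serialization_dir: str,
                    recover: bool = False) -> 'MyCustomTrainer':
        # Must be overridden; otherwise the base-class dispatch above would call itself recursively.
        params.pop("trainer", None)  # consume the remaining trainer block
        return cls(serialization_dir)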
def from_params(cls, params, serialization_dir, recover):
    pieces = TrainerPieces.from_params(params, serialization_dir, recover)
    return cls.from_params_old(model=pieces.model,
                               serialization_dir=serialization_dir,
                               iterator=pieces.iterator,
                               train_data=pieces.train_dataset,
                               validation_data=pieces.validation_dataset,
                               params=pieces.params,
                               validation_iterator=pieces.validation_iterator)
def from_params(cls,  # type: ignore
                params: Params,
                serialization_dir: str,
                recover: bool = False) -> Tuple['CrossValidationTrainer', TrainerPieces]:
    # pylint: disable=arguments-differ
    trainer_pieces = TrainerPieces.from_params(params, serialization_dir, recover)  # pylint: disable=no-member

    if not isinstance(trainer_pieces.train_dataset, list):
        raise ValueError("The training dataset must be a list. DatasetReader's lazy mode is not supported.")
    if trainer_pieces.validation_dataset and not isinstance(trainer_pieces.validation_dataset, list):
        raise ValueError("The validation dataset must be a list. DatasetReader's lazy mode is not supported.")

    trainer_params = trainer_pieces.params
    subtrainer_params = trainer_params.pop('trainer')
    cross_validation_splitter = CrossValidationSplitter.from_params(trainer_params.pop('splitter'))
    group_key = trainer_params.pop('group_key', None)
    # If there's a test dataset then probably we want to leave the model trained with all the data at the end.
    leave_model_trained = trainer_params.pop_bool('leave_model_trained', bool(trainer_pieces.test_dataset))
    trainer_params.assert_empty(__name__)
    params.assert_empty(__name__)

    return cls(model=trainer_pieces.model,  # type: ignore
               train_dataset=trainer_pieces.train_dataset,
               iterator=trainer_pieces.iterator,
               subtrainer_params=subtrainer_params,
               cross_validation_splitter=cross_validation_splitter,
               serialization_dir=serialization_dir,
               group_key=group_key,
               leave_model_trained=leave_model_trained,
               validation_dataset=trainer_pieces.validation_dataset,
               recover=recover), trainer_pieces
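For reference, the trainer block consumed by this from_params might look roughly like the sketch below. Only the keys popped above (type, splitter, group_key, leave_model_trained, and the nested trainer block) come from the code; the registered type names, splitter settings, and sub-trainer options are illustrative guesses.

from allennlp.common import Params

# Rough sketch of a "trainer" config block this from_params would accept; values are illustrative.
trainer_config = Params({
    "type": "cross_validation",                      # assumed registered name for CrossValidationTrainer
    "splitter": {"type": "k_fold", "n_splits": 5},   # hypothetical CrossValidationSplitter config
    "group_key": None,                               # optional; keeps grouped instances in the same fold
    "leave_model_trained": True,                     # defaults to bool(test_dataset) when omitted
    "trainer": {                                     # sub-trainer params forwarded to each fold's trainer
        "num_epochs": 10,
        "optimizer": "adam",
    },
})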
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory. This is only intended for use when something actually crashed during
        the middle of a run. For continuing training a model on new data, see the
        ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model : ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params,  # pylint: disable=no-member
                                           serialization_dir,
                                           recover,
                                           cache_directory,
                                           cache_prefix)
        trainer = Trainer.from_params(
                model=pieces.model,
                serialization_dir=serialization_dir,
                iterator=pieces.iterator,
                train_data=pieces.train_dataset,
                validation_data=pieces.validation_dataset,
                params=pieces.params,
                validation_iterator=pieces.validation_iterator)
    else:
        # Workaround to obtain the evaluation parts.
        pieces = TrainerPieces.from_params(params.duplicate(),  # pylint: disable=no-member
                                           serialization_dir,
                                           recover,
                                           cache_directory,
                                           cache_prefix)
        trainer = TrainerBase.from_params(params, serialization_dir, recover)

    evaluation_iterator = pieces.validation_iterator or pieces.iterator
    evaluation_dataset = pieces.test_dataset

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(trainer.model, evaluation_dataset, evaluation_iterator,
                                cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                                # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
                                batch_weight_key="")

        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif evaluation_dataset:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
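As a usage sketch, this function can also be driven directly from Python instead of the allennlp train CLI; the config path and output directory below are placeholders.

from allennlp.common import Params

# Placeholder paths; normally this is invoked via `allennlp train`.
params = Params.from_file("experiments/my_experiment.jsonnet")
best_model = train_model(params,
                         serialization_dir="output/my_experiment",
                         file_friendly_logging=True,
                         recover=False,
                         force=False)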
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                debate_mode: List[str] = ('f',),
                judge_filename: str = None,
                update_judge: bool = False,
                eval_mode: bool = False,
                reward_method: str = None,
                detach_value_head: bool = False,
                breakpoint_level: int = 0,
                search_outputs_path: str = None,
                accumulation_steps: int = 1,
                multi_gpu: bool = False,
                choice_mode: str = None,
                qa_loss_weight: float = 0.,
                influence_reward: bool = False,
                theory_of_mind: bool = False,
                num_pred_rounds: int = -1,
                x_order_prob: float = 0.,
                require_action: bool = False,
                single_shot: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    debate_mode : ``List[str]``
        List of debate turns (e.g. aa, ar, rr, Ar). Capitalization implies a search agent.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory. This is only intended for use when something actually crashed during
        the middle of a run. For continuing training a model on new data, see the
        ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    judge_filename : ``str``, optional (default=None)
        Path to a judge config or a pre-trained judge model. If a config, the judge is trained
        during debate. Required when running in debate mode.
    update_judge : ``bool``, optional (default=False)
        Whether or not to update the judge model during debate training.
    eval_mode : ``bool``, optional (default=False)
        Whether or not to run in eval-only mode, on test data. Does not update/train any of the
        models.
    reward_method : ``str``, optional (default=None)
        Choice of reward function (RL) or loss function (supervised learning) for training debate
        agents.
    detach_value_head : ``bool``, optional (default=False)
        Whether or not to detach value-function gradient updates from the policy network. This
        prevents value-function gradients from affecting policy network parameters.
    breakpoint_level : ``int``, optional (default=0)
        Debugging option to set breakpoint sensitivity (0 - no breakpoints).
    search_outputs_path : ``str``, optional (default=None)
        Path to a file with search predictions for each agent - necessary for supervised training.
    accumulation_steps : ``int``, optional (default=1)
        Number of gradient steps to accumulate over before performing an update. Poor-man's
        batching for instances where the number of examples per batch is small (limited GPU memory).
    multi_gpu : ``bool``, optional (default=False)
        Whether or not to run models/training in model-parallel mode. Requires specifying GPU
        allocations for trainer, judge, and debaters in the training config file (see
        training_config/bidaf.race.size=0.5.gpu=2.jsonnet for example usage).

    Returns
    -------
    best_model : ``Model``
        The model with the best epoch weights.
    """
    assert (not single_shot) or eval_mode, \
        'Using single shot prediction outside eval_mode not yet supported.'
    assert (not single_shot) or (num_pred_rounds == -1), \
        'Using single shot prediction for a specific number of rounds is not yet supported.'

    # Get number of debate turns, and assert that not performing judge-only training
    num_no_qa_turns = sum([(('l' in debate_turn) or ('w' in debate_turn))
                           for debate_turn in debate_mode])
    if (qa_loss_weight > 0) and (num_no_qa_turns == 0):
        warnings.warn('Unused argument qa_loss_weight in debate mode ' + str(debate_mode) +
                      '. If this was unintentional, please remove the -q flag.', UserWarning)
    not_using_trained_debater = len(set('ablwⅰⅱⅲⅳ').intersection(''.join(debate_mode))) == 0
    if (judge_filename is not None) and not_using_trained_debater:
        warnings.warn('Unnecessary to have debaters in debate mode ' + str(debate_mode) +
                      '. If this was unintentional, please remove the -j flag.', UserWarning)

    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Check that all desired CUDA devices exist => trainer's cuda_device should contain the list of required devices
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    # Build Allocation Dictionary (to be passed to all future functions)
    if multi_gpu:
        gpu_allocations, allocation_dict = params.params.pop('gpu_allocations', {}), {}
        assert len(gpu_allocations) == 3, 'Must set gpu_allocations in config if multi-gpu'
        for k in ['debate', 'judge', 'trainer']:
            assert gpu_allocations[k] in cuda_device, \
                "Desired GPU not available... current: %s" % str(cuda_device)
            allocation_dict[k] = gpu_allocations[k]
    else:
        allocation_dict = {}

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        params['dataset_reader']['debate_mode'] = debate_mode  # In case debate_mode requires sample duplicates
        pieces = TrainerPieces.from_params(params,  # pylint: disable=no-member
                                           serialization_dir,
                                           cuda_device,
                                           recover,
                                           judge_filename=judge_filename,
                                           update_judge=update_judge,
                                           eval_mode=eval_mode,
                                           reward_method=reward_method,
                                           detach_value_head=detach_value_head,
                                           allocation_dict=allocation_dict,
                                           qa_loss_weight=qa_loss_weight,
                                           influence_reward=influence_reward,
                                           theory_of_mind=theory_of_mind)
        trainer = Trainer.from_params(
                model=pieces.model,
                serialization_dir=serialization_dir,
                debate_mode=debate_mode,
                iterator=pieces.iterator,
                train_data=pieces.train_dataset,
                validation_data=pieces.validation_dataset,
                params=pieces.params,
                validation_iterator=pieces.validation_iterator,
                eval_mode=eval_mode,
                breakpoint_level=breakpoint_level,
                search_outputs_path=search_outputs_path,
                accumulation_steps=accumulation_steps,
                allocation_dict=allocation_dict,
                choice_mode=choice_mode,
                num_pred_rounds=num_pred_rounds,
                x_order_prob=x_order_prob,
                require_action=require_action,
                single_shot=single_shot)
        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        assert (len(debate_mode) == 1) and (debate_mode[0] == 'f'), \
            'TrainerBase untested for debate training.'
        trainer = TrainerBase.from_params(params, serialization_dir, recover)
        evaluation_iterator = evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)) and not eval_mode:
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(trainer.model, evaluation_dataset, evaluation_iterator,
                                cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                                batch_weight_key="")
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif evaluation_dataset:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    if not eval_mode:
        archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)
    else:
        dump_metrics(os.path.join(serialization_dir,
                                  "metrics.eval.d=" + '-'.join(debate_mode) + ".json"),
                     metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
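A corresponding usage sketch for this debate-mode variant; every path and argument value below is a placeholder chosen only to illustrate the extra debate arguments.

from allennlp.common import Params

# Placeholder invocation of the debate-mode train_model; paths and settings are illustrative.
params = Params.from_file("training_config/my_debate_experiment.jsonnet")
best_model = train_model(params,
                         serialization_dir="output/debate_run",
                         debate_mode=['ar'],                    # a debate-turn string per the docstring's examples; uppercase letters denote search agents
                         judge_filename="models/judge.tar.gz",  # pre-trained judge archive (or a judge config)
                         update_judge=False,
                         eval_mode=False,
                         accumulation_steps=2)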