def finish(self, metrics: Dict[str, Any]) -> None:
    # import wandb here to be sure that it was initialized
    # before this line was executed
    import wandb  # noqa

    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,  # type: ignore
            cuda_device=self.trainer.cuda_device,  # type: ignore
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )

    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"),
        metrics,
        log=True,
    )

    # update the summary with all metrics
    wandb.run.summary.update(metrics)
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    dump_metrics(args.output_file, metrics, log=True)

    return metrics
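# Hypothetical usage sketch (not part of the original code): evaluate_from_args is normally
# reached via the `allennlp evaluate` subcommand, but it can also be driven directly with an
# argparse.Namespace carrying the attributes read above. All paths below are placeholders.
import argparse

eval_args = argparse.Namespace(
    archive_file="model.tar.gz",      # placeholder: trained model archive
    weights_file=None,                # use the weights stored in the archive
    cuda_device=-1,                   # evaluate on CPU
    overrides="",                     # no config overrides
    input_file="test_data.jsonl",     # placeholder: evaluation data path
    embedding_sources_mapping="",     # no embedding-source mapping
    extend_vocab=False,               # do not extend the vocabulary with test instances
    batch_size=None,                  # keep the batch size from the config
    batch_weight_key="",              # unweighted batch averaging
    output_file="test_metrics.json",  # where dump_metrics writes the results
)
test_metrics = evaluate_from_args(eval_args)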
def main(cuda_device, testing=False, testing_vocab=False, experiments=None):
    '''Make training happen'''
    if experiments:
        save_dir = experiments
        os.system('cp training_config/coref.jsonnet ' + save_dir)
        for x in [10, 5, 0]:
            print("Running with " + str(x) + "% of labels")
            serialization_dir = os.path.join(save_dir, "temp_" + str(cuda_device))
            os.system('rm -rf ' + serialization_dir)
            params = Params.from_file(os.path.join(save_dir, 'coref.jsonnet'))
            params.params['trainer']['cuda_device'] = cuda_device
            params.params['trainer']['active_learning']['use_percent'] = True
            params.params['trainer']['active_learning']['num_labels'] = round(0.01 * x, 2)
            best_model, metrics = train_model(params, serialization_dir)
            dump_metrics(os.path.join(save_dir, str(x) + ".json"), metrics, log=True)
    else:
        params = Params.from_file('training_config/coref.jsonnet')
        if testing or testing_vocab:
            params.params['trainer']['active_learning']['epoch_interval'] = 0
        if testing:
            params.params['model']['text_field_embedder']['token_embedders']['tokens'] = {
                'type': 'embedding',
                'embedding_dim': 300
            }
        serialization_dir = tempfile.mkdtemp()
        params.params['trainer']['cuda_device'] = cuda_device
        best_model, metrics = train_model(params, serialization_dir)
def end_of_epoch(self, trainer: "CallbackTrainer"):
    # Create overall metrics dict
    training_elapsed_time = time.time() - trainer.training_start_time
    trainer.metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time))
    trainer.metrics["training_start_epoch"] = self.starting_epoch
    trainer.metrics["training_epochs"] = trainer.epoch_number - self.starting_epoch + 1
    trainer.metrics["epoch"] = trainer.epoch_number

    for key, value in trainer.train_metrics.items():
        trainer.metrics["training_" + key] = value
    for key, value in trainer.val_metrics.items():
        trainer.metrics["validation_" + key] = value

    if self.metric_tracker.is_best_so_far():
        # Update all the best_ metrics.
        # (Otherwise they just stay the same as they were.)
        trainer.metrics["best_epoch"] = trainer.epoch_number
        for key, value in trainer.val_metrics.items():
            trainer.metrics["best_validation_" + key] = value
        self.metric_tracker.best_epoch_metrics = copy.deepcopy(trainer.val_metrics)

    if trainer._serialization_dir:
        dump_metrics(
            os.path.join(trainer._serialization_dir, f"metrics_epoch_{trainer.epoch_number}.json"),
            trainer.metrics,
        )
def finish(self, metrics: Dict[str, Any]) -> None:
    # import wandb here to be sure that it was initialized
    # before this line was executed
    import wandb  # noqa

    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,  # type: ignore
            cuda_device=self.trainer.cuda_device,  # type: ignore
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )

    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"),
        metrics,
        log=True,
    )

    # update the summary with all metrics
    if wandb.run is None:
        logger.info("wandb run was closed. Resuming to update summary.")
        run = wandb.init(
            id=read_from_env("WANDB_RUN_ID"),
            project=read_from_env("WANDB_PROJECT"),
            entity=read_from_env("WANDB_ENTITY"),
            resume="must",
        )
    else:
        logger.info("There is an active wandb run. Using that to update summary.")
        run = wandb.run

    if run is not None:
        logger.info("Updating summary on wandb.")
        run.summary.update(metrics)
def finish(self, metrics: Dict[str, Any]):
    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,
            cuda_device=self.trainer.cuda_device,
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )
    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"), metrics, log=True
    )
def train_one_epoch(
    trainer: Trainer, epoch_count: int
) -> Tuple[Dict[str, float], Dict[str, float]]:
    train_metrics: Dict[str, float] = {}
    val_metrics: Dict[str, float] = {}
    this_epoch_val_metric: float = None
    metrics: Dict[str, float] = {}

    train_metrics = trainer._train_epoch(epoch_count)

    if trainer._validation_data is not None:
        with torch.no_grad():
            # We have a validation set, so compute all the metrics on it.
            val_loss, num_batches = trainer._validation_loss()
            val_metrics = training_util.get_metrics(trainer.model, val_loss, num_batches, reset=True)
            this_epoch_val_metric = val_metrics[trainer._validation_metric]

    for key, value in train_metrics.items():
        metrics["training_" + key] = value
    for key, value in val_metrics.items():
        metrics["validation_" + key] = value

    if trainer._serialization_dir:
        dump_metrics(
            os.path.join(trainer._serialization_dir, f"metrics_epoch_{epoch_count}.json"),
            metrics)

    # The Scheduler API is agnostic to whether your schedule requires a validation metric -
    # if it doesn't, the validation metric passed here is ignored.
    if trainer._learning_rate_scheduler:
        trainer._learning_rate_scheduler.step(this_epoch_val_metric, epoch_count)
    if trainer._momentum_scheduler:
        trainer._momentum_scheduler.step(this_epoch_val_metric, epoch_count)

    # trainer._save_checkpoint(epoch_count)

    return train_metrics, val_metrics
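# Hypothetical usage sketch (not part of the original code): train_one_epoch leaves
# checkpointing and early stopping to the caller, so an outer loop might look like this.
# `trainer` is assumed to be an already-constructed AllenNLP Trainer and `num_epochs` an int.
all_metrics: Dict[str, float] = {}
for epoch in range(num_epochs):
    train_metrics, val_metrics = train_one_epoch(trainer, epoch)
    # Mirror the "training_"/"validation_" prefixes used above for the final dump.
    for key, value in train_metrics.items():
        all_metrics["training_" + key] = value
    for key, value in val_metrics.items():
        all_metrics["validation_" + key] = value
dump_metrics(os.path.join(trainer._serialization_dir, "metrics.json"), all_metrics, log=True)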
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory. This is only intended for use when something actually crashed during
        the middle of a run. For continuing training a model on new data, see the
        ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir, file_friendly_logging)
    prepare_environment(params)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params,  # pylint: disable=no-member
                                           serialization_dir,
                                           recover,
                                           cache_directory,
                                           cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="")

        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif evaluation_dataset:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
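# Hypothetical usage sketch (not part of the original code): train_model can be called
# directly instead of through the `allennlp train` command. The config path and output
# directory below are placeholders.
params = Params.from_file("experiments/my_experiment.jsonnet")
best_model = train_model(
    params,
    serialization_dir="output/my_experiment",
    file_friendly_logging=True,   # tame tqdm output when logs go to a file
    recover=False,                # start fresh rather than resuming a crashed run
    force=False,                  # refuse to overwrite an existing serialization directory
)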
def train(self) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") train_metrics: Dict[str, float] = {} val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) # get peak of memory usage if 'cpu_memory_MB' in train_metrics: metrics['peak_cpu_memory_MB'] = max( metrics.get('peak_cpu_memory_MB', 0), train_metrics['cpu_memory_MB']) for key, value in train_metrics.items(): if key.startswith('gpu_'): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) if self._validation_data is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, num_batches = self._validation_loss() val_metrics = training_util.get_metrics(self.model, val_loss, num_batches, reset=True) # Check validation metric for early stopping this_epoch_val_metric = val_metrics[ self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break self._tensorboard.log_metrics(train_metrics, val_metrics=val_metrics, log_to_console=True) # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = time.strftime( "%H:%M:%S", time.gmtime(training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) metrics['best_epoch'] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value if self._serialization_dir: dump_metrics( os.path.join(self._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics) if self._learning_rate_scheduler: # The LRScheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) self._save_checkpoint(epoch) epoch_elapsed_time = time.time() - epoch_start_time logger.info( "Epoch duration: %s", time.strftime("%H:%M:%S", time.gmtime(epoch_elapsed_time))) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * \ ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) epochs_trained += 1 # Load the best model state before returning best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) return metrics
def train(self) -> Dict[str, Any]: try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") train_metrics: Dict[str, float] = {} val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics['best_epoch'] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): metrics["best_validation_" + key] = value for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) if self._validation_data is not None: with torch.no_grad(): val_loss, num_batches = self._validation_loss() val_metrics = training_util.get_metrics(self.get_model(), val_loss, num_batches, reset=True) this_epoch_val_metric = val_metrics[ self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = str( datetime.timedelta(seconds=training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value if self._metric_tracker.is_best_so_far(): metrics['best_epoch'] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = val_metrics if self._serialization_dir and is_master_rank(): dump_metrics( os.path.join(self._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics) if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) if is_master_rank(): self._save_checkpoint(epoch) epoch_elapsed_time = time.time() - epoch_start_time logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * \ ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) epochs_trained += 1 best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) return metrics
                      len(reader.alltags))
    ser_dir_iter = serialization_dir + "/final"
    prepare_global_logging(ser_dir_iter, False)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=folds[0] + folds[1],
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=45,
                      validation_metric="+f1-measure-overall",
                      cuda_device=cuda_device,
                      num_serialized_models_to_keep=3,
                      serialization_dir=ser_dir_iter)

    trainer.train()

    test_metrics = util.evaluate(trainer.model, test_dataset, iterator,
                                 cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                                 batch_weight_key="")
    for key, value in test_metrics.items():
        metrics["test_" + key] = value

    dump_metrics(os.path.join(ser_dir_iter, "metrics.json"), metrics, log=True)
def _train_worker( process_rank: int, params: Params, serialization_dir: str, file_friendly_logging: bool = False, recover: bool = False, cache_directory: str = None, cache_prefix: str = None, include_package: List[str] = None, node_rank: int = 0, master_addr: str = "127.0.0.1", master_port: int = 29500, world_size: int = 1, distributed_device_ids: List[str] = None, ) -> Optional[Model]: """ Helper to train the configured model/experiment. In distributed mode, this is spawned as a worker process. In a single GPU experiment, this returns the ``Model`` object and in distributed training, nothing is returned. # Parameters process_rank : ``int`` The process index that is initialized using the GPU device id. params : ``Params`` A parameter object specifying an AllenNLP Experiment. serialization_dir : ``str`` The directory in which to save results and logs. file_friendly_logging : ``bool``, optional (default=False) If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow down tqdm's output to only once every 10 seconds. recover : ``bool``, optional (default=False) If ``True``, we will try to recover a training run from an existing serialization directory. This is only intended for use when something actually crashed during the middle of a run. For continuing training a model on new data, see the ``fine-tune`` command. cache_directory : ``str``, optional For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`. cache_prefix : ``str``, optional For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`. include_package : ``List[str]``, optional In distributed mode, since this function would have been spawned as a separate process, the extra imports need to be done again. NOTE: This does not have any effect in single GPU training. node_rank : ``int``, optional Rank of the node world_size : ``int``, optional The number of processes involved in distributed training. # Returns best_model : ``Model`` The model with the best epoch weights. """ prepare_global_logging(serialization_dir, file_friendly_logging, rank=process_rank, world_size=world_size) prepare_environment(params) distributed = world_size > 1 # not using `allennlp.common.util.is_master` as the process group is yet to be initialized master = process_rank == 0 evaluate_on_test = params.pop_bool("evaluate_on_test", False) if distributed: # Since the worker is spawned and not forked, the extra imports # need to be done again. if include_package is not None: for package_name in include_package: import_submodules(package_name) num_procs_per_node = len(distributed_device_ids) # The Unique identifier of the worker process among all the processes in the # distributed training group is computed here. This is used while initializing # the process group using `init_process_group` global_rank = node_rank * num_procs_per_node + process_rank # In distributed training, the configured device is always going to be a list. # The corresponding gpu id for the particular worker is obtained by picking the id # from the device list with the rank as index gpu_id = distributed_device_ids[process_rank] # type: ignore # Till now, "cuda_device" might not be set in the trainer params. # But a worker trainer needs to only know about its specific GPU id. 
params["trainer"]["cuda_device"] = gpu_id params["trainer"]["world_size"] = world_size params["trainer"]["distributed"] = True torch.cuda.set_device(gpu_id) dist.init_process_group( backend="nccl", init_method=f"tcp://{master_addr}:{master_port}", world_size=world_size, rank=global_rank, ) logging.info(f"Process group of world size {world_size} initialized " f"for distributed training in worker {global_rank}") trainer_type = params.get("trainer", {}).get("type", "default") if trainer_type == "default": # Special logic to instantiate backward-compatible trainer. pieces = TrainerPieces.from_params(params, serialization_dir, recover, cache_directory, cache_prefix) trainer = Trainer.from_params( model=pieces.model, serialization_dir=serialization_dir, iterator=pieces.iterator, train_data=pieces.train_dataset, validation_data=pieces.validation_dataset, params=pieces.params, validation_iterator=pieces.validation_iterator, ) evaluation_iterator = pieces.validation_iterator or pieces.iterator evaluation_dataset = pieces.test_dataset else: if evaluate_on_test: raise ValueError( "--evaluate-on-test only works with the default Trainer. " "If you're using the CallbackTrainer you can use a callback " "to evaluate at Events.TRAINING_END; otherwise you'll have " "to run allennlp evaluate separately.") trainer = TrainerBase.from_params(params, serialization_dir, recover, cache_directory, cache_prefix) evaluation_dataset = None params.assert_empty("base train command") try: if distributed: # let the setup get ready for all the workers dist.barrier() metrics = trainer.train() except KeyboardInterrupt: # if we have completed an epoch, try to create a model archive. if master and os.path.exists( os.path.join(serialization_dir, _DEFAULT_WEIGHTS)): logging.info( "Training interrupted by the user. Attempting to create " "a model archive using the current best epoch weights.") archive_model(serialization_dir, files_to_archive=params.files_to_archive) raise if master: if evaluation_dataset and evaluate_on_test: logger.info( "The model will be evaluated using the best epoch weights.") test_metrics = evaluate( trainer.model, evaluation_dataset, evaluation_iterator, cuda_device=trainer.cuda_device, # TODO(brendanr): Pass in an arg following Joel's trainer refactor. batch_weight_key="", ) for key, value in test_metrics.items(): metrics["test_" + key] = value elif evaluation_dataset: logger.info( "To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command." ) dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True) if not distributed: return trainer.model return None # to make mypy happy
def train_model(params: Params, serialization_dir: str, file_friendly_logging: bool = False, recover: bool = False, force: bool = False, debate_mode: List[str] = ('f'), judge_filename: str = None, update_judge: bool = False, eval_mode: bool = False, reward_method: str = None, detach_value_head: bool = False, breakpoint_level: int = 0, search_outputs_path: str = None, accumulation_steps: int = 1, multi_gpu: bool = False, choice_mode: str = None, qa_loss_weight: float = 0., influence_reward: bool = False, theory_of_mind: bool = False, num_pred_rounds: int = -1, x_order_prob: float = 0., require_action: bool = False, single_shot: bool = False) -> Model: """ Trains the model specified in the given :class:`Params` object, using the data and training parameters also specified in that object, and saves the results in ``serialization_dir``. Parameters ---------- params : ``Params`` A parameter object specifying an AllenNLP Experiment. serialization_dir : ``str`` The directory in which to save results and logs. debate_mode : ``List[str]`` List of debate turns (e.g. aa, ar, rr, Ar) => capitalization implies search agent file_friendly_logging : ``bool``, optional (default=False) If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow down tqdm's output to only once every 10 seconds. recover : ``bool``, optional (default=False) If ``True``, we will try to recover a training run from an existing serialization directory. This is only intended for use when something actually crashed during the middle of a run. For continuing training a model on new data, see the ``fine-tune`` command. force : ``bool``, optional (default=False) If ``True``, we will overwrite the serialization directory if it already exists. judge_filename : ``str``, optional (default=None) Path to judge config or pre-trained judge model. If config, judge trained during debate. Necessary parameter if running in debate mode. update_judge : ``bool``, optional (default=False) Boolean whether or not to update Judge model during debate training. eval_mode : ``bool``, optional (default=False) Boolean whether or not to run in eval-only mode, on test data. Does not update/train any of the models. reward_method : ``str``, optional (default=False) Choice of reward function (RL) or loss function (Supervised Learning) for training debate agents detach_value_head : ``bool``, optional (default=False) Boolean whether or not to detatch value function gradient updates from the policy network. This prevents value function gradients from affecting policy network parameters. breakpoint_level : ``int`` optional (default=0) Debugging option to set breakpoint sensitivity (0 - no breakpoints). id_to_search_filename : ``str`` optional (default=None) Path to file with search predictions for each agent - necessary for supervised training accumulation_steps : ``int`` (default=1) Number of gradient steps to accumulate over before performing an update. Poor-man's batching for instances where number of examples per batch is small (limited GPU memory) multi_gpu : ``bool`` (default=False) Boolean whether or not to run models/training in model parallel mode. Requires specifying GPU allocations for trainer, judge, and debaters in the training config file (see training_config/bidaf.race.size=0.5.gpu=2.jsonnet for example usage). Returns ------- best_model: ``Model`` The model with the best epoch weights. """ assert ( not single_shot ) or eval_mode, 'Using single shot prediction outside eval_mode not yet supported.' 
assert (not single_shot) or (num_pred_rounds == -1), \ 'Using single shot prediction for a specific number of rounds is not yet supported.' # Get number of debate turns, and assert that not performing judge-only training num_no_qa_turns = sum([(('l' in debate_turn) or ('w' in debate_turn)) for debate_turn in debate_mode]) if (qa_loss_weight > 0) and (num_no_qa_turns == 0): warnings.warn( 'Unused argument qa_loss_weight in debate mode ' + str(debate_mode) + '. If this was unintentional, please remove the -q flag.', UserWarning) not_using_trained_debater = len( set('ablwⅰⅱⅲⅳ').intersection(''.join(debate_mode))) == 0 if (judge_filename is not None) and not_using_trained_debater: warnings.warn( 'Unnecessary to have debaters in debate mode ' + str(debate_mode) + '. If this was unintentional, please remove the -j flag.', UserWarning) prepare_environment(params) create_serialization_dir(params, serialization_dir, recover, force) prepare_global_logging(serialization_dir, file_friendly_logging) # Check that all Desired CUDA Devices exist => trainer => cuda_devices should contain list of required devices cuda_device = params.params.get('trainer').get('cuda_device', -1) check_for_gpu(cuda_device) # Build Allocation Dictionary (to be passed to all future functions) if multi_gpu: gpu_allocations, allocation_dict = params.params.pop( 'gpu_allocations', {}), {} assert len(gpu_allocations ) == 3, 'Must set gpu_allocations in config if multi-gpu' for k in ['debate', 'judge', 'trainer']: assert gpu_allocations[ k] in cuda_device, "Desired GPU not available... current: %s" % str( cuda_device) allocation_dict[k] = gpu_allocations[k] else: allocation_dict = {} params.to_file(os.path.join(serialization_dir, CONFIG_NAME)) evaluate_on_test = params.pop_bool("evaluate_on_test", False) trainer_type = params.get("trainer", {}).get("type", "default") if trainer_type == "default": # Special logic to instantiate backward-compatible trainer. params['dataset_reader'][ 'debate_mode'] = debate_mode # If debate_mode requires sample duplicates pieces = TrainerPieces.from_params(params, serialization_dir, cuda_device, recover, judge_filename=judge_filename, update_judge=update_judge, eval_mode=eval_mode, reward_method=reward_method, detach_value_head=detach_value_head, allocation_dict=allocation_dict, qa_loss_weight=qa_loss_weight, influence_reward=influence_reward, theory_of_mind=theory_of_mind) # pylint: disable=no-member trainer = Trainer.from_params( model=pieces.model, serialization_dir=serialization_dir, debate_mode=debate_mode, iterator=pieces.iterator, train_data=pieces.train_dataset, validation_data=pieces.validation_dataset, params=pieces.params, validation_iterator=pieces.validation_iterator, eval_mode=eval_mode, breakpoint_level=breakpoint_level, search_outputs_path=search_outputs_path, accumulation_steps=accumulation_steps, allocation_dict=allocation_dict, choice_mode=choice_mode, num_pred_rounds=num_pred_rounds, x_order_prob=x_order_prob, require_action=require_action, single_shot=single_shot) evaluation_iterator = pieces.validation_iterator or pieces.iterator evaluation_dataset = pieces.test_dataset else: assert (len(debate_mode) == 1) and (debate_mode[0] == 'f'), 'TrainerBase untested for debate training.' trainer = TrainerBase.from_params(params, serialization_dir, recover) evaluation_iterator = evaluation_dataset = None params.assert_empty('base train command') try: metrics = trainer.train() except KeyboardInterrupt: # if we have completed an epoch, try to create a model archive. 
if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)) and not eval_mode: logging.info( "Training interrupted by the user. Attempting to create " "a model archive using the current best epoch weights.") archive_model(serialization_dir, files_to_archive=params.files_to_archive) raise # Evaluate if evaluation_dataset and evaluate_on_test: logger.info( "The model will be evaluated using the best epoch weights.") test_metrics = evaluate( trainer.model, evaluation_dataset, evaluation_iterator, cuda_device=trainer._cuda_devices[0], # pylint: disable=protected-access, batch_weight_key="") for key, value in test_metrics.items(): metrics["test_" + key] = value elif evaluation_dataset: logger.info( "To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.") # Now tar up results if not eval_mode: archive_model(serialization_dir, files_to_archive=params.files_to_archive) dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True) else: dump_metrics(os.path.join( serialization_dir, "metrics.eval.d=" + '-'.join(debate_mode) + ".json"), metrics, log=True) # We count on the trainer to have the model with best weights return trainer.model
def train_model(params: Params, serialization_dir: str, file_friendly_logging: bool = False, recover: bool = False, del_models: bool = False, del_vocab: bool = False, convert: bool = False) -> Model: """ Trains the model specified in the given :class:`Params` object, using the data and training parameters also specified in that object, and saves the results in ``serialization_dir``. Parameters ---------- params : ``Params`` A parameter object specifying an AllenNLP Experiment. serialization_dir : ``str`` The directory in which to save results and logs. file_friendly_logging : ``bool``, optional (default=False) If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow down tqdm's output to only once every 10 seconds. recover : ``bool``, optional (default=False) If ``True``, we will try to recover a training run from an existing serialization directory. This is only intended for use when something actually crashed during the middle of a run. For continuing training a model on new data, see the ``fine-tune`` command. del_models : ``bool``, optional (default=False) If ``True``, we will delete existing models and logs if they already exist. del_vocab : ``bool``, optional (default=False) If ``True``, we will delete existing vocabulary if it already exists. Returns ------- best_model: ``Model`` The model with the best epoch weights. """ cuda_device = params.params.get('trainer').get('cuda_device', -1) if cuda_device >= 0: check_for_gpu(cuda_device) torch.cuda.set_device(cuda_device) # Sometimes we might change the config a bit but still want to continue training # if recover: # create_serialization_dir( # params, serialization_dir, recover, del_models) if del_models: for path in glob(f'{serialization_dir}/*'): if os.path.isfile(path) and not path.endswith('config.yaml'): os.remove(path) log_path = f'{serialization_dir}/log' if os.path.isdir(log_path): shutil.rmtree(log_path) if del_vocab: vocab_path = f'{serialization_dir}/vocabulary' if os.path.isdir(vocab_path): shutil.rmtree(vocab_path) prepare_global_logging(serialization_dir, file_friendly_logging) cuda_device = params.params.get('trainer').get('cuda_device', -1) check_for_gpu(cuda_device) params.to_file(os.path.join(serialization_dir, CONFIG_NAME)) evaluate_on_test = params.pop_bool("evaluate_on_test", False) trainer_type = params.get("trainer", {}).get("type", "default") if trainer_type == 'default': # Special logic to instantiate backward-compatible trainer. pieces = TrainerPieces.from_params(params, serialization_dir, recover) # pylint: disable=no-member trainer = Trainer.from_params( model=pieces.model, serialization_dir=serialization_dir, iterator=pieces.iterator, train_data=pieces.corpus.train, validation_data=pieces.corpus.valid, params=pieces.params, validation_iterator=pieces.validation_iterator) evaluation_iterator = pieces.validation_iterator or pieces.iterator evaluation_dataset = pieces.corpus.test batch_weight_key = pieces.batch_weight_key elif trainer_type == 'trainer_fp16_single': params.get("trainer").pop('type') # Special logic to instantiate backward-compatible trainer. 
pieces = TrainerPieces.from_params(params, serialization_dir, recover) # pylint: disable=no-member trainer = TrainerF16SingleTask.from_params( model=pieces.model, serialization_dir=serialization_dir, files_to_archive=params.files_to_archive, iterator=pieces.iterator, train_data=pieces.corpus.train, validation_data=pieces.corpus.valid, params=pieces.params, validation_iterator=pieces.validation_iterator) evaluation_iterator = pieces.validation_iterator or pieces.iterator evaluation_dataset = pieces.corpus.test batch_weight_key = pieces.batch_weight_key else: trainer = TrainerBase.from_params(params, serialization_dir, recover) # TODO(joelgrus): handle evaluation in the general case evaluation_iterator = evaluation_dataset = None params.assert_empty('base train command') if convert: logging.info('In conversion mode.') trainer._save_checkpoint(epoch=0) create_model_archive(serialization_dir, params) sys.exit(0) try: metrics = trainer.train() except (KeyboardInterrupt, RuntimeError): # if we have completed an epoch, try to create a model archive. logging.info("Training stopped. Attempting to create " "a model archive using the current best epoch weights.") create_model_archive(serialization_dir, params) raise # Evaluate if evaluation_dataset and evaluate_on_test: logger.info( "The model will be evaluated using the best epoch weights.") test_metrics = evaluate( trainer.model, evaluation_dataset, evaluation_iterator, cuda_device=trainer._cuda_devices[0], # pylint: disable=protected-access, # TODO(brendanr): Pass in an arg following Joel's trainer refactor. batch_weight_key=batch_weight_key) for key, value in test_metrics.items(): metrics["test_" + key] = value elif evaluation_dataset: logger.info( "To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.") # Now tar up results archive_model(serialization_dir, files_to_archive=params.files_to_archive) dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True) # We count on the trainer to have the model with best weights return trainer.model
def train(self) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError("Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") train_metrics: Dict[str, float] = {} val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics['best_epoch'] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): metrics["best_validation_" + key] = value if self.callbacks is not None: with torch.no_grad(): for callback in self.callbacks: callback.on_train_begin() for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() if self.callbacks is not None: with torch.no_grad(): for callback in self.callbacks: callback.on_epoch_begin(epoch) train_metrics = self._train_epoch(epoch) if not self._early_stopping_by_batch: # get peak of memory usage if 'cpu_memory_MB' in train_metrics: metrics['peak_cpu_memory_MB'] = max(metrics.get('peak_cpu_memory_MB', 0), train_metrics['cpu_memory_MB']) for key, value in train_metrics.items(): if key.startswith('gpu_'): metrics["peak_"+key] = max(metrics.get("peak_"+key, 0), value) if self._validation_data is not None: with torch.no_grad(): val_metrics_temp = self._estimator.estimate(self._validation_data) # We have a validation set, so compute all the metrics on it. # val_loss, num_batches = self._validation_loss() # val_metrics = training_util.get_metrics(self.model, val_loss, num_batches, reset=True) val_metrics = {'loss': 0} if 'sentiment_acc' in val_metrics_temp: val_metrics['accuracy'] = val_metrics_temp['sentiment_acc'] if 'category_f1' in val_metrics_temp: val_metrics['category_f1'] = val_metrics_temp['category_f1']['fscore'] if 'other_metrics' in val_metrics_temp and 'merge_micro_f1' in val_metrics_temp['other_metrics']: val_metrics['merge_micro_f1'] = val_metrics_temp['other_metrics']['merge_micro_f1'] # Check validation metric for early stopping val_metrics.update(val_metrics_temp) this_epoch_val_metric = val_metrics[self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break self._tensorboard.log_metrics(train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1) # +1 because tensorboard doesn't like 0 # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) 
metrics['best_epoch'] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = val_metrics if self._serialization_dir: dump_metrics(os.path.join(self._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics) # The Scheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) if self._momentum_scheduler: self._momentum_scheduler.step(this_epoch_val_metric, epoch) self._save_checkpoint(epoch) else: if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break epoch_elapsed_time = time.time() - epoch_start_time logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * \ ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) if self.callbacks is not None: with torch.no_grad(): for callback in self.callbacks: callback.on_epoch_end(epoch) epochs_trained += 1 # make sure pending events are flushed to disk and files are closed properly # self._tensorboard.close() # Load the best model state before returning best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) return metrics
def train(self) -> Dict[str, Any]:
    """
    Trains the supplied model with the supplied parameters.
    """
    try:
        epoch_counter, validation_metric_per_epoch = self._restore_checkpoint()
    except RuntimeError:
        traceback.print_exc()
        raise ConfigurationError(
            "Could not recover training from the checkpoint. Did you mean to output to "
            "a different serialization directory or delete the existing serialization "
            "directory?")

    self._enable_gradient_clipping()
    self._enable_activation_logging()

    logger.info("Beginning training.")

    train_metrics: Dict[str, float] = {}
    val_metrics: Dict[str, float] = {}
    metrics: Dict[str, Any] = {}
    epochs_trained = 0
    training_start_time = time.time()

    for epoch in range(epoch_counter, self._num_epochs):
        epoch_start_time = time.time()
        train_metrics = self._train_epoch(epoch)

        if self.predictor is not None:
            # We have a validation set, so compute all the metrics on it.
            # val_loss, num_batches = self._validation_loss()
            # val_metrics = self._get_metrics(val_loss, num_batches, reset=True)
            val_metrics = self.predictor.evaluate(self.model)

            # Check validation metric for early stopping
            this_epoch_val_metric = val_metrics[self._validation_metric]

            # Check validation metric to see if it's the best so far
            is_best_so_far = self._is_best_so_far(
                this_epoch_val_metric, self._validation_metric_per_interval)
            validation_metric_per_epoch.append(this_epoch_val_metric)
            self._validation_metric_per_interval.append(this_epoch_val_metric)
            if self._should_stop_early(validation_metric_per_epoch):
                logger.info("Ran out of patience. Stopping training.")
                break
        else:
            # No validation set, so just assume it's the best so far.
            is_best_so_far = True
            val_metrics = {}
            this_epoch_val_metric = None

        self._metrics_to_tensorboard(epoch, train_metrics, val_metrics=val_metrics)
        self._metrics_to_console(train_metrics, val_metrics)

        # Create overall metrics dict
        training_elapsed_time = time.time() - training_start_time
        metrics["training_duration"] = time.strftime(
            "%H:%M:%S", time.gmtime(training_elapsed_time))
        metrics["training_start_epoch"] = epoch_counter
        metrics["training_epochs"] = epochs_trained
        metrics["epoch"] = epoch

        for key, value in train_metrics.items():
            metrics["training_" + key] = value
        for key, value in val_metrics.items():
            metrics["validation_" + key] = value

        if is_best_so_far:
            # Update all the best_ metrics.
            # (Otherwise they just stay the same as they were.)
            metrics['best_epoch'] = epoch
            for key, value in val_metrics.items():
                metrics["best_validation_" + key] = value

        if self._serialization_dir:
            dump_metrics(
                os.path.join(self._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics)

        if self._learning_rate_scheduler:
            # The LRScheduler API is agnostic to whether your schedule requires a validation metric -
            # if it doesn't, the validation metric passed here is ignored.
            self._learning_rate_scheduler.step(this_epoch_val_metric, epoch)

        if self.learning_rate_decay:
            self.optimizer.param_groups[0]['lr'] *= self.learning_rate_decay

        self._save_checkpoint(epoch, validation_metric_per_epoch, is_best=is_best_so_far)

        epoch_elapsed_time = time.time() - epoch_start_time
        logger.info(
            "Epoch duration: %s",
            time.strftime("%H:%M:%S", time.gmtime(epoch_elapsed_time)))

        if epoch < self._num_epochs - 1:
            training_elapsed_time = time.time() - training_start_time
            estimated_time_remaining = training_elapsed_time * \
                ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1)
            formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining)))
            logger.info("Estimated training time remaining: %s", formatted_time)

        epochs_trained += 1

    return metrics
def _try_train(self) -> Tuple[Dict[str, Any], int]: try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?" ) training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") val_metrics: Dict[str, float] = {} metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics["best_epoch"] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): metrics["best_validation_" + key] = value for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) # Back up the model now, in case something goes wrong later with the evaluation if self._primary and self._checkpointer is not None: self._checkpointer.shelve_model(epoch, self) # Wait for the primary process to finish saving the model checkpoint if self._distributed: dist.barrier() # get peak of memory usage for key, value in train_metrics.items(): if key.startswith("gpu_") and key.endswith("_memory_MB"): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) elif key.startswith("worker_") and key.endswith("_memory_MB"): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) this_epoch_val_metric: float = 0.0 if self._validation_data_loader is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, val_reg_loss, num_batches = self._validation_loss(epoch) # It is safe again to wait till the validation is done. This is # important to get the metrics right. if self._distributed: dist.barrier() val_metrics = training_util.get_metrics( self.model, val_loss, val_reg_loss, batch_loss=None, batch_reg_loss=None, num_batches=num_batches, reset=True, world_size=self._world_size, cuda_device=self.cuda_device, ) # Check validation metric for early stopping this_epoch_val_metric = self._metric_tracker.combined_score(val_metrics) self._metric_tracker.add_metrics(val_metrics) # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) metrics["best_epoch"] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = val_metrics if self._serialization_dir and self._primary: common_util.dump_metrics( os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics, ) # The Scheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. 
if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric) if self._momentum_scheduler: self._momentum_scheduler.step(this_epoch_val_metric) # The checkpointer saves state from the learning rate scheduler and the momentum # scheduler, so we have to make sure those are updated before we save the checkpoint here. if self._primary and self._checkpointer is not None: self._checkpointer.save_checkpoint( epoch, self, is_best_so_far=self._metric_tracker.is_best_so_far() ) # Wait for the primary process to finish saving the checkpoint if self._distributed: dist.barrier() for callback in self._callbacks: callback.on_epoch(self, metrics=metrics, epoch=epoch, is_primary=self._primary) epoch_elapsed_time = time.time() - epoch_start_time logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * ( (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1 ) formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) epochs_trained += 1 if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break else: epoch = self._num_epochs - 1 # Load the best model state before returning best_model_state = ( None if self._checkpointer is None else self._checkpointer.best_model_state() ) if best_model_state: self.model.load_state_dict(best_model_state) return metrics, epoch
def train(self) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics["best_epoch"] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): metrics["best_validation_" + key] = value for callback in self._epoch_callbacks: callback(self, metrics={}, epoch=-1) for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) # get peak of memory usage if "cpu_memory_MB" in train_metrics: metrics["peak_cpu_memory_MB"] = max( metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"]) for key, value in train_metrics.items(): if key.startswith("gpu_"): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) if self._validation_data_loader is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, val_reg_loss, num_batches = self._validation_loss( epoch) # It is safe again to wait till the validation is done. This is # important to get the metrics right. if self._distributed: dist.barrier() val_metrics = training_util.get_metrics( self.model, val_loss, val_reg_loss, num_batches, reset=True, world_size=self._world_size, cuda_device=[self.cuda_device], ) # Check validation metric for early stopping this_epoch_val_metric = val_metrics[ self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break if self._master: self._tensorboard.log_metrics( train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1) # +1 because tensorboard doesn't like 0 # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = str( datetime.timedelta(seconds=training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) metrics["best_epoch"] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = val_metrics if self._serialization_dir and self._master: common_util.dump_metrics( os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics) # The Scheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. 
if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric) if self._momentum_scheduler: self._momentum_scheduler.step(this_epoch_val_metric) if self._master: self._checkpointer.save_checkpoint( epoch, self, is_best_so_far=self._metric_tracker.is_best_so_far()) # Wait for the master to finish saving the checkpoint if self._distributed: dist.barrier() for callback in self._epoch_callbacks: callback(self, metrics=metrics, epoch=epoch) epoch_elapsed_time = time.time() - epoch_start_time logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * ( (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) epochs_trained += 1 # make sure pending events are flushed to disk and files are closed properly self._tensorboard.close() # Load the best model state before returning best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) return metrics
def train(self) -> Dict[str, Any]: metrics_by_fold = [] if self.validation_dataset: logger.info( "Using the concatenation of the training and the validation datasets for" " cross-validation.") dataset = self.train_dataset + self.validation_dataset else: dataset = self.train_dataset groups = self._get_groups(dataset) n_splits = self.cross_validation_splitter.get_n_splits(dataset, groups=groups) for fold_index, (train_indices, validation_indices, test_indices) in enumerate( self.cross_validation_splitter(dataset, groups=groups)): logger.info("Fold %d/%d", fold_index, n_splits - 1) serialization_dir = os.path.join(self._serialization_dir, f'fold_{fold_index}') os.makedirs(serialization_dir, exist_ok=True) train_dataset = [dataset[i] for i in train_indices] validation_dataset = [dataset[i] for i in validation_indices] or None test_dataset = [dataset[i] for i in test_indices] # TODO: make it generic as a "fold consistency checking", in which the folder and field key is specified. with open(f'data/folds/fold{fold_index}_train_ids', 'w') as file: for instance in train_dataset: file.write( f'{instance["question_id"].as_tensor({}).item()}\n') if validation_dataset: with open(f'data/folds/fold{fold_index}_validation_ids', 'w') as file: for instance in validation_dataset: file.write( f'{instance["question_id"].as_tensor({}).item()}\n' ) with open(f'data/folds/fold{fold_index}_test_ids', 'w') as file: for instance in test_dataset: file.write( f'{instance["question_id"].as_tensor({}).item()}\n') model = copy.deepcopy(self.model) subtrainer = self._build_subtrainer(serialization_dir, model, train_dataset, validation_dataset) # try: fold_metrics = subtrainer.train() # except KeyboardInterrupt: # TODO # # if we have completed an epoch, try to create a model archive. # if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)): # logging.info("Training interrupted by the user. Attempting to create " # "a model archive using the current best epoch weights.") # archive_model(serialization_dir) # raise # archive_model(serialization_dir) # TODO for metric_key, metric_value in training_util.evaluate( model, test_dataset, self.iterator, cuda_device=self._cuda_devices[0], batch_weight_key='').items(): if metric_key in fold_metrics: fold_metrics[f'test_{metric_key}'] = metric_value else: fold_metrics[metric_key] = metric_value dump_metrics(os.path.join(serialization_dir, 'metrics.json'), fold_metrics, log=True) metrics_by_fold.append(fold_metrics) metrics = {} for metric_key, fold_0_metric_value in metrics_by_fold[0].items(): if isinstance(fold_0_metric_value, float): average = Average() for fold_index, fold_metrics in enumerate(metrics_by_fold): metric_value = fold_metrics[metric_key] metrics[f'fold{fold_index}_{metric_key}'] = metric_value average(metric_value) metrics[f'average_{metric_key}'] = average.get_metric() else: for fold_index, fold_metrics in enumerate(metrics_by_fold): metrics[f'fold{fold_index}_{metric_key}'] = fold_metrics[ metric_key] if self.leave_model_trained: subtrainer = self._build_subtrainer(self._serialization_dir, self.model, self.train_dataset, self.validation_dataset) subtrainer.train() return metrics
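# The cross-validation trainer above reports every metric per fold plus an average
# over folds for float-valued metrics. A small self-contained sketch of that layout,
# using plain Python instead of AllenNLP's Average metric (the fold values below are
# made up for illustration):
from typing import Any, Dict, List

def aggregate_fold_metrics(metrics_by_fold: List[Dict[str, Any]]) -> Dict[str, Any]:
    metrics: Dict[str, Any] = {}
    for key, first_value in metrics_by_fold[0].items():
        for fold_index, fold_metrics in enumerate(metrics_by_fold):
            metrics[f"fold{fold_index}_{key}"] = fold_metrics[key]
        if isinstance(first_value, float):
            metrics[f"average_{key}"] = sum(m[key] for m in metrics_by_fold) / len(metrics_by_fold)
    return metrics

print(aggregate_fold_metrics([{"test_f1": 0.70}, {"test_f1": 0.74}]))
# {'fold0_test_f1': 0.7, 'fold1_test_f1': 0.74, 'average_test_f1': 0.72}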
def main(args): # validate inputs num_ensemble_models = None selector = args.selector if selector[:3] == 'qbc': assert (len(selector) > 3) num_ensemble_models = int(selector[3:]) selector = 'qbc' assert(selector == 'entropy' or selector == 'score' or selector == 'random' or selector == 'qbc') # 1 and only 1 specified assert getattr(args, 'labels_to_query', None) or getattr(args, 'query_time_file', None) assert not getattr(args, 'labels_to_query', None) or not getattr(args, 'query_time_file', None) # parse inputs if getattr(args, 'labels_to_query', None): label_times_list = args.labels_to_query.split(",") else: label_times_list = args.query_time_file.split(":") # import submodule import_submodules('discrete_al_coref_module') if getattr(args, 'experiments', None): ''' Default (experimental) mode ''' # create save dir save_dir = args.experiments if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) for x in label_times_list: if getattr(args, 'labels_to_query', None): x = int(x) assert x >= 0 print("Running with {} labels per doc".format(x)) save_fn = x else: assert os.path.exists(x) print("Running with equivalent annotation time to {}".format(x)) save_fn = x.replace('/', '%').replace('_query_info.json', '').replace( '.json', '').replace('.', '') serialization_dir = os.path.join(save_dir, "checkpoint_{}".format(save_fn)) print("Saving in directory: {}".format(serialization_dir)) if os.path.exists(serialization_dir): print("Deleting existing directory found in same location.") shutil.rmtree(serialization_dir) # modify parameters according to passed-in arguments params = Params.from_file("training_config/coref.jsonnet") params.params['trainer']['cuda_device'] = args.cuda_device params.params['trainer']['active_learning']['save_al_queries'] = args.save_al_queries params.params['trainer']['active_learning']['query_type'] = "pairwise" if args.pairwise else "discrete" if selector: params.params['trainer']['active_learning']['selector']['type'] = selector params.params['trainer']['active_learning']['selector']['use_clusters'] = not args.no_clusters if getattr(args, 'labels_to_query', None): params.params['trainer']['active_learning']['num_labels'] = x else: params.params['trainer']['active_learning']['use_equal_annot_time'] = True params.params['trainer']['active_learning']['equal_annot_time_file'] = x # train model best_model, metrics, query_info = train_model(params, serialization_dir, selector, num_ensemble_models, recover=False) dump_metrics(os.path.join(save_dir, "{}.json".format(save_fn)), metrics, log=True) with open(os.path.join(save_dir, "{}_query_info.json".format(save_fn)), 'w', encoding='utf-8') as f: json.dump(query_info, f) else: ''' Test mode ''' params = Params.from_file('training_config/coref.jsonnet') if getattr(args, 'labels_to_query', None): params.params['trainer']['active_learning']['num_labels'] = label_times_list[0] else: params.params['trainer']['active_learning']['use_equal_annot_time'] = True params.params['trainer']['active_learning']['equal_annot_time_file'] = label_times_list[0] params.params['trainer']['active_learning']['save_al_queries'] = args.save_al_queries if getattr(args, 'testing', None) or getattr(args, 'testing_vocab', None): params.params['trainer']['active_learning']['epoch_interval'] = 0 del params.params['test_data_path'] ''' Uncomment if necessary params.params['train_data_path'] = "/checkpoint/belindali/active_learning_coref/coref_ontonotes/dev.english.v4_gold_conll" params.params['dataset_reader']['fully_labelled_threshold'] = 100 #''' 
if getattr(args, 'testing', None): params.params['model']['text_field_embedder']['token_embedders']['tokens'] = {'type': 'embedding', 'embedding_dim': 300} with TemporaryDirectory() as serialization_dir: print("temp file path: " + str(serialization_dir)) params.params['trainer']['cuda_device'] = args.cuda_device params.params['trainer']['active_learning']['query_type'] = "pairwise" if args.pairwise else "discrete" params.params['trainer']['active_learning']['selector']['type'] = selector if selector else "entropy" params.params['trainer']['active_learning']['selector']['use_clusters'] = not args.no_clusters best_model, metrics, query_info = train_model(params, serialization_dir, selector, num_ensemble_models) with open(os.path.join(serialization_dir, "query_info.json"), 'w', encoding='utf-8') as f: json.dump(query_info, f)
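# main() above reads its settings from an argparse namespace that is not shown in
# this snippet. A hypothetical parser covering the attributes the function actually
# references; flag names, help strings, and defaults are assumptions, not taken from
# the source:
import argparse

def build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Active-learning coreference training")
    parser.add_argument("--experiments", type=str, default=None, help="directory to save runs in")
    parser.add_argument("--cuda_device", type=int, default=-1)
    parser.add_argument("--selector", type=str, default="entropy",
                        help="'entropy', 'score', 'random', or 'qbc<N>' for an N-model committee")
    parser.add_argument("--labels_to_query", type=str, default=None,
                        help="comma-separated label budgets, e.g. '10,50,100'")
    parser.add_argument("--query_time_file", type=str, default=None,
                        help="colon-separated query-info files for equal-annotation-time runs")
    parser.add_argument("--pairwise", action="store_true")
    parser.add_argument("--no_clusters", action="store_true")
    parser.add_argument("--save_al_queries", action="store_true")
    parser.add_argument("--testing", action="store_true")
    parser.add_argument("--testing_vocab", action="store_true")
    return parser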
def train_model(params: Params, serialization_dir: str, file_friendly_logging: bool = False, recover: bool = False, force: bool = False) -> Model: """ Trains the model specified in the given :class:`Params` object, using the data and training parameters also specified in that object, and saves the results in ``serialization_dir``. Parameters ---------- params : ``Params`` A parameter object specifying an AllenNLP Experiment. serialization_dir : ``str`` The directory in which to save results and logs. file_friendly_logging : ``bool``, optional (default=False) If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow down tqdm's output to only once every 10 seconds. recover : ``bool``, optional (default=False) If ``True``, we will try to recover a training run from an existing serialization directory. This is only intended for use when something actually crashed during the middle of a run. For continuing training a model on new data, see the ``fine-tune`` command. Returns ------- best_model: ``Model`` The model with the best epoch weights. """ prepare_environment(params) create_serialization_dir(params, serialization_dir, recover, force) prepare_global_logging(serialization_dir, file_friendly_logging) cuda_device = params.params.get('trainer').get('cuda_device', -1) if isinstance(cuda_device, list): for device in cuda_device: check_for_gpu(device) else: check_for_gpu(cuda_device) params.to_file(os.path.join(serialization_dir, CONFIG_NAME)) all_datasets = datasets_from_params(params) datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets)) for dataset in datasets_for_vocab_creation: if dataset not in all_datasets: raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}") logger.info("From dataset instances, %s will be considered for vocabulary creation.", ", ".join(datasets_for_vocab_creation)) vocab = Vocabulary.from_params( params.pop("vocabulary", {}), (instance for key, dataset in all_datasets.items() for instance in dataset if key in datasets_for_vocab_creation) ) model = Model.from_params(vocab=vocab, params=params.pop('model')) # Initializing the model can have side effect of expanding the vocabulary vocab.save_to_files(os.path.join(serialization_dir, "vocabulary")) iterator = DataIterator.from_params(params.pop("iterator")) iterator.index_with(vocab) validation_iterator_params = params.pop("validation_iterator", None) if validation_iterator_params: validation_iterator = DataIterator.from_params(validation_iterator_params) validation_iterator.index_with(vocab) else: validation_iterator = None train_data = all_datasets['train'] validation_data = all_datasets.get('validation') test_data = all_datasets.get('test') trainer_params = params.pop("trainer") no_grad_regexes = trainer_params.pop("no_grad", ()) for name, parameter in model.named_parameters(): if any(re.search(regex, name) for regex in no_grad_regexes): parameter.requires_grad_(False) frozen_parameter_names, tunable_parameter_names = \ get_frozen_and_tunable_parameter_names(model) logger.info("Following parameters are Frozen (without gradient):") for name in frozen_parameter_names: logger.info(name) logger.info("Following parameters are Tunable (with gradient):") for name in tunable_parameter_names: logger.info(name) trainer_choice = trainer_params.pop_choice("type", Trainer.list_available(), default_to_first_choice=True) trainer = Trainer.by_name(trainer_choice).from_params(model=model, serialization_dir=serialization_dir, iterator=iterator, 
train_data=train_data, validation_data=validation_data, params=trainer_params, validation_iterator=validation_iterator) evaluate_on_test = params.pop_bool("evaluate_on_test", False) params.assert_empty('base train command') try: metrics = trainer.train() except KeyboardInterrupt: # if we have completed an epoch, try to create a model archive. if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)): logging.info("Training interrupted by the user. Attempting to create " "a model archive using the current best epoch weights.") archive_model(serialization_dir, files_to_archive=params.files_to_archive) raise # Now tar up results archive_model(serialization_dir, files_to_archive=params.files_to_archive) logger.info("Loading the best epoch weights.") best_model_state_path = os.path.join(serialization_dir, 'best.th') best_model_state = torch.load(best_model_state_path) best_model = model best_model.load_state_dict(best_model_state) if test_data and evaluate_on_test: logger.info("The model will be evaluated using the best epoch weights.") test_metrics = evaluate( best_model, test_data, validation_iterator or iterator, cuda_device=trainer._cuda_devices[0] # pylint: disable=protected-access ) for key, value in test_metrics.items(): metrics["test_" + key] = value elif test_data: logger.info("To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.") dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True) return best_model
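# A hedged usage sketch for the train_model API documented above; the config path
# and serialization directory are placeholders, not paths from the source:
from allennlp.common.params import Params

params = Params.from_file("training_config/experiment.jsonnet")
best_model = train_model(params,
                         serialization_dir="runs/experiment_1",
                         file_friendly_logging=True,
                         recover=False,
                         force=False)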
def train(self) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) logger.info("Beginning training.") train_metrics: Dict[str, float] = {} val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics['best_epoch'] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): metrics["best_validation_" + key] = value #################################################################################################### if self.visdom: def create_plot_window(vis, xlabel, ylabel, title): return vis.line(X=np.array([1]), Y=np.array([np.nan]), opts=dict(xlabel=xlabel, ylabel=ylabel, title=title)) self.train_loss_window = create_plot_window( self.visdom, '#Iterations', 'Loss', 'Training Loss') self.consume_time_window = create_plot_window( self.visdom, "#Epochs", "Seconds", "Consuming time") self.left_time_window = self.visdom.text( "Waiting for training.......") metric_window = {} ########################################################################################## for epoch in range(epoch_counter, self._num_epochs): epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) # get peak of memory usage if 'cpu_memory_MB' in train_metrics: metrics['peak_cpu_memory_MB'] = max( metrics.get('peak_cpu_memory_MB', 0), train_metrics['cpu_memory_MB']) for key, value in train_metrics.items(): if key.startswith('gpu_'): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) if self._validation_data is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, num_batches = self._validation_loss() val_metrics = training_util.get_metrics(self.model, val_loss, num_batches, reset=True) # Check validation metric for early stopping this_epoch_val_metric = val_metrics[ self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info("Ran out of patience. 
Stopping training.") break self._tensorboard.log_metrics( train_metrics, val_metrics=val_metrics, log_to_console=True, ) # +1 because tensorboard doesn't like 0 # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = time.strftime( "%H:%M:%S", time.gmtime(training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch # print(train_metrics.keys()) # print(val_metrics.keys()) ############################################################################################### if self.visdom: for key in train_metrics.keys(): newkey = 'training_' + key if newkey in metric_window: continue else: metric_window[newkey] = create_plot_window( self.visdom, '#Epochs', key, newkey) for key in val_metrics.keys(): newkey = 'validation_' + key if newkey in metric_window: continue else: metric_window[newkey] = create_plot_window( self.visdom, '#Epochs', key, newkey) ################################################################################################# for key, value in train_metrics.items(): metrics["training_" + key] = value ########################################################## if self.visdom: self.visdom.line(X=np.array([epoch]), Y=np.array([value]), win=metric_window["training_" + key], update='append') ######################################################### for key, value in val_metrics.items(): metrics["validation_" + key] = value ########################################################## if self.visdom: self.visdom.line(X=np.array([epoch]), Y=np.array([value]), win=metric_window["validation_" + key], update='append') ############################################################ if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) metrics['best_epoch'] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = val_metrics if self._serialization_dir: dump_metrics( os.path.join(self._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics) # The Scheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. 
if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) if self._momentum_scheduler: self._momentum_scheduler.step(this_epoch_val_metric, epoch) self._save_checkpoint(epoch) epoch_elapsed_time = time.time() - epoch_start_time logger.info( "Epoch duration: %s", time.strftime("%H:%M:%S", time.gmtime(epoch_elapsed_time))) ####################################################################################### if self.visdom: self.visdom.line(X=np.array([epoch]), Y=np.array([epoch_elapsed_time / 60]), win=self.consume_time_window, update='append') ############################################################################################ if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * \ ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) ####################################################################################### if self.visdom: self.visdom.text( "Estimated training time remaining: {}".format( formatted_time), win=self.left_time_window, append=True) ############################################################################################ epochs_trained += 1 # Load the best model state before returning best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) return metrics
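# The visdom hooks above create one line-plot window per metric and append a point
# each epoch. A minimal standalone sketch of that pattern (assumes a visdom server
# is running on the default host and port):
import numpy as np
import visdom

vis = visdom.Visdom()
loss_window = vis.line(X=np.array([1]), Y=np.array([np.nan]),
                       opts=dict(xlabel="#Epochs", ylabel="loss", title="training_loss"))
for epoch, loss in enumerate([0.9, 0.6, 0.45]):
    vis.line(X=np.array([epoch]), Y=np.array([loss]),
             win=loss_window, update="append")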
def evaluate( model: Model, data_loader: DataLoader, cuda_device: int = -1, batch_weight_key: str = None, output_file: str = None, predictions_output_file: str = None, ) -> Dict[str, Any]: """ # Parameters model : `Model` The model to evaluate data_loader : `DataLoader` The `DataLoader` that will iterate over the evaluation data (data loaders already contain their data). cuda_device : `int`, optional (default=`-1`) The cuda device to use for this evaluation. The model is assumed to already be using this device; this parameter is only used for moving the input data to the correct device. batch_weight_key : `str`, optional (default=`None`) If given, this is a key in the output dictionary for each batch that specifies how to weight the loss for that batch. If this is not given, we use a weight of 1 for every batch. metrics_output_file : `str`, optional (default=`None`) Optional path to write the final metrics to. predictions_output_file : `str`, optional (default=`None`) Optional path to write the predictions to. # Returns `Dict[str, Any]` The final metrics. """ check_for_gpu(cuda_device) data_loader.set_target_device(int_to_device(cuda_device)) predictions_file = (None if predictions_output_file is None else open( predictions_output_file, "w")) with torch.no_grad(): model.eval() iterator = iter(data_loader) logger.info("Iterating over dataset") generator_tqdm = Tqdm.tqdm(iterator) # Number of batches in instances. batch_count = 0 # Number of batches where the model produces a loss. loss_count = 0 # Cumulative weighted loss total_loss = 0.0 # Cumulative weight across all batches. total_weight = 0.0 for batch in generator_tqdm: batch_count += 1 batch = nn_util.move_to_device(batch, cuda_device) output_dict = model(**batch) loss = output_dict.get("loss") metrics = model.get_metrics() if loss is not None: loss_count += 1 if batch_weight_key: weight = output_dict[batch_weight_key].item() else: weight = 1.0 total_weight += weight total_loss += loss.item() * weight # Report the average loss so far. metrics["loss"] = total_loss / total_weight if not HasBeenWarned.tqdm_ignores_underscores and any( metric_name.startswith("_") for metric_name in metrics): logger.warning('Metrics with names beginning with "_" will ' "not be logged to the tqdm progress bar.") HasBeenWarned.tqdm_ignores_underscores = True description = (", ".join([ "%s: %.2f" % (name, value) for name, value in metrics.items() if not name.startswith("_") ]) + " ||") generator_tqdm.set_description(description, refresh=False) if predictions_file is not None: predictions = json.dumps( sanitize(model.make_output_human_readable(output_dict))) predictions_file.write(predictions + "\n") if predictions_file is not None: predictions_file.close() final_metrics = model.get_metrics(reset=True) if loss_count > 0: # Sanity check if loss_count != batch_count: raise RuntimeError( "The model you are trying to evaluate only sometimes produced a loss!" ) final_metrics["loss"] = total_loss / total_weight if output_file is not None: dump_metrics(output_file, final_metrics, log=True) return final_metrics
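# A hedged usage sketch for the evaluate() helper above; the archive and data paths
# are placeholders, and an AllenNLP 1.x-style archive and config are assumed:
from allennlp.data import DataLoader, DatasetReader
from allennlp.models.archival import load_archive

archive = load_archive("model.tar.gz", cuda_device=-1)
reader = DatasetReader.from_params(archive.config.pop("dataset_reader"))
instances = reader.read("data/test.jsonl")
instances.index_with(archive.model.vocab)
loader = DataLoader.from_params(dataset=instances,
                                params=archive.config.pop("data_loader"))
metrics = evaluate(archive.model, loader, cuda_device=-1,
                   output_file="metrics.json")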
def train_model(params: Params, serialization_dir: str, results_fn: str, file_friendly_logging: bool = False, recover: bool = False, force: bool = False) -> Tuple[Model, Dict[str, Any]]: prepare_environment(params) create_serialization_dir(params, serialization_dir, recover, force) prepare_global_logging(serialization_dir, file_friendly_logging) cuda_device = params.params.get('trainer').get('cuda_device', -1) if isinstance(cuda_device, list): for device in cuda_device: check_for_gpu(device) else: check_for_gpu(cuda_device) params.to_file(os.path.join(serialization_dir, CONFIG_NAME)) all_datasets = datasets_from_params(params) datasets_for_vocab_creation = set( params.pop("datasets_for_vocab_creation", all_datasets)) for dataset in datasets_for_vocab_creation: if dataset not in all_datasets: raise ConfigurationError( f"invalid 'dataset_for_vocab_creation' {dataset}") logger.info( "From dataset instances, %s will be considered for vocabulary creation.", ", ".join(datasets_for_vocab_creation)) vocab = Vocabulary.from_params( params.pop("vocabulary", {}), (instance for key, dataset in all_datasets.items() for instance in dataset if key in datasets_for_vocab_creation)) model = Model.from_params(vocab=vocab, params=params.pop('model')) # Initializing the model can have side effect of expanding the vocabulary vocab.save_to_files(os.path.join(serialization_dir, "vocabulary")) iterator = DataIterator.from_params(params.pop("iterator")) iterator.index_with(vocab) validation_iterator_params = params.pop("validation_iterator", None) if validation_iterator_params: validation_iterator = DataIterator.from_params( validation_iterator_params) validation_iterator.index_with(vocab) else: validation_iterator = None held_out_iterator_params = params.pop("held_out_iterator", None) if held_out_iterator_params: held_out_iterator = DataIterator.from_params(held_out_iterator_params) held_out_iterator.index_with(vocab) else: held_out_iterator = None train_data = all_datasets['train'] held_out_train_data = all_datasets.get('held_out_train') validation_data = all_datasets.get('validation') test_data = all_datasets.get('test') trainer_params = params.pop("trainer") no_grad_regexes = trainer_params.pop("no_grad", ()) for name, parameter in model.named_parameters(): if any(re.search(regex, name) for regex in no_grad_regexes): parameter.requires_grad_(False) frozen_parameter_names, tunable_parameter_names = \ get_frozen_and_tunable_parameter_names(model) logger.info("Following parameters are Frozen (without gradient):") for name in frozen_parameter_names: logger.info(name) logger.info("Following parameters are Tunable (with gradient):") for name in tunable_parameter_names: logger.info(name) trainer_choice = trainer_params.pop_choice("type", Trainer.list_available(), default_to_first_choice=True) trainer = Trainer.by_name(trainer_choice).from_params( model=model, serialization_dir=serialization_dir, iterator=iterator, train_data=train_data, held_out_train_data=held_out_train_data, validation_data=validation_data, params=trainer_params, validation_iterator=validation_iterator, held_out_iterator=held_out_iterator) evaluate_on_test = params.pop_bool("evaluate_on_test", False) params.assert_empty('base train command') try: metrics = trainer.train() except KeyboardInterrupt: # if we have completed an epoch, try to create a model archive. if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)): logging.info( "Training interrupted by the user. 
Attempting to create " "a model archive using the current best epoch weights.") archive_model(serialization_dir, files_to_archive=params.files_to_archive) raise # Now tar up results archive_model(serialization_dir, files_to_archive=params.files_to_archive) logger.info("Loading the best epoch weights.") best_model_state_path = os.path.join(serialization_dir, 'best.th') best_model_state = torch.load(best_model_state_path) best_model = model best_model.load_state_dict(best_model_state) if test_data and evaluate_on_test: logger.info( "The model will be evaluated using the best epoch weights.") test_metrics = evaluate( best_model, test_data, validation_iterator or iterator, cuda_device=trainer._cuda_devices[0] # pylint: disable=protected-access ) for key, value in test_metrics.items(): metrics["test_" + key] = value elif test_data: logger.info( "To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.") dump_metrics(os.path.join(results_dir, results_fn), metrics, log=True) return best_model, metrics
tempdir = tempfile.mkdtemp()
with tarfile.open(resolved_archive_file, "r:gz") as archive:
    archive.extractall(tempdir)
atexit.register(_cleanup_archive_dir, tempdir)
serialization_dir = tempdir
config = Params.from_file(os.path.join(serialization_dir, "config.json"), "")
model = SemanticRoleLabeler.from_archive(args.archive_file)
archive = Archive(model=model, config=config)
prepare_environment(config)
model.eval()

validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))

instances = dataset_reader.read(args.evaluation_data_path)
instances.index_with(model.vocab)

data_loader_params = config.pop("validation_data_loader", None)
if data_loader_params is None:
    data_loader_params = config.pop("data_loader")
data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

metrics = evaluate(model, data_loader, -1, "")
dump_metrics(args.output_file, metrics)
params.to_file(serialize_config_file)
dist.barrier()
params = ConstParams.from_file(serialize_config_file)

log_dir = os.path.join(serialization_dir, str(dist.get_rank()))
os.makedirs(log_dir, exist_ok=True)
stdout_handler = prepare_global_logging(log_dir, file_friendly_logging=False)
prepare_environment(params)

cuda_device = params.trainer.get('cuda_device', -1)
check_for_gpu(cuda_device)

trainer_type = params.trainer.type
trainer = TrainerBase.from_params(params, serialization_dir, recover)
params_cnt, params_trainable_cnt = count_parameters(trainer.model)
print("all params cnt: ", params_cnt)
print("all trainable params cnt: ", params_trainable_cnt)

metrics = trainer.train()

cleanup_global_logging(stdout_handler)

if is_master_rank:
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)
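# count_parameters() is reported above but not defined in this snippet; a
# conventional implementation (an assumption, not necessarily the source's) would be:
import torch

def count_parameters(model: torch.nn.Module):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable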
def custom_train(self) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ logger.info("GAN TRAINER HM START") try: epoch_counter = self.trainer._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") # TODO - gradient clipping? training_util.enable_gradient_clipping(self.trainer.model, self.trainer._grad_clipping) #HACK: #self.trainer._metric_tracker._patience = 30 logger.info("Beginning training.") train_metrics: Dict[str, float] = {} val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() metrics['best_epoch'] = self.trainer._metric_tracker.best_epoch for key, value in self.trainer._metric_tracker.best_epoch_metrics.items( ): metrics["best_validation_" + key] = value for epoch in range(epoch_counter, self.trainer._num_epochs): # Start tracemalloc # tracemalloc.start() epoch_start_time = time.time() train_metrics = self.semi_train_epoch(epoch) # get peak of memory usage if 'cpu_memory_MB' in train_metrics: metrics['peak_cpu_memory_MB'] = max( metrics.get('peak_cpu_memory_MB', 0), train_metrics['cpu_memory_MB']) for key, value in train_metrics.items(): if key.startswith('gpu_'): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) """ if self.unlabelled_dataset is not None: unlabelled_metrics = unlabelled_train_epoch(self.trainer, self.unlabelled_dataset, epoch) for key, value in unlabelled_metrics.items(): if key.startswith('gpu_'): metrics["peak_"+'un_'+key] = max(unlabelled_metrics.get("peak_"+key, 0), value) else: metrics['un_'+key] = value """ if self.trainer._validation_data is not None and ( (epoch - epoch_counter) % self.calc_valid_freq == (self.calc_valid_freq - 1)): with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, num_batches = self.trainer._validation_loss() val_metrics = training_util.get_metrics(self.trainer.model, val_loss, num_batches, reset=True) # Check validation metric for early stopping this_epoch_val_metric = val_metrics[ self.trainer._validation_metric] self.trainer._metric_tracker.add_metric( this_epoch_val_metric) if self.trainer._metric_tracker.should_stop_early(): logger.info("Ran out of patience. Stopping training.") break self.trainer._tensorboard.log_metrics(train_metrics, val_metrics=val_metrics, log_to_console=True) # Create overall metrics dict training_elapsed_time = time.time() - training_start_time metrics["training_duration"] = time.strftime( "%H:%M:%S", time.gmtime(training_elapsed_time)) metrics["training_start_epoch"] = epoch_counter metrics["training_epochs"] = epochs_trained metrics["epoch"] = epoch for key, value in train_metrics.items(): metrics["training_" + key] = value for key, value in val_metrics.items(): metrics["validation_" + key] = value is_best_so_far = False if self.trainer._metric_tracker.is_best_so_far(): is_best_so_far = True # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) 
metrics['best_epoch'] = epoch for key, value in val_metrics.items(): metrics["best_validation_" + key] = value self.trainer._metric_tracker.best_epoch_metrics = val_metrics if self.trainer._serialization_dir: dump_metrics( os.path.join(self.trainer._serialization_dir, f'metrics_epoch_{epoch}.json'), metrics) #Pdb().set_trace() if self.trainer._learning_rate_scheduler: # The LRScheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. self.trainer._learning_rate_scheduler.step( this_epoch_val_metric, epoch) self.trainer._save_checkpoint(epoch) if self.constraints_model is not None: spath = self.save_constraints_model(epoch) if is_best_so_far: shutil.copyfile( spath, os.path.join(self.trainer._serialization_dir, 'best_dd_checkpoint.pth')) # Start saving checkpoint models after checkpoint_begin after every checkpoint_interval #if (self.trainer._checkpointer._save_intermediate_checkpoints) and (epoch >= self.trainer._checkpointer._checkpoint_begin) and (epoch%self.trainer._checkpointer._checkpoint_interval == 0): # shutil.copyfile(spath,os.path.join(self.trainer._serialization_dir,'dd_checkpoint_epoch_'+str(epoch)+'.cpoint')) epoch_elapsed_time = time.time() - epoch_start_time logger.info( "Epoch duration: %s", time.strftime("%H:%M:%S", time.gmtime(epoch_elapsed_time))) if epoch < self.trainer._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * \ ((self.trainer._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) self.trainer.model.train() epochs_trained += 1 # Take snapshot and reveal top memory allocation # snapshot = tracemalloc.take_snapshot() # top_stats = snapshot.statistics('lineno') # print("[ Top 10 ]") # for stat in top_stats[:10]: # logger.info(stat) # Load the best model state before returning best_model_state = self.trainer._checkpointer.best_model_state() if best_model_state: self.trainer.model.load_state_dict(best_model_state) return metrics
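# The custom trainer above only runs validation every calc_valid_freq epochs. A tiny
# standalone restatement of that check (illustrative only):
def should_validate(epoch: int, start_epoch: int, calc_valid_freq: int) -> bool:
    return (epoch - start_epoch) % calc_valid_freq == calc_valid_freq - 1

# with calc_valid_freq=3 and start_epoch=0, validation runs on epochs 2, 5, 8, ...
assert [e for e in range(9) if should_validate(e, 0, 3)] == [2, 5, 8]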
def train_model(params: Params, serialization_dir: str, file_friendly_logging: bool = False, recover: bool = False) -> Model: """ Trains the model specified in the given :class:`Params` object, using the data and training parameters also specified in that object, and saves the results in ``serialization_dir``. Parameters ---------- params : ``Params`` A parameter object specifying an AllenNLP Experiment. serialization_dir : ``str`` The directory in which to save results and logs. file_friendly_logging : ``bool``, optional (default=False) If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow down tqdm's output to only once every 10 seconds. recover : ``bool``, optional (default=False) If ``True``, we will try to recover a training run from an existing serialization directory. This is only intended for use when something actually crashed during the middle of a run. For continuing training a model on new data, see the ``fine-tune`` command. Returns ------- best_model: ``Model`` The model with the best epoch weights. """ prepare_environment(params) create_serialization_dir(params, serialization_dir, recover) prepare_global_logging(serialization_dir, file_friendly_logging) check_for_gpu(params.get('trainer').get('cuda_device', -1)) params.to_file(os.path.join(serialization_dir, CONFIG_NAME)) all_datasets = datasets_from_params(params) datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets)) for dataset in datasets_for_vocab_creation: if dataset not in all_datasets: raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}") logger.info("From dataset instances, %s will be considered for vocabulary creation.", ", ".join(datasets_for_vocab_creation)) vocab = Vocabulary.from_params( params.pop("vocabulary", {}), (instance for key, dataset in all_datasets.items() for instance in dataset if key in datasets_for_vocab_creation) ) model = Model.from_params(vocab=vocab, params=params.pop('model')) # Initializing the model can have side effect of expanding the vocabulary vocab.save_to_files(os.path.join(serialization_dir, "vocabulary")) iterator = DataIterator.from_params(params.pop("iterator")) iterator.index_with(vocab) validation_iterator_params = params.pop("validation_iterator", None) if validation_iterator_params: validation_iterator = DataIterator.from_params(validation_iterator_params) validation_iterator.index_with(vocab) else: validation_iterator = None train_data = all_datasets['train'] validation_data = all_datasets.get('validation') test_data = all_datasets.get('test') trainer_params = params.pop("trainer") no_grad_regexes = trainer_params.pop("no_grad", ()) for name, parameter in model.named_parameters(): if any(re.search(regex, name) for regex in no_grad_regexes): parameter.requires_grad_(False) frozen_parameter_names, tunable_parameter_names = \ get_frozen_and_tunable_parameter_names(model) logger.info("Following parameters are Frozen (without gradient):") for name in frozen_parameter_names: logger.info(name) logger.info("Following parameters are Tunable (with gradient):") for name in tunable_parameter_names: logger.info(name) trainer_choice = trainer_params.pop_choice("type", Trainer.list_available(), default_to_first_choice=True) trainer = Trainer.by_name(trainer_choice).from_params(model=model, serialization_dir=serialization_dir, iterator=iterator, train_data=train_data, validation_data=validation_data, params=trainer_params, validation_iterator=validation_iterator) evaluate_on_test = 
params.pop_bool("evaluate_on_test", False) params.assert_empty('base train command') try: metrics = trainer.train() except KeyboardInterrupt: # if we have completed an epoch, try to create a model archive. if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)): logging.info("Training interrupted by the user. Attempting to create " "a model archive using the current best epoch weights.") archive_model(serialization_dir, files_to_archive=params.files_to_archive) raise # Now tar up results archive_model(serialization_dir, files_to_archive=params.files_to_archive) logger.info("Loading the best epoch weights.") best_model_state_path = os.path.join(serialization_dir, 'best.th') best_model_state = torch.load(best_model_state_path) best_model = model best_model.load_state_dict(best_model_state) if test_data and evaluate_on_test: logger.info("The model will be evaluated using the best epoch weights.") test_metrics = evaluate( best_model, test_data, validation_iterator or iterator, cuda_device=trainer._cuda_devices[0] # pylint: disable=protected-access ) for key, value in test_metrics.items(): metrics["test_" + key] = value elif test_data: logger.info("To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.") dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True) return best_model
def train(self, experiment: Optional[Experiment] = None) -> Dict[str, Any]: """ Trains the supplied model with the supplied parameters. """ try: epoch_counter = self._restore_checkpoint() except RuntimeError: traceback.print_exc() raise ConfigurationError( "Could not recover training from the checkpoint. Did you mean to output to " "a different serialization directory or delete the existing serialization " "directory?") training_util.enable_gradient_clipping(self.model, self._grad_clipping) self.experiment = experiment logger.info("Beginning training.") self.val_metrics: Dict[str, float] = {} this_epoch_val_metric: float = None self.metrics: Dict[str, Any] = {} epochs_trained = 0 training_start_time = time.time() self.metrics["best_epoch"] = self._metric_tracker.best_epoch for key, value in self._metric_tracker.best_epoch_metrics.items(): self.metrics["best_validation_" + key] = value for callback in self._epoch_callbacks: callback(self, metrics={}, epoch=-1, is_master=self._master) for epoch in range(epoch_counter, self._num_epochs): self.epoch = epoch epoch_start_time = time.time() train_metrics = self._train_epoch(epoch) if experiment: with experiment.train(): experiment.log_metrics( { k: v for k, v in train_metrics.items() if np.isscalar(v) }, step=epoch) # get peak of memory usage for key, value in train_metrics.items(): if key.startswith("gpu_") and key.endswith("_memory_MB"): self.metrics["peak_" + key] = max( self.metrics.get("peak_" + key, 0), value) elif key.startswith("worker_") and key.endswith("_memory_MB"): self.metrics["peak_" + key] = max( self.metrics.get("peak_" + key, 0), value) if self._validation_data_loader is not None and epoch >= self.epochs_before_validate: with torch.no_grad(): try: if self.external_callbacks: self.external_callbacks.call_if_registered( CallbackName.BEFORE_VALIDATION, annotator=self.annotator, model=self.model, trainer=self, experiment=experiment) # We have a validation set, so compute all the metrics on it. val_loss, val_reg_loss, num_batches, preds = self._validation_loss( epoch) # It is safe again to wait till the validation is done. This is # important to get the metrics right. if self._distributed: dist.barrier() self.val_metrics = training_util.get_metrics( self.model, val_loss, val_reg_loss, num_batches, reset=True, world_size=self._world_size, cuda_device=self.cuda_device, ) if self.dataset_writer: if self.decoder: preds = self.decoder.decode_batch( self.model.vocab, preds) filename = self._serialization_dir + f"/pred_epoch_{epoch}.txt" with open(filename, "w") as f: self.dataset_writer.write_to_file( self.model.vocab, OrderedDatasetReader.restore_order(preds), f) if self.validation_command: self.val_metrics.update( self.validation_command.evaluate(filename)) if self.external_callbacks: self.external_callbacks.call_if_registered( CallbackName.AFTER_VALIDATION, annotator=self.annotator, model=self.model, trainer=self, experiment=experiment) # Check validation metric for early stopping this_epoch_val_metric = self.val_metrics[ self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) if self._metric_tracker.should_stop_early(): logger.info( "Ran out of patience. 
Stopping training.") break except Exception as ex: print("An exception occured:") print(ex) self._checkpointer.save_checkpoint("validation-failed", trainer=self) raise if self._master: self._tensorboard.log_metrics( train_metrics, val_metrics=self.val_metrics, log_to_console=True, epoch=epoch + 1) # +1 because tensorboard doesn't like 0 # Create overall metrics dict training_elapsed_time = time.time() - training_start_time self.metrics["training_duration"] = str( datetime.timedelta(seconds=training_elapsed_time)) self.metrics["training_start_epoch"] = epoch_counter self.metrics["training_epochs"] = epochs_trained self.metrics["epoch"] = epoch for key, value in train_metrics.items(): self.metrics["training_" + key] = value for key, value in self.val_metrics.items(): self.metrics["validation_" + key] = value if experiment: with experiment.validate(): experiment.log_metrics( { k: v for k, v in self.metrics.items() if np.isscalar(v) }, step=epoch) if self._metric_tracker.is_best_so_far(): # Update all the best_ metrics. # (Otherwise they just stay the same as they were.) self.metrics["best_epoch"] = epoch for key, value in self.val_metrics.items(): self.metrics["best_validation_" + key] = value self._metric_tracker.best_epoch_metrics = self.val_metrics if self._serialization_dir and self._master: common_util.dump_metrics( os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), self.metrics) # The Scheduler API is agnostic to whether your schedule requires a validation metric - # if it doesn't, the validation metric passed here is ignored. if self._learning_rate_scheduler: self._learning_rate_scheduler.step(this_epoch_val_metric) if self._momentum_scheduler: self._momentum_scheduler.step(this_epoch_val_metric) if self._master: self._checkpointer.save_checkpoint( epoch, self, is_best_so_far=self._metric_tracker.is_best_so_far()) # Wait for the master to finish saving the checkpoint if self._distributed: dist.barrier() for callback in self._epoch_callbacks: callback(self, metrics=self.metrics, epoch=epoch, is_master=self._master) epoch_elapsed_time = time.time() - epoch_start_time logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) if epoch < self._num_epochs - 1: training_elapsed_time = time.time() - training_start_time estimated_time_remaining = training_elapsed_time * ( (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1) formatted_time = str( datetime.timedelta(seconds=int(estimated_time_remaining))) logger.info("Estimated training time remaining: %s", formatted_time) epochs_trained += 1 # make sure pending events are flushed to disk and files are closed properly self._tensorboard.close() # Load the best model state before returning best_model_state = self._checkpointer.best_model_state() if best_model_state: self.model.load_state_dict(best_model_state) if self.external_callbacks: self.external_callbacks.call_if_registered( CallbackName.AFTER_TRAINING, annotator=self.annotator, model=self.model, trainer=self, experiment=experiment) return self.metrics
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: # Disable some of the more verbose logging statements logging.getLogger("allennlp.common.params").disabled = True logging.getLogger("allennlp.nn.initializers").disabled = True logging.getLogger("transformers.modeling_utils").disabled = True logging.getLogger("transformers.tokenization_utils").disabled = True logging.getLogger("transformers.configuration_utils").disabled = True logging.basicConfig(level=logging.INFO) # Load from archive archive = load_archive( args.archive_file, weights_file=args.weights_file, cuda_device=args.cuda_device, overrides=args.overrides, ) config = archive.config prepare_environment(config) model = archive.model model.eval() # Load the evaluation data # Try to use the validation dataset reader if there is one - otherwise fall back # to the default dataset_reader used for both training and validation. validation_dataset_reader_params = config.pop("validation_dataset_reader", None) if validation_dataset_reader_params is not None: dataset_reader = DatasetReader.from_params(validation_dataset_reader_params) else: dataset_reader = DatasetReader.from_params(config.pop("dataset_reader")) evaluation_data_path = args.input_file logger.info("Reading evaluation data from %s", evaluation_data_path) instances = dataset_reader.read(evaluation_data_path) embedding_sources = ( json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {} ) if args.extend_vocab: logger.info("Vocabulary is being extended with test instances.") model.vocab.extend_from_instances(instances=instances) model.extend_embedder_vocab(embedding_sources) instances.index_with(model.vocab) data_loader_params = config.pop("validation_data_loader", None) if data_loader_params is None: data_loader_params = config.pop("data_loader") if args.batch_size: data_loader_params["batch_size"] = args.batch_size data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params) if "iter_norm" in dir(model.text_field_embedder._token_embedders['tokens']): iter_num = model.text_field_embedder._token_embedders['tokens'].iter_norm else: iter_num = None if iter_num: # Obtrain evaluation info for iterative normalization: iter_mean_eval = [] for iter_norm_i in range(iter_num): logging.info("This is the {} time during iterative normalization for evaluation".format(iter_norm_i)) mean, embeddings = get_iter_norm_mean_eval(model, data_loader, iter_mean_eval, args.cuda_device) logger.info("The degree of isotropy of vectors is {} ".format(degree_anisotropy(embeddings.t(), args.cuda_device))) iter_mean_eval.append(mean) model.text_field_embedder._token_embedders['tokens'].iter_norm = None model.text_field_embedder._token_embedders['tokens']._matched_embedder.mean_emb_eval = iter_mean_eval model.text_field_embedder._token_embedders['tokens']._matched_embedder.is_train = False metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key) logger.info("Finished evaluating.") dump_metrics(args.output_file, metrics, log=True) return metrics
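# get_iter_norm_mean_eval and degree_anisotropy are not defined in this snippet. As
# a rough, assumed illustration only: iterative normalization repeatedly subtracts
# the current mean vector from the embedding matrix, and one common anisotropy proxy
# is the average pairwise cosine similarity of the embeddings:
import torch

def iterative_mean_center(embeddings: torch.Tensor, num_iters: int):
    means = []
    for _ in range(num_iters):
        mean = embeddings.mean(dim=0, keepdim=True)
        means.append(mean)
        embeddings = embeddings - mean
    return means, embeddings

def average_cosine_similarity(embeddings: torch.Tensor) -> float:
    normed = torch.nn.functional.normalize(embeddings, dim=1)
    sims = normed @ normed.t()
    n = sims.size(0)
    # drop the n diagonal entries (self-similarity of 1.0) before averaging
    return ((sims.sum() - n) / (n * (n - 1))).item()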
prob_diff_weight=args.prob_diff_weight, learning_rate=args.learning_rate, num_updates=args.num_updates, num_labels=class_model_args['num_classes'], device=args.cuda) data = pd.read_csv(args.csv_path) sequences = data['sequences'].tolist()[:args.sample] labels = data['labels'].tolist()[:args.sample] maskers = [args.maskers.split(',')] * len(sequences) results_path = Path( args.results_path) / datetime.now().strftime('%Y%m%d_%H%M%S') results_path.mkdir(exist_ok=True, parents=True) path_to_results_file = results_path / 'results.csv' dump_metrics(results_path / 'args.json', args.__dict__) with open(path_to_results_file, 'w', newline='') as csv_write: fieldnames = list(AttackerOutput.__annotations__.keys()) writer = csv.DictWriter(csv_write, fieldnames=fieldnames) writer.writeheader() for seq, lab, mask_tokens in tqdm(zip(sequences, labels, maskers)): attacker.set_label_to_attack(lab) attacker.set_input(sequence=seq, mask_tokens=mask_tokens) output = attacker.sample_until_label_is_changed( max_steps=args.max_steps, early_stopping=args.early_stopping).__dict__ attacker.empty_history()
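# The loop above derives its CSV columns from AttackerOutput.__annotations__. A small
# self-contained illustration of that pattern with a stand-in dataclass (the real
# fields of AttackerOutput are not shown in this snippet):
import csv
from dataclasses import dataclass, asdict

@dataclass
class ExampleOutput:
    sequence: str
    adversarial_sequence: str
    label: int

with open("results.csv", "w", newline="") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=list(ExampleOutput.__annotations__.keys()))
    writer.writeheader()
    writer.writerow(asdict(ExampleOutput("a cat sat", "a dog sat", 1)))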