def evaluate(model: Model, instances: Iterable[Instance], data_iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    """Run `model` once over `instances` and return its final metrics.

    Metrics whose names begin with "_" are still returned but are kept off
    the tqdm progress-bar description.
    """
    warned_about_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches, total=data_iterator.get_num_batches(instances))

        for batch in progress:
            model(**util.move_to_device(batch, cuda_device))
            metrics = model.get_metrics()

            has_hidden = any(name.startswith("_") for name in metrics)
            if has_hidden and not warned_about_underscores:
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                warned_about_underscores = True

            shown = ["%s: %.2f" % (name, value)
                     for name, value in metrics.items()
                     if not name.startswith("_")]
            progress.set_description(', '.join(shown) + " ||", refresh=False)

        return model.get_metrics(reset=True)
def __init__(
    self,
    serialization_dir: str,
    cuda_device: int = -1,
    distributed: bool = False,
    local_rank: int = 0,
    world_size: int = 1,
) -> None:
    """Record the serialization directory, device, and distributed bookkeeping.

    Raises `ConfigurationError` for a list of devices, a non-int device, or
    a distributed run with a world size of one.
    """
    check_for_gpu(cuda_device)
    self._serialization_dir = serialization_dir

    # Guard clauses: reject configurations the 1.0 Trainer no longer supports.
    if isinstance(cuda_device, list):
        raise ConfigurationError(
            "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
            "Instead, we use torch's DistributedDataParallel at the command level, meaning "
            "our Trainer always uses a single GPU per process.")
    if not isinstance(cuda_device, int):
        raise ConfigurationError(
            "Expected an int for cuda_device, got {}".format(cuda_device))
    if distributed and world_size <= 1:
        raise ConfigurationError(
            "Distributed training can be performed only with more than 1 GPU device. Check "
            "`cuda_device` key in the experiment configuration.")

    self.cuda_device = cuda_device
    self._distributed = distributed
    self._rank = local_rank
    # Rank 0 is the master process in a distributed run.
    self._master = local_rank == 0
    self._world_size = world_size
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             serialization_dir: str,
             eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    # Evaluate `model` on `instances`. As currently written this returns the
    # raw output dict of the FIRST batch only (see NOTE below).
    # NOTE(review): leftover debug print — consider removing before release.
    print("inst", type(instances), dir(instances), instances)
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    # Refuse to clobber an existing generations file for this eval suffix.
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))
    # //ron: TODO - It could be helpful if we will encounter some problems with running times
    # caching saves us extra 30 minutes
    # if 'goodnews' in serialization_dir:
    #     cache_path = 'data/goodnews/evaluation_cache.pkl'
    # elif 'nytimes' in serialization_dir:
    #     cache_path = 'data/nytimes/evaluation_cache.pkl'
    # if os.path.exists(cache_path):
    #     with open(cache_path, 'rb') as f:
    #         cache = pickle.load(f)
    # else:
    #     cache = {}
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")
            # NOTE(review): debug print + unconditional early return — all the
            # loss/weight accumulation below this `return` is dead code as
            # written. Confirm whether this short-circuit is intentional.
            print(output_dict)
            return output_dict
            # write_to_json(output_dict, serialization_dir,
            #               nlp, eval_suffix, cache)

            # metrics = model.get_metrics()
            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0
                total_weight += weight
                total_loss += loss.item() * weight
def predict_model_with_archive(predictor: str, params: Params, archive: str,
                               input_file: str, output_file: str,
                               batch_size: int = None):
    """Load an archived model, swap its config for `params`, and run prediction.

    Parameters
    ----------
    predictor : the predictor type to instantiate via `MachampPredictor`.
    params : configuration that fully replaces the archived config.
    archive : path to the saved model archive.
    input_file / output_file : prediction input and destination paths.
    batch_size : batch size; when None, taken from the data-loader config.
    """
    if 'cuda_device' in params['trainer']:
        cuda_device = params['trainer']['cuda_device']
        from allennlp.common.checks import check_for_gpu
        check_for_gpu(cuda_device)
        archive = load_archive(archive, cuda_device=cuda_device)
    else:
        archive = load_archive(archive)

    # Replace the archived config wholesale with the supplied params.
    # Iterating over a `.duplicate()` lets us delete keys safely.
    for item in archive.config.duplicate():
        del archive.config[item]
    for item in params:
        archive.config[item] = params.as_dict()[item]

    predictor = MachampPredictor.from_archive(archive, predictor)

    # Fix: compare with `is None` (identity), not `== None`.
    if batch_size is None:
        batch_size = params['data_loader']['batch_sampler']['batch_size']

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
def run_distributed_test(
    device_ids: List[int] = None,
    func: Callable = None,
    *args,
    **kwargs,
):
    """
    This runs the `func` in a simulated distributed environment.

    # Parameters

    device_ids: `List[int]`
        List of devices. There need to be at least 2 devices. Default is [-1, -1].
    func: `Callable`
        `func` needs to be global for spawning the processes, so that it can be
        pickled.
    """
    # Fix: a mutable default argument (`[-1, -1]`) is shared across calls;
    # use a None sentinel and supply the documented default here instead.
    device_ids = device_ids or [-1, -1]
    check_for_gpu(device_ids)
    nprocs = world_size = len(device_ids)
    mp.start_processes(
        init_process,
        args=(device_ids, world_size, func, args, kwargs),
        nprocs=nprocs,
        start_method="fork",
    )
def evaluate(model, instances, data_iterator, cuda_device):
    # Run `model` once over `instances` and return final metrics.
    # NOTE: this variant is futurize-style python2/3-compatible code
    # (u"" literals, list(...items())); keep that convention when editing.
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        # The iterator moves tensors to `cuda_device` itself here, so there
        # is no explicit move_to_device call in the loop body.
        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False,
                                 cuda_device=cuda_device)
        logger.info(u"Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))
        for batch in generator_tqdm:
            model(**batch)
            metrics = model.get_metrics()

            # Warn once that underscore-prefixed metrics stay off the bar.
            if (not _warned_tqdm_ignores_underscores and any(
                    metric_name.startswith(u"_") for metric_name in metrics)):
                logger.warning(u"Metrics with names beginning with \"_\" will "
                               u"not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = u', '.join([
                u"%s: %.2f" % (name, value)
                for name, value in list(metrics.items())
                if not name.startswith(u"_")
            ]) + u" ||"
            generator_tqdm.set_description(description, refresh=False)

        return model.get_metrics(reset=True)
def __init__(self,
             serialization_dir: str,
             cuda_device: Union[int, List] = -1,
             allocation_dict: Dict[str, int] = None) -> None:
    """Configure the serialization directory and GPU device(s).

    A list of devices enables multi-GPU mode; supplying `allocation_dict`
    instead selects a custom (non data-parallel) device allocation.
    """
    check_for_gpu(cuda_device)
    self._serialization_dir = serialization_dir

    # Configure GPUs:
    if not isinstance(cuda_device, (int, list)):
        raise ConfigurationError("Expected an int or list for cuda_device, got {}".format(cuda_device))

    if isinstance(cuda_device, list):
        self._cuda_devices = cuda_device
        # Only enter standard multiple GPU mode (data parallel) if allocation_dict is empty
        if not allocation_dict:
            logger.warning(f"Data Parallel Multiple GPU support is experimental not recommended for use. "
                           "In some cases it may lead to incorrect results or undefined behavior.")
            self._multiple_gpu = True
            self._allocation_dict = None
        else:
            # Custom allocation: devices are assigned via the dictionary.
            self._multiple_gpu = False
            self._allocation_dict = allocation_dict
    else:
        assert (allocation_dict is None or len(allocation_dict) == 0), \
            "Should not specify GPU Allocation if only one GPU!"
        self._multiple_gpu = False
        self._cuda_devices = [cuda_device]
        self._allocation_dict = None
def run_distributed_test(
    device_ids: List[int] = None,
    func: Callable = None,
    *args,
    **kwargs,
):
    """
    This runs the `func` in a simulated distributed environment.

    # Parameters

    device_ids: `List[int]`
        List of devices. There need to be at least 2 devices. Default is [-1, -1].
    func: `Callable`
        `func` needs to be global for spawning the processes, so that it can be
        pickled.
    """
    device_ids = device_ids or [-1, -1]
    check_for_gpu(device_ids)

    # "fork" start method is the default and should be preferred, except when we're
    # running the tests on GPU, in which case we need to use "spawn".
    uses_gpu = any(device >= 0 for device in device_ids)
    nprocs = world_size = len(device_ids)
    mp.start_processes(
        init_process,
        args=(world_size, device_ids, func, args, kwargs),
        nprocs=nprocs,
        start_method="spawn" if uses_gpu else "fork",
    )
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Load the archive named in `args` and wrap it as a 'tokenized-tagger' predictor."""
    check_for_gpu(args.cuda_device)
    model_archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    return Predictor.from_archive(model_archive, 'tokenized-tagger')
def _get_predictor(args) -> SemanticRoleLabelerPredictor:
    """Load an SRL predictor on the device requested by `args`."""
    check_for_gpu(args.cuda_device)
    loaded = load_archive(args.archive_file, cuda_device=args.cuda_device)
    return SemanticRoleLabelerPredictor.from_archive(loaded)
def __init__(self,
             archive_file=DEFAULT_ARCHIVE_FILE,
             cuda_device=DEFAULT_CUDA_DEVICE,
             model_file=None,
             context_size=3):
    """Constructor for NLU class.

    Loads the MILU model archive (downloading `model_file` if no local
    archive exists) and a spaCy tokenizer, and puts the model in eval mode.
    """
    # Number of previous utterances kept as dialogue context.
    self.context_size = context_size
    check_for_gpu(cuda_device)

    # Fall back to downloading `model_file` when the archive is missing.
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for MILU is specified!")
        archive_file = cached_path(model_file)
    archive = load_archive(archive_file, cuda_device=cuda_device)

    self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
    # Keep "id" from being split/lemmatized by spaCy's tokenizer.
    _special_case = [{ORTH: u"id", LEMMA: u"id"}]
    self.tokenizer.spacy.tokenizer.add_special_case(u"id", _special_case)

    dataset_reader_params = archive.config["dataset_reader"]
    self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
    self.model = archive.model
    self.model.eval()
def predict_model(predictor: str, params: Params, archive_dir: str,
                  input_file: str, output_file: str, batch_size: int = 1):
    """
    Predict output annotations from the given model and input file and produce an output file.
    :param predictor: the type of predictor to use, e.g., "udify_predictor"
    :param params: the Params of the model
    :param archive_dir: the saved model archive
    :param input_file: the input file to predict
    :param output_file: the output file to save
    :param batch_size: the batch size, set this higher to speed up GPU inference
    """
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)

    archive_path = os.path.join(archive_dir, "model.tar.gz")
    loaded = load_archive(archive_path, cuda_device=cuda_device)
    predictor = Predictor.from_archive(loaded, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """Run a single no-grad pass over `instances` and return final metrics."""
    check_for_gpu(cuda_device)
    underscore_warning_emitted = False
    with torch.no_grad():
        model.eval()

        batch_iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress_bar = Tqdm.tqdm(batch_iterator,
                                 total=data_iterator.get_num_batches(instances))
        for raw_batch in progress_bar:
            model(**util.move_to_device(raw_batch, cuda_device))
            metrics = model.get_metrics()

            if not underscore_warning_emitted:
                if any(key.startswith("_") for key in metrics):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    underscore_warning_emitted = True

            visible = ', '.join("%s: %.2f" % (key, val)
                                for key, val in metrics.items()
                                if not key.startswith("_"))
            progress_bar.set_description(visible + " ||", refresh=False)

        return model.get_metrics(reset=True)
def __init__(
    self,
    name: str,
    model: Model,
    optimizer: Optimizer,
    cuda_device: int,
    grad_norm: Optional[float] = None,
    scaler: Optional[amp.GradScaler] = None,
    grad_clipping: Optional[float] = None,
    learning_rate_scheduler: Optional[LearningRateScheduler] = None,
    momentum_scheduler: Optional[MomentumScheduler] = None
) -> None:
    """Bundle one named model/optimizer pair with its device, gradient
    handling, schedulers, and per-split loss tracking.
    """
    self.name = name
    self.model = model
    self._optimizer = optimizer

    # When no device is given, pick GPU 0 if any GPU is present, else CPU.
    if cuda_device is None:
        from torch import cuda

        if cuda.device_count() > 0:
            cuda_device = 0
        else:
            cuda_device = -1

    check_for_gpu(cuda_device)
    self._cuda_device = int_to_device(cuda_device)

    self._grad_norm = grad_norm
    self._scaler = scaler
    self._grad_clipping = grad_clipping

    self._learning_rate_scheduler = learning_rate_scheduler
    self._momentum_scheduler = momentum_scheduler
    # Separate running-loss trackers for training and validation passes.
    self._loss = {'train': ComponentLoss(), 'validation': ComponentLoss()}
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Load the archive from the CLI arguments and build the requested predictor."""
    check_for_gpu(args.cuda_device)
    loaded_archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    return Predictor.from_archive(loaded_archive, args.predictor)
def get_predictor(self, model_path, cuda_device):
    """Load the archive at `model_path` and wrap it in a default Predictor."""
    check_for_gpu(cuda_device)
    loaded_archive = load_archive(model_path,
                                  weights_file=None,
                                  cuda_device=cuda_device,
                                  overrides="")
    return Predictor.from_archive(loaded_archive, None)
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Build the predictor directly from a path, honoring CLI overrides."""
    check_for_gpu(args.cuda_device)
    predictor = Predictor.from_path(
        args.archive_path,
        predictor_name=args.predictor,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    return predictor
def _get_predictor(args):
    """Instantiate the predictor type selected on the command line."""
    check_for_gpu(args.cuda_device)
    model_archive = load_archive(args.archive_file,
                                 weights_file=args.weights_file,
                                 cuda_device=args.cuda_device,
                                 overrides=args.overrides)
    return Predictor.from_archive(model_archive, args.predictor)
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evalute the model.
    task_name : ``str``, required
        The name of the task on which evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that go through the dataset.
    cuda_device : ``int``
        Cuda device to use.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset.
    """
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))
        eval_loss = 0
        nb_batches = 0
        for tensor_batch in generator_tqdm:
            nb_batches += 1

            # Tag the batch for the "stm" evaluation stage of this task.
            train_stages = ["stm", "sd", "valid"]
            task_index = TASKS_NAME.index(task_name)
            tensor_batch['task_index'] = torch.tensor(task_index)
            tensor_batch["reverse"] = torch.tensor(False)
            tensor_batch['for_training'] = torch.tensor(False)
            train_stage = train_stages.index("stm")
            tensor_batch['train_stage'] = torch.tensor(train_stage)

            # Fix: move the batch to the requested device instead of the
            # hard-coded GPU 0, so CPU (-1) and non-zero GPU ids work.
            tensor_batch = move_to_device(tensor_batch, cuda_device)

            eval_output_dict = model.forward(**tensor_batch)
            loss = eval_output_dict["loss"]
            eval_loss += loss.item()
            metrics = model.get_metrics(task_name=task_name)
            metrics["stm_loss"] = float(eval_loss / nb_batches)

            description = training_util.description_from_metrics(metrics)
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True)
        # Guard against an empty dataset (no batches iterated).
        metrics["stm_loss"] = float(eval_loss / nb_batches) if nb_batches else 0.0
    return metrics
def main(span_model_path: str,
         span_to_question_model_path: str,
         cuda_device: int,
         input_file: str,
         output_file: str,
         span_min_prob: float,
         question_min_prob: float,
         question_beam_size: int) -> None:
    # Run the span-first QA-SRL pipeline over JSONL input, writing one JSON
    # object per line to stdout, a gzip file, or a plain file depending on
    # `output_file`.
    check_for_gpu(cuda_device)
    span_model_archive = load_archive_from_folder(
        span_model_path,
        cuda_device=cuda_device,
        overrides='{ "model": { "span_selector": {"span_decoding_threshold": 0.00} } }',
        weights_file=os.path.join(span_model_path, "best.th"))
    # override span detection threshold to be low enough so we can reasonably approximate bad spans
    # as having probability 0.
    span_to_question_model_archive = load_archive_from_folder(
        span_to_question_model_path,
        cuda_device=cuda_device,
        weights_file=os.path.join(span_to_question_model_path, "best.th"))
    # Both dataset readers are re-configured to pass every instance through.
    span_model_dataset_reader_params = span_model_archive.config[
        "dataset_reader"].duplicate()
    span_model_dataset_reader_params["qasrl_filter"]["allow_all"] = True
    span_to_question_model_dataset_reader_params = span_to_question_model_archive.config[
        "dataset_reader"].duplicate()
    span_to_question_model_dataset_reader_params["qasrl_filter"][
        "allow_all"] = True
    pipeline = AFirstPipelineSequential(
        span_model=span_model_archive.model,
        span_model_dataset_reader=DatasetReader.from_params(
            span_model_dataset_reader_params),
        span_to_question_model=span_to_question_model_archive.model,
        span_to_question_model_dataset_reader=DatasetReader.from_params(
            span_to_question_model_dataset_reader_params),
        span_minimum_threshold=span_min_prob,
        question_minimum_threshold=question_min_prob,
        question_beam_size=question_beam_size)
    # Choose the output sink: stdout, gzip, or plain text.
    if output_file is None:
        for line in tqdm(read_lines(cached_path(input_file))):
            input_json = json.loads(line)
            output_json = pipeline.predict(input_json)
            print(json.dumps(output_json))
    elif output_file.endswith('.gz'):
        with gzip.open(output_file, 'wt') as f:
            for line in tqdm(read_lines(cached_path(input_file))):
                input_json = json.loads(line)
                output_json = pipeline.predict(input_json)
                f.write(json.dumps(output_json))
                f.write('\n')
    else:
        with open(output_file, 'w', encoding='utf8') as out:
            for line in tqdm(read_lines(cached_path(input_file))):
                input_json = json.loads(line)
                output_json = pipeline.predict(input_json)
                print(json.dumps(output_json), file=out)
def _get_predictors(args: argparse.Namespace) -> (Predictor, Predictor):
    """Load one archive and build both the standard and the 'aristo' predictor from it."""
    check_for_gpu(args.cuda_device)
    shared_archive = load_archive(args.archive_path,
                                  weights_file=args.weights_file,
                                  cuda_device=args.cuda_device,
                                  overrides=args.overrides)
    main_predictor = Predictor.from_archive(shared_archive, args.predictor)
    aristo_predictor = Predictor.from_archive(shared_archive, args.aristo_predictor)
    return (main_predictor, aristo_predictor)
def _get_predictor() -> predictors.Predictor:
    """Build a predictor from the paths and names in the global FLAGS object."""
    allen_checks.check_for_gpu(FLAGS.cuda_device)
    checks.file_exists(FLAGS.model_path)
    model_archive = models.load_archive(
        FLAGS.model_path,
        cuda_device=FLAGS.cuda_device,
    )
    return predictors.Predictor.from_archive(model_archive, FLAGS.predictor_name)
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Build the requested predictor; dependency-SRL predictors also get a side file."""
    check_for_gpu(args.cuda_device)
    loaded_archive = load_archive(args.archive_file,
                                  weights_file=args.weights_file,
                                  cuda_device=args.cuda_device,
                                  overrides=args.overrides)
    predictor = Predictor.from_archive(loaded_archive, args.predictor)
    if "dependency_srl" in args.predictor:
        # Dependency-SRL predictors read from a companion file derived from
        # the input path ("txt" replaced by "predict").
        predictor.set_files(args.input_file.replace("txt", "predict"))
    return predictor
def get_predictor(predictor_name: str, params: Params, archive: str):
    """Load `archive` on the trainer's device, overriding its config with `params`."""
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)
    overrides_json = json.dumps(params.as_dict())
    loaded = load_archive(archive,
                          cuda_device=cuda_device,
                          overrides=overrides_json)
    return Predictor.from_archive(loaded, predictor_name)
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             label_fname: str) -> Dict[str, Any]:
    """Evaluate `model` on `instances` and return its final metrics.

    Also writes one "real_label,guessed_label" CSV row per instance to
    `label_fname`.
    """
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    # Fix: open the label file with `with` so the handle is closed even if
    # evaluation raises (the original leaked it on error).
    with torch.no_grad(), open(label_fname, 'w') as label_file:
        model.eval()
        label_file.write('real_label,guessed_label\n')

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator,
                                   total=data_iterator.get_num_batches(instances))
        total_num_inst = 0
        for batch in generator_tqdm:
            num_inst = batch['tokens']['tokens'].size(0)
            total_num_inst += num_inst
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)

            # Bring tensors back to host memory before the numpy conversion
            # (hoists the duplicated cuda/cpu branches of the original).
            logits = output_dict['label_logits'].data
            if cuda_device != -1:
                logits = logits.cpu()
            output_labels = np.argmax(logits.numpy(), axis=1)

            labels = batch['label'].data
            if cuda_device != -1:
                labels = labels.cpu()
            true_labels = labels.numpy()

            assert true_labels.shape[0] == output_labels.shape[0]
            for i in range(true_labels.shape[0]):
                label_file.write(str(int(true_labels[i])) + ',')
                label_file.write(str(int(output_labels[i])) + '\n')

            metrics = model.get_metrics()
            if (not _warned_tqdm_ignores_underscores and
                    any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value)
                                     for name, value in metrics.items()
                                     if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        print("NUM INSTANCES ITERATED OVER: " + str(total_num_inst))
    return model.get_metrics(reset=True)
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Build the predictor, loading the dataset reader chosen on the command line."""
    check_for_gpu(args.cuda_device)
    model_archive = load_archive(args.archive_file,
                                 weights_file=args.weights_file,
                                 cuda_device=args.cuda_device,
                                 overrides=args.overrides)
    return Predictor.from_archive(
        model_archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
def _get_predictor(**params) -> Predictor:
    """Import the extra packages then build a predictor from keyword parameters."""
    for package_name in params["include_package"]:
        import_submodules(package_name)

    cuda_device = params["cuda_device"]
    check_for_gpu(cuda_device)
    loaded = load_archive(params["model_file"],
                          weights_file=params["weights_file"],
                          cuda_device=cuda_device,
                          overrides=params["overrides"])
    return Predictor.from_archive(loaded, params["predictor"])
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Load the archive and return its predictor with the model in eval mode."""
    check_for_gpu(args.cuda_device)
    loaded = load_archive(
        args.archive_path,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    predictor = Predictor.from_archive(loaded, args.predictor)
    # Make sure dropout etc. are disabled for inference.
    predictor._model.eval()
    return predictor
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evalute the model.
    task_name : ``str``, required
        The name of the task on which evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that go through the dataset.
    cuda_device : ``int``
        Cuda device to use.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset.
    """
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))
        eval_loss = 0
        nb_batches = 0
        for batch in progress:
            nb_batches += 1
            device_batch = util.move_to_device(batch, cuda_device)
            output = model.forward(task_name=task_name, tensor_batch=device_batch)
            eval_loss += output["loss"].item()

            metrics = model.get_metrics(task_name=task_name)
            metrics["loss"] = float(eval_loss / nb_batches)
            shown = ["%s: %.2f" % (name, value) for name, value in metrics.items()]
            progress.set_description(", ".join(shown) + " ||", refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True, full=True)
        metrics["loss"] = float(eval_loss / nb_batches)
    return metrics
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Load the model and sampler archives and combine them into a ClozePredictor."""
    check_for_gpu(args.cuda_device)

    def _load(archive_file):
        # Both archives share the same weights file, device, and overrides.
        return load_archive(archive_file,
                            weights_file=args.weights_file,
                            cuda_device=args.cuda_device,
                            overrides=args.overrides)

    return ClozePredictor.from_archive(_load(args.model_archive_file),
                                       _load(args.sampler_archive_file),
                                       args.predictor)
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """Build the predictor, forwarding an optional paper-features path from the overrides."""
    check_for_gpu(args.cuda_device)
    loaded = load_archive(args.archive_file,
                          weights_file=args.weights_file,
                          cuda_device=args.cuda_device,
                          overrides=args.overrides)
    overrides_dict = parse_overrides(args.overrides)
    try:
        paper_features_path = overrides_dict['dataset_reader']['paper_features_path']
    except KeyError:
        # No paper features configured in the overrides.
        paper_features_path = None
    return predictor_from_archive(loaded, args.predictor, paper_features_path)
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """Evaluate `model` over `instances`, averaging the loss when one is produced.

    Raises RuntimeError if only some batches yield a loss.
    """
    warned_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))

        batch_count = 0
        loss_count = 0
        total_loss = 0.0
        for batch in progress:
            batch_count += 1
            moved = util.move_to_device(batch, cuda_device)
            batch_loss = model(**moved).get("loss")

            metrics = model.get_metrics()
            if batch_loss is not None:
                loss_count += 1
                metrics["loss"] = batch_loss.item()
                total_loss += batch_loss.item()

            if not warned_underscores and any(
                    name.startswith("_") for name in metrics):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                warned_underscores = True
            shown = ["%s: %.2f" % (name, value)
                     for name, value in metrics.items()
                     if not name.startswith("_")]
            progress.set_description(', '.join(shown) + " ||", refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss/batch_count
        return final_metrics
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, an existing serialization directory is removed before training.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Validate every requested device up front (single int or a list).
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Build the vocabulary only from the datasets selected above.
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        (instance for key, dataset in all_datasets.items()
         for instance in dataset
         if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())

    # Freeze parameters matching any "no_grad" regex.
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(model=model,
                                                          serialization_dir=serialization_dir,
                                                          iterator=iterator,
                                                          train_data=train_data,
                                                          validation_data=validation_data,
                                                          params=trainer_params,
                                                          validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model, test_data, validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
def find_learning_rate_model(params: Params, serialization_dir: str,
                             start_lr: float = 1e-5,
                             end_lr: float = 10,
                             num_batches: int = 100,
                             linear_steps: bool = False,
                             stopping_factor: float = None,
                             force: bool = False) -> None:
    """
    Runs learning rate search for given `num_batches` and saves the results in ``serialization_dir``

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results.
    start_lr: ``float``
        Learning rate to start the search.
    end_lr: ``float``
        Learning rate upto which search is done.
    num_batches: ``int``
        Number of mini-batches to run Learning rate finder.
    linear_steps: ``bool``
        Increase learning rate linearly if False exponentially.
    stopping_factor: ``float``
        Stop the search when the current loss exceeds the best loss recorded by
        multiple of stopping factor. If ``None`` search proceeds till the ``end_lr``
    force: ``bool``
        If True and the serialization directory already exists, everything in it will
        be removed prior to finding the learning rate.
    """
    # Prepare (or clear) the serialization directory.
    if os.path.exists(serialization_dir) and force:
        shutil.rmtree(serialization_dir)

    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f'Serialization directory {serialization_dir} already exists and is '
                                 f'not empty.')
    else:
        os.makedirs(serialization_dir, exist_ok=True)

    prepare_environment(params)

    # Validate every requested device up front (single int or a list).
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        (instance for key, dataset in all_datasets.items()
         for instance in dataset
         if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())

    # Freeze parameters matching any "no_grad" regex.
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  params=trainer_params,
                                  validation_data=None,
                                  validation_iterator=None)

    logger.info(f'Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations.')
    learning_rates, losses = search_learning_rate(trainer,
                                                  start_lr=start_lr,
                                                  end_lr=end_lr,
                                                  num_batches=num_batches,
                                                  linear_steps=linear_steps,
                                                  stopping_factor=stopping_factor)
    logger.info(f'Finished learning rate search.')
    # Smooth the loss curve before plotting so the trend is readable.
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses, os.path.join(serialization_dir, 'lr-losses.png'))
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    # Evaluate `model` on `instances`, reporting a weighted-average loss.
    # When `batch_weight_key` is non-empty, each batch's loss is weighted by
    # output_dict[batch_weight_key]; otherwise every batch has weight 1.
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator,
                                   total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not _warned_tqdm_ignores_underscores and
                    any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value)
                                     for name, value in metrics.items()
                                     if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics