def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` over a single epoch of ``dataset`` and return its metrics.

    Parameters
    ----------
    model : ``Model``
        The model to evaluate. It is switched to eval mode before iterating.
    dataset : ``Dataset``
        The data handed to ``iterator``.
    iterator : ``DataIterator``
        Called as ``iterator(dataset, num_epochs=1)`` to produce batches.
    cuda_device : ``int``
        Forwarded to ``arrays_to_variables`` for every batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        Whatever ``model.get_metrics()`` returns once all batches were seen.
    """
    model.eval()

    batches = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")

    for raw_batch in tqdm.tqdm(batches):
        inputs = arrays_to_variables(raw_batch, cuda_device, for_training=False)
        # The forward output is discarded; metrics are read from the model below.
        model.forward(**inputs)

    return model.get_metrics()
def ensure_model_can_train_save_and_load(self,
                                         model: Model,
                                         dataset: Dataset,
                                         iterator: DataIterator = None):
    """
    Check that ``model`` yields a differentiable loss, and that saving and
    re-loading its state dict leaves the predictions for one batch unchanged.

    Parameters
    ----------
    model : ``Model``
        The model under test.
    dataset : ``Dataset``
        Source of the single batch used for both forward passes.
    iterator : ``DataIterator``, optional (default=None)
        Iterator used to draw the batch; a ``BasicIterator()`` is used when
        this argument is falsy.

    Returns
    -------
    A ``(model, loaded_model)`` tuple. Note that ``loaded_model`` is the same
    object as ``model``: the state dict is written to ``self.MODEL_FILE`` and
    loaded back into the very same instance.
    """
    # Eval mode, to turn off things like dropout.
    model.eval()

    batch_source = iterator or BasicIterator()
    first_batch = next(batch_source(dataset))
    inputs = arrays_to_variables(first_batch)

    def forward_and_backprop(candidate):
        # Run one forward pass, then check a loss exists and that gradients
        # can be computed from it.
        outputs = candidate.forward(**inputs)
        loss = outputs["loss"]
        assert loss is not None
        loss.backward()
        return outputs

    model_predictions = forward_and_backprop(model)

    # Round-trip the weights through the file system.
    torch.save(model.state_dict(), self.MODEL_FILE)
    loaded_model = model
    loaded_model.zero_grad()
    loaded_model.load_state_dict(torch.load(self.MODEL_FILE))
    # Eval mode again, to turn off things like dropout.
    loaded_model.eval()

    loaded_model_predictions = forward_and_backprop(loaded_model)

    # Every key produced before saving must also be present after loading,
    # with values that are close.
    for key, before in model_predictions.items():
        assert_allclose(before.data.numpy(),
                        loaded_model_predictions[key].data.numpy())

    return model, loaded_model
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` over a single epoch of ``dataset``, showing the running
    metrics in the progress bar, and return the final metrics.

    Parameters
    ----------
    model : ``Model``
        The model to evaluate. It is switched to eval mode before iterating.
    dataset : ``Dataset``
        The data handed to ``iterator``.
    iterator : ``DataIterator``
        Called as ``iterator(dataset, num_epochs=1)`` to produce batches; its
        ``get_num_batches(dataset)`` sets the progress bar total.
    cuda_device : ``int``
        Forwarded to ``arrays_to_variables`` for every batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        Whatever ``model.get_metrics()`` returns once all batches were seen.
    """
    model.eval()

    batches = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    progress = tqdm.tqdm(batches, total=iterator.get_num_batches(dataset))

    for raw_batch in progress:
        inputs = arrays_to_variables(raw_batch, cuda_device, for_training=False)
        model.forward(**inputs)

        # Refresh the progress bar label with the metrics accumulated so far.
        running_metrics = model.get_metrics()
        pieces = []
        for name, value in running_metrics.items():
            pieces.append("%s: %.2f" % (name, value))
        progress.set_description(', '.join(pieces) + " ||")

    return model.get_metrics()
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate.
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evaluate the model.
    task_name : ``str``, required
        The name of the task on which to evaluate the model. It is looked up
        in ``TASKS_NAME``; the lookup fails if the name is not listed there.
    data_iterator : ``DataIterator``
        Iterator that goes through the dataset.
    cuda_device : ``int``
        Cuda device to use. Each batch is moved to this device.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset, plus an
        ``"stm_loss"`` entry holding the mean batch loss (``0.0`` when the
        dataset produced no batches).
    """
    check_for_gpu(cuda_device)

    # These lookups do not depend on the batch, so resolve them once up front
    # instead of once per batch.
    train_stages = ["stm", "sd", "valid"]
    task_index = TASKS_NAME.index(task_name)
    train_stage = train_stages.index("stm")

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0.0
        nb_batches = 0
        for tensor_batch in generator_tqdm:
            nb_batches += 1

            # Extra control inputs passed to the model's forward alongside
            # the batch fields.
            tensor_batch['task_index'] = torch.tensor(task_index)
            tensor_batch["reverse"] = torch.tensor(False)
            tensor_batch['for_training'] = torch.tensor(False)
            tensor_batch['train_stage'] = torch.tensor(train_stage)

            # Use the requested device rather than a hard-coded device 0, so
            # that CPU runs and non-default GPUs work.
            tensor_batch = move_to_device(tensor_batch, cuda_device)

            eval_output_dict = model.forward(**tensor_batch)
            eval_loss += eval_output_dict["loss"].item()

            # Show running metrics in the progress bar.
            metrics = model.get_metrics(task_name=task_name)
            metrics["stm_loss"] = float(eval_loss / nb_batches)
            description = training_util.description_from_metrics(metrics)
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True)
        # Guard against an empty dataset: no batches means no loss to average.
        metrics["stm_loss"] = float(eval_loss / nb_batches) if nb_batches > 0 else 0.0

    return metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate.
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evaluate the model.
    task_name : ``str``, required
        The name of the task on which to evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that goes through the dataset.
    cuda_device : ``int``
        Cuda device to use. Each batch is moved to this device.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset, plus a
        ``"loss"`` entry holding the mean batch loss (``0.0`` when the
        dataset produced no batches).
    """
    check_for_gpu(cuda_device)

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0.0
        nb_batches = 0
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            nb_batches += 1

            eval_output_dict = model.forward(task_name=task_name, tensor_batch=batch)
            eval_loss += eval_output_dict["loss"].item()

            # Show running metrics in the progress bar.
            metrics = model.get_metrics(task_name=task_name)
            metrics["loss"] = float(eval_loss / nb_batches)
            description = ", ".join(
                "%s: %.2f" % (name, value) for name, value in metrics.items()
            ) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True, full=True)
        # Guard against an empty dataset: no batches means no loss to average.
        metrics["loss"] = float(eval_loss / nb_batches) if nb_batches > 0 else 0.0

    return metrics
def evaluate(model: Model, dataset: Dataset, iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``dataset`` and split the examples into those it
    predicted incorrectly ("hard") and correctly ("easy").

    Parameters
    ----------
    model : ``Model``
        The model to evaluate. Its forward output is read for the
        ``'label_logits'`` and ``'label_probs'`` entries.
    dataset : ``Dataset``
        The data handed to ``iterator``.
    iterator : ``DataIterator``
        Called as ``iterator(dataset, num_epochs=1)``; every item it yields is
        unpacked as a ``(raw_batch, batch)`` pair. Instances in ``raw_batch``
        are read for their ``'premise'``, ``'hypothesis'`` and ``'label'``
        fields.
    cuda_device : ``int``
        Forwarded to ``arrays_to_variables`` for every batch.

    Returns
    -------
    A ``(metrics, hard_subset, easy_subset)`` tuple: the result of
    ``model.get_metrics()`` and two ``pandas.DataFrame`` objects with columns
    ``sentence1``, ``sentence2``, ``gold_label``, ``prediction_label`` and
    ``prediction_score``.
    NOTE(review): the ``Dict[str, Any]`` return annotation does not match the
    3-tuple actually returned; it is left untouched to keep the signature
    stable.
    """
    # Built once instead of once per batch.
    inverse_label_map = {
        0: "entailment",
        1: "neutral",
        2: "contradiction",
        3: "hidden",
    }
    output_columns = ["sentence1", "sentence2", "gold_label",
                      "prediction_label", "prediction_score"]

    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator, total=iterator.get_num_batches(dataset))

    # Collect per-batch frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all prior rows each time.
    batch_frames = []
    for raw_batch, batch in generator_tqdm:
        parsed_fields = pd.DataFrame([
            {
                "sentence1": " ".join([token.text for token in fields['premise'].tokens]),
                "sentence2": " ".join([token.text for token in fields['hypothesis'].tokens]),
                "gold_label": fields['label'].label,
            }
            for fields in [instance.fields for instance in raw_batch.instances]
        ])

        tensor_batch = arrays_to_variables(batch, cuda_device, for_training=False)
        batch_outputs = model.forward(**tensor_batch)

        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

        # .cpu() makes the numpy conversion work when the outputs live on a
        # GPU; it is a no-op for tensors already on the CPU.
        label_indices = batch_outputs['label_logits'].data.cpu().numpy().argmax(axis=1)
        label_scores = batch_outputs['label_probs'].data.cpu().numpy().max(axis=1)

        batch_output = pd.DataFrame({
            "prediction_label": [inverse_label_map[index] for index in label_indices],
            "prediction_score": label_scores,
        })
        batch_frames.append(pd.concat([parsed_fields, batch_output], axis=1))

    if batch_frames:
        output = pd.concat(batch_frames, axis=0)
    else:
        # No batches at all: return empty subsets instead of failing on a
        # missing ``gold_label`` column.
        output = pd.DataFrame(columns=output_columns)

    hard_subset = output.loc[output.gold_label != output.prediction_label]
    easy_subset = output.loc[output.gold_label == output.prediction_label]
    return model.get_metrics(), hard_subset, easy_subset
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "",
                    embedding_sources_mapping: Dict[str, str] = None,
                    in_fold = None,
                    num_folds = None,
                    ewc_weight=None) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely
    identical to those used for :func:`~allennlp.commands.train.train_model`,
    except that the ``model`` section is ignored, if it is present (as we are
    already given a ``Model`` here).

    The main difference between the logic done here and the logic done in
    ``train_model`` is that here we do not worry about vocabulary construction
    or creating the model object. Everything else is the same.

    In addition, this function can run k-fold training over the training
    instances (``num_folds``) and can wrap the model's ``forward`` so that an
    extra penalty term, scaled by ``ewc_weight``, is added to the loss.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment
    serialization_dir : ``str``
        The directory in which to save results and logs. It must not already
        exist with content, otherwise a ``ConfigurationError`` is raised.
    extend_vocab: ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive
        terminal, and we slow down tqdm's output to only once every 10 seconds.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch
        basis. NOTE(review): this argument is not referenced anywhere in the
        function body.
    embedding_sources_mapping: ``Dict[str, str]``, optional (default=None)
        mapping from model paths to the pretrained embedding filepaths
        used during fine-tuning.
    in_fold : optional (default=None)
        When ``num_folds`` is given and this is not ``None``, only the fold
        with this index is trained; otherwise every fold is trained.
    num_folds : optional (default=None)
        When not ``None``, the training instances are shuffled with a fixed
        seed and split into this many folds, and a deep copy of the model is
        trained per fold. When ``None``, the given model is trained once on
        the full training data.
    ewc_weight : optional (default=None)
        When not ``None``, the trained model's ``forward`` is replaced by a
        wrapper that adds ``ewc_weight * ewc.penalty(...)`` to the returned
        ``"loss"`` while the model is in training mode.

    Returns
    -------
    The ``model`` object that was passed in. Metrics are written to
    ``metrics.json`` inside ``serialization_dir``.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f"Serialization directory ({serialization_dir}) "
                                 f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Persist a copy of the configuration before any keys are popped from it.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(vocabulary_params,
                                    (instance for key, dataset in all_datasets.items()
                                     for instance in dataset
                                     if key in datasets_for_vocab_creation))

        model.extend_embedder_vocab(embedding_sources_mapping)

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the "no_grad" regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    dl_params = params.pop("data_loader")
    # NOTE(review): `extra_test_loader` and `test_loader` are only bound when a
    # test set exists, but they are used unconditionally further down (by the
    # EWC setup and by the per-fold EvalEpochCallback). Without test data those
    # paths would fail with a NameError -- confirm whether a test set is
    # required.
    if test_data is not None:
        # Fixed seed so the sampled subset is the same on every run.
        rand = random.Random(1234)
        test_data.index_with(vocab)
        shuffled_test = copy(test_data.instances)
        rand.shuffle(shuffled_test)
        # Loader over at most 2000 shuffled test instances; passed to EWC below.
        extra_test = shuffled_test[:2000]

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": AllennlpDataset(extra_test, vocab)})
        # NOTE(review): "test_data_loader" is popped here and again a few lines
        # below; if the config supplies it, the first pop presumably consumes
        # it and the second call falls back to `keys` -- verify this is the
        # intended behaviour.
        extra_test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": test_data})
        test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

    master_model = model
    global_metrics = {}
    training_metrics = []
    final_metrics = {}
    # Plain-dict snapshot of the trainer config; a fresh Params is built from a
    # deep copy of it for every Trainer created below.
    master_trainer = trainer_params.as_dict()

    if num_folds is not None:
        # ---- k-fold training ------------------------------------------------
        rand = random.Random(1234)

        fold_train = []
        fold_test = []

        fold_train_loader = []
        fold_test_loader = []

        # Shuffle once with a fixed seed, then split without further shuffling.
        shuffled_instances = copy(train_data.instances)
        rand.shuffle(shuffled_instances)

        kfold = KFold(n_splits=num_folds, random_state=None, shuffle=False)
        computed_folds = list(kfold.split(shuffled_instances))

        # Build the train/test datasets and loaders for every fold up front.
        for fold in range(num_folds):
            train_indexes, test_indexes = computed_folds[fold]
            new_train = [shuffled_instances[i] for i in train_indexes]
            new_test = [shuffled_instances[i] for i in test_indexes]
            fold_train.append(AllennlpDataset(new_train, vocab=vocab))
            fold_test.append(AllennlpDataset(new_test, vocab=vocab))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_test[-1]})
            fold_test_loader.append(DataLoader.from_params(params.pop("fold_test_data_loader",keys)))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_train[-1]})
            fold_train_loader.append(DataLoader.from_params(params.pop("fold_train_data_loader", keys)))

        # Train either the single requested fold or all of them.
        for fold in ([in_fold] if in_fold is not None else range(num_folds)):
            # Every fold starts from a fresh copy of the incoming model.
            fold_model = deepcopy(master_model)
            eval_epoch_callback = EvalEpochCallback(fold, fold_test_loader[fold], test_loader, global_metrics)
            callbacks = [eval_epoch_callback]
            if ewc_weight is not None:
                ewc = EWC(extra_test_loader)

                # Wrapper installed as the model's `forward`: adds the weighted
                # penalty to the loss, but only while the model is training.
                # NOTE(review): `ewc.model` is not set in this function;
                # presumably CallLossCallback or EWC attaches it -- confirm.
                def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                    ewc_loss = 0
                    if ewc.model.training:
                        ewc_loss = ewc.penalty(ewc.model)
                    ret = ewc.model.old_forward(*args, **kwargs)
                    ret["loss"] += ewc_weight * ewc_loss
                    return ret

                # Keep the original forward reachable for the wrapper.
                fold_model.old_forward = fold_model.forward
                fold_model.forward = ewc_forward
                callbacks.append(CallLossCallback(ewc))

            trainer = Trainer.from_params(model=fold_model,
                                          serialization_dir=serialization_dir,
                                          data_loader=fold_train_loader[fold],
                                          train_data=train_data,
                                          validation_data=None,
                                          params=Params(deepcopy(master_trainer)),
                                          validation_data_loader=None,
                                          epoch_callbacks=callbacks)

            training_metrics.append(trainer.train())
            # Drop references to the per-fold objects before the next fold.
            del fold_model
            del trainer
            del eval_epoch_callback

            # Remove the `*.th` files left in the serialization directory.
            # NOTE(review): the original indentation was lost; this cleanup is
            # assumed to run after every fold rather than once after the loop
            # -- confirm.
            state = glob(serialization_dir+"/*.th")
            for file in state:
                logger.info("deleting state - {}".format(file))
                os.unlink(file)
    else:
        # ---- single training run on the full training set --------------------
        callbacks = []
        if ewc_weight is not None:
            ewc = EWC(extra_test_loader)

            # Same wrapper as in the k-fold branch, applied to `model` itself.
            def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                ewc_loss = 0
                if ewc.model.training:
                    ewc_loss = ewc.penalty(ewc.model)
                ret = ewc.model.old_forward(*args, **kwargs)
                ret["loss"] += ewc_weight * ewc_loss
                return ret

            model.old_forward = model.forward
            model.forward = ewc_forward
            callbacks.append(CallLossCallback(ewc))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": train_data})
        train_data.index_with(vocab)
        train_data_loader = DataLoader.from_params(params.pop("train_loader",keys))

        if validation_data is not None:
            validation_data.index_with(vocab)
            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": validation_data})

            validation_data_loader = DataLoader.from_params(params.pop("validation_loader", keys))
        else:
            validation_data_loader = None

        # Optional hook: call `finetune()` when the model exposes that name.
        if "finetune" in dir(model):
            model.finetune()

        logger.info("Fine tuning model")
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      data_loader=train_data_loader,
                                      train_data=train_data,
                                      validation_data=None,
                                      params=Params(deepcopy(master_trainer)),
                                      validation_data_loader=validation_data_loader,
                                      epoch_callbacks=callbacks)

        training_metrics = trainer.train()
        # NOTE(review): the original indentation was lost; archiving is assumed
        # to belong to this non-fold branch only -- confirm.
        archive_model(serialization_dir)

    # `global_metrics` is the dict handed to EvalEpochCallback in the k-fold
    # branch; it stays empty in the non-fold branch.
    final_metrics["fine_tune"] = global_metrics
    final_metrics["training"] = training_metrics

    metrics_json = json.dumps(final_metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)
    return model