def batch_to_ids(batch: List[List[str]]) -> torch.Tensor:
    """
    Converts a batch of tokenized sentences to a tensor representing the sentences with encoded
    characters (len(batch), max sentence length, max word length).

    Parameters
    ----------
    batch : ``List[List[str]]``, required
        A list of tokenized sentences.

    Returns
    -------
    A tensor of padded character ids.
    """
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens, {'character_ids': indexer})
        instance = Instance({"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary()
    dataset.index_instances(vocab)
    return dataset.as_tensor_dict()['elmo']['character_ids']
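A minimal usage sketch, assuming the ELMo-related imports used above are available. The sentences and the printed shape are illustrative only; the last dimension is ELMo's fixed character padding length (typically 50):

# Hypothetical example: two tokenized sentences of different lengths.
sentences = [["The", "cat", "sat", "."], ["ELMo", "rocks"]]
character_ids = batch_to_ids(sentences)
# Shape is (batch size, max sentence length, max word length),
# e.g. (2, 4, 50) here; shorter sentences are zero-padded.
print(character_ids.shape)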
def test_forward_pass_runs_correctly(self):
    """
    Check to make sure a forward pass on an ensemble of two identical copies of a model yields
    the same results as the model itself.
    """
    bidaf_ensemble = BidafEnsemble([self.model, self.model])

    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    training_tensors = batch.as_tensor_dict()

    bidaf_output_dict = self.model(**training_tensors)
    ensemble_output_dict = bidaf_ensemble(**training_tensors)

    metrics = self.model.get_metrics(reset=True)

    # We've set up the data such that there's a fake answer that consists of the whole
    # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater than
    # zero, while if we somehow haven't been able to load the evaluation data, or there was an
    # error with using the evaluation script, this will fail.  This makes sure that we've
    # loaded the evaluation data correctly and have hooked things up to the official evaluation
    # script.
    assert metrics['f1'] > 0

    assert torch.equal(ensemble_output_dict['best_span'], bidaf_output_dict['best_span'])
    assert ensemble_output_dict['best_span_str'] == bidaf_output_dict['best_span_str']
def forward_on_instances(self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
    """
    Takes a list of :class:`~allennlp.data.instance.Instance`s, converts that text into arrays
    using this model's :class:`Vocabulary`, passes those arrays through :func:`self.forward()`
    and :func:`self.decode()` (which by default does nothing) and returns the result.  Before
    returning the result, we convert any ``torch.Tensors`` into numpy arrays and separate the
    batched output into a list of individual dicts per instance.  Note that typically this will
    be faster on a GPU (and conditionally, on a CPU) than repeated calls to
    :func:`forward_on_instance`.  The device to run on is determined internally via
    :func:`self._get_prediction_device`.

    Parameters
    ----------
    instances : ``List[Instance]``, required
        The instances to run the model on.

    Returns
    -------
    A list of the model's outputs, one dict per instance.
    """
    batch_size = len(instances)
    with torch.no_grad():
        cuda_device = self._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(self.vocab)
        model_input = dataset.as_tensor_dict(cuda_device=cuda_device)
        outputs = self.decode(self(**model_input))

        instance_separated_output: List[Dict[str, numpy.ndarray]] = [{} for _ in dataset.instances]
        for name, output in list(outputs.items()):
            if isinstance(output, torch.Tensor):
                # NOTE(markn): This is a hack because 0-dim pytorch tensors are not iterable.
                # This occurs with batch size 1, because we still want to include the loss in that case.
                if output.dim() == 0:
                    output = output.unsqueeze(0)

                if output.size(0) != batch_size:
                    self._maybe_warn_for_unseparable_batches(name)
                    continue
                output = output.detach().cpu().numpy()
            elif len(output) != batch_size:
                self._maybe_warn_for_unseparable_batches(name)
                continue
            outputs[name] = output
            for instance_output, batch_element in zip(instance_separated_output, output):
                instance_output[name] = batch_element
        return instance_separated_output
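A hedged sketch of how this method is typically called. Here `reader`, `model`, and the data path are assumptions, not defined by the snippet above:

# Assuming `reader` is a DatasetReader and `model` is a trained Model.
instances = list(reader.read("validation.json"))  # hypothetical path
outputs = model.forward_on_instances(instances[:8])
# `outputs` is a list with one dict of numpy arrays per instance.
for output in outputs:
    print(sorted(output.keys()))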
def test_as_tensor_dict(self):
    dataset = Batch(self.instances)
    dataset.index_instances(self.vocab)
    padding_lengths = dataset.get_padding_lengths()
    tensors = dataset.as_tensor_dict(padding_lengths)
    text1 = tensors["text1"]["tokens"].detach().cpu().numpy()
    text2 = tensors["text2"]["tokens"].detach().cpu().numpy()

    numpy.testing.assert_array_almost_equal(text1, numpy.array([[2, 3, 4, 5, 6],
                                                                [1, 3, 4, 5, 6]]))
    numpy.testing.assert_array_almost_equal(text2, numpy.array([[2, 3, 4, 1, 5, 6],
                                                                [2, 3, 1, 0, 0, 0]]))
def test_tagger_with_elmo_token_embedder_forward_pass_runs_correctly(self):
    dataset = Batch(self.instances)
    dataset.index_instances(self.vocab)
    training_tensors = dataset.as_tensor_dict()
    output_dict = self.model(**training_tensors)
    tags = output_dict['tags']
    assert len(tags) == 2
    assert len(tags[0]) == 7
    assert len(tags[1]) == 7
    for example_tags in tags:
        for tag_id in example_tags:
            tag = self.model.vocab.get_token_from_index(tag_id, namespace="labels")
            assert tag in {'O', 'I-ORG', 'I-PER', 'I-LOC'}
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # os.listdir() always returns a list, so check its truthiness (non-emptiness) directly.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    instances = [instance for key, dataset in all_datasets.items()
                 for instance in dataset
                 if key in datasets_for_vocab_creation]

    vocab = Vocabulary.from_params(vocab_params, instances)

    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
            get_frozen_and_tunable_parameter_names(model)
    logger.info("The following parameters are frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("The following parameters are tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
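For context, a hedged sketch of how this function might be invoked. The config path and serialization directory are hypothetical; the Params file needs at least the dataset reader, data paths, `model`, and `trainer` keys consumed above:

# Hypothetical invocation of a dry run.
params = Params.from_file("experiments/bidaf.json")  # hypothetical config path
dry_run_from_params(params, serialization_dir="/tmp/dry_run")
# Afterwards /tmp/dry_run/vocabulary/ holds the saved vocabulary files,
# and dataset/vocabulary statistics have been logged.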
def _sentences_to_ids(self, sentences):
    indexer = ELMoTokenCharactersIndexer()

    # For each sentence, first create a TextField, then create an Instance.
    instances = []
    for sentence in sentences:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens, {'character_ids': indexer})
        instance = Instance({'elmo': field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary()
    dataset.index_instances(vocab)
    return dataset.as_tensor_dict()['elmo']['character_ids']
def get_vocab_and_both_elmo_indexed_ids(batch: List[List[str]]):
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    indexer2 = SingleIdTokenIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens, {'character_ids': indexer,
                                   'tokens': indexer2})
        instance = Instance({"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary.from_instances(instances)
    dataset.index_instances(vocab)
    return vocab, dataset.as_tensor_dict()["elmo"]
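A small usage sketch under the same assumptions as the helpers above; the sentences are made up:

# Hypothetical batch indexed with both character-level and single-id indexers.
vocab, tensor_dict = get_vocab_and_both_elmo_indexed_ids([["A", "sentence", "."],
                                                          ["Another", "one"]])
# `tensor_dict` has one entry per indexer key used in the TextField.
print(tensor_dict["character_ids"].shape)  # (2, 3, max word length)
print(tensor_dict["tokens"].shape)         # (2, 3)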
def test_forward_pass_runs_correctly(self):
    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    training_tensors = batch.as_tensor_dict()
    output_dict = self.model(**training_tensors)

    metrics = self.model.get_metrics(reset=True)
    # We've set up the data such that there's a fake answer that consists of the whole
    # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater than
    # zero, while if we somehow haven't been able to load the evaluation data, or there was an
    # error with using the evaluation script, this will fail.  This makes sure that we've
    # loaded the evaluation data correctly and have hooked things up to the official evaluation
    # script.
    assert metrics['f1'] > 0

    span_start_probs = output_dict['span_start_probs'][0].data.numpy()
    span_end_probs = output_dict['span_end_probs'][0].data.numpy()
    assert_almost_equal(numpy.sum(span_start_probs, -1), 1, decimal=6)
    assert_almost_equal(numpy.sum(span_end_probs, -1), 1, decimal=6)
    span_start, span_end = tuple(output_dict['best_span'][0].data.numpy())
    assert span_start >= 0
    assert span_start <= span_end
    assert span_end < self.instances[0].fields['passage'].sequence_length()
    assert isinstance(output_dict['best_span_str'][0], str)
def test_padding_lengths_uses_max_instance_lengths(self):
    dataset = Batch(self.instances)
    dataset.index_instances(self.vocab)
    padding_lengths = dataset.get_padding_lengths()
    assert padding_lengths == {"text1": {"num_tokens": 5}, "text2": {"num_tokens": 6}}
class ModelTestCase(AllenNlpTestCase):
    """
    A subclass of :class:`~allennlp.common.testing.test_case.AllenNlpTestCase`
    with added methods for testing :class:`~allennlp.models.model.Model` subclasses.
    """
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        instances = reader.read(dataset_file)
        # Use parameters for the vocabulary if they are present in the config file, so that
        # choices like "non_padded_namespaces", "min_count" etc. can be set if needed.
        if 'vocabulary' in params:
            vocab_params = params['vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)

    def ensure_model_can_train_save_and_load(self,
                                             param_file: str,
                                             tolerance: float = 1e-4,
                                             cuda_device: int = -1):
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

        loaded_dataset = reader.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(model, model_batch)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check that the loaded model's loss exists and that we can compute gradients from it,
        # for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model

    def assert_fields_equal(self, field1, field2, name: str, tolerance: float = 1e-6) -> None:
        if isinstance(field1, torch.Tensor):
            assert_allclose(field1.detach().cpu().numpy(),
                            field2.detach().cpu().numpy(),
                            rtol=tolerance,
                            err_msg=name)
        elif isinstance(field1, dict):
            assert field1.keys() == field2.keys()
            for key in field1:
                self.assert_fields_equal(field1[key],
                                         field2[key],
                                         tolerance=tolerance,
                                         name=name + '.' + str(key))
        elif isinstance(field1, (list, tuple)):
            assert len(field1) == len(field2)
            for i, (subfield1, subfield2) in enumerate(zip(field1, field2)):
                self.assert_fields_equal(subfield1,
                                         subfield2,
                                         tolerance=tolerance,
                                         name=name + f"[{i}]")
        elif isinstance(field1, (float, int)):
            assert_allclose([field1], [field2], rtol=tolerance, err_msg=name)
        else:
            assert field1 == field2

    @staticmethod
    def check_model_computes_gradients_correctly(model, model_batch):
        model.zero_grad()
        result = model(**model_batch)
        result["loss"].backward()
        has_zero_or_none_grads = {}
        for name, parameter in model.named_parameters():
            zeros = torch.zeros(parameter.size())
            if parameter.requires_grad:
                if parameter.grad is None:
                    has_zero_or_none_grads[name] = "No gradient computed (i.e. parameter.grad is None)"
                elif parameter.grad.is_sparse or parameter.grad.data.is_sparse:
                    pass
                # Some parameters will only be partially updated,
                # like embeddings, so we just check that any gradient is non-zero.
                elif (parameter.grad.cpu() == zeros).all():
                    has_zero_or_none_grads[name] = f"zeros with shape ({tuple(parameter.grad.size())})"
            else:
                assert parameter.grad is None

        if has_zero_or_none_grads:
            for name, grad in has_zero_or_none_grads.items():
                print(f"Parameter: {name} had incorrect gradient: {grad}")
            raise Exception("Incorrect gradients found. See stdout for more info.")

    def ensure_batch_predictions_are_consistent(self):
        self.model.eval()
        single_predictions = []
        for i, instance in enumerate(self.instances):
            dataset = Batch([instance])
            tensors = dataset.as_tensor_dict(dataset.get_padding_lengths())
            result = self.model(**tensors)
            single_predictions.append(result)

        full_dataset = Batch(self.instances)
        batch_tensors = full_dataset.as_tensor_dict(full_dataset.get_padding_lengths())
        batch_predictions = self.model(**batch_tensors)
        for i, instance_predictions in enumerate(single_predictions):
            for key, single_predicted in instance_predictions.items():
                tolerance = 1e-6
                if 'loss' in key:
                    # Loss is particularly unstable; we'll just be satisfied if everything else
                    # is close.
                    continue
                single_predicted = single_predicted[0]
                batch_predicted = batch_predictions[key][i]
                if isinstance(single_predicted, torch.Tensor):
                    if single_predicted.size() != batch_predicted.size():
                        slices = tuple(slice(0, size) for size in single_predicted.size())
                        batch_predicted = batch_predicted[slices]
                    assert_allclose(single_predicted.data.numpy(),
                                    batch_predicted.data.numpy(),
                                    atol=tolerance,
                                    err_msg=key)
                else:
                    assert single_predicted == batch_predicted, key
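A hedged sketch of how a model test might subclass ModelTestCase; the class name and fixture paths are hypothetical:

class MyTaggerTest(ModelTestCase):
    def setUp(self):
        super().setUp()
        # Hypothetical fixture paths for the experiment config and a small dataset.
        self.set_up_model("fixtures/my_tagger/experiment.json",
                          "fixtures/my_tagger/data.conll")

    def test_model_can_train_save_and_load(self):
        self.ensure_model_can_train_save_and_load(self.param_file)

    def test_batch_predictions_are_consistent(self):
        self.ensure_batch_predictions_are_consistent()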