def ensure_batch_predictions_are_consistent(self):
    """
    Check that the model produces the same outputs for an instance whether it is run
    alone or as part of one batch containing all of ``self.instances``.

    The model is put in eval mode, run once per instance (each in a batch of one) and
    once on the full batch. Every output whose key does not contain ``'loss'`` is then
    compared element ``i`` of the full batch against element ``0`` of instance ``i``'s
    single-instance output.
    """
    absolute_tolerance = 1e-6

    def run_model_on(instance_group):
        # Build a batch, pad it to its own padding lengths, and run a forward pass.
        batch = Batch(instance_group)
        return self.model(**batch.as_tensor_dict(batch.get_padding_lengths()))

    self.model.eval()
    single_predictions = [run_model_on([instance]) for instance in self.instances]
    batch_predictions = run_model_on(self.instances)

    for index, per_instance_output in enumerate(single_predictions):
        for key, single_value in per_instance_output.items():
            if 'loss' in key:
                # Loss is particularly unstable; we'll just be satisfied if everything
                # else is close.
                continue

            single_value = single_value[0]
            batch_value = batch_predictions[key][index]

            if not isinstance(single_value, torch.Tensor):
                assert single_value == batch_value, key
                continue

            if single_value.size() != batch_value.size():
                # The batched output has a different shape (presumably because it was
                # padded to the longest instance); keep only the leading region that
                # matches the single-instance output's shape.
                leading_region = tuple(slice(0, extent) for extent in single_value.size())
                batch_value = batch_value[leading_region]

            assert_allclose(single_value.data.numpy(),
                            batch_value.data.numpy(),
                            atol=absolute_tolerance,
                            err_msg=key)
def test_forward_pass_runs_correctly(self):
    """
    Check to make sure a forward pass on an ensemble of two identical copies of a model
    yields the same results as the model itself.
    """
    ensemble = BidafEnsemble([self.model, self.model])

    indexed_batch = Batch(self.instances)
    indexed_batch.index_instances(self.vocab)
    model_inputs = indexed_batch.as_tensor_dict()

    # Run the plain model first, then the ensemble, then read (and reset) the metrics;
    # the order matters because the metrics accumulate on the shared model.
    single_model_output = self.model(**model_inputs)
    ensemble_output = ensemble(**model_inputs)
    metrics = self.model.get_metrics(reset=True)

    # We've set up the data such that there's a fake answer that consists of the whole
    # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater
    # than zero, while if we somehow haven't been able to load the evaluation data, or
    # there was an error with using the evaluation script, this will fail.  This makes sure
    # that we've loaded the evaluation data correctly and have hooked things up to the
    # official evaluation script.
    assert metrics['f1'] > 0

    assert torch.equal(ensemble_output['best_span'], single_model_output['best_span'])
    assert ensemble_output['best_span_str'] == single_model_output['best_span_str']
def batch_to_ids(batch: List[List[str]]) -> torch.Tensor:
    """
    Converts a batch of tokenized sentences to a tensor representing the sentences with
    encoded characters (len(batch), max sentence length, max word length).

    Each sentence becomes an ``Instance`` with a single ``"elmo"`` ``TextField`` indexed by
    an ``ELMoTokenCharactersIndexer``; the instances are batched and indexed against an
    empty ``Vocabulary``.

    Parameters
    ----------
    batch : ``List[List[str]]``, required
        A list of tokenized sentences.

    Returns
    -------
        A tensor of padded character ids.
    """
    character_indexer = ELMoTokenCharactersIndexer()
    elmo_instances = [
        Instance({
            "elmo": TextField([Token(word) for word in sentence],
                              {'character_ids': character_indexer})
        })
        for sentence in batch
    ]

    batched = Batch(elmo_instances)
    batched.index_instances(Vocabulary())
    return batched.as_tensor_dict()['elmo']['character_ids']
def forward_on_instances(
        self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
    """
    Runs the model on a list of :class:`~allennlp.data.instance.Instance`s and returns one
    output dict per instance.

    The instances are batched, indexed with ``self.vocab``, converted to tensors on the
    device reported by ``self._get_prediction_device()``, and passed through
    :func:`self.forward()` and then :func:`self.decode()`, all under ``torch.no_grad()``.
    Any ``torch.Tensor`` output is converted to a numpy array, and the batched outputs are
    split into a list of individual dicts, one per instance.

    An output whose leading dimension (for tensors) or length (for anything else) differs
    from the number of instances cannot be split per instance; it is reported through
    ``self._maybe_warn_for_unseparable_batches`` and left out of the result.

    Parameters
    ----------
    instances : List[Instance], required
        The instances to run the model on.

    Returns
    -------
    A list of the models output for each instance.
    """
    batch_size = len(instances)
    with torch.no_grad():
        device = self._get_prediction_device()
        batch = Batch(instances)
        batch.index_instances(self.vocab)
        outputs = self.decode(self(**batch.as_tensor_dict(cuda_device=device)))

        per_instance_outputs: List[Dict[str, numpy.ndarray]] = [
            {} for _ in batch.instances
        ]

        # Iterate over a snapshot of the items because ``outputs`` is updated in the loop.
        for key, value in list(outputs.items()):
            is_tensor = isinstance(value, torch.Tensor)

            # NOTE(markn): This is a hack because 0-dim pytorch tensors are not iterable.
            # This occurs with batch size 1, because we still want to include the loss in
            # that case.
            if is_tensor and value.dim() == 0:
                value = value.unsqueeze(0)

            leading_size = value.size(0) if is_tensor else len(value)
            if leading_size != batch_size:
                self._maybe_warn_for_unseparable_batches(key)
                continue

            if is_tensor:
                value = value.detach().cpu().numpy()

            outputs[key] = value
            for single_output, element in zip(per_instance_outputs, value):
                single_output[key] = element

        return per_instance_outputs
def test_as_tensor_dict(self):
    """
    Index ``self.instances`` with ``self.vocab``, convert the batch to tensors using its
    own padding lengths, and check the ``"tokens"`` ids of the ``text1`` and ``text2``
    fields against the expected padded arrays.
    """
    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    tensor_dict = batch.as_tensor_dict(batch.get_padding_lengths())

    def token_ids(field_name):
        # Pull the "tokens" tensor for a field out as a numpy array.
        return tensor_dict[field_name]["tokens"].detach().cpu().numpy()

    actual_text1 = token_ids("text1")
    actual_text2 = token_ids("text2")

    expected_text1 = numpy.array([[2, 3, 4, 5, 6],
                                  [1, 3, 4, 5, 6]])
    expected_text2 = numpy.array([[2, 3, 4, 1, 5, 6],
                                  [2, 3, 1, 0, 0, 0]])

    numpy.testing.assert_array_almost_equal(actual_text1, expected_text1)
    numpy.testing.assert_array_almost_equal(actual_text2, expected_text2)
def test_tagger_with_elmo_token_embedder_forward_pass_runs_correctly(self):
    """
    Run a forward pass over ``self.instances`` and check the ``'tags'`` output: two tag
    sequences of length seven, each tag id mapping to one of the expected labels.
    """
    allowed_labels = {'O', 'I-ORG', 'I-PER', 'I-LOC'}

    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    predicted_tags = self.model(**batch.as_tensor_dict())['tags']

    assert len(predicted_tags) == 2
    for position in (0, 1):
        assert len(predicted_tags[position]) == 7

    label_vocab = self.model.vocab
    for tag_sequence in predicted_tags:
        for tag_id in tag_sequence:
            label = label_vocab.get_token_from_index(tag_id, namespace="labels")
            assert label in allowed_labels
def _sentences_to_ids(self, sentences):
    """
    Convert tokenized sentences into the ``'character_ids'`` tensor of a batch whose
    instances each hold a single ``'elmo'`` ``TextField`` indexed by an
    ``ELMoTokenCharactersIndexer``. The batch is indexed against an empty ``Vocabulary``.
    """
    character_indexer = ELMoTokenCharactersIndexer()

    def to_instance(sentence):
        # One TextField per sentence, wrapped in an Instance under the 'elmo' key.
        text_field = TextField([Token(word) for word in sentence],
                               {'character_ids': character_indexer})
        return Instance({'elmo': text_field})

    batch = Batch([to_instance(sentence) for sentence in sentences])
    batch.index_instances(Vocabulary())
    return batch.as_tensor_dict()['elmo']['character_ids']
def get_vocab_and_both_elmo_indexed_ids(batch: List[List[str]]):
    """
    Index a batch of tokenized sentences with both an ``ELMoTokenCharactersIndexer``
    (under ``'character_ids'``) and a ``SingleIdTokenIndexer`` (under ``'tokens'``).

    Parameters
    ----------
    batch : ``List[List[str]]``, required
        A list of tokenized sentences.

    Returns
    -------
    A tuple of the ``Vocabulary`` built from the instances and the ``"elmo"`` entry of the
    batch's tensor dict.
    """
    character_indexer = ELMoTokenCharactersIndexer()
    single_id_indexer = SingleIdTokenIndexer()

    elmo_instances = [
        Instance({
            "elmo": TextField(
                [Token(word) for word in sentence],
                {'character_ids': character_indexer, 'tokens': single_id_indexer},
            )
        })
        for sentence in batch
    ]

    batched = Batch(elmo_instances)
    vocabulary = Vocabulary.from_instances(elmo_instances)
    batched.index_instances(vocabulary)
    return vocabulary, batched.as_tensor_dict()["elmo"]
def test_forward_pass_runs_correctly(self):
    """
    Run a forward pass over ``self.instances`` and sanity-check the model's outputs:
    a positive F1 metric, span start/end probabilities that each sum to one, a best span
    that lies inside the passage with start <= end, and a string ``best_span_str``.
    """
    batch = Batch(self.instances)
    batch.index_instances(self.vocab)
    training_tensors = batch.as_tensor_dict()
    output_dict = self.model(**training_tensors)

    metrics = self.model.get_metrics(reset=True)
    # We've set up the data such that there's a fake answer that consists of the whole
    # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater
    # than zero, while if we somehow haven't been able to load the evaluation data, or
    # there was an error with using the evaluation script, this will fail.  This makes sure
    # that we've loaded the evaluation data correctly and have hooked things up to the
    # official evaluation script.
    assert metrics['f1'] > 0

    span_start_probs = output_dict['span_start_probs'][0].data.numpy()
    # Read the end distribution from its own key; reading 'span_start_probs' here again
    # would leave the end probabilities completely unchecked.
    span_end_probs = output_dict['span_end_probs'][0].data.numpy()
    assert_almost_equal(numpy.sum(span_start_probs, -1), 1, decimal=6)
    assert_almost_equal(numpy.sum(span_end_probs, -1), 1, decimal=6)

    span_start, span_end = tuple(output_dict['best_span'][0].data.numpy())
    assert span_start >= 0
    assert span_start <= span_end
    assert span_end < self.instances[0].fields['passage'].sequence_length()
    assert isinstance(output_dict['best_span_str'][0], str)