def data_instance_to_model_input(instance, model):
    dataset = Batch([instance])
    dataset.index_instances(model.vocab)
    cuda_device = model._get_prediction_device()
    model_input = move_to_device(dataset.as_tensor_dict(), cuda_device=cuda_device)
    return model_input
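# A minimal, self-contained sketch (not from any of the snippets here) of the pattern they
# all share: `allennlp.nn.util.move_to_device` recursively walks a (possibly nested) dict of
# tensors and moves every tensor onto the target device before the model is called.
# The tensors below are placeholders; only `torch` and `allennlp` are assumed to be installed.
import torch
from allennlp.nn import util as nn_util

batch = {
    "tokens": {"tokens": torch.zeros(2, 5, dtype=torch.long)},
    "label": torch.tensor([0, 1]),
}
cuda_device = 0 if torch.cuda.is_available() else -1
batch = nn_util.move_to_device(batch, cuda_device)  # every tensor now lives on the chosen device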
def __iter__(self):
    while True:
        self.init_epoch()
        for idx, minibatch in enumerate(self.batches):
            # fast-forward if loaded from state
            if self._iterations_this_epoch > idx:
                continue
            self.iterations += 1
            self._iterations_this_epoch += 1
            if self.sort_within_batch:
                # NOTE: `rnn.pack_padded_sequence` requires that a minibatch
                # be sorted by decreasing order, which requires reversing
                # relative to typical sort keys
                if self.sort:
                    minibatch.reverse()
                else:
                    minibatch.sort(key=self.sort_key, reverse=True)
            batch = Batch(minibatch)
            if self.device == 'cuda' or self.device.type == "cuda":
                batch = move_to_device(
                    batch,
                    self.device.index if self.device.index is not None else 0)
            yield batch.as_tensor_dict(batch.get_padding_lengths())
        if not self.repeat:
            return
def forward_on_instances(self, instances: List[Instance],
                         **kwargs) -> List[Dict[str, np.ndarray]]:
    # An exact copy of the original method, but supports kwargs
    batch_size = len(instances)
    with torch.no_grad():
        cuda_device = self._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(self.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        outputs = self.make_output_human_readable(self(**model_input, **kwargs))

        instance_separated_output: List[Dict[str, np.ndarray]] = [
            {} for _ in dataset.instances
        ]
        for name, output in list(outputs.items()):
            if isinstance(output, torch.Tensor):
                if output.dim() == 0:
                    output = output.unsqueeze(0)
                if output.size(0) != batch_size:
                    self._maybe_warn_for_unseparable_batches(name)
                    continue
                output = output.detach().cpu().numpy()
            elif len(output) != batch_size:
                self._maybe_warn_for_unseparable_batches(name)
                continue
            for instance_output, batch_element in zip(instance_separated_output, output):
                instance_output[name] = batch_element
        return instance_separated_output
def get_answer():
    # Take user input and convert to Instance
    user_context = request.args.get("context", "", type=str)
    user_question = request.args.get("question", "", type=str)
    input_instance = squad_reader.text_to_instance(
        question_text=user_question, passage_text=user_context)
    # Make a dataset from the instance
    dataset = Batch([input_instance])
    dataset.index_instances(train_vocab)
    batch = dataset.as_tensor_dict()
    batch = move_to_device(batch, cuda_device=0 if cuda else -1)
    # Extract relevant data from batch.
    passage = batch["passage"]["tokens"]
    question = batch["question"]["tokens"]
    metadata = batch.get("metadata", {})
    # Run data through model to get start and end logits.
    output_dict = model(passage, question)
    start_logits = output_dict["start_logits"]
    end_logits = output_dict["end_logits"]
    # Compute the best span
    best_span = get_best_span(start_logits, end_logits)
    # Get the string corresponding to the best span
    passage_str = metadata[0]['original_passage']
    offsets = metadata[0]['token_offsets']
    predicted_span = tuple(best_span[0].data.cpu().numpy())
    start_offset = offsets[predicted_span[0]][0]
    end_offset = offsets[predicted_span[1]][1]
    best_span_string = passage_str[start_offset:end_offset]
    # Return the best string back to the GUI
    return jsonify(answer=best_span_string)
def predict_loop_or_load(model: Model,
                         dataset_iterator: Iterator,
                         device: str = 'cpu',
                         output_file: Union[str, Path] = 'output.jsonl',
                         force_repredict: bool = False) -> List[Dict[str, Any]]:
    """
    Checks whether results are already present in the output file. If the file exists,
    reads it and returns its contents. Otherwise, runs the prediction loop, populates
    the file, and returns the results.
    """
    # check
    output_file: Path = Path(output_file)  # type: ignore
    if output_file.exists():
        if output_file.is_file():
            logger.info("%s file already exists...", output_file)
            if force_repredict:
                logger.info("force_repredict is True. Hence repredicting and overwriting")
            else:
                logger.info("Reading results from the existing file")
                return load_outputs(output_file)

    # Predict
    if device == 'cpu':
        device_instance = torch.device('cpu')
        device_int = -1
    else:
        device_instance = torch.device('cuda', 0)
        device_int = 0
    model = model.to(device=device_instance)
    model.eval()
    if hasattr(model, 'test'):
        model.test()
    results = []
    with open(output_file, 'w') as f:
        logger.info("Starting predictions ...")
        for i, input_batch in enumerate(tqdm.tqdm(dataset_iterator)):
            input_batch_on_device = nn_util.move_to_device(input_batch, device_int)
            result = model.forward(**input_batch_on_device)
            input_ = {
                'h': input_batch_on_device['tr_h'].item(),
                't': input_batch['hr_t'].item(),
                'r': input_batch['hr_r'].item()
            }
            result = {**input_, **result}
            line = json.dumps(result) + '\n'
            results.append(result)
            print(i, ' : ', line)
            f.write(line)
    return results
def __call__(self, trainer: GradientDescentTrainer, epoch: int, **kwargs) -> None:
    logger.info(f"===== Sample at Epoch {epoch} =====")
    vocab = trainer.model.vocab
    # sample an instance
    batch = next(iter(trainer.data_loader))

    # log input tokens
    index = 0
    for signature, vocab_namespace in self.input_name_spaces.items():
        input_ = batch[signature]
        while isinstance(input_, dict):
            input_ = input_["tokens"]
        input_ = input_[index]
        human_readable_tokens = tensor2tokens(input_, vocab, vocab_namespace)
        logger.info(f"{signature}({vocab_namespace}): {human_readable_tokens}")

    # log output tokens
    if self.output_name_spaces:
        model = trainer.model
        model.eval()
        batch = move_to_device(batch, model.model_weight.device)
        output_dict = model(**batch)
        for signature, vocab_namespace in self.output_name_spaces.items():
            output = output_dict[signature][index]
            human_readable_tokens = tensor2tokens(output, vocab, vocab_namespace)
            logger.info(f"{signature}({vocab_namespace}): {human_readable_tokens}")
        model.get_metrics(reset=True)
def instances_to_captum_inputs(self, labeled_instances):
    batch_size = len(labeled_instances)
    with torch.no_grad():
        cuda_device = self._get_prediction_device()
        batch = Batch(labeled_instances)
        batch.index_instances(self.vocab)
        model_input = util.move_to_device(batch.as_tensor_dict(), cuda_device)

        key1, key2 = self.field_names
        tokens1 = model_input[key1]
        tokens2 = model_input[key2]
        label = model_input["label"]
        tokens_mask1 = util.get_text_field_mask(tokens1)
        tokens_mask2 = util.get_text_field_mask(tokens2)
        embedded_tokens1 = self.word_embeddings(tokens1)
        embedded_tokens2 = self.word_embeddings(tokens2)

        output_dict = {}
        output_dict[f"{key1}_embedding"] = embedded_tokens1
        output_dict[f"{key2}_embedding"] = embedded_tokens2
        return ((embedded_tokens1, embedded_tokens2), None,
                (tokens_mask1, tokens_mask2, label, output_dict))
def forward(self, tokens: torch.Tensor) -> torch.Tensor:
    # tokens may have extra dimensions (batch_size, d1, ..., dn, sequence_length),
    # but embedding expects (batch_size, sequence_length), so pass tokens to
    # util.combine_initial_dims (which is a no-op if there are no extra dimensions).
    # Remember the original size.
    original_size = tokens.size()
    tokens = util.combine_initial_dims(tokens)

    weight = self.weight
    weight_device = util.get_device_of(weight)
    if util.get_device_of(self.edges) != weight_device:
        self.edges = util.move_to_device(self.edges, weight_device)
    for _ in range(self.hop):
        weight = self.gnn(weight, self.edges.t())

    embedded = embedding(
        tokens,
        weight,
        padding_idx=self.padding_index,
        max_norm=self.max_norm,
        norm_type=self.norm_type,
        scale_grad_by_freq=self.scale_grad_by_freq,
        sparse=self.sparse,
    )

    # Now (if necessary) add back in the extra dimensions.
    embedded = util.uncombine_initial_dims(embedded, original_size)

    if self._projection:
        projection = self._projection
        for _ in range(embedded.dim() - 2):
            projection = TimeDistributed(projection)
        embedded = projection(embedded)
    return embedded
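# An illustrative sketch (not from the snippet above) of the two reshape helpers it relies on:
# `combine_initial_dims` views (d1, ..., dn, seq_len) as (d1 * ... * dn, seq_len) so a plain
# embedding lookup can run, and `uncombine_initial_dims` restores the leading dimensions
# afterwards. The sizes below are arbitrary placeholders.
import torch
from allennlp.nn import util

tokens = torch.zeros(2, 3, 7, dtype=torch.long)                  # (batch, d1, seq_len)
flat = util.combine_initial_dims(tokens)                         # (2 * 3, 7)
embedded = torch.zeros(flat.size(0), flat.size(1), 16)           # stand-in for the embedding output
restored = util.uncombine_initial_dims(embedded, tokens.size())  # (2, 3, 7, 16)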
def predict(self, batches):
    t11 = time()
    predictions = []
    for batch, model in zip(batches, self.models):
        batch = util.move_to_device(batch.as_tensor_dict(),
                                    0 if torch.cuda.is_available() else -1)
        with torch.no_grad():
            prediction = model.forward(**batch)
        # prediction contains 'class_probabilities_labels' and 'class_probabilities_d_tags'
        predictions.append(prediction)

    preds, idx, error_probs = self._convert(predictions)
    t55 = time()
    if self.log:
        logging.info("Inference time {}".format(t55 - t11))
    return preds, idx, error_probs
def get_predictions(model, serialization_dir, reader, device):
    """
    Generates predictions from a trained model on a reader.
    """
    dev = reader.read('raw_data/drop/drop_dataset_dev.json')
    vocab = Vocabulary.from_files(join(serialization_dir, 'vocabulary'))
    iterator = BasicIterator(batch_size=1)
    iterator.index_with(vocab)
    dev_iter = iterator(dev, num_epochs=1)
    dev_batches = [batch for batch in dev_iter]
    dev_batches = move_to_device(dev_batches, device)

    predictions = {}
    with torch.no_grad():
        for batch in tqdm(dev_batches):
            out = model(**batch)
            assert len(out['question_id']) == 1
            assert len(out['answer']) == 1
            query_id = out['question_id'][0]
            if 'value' in out['answer'][0]:
                prediction = out['answer'][0]['value']
            elif 'count' in out['answer'][0]:
                prediction = out['answer'][0]['count'].item()
            else:
                raise ValueError()
            predictions[query_id] = prediction
    print(model.get_metrics())
    torch.cuda.empty_cache()
    return predictions
def train_epoch(self):
    self.model.get_metrics(reset=True)
    train_generator = self.iterator(instances=self.train_dataset, shuffle=True)
    for _ in range(self.iters_per_epoch):
        self.model.train()
        batch = next(train_generator)
        batch = move_to_device(batch, self.cuda_device)
        output = self.model(premise=batch['premise'],
                            hypothesis=batch['hypothesis'],
                            label=batch['label'])
        loss = output['loss']
        self.model.zero_grad()
        loss.backward()
        if self.clip_grad_max_norm is not None:
            clip_grad_norm_(self.model.parameters(), max_norm=self.clip_grad_max_norm)
        self.optimizer.step()
        self.maybe_write_summary(prefix='train')
        if self.global_step % self.validate_every == 0:
            self.validate()
        self.global_step += 1
def eval_loss(net, loader, use_cuda=False, gpu_device=0):
    if use_cuda:
        net = net.cuda()
    net.eval()
    loss = 0
    batches = 0
    validation_data = loader[0]
    validation_iterator = loader[1]
    val_generator = validation_iterator(validation_data, num_epochs=1, shuffle=False)
    with torch.no_grad():
        for batch in val_generator:
            batches += 1
            if use_cuda:
                batch = nn_util.move_to_device(batch, gpu_device)
            scores = net(
                tokens=batch["tokens"],
                verb_indicator=batch["verb_indicator"],
                tags=batch["tags"],
                metadata=batch["metadata"]
            )
            loss += scores["loss"].item()
    return loss / batches, (1 - loss) / batches
def predict_instance(self, instance):
    """
    An instance is an entire document, represented as a list of sentences.
    """
    model = self._model
    cuda_device = model._get_prediction_device()

    # Try to predict this batch.
    try:
        dataset = Batch([instance])
        dataset.index_instances(model.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        prediction = model.make_output_human_readable(model(**model_input)).to_json()
    # If we run out of GPU memory, warn the user and indicate that this document failed.
    # This way, prediction doesn't grind to a halt every time we run out of GPU.
    except RuntimeError as err:
        # doc_key, dataset, sentences, message
        metadata = instance["metadata"].metadata
        doc_key = metadata.doc_key
        msg = (f"Encountered a RuntimeError on document {doc_key}. Skipping this example."
               f" Error message:\n{err.args[0]}.")
        warnings.warn(msg)
        prediction = metadata.to_json()
        prediction["_FAILED_PREDICTION"] = True

    return prediction
def _forward(self,
             tensor_batch: torch.Tensor,
             task: Task = None,
             for_training: bool = False,
             reverse=False,
             train_stage="stm"):
    train_stages = ["stm", "sd", "valid"]
    if task is not None:
        task_index = TASKS_NAME.index(task._name)
        tensor_batch['task_index'] = torch.tensor(task_index)
        tensor_batch["reverse"] = torch.tensor(reverse)
        tensor_batch['for_training'] = torch.tensor(for_training)
        train_stage = train_stages.index(train_stage)
        tensor_batch['train_stage'] = torch.tensor(train_stage)
        tensor_batch = move_to_device(tensor_batch, self._cuda_device)
        output_dict = self._model.forward(**tensor_batch)
        if for_training:
            try:
                output_dict["loss"] += self._model.get_regularization_penalty()
            except KeyError:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " `loss` key in the output of model.forward(inputs).")
        return output_dict
    else:
        raise ConfigurationError("Cannot call forward through tasks `None`")
def batch_outputs(self, batch: TensorDict, for_training: bool) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batch and returns the output dictionary that the model
    returns, after adding any specified regularization penalty to the loss (if training).
    """
    batch = nn_util.move_to_device(batch, self.model_engine.device)
    output_dict = self.model_engine(**batch)

    if for_training:
        try:
            assert "loss" in output_dict
            regularization_penalty = self.model.get_regularization_penalty()
            if regularization_penalty is not None:
                output_dict["reg_loss"] = regularization_penalty
                output_dict["loss"] += regularization_penalty
        except AssertionError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs).")

    return output_dict
def batch_outputs(self, batch: TensorDict, for_training: bool) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batch and returns the output dictionary that the model
    returns, after adding any specified regularization penalty to the loss (if training).
    """
    batch = nn_util.move_to_device(batch, self.cuda_device)
    output_dict = self._pytorch_model(**batch)

    if for_training:
        try:
            regularization_penalty = self.model.get_regularization_penalty()
            loss = output_dict["loss"]
            # Handle model without regularization
            if regularization_penalty == 0.0:
                regularization_penalty = loss.new_full(size=[], fill_value=0.0)
            output_dict["reg_loss"] = regularization_penalty
            output_dict["loss"] += regularization_penalty
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs).")

    return output_dict
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator,
                                   total=data_iterator.get_num_batches(instances))
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            model(**batch)
            metrics = model.get_metrics()

            if (not _warned_tqdm_ignores_underscores and
                    any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value)
                                     for name, value in metrics.items()
                                     if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        return model.get_metrics(reset=True)
def _make_embedder_input(self, all_tokens: List[str]) -> Dict[str, torch.Tensor]:
    inputs = {}
    # A bit of a hack; this will only work with some dataset readers, but it'll do for now.
    indexers = self.predictor._dataset_reader._token_indexers  # type: ignore
    for indexer_name, token_indexer in indexers.items():
        if isinstance(token_indexer, SingleIdTokenIndexer):
            all_indices = [
                self.vocab._token_to_index[self.namespace][token] for token in all_tokens
            ]
            inputs[indexer_name] = {"tokens": torch.LongTensor(all_indices).unsqueeze(0)}
        elif isinstance(token_indexer, TokenCharactersIndexer):
            tokens = [Token(x) for x in all_tokens]
            max_token_length = max(len(x) for x in all_tokens)
            # sometimes max_token_length is too short for the cnn encoder
            max_token_length = max(max_token_length, token_indexer._min_padding_length)
            indexed_tokens = token_indexer.tokens_to_indices(tokens, self.vocab)
            padding_lengths = token_indexer.get_padding_lengths(indexed_tokens)
            padded_tokens = token_indexer.as_padded_tensor_dict(indexed_tokens, padding_lengths)
            inputs[indexer_name] = {
                "token_characters": torch.LongTensor(
                    padded_tokens["token_characters"]).unsqueeze(0)
            }
        elif isinstance(token_indexer, ELMoTokenCharactersIndexer):
            elmo_tokens = []
            for token in all_tokens:
                elmo_indexed_token = token_indexer.tokens_to_indices(
                    [Token(text=token)], self.vocab)["tokens"]
                elmo_tokens.append(elmo_indexed_token[0])
            inputs[indexer_name] = {"tokens": torch.LongTensor(elmo_tokens).unsqueeze(0)}
        else:
            raise RuntimeError("Unsupported token indexer:", token_indexer)

    return util.move_to_device(inputs, self.cuda_device)
def validate(config, model, task, split="val"):
    pred = []
    scorer = MetricForClassification()
    if split == "val":
        data = task.val_data
        n_examples = min(task.n_val_examples, config.target.val_data_limit)
    elif split == "test":
        data = task.test_data
        n_examples = task.n_test_examples
    val_iter = BasicIterator(config.target.batch_size,
                             instances_per_epoch=n_examples)(data, num_epochs=1, shuffle=False)
    n_val_batches = math.ceil(n_examples / config.target.batch_size)

    model.eval()
    with torch.no_grad():
        for batch in val_iter:
            batch = move_to_device(batch, cuda_device)
            out = model(batch)
            scorer.update(out, batch)

    # log
    log.info("\nValidation Summary:")
    logs = scorer.calculate(reset=False)
    log_string = " | ".join([f"{k}: {v:.4f}" for k, v in logs.items()])
    log.info(log_string)
    metric = logs["accuracy"]
    return metric
def predict(self, batches):
    t11 = time()
    predictions = []
    for batch, model in zip(batches, self.models):
        batch = util.move_to_device(batch.as_tensor_dict(),
                                    0 if torch.cuda.is_available() else -1)
        # batch['tokens'] holds the 'bert', 'bert-offsets', and 'mask' tensors.
        with torch.no_grad():
            prediction = model.forward(**batch)
        predictions.append(prediction)

    preds, idx, error_probs = self._convert(predictions)
    t55 = time()
    if self.log:
        print(f"Inference time {t55 - t11}")
    return preds, idx, error_probs
def forward_on_instance(self, instance: SyncedFieldsInstance) -> Dict[str, str]:
    """
    Takes an :class:`~allennlp.data.instance.Instance`, which typically has raw text in it,
    converts that text into arrays using this model's :class:`Vocabulary`, passes those arrays
    through :func:`self.forward()` and :func:`self.decode()` (which by default does nothing)
    and returns the result. Before returning the result, we convert any ``torch.Tensors`` into
    numpy arrays and remove the batch dimension.
    """
    cuda_device = self._get_prediction_device()
    dataset = Batch([instance])
    dataset.index_instances(self.vocab)
    gt_has_oov = False
    dataset_tensor_dict = dataset.as_tensor_dict()
    if self.OOV_ID in dataset_tensor_dict["target_tokens"]["ids_with_unks"]:
        gt_has_oov = True
    model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
    output_ids = self.beam_search_decode(**model_input)

    output_words = []
    for _id in output_ids:
        if _id < self.vocab_size:
            output_words.append(self.vocab.get_token_from_index(_id))
        else:
            output_words.append(instance.oov_list[_id - self.vocab_size])
    assert output_words[0] == START_SYMBOL, \
        "somehow the first symbol is not the START symbol. might be a bug"
    output_words = output_words[1:]
    if output_words[-1] == END_SYMBOL:
        output_words = output_words[:-1]
    return " ".join(output_words)
def forward_on_instances(self, instances: List[Instance],
                         cuda_device: int) -> List[Dict[str, numpy.ndarray]]:
    dataset = Batch(instances)
    dataset.index_instances(self.vocab)
    model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
    model_input.update({'predict': True})
    outputs = self.decode(self(**model_input))

    instance_separated_output: List[Dict[str, numpy.ndarray]] = [
        {} for _ in dataset.instances
    ]
    for name, output in list(outputs.items()):
        outputs[name] = output
        for instance_output, batch_element in zip(instance_separated_output, output):
            instance_output[name] = batch_element
    return instance_separated_output
def _forward(self, tensor_batch: torch.Tensor, for_training: bool = False, task: Task = None):
    if task is not None:
        tensor_batch = move_to_device(tensor_batch, self._cuda_device)
        output_dict = self._model.forward(task_name=task._name, tensor_batch=tensor_batch)
        if for_training:
            try:
                loss = output_dict["loss"]
                loss += self._model.get_regularization_penalty()
            except KeyError:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " `loss` key in the output of model.forward(inputs).")
        return output_dict
    else:
        raise ConfigurationError("Cannot call forward through tasks `None`")
def run_repl(model, vocab, indexers, task, args):
    """ Run REPL """
    print("Input CTRL-C or enter 'QUIT' to terminate.")
    while True:
        try:
            print()
            input_string = input(" INPUT: ")
            if input_string == "QUIT":
                break

            tokens = process_sentence(
                tokenizer_name=task.tokenizer_name, sent=input_string, max_seq_len=args.max_seq_len
            )
            print("TOKENS:", " ".join("[{}]".format(tok) for tok in tokens))
            field = sentence_to_text_field(tokens, indexers)
            field.index(vocab)
            batch = Batch([Instance({"input1": field})]).as_tensor_dict()
            batch = move_to_device(batch, args.cuda)
            with torch.no_grad():
                out = model.forward(task, batch, predict=True)
            assert out["logits"].shape[1] == 2

            s = " PRED: "
            s += "TRUE " if out["preds"][0].item() else "FALSE"
            s += " ({:.1f}%, logits: {:.3f} vs {:.3f})".format(
                torch.softmax(out["logits"][0], dim=0)[1].item() * 100,
                out["logits"][0][0].item(),
                out["logits"][0][1].item(),
            )
            print(s)
        except KeyboardInterrupt:
            print("\nTerminating.")
            break
def generate_captions(self, articles):
    instances = [self.prepare_instance(a) for a in articles]
    iterator = self.data_iterator(instances, num_epochs=1, shuffle=False)

    generated_captions = []
    for batch in iterator:
        if self.device.type == 'cuda':
            batch = move_to_device(batch, self.device.index)
        attns_list = self.model.generate(**batch)
        # generated_captions += output_dict['generations']
        # attns = output_dict['attns']
        # len(attns) == gen_len (ignoring seed)
        # len(attns[0]) == n_layers
        # attns[0][0]['image'].shape == [47]
        # attns[0][0]['article'].shape == [article_len]

    output = []
    for i, instance in enumerate(instances):
        buffered = BytesIO()
        instance['metadata']['image'].save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        output.append({
            'title': instance['metadata']['title'],
            'start': instance['metadata']['start'],
            'before': instance['metadata']['before'],
            'after': instance['metadata']['after'],
            # 'caption': generated_captions[i],
            'attns': attns_list[i],
            'image': img_str,
        })

    return output
def forward(self, tree: Tree, label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
    str_phase_holder = []
    self.collect_phase(tree, str_phase_holder)
    # tokenize and elmo tokenize
    instances = [self.text_to_instance(phase) for phase in str_phase_holder]
    idx, instances = sort_by_padding(instances, [("tokens", "num_tokens")], self.vocab)
    batch = Batch(instances)
    pad_lengths = batch.get_padding_lengths()
    tensor_dict = batch.as_tensor_dict(pad_lengths)
    tensor_dict = move_to_device(tensor_dict, 0)
    output = self.biattentive_cell(**tensor_dict)

    # reshape the outputs to [batch, label, component(, gaussian_dim)]
    batch_size, labels = output['weight'].size()
    labels = labels // self.component_num
    output['weight'] = output['weight'].reshape(batch_size, labels, self.component_num)
    output['mu'] = output['mu'].reshape(batch_size, labels, self.component_num,
                                        self.gaussian_dim)
    output['var'] = output['var'].reshape(batch_size, labels, self.component_num,
                                          self.gaussian_dim)

    # restore the original instance order
    new_idx = list(range(len(instances)))
    for pos, name in enumerate(idx):
        new_idx[name] = pos
    for name, tensor in output.items():
        output[name] = torch.stack([tensor[i] for i in new_idx])
    return output
def _iter_batches(self) -> Iterator[TensorDict]:
    if self._instances is not None or self.num_workers <= 0:
        for batch in self._instances_to_batches(self.iter_instances(), move_to_device=True):
            yield batch
    else:
        ctx = mp.get_context(self.start_method)

        queue: mp.JoinableQueue = (
            ctx.JoinableQueue()
            if self._max_batch_queue_size is None
            else ctx.JoinableQueue(maxsize=self._max_batch_queue_size)
        )
        workers = self._start_batch_workers(queue, ctx)

        try:
            # We can now start consuming from the `queue` as the batch workers
            # produce batches.
            done_count: int = 0
            while done_count < self.num_workers:
                for batch, worker_error in iter(queue.get, (None, None)):
                    if worker_error is not None:
                        e, tb = worker_error
                        raise WorkerError(e, tb)

                    if not self._worker_cuda_safe and self.cuda_device is not None:
                        # Need to move batch to target device now.
                        batch = nn_util.move_to_device(batch, self.cuda_device)
                    yield batch
                    queue.task_done()
                done_count += 1
        finally:
            if hasattr(queue, "close"):  # for compat with different Python versions.
                queue.close()  # type: ignore[attr-defined]
            self._join_workers(workers, queue)
def evaluate_batch(model, batch, trigger_token_ids=None, snli=False):
    """
    Takes a batch of classification examples (SNLI or SST), and runs them through the model.
    If trigger_token_ids is not None, then it will append the tokens to the input.
    This function is used to get the model's accuracy and/or the loss with/without the trigger.
    """
    batch = move_to_device(batch, cuda_device=0)
    if trigger_token_ids is None:
        if snli:
            model(batch['premise'], batch['hypothesis'], batch['label'])
        else:
            model(batch['tokens'], batch['label'])
        return None
    else:
        trigger_sequence_tensor = torch.LongTensor(deepcopy(trigger_token_ids))
        trigger_sequence_tensor = trigger_sequence_tensor.repeat(len(batch['label']), 1).cuda()
        if snli:
            original_tokens = batch['hypothesis']['tokens'].clone()
            batch['hypothesis']['tokens'] = torch.cat((trigger_sequence_tensor, original_tokens), 1)
            output_dict = model(batch['premise'], batch['hypothesis'], batch['label'])
            batch['hypothesis']['tokens'] = original_tokens
        else:
            original_tokens = batch['tokens']['tokens']['tokens'].clone()
            batch['tokens']['tokens']['tokens'] = torch.cat((trigger_sequence_tensor, original_tokens), 1)
            output_dict = model(**batch)
            batch['tokens']['tokens']['tokens'] = original_tokens
        return output_dict
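# A stand-alone illustration (not from the snippet above) of its trigger-prepending step:
# a fixed list of trigger token ids is repeated once per example and concatenated in front
# of each token sequence. The ids and shapes below are arbitrary placeholders.
import torch

trigger_token_ids = [11, 12, 13]
tokens = torch.tensor([[5, 6, 7, 8],
                       [9, 10, 11, 12]])                         # (batch_size, seq_len)
triggers = torch.LongTensor(trigger_token_ids).repeat(tokens.size(0), 1)
tokens_with_trigger = torch.cat((triggers, tokens), dim=1)       # (batch_size, 3 + seq_len)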
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Does a forward pass on the given batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self.model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self.model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs).")
        loss = None

    return loss
def data_parallel(batch_group: List[TensorDict],
                  model: Model,
                  cuda_devices: List) -> Dict[str, torch.Tensor]:
    """
    Performs a forward pass using multiple GPUs. This is a simplification
    of torch.nn.parallel.data_parallel to support the allennlp model
    interface.
    """
    assert len(batch_group) <= len(cuda_devices)

    moved = [
        nn_util.move_to_device(batch, device)
        for batch, device in zip(batch_group, cuda_devices)
    ]

    used_device_ids = cuda_devices[:len(moved)]
    # Counterintuitively, it appears replicate expects the source device id to be the first
    # element in the device id list. See torch.cuda.comm.broadcast_coalesced, which is called
    # indirectly.
    replicas = replicate(model, used_device_ids)

    # We pass all our arguments as kwargs. Create a list of empty tuples of the
    # correct shape to serve as (non-existent) positional arguments.
    inputs = [()] * len(batch_group)
    outputs = parallel_apply(replicas, inputs, moved, used_device_ids)

    # Only the 'loss' is needed.
    # a (num_gpu, ) tensor with loss on each GPU
    losses = gather([output['loss'].unsqueeze(0) for output in outputs], used_device_ids[0], 0)
    return {'loss': losses.mean()}
def forward_on_instances(self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
    """
    Takes a list of :class:`~allennlp.data.instance.Instance`s, converts that text into arrays
    using this model's :class:`Vocabulary`, passes those arrays through :func:`self.forward()`
    and :func:`self.decode()` (which by default does nothing) and returns the result. Before
    returning the result, we convert any ``torch.Tensors`` into numpy arrays and separate the
    batched output into a list of individual dicts per instance. Note that typically this will
    be faster on a GPU (and conditionally, on a CPU) than repeated calls to
    :func:`forward_on_instance`.

    Parameters
    ----------
    instances : List[Instance], required
        The instances to run the model on.

    Returns
    -------
    A list of the model's output for each instance.
    """
    batch_size = len(instances)
    with torch.no_grad():
        cuda_device = self._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(self.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        outputs = self.decode(self(**model_input))

        instance_separated_output: List[Dict[str, numpy.ndarray]] = [
            {} for _ in dataset.instances
        ]
        for name, output in list(outputs.items()):
            if isinstance(output, torch.Tensor):
                # NOTE(markn): This is a hack because 0-dim pytorch tensors are not iterable.
                # This occurs with batch size 1, because we still want to include the loss in
                # that case.
                if output.dim() == 0:
                    output = output.unsqueeze(0)

                if output.size(0) != batch_size:
                    self._maybe_warn_for_unseparable_batches(name)
                    continue
                output = output.detach().cpu().numpy()
            elif len(output) != batch_size:
                self._maybe_warn_for_unseparable_batches(name)
                continue
            outputs[name] = output
            for instance_output, batch_element in zip(instance_separated_output, output):
                instance_output[name] = batch_element
        return instance_separated_output
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator,
                                   total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not _warned_tqdm_ignores_underscores and
                    any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value)
                                     for name, value in metrics.items()
                                     if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics