def eval(tokenizer: Tokenizer, model: GPT2LMHeadModel, dataset: MyDataset, args: TrainingArguments):
    """Compute the mean language-modeling loss of ``model`` over ``dataset``.

    The model is put into eval mode for the duration of the pass and
    switched back to train mode before returning.

    :param tokenizer: tokenizer used to build the evaluation batches
    :param model: GPT-2 LM-head model to evaluate
    :param dataset: dataset to iterate over
    :param args: supplies ``eval_batch_size``, ``block_size`` and ``device``
    :return: summed batch losses divided by the number of batches
    """
    model.eval()
    data_iter = build_data_iterator(tokenizer, dataset, args.eval_batch_size, args.block_size)
    total_loss = 0.0
    for input_ids, mask in tqdm(data_iter, desc='eval'):
        input_ids = input_ids.to(args.device)
        # No gradients needed for evaluation; labels == input_ids gives the LM loss.
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=mask.to(args.device), labels=input_ids)
        total_loss += outputs[0].item()
    model.train()
    return total_loss / len(data_iter)
def _validate(
    model: GPT2LMHeadModel,
    dev_dataloader: DataLoader,
    device: torch.device,
    logger: logging.Logger,
    global_step: int,
):
    """Run one evaluation pass over the dev set and log mean loss / perplexity.

    Puts ``model`` into eval mode for the pass and restores train mode at the
    end. Perplexity is ``exp(mean_loss)``.

    :param model: GPT-2 LM-head model under evaluation
    :param dev_dataloader: yields (input_ids, attention_mask, labels) batches
    :param device: device the batch tensors are moved to
    :param logger: destination for the evaluation summary line
    :param global_step: training step recorded in the log message
    """
    model.eval()
    batch_losses = []
    for batch in tqdm(dev_dataloader, desc="[EVAL]"):
        with torch.no_grad():
            input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)
            outputs = model.forward(input_ids, attention_mask=attention_mask, labels=labels, return_dict=True)
        batch_losses.append(outputs.loss.item())
    mean_loss = np.mean(batch_losses)
    logger.info(f"[EVAL] global_step:{global_step} loss:{mean_loss:.4f} perplexity:{math.exp(mean_loss):.4f}")
    model.train()
def predict_next_token(
    words: str, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: GPT2Tokenizer, top: int = 3
) -> Tuple[Tuple[str, float], ...]:
    """
    Predict the next token, given a some starting words.

    :param words: a string of a few words (max tokens: 1023)
    :param gpt2_model: GPT2LMHeadModel preferably
    :param gpt2_tokenizer: GPT2Tokenizer
    :param top: the number of probable tokens to return
    :return: a tuple of tuples (token, probability)

    ## OOME on circleci :-(
    # >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # >>> gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    # >>> _ = gpt2_model.eval()
    # >>> predict_next_token('I am looking', gpt2_model, gpt2_tokenizer)
    # (('forward', 0.3665640652179718), ('for', 0.35346919298171997), ('to', 0.08423731476068497))
    """
    tokens_tensor = torch.tensor(  # pylint: disable=not-callable
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(
        0
    )  # Batch size 1
    # GPT-2's context window is 1024; warn (but still attempt) beyond 1023 input tokens.
    if tokens_tensor.shape[1] > 1023:
        LOG.warning(
            "Too many tokens, should be 1023 or less, found %s", tokens_tensor.shape[1]
        )
    gpt2_model.eval()
    with torch.no_grad():
        logits = gpt2_model(tokens_tensor)[0].squeeze(0)  # (seq_len, vocab)
    # Softmax rows are independent, so normalizing only the last position's
    # logits yields the same probabilities as softmaxing the whole matrix —
    # this avoids an O(seq_len * vocab) pass over positions we never read.
    probabilities = torch.softmax(logits[-1, :], dim=-1)
    values, indices = torch.topk(  # pylint: disable=no-member
        probabilities, top
    )
    return tuple(  # type: ignore
        (gpt2_tokenizer.decode(int(token_id)).strip(), float(prob))
        for token_id, prob in zip(indices, values)
    )