Example #1
import torch
from tqdm import tqdm
from transformers import GPT2LMHeadModel

# `Tokenizer`, `MyDataset`, `TrainingArguments`, and `build_data_iterator`
# are project-local helpers assumed to be defined elsewhere in the repo.
def eval(tokenizer: Tokenizer, model: GPT2LMHeadModel, dataset: MyDataset,
         args: TrainingArguments):
    model.eval()
    loss = 0
    iterator = build_data_iterator(tokenizer, dataset, args.eval_batch_size,
                                   args.block_size)
    for ids, attention_mask in tqdm(iterator, desc='eval'):
        ids = ids.to(args.device)
        with torch.no_grad():
            # Passing labels=ids makes the model return the language-modeling
            # loss as its first output.
            loss += model(ids,
                          attention_mask=attention_mask.to(args.device),
                          labels=ids)[0].item()
    model.train()
    return loss / len(iterator)
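
Since `MyDataset` and `build_data_iterator` are project-local, here is a self-contained sketch of the same pattern (accumulating the loss returned by `labels=ids` under `torch.no_grad()`); the toy text list is purely illustrative and not part of the original snippet.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

texts = ["hello world", "the quick brown fox"]  # stand-in for MyDataset
total_loss = 0.0
for text in texts:
    ids = torch.tensor([tokenizer.encode(text)])
    with torch.no_grad():
        # Same trick as above: labels=ids yields the LM loss as output[0].
        total_loss += model(ids, labels=ids)[0].item()
print("mean eval loss:", total_loss / len(texts))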
Example #2

import logging
import math

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import GPT2LMHeadModel

def _validate(
    model: GPT2LMHeadModel,
    dev_dataloader: DataLoader,
    device: torch.device,
    logger: logging.Logger,
    global_step: int,
):
    model.eval()
    loss_list = []
    for batch_data in tqdm(dev_dataloader, desc="[EVAL]"):
        with torch.no_grad():
            # Each batch is the (input_ids, attention_mask, labels) triple.
            input_ids, attention_mask, labels = tuple(value.to(device) for value in batch_data)
            model_outputs = model(input_ids, attention_mask=attention_mask, labels=labels, return_dict=True)
            loss_list.append(model_outputs.loss.item())

    # Perplexity is the exponential of the mean cross-entropy loss.
    mean_loss = np.mean(loss_list)
    logger.info(f"[EVAL] global_step:{global_step} loss:{mean_loss:.4f} perplexity:{math.exp(mean_loss):.4f}")
    model.train()
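
A hedged usage sketch for `_validate`: the dataloader below yields the `(input_ids, attention_mask, labels)` triple the loop unpacks, built here from dummy tensors purely for illustration.

import logging
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import GPT2LMHeadModel

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("eval")

model = GPT2LMHeadModel.from_pretrained("gpt2")
device = torch.device("cpu")

# Dummy batches; using input_ids as labels reproduces the LM objective.
input_ids = torch.randint(0, model.config.vocab_size, (4, 16))
attention_mask = torch.ones_like(input_ids)
dev_dataloader = DataLoader(
    TensorDataset(input_ids, attention_mask, input_ids), batch_size=2
)
_validate(model, dev_dataloader, device, logger, global_step=0)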
Example #3
import logging
from typing import Tuple

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

LOG = logging.getLogger(__name__)

def predict_next_token(
    words: str, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: GPT2Tokenizer, top: int = 3
) -> Tuple[Tuple[str, float], ...]:
    """
    Predict the next token, given some starting words.
    :param words: a string of a few words (max tokens: 1023)
    :param gpt2_model: GPT2LMHeadModel preferably
    :param gpt2_tokenizer: GPT2Tokenizer
    :param top: the number of probable tokens to return
    :return: a tuple of tuples (token, probability)

    ## OOME on circleci :-(
    # >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # >>> gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    # >>> _ = gpt2_model.eval()
    # >>> predict_next_token('I am looking', gpt2_model, gpt2_tokenizer)
    # (('forward', 0.3665640652179718), ('for', 0.35346919298171997), ('to', 0.08423731476068497))

    """
    # Encode the prompt and add a batch dimension (batch size 1).
    tokens_tensor = torch.tensor(  # pylint: disable=not-callable
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(0)
    if tokens_tensor.shape[1] > 1023:
        LOG.warning(
            "Too many tokens, should be 1023 or less, found %s", tokens_tensor.shape[1]
        )
    # dim=1 is the vocabulary dimension of the (seq_len, vocab_size) logits.
    soft = torch.nn.Softmax(dim=1)
    gpt2_model.eval()
    with torch.no_grad():
        # Logits for every position; the last row is the next-token prediction.
        predictions = gpt2_model(tokens_tensor)[0].squeeze(0)
        predictions = soft(predictions)
        values, indices = torch.topk(  # pylint: disable=no-member
            predictions[-1, :], top
        )
        id_prob = list(zip(indices, values))
    return tuple(
        (gpt2_tokenizer.decode(int(token_id)).strip(), float(prob))
        for token_id, prob in id_prob
    )
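
The doctest above is disabled to avoid out-of-memory failures on CI; an equivalent standalone usage, using no names beyond those the function already takes, looks like this:

from transformers import GPT2LMHeadModel, GPT2Tokenizer

gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
for token, prob in predict_next_token("I am looking", gpt2_model, gpt2_tokenizer):
    print(f"{token!r}: {prob:.4f}")

Note that only the logits at the last position (`predictions[-1, :]`) are used: a causal LM returns a next-token distribution for every prefix of the input, and the final one is the prediction for the token after the whole prompt.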