Example #1
def get_answer(text: str, model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer):
    # The tokenizer is expected to provide exactly two additional special
    # tokens: a context marker and an answer marker.
    cntx_token_id, answer_token_id = tokenizer.additional_special_tokens_ids
    context = tokenizer.encode(text)
    context = [tokenizer.bos_token_id] + [cntx_token_id] + context + [answer_token_id]
    context = torch.LongTensor([context])
    # Drop the first and last generated ids before decoding.
    ans = model.generate(input_ids=context, max_length=100,
                         temperature=0.7)[0][1:-1]
    return tokenizer.decode(ans)
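A minimal usage sketch for this helper. The checkpoint name and the two special tokens ("<context>", "<answer>") are assumptions here; in practice the function expects a fine-tuned model whose tokenizer already defines exactly two additional special tokens.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Hypothetical special tokens; the real ones depend on how the model was fine-tuned.
tokenizer.add_special_tokens({"additional_special_tokens": ["<context>", "<answer>"]})

model = GPT2LMHeadModel.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))  # account for the added tokens
model.eval()

print(get_answer("How old is the universe?", model, tokenizer))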
Example #2
def make_predictions(
    text: str,
    tokenizer: GPT2Tokenizer,
    gpt2: GPT2LMHeadModel,
    device: torch.device,
    max_output_length: int = 100,
) -> Sequence[str]:
    """Make predictions for text using GPT-2.

    Args:
        text: Input text.
        tokenizer: GPT-2 tokenizer.
        gpt2: GPT-2 model.
        device: GPT-2 device.
        max_output_length: Maximum length of generated sequence.

    Returns:
        List of predicted strings after the provided text, or an empty list if the input is over 300
        tokens long.
    """
    text = unicodedata.normalize("NFKC", text)
    input_ids = tokenizer.encode(text)
    input_ids = torch.tensor([input_ids]).to(device)  # pylint: disable=not-callable
    input_id_length = len(input_ids[0])

    # Long inputs usually result in useless outputs, so returning no predictions is acceptable
    if input_id_length > 300:
        return []

    # Enforce maximum generated length to prevent memory issues
    max_length = min(input_id_length + max_output_length, 350)

    with torch.cuda.amp.autocast():  # Run with FP16
        sample_outputs = gpt2.generate(
            input_ids,
            do_sample=True,
            max_length=max_length,
            min_length=2,  # We want output that is at least two tokens
            temperature=0.8,
            top_k=50,
            top_p=0.8,
            num_return_sequences=40,
        )

    suggestions = []
    for output in sample_outputs:
        decoded_output = result_replace(tokenizer.decode(output[input_id_length:]))
        suggestions.append(decoded_output)

    return suggestions
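A sketch of how this could be called. result_replace is not shown above, so a trivial stand-in is assumed; the checkpoint and device choice are illustrative, and the autocast context inside the function assumes a CUDA-capable setup.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


def result_replace(text: str) -> str:
    # Stand-in for the post-processing helper used above (assumption).
    return text.strip()


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2 = GPT2LMHeadModel.from_pretrained("gpt2").to(device).eval()

for suggestion in make_predictions("The weather today is", tokenizer, gpt2, device)[:5]:
    print(repr(suggestion))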
Example #3
def predict_next_token(
    words: str, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: GPT2Tokenizer, top: int = 3
) -> Tuple[Tuple[str, float], ...]:
    """
    Predict the next token, given some starting words.
    :param words: a string of a few words (max tokens: 1023)
    :param gpt2_model: GPT2LMHeadModel preferably
    :param gpt2_tokenizer: GPT2Tokenizer
    :param top: the number of probable tokens to return
    :return: a tuple of tuples (token, probability)

    ## OOME on circleci :-(
    # >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # >>> gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    # >>> _ = gpt2_model.eval()
    # >>> predict_next_token('I am looking', gpt2_model, gpt2_tokenizer)
    # (('forward', 0.3665640652179718), ('for', 0.35346919298171997), ('to', 0.08423731476068497))

    """
    tokens_tensor = torch.tensor(  # pylint: disable=not-callable
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(0)  # Batch size 1
    if tokens_tensor.shape[1] > 1023:
        LOG.warning(
            "Too many tokens, should be 1023 or less, found %s", tokens_tensor.shape[1]
        )
    soft = torch.nn.Softmax(dim=1)
    gpt2_model.eval()
    with torch.no_grad():
        predictions = gpt2_model(tokens_tensor)[0].squeeze(0)
        predictions = soft(predictions)
        values, indices = torch.topk(  # pylint: disable=no-member
            predictions[-1, :], top
        )
        id_prob = list(zip(indices, values))
    return tuple(
        [  # type: ignore
            (gpt2_tokenizer.decode(int(tmp[0])).strip(), float(tmp[1]))
            for tmp in id_prob
        ]
    )
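The doctest above was disabled because of memory limits on CI; a plain script version could look like the sketch below. The stock "gpt2" checkpoint is an assumption, and LOG is expected to be a module-level logger in the original file.

from transformers import GPT2LMHeadModel, GPT2Tokenizer

gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")

for token, probability in predict_next_token("I am looking", gpt2_model, gpt2_tokenizer):
    print(f"{token!r}: {probability:.4f}")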
Example #4
def generate(
    input_text: str,
    model: GPT2LMHeadModel,
    tokenizer: GPT2Tokenizer,
    max_generation_len: int = 200,
    max_context_len: int = 256,
):
    generated_sentence = input_text
    prompt_tokens = torch.tensor(
        tokenizer.encode(generated_sentence)).to("cuda").unsqueeze(0)
    text_form = st.empty()

    for _ in tqdm(range(max_generation_len)):
        # NOTE: uncomment this and remove `model.half()` if it fits into the GPU
        # with torch.cuda.amp.autocast():
        #     outputs = model(prompt_tokens)

        context_len = prompt_tokens.shape[1]
        if context_len > max_context_len:
            prompt_tokens = prompt_tokens[:, (context_len - max_context_len):]

        outputs = model(prompt_tokens)

        last_scores = outputs[0][:, -1, :]
        probs = torch.softmax(last_scores, dim=-1)

        predicted_token = predict_token(probs)
        predicted_token = predicted_token.to("cuda")

        prompt_tokens = torch.cat([prompt_tokens, predicted_token], dim=1)

        predicted_word = tokenizer.decode(
            predicted_token,
            skip_special_tokens=True,
        )

        generated_sentence += predicted_word
        text_form.empty()
        text_form.text(generated_sentence)

    return generated_sentence
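predict_token is not defined in this snippet (st appears to be Streamlit, imported as "import streamlit as st", and tqdm comes from the tqdm package). A hypothetical stand-in for the sampler, drawing one token id from the softmaxed distribution of shape [1, vocab_size], could look like this:

import torch


def predict_token(probs: torch.Tensor) -> torch.Tensor:
    # Hypothetical sampler: draw a single token id per batch row from the
    # probability distribution over the vocabulary; returns shape [1, 1],
    # which matches the torch.cat(..., dim=1) above.
    return torch.multinomial(probs, num_samples=1)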
Example #5
    def process_lines(
        self,
        text_lines: List[str],
        tokenizer: GPT2Tokenizer,
    ) -> Tuple[List[str], List[List[int]]]:
        """
        Process a list of lines with the following rules:
        - If a line is blank: skip it.
        - If a line contains text:
            - If the text is dialog: append the line and continue.
            - Otherwise: keep appending the following lines to the
                current sentence until a blank line (or the stacking
                limit) is reached.

        Args:
            text_lines (List[str]): list containing the lines to
                be processed.
            tokenizer (GPT2Tokenizer): tokenizer used to encode the
                processed lines.

        Returns:
            Tuple[List[str], List[List[int]]]: tuple containing a list
                of the processed lines and a list with the token ids
                computed from each of these lines.
        """
        processed_lines = []
        tokenized_lines = []
        max_stacked_sentences = 12

        text_lines_gen = (l for l in text_lines)
        for line in text_lines_gen:
            processed_line = self.process_line(line)

            if processed_line == "":
                continue

            if is_dialog(processed_line):
                processed_line += " \n"

            else:
                for _ in range(max_stacked_sentences):
                    try:
                        extra_line = next(text_lines_gen)
                    except StopIteration:
                        break

                    processed_extra_line = self.process_line(extra_line)

                    if is_dialog(processed_extra_line):
                        processed_extra_line = f"\n{processed_extra_line} \n"

                    if processed_extra_line != "":
                        processed_line += f" {processed_extra_line}"
                    else:
                        break

                processed_line += " \n\n"

            processed_lines.append(processed_line)

            tokenized_line = tokenizer.encode(processed_line)
            tokenized_lines.append(tokenized_line)

        return processed_lines, tokenized_lines
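Rough usage sketch. The enclosing class (called TextPreprocessor here purely as a placeholder), its process_line method, and the is_dialog helper are not shown above, so this only illustrates the calling convention; the input file name is likewise illustrative.

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
preprocessor = TextPreprocessor()  # placeholder name for the enclosing class

with open("book.txt", encoding="utf-8") as handle:  # illustrative input file
    lines = handle.read().splitlines()

processed_lines, tokenized_lines = preprocessor.process_lines(lines, tokenizer)
print(len(processed_lines), "processed lines,",
      sum(len(ids) for ids in tokenized_lines), "tokens in total")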