Example #1
def get_answer(text: str, model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer):
    # The tokenizer is expected to carry two additional special tokens:
    # a context marker and an answer marker.
    cntx_token_id, answer_token_id = tokenizer.additional_special_tokens_ids
    context = tokenizer.encode(text)
    context = [tokenizer.bos_token_id, cntx_token_id] + context + [answer_token_id]
    context = torch.LongTensor([context])
    # Take the first (only) generated sequence and drop its first and last tokens.
    ans = model.generate(input_ids=context, max_length=100, temperature=0.7)[0][1:-1]
    return tokenizer.decode(ans)
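A minimal usage sketch for get_answer, assuming the stock gpt2 checkpoint and two hypothetical special-token strings, "<context>" and "<answer>"; the original project's fine-tuned weights and token names are not shown here, so without them the output will not be a meaningful answer.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# "<context>" and "<answer>" are placeholder names for the two additional special tokens.
tokenizer.add_special_tokens({"additional_special_tokens": ["<context>", "<answer>"]})
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))  # make room for the two new tokens
model.eval()

print(get_answer("How do I reset my password?", model, tokenizer))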
Example #2
def generate_sentences(model: GPT2LMHeadModel,
                       tokenizer: GPT2Tokenizer,
                       top_k: int = 50,
                       top_p: float = 0.95,
                       max_length: int = 512,
                       num_return_sequences: int = 1,
                       prompt_tokens: torch.Tensor = None) -> List[str]:
    if prompt_tokens is None:
        # Fall back to a single random token id, shaped (1, 1), as the prompt.
        prompt_tokens = torch.tensor(random.randint(1, 30000))[None, None]

    if prompt_tokens.shape[1] > max_length:
        prompt_tokens = prompt_tokens[:, 0:max_length]

    sample_outputs = model.generate(input_ids=prompt_tokens,
                                    do_sample=True,
                                    top_k=top_k,
                                    top_p=top_p,
                                    max_length=max_length * 2,
                                    num_return_sequences=num_return_sequences)

    sentence_list = []
    for sample_output in sample_outputs:
        input_sentence = tokenizer.decode(
            prompt_tokens[0],
            skip_special_tokens=True,
        )
        generated_sentence = tokenizer.decode(
            sample_output[prompt_tokens.shape[1]:],
            skip_special_tokens=True,
        )

        sentence = (f"\n\n{' # ' * 32}\n\n"
                    f"\n\n{' # ' * 32}\n\n"
                    f"INPUT:\n\n{input_sentence}"
                    f"\n\n{' # ' * 32}\n\n"
                    f"GENERATION:\n\n{generated_sentence}")
        sentence_list.append(sentence)

    return sentence_list
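A minimal usage sketch for generate_sentences with an explicit prompt; the prompt text is illustrative.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

prompt = torch.tensor([tokenizer.encode("The weather today is")])
for block in generate_sentences(model, tokenizer,
                                max_length=64,
                                num_return_sequences=2,
                                prompt_tokens=prompt):
    print(block)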
Example #3
def make_predictions(
    text: str,
    tokenizer: GPT2Tokenizer,
    gpt2: GPT2LMHeadModel,
    device: torch.device,
    max_output_length: int = 100,
) -> Sequence[str]:
    """Make predictions for text using GPT-2.

    Args:
        text: Input text.
        tokenizer: GPT-2 tokenizer.
        gpt2: GPT-2 model.
        device: GPT-2 device.
        max_output_length: Maximum length of generated sequence.

    Returns:
        List of predicted strings after the provided text, or an empty list if the input is over 300
        tokens long.
    """
    text = unicodedata.normalize("NFKC", text)
    input_ids = tokenizer.encode(text)
    input_ids = torch.tensor([input_ids]).to(device)  # pylint: disable=not-callable
    input_id_length = len(input_ids[0])

    # Long inputs usually produce useless outputs, so return no predictions for them
    if input_id_length > 300:
        return []

    # Enforce maximum generated length to prevent memory issues
    max_length = min(input_id_length + max_output_length, 350)

    with torch.cuda.amp.autocast():  # Run with FP16
        sample_outputs = gpt2.generate(
            input_ids,
            do_sample=True,
            max_length=max_length,
            min_length=2,  # We want output that is at least two words
            temperature=0.8,
            top_k=50,
            top_p=0.8,
            num_return_sequences=40,
        )

    suggestions = []
    for output in sample_outputs:
        decoded_output = result_replace(tokenizer.decode(output[input_id_length:]))
        suggestions.append(decoded_output)

    return suggestions
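make_predictions depends on a result_replace helper that is not part of this snippet; the sketch below substitutes a pass-through stand-in for it (an assumption about its role) so the function can be exercised end to end.

import unicodedata  # used by make_predictions above

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def result_replace(text: str) -> str:
    # Stand-in for the project's post-processing helper, which is not shown above.
    return text.strip()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2 = GPT2LMHeadModel.from_pretrained("gpt2").to(device).eval()

for suggestion in make_predictions("The meeting was moved to", tokenizer, gpt2, device):
    print(suggestion)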
Example #4
def predict_next_token(
    words: str, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: GPT2Tokenizer, top: int = 3
) -> Tuple[Tuple[str, float], ...]:
    """
    Predict the next token, given some starting words.
    :param words: a string of a few words (max tokens: 1023)
    :param gpt2_model: GPT2LMHeadModel preferably
    :param gpt2_tokenizer: GPT2Tokenizer
    :param top: the number of probable tokens to return
    :return: a tuple of tuples (token, probability)

    ## OOME on circleci :-(
    # >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # >>> gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    # >>> _ = gpt2_model.eval()
    # >>> predict_next_token('I am looking', gpt2_model, gpt2_tokenizer)
    # (('forward', 0.3665640652179718), ('for', 0.35346919298171997), ('to', 0.08423731476068497))

    """
    tokens_tensor = torch.tensor(  # pylint: disable=not-callable
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(0)  # Batch size 1
    if tokens_tensor.shape[1] > 1023:
        LOG.warning(
            "Too many tokens, should be 1023 or less, found %s", tokens_tensor.shape[1]
        )
    soft = torch.nn.Softmax(dim=1)
    gpt2_model.eval()
    with torch.no_grad():
        # Logits for every position, shape (sequence_length, vocab_size) after
        # squeezing the batch dimension; the softmax turns them into probabilities.
        predictions = gpt2_model(tokens_tensor)[0].squeeze(0)
        predictions = soft(predictions)
        # Keep only the distribution at the last position, i.e. for the next token.
        values, indices = torch.topk(  # pylint: disable=no-member
            predictions[-1, :], top
        )
        id_prob = list(zip(indices, values))
    return tuple(
        [  # type: ignore
            (gpt2_tokenizer.decode(int(tmp[0])).strip(), float(tmp[1]))
            for tmp in id_prob
        ]
    )
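Since only the distribution at the final position is used, the softmax can be restricted to that single row of logits; a small equivalent sketch (not the original project's code), using the same transformers API:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def predict_next_token_last_only(words, gpt2_model, gpt2_tokenizer, top=3):
    # Equivalent to predict_next_token, but the softmax is applied only to the
    # logits of the last input position instead of the whole sequence.
    tokens_tensor = torch.tensor(
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(0)
    gpt2_model.eval()
    with torch.no_grad():
        last_logits = gpt2_model(tokens_tensor)[0][0, -1, :]
        probs = torch.softmax(last_logits, dim=-1)
        values, indices = torch.topk(probs, top)
    return tuple(
        (gpt2_tokenizer.decode(int(idx)).strip(), float(val))
        for idx, val in zip(indices, values)
    )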
Example #5
def generate(
    input_text: str,
    model: GPT2LMHeadModel,
    tokenizer: GPT2Tokenizer,
    max_generation_len: int = 200,
    max_context_len: int = 256,
):
    generated_sentence = input_text
    prompt_tokens = torch.tensor(
        tokenizer.encode(generated_sentence)).to("cuda").unsqueeze(0)
    text_form = st.empty()

    for _ in tqdm(range(max_generation_len)):
        # NOTE: uncomment this and remove `model.half()` if it fits into the GPU
        # with torch.cuda.amp.autocast():
        #     outputs = model(prompt_tokens)

        # Keep only the most recent `max_context_len` tokens as context.
        context_len = prompt_tokens.shape[1]
        if context_len > max_context_len:
            prompt_tokens = prompt_tokens[:, (context_len - max_context_len):]

        outputs = model(prompt_tokens)

        last_scores = outputs[0][:, -1, :]
        probs = torch.softmax(last_scores, dim=-1)

        predicted_token = predict_token(probs)
        predicted_token = predicted_token.to("cuda")

        prompt_tokens = torch.cat([prompt_tokens, predicted_token], dim=1)

        predicted_word = tokenizer.decode(
            predicted_token,
            skip_special_tokens=True,
        )

        generated_sentence += predicted_word
        text_form.empty()
        text_form.text(generated_sentence)

    return generated_sentence
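generate relies on a predict_token helper (plus streamlit as st and tqdm) that is not shown here. A plausible stand-in for the sampler, an assumption rather than the original helper, that draws one token id from the probability distribution:

import torch

def predict_token(probs: torch.Tensor) -> torch.Tensor:
    # probs has shape (1, vocab_size); draw one token id and return it with
    # shape (1, 1) so it can be concatenated to prompt_tokens along dim=1.
    return torch.multinomial(probs, num_samples=1)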
Example #6
def compute_compression(model,
                        data,
                        context,
                        batch_size,
                        verbose=False,
                        tbw: SummaryWriter = None,
                        tok: trf.GPT2Tokenizer = None,
                        skip=0):
    """
    Compute the _compression_ of a dataset under a model. That is, given a model, in how many bits could we represent
    the dataset. This requires us to turn a given probability distribution into a code for the outcomes.
    See [this video](https://youtu.be/mSneVjDvzNQ) for an explanation.
    :param model: A sequence-to-sequence model that takes as input a (sub) sequence of integers and produces a probability
    distributuion on the output.
    :param data: A singe list of integers representing the  data
    :return: The result of the computation in "bits per byte". That is, how many bits does the compressed representation
    spend on each byte (=ASCII character) of the raw data.
    """

    bits, tot = 0.0, 0
    batch = []
    # Buffer, every time it fills up, we run it through the model
    # --- For the sake of speed we want to process the data in batches. For each token in the data, we make a
    #     prediction based on all the `context` tokens before it. This means that for each subsequence in the batch, we
    #     need to shift the start/end indices ahead by one token.
    #
    #     After we pass the batch through the model, we look at only the probabilities predicted for the last token.
    target_indices = []
    i, ic = 0, 0
    data_range = tqdm.trange(skip, data.size(0)) if verbose else range(skip, data.size(0))
    for current in data_range:

        # `current` is the character which we will ultimately predict

        fr = max(0, current - context)
        to = current + 1

        # -- slice out an instance of size context + 1 (or shorter at the start of the data)
        instance = data[fr:to].to(torch.long)  # the subsequence of the data to add to the batch

        # if tok is not None:
        #     print(instance[:-1], tok.decode(instance[:-1]))
        #     print(instance[-1:], tok.decode(instance[-1:]))

        target_indices.append(instance.size(0) - 2)  # index of the last element of the input to the model

        if instance.size(0) < context + 1:
            assert skip < context  # We shouldn't get here if we skip the first `context` characters

            # the index in the output tensor of the character we want to predict
            # -- It's context + 1, because we clip off the last token as a target

            pad = torch.zeros(size=(context + 1 - instance.size(0),), dtype=torch.long)
            instance = torch.cat([instance, pad], dim=0)
            # -- the first tokens don't have enough tokens preceding them, so we pad them to the right size.

            assert instance.size(0) == context + 1  # all instances should be `context` + 1 long

        if torch.cuda.is_available():
            instance = instance.cuda()

        batch.append(instance[None, :])
        # -- We add a singleton dimension to concatenate along later.

        if len(batch) == batch_size or current == data.size(0) - 1:
            # batch is full or we are at the last instance, run it through the model

            b = len(batch)

            ti = torch.tensor(target_indices) + 1
            batch_cat = torch.cat(batch, dim=0)
            inputs = batch_cat[:, :-1]  # input
            target = batch_cat[torch.arange(b), ti]  # target values

            with torch.no_grad():
                if next(model.parameters()).is_cuda:
                    inputs = inputs.cuda()
                output = model(inputs)

            if not isinstance(output, torch.Tensor):
                # make the method work for GPT2 models from Huggingface
                output = torch.log_softmax(output.logits, dim=2)

            assert output.size()[:2] == (b, context), \
                f'was: {output.size()}, should be {(b, context, -1)}'

            lnprobs = output[torch.arange(b, device=d()), target_indices, target]
            log2probs = lnprobs / LOGE2
            # -- The model produces natural logarithms of probabilities, but we need base-2 logarithms of the
            #    probabilities, since these give us bits.

            if tbw is not None:
                for j, lp in enumerate(log2probs):
                    i += 1
                    tbw.add_scalar('compression/bits-per-token', -lp, i)

                    if tok is not None:
                        nc = len(tok.decode(target[j]))
                        ic += nc
                        tbw.add_scalar('compression/bits-per-byte', -lp / nc, ic)

            bits += -log2probs.sum()
            # -- add the bits for each character (the negative log_2 probabilities) to the running total
            batch, target_indices = [], []  # clear the buffer

    if isinstance(bits, torch.Tensor):
        bits = bits.item()

    if tok is not None:
        return bits, ic  # total nr of bits used, total nr of characters seen
    else:
        return bits  # total nr of bits used
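A minimal usage sketch for compute_compression with a Hugging Face GPT-2 model. The surrounding project's d() device helper and LOGE2 constant are not shown above, so stand-ins are defined here (an assumption); data is a 1-D LongTensor of token ids.

import math

import torch
import transformers as trf

LOGE2 = math.log(2.0)  # stand-in for the project's natural-log-of-2 constant

def d():
    # Stand-in for the project's device helper.
    return "cuda" if torch.cuda.is_available() else "cpu"

tok = trf.GPT2Tokenizer.from_pretrained("gpt2")
model = trf.GPT2LMHeadModel.from_pretrained("gpt2").to(d()).eval()

text = "Compression is prediction: the better the model, the fewer bits per token."
data = torch.tensor(tok.encode(text), dtype=torch.long)

total_bits = compute_compression(model, data, context=32, batch_size=8)
print(f"{total_bits:.1f} bits total, {total_bits / data.size(0):.2f} bits per token")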