Example #1
def memory_merge(prompt, context, tokenizer, maxHistory=1024):
    assert (prompt + context)
    # print(prompt+context)
    # logger.debug('RAW TEXT INPUT IS:`%r`', context)
    # The tokenizer is kind of broken for the first input, especially if it includes whitespace,
    # and likewise for any trailing whitespace on the last output.
    # I'm going with the add_prefix_space option, but I'm not sure it's quite right.
    prompt_tokens = tokenizer.encode(prompt,
                                     add_special_tokens=False,
                                     add_prefix_space=True)
    if len(prompt_tokens) >= maxHistory:
        logger.debug("Clamping the amount of prompt tokens.")
        context_tokens = prompt_tokens[-maxHistory:]
    else:
        context_tokens = hackyEncode(tokenizer,
                                     hackyWhiteSpaceCutter(prompt) + context)
        context_tokens = context_tokens[-(maxHistory - len(prompt_tokens)):]
        # logger.debug('DECODED CONTEXT TOKENS: `%r`', tokenizer.convert_ids_to_tokens(context_tokens))
        prompt_tokens.extend(context_tokens)
        context_tokens = prompt_tokens
        # logger.debug('DECODED OUTPUT IS: `%r`', tokenizer.decode(context_tokens, clean_up_tokenization_spaces=False))
        # this is a hack and it should be up to the sampler to deal with max size
        if len(context_tokens) > maxHistory:
            logger.error("CONTEXT IS TOO LONG ERROR")
            context_tokens = context_tokens[-maxHistory:]
    return context_tokens
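
memory_merge relies on two helpers that are not shown here, hackyEncode and hackyWhiteSpaceCutter. A minimal sketch of what they might look like, assuming they exist only to work around the GPT-2 BPE tokenizer's handling of leading and trailing whitespace (the actual implementations in the repository may differ):

# Hypothetical stand-ins for the helpers assumed by memory_merge above.
def hackyWhiteSpaceCutter(prompt):
    # Return only the trailing whitespace of the prompt so it can be
    # re-tokenized together with the context instead of being lost at the seam.
    return prompt[len(prompt.rstrip()):]


def hackyEncode(tokenizer, text):
    # Encode behind a non-whitespace sentinel so any leading whitespace in
    # `text` is kept and tokenized as if it were mid-text, then drop the
    # sentinel's tokens.
    sentinel = "===="
    n = len(tokenizer.encode(sentinel, add_special_tokens=False))
    return tokenizer.encode(sentinel + text, add_special_tokens=False)[n:]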
Example #2
def clean_suggested_action(result_raw, min_length=4):
    result_cleaned = standardize_punctuation(result_raw)
    result_cleaned = cut_trailing_sentence(result_cleaned, allow_action=True)

    # The generated actions carry on into the next prompt, so let's split off the prompt
    results = result_cleaned.split("\n")
    results = [s.strip() for s in results]
    results = [s for s in results if len(s) > min_length]

    # Sometimes actions are generated with leading > ! . or ?. Likely the model trying to finish the prompt or start an action.
    result = results[0].strip().lstrip(" >!.?") if len(results) else ""

    # result = cut_trailing_quotes(result)
    logger.debug(
        "full suggested action '%r'. Cropped: '%r'. Split '%r'",
        result_raw,
        result,
        results,
    )

    # Often actions are cropped to sentence fragments; let's remove those. Or we could just turn up config_act["generate-number"]
    result = first_to_second_person(result)
    # Sometimes the suggestion starts with "You"; we add that on later anyway, so it could be removed here
    # result = re.sub("^ ?[Yy]ou try to ?", "", result)
    # result = re.sub("^ ?[Yy]ou start to ?", "", result)
    # result = re.sub("^ ?[Yy]ou ", "", result)
    logger.debug("suggested action after cleaning `%r`", result)
    return result
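
clean_suggested_action depends on helpers defined elsewhere in the module (standardize_punctuation, cut_trailing_sentence, first_to_second_person). As a rough idea of the first one, a hypothetical stand-in might simply normalize curly quotes to ASCII before the rest of the cleaning runs; the real helper may well do more:

# Hypothetical stand-in for standardize_punctuation; the real one may differ.
def standardize_punctuation(text):
    for fancy, plain in (("\u2018", "'"), ("\u2019", "'"),
                         ("\u201c", '"'), ("\u201d", '"')):
        text = text.replace(fancy, plain)
    return text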
Example #3
    def generate(self,
                 context,
                 prompt='',
                 temperature=None,
                 top_p=None,
                 top_k=None,
                 repetition_penalty=None,
                 depth=0):
        assert (top_k is not None)
        assert (temperature is not None)
        assert (top_p)
        assert (repetition_penalty)
        # logger.debug("BEFORE PROMPT_REPLACE: `%r`", prompt)

        # prompt = [self.prompt_replace(p) for p in prompt]

        # logger.debug("AFTER PROMPT_REPLACE is: `%r`", repr(prompt))
        assert (prompt + context)

        text = self.generate_raw(context,
                                 prompt,
                                 temperature=temperature,
                                 top_k=top_k,
                                 top_p=top_p,
                                 repetition_penalty=repetition_penalty,
                                 stop_tokens=torch.tensor(
                                     [[self.tokenizer.eos_token_id]]))

        logger.debug("Generated result is: `%r`", repr(text))

        result = self.result_replace(text)

        if (depth > 6) and len(result) == 0:
            # Sometimes it keeps generating a story starting with an action (">"). If it has tried a few
            # times and it keeps happening, let it keep action text that starts with ">".
            # We could just blacklist that token and force it to generate something else. TODO
            result = self.result_replace(text, allow_action=True)
            logger.info(
                "Model generated empty text after formatting `%r`. Trying to format less with allow_action=True. `%r`",
                text,
                result,
            )

            # same here as above
        if len(result) == 0:
            if depth < 20:
                logger.info("Model generated empty text trying again %r",
                            depth)
                return self.generate(context,
                                     prompt,
                                     temperature=temperature,
                                     top_p=top_p,
                                     top_k=top_k,
                                     repetition_penalty=repetition_penalty,
                                     depth=depth + 1)
            else:
                logger.warning(
                    "Model generated empty text %r times. Try another action",
                    depth)
        return result
Example #4
    def generate_raw(self,
                     context,
                     prompt='',
                     generate_num=None,
                     temperature=None,
                     top_k=None,
                     top_p=None,
                     repetition_penalty=None,
                     stop_tokens=None):
        assert (top_k is not None)
        assert (temperature is not None)
        assert (top_p)
        assert (repetition_penalty)

        context_tokens = memory_merge(prompt, context, self.tokenizer,
                                      self.max_history_tokens)

        # if os.environ.get("DEBUG_GPT2", False):
        logger.debug(
            "Text passing into model `%r`",
            self.tokenizer.decode(
                context_tokens,
                clean_up_tokenization_spaces=True,
                #skip_special_tokens=True,
            ),
        )
        generated = 0
        text = ""
        for _ in range(self.samples // self.batch_size):
            out = self.sample_sequence(
                context_tokens,
                generate_num=generate_num,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                stop_tokens=stop_tokens,
            )
            out = out[:, len(context_tokens):].tolist()
            for o in out:
                generated += 1
                #disabled clean up of spaces, see what effect this has TODO
                text = self.tokenizer.decode(
                    o,
                    clean_up_tokenization_spaces=False,
                    skip_special_tokens=True)
                if self.stop_token:
                    index = text.find(self.stop_token)
                    if index == -1:
                        index = None
                    text = text[:index]
                if stop_tokens is not None:
                    # Token-id stop sequences are already handled inside
                    # sample_sequence; here we only truncate the decoded text
                    # on string stop markers.
                    for stop_token in stop_tokens:
                        if not isinstance(stop_token, str):
                            continue
                        index = text.find(stop_token)
                        if index == -1:
                            index = None
                        text = text[:index]
        return text
Example #5
    def generate_raw(self,
                     context,
                     prompt='',
                     generate_num=None,
                     temperature=None,
                     top_k=None,
                     top_p=None,
                     repetition_penalty=None,
                     repetition_penalty_range=512,
                     repetition_penalty_slope=3.33,
                     stop_tokens=None):
        assert (top_k is not None)
        assert (temperature is not None)
        assert (top_p)
        assert (repetition_penalty)

        context_tokens = memory_merge(prompt, context, self.tokenizer,
                                      self.max_history_tokens)

        logger.debug(
            "Text passing into model `%r`",
            self.tokenizer.decode(
                context_tokens,
                clean_up_tokenization_spaces=True,
                # skip_special_tokens=True,
            ),
        )
        generated = 0
        text = ""
        for _ in range(self.samples // self.batch_size):
            out = self.sample_sequence(
                context_tokens,
                generate_num=generate_num,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                repetition_penalty_range=repetition_penalty_range,
                repetition_penalty_slope=repetition_penalty_slope,
                stop_tokens=stop_tokens,
            )
            text += out.text
            generated += 1
            # disabled clean up of spaces, see what effect this has TODO
            if self.stop_token:
                index = text.find(self.stop_token)
                if index == -1:
                    index = None
                text = text[:index]
            if stop_tokens is not None:
                # Token-id stop sequences are already handled inside
                # sample_sequence; here we only truncate on string stop markers.
                for stop_token in stop_tokens:
                    if not isinstance(stop_token, str):
                        continue
                    index = text.find(stop_token)
                    if index == -1:
                        index = None
                    text = text[:index]
        return text
Example #6
    def generate_raw(self,
                     prompt,
                     generate_num=None,
                     temperature=None,
                     stop_tokens=None):
        # The prompt is a list of strings; encode each one to tokens, then truncate the longest ones.
        context_tokens = [
            self.tokenizer.encode(p,
                                  add_special_tokens=False,
                                  max_length=self.max_history_tokens)
            for p in prompt
        ]
        truncate_multiple_sequences(context_tokens, self.max_history_tokens)
        context_tokens = list(itertools.chain(*context_tokens))

        # if os.environ.get("DEBUG_GPT2", False):
        logger.debug(
            "Text passing into model `%r`",
            self.tokenizer.decode(
                context_tokens,
                clean_up_tokenization_spaces=True,
                skip_special_tokens=True,
            ),
        )

        generated = 0
        text = ""
        for _ in range(self.samples // self.batch_size):
            out = self.sample_sequence(
                context_tokens,
                generate_num=generate_num,
                temperature=temperature,
                stop_tokens=stop_tokens,
            )
            out = out[:, len(context_tokens):].tolist()
            for o in out:
                generated += 1
                text = self.tokenizer.decode(o,
                                             clean_up_tokenization_spaces=True,
                                             skip_special_tokens=True)
                if self.stop_token:
                    index = text.find(self.stop_token)
                    if index == -1:
                        index = None
                    text = text[:index]
                if stop_tokens is not None:
                    # Token-id stop sequences are already handled inside
                    # sample_sequence; here we only truncate on string stop markers.
                    for stop_token in stop_tokens:
                        if not isinstance(stop_token, str):
                            continue
                        index = text.find(stop_token)
                        if index == -1:
                            index = None
                        text = text[:index]
        return text
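
truncate_multiple_sequences is not shown above. A plausible sketch, assuming it trims the longest sequence in place, one token at a time, until the combined length fits (which end it trims from is an assumption; the repository's version may differ):

# Hypothetical sketch of the truncation helper used above.
def truncate_multiple_sequences(seqs, max_length):
    # Drop tokens from the front of the longest sequence until everything
    # fits within max_length tokens combined.
    while sum(len(s) for s in seqs) > max_length:
        longest = max(seqs, key=len)
        longest.pop(0)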
Example #7
    def result_replace(self, result, allow_action=False):
        # logger.debug("BEFORE RESULT_REPLACE: `%s`", repr(result))

        result = cut_trailing_sentence(result, allow_action=allow_action)

        if len(result) == 0:
            return ""
        first_letter_capitalized = result[0].isupper()
        result = result.replace('."', '".')
        result = result.replace("#", "")
        result = result.replace("*", "")
        result = result.replace("\n\n", "\n")
        # result = first_to_second_person(result)

        if not first_letter_capitalized:
            result = result[0].lower() + result[1:]

        logger.debug("AFTER RESULT_REPLACE: `%r`. allow_action=%r",
                     result, allow_action)

        return result
Example #8
def sample_sequence(model,
                    length,
                    context,
                    temperature=1,
                    top_k=0,
                    top_p=0.9,
                    repetition_penalty=1.0,
                    device="cpu",
                    stop_tokens=None,
                    tokenizer=None):
    """Actually generate the tokens"""
    logger.debug('temp: {}    top_k: {}    top_p: {}    rep-pen: {}'.format(
        temperature, top_k, top_p, repetition_penalty))

    max_length = context.shape[1] + length  # check to see if greater than 2048?

    if settings.getboolean('force-cpu'):
        context = context.long().cpu()
    else:
        context = context.long().cuda()

    out = model.generate(
        context,
        do_sample=True,
        min_length=max_length,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        repetition_penalty_range=300,
        repetition_penalty_slope=3.33,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    ).long()

    generated = tokenizer.decode(out[0])

    return generated
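
Note that repetition_penalty_range and repetition_penalty_slope are not arguments of the stock transformers generate(); this call assumes a patched or forked transformers build that adds them. On an unpatched install, a rough equivalent without the sloped, range-limited penalty would look like this:

    # Sketch only: the same call on stock transformers, dropping the kwargs
    # that only the patched build understands.
    out = model.generate(
        context,
        do_sample=True,
        min_length=max_length,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    ).long()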
Example #9
    def generate(self, prompt, options=None, seed=None, depth=0):
        logger.debug("BEFORE PROMPT_REPLACE: `%r`", prompt)

        prompt = [self.prompt_replace(p) for p in prompt]

        # logger.debug("AFTER PROMPT_REPLACE is: `%r`", repr(prompt))

        text = self.generate_raw(prompt,
                                 stop_tokens=self.tokenizer.encode(
                                     ["<|endoftext|>", ">"]))

        logger.debug("Generated result is: `%r`", repr(text))

        result = self.result_replace(text)

        if (depth > 6) and len(result) == 0:
            # Sometimes it keeps generating a story starting with an action (">"). If it has tried a few
            # times and it keeps happening, let it keep action text that starts with ">".
            result = self.result_replace(text, allow_action=True)
            logger.info(
                "Model generated empty text after formatting `%r`. Trying to format less with allow_action=True. `%r`",
                text,
                result,
            )

        if len(result) == 0:
            if depth < 20:
                logger.info("Model generated empty text trying again %r",
                            depth)
                return self.generate(prompt + [" {}".format(depth)],
                                     seed=depth,
                                     depth=depth + 1)
            else:
                logger.warning(
                    "Model generated empty text %r times. Try another action",
                    depth)
        return result
Example #10
def play(generator):
    print("\n")

    with open(Path("interface", "mainTitle.txt"), "r",
              encoding="utf-8") as file:
        colPrint(file.read(), colors["title"], wrap=False)

    with open(Path("interface", "subTitle.txt"), "r",
              encoding="utf-8") as file:
        cols = termWidth
        for line in file:
            line = re.sub(r'\n', '', line)
            line = line[:cols]
            #fills in the graphic using reverse video mode substituted into the areas between |'s
            colPrint(
                re.sub(r'\|[ _]*(\||$)',
                       lambda x: '\x1B[7m' + x.group(0) + '\x1B[27m', line),
                colors['subtitle'], False)

    print()
    colPrint(
        "Go to https://github.com/cloveranon/Clover-Edition/ or email [email protected] for bug reports, help, and feature requests.",
        colors['subsubtitle'])

    while True:
        # May be needed to avoid out of mem
        gc.collect()
        torch.cuda.empty_cache()

        print("\n\n")

        colPrint(
            "0: Pick Prompt From File (Default if you type nothing)\n1: Write Custom Prompt",
            colors['menu'])

        if getNumberInput(1) == 1:
            with open(Path("interface", "prompt-instructions.txt"),
                      "r",
                      encoding="utf-8") as file:
                colPrint(file.read(), colors["instructions"], False)
            prompt = colInput("Prompt>", colors["main-prompt"],
                              colors["user-text"])
            context = colInput("Context>", colors["main-prompt"],
                               colors["user-text"])
            filename = colInput(
                "Name to save prompt as? (Leave blank for no save): ",
                colors["query"],
                colors["user-text"],
            )
            filename = re.sub(
                "-$", "",
                re.sub("^-", "", re.sub("[^a-zA-Z0-9_-]+", "-", filename)))
            if filename != "":
                with open(Path("prompts", filename + ".txt"),
                          "w",
                          encoding="utf-8") as f:
                    f.write(context + "\n" + prompt)
        else:
            prompt, context = selectFile()
        assert (prompt + context)

        instructions()

        print()
        colPrint("Generating story...", colors["loading-message"])

        story = newStory(generator, prompt, context)

        while True:
            # Generate suggested actions
            act_alts = settings.getint("action-sugg")
            if act_alts > 0:

                # TODO change this to two messages for different colors
                suggested_actions = []
                colPrint("\nSuggested actions:", colors["selection-value"])
                action_suggestion_lines = 2
                for i in range(act_alts):
                    suggested_action = story.getSuggestion()
                    if len(suggested_action.strip()) > 0:
                        j = len(suggested_actions)
                        suggested_actions.append(suggested_action)
                        suggestion = "{}> {}".format(j, suggested_action)
                        action_suggestion_lines += colPrint(
                            suggestion, colors["selection-value"])
                print()

            bell()
            action = colInput("> You ", colors["main-prompt"],
                              colors["user-text"])

            # Clear suggestions and user input
            if act_alts > 0:
                action_suggestion_lines += 2
                if not IN_COLAB:
                    clear_lines(action_suggestion_lines)

                    # Show user input again
                    # colPrint("\n> " + action.rstrip(), colors["user-text"], end="")

            setRegex = re.search("^/set ([^ ]+) ([^ ]+)$", action)
            if setRegex:
                if setRegex.group(1) in settings:
                    currentSettingValue = settings[setRegex.group(1)]
                    colPrint(
                        "Current Value of {}: {}     Changing to: {}".format(
                            setRegex.group(1), currentSettingValue,
                            setRegex.group(2)))
                    settings[setRegex.group(1)] = setRegex.group(2)
                    colPrint("Save config file?", colors["query"])
                    colPrint("Saving an invalid option will corrupt file!",
                             colors["error"])
                    if (colInput(
                            "y/n? >",
                            colors["selection-prompt"],
                            colors["selection-value"],
                    ) == "y"):
                        with open("config.ini", "w", encoding="utf-8") as file:
                            config.write(file)
                else:
                    colPrint("Invalid Setting", colors["error"])
                    instructions()
            elif action == "/menu":
                break
            elif action == "/restart":
                print()
                colPrint("Restarting story...", colors["loading-message"])

                story = newStory(generator, story.prompt, context)
                continue
            elif action == "/quit":
                exit()
            elif action == "/help":
                instructions()
            elif action == "/print":
                print("\nPRINTING\n")
                #TODO colorize printed story
                colPrint(str(story), colors["print-story"])
            elif action == '/retry':

                if len(story.story) == 1:
                    print()
                    colPrint("Restarting story...", colors["loading-message"])
                    story = newStory(generator, story.prompt, context)
                    continue
                else:
                    newaction = story.story[-1][0]

                colPrint(newaction, colors['user-text'], end='')
                story.story = story.story[:-1]
                result = "\n" + story.act(newaction)[0]

                if len(story.story) >= 2:
                    similarity = get_similarity(result, story.story[-2][1][0])
                    if similarity > 0.9:
                        story.story = story.story[:-1]
                        colPrint(
                            "Woops that action caused the model to start looping. Try a different action to prevent that.",
                            colors["error"],
                        )
                        continue
                colPrint(result, colors["ai-text"])

                continue

            elif action == '/revert':

                if len(story.story) == 1:
                    colPrint("You can't go back any farther. ",
                             colors["error"])
                    continue

                story.story = story.story[:-1]
                colPrint("Last action reverted. ", colors["message"])
                if len(story.story) < 2:
                    colPrint(story.prompt, colors["ai-text"])
                colPrint(story.story[-1][1][0], colors["ai-text"])

                continue

            elif action == "/alter":
                story.story[-1][1][0] = alterText(story.story[-1][1][0])
                if len(story.story) < 2:
                    colPrint(story.prompt, colors["ai-text"])
                else:
                    colPrint("\n" + story.story[-1][0] + "\n",
                             colors["transformed-user-text"])
                colPrint("\n" + story.story[-1][1][0] + "\n\n",
                         colors["ai-text"])

            elif action == "/prompt":
                story.prompt = alterText(story.prompt)
                if len(story.story) < 2:
                    colPrint(story.prompt, colors["ai-text"])
                else:
                    colPrint("\n" + story.story[-1][0] + "\n",
                             colors["transformed-user-text"])
                colPrint("\n" + story.story[-1][1][0] + "\n\n",
                         colors["ai-text"])

            else:
                if act_alts > 0:
                    # Options to select a suggestion action
                    if action in [
                            str(i) for i in range(len(suggested_actions))
                    ]:
                        action = suggested_actions[int(action)]

                original_action = action
                action = action.strip()
                #TODO debug stuff to delete
                if action != original_action:
                    logger.debug("STRIPPED WHITE SPACE OFF ACTION %r vs %r",
                                 original_action, action)

                # Crop actions to a max length
                #action = action[:4096]

                if action != "":

                    # Roll a 20-sided die to make things interesting
                    d = random.randint(1, 20)
                    logger.debug("roll d20=%s", d)

                    # If it says 'You say "' then it's still dialouge. Normalise it by removing `You say `, we will add again soon
                    action = re.sub("^ ?[Yy]ou say [\"']", '"', action)
                    if any(action.lstrip().startswith(t) for t in ['"', "'"]):
                        if settings.getboolean("action-d20"):
                            action = d20ify_speech(action, d)
                        else:
                            action = "You say " + action
                        logger.info(
                            "%r. %r, %r", action,
                            any(action.lstrip().startswith(t)
                                for t in ['"', "'"]),
                            settings.getboolean("action-d20"))
                    else:
                        action = first_to_second_person(action)
                        if not action.lower().startswith(
                                "you ") and not action.lower().startswith(
                                    "i "):
                            action = action[0].lower() + action[1:]
                            # roll a d20
                            if settings.getboolean("action-d20"):
                                action = d20ify_action(action, d)
                            else:
                                action = "You " + action

                        if action[-1] not in [".", "?", "!"]:
                            action = action + "."

                action = "\n> " + action + "\n"

                colPrint(
                    "\n>" + action.lstrip().lstrip("> \n"),
                    colors["transformed-user-text"],
                )
                #TODO check if leading white space makes sense
                result = "\n" + story.act(action)[0]

                #TODO: Replace all this nonsense
                if len(story.story) >= 2:
                    similarity = get_similarity(result, story.story[-2][1][0])
                    if similarity > 0.9:
                        story.story = story.story[:-1]
                        colPrint(
                            "Woops that action caused the model to start looping. Try a different action to prevent that.",
                            colors["error"],
                        )
                        continue

                if player_won(result):
                    colPrint(result + "\n CONGRATS YOU WIN", colors["message"])
                    break
                elif player_died(result):
                    colPrint(result, colors["ai-text"])
                    colPrint("YOU DIED. GAME OVER", colors["error"])
                    colPrint(
                        "\nOptions:\n0)Start a new game\n1)\"I'm not dead yet!\" (If you didn't actually die)",
                        colors["menu"],
                    )
                    choice = getNumberInput(1)
                    if choice == 0:
                        break
                    else:
                        colPrint("Sorry about that...where were we?",
                                 colors["query"])
                colPrint(result, colors["ai-text"])
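
The loop-detection checks above compare a new result to the previous one with get_similarity, which is not shown. A hypothetical stand-in using difflib (the repository's metric may differ, for example a fuzzy-matching ratio scaled differently):

from difflib import SequenceMatcher


# Hypothetical stand-in for get_similarity used in the loop-detection checks.
def get_similarity(a, b):
    # Similarity ratio in [0, 1]; values above ~0.9 are treated as the model
    # looping on itself.
    return SequenceMatcher(None, a, b).ratio()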
Example #11
def sample_sequence(model,
                    length,
                    context,
                    temperature=1,
                    top_k=0,
                    top_p=0.9,
                    repetition_penalty=1.0,
                    repetition_penalty_range=512,
                    repetition_penalty_slope=3.33,
                    device="cpu",
                    stop_tokens=None,
                    tokenizer=None):
    """Actually generate the tokens"""
    logger.debug(
        'temp: {}    top_k: {}    top_p: {}    rep-pen: {}    rep-pen-range: {}    rep-pen-slope: {}'
        .format(temperature, top_k, top_p, repetition_penalty,
                repetition_penalty_range, repetition_penalty_slope))
    context_tokens = context
    context = torch.tensor(context, dtype=torch.long, device=device)
    # context = context.repeat(num_samples, 1)
    generated = context
    USE_PAST = True
    next_token = context
    pasts = None
    clines = 0

    penalty = None
    if (repetition_penalty_range is not None and repetition_penalty_slope is not None
            and repetition_penalty_range > 0):
        penalty = (torch.arange(repetition_penalty_range) /
                   (repetition_penalty_range - 1)) * 2. - 1
        penalty = (repetition_penalty_slope *
                   penalty) / (1 + torch.abs(penalty) *
                               (repetition_penalty_slope - 1))
        penalty = 1 + ((penalty + 1) / 2) * (repetition_penalty - 1)

    with torch.no_grad():
        for j in range(length):
            # why would we ever not use past?
            # is generated and next_token always same thing?
            if not USE_PAST:
                input_ids_next = generated
                pasts = None
            else:
                input_ids_next = next_token

            # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
            model_kwargs = {"past": pasts, "use_cache": True}
            model_inputs = model.prepare_inputs_for_generation(
                generated.unsqueeze(0), **model_kwargs)
            model_outputs = model(**model_inputs, return_dict=True)
            logits, pasts = model_outputs.logits, model_outputs.past_key_values
            logits = logits[0, -1, :].float()

            # Originally the order was Temperature, Repetition Penalty, then top-k/p
            if settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            logits = logits / (temperature if temperature > 0 else 1.0)

            # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858) plus range limit
            if repetition_penalty != 1.0:
                if penalty is not None:
                    penalty_len = min(generated.shape[0],
                                      repetition_penalty_range)
                    penalty_context = generated[-repetition_penalty_range:]
                    score = torch.gather(logits, 0, penalty_context)
                    penalty = penalty.type(score.dtype).to(score.device)
                    penalty_window = penalty[-penalty_len:]
                    score = torch.where(score < 0, score * penalty_window,
                                        score / penalty_window)
                    logits.scatter_(0, penalty_context, score)
                else:
                    score = torch.gather(logits, 0, generated)
                    score = torch.where(score < 0, score * repetition_penalty,
                                        score / repetition_penalty)
                    logits.scatter_(0, generated, score)

            if not settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            if temperature == 0:  # greedy sampling:
                next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
            else:
                next_token = torch.multinomial(F.softmax(logits, dim=-1),
                                               num_samples=1)
            generated = torch.cat((generated, next_token), dim=-1)
            # Decode into plain text
            o = generated[len(context_tokens):].tolist()
            generated.text = tokenizer.decode(
                o,
                clean_up_tokenization_spaces=False,
                skip_special_tokens=True)
            if use_ptoolkit():
                clear_lines(clines)
                generated.text = format_result(generated.text)
                clines = output(generated.text, "ai-text")
            if ((stop_tokens is not None) and (j > 4)
                    and (next_token[0] in stop_tokens)):
                # Why require a minimum number of tokens (j > 4)? The model sometimes
                # starts with whitespace, which gets stripped anyway; requiring a few
                # tokens first means we don't stop just because of "\n " or similar.
                logger.debug(
                    "Stopping generation on a stop token. stop_tokens=`%s`, next_token=`%s`, step=%s",
                    stop_tokens,
                    next_token,
                    j,
                )
                break
    clear_lines(clines)
    return generated
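
To make the sloped repetition-penalty curve above concrete, here is a small standalone illustration of the same arithmetic for a tiny window (the values chosen are illustrative only):

import torch

# The penalty ramps from ~1.0 on the oldest token in the window up to the
# full repetition_penalty on the most recent token.
repetition_penalty = 1.2
repetition_penalty_range = 5
repetition_penalty_slope = 3.33

penalty = (torch.arange(repetition_penalty_range) /
           (repetition_penalty_range - 1)) * 2. - 1
penalty = (repetition_penalty_slope * penalty) / (
    1 + torch.abs(penalty) * (repetition_penalty_slope - 1))
penalty = 1 + ((penalty + 1) / 2) * (repetition_penalty - 1)
print(penalty)  # roughly tensor([1.00, 1.02, 1.10, 1.18, 1.20])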
Example #12
def sample_sequence(
        model,
        length,
        context,
        temperature=1,
        top_k=0,
        top_p=0.9,
        repetition_penalty=1.0,
        device="cpu",
        stop_tokens=None,
        tokenizer=None
):
    """Actually generate the tokens"""
    logger.debug(
        'temp: {}    top_k: {}    top_p: {}    rep-pen: {}'.format(temperature, top_k, top_p, repetition_penalty))
    context_tokens = context
    context = torch.tensor(context, dtype=torch.long, device=device)
    # context = context.repeat(num_samples, 1)
    generated = context
    USE_PAST = True
    next_token = context
    pasts = None
    clines = 0
    with torch.no_grad():
        for j in range(length):
            # why would we ever not use past?
            # is generated and next_token always same thing?
            if not USE_PAST:
                input_ids_next = generated
                pasts = None
            else:
                input_ids_next = next_token

            # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
            logits, pasts = model(input_ids=input_ids_next, past=pasts)
            logits = logits[-1, :].float()

            # TODO: rewrite this logic
            if settings.getboolean('sparse-gen'):
                # The entmax alpha is assumed to come from the 'sparse-level'
                # config option, read as a float.
                probs = entmax_bisect(logits, dim=-1,
                                      alpha=settings.getfloat('sparse-level'))
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                # Originally the order was Temperature, Repetition Penalty, then top-k/p
                if settings.getboolean('top-p-first'):
                    logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

                logits = logits / (temperature if temperature > 0 else 1.0)

                # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
                for k in set(generated.tolist()):
                    logits[k] /= repetition_penalty

                if not settings.getboolean('top-p-first'):
                    logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

                if temperature == 0:  # greedy sampling:
                    next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
                else:
                    next_token = torch.multinomial(
                        F.softmax(logits, dim=-1), num_samples=1
                    )
            generated = torch.cat((generated, next_token), dim=-1)
            # Decode into plain text
            o = generated[len(context_tokens):].tolist()
            generated.text = tokenizer.decode(
                o, clean_up_tokenization_spaces=False, skip_special_tokens=True
            )
            if use_ptoolkit():
                clear_lines(clines)
                generated.text = format_result(generated.text)
                clines = output(generated.text, "ai-text")
            if (
                    (stop_tokens is not None)
                    and (j > 4)
                    and (next_token[0] in stop_tokens)
            ):
                # Why require a minimum number of tokens (j > 4)? The model sometimes
                # starts with whitespace, which gets stripped anyway; requiring a few
                # tokens first means we don't stop just because of "\n " or similar.
                logger.debug(
                    "Stopping generation on a stop token. stop_tokens=`%s`, next_token=`%s`, step=%s",
                    stop_tokens,
                    next_token,
                    j,
                )
                break
    clear_lines(clines)
    return generated
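
top_k_top_p_filtering is used by all of these samplers but never shown. A sketch of the usual single-sequence (1-D logits) implementation, close to the version in the Hugging Face example scripts; the repository's copy may differ slightly:

import torch
import torch.nn.functional as F


def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float("inf")):
    """Filter a 1-D logits vector using top-k and/or nucleus (top-p) filtering."""
    if top_k > 0:
        top_k = min(top_k, logits.size(-1))
        # Remove every logit below the k-th largest one.
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value
    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        # Remove tokens whose cumulative probability exceeds top_p, shifted by
        # one position so the single most likely token is always kept.
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits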
Example #13
def sample_sequence(model,
                    length,
                    context,
                    num_samples=1,
                    temperature=1,
                    top_k=0,
                    top_p=0.9,
                    repetition_penalty=1.0,
                    is_xlnet=False,
                    is_xlm_mlm=False,
                    xlm_mask_token=None,
                    xlm_lang=None,
                    device="cpu",
                    stop_tokens=None,
                    tokenizer=None):
    logger.debug('temp: {}    top_k: {}    top_p: {}    rep-pen: {}'.format(
        temperature, top_k, top_p, repetition_penalty))
    context = torch.tensor(context, dtype=torch.long, device=device)
    context = context.unsqueeze(0).repeat(num_samples, 1)
    generated = context
    USE_PAST = True
    next_token = context
    outputs = None
    with torch.no_grad():
        for j in range(length):
            #why would we ever not use past?
            #is generated and next_token always same thing?
            if USE_PAST:
                past = outputs[1] if outputs is not None else None
                inputs = {"input_ids": next_token, "past": past}
            else:
                inputs = {"input_ids": generated}

            outputs = model(
                **inputs
            )  # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)

            logits = outputs[0][:, -1, :].float()

            #Originally the order was Temperature, Repetition Penalty, then top-k/p
            if settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            logits = logits / (temperature if temperature > 0 else 1.0)

            # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
            for i in range(num_samples):
                for k in set(generated[i].tolist()):
                    logits[i, k] /= repetition_penalty

            if not settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            if temperature == 0:  # greedy sampling:
                next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
            else:
                next_token = torch.multinomial(F.softmax(logits, dim=-1),
                                               num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
            if ((stop_tokens is not None) and (j > 4)
                    and (next_token[0][0] in stop_tokens)):
                # Why require a minimum number of tokens (j > 4)? The model sometimes
                # starts with whitespace, which gets stripped anyway; requiring a few
                # tokens first means we don't stop just because of "\n " or similar.
                logger.debug(
                    "Stopping generation on a stop token. stop_tokens=`%s`, next_token=`%s`, step=%s",
                    stop_tokens,
                    next_token,
                    j,
                )
                break
    return generated