Example #1
def getTermWidth():
    # get_terminal_size() can report 0 columns when no real terminal is
    # attached, so fall back to an effectively unlimited width.
    termWidth = get_terminal_size()[0]
    if termWidth < 5:
        logger.warning("Your detected terminal width is: " + str(termWidth))
        termWidth = 999999999
    return termWidth
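A minimal usage sketch (illustrative, not part of the original module; the shutil import is an assumption about where get_terminal_size comes from, and logger is provided elsewhere in the project):

import textwrap
from shutil import get_terminal_size  # assumed source of get_terminal_size

width = getTermWidth()
print(textwrap.fill("Some long block of story text to wrap to the terminal.", width))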
Example #2
    def __init__(self,
                 generate_num=60,
                 temperature=0.4,
                 top_k=40,
                 top_p=0.9,
                 dtype=DTYPE,
                 model_path: Union[str,
                                   Path] = Path('models',
                                                'pytorch-gpt2-xl-aid2-v5'),
                 repetition_penalty=1,
                 repetition_penalty_range=512,
                 repetition_penalty_slope=3.33):
        self.generate_num = generate_num
        self.temp = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.samples = 1
        self.dtype = dtype
        self.repetition_penalty = repetition_penalty
        self.repetition_penalty_range = repetition_penalty_range
        self.repetition_penalty_slope = repetition_penalty_slope
        self.batch_size = 1
        self.max_history_tokens = 1024 - generate_num
        self.stop_token = "<|endoftext|>"

        if isinstance(model_path, str):
            self.checkpoint_path = model_path
            logger.warning(
                f"Using DEBUG MODE! This will load one of the generic (non-finetuned) GPT2 models. "
                f"Selected: {model_path}")
        elif isinstance(model_path, Path):
            self.checkpoint_path = model_path
            if not self.checkpoint_path.exists():
                raise FileNotFoundError(
                    "Could not find {} Make sure to download a pytorch model and put it in the models directory!"
                    .format(str(self.checkpoint_path)))
        else:
            raise ValueError(
                f"model_path must be either str or Path, got {type(model_path)}"
            )

        self.device = torch.device("cuda" if self.dtype ==
                                   torch.float16 else "cpu")
        logger.info("Using device={}, checkpoint={}, dtype={}".format(
            self.device, str(self.checkpoint_path), self.dtype))

        # Load tokenizer and model
        model_class, tokenizer_class = (
            MODEL_CLASSES["gpt2-experimental"]
            if settings.getboolean("gpt2_experimental")
            else MODEL_CLASSES["gpt2"])
        if "gpt-neo" in str(model_path):
            self.max_history_tokens = 2048 - generate_num
            model_class = GPTNeoForCausalLM
        self.tokenizer = tokenizer_class.from_pretrained(
            str(self.checkpoint_path))
        self.model = model_class.from_pretrained(str(self.checkpoint_path))
        self.model.to(self.dtype).to(self.device)
        self.model.eval()
Example #3
    def __init__(
        self,
        generate_num=60,
        temperature=0.4,
        top_k=40,
        top_p=0.9,
        dtype=DTYPE,
        model_path: Union[str, Path] = Path('models', 'gpt-neo-2.7B-horni'),
        repetition_penalty=1,
    ):
        self.generate_num = generate_num
        self.temp = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.samples = 1
        self.dtype = dtype
        self.repetition_penalty = repetition_penalty
        self.batch_size = 1
        self.max_history_tokens = 1024 - generate_num
        self.stop_token = "<|endoftext|>"

        if isinstance(model_path, str):
            self.checkpoint_path = model_path
            logger.warning(
                f"Using DEBUG MODE! This will load one of the generic (non-finetuned) GPT2 models. "
                f"Selected: {model_path}")
        elif isinstance(model_path, Path):
            self.checkpoint_path = model_path
            if not self.checkpoint_path.exists():
                raise FileNotFoundError(
                    "Could not find {} Make sure to download a pytorch model and put it in the models directory!"
                    .format(str(self.checkpoint_path)))
        else:
            raise ValueError(
                f"model_path must be either str or Path, got {type(model_path)}"
            )

        self.device = torch.device("cuda" if self.dtype ==
                                   torch.float16 else "cpu")
        logger.info("Using device={}, checkpoint={}, dtype={}".format(
            self.device, str(self.checkpoint_path), self.dtype))

        # Load tokenizer and model
        model_class, tokenizer_class = MODEL_CLASSES["gpt_neo"]
        self.checkpoint = torch.load(Path(model_path, 'pytorch_model.bin'),
                                     map_location='cpu')
        self.tokenizer = tokenizer_class.from_pretrained(Path(model_path))
        self.model = model_class.from_pretrained(model_path,
                                                 state_dict=self.checkpoint)
        self.model.to(self.dtype).to(self.device)
        self.model.eval()
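The excerpt shows only the constructor; a hypothetical instantiation could look like this (GPT2Generator is a placeholder name, since the enclosing class is not shown in the excerpt):

generator = GPT2Generator(
    generate_num=80,
    temperature=0.7,
    model_path=Path('models', 'gpt-neo-2.7B-horni'),
)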
Example #4
    def __init__(
        self,
        generate_num=60,
        temperature=0.4,
        top_k=40,
        top_p=0.9,
        dtype=DTYPE,
        model_path=Path('models', 'pytorch-gpt2-xl-aid2-v5'),
        censor=False,
        repetition_penalty=1,
    ):
        self.generate_num = generate_num
        self.temp = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.censor = censor
        self.samples = 1
        self.dtype = dtype
        self.repetition_penalty = repetition_penalty
        self.batch_size = 1
        self.max_history_tokens = 1024 - generate_num
        self.stop_token = "<|endoftext|>"

        self.checkpoint_path = model_path
        if not self.checkpoint_path.exists():
            raise FileNotFoundError(
                "Could not find {} Make sure to download a pytorch model and put it in the models directory!"
                .format(str(self.checkpoint_path)))

        if os.environ.get("DEBUG_GPT2", False):
            self.checkpoint_path = Path('gpt2')
            logger.warning(
                "using DEBUG_GPT2 MODE! This is just for devs to quickly check a small GPT2 model with poor output"
            )
        self.device = torch.device("cuda" if self.dtype ==
                                   torch.float16 else "cpu")
        logger.info("Using device={}, checkpoint={}, dtype={}".format(
            self.device, str(self.checkpoint_path), self.dtype))

        # Load tokenizer and model
        model_class, tokenizer_class = MODEL_CLASSES["gpt2"]
        self.tokenizer = tokenizer_class.from_pretrained(self.checkpoint_path)
        self.model = model_class.from_pretrained(self.checkpoint_path)
        self.model.to(self.dtype).to(self.device)
        self.model.eval()
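This variant swaps in the stock 'gpt2' checkpoint when the DEBUG_GPT2 environment variable is set; a sketch of toggling it (the variable name comes from the snippet above, the class name is a placeholder):

import os

os.environ["DEBUG_GPT2"] = "1"  # any non-empty value enables the fallback
# generator = GPT2Generator()   # hypothetical class; would load the small 'gpt2' model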
Example #5
        if 'VSCODE_PID' in os.environ:  # pragma: no cover
            raise ImportError("vscode")
    except ImportError:
        if get_terminal_size()[0] == 0 or 'google.colab' in sys.modules:
            return True
        return False
    else:
        return True


IN_COLAB = _in_colab()
logger.info("Colab detected: {}".format(IN_COLAB))
IN_COLAB = IN_COLAB or settings.getboolean('colab-mode')
if IN_COLAB:
    logger.warning(
        "Colab mode enabled, disabling line clearing and readline to avoid colab bugs."
    )
else:
    try:
        import readline
        logger.info(
            'readline has been imported. This enables a number of editing features but may cause bugs for colab users.'
        )
    except ModuleNotFoundError:
        pass

termWidth = get_terminal_size()[0]
if termWidth < 5:
    logger.warning("Your detected terminal width is: " +
                   str(get_terminal_size()[0]))
    termWidth = 999999999
Example #6
import os
from pathlib import Path
from typing import Union

import torch
import torch.nn.functional as F
import re
from gpt2 import GPT2LMHeadModelExperimental
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
from getconfig import settings, logger
from utils import cut_trailing_sentence, output, clear_lines, format_result, use_ptoolkit

if not settings.getboolean('force-cpu') and not torch.cuda.is_available():
    logger.warning('CUDA is not available, you are limited to CPU only.')

DTYPE = torch.float32 if ((not torch.cuda.is_available()) or
                          settings.getboolean('force-cpu')) else torch.float16
logger.info('Cuda Available: {}    Force CPU: {}    Precision: {}'.format(
    torch.cuda.is_available(), settings.getboolean('force-cpu'),
    '32-bit' if DTYPE == torch.float32 else '16-bit'))

# warnings.filterwarnings("ignore")
MODEL_CLASSES = {
    "gpt2": (GPT2LMHeadModel, GPT2Tokenizer),
    "gpt2-experimental": (GPT2LMHeadModelExperimental, GPT2Tokenizer),
}


def getTokens(tokenizer, l):
    # Encode the given text into a list of token ids.
    return tokenizer.encode(l)
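A sketch of how such a helper might be used to keep prompt history inside the model's context budget (the text and the 60-token reservation are illustrative; the 1024 window matches the __init__ snippets above):

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
history = 'You enter the dark cave. ' * 200
budget = 1024 - 60  # context window minus tokens reserved for generation
ids = getTokens(tokenizer, history)
ids = ids[-budget:]  # keep only the most recent tokens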
Example #7
        'prompt-toolkit')


def clear_lines(n):
    """Clear the last line in the terminal."""
    if in_colab() or settings.getboolean('colab-mode'):
        # this wont work in colab etc
        return
    screen_code = "\033[1A[\033[2K"  # up one line, and clear line
    for _ in range(n):
        print(screen_code, end="\r")


if in_colab():
    logger.warning(
        "Colab mode enabled, disabling line clearing and readline to avoid colab bugs."
    )
else:
    try:
        if settings.getboolean('prompt-toolkit'):
            from inline_editor import edit_multiline
            from prompt_toolkit import prompt as ptprompt
            from prompt_toolkit import print_formatted_text
            from prompt_toolkit.styles import Style
            from prompt_toolkit.formatted_text import to_formatted_text, HTML
        else:
            raise ModuleNotFoundError

        logger.info(
            'Python Prompt Toolkit has been imported. This enables a number of editing features but may cause bugs for colab users.'
        )
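A standalone sketch of the same ANSI escape trick used by clear_lines above (illustrative, not part of the original module):

import time

for i in range(3):
    print("line", i)
time.sleep(1)
for _ in range(2):
    print("\033[1A\033[2K", end="\r")  # cursor up one line, then erase it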
Example #8
                'IPKernelApp' not in get_ipython().config):  # pragma: no cover
            raise ImportError("console")
        if 'VSCODE_PID' in os.environ:  # pragma: no cover
            raise ImportError("vscode")
    except ImportError:
        return False
    else:
        return True


is_notebook = _is_notebook()
logger.info("Notebook detected: {}".format(is_notebook))

termWidth = get_terminal_size()[0]
if termWidth < 5:
    logger.warning("Your detected terminal width is: " +
                   str(get_terminal_size()[0]))
    termWidth = 999999999


# ECMA-48 set graphics codes for the curious. Check out "man console_codes"
def colPrint(text, col="0", wrap=True, end=None):
    if wrap:
        width = settings.getint("text-wrap-width")
        width = 999999999 if width < 2 else width
        width = min(width, termWidth)
        text = textwrap.fill(text, width, replace_whitespace=False)
    print("\x1B[{}m{}\x1B[{}m".format(col, text, colors["default"]), end=end)
    return text.count('\n') + 1
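
# Illustrative usage (not part of the original module): print wrapped text
# in green (SGR code 32); colPrint resets to colors["default"] afterwards.
#
#     lines = colPrint("It was a dark and stormy night...", col="32")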


def colInput(str, col1=colors["default"], col2=colors["default"]):