Example #1
    def generate_and_download_framework(
            self, metadata: NetworkMetadata,
            workspace: NNFolderWorkspace) -> NetworkModels:

        cache_variant = False
        if metadata.other.kv_cache:
            cache_variant = True

        trt_gpt2_config = self.config
        metadata_serialized = trt_gpt2_config.get_metadata_string(metadata)
        workspace_dir = workspace.get_path()

        pytorch_model_dir = os.path.join(workspace_dir, metadata_serialized)
        # We keep track of the generated torch location for cleanup later
        self.torch_gpt2_dir = pytorch_model_dir

        model = None

        if not os.path.exists(pytorch_model_dir):
            # Generate the pre-trained weights
            # from_pretrained is a classmethod; call it on the class and pass
            # the cache setting directly instead of discarding a fresh config.
            model = GPT2LMHeadModel.from_pretrained(metadata.variant,
                                                    use_cache=cache_variant)
            model.save_pretrained(pytorch_model_dir)
            print("PyTorch model saved to {}".format(pytorch_model_dir))
        else:
            print(
                "Frameworks file already exists, skipping generation and loading from file instead."
            )
            model = GPT2LMHeadModel.from_pretrained(pytorch_model_dir,
                                                    use_cache=cache_variant)

        root_onnx_model_name = "{}.onnx".format(metadata_serialized)
        root_onnx_model_fpath = os.path.join(os.getcwd(), workspace_dir,
                                             root_onnx_model_name)
        onnx_model_fpath = root_onnx_model_fpath

        gpt2 = GPT2TorchFile(model, metadata)
        self.onnx_gpt2 = gpt2.as_onnx_model(onnx_model_fpath,
                                            force_overwrite=False)

        onnx_models = [
            NetworkModel(
                name=GPT2ModelTRTConfig.NETWORK_DECODER_SEGMENT_NAME,
                fpath=self.onnx_gpt2.fpath,
            )
        ]
        torch_models = [
            NetworkModel(
                name=GPT2ModelTRTConfig.NETWORK_DECODER_SEGMENT_NAME,
                fpath=pytorch_model_dir,
            )
        ]

        return NetworkModels(torch=torch_models, onnx=onnx_models, trt=None)
Example #2
def main():
    # Config
    config = InferenceConfig()
    gpt_config = GPT2Config.from_json_file(config.model_config_path)

    # torch related
    torch.set_grad_enabled(False)
    torch.manual_seed(config.random_seed)

    # Logger
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("[%(asctime)s] %(message)s"))
    logger.addHandler(handler)

    # Text Utils
    logging.info(f"loading Tokenizer...")
    tokenizer = GPT2Tokenizer(config.tokenizer_vocab_path,
                              config.tokenizer_merge_path)

    # Forward Model
    logging.info(f"loading Forward Model...")
    forward_model = GPT2LMHeadModel(gpt_config)
    forward_model.load_state_dict(
        load_model_weight(gpt_config, config.forward_model_path))

    # Backward Model
    logging.info(f"loading Backward Model...")
    backward_model = GPT2LMHeadModel(gpt_config)
    backward_model.load_state_dict(
        load_model_weight(gpt_config, config.backward_model_path))

    # Example
    example_contexts = [
        "<|endoftext|>".join(["How are you doing?"]),
        "<|endoftext|>".join(["Does money buy happiness?"]),
        "<|endoftext|>".join([
            "Does money buy happiness?",
            "Depends how much money you spend on it .",
        ]),
        "<|endoftext|>".join([
            "Does money buy happiness?",
            "Depends how much money you spend on it .",
            "What is the best way to buy happiness ?",
        ]),
    ]
    inferencer = Inferencer(config, tokenizer, forward_model, backward_model)
    results = inferencer.run(example_contexts)

    # Use a distinct name per item so the outer `results` list is not rebound
    for context, result in zip(example_contexts, results):
        logging.info(f"Example Context: {context}")
        for i, reply in enumerate(result):
            logging.info(f"Output Utterance Top-{i+1}: {reply}")
Example #3
    def __init__(self, config):
        medium_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16)
        model = GPT2LMHeadModel(medium_config)

        print("Step 1/3: Downloading weights [823 MB]...")
        wget.download(
            "https://convaisharables.blob.core.windows.net/lsp/multiref/medium_ft.pkl",
            "/tmp/medium_ft.pkl",
        )

        print("Step 2/3: Loading weights...")
        weights = torch.load("/tmp/medium_ft.pkl")
        weights["lm_head.weight"] = weights["lm_head.decoder.weight"]
        weights.pop("lm_head.decoder.weight", None)

        print("Step 3/3: Loading a model...")
        model.load_state_dict(weights)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using device: {device}")
        model.to(device)
        model.eval()

        self.device = device
        self.model = model
        self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        print("Model is ready!")
Example #4
    def __init__(self, model_path, generation_type, use_finetuned=True):
        self.model_path = model_path
        self.batch_size = int(args["--batch-size"])

        self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

        self.MAX_LEN = {
            GENERATION_TYPE_SMALL: 20,
            GENERATION_TYPE_LARGE: 500
        }[generation_type]
        logger.info(
            f"Using {generation_type} for decoding, MAX_LEN={self.MAX_LEN}")
        if use_finetuned:
            logger.info("Using a finetuned model")
            self.config = GPT2Config.from_pretrained(self.model_path)
            model = GPT2LMHeadModel.from_pretrained(self.model_path)
            with open(f"{self.model_path}/special_tokens_map.json", "r") as f:
                special_tokens = json.load(f)
            self.tokenizer.add_special_tokens(special_tokens)
        else:
            logger.info("NOT using a finetuned model")
            model = GPT2LMHeadModel(config=GPT2Config.from_pretrained(
                pretrained_model_name_or_path=self.model_path))
        self.model = model.cuda()
        self.model.eval()
Example #5
    def __init__(self, args, task):
        super().__init__(task.target_dictionary)

        if not has_hf:
            raise ImportError(
                '\n\nPlease install huggingface/transformers with:'
                '\n\n  pip install transformers'
                '\n\nOr to make local edits, install the submodule:'
                '\n\n  git submodule update --init '
                'fairseq/models/huggingface/transformers')

        config = GPT2Config(
            vocab_size=len(task.target_dictionary),
            n_positions=args.max_target_positions + 1,
            n_ctx=args.max_target_positions,
            n_embd=args.embed_dim,
            n_layer=args.num_layers,
            n_head=args.num_attention_heads,
            resid_pdrop=args.dropout,
            embd_pdrop=args.dropout,
            attn_pdrop=args.attention_dropout,
            layer_norm_epsilon=1e-6,
        )
        self.model = GPT2LMHeadModel(config)

        # set zero embedding for padding symbol
        self.pad_idx = task.target_dictionary.pad()
        self.model.transformer.wte.weight.data[self.pad_idx].zero_()
        self.model.transformer.wpe.weight.data[0].zero_()
Example #6
def load_model(target_folder, config):
    # Parse parameters
    model_size = config.get('model', 'model_size')
    no_cuda = config.getboolean('model', 'no_cuda')

    logger.info("Loading the model...")
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
    # Tokenizer
    tokenizer = GPT2Tokenizer(os.path.join(target_folder, 'vocab.json'),
                              os.path.join(target_folder, 'merges.txt'))
    # Model config (renamed so it does not shadow the configparser `config` argument)
    model_config = GPT2Config.from_json_file(
        os.path.join(target_folder, 'config.json'))
    # Weights
    state_dict_path = glob(os.path.join(target_folder, '*.pkl'))[0]
    state_dict = torch.load(state_dict_path, map_location=device)
    if model_size == 'small':
        for key in list(state_dict.keys()):
            state_dict[key.replace('module.', '')] = state_dict.pop(key)
    state_dict['lm_head.weight'] = state_dict['lm_head.decoder.weight']
    state_dict.pop("lm_head.decoder.weight", None)
    # Model
    model = GPT2LMHeadModel(model_config)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model, tokenizer
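
# A minimal usage sketch for load_model() above (not from the original source).
# It assumes an INI-style config with a [model] section and a folder that
# contains vocab.json, merges.txt, config.json and a *.pkl state dict; the
# folder path, prompt and generation settings are illustrative only.
import configparser
import torch

cfg = configparser.ConfigParser()
cfg.read_dict({"model": {"model_size": "small", "no_cuda": "false"}})
model, tokenizer = load_model("./dialogpt-small", cfg)

prompt_ids = torch.tensor([tokenizer.encode("How are you?")])
prompt_ids = prompt_ids.to(next(model.parameters()).device)
reply_ids = model.generate(prompt_ids, max_length=40, pad_token_id=50256)
print(tokenizer.decode(reply_ids[0].tolist()))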
Example #7
def model_fn(model_dir):
    logger.info('Loading the model.')

    vocab_file_path = os.path.join(model_dir, 'vocab.json')
    merge_file_path = os.path.join(model_dir, 'merges.txt')
    model_file_path = os.path.join(model_dir, 'lyric_model.bin')

    tokenizer = MyTokenizer(vocab_file_path, merge_file_path)
    bos = tokenizer.convert_tokens_to_ids('<s>')
    eos = tokenizer.convert_tokens_to_ids('</s>')
    pad = tokenizer.convert_tokens_to_ids('<pad>')
    unk = tokenizer.convert_tokens_to_ids('<unk>')

    config = GPT2Config(vocab_size=52003,
                        resid_pdrop=0,
                        embd_pdrop=0,
                        attn_pdrop=0,
                        summary_first_dropout=0)

    model = GPT2LMHeadModel(config)

    model.load_state_dict(torch.load(model_file_path, map_location=device),
                          strict=False)
    model.to(device)

    return model, tokenizer
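
# A hedged sketch of a matching SageMaker predict_fn handler; it is not part
# of the original source. It assumes MyTokenizer exposes encode()/decode()
# similar to the Hugging Face tokenizers and that input_fn delivers plain
# text; `device` and `torch` are the module-level names already used above.
def predict_fn(input_data, model_and_tokenizer):
    model, tokenizer = model_and_tokenizer
    input_ids = torch.tensor([tokenizer.encode(input_data)]).to(device)
    with torch.no_grad():
        output_ids = model.generate(input_ids, max_length=128)
    return tokenizer.decode(output_ids[0].tolist())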
Example #8
    def build_model(self):
        """Build the GPT-2 generation model."""
        # Initialize the tokenizer (a BERT tokenizer is used here)
        self.tokenizer = BertTokenizer(vocab_file=self.args.vocab_path)
        # temp = self.tokenizer.convert_tokens_to_ids('')
        # print(self.tokenizer.convert_ids_to_tokens(temp))
        # Size of the tokenizer vocabulary
        self.vocab_size = len(self.tokenizer)

        self.pad_id = self.tokenizer.convert_tokens_to_ids(PAD)

        if self.args.pretrained_model:
            # A pretrained GPT-2 model was specified; load it
            model = GPT2LMHeadModel.from_pretrained(self.args.pretrained_model)
        else:
            # No pretrained model specified; initialize from a config file
            model_config = GPT2Config.from_json_file(self.args.model_config)
            model = GPT2LMHeadModel(config=model_config)

        # Resize the GPT-2 embeddings to match the tokenizer vocabulary
        model.resize_token_embeddings(self.vocab_size)

        print('model config:\n{}'.format(model.config.to_json_string()))

        return model, model.config.to_dict().get("n_ctx")
Example #9
    def __init__(self, args, task):
        try:
            from transformers import GPT2Config, GPT2LMHeadModel
        except ImportError:
            raise ImportError(
                "\n\nPlease install huggingface/transformers with:"
                "\n\n  pip install transformers")

        super().__init__(task.target_dictionary)

        config = GPT2Config(
            vocab_size=len(task.target_dictionary),
            n_positions=args.max_target_positions + 1,
            n_ctx=args.max_target_positions,
            n_embd=args.embed_dim,
            n_layer=args.num_layers,
            n_head=args.num_attention_heads,
            resid_pdrop=args.dropout,
            embd_pdrop=args.dropout,
            attn_pdrop=args.attention_dropout,
            layer_norm_epsilon=1e-6,
        )
        self.model = GPT2LMHeadModel(config)

        # set zero embedding for padding symbol
        self.pad_idx = task.target_dictionary.pad()
        self.model.transformer.wte.weight.data[self.pad_idx].zero_()
        self.model.transformer.wpe.weight.data[0].zero_()
Example #10
def main():

    config = GPT2Config(
        vocab_size=30000,
        n_positions=1024,
        n_ctx=1024,
        n_embd=2560,
        n_layer=32,
        n_head=32,
        n_inner=4*2560,
        activation_function="gelu_new",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        bos_token_id=30000,
        eos_token_id=30000,
        gradient_checkpointing=False,
    )

    print("initializing model")
    model = GPT2LMHeadModel(config)

    convert(
        model=model,
        m0_path="model-v1/80000/mp_rank_00_model_states.pt",
        m1_path="model-v1/80000/mp_rank_01_model_states.pt",
        save_path="model/CPM/",
    )
Example #11
 def __init__(
     self,
     batch_size,
     epochs,
     t_total=100000,
     config_path="config/model_config.json",
     data_path="data/train.json",
     valid_examples=100,
     vocab_path="vocab/vocab.txt",
     max_length=1024,
     warm_up_steps=0,
     lr=1e-4,
 ):
     super(Net, self).__init__()
     self.batch_size = batch_size
     self.epochs = epochs
     self.t_total = t_total
     self.warm_up_steps = warm_up_steps
     self.lr = lr
     self.model_name = "bert_pretrained_model"
     self.config = GPT2Config.from_json_file(config_path)
     self.model = GPT2LMHeadModel(config=self.config)
     self.data = [json.loads(line.strip()) for line in open(data_path)]
     self.dataset_train = DS(self.data[:-valid_examples],
                             vocab_path=vocab_path,
                             max_length=max_length)
     self.dataset_valid = DS(self.data[-valid_examples:],
                             vocab_path=vocab_path,
                             max_length=max_length)
Example #12
    def create_and_check_forward_and_backwards(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = GPT2LMHeadModel(config)
        model.to(torch_device)

        result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
        result.loss.backward()
Example #13
    def __init__(self):
        super().__init__()

        self.tokenizer = BertTokenizer(vocab_file=FLAGS.vocab_path)

        self.config = GPT2Config.from_json_file(FLAGS.model_config)

        self.model = GPT2LMHeadModel(config=self.config)
Example #14
def create_model(pre_trained=False):
    if pre_trained:
        model = GPT2LMHeadModel.from_pretrained(config.MODEL_PATH)
    else:
        model_config = GPT2Config.from_json_file(config.CONFIG_JSON_FILE)
        model = GPT2LMHeadModel(config=model_config)
    # model.resize_token_embeddings(vocab_size)
    n_ctx = model.config.to_dict().get("n_ctx")
    return model, n_ctx
Example #15
def load_pretrained_model(args):

    if args.pretrained_model:
        logger.info(f'loading pretrained model from {args.pretrained_model}')
        model = GPT2LMHeadModel.from_pretrained(args.pretrained_model)
    else:
        logger.info('init pretrained model...')
        config = GPT2Config.from_json_file(args.model_config)
        model = GPT2LMHeadModel(config)
    return model, model.config.to_dict().get("n_ctx")
Example #16
    def execute_inference(
        self,
        metadata: NetworkMetadata,
        network_fpaths: NetworkModels,
        inference_input: str,
        timing_profile: TimingProfile,
    ) -> NetworkResult:

        # Execute some tests
        tokenizer = GPT2Tokenizer.from_pretrained(metadata.variant)
        input_ids = tokenizer(inference_input, return_tensors="pt").input_ids

        # By default, the HuggingFace model structure is one giant file.
        gpt2_torch_fpath = network_fpaths.torch[0].fpath
        # from_pretrained is a classmethod; pass the cache setting directly
        # rather than discarding a separately constructed config.
        gpt2_model = GPT2LMHeadModel.from_pretrained(
            gpt2_torch_fpath, use_cache=metadata.other.kv_cache)
        gpt2_torch = GPT2TorchFile.TorchModule(gpt2_model.transformer,
                                               gpt2_model.lm_head,
                                               gpt2_model.config)
        greedy_output = gpt2_torch.generate(input_ids)  #greedy search

        # get single decoder iteration inference timing profile
        _, decoder_e2e_median_time = gpt2_inference(gpt2_torch, input_ids,
                                                    timing_profile)

        # get complete decoder inference result and its timing profile
        sample_output, full_e2e_median_runtime = full_inference_greedy(
            gpt2_torch,
            input_ids,
            timing_profile,
            max_length=GPT2ModelTRTConfig.MAX_SEQUENCE_LENGTH[
                metadata.variant],
        )

        semantic_outputs = []
        for sample in sample_output:
            semantic_outputs.append(
                tokenizer.decode(sample, skip_special_tokens=True))

        return NetworkResult(
            input=inference_input,
            output_tensor=greedy_output,
            semantic_output=semantic_outputs,
            median_runtime=[
                NetworkRuntime(
                    name=GPT2ModelTRTConfig.NETWORK_DECODER_SEGMENT_NAME,
                    runtime=decoder_e2e_median_time,
                ),
                NetworkRuntime(
                    name=GPT2ModelTRTConfig.NETWORK_FULL_NAME,
                    runtime=full_e2e_median_runtime,
                ),
            ],
            models=network_fpaths,
        )
Example #17
    def __init__(self,
                 max_output_length=25,
                 max_input_length=300,
                 device='cpu',
                 tokenizer_type='gpt2',
                 bpe_model="",
                 starter_model=None):
        if tokenizer_type == "gpt2":
            self.tokenizer = utils_tokenizer.GPT2Tokenizer()
            config = GPT2Config.from_pretrained("gpt2")

        elif tokenizer_type == "bpecap":
            self.tokenizer = utils_tokenizer.BPETokenizer(bpe_model)
            config = GPT2Config.from_dict({
                "finetuning_task": None,
                "initializer_range": 0.02,
                "layer_norm_epsilon": 1e-05,
                "n_ctx": 1024,
                "n_embd": 768,
                "n_head": 12,
                "n_layer": 12,
                "n_positions": 1024,
                "num_labels": 1,
                "resid_pdrop": 0.1,
                "use_bfloat16": False,
                "vocab_size": self.tokenizer.vocab_size,
            })
        else:
            print("Tokenizer unrecognized. Should be gpt2 or bpecap.")
            exit()

        self.model = GPT2LMHeadModel(config)

        self.model.to(device)
        self.device = device
        if starter_model is not None:
            self.reload(starter_model)

        self.max_output_length = max_output_length
        self.max_input_length = max_input_length

        self.model.train()
        self.mode = "train"
Example #18
    def get_model(self, field_ce, flatten):
        if field_ce:
            model = TabFormerGPT2LMHeadModel(self.config, self.vocab)
        else:
            model = GPT2LMHeadModel(self.config)
        if not flatten:
            tab_emb_config = ddict(vocab_size=len(self.vocab),
                                   hidden_size=self.config.hidden_size)
            model = TabFormerBaseModel(model,
                                       TabFormerEmbeddings(tab_emb_config))

        return model
Example #19
def main():
    # Initialize arguments
    args = set_args()
    # Select which GPUs are visible for training
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    # Use the GPU only when requested and actually available
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    device = 'cuda:0' if args.cuda else 'cpu'
    args.device = device
    logger.info('using device: {}'.format(device))

    # Initialize the tokenizer
    tokenizer = BertTokenizerFast.from_pretrained(args.pretrained_model)
    args.sep_id = tokenizer.sep_token_id
    args.pad_id = tokenizer.pad_token_id
    args.cls_id = tokenizer.cls_token_id

    # Create the model output directory
    if not os.path.exists(args.save_model_path):
        os.mkdir(args.save_model_path)

    # Create the model
    if args.pretrained_model:  # load a pretrained model
        model = GPT2LMHeadModel.from_pretrained(args.pretrained_model)
    else:  # initialize from a config file
        model_config = GPT2Config.from_json_file(args.model_config)
        model = GPT2LMHeadModel(config=model_config)
    model = model.to(device)
    logger.info('model config:\n{}'.format(model.config.to_json_string()))
    assert model.config.vocab_size == tokenizer.vocab_size

    # Train on multiple GPUs in parallel when available
    if args.cuda and torch.cuda.device_count() > 1:
        model = DataParallel(model).cuda()
        logger.info("use GPU {} to train".format(args.device))

    # Count the number of model parameters
    num_parameters = 0
    parameters = model.parameters()
    for parameter in parameters:
        num_parameters += parameter.numel()
    logger.info('number of model parameters: {}'.format(num_parameters))

    # Log the run arguments
    logger.info("args:{}".format(args))

    # Load the training and validation sets
    # ========= Loading Dataset ========= #
    train_dataset, validate_dataset = load_dataset(args)
    train(tokenizer, model, train_dataset, validate_dataset, args)
Example #20
        def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = GPT2LMHeadModel(config)
            model.to(torch_device)
            model.eval()

            loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size],
            )
Example #21
def train(config):

    # tokenize(filename=config.corpus_path, vocab_size=config.vocab_size)

    tokenizer = load_gpt_tokenizer(config.corpus)
    # print(tokenizer.tokenize(' Hi there <|endoftext|>'))

    gpt_config = GPT2Config(
        vocab_size=config.vocab_size,
        n_positions=config.seq_len,
        n_ctx=config.seq_len,
    )

    model = GPT2LMHeadModel(gpt_config)

    print(f'{model.num_parameters()} parameters')

    dataset, data_collator = load_dataset(path=config.corpus_path,
                                          tokenizer=tokenizer,
                                          seq_len=config.seq_len)

    training_args = TrainingArguments(
        output_dir=f'../../../home_kahlo/jihwan.lee/lang_acquisition/trained/{config.corpus}/{config.random_seed}/checkpoints',
        overwrite_output_dir=True,
        num_train_epochs=config.max_epoch,
        per_device_train_batch_size=32,
        save_steps=config.step,
        seed=config.random_seed,
        # max_steps=
        # save_total_limit=epoch,
    )
    print('training args set')

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
        prediction_loss_only=True,
    )

    print('start training')

    trainer.train()

    trainer.save_model(
        f"../../../home_kahlo/jihwan.lee/lang_acquisition/trained/{config.corpus}/{config.random_seed}"
    )

    return
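
# An illustrative config object for train() above; the field names mirror the
# attributes the function reads, but every value here is an assumption rather
# than a setting from the original project.
from types import SimpleNamespace

example_config = SimpleNamespace(
    corpus="my_corpus",                # corpus name used in the output paths
    corpus_path="data/my_corpus.txt",  # raw text file for load_dataset()
    vocab_size=8000,
    seq_len=128,
    max_epoch=10,
    step=500,                          # checkpoint interval (save_steps)
    random_seed=42,
)
# train(example_config)  # requires load_gpt_tokenizer/load_dataset from this module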
Example #22
def korean_gpt_long_setence_life_test():
    config = get_config()
    kogpt2_config = get_kog_config()
    kogpt2_model_path = "C:\\Users\\multicampus\\s02p23c104\\Back\\AI\\checkpoints\\kogpt_life_model_20_2020-04-26-23-56-31.pth"

    kogpt2_vocab_path = config['kogpt_vocab_path']
    kogpt2model = GPT2LMHeadModel(config=GPT2Config.from_dict(kogpt2_config))
    kogpt2model.load_state_dict(torch.load(kogpt2_model_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    kogpt2model.to(device)
    kogpt2model.eval()
    vocab = nlp.vocab.BERTVocab.from_sentencepiece(kogpt2_vocab_path,
                                                   mask_token=None,
                                                   sep_token=None,
                                                   cls_token=None,
                                                   unknown_token='<unk>',
                                                   padding_token='<pad>',
                                                   bos_token='<s>',
                                                   eos_token='</s>')
    tok = SentencepieceTokenizer(kogpt2_vocab_path)

    sent = '나는 밥을 먹었'  # Korean prompt, roughly "I ate a meal"
    toked = tok(sent)
    print(toked)
    sent_cnt = 0

    input_ids = torch.tensor([
        vocab[vocab.bos_token],
    ] + vocab[toked]).unsqueeze(0)
    input_ids = input_ids.to(device)

    outputs = kogpt2model.generate(input_ids=input_ids,
                                   max_length=100,
                                   min_length=50,
                                   repetition_penalty=1.2,
                                   do_sample=True,
                                   num_beams=3,
                                   bos_token_id=0,
                                   pad_token_id=3,
                                   eos_token_id=1,
                                   num_return_sequences=3)

    print("======== Essay ===========")
    for i in range(3):  # 3 output sequences were generated
        toked = vocab.to_tokens(outputs[i].squeeze().tolist())
        ret = re.sub(r'(<s>|</s>|<pad>|<unk>)', '',
                     ''.join(toked).replace('▁', ' ').strip())
        print('Generated {}: {}'.format(i, ret))
Example #23
 def build_model(self, hparams):
     config = GPT2Config(
         vocab_size=hparams.vocab_size,
         n_positions=hparams.max_length,
         n_ctx=hparams.max_length,
         n_embd=hparams.n_embd if hasattr(hparams, "n_embd") else 512,
         n_layer=hparams.n_layer if hasattr(hparams, "n_layer") else 4,
         n_head=hparams.n_head if hasattr(hparams, "n_head") else 1,
         resid_pdrop=0,
         embd_pdrop=0,
         attn_pdrop=0,
         summary_first_dropout=0,
     )
     return GPT2LMHeadModel(config)
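
# An illustrative hparams container for build_model() above; only vocab_size
# and max_length are required, the other sizes fall back to the defaults in
# the method. The values are assumptions for demonstration.
from argparse import Namespace

hparams = Namespace(vocab_size=32000, max_length=256, n_layer=6)
# model = self.build_model(hparams)  # called from within the owning class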
Example #24
 def test_evaluation_with_keys_to_drop(self):
     config = GPT2Config(vocab_size=100, n_positions=128, n_ctx=128, n_embd=32, n_layer=3, n_head=4)
     tiny_gpt2 = GPT2LMHeadModel(config)
     x = torch.randint(0, 100, (128,))
     eval_dataset = RepeatDataset(x)
     args = TrainingArguments("./test")
     trainer = Trainer(tiny_gpt2, args, eval_dataset=eval_dataset)
     # By default the past_key_values are removed
     result = trainer.predict(eval_dataset)
     self.assertTrue(isinstance(result.predictions, np.ndarray))
     # We can still get them by setting ignore_keys to []
     result = trainer.predict(eval_dataset, ignore_keys=[])
     self.assertTrue(isinstance(result.predictions, tuple))
     self.assertEqual(len(result.predictions), 2)
Example #25
    def __init__(self, args, pretrained, model_checkpoint, report_every,
                 ren, norm_fn, device, logdir=None):
        self.args = args
        self._ren = ren
        self._device = device

        self.tokenizer = GPT2Tokenizer.from_pretrained(
            model_checkpoint, do_lower_case=True)
        self.pad_id = self.tokenizer.eos_token_id
        self.use_segments = True

        self._config = GPT2Config.from_json_file(os.path.join(model_checkpoint, CONFIG_NAME))
        self._max_len = 256  # 512  # self._config.n_ctx

        self._model = GPT2LMHeadModel.from_pretrained(
            model_checkpoint).to(device) if pretrained else GPT2LMHeadModel(self._config).to(device)
        num_param, _, __ = _tally_parameters(self._model)
        logger.info("model paramerters: {}".format(num_param))

        if not os.path.exists("checkpoints"):
            os.mkdir("checkpoints")
        self.save_dir = os.path.join("checkpoints", args.save_dir)
        if not os.path.exists(self.save_dir):
            os.mkdir(self.save_dir)
        elif args.infer_from == "":
            if SYS != "Windows":
                raise Exception("path exists {}".format(self.save_dir))

        self._optimizer = None
        self.writer = SummaryWriter(logdir=logdir)
        self.report_every = report_every
        self.batch_step = 0
        self.training_step = 1
        self.gradient_accumulation_steps = args.gradient_accumulation_steps
        self.max_val_step = args.max_val_step

        self._dataset = {}
        self._data_loader = {}

        self._weights = None
        self._w_decay = None

        if norm_fn == 'linear':
            self._norm_fn = _linear_normalize
        elif norm_fn == 'softmax':
            self._norm_fn = _softmax_normalize

        if ren:
            assert norm_fn == 'linear'
Example #26
 def setUp(self):
     x = torch.randint(0, 5, (10, ))
     self.dataset = DummyDataset(x)
     args = TrainingArguments(".")
     config = GPT2Config(vocab_size=100,
                         n_positions=128,
                         n_ctx=128,
                         n_embd=32,
                         n_layer=3,
                         n_head=4)
     self.model = GPT2LMHeadModel(config)
     self.wrapper = BaalTransformersTrainer(model=self.model,
                                            args=args,
                                            eval_dataset=self.dataset,
                                            tokenizer=None)
Example #27
 def __init__(self, use_cuda=True, path_model='models/DialoGPT/medium_ft.pkl'):
     self.use_cuda = use_cuda
     self.turn_sep = ' <|endoftext|> '
     self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
     model_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16)
     self.model = GPT2LMHeadModel(model_config)
     weights = torch.load(path_model)
     weights["lm_head.weight"] = weights["lm_head.decoder.weight"]
     weights.pop("lm_head.decoder.weight", None)
     self.model.load_state_dict(weights)
     if self.use_cuda:
         self.model = self.model.cuda()
     self.ix_EOS = 50256
     self.way = 'DPT'
     self.model.eval()
Example #28
def get_kogpt2_model(model_file, vocab_file, ctx="cpu"):
    kogpt2model = GPT2LMHeadModel(config=GPT2Config.from_dict(kogpt2_config))
    kogpt2model.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    kogpt2model.to(device)
    kogpt2model.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         mask_token=None,
                                                         sep_token=None,
                                                         cls_token=None,
                                                         unknown_token='<unk>',
                                                         padding_token='<pad>',
                                                         bos_token='<s>',
                                                         eos_token='</s>')
    return kogpt2model, vocab_b_obj
Example #29
def export_to_huggingface_model(model, path):
    from transformers import GPT2LMHeadModel, GPT2Config
    model_from = model
    while isinstance(model_from, (DDP, torchDDP, FP16_Module)):
        model_from = model_from.module
    conf_dict = model_from._conf_dict
    print('Export to huggingface model ', path, 'with config', conf_dict)
    config = GPT2Config(**conf_dict)
    hf_model = GPT2LMHeadModel(config=config)
    model_to = hf_model
    while isinstance(model_to, (DDP, torchDDP, FP16_Module)):
        model_to = model_to.module
    move_weights(model_from, model_to, dst2src=True)
    hf_model.save_pretrained(path)
    print('Saved huggingface model', type(model))
Example #30
 def __init__(self, path, cuda):
     from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
     self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
     model_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16)
     self.model = GPT2LMHeadModel(model_config)
     download_model(path)
     weights = torch.load(path)
     if "lm_head.decoder.weight" in weights:
         weights["lm_head.weight"] = weights["lm_head.decoder.weight"]
         weights.pop("lm_head.decoder.weight", None)
     self.model.load_state_dict(weights)
     self.ix_EOS = 50256
     self.model.eval()
     self.cuda = cuda
     if self.cuda:
         self.model.cuda()