Example #1
    def test_generation_from_short_input_same_as_parlai_3B(self):
        FASTER_GEN_KWARGS = dict(num_beams=1,
                                 early_stopping=True,
                                 min_length=15,
                                 max_length=25)
        TOK_DECODE_KW = dict(skip_special_tokens=True,
                             clean_up_tokenization_spaces=True)

        torch.cuda.empty_cache()
        model = BlenderbotForConditionalGeneration.from_pretrained(
            self.ckpt).half().to(torch_device)

        src_text = ["Sam"]
        model_inputs = self.tokenizer(src_text,
                                      return_tensors="pt").to(torch_device)

        generated_utterances = model.generate(**model_inputs,
                                              **FASTER_GEN_KWARGS)
        tgt_text = 'Sam is a great name. It means "sun" in Gaelic.'

        generated_txt = self.tokenizer.batch_decode(generated_utterances,
                                                    **TOK_DECODE_KW)
        assert generated_txt[0].strip() == tgt_text

        src_text = "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel like i'm going to throw up.\nand why is that?"

        model_inputs = self.tokenizer([src_text],
                                      return_tensors="pt").to(torch_device)

        generated_ids = model.generate(**model_inputs, **FASTER_GEN_KWARGS)[0]
        reply = self.tokenizer.decode(generated_ids, **TOK_DECODE_KW)

        assert "I think it's because we are so worried about what people think of us." == reply.strip(
        )
        del model
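The same generation settings can be exercised outside the test harness. A minimal standalone sketch, assuming the "facebook/blenderbot-3B" checkpoint and a CUDA device are available:

from transformers import BlenderbotForConditionalGeneration, BlenderbotTokenizer

tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B")
model = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-3B").half().to("cuda")

inputs = tokenizer(["Sam"], return_tensors="pt").to("cuda")
# Greedy search (num_beams=1) with the same length bounds as the test above.
ids = model.generate(**inputs, num_beams=1, early_stopping=True, min_length=15, max_length=25)
print(tokenizer.batch_decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=True))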
Example #2
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = BlenderbotForConditionalGeneration(config).eval().to(torch_device)
        if torch_device == "cuda":
            model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
Example #3
    def test_embed_pos_shape(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        model = BlenderbotForConditionalGeneration(config)
        expected_shape = (config.max_position_embeddings + config.extra_pos_embeddings,
                          config.d_model)
        assert model.model.encoder.embed_positions.weight.shape == expected_shape
        assert model.model.decoder.embed_positions.weight.shape == expected_shape
Example #4
def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_json_path):
    """
    Copy/paste/tweak the ParlAI checkpoint's weights into our Blenderbot structure.
    """
    model = torch.load(checkpoint_path, map_location="cpu")
    sd = model["model"]
    cfg = BlenderbotConfig.from_json_file(config_json_path)
    m = BlenderbotForConditionalGeneration(cfg)
    valid_keys = m.model.state_dict().keys()
    failures = []
    mapping = {}
    for k, v in sd.items():
        if k in IGNORE_KEYS:
            continue

        new_k = rename_state_dict_key(k)
        if new_k not in valid_keys:
            failures.append([k, new_k])
        else:
            mapping[new_k] = v
    if cfg.normalize_before:  # Blenderbot-3B checkpoints. Rename layernorm_embedding -> layer_norm
        rename_layernorm_keys(sd)
    m.model.load_state_dict(mapping, strict=True)
    m.half()
    m.save_pretrained(pytorch_dump_folder_path)
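As a usage sketch (the file paths below are placeholders, not files from the source), the converter can be called directly:

convert_parlai_checkpoint(
    checkpoint_path="blenderbot_ckpt/model",         # placeholder: ParlAI checkpoint file
    pytorch_dump_folder_path="./blenderbot-hf",      # placeholder: output dir for save_pretrained
    config_json_path="blenderbot_ckpt/config.json",  # placeholder: BlenderbotConfig as JSON
)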
Example #5
    def __init__(self):
        # model_name = 'facebook/blenderbot_small-90M'
        model_name = 'facebook/blenderbot-400M-distill'
        # model_name = 'facebook/blenderbot-1B-distill'
        # model_name = 'facebook/blenderbot-3B'
        
        print("Creating model")
        self.model = BlenderbotForConditionalGeneration.from_pretrained(model_name)

        print("Creating tokenizer")
        self.tokenizer = BlenderbotTokenizer.from_pretrained(model_name)

        self.history = []
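A reply method is not shown in this example. A minimal sketch of one, where the method name respond and the flat list-of-strings history format are assumptions rather than part of the source:

    def respond(self, user_message):
        # Hypothetical helper: join the running history into one context
        # string, generate a reply, and record it back into the history.
        self.history.append(user_message)
        context = "</s> <s>".join(self.history)
        inputs = self.tokenizer([context], return_tensors="pt", truncation=True)
        reply_ids = self.model.generate(**inputs)
        reply = self.tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
        self.history.append(reply)
        return reply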
Example #6
    def test_initialization_module(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        model = BlenderbotForConditionalGeneration(config).model
        model.to(torch_device)
        model.eval()
        enc_embeds = model.encoder.embed_tokens.weight
        assert (enc_embeds == model.shared.weight).all().item()
        self.assertAlmostEqual(torch.std(enc_embeds).item(), config.init_std, 2)
Example #7
    def __init__(self, size, env, device, max_context_length):
        """
        The Blender chatbot model was proposed in "Recipes for Building an Open-Domain Chatbot" by
        Stephen Roller, Emily Dinan, Naman Goyal, Da Ju, Mary Williamson, Yinhan Liu, Jing Xu,
        Myle Ott, Kurt Shuster, Eric M. Smith, Y-Lan Boureau, and Jason Weston, 30 Apr 2020.

        Args:
            size (str): model size
            env (BaseEnv): dialogue environment
            device (str): device (one of ['CPU', 'CUDA', 'CUDA:N'], case-insensitive)
            max_context_length (int): maximum history context length
                (i.e., the maximum number of input context tokens)
        """

        assert size in ['small', 'medium', 'large', 'xlarge'], \
            "model size must be one of ['small', 'medium', 'large', 'xlarge']"

        device = device.lower()  # torch expects lowercase device strings ('cpu', 'cuda', 'cuda:0')

        if size == "small":
            super().__init__("facebook/blenderbot_small-90M", env)
            self.model = BlenderbotSmallForConditionalGeneration.from_pretrained(
                self.name).to(device)
            self.tokenizer = BlenderbotSmallTokenizer.from_pretrained(
                self.name)
        else:
            if size == "medium":
                super().__init__("facebook/blenderbot-400M-distill", env)
            elif size == "large":
                super().__init__("facebook/blenderbot-1B-distill", env)
            elif size == "xlarge":
                super().__init__("facebook/blenderbot-3B", env)

            self.model = BlenderbotForConditionalGeneration.from_pretrained(
                self.name).to(device)
            self.tokenizer = BlenderbotTokenizer.from_pretrained(self.name)

        self.size = size
        self.device = device
        self.max_context_length = max_context_length
        self.eos = "</s> <s>"

        print('Done!')
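A construction sketch; the enclosing class name BlenderbotGenerator and the env instance are assumptions, since only the constructor appears above:

bot = BlenderbotGenerator(size="medium", env=env, device="cuda", max_context_length=128)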
Example #8
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

mname = 'facebook/blenderbot-400M-distill'
# mname = 'facebook/blenderbot-3B' # Too big to run on laptop

model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)

UTTERANCE = "My friends are cool but they eat too many carbs."
inputs = tokenizer([UTTERANCE], return_tensors='pt')
reply_ids = model.generate(**inputs)

print(tokenizer.batch_decode(reply_ids))
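The printed reply still contains special tokens such as <s> and </s>; passing skip_special_tokens=True (a standard tokenizer argument) strips them:

print(tokenizer.batch_decode(reply_ids, skip_special_tokens=True))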
Example #9
    tokenizer = GPT2Tokenizer.from_pretrained("microsoft/DialoGPT-small")
    #------dialogpt medium------#
    model = GPT2LMHeadModel.from_pretrained("microsoft/DialoGPT-medium")
    tokenizer = GPT2Tokenizer.from_pretrained("microsoft/DialoGPT-medium")

    print("dialogpt is done!")

elif download_type == 'gptneo':
    #------gptneo small------#
    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
    #------gptneo large------#
    #model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    #tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    print("gptneo is done!")

elif download_type == 'blender':
    #------blender small------#
    model = BlenderbotSmallForConditionalGeneration.from_pretrained(
        "facebook/blenderbot_small-90M")
    tokenizer = BlenderbotSmallTokenizer.from_pretrained(
        "facebook/blenderbot_small-90M")
    #------blender medium------#
    model = BlenderbotForConditionalGeneration.from_pretrained(
        "facebook/blenderbot-400M-distill")
    tokenizer = BlenderbotTokenizer.from_pretrained(
        "facebook/blenderbot-400M-distill")

    print("blender is done!")
Example #10
def load_Blenderbot(Bbot_PATH):
    BbotModel = BlenderbotForConditionalGeneration.from_pretrained(Bbot_PATH)
    BbotTokenizer = BlenderbotTokenizer.from_pretrained(Bbot_PATH)
    return BbotModel, BbotTokenizer
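A call sketch; "facebook/blenderbot-400M-distill" is just one valid argument (any local path or Hub id holding Blenderbot weights works):

BbotModel, BbotTokenizer = load_Blenderbot("facebook/blenderbot-400M-distill")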
Example #11
def get_model():
    # The 90M checkpoint uses the BlenderbotSmall classes; 'facebook/blenderbot-90M'
    # is the deprecated alias of 'facebook/blenderbot_small-90M'.
    mname = 'facebook/blenderbot_small-90M'
    model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname)
    tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname)
    return model, tokenizer
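A quick smoke test of the loader above (the prompt is arbitrary):

model, tokenizer = get_model()
inputs = tokenizer(["hello, how are you today?"], return_tensors="pt")
reply_ids = model.generate(**inputs)
print(tokenizer.batch_decode(reply_ids, skip_special_tokens=True))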