def setup_class(self):
        """Build the fixtures shared by the CTRL tokenizer/model tests.

        Records whether CUDA is available, creates a temporary cache directory,
        loads the reference ``CTRLTokenizer`` and a ``PyCtrlTokenizer`` from the
        same vocabulary and merges files, loads ``CTRLModel`` in eval mode
        (moved to CUDA when available), stores the sample sentence list, and
        finally runs one forward pass over the encoded sentences with gradients
        disabled, discarding the result.
        """
        self.use_gpu = torch.cuda.is_available()
        self.test_dir = Path(tempfile.mkdtemp())

        # Reference tokenizer; the temporary directory is passed as its cache dir.
        self.base_tokenizer = CTRLTokenizer.from_pretrained(
            'ctrl', do_lower_case=True, cache_dir=self.test_dir
        )

        # Second tokenizer, built from the same vocab/merges files as the reference.
        vocab_path = get_from_cache(self.base_tokenizer.pretrained_vocab_files_map['vocab_file']['ctrl'])
        merges_path = get_from_cache(self.base_tokenizer.pretrained_vocab_files_map['merges_file']['ctrl'])
        self.rust_tokenizer = PyCtrlTokenizer(vocab_path, merges_path, do_lower_case=True)

        ctrl_model = CTRLModel.from_pretrained('ctrl', output_attentions=False)
        self.model = ctrl_model.eval()
        if self.use_gpu:
            self.model.cuda()

        sample = ('For instance, on the planet Earth, man had always assumed that he was more intelligent '
                  'than dolphins because he had achieved so much—the wheel, New York, wars and so on—whilst'
                  ' all the dolphins had ever done was muck about in the water having a good time. But '
                  'conversely, the dolphins had always believed that they were far more intelligent than '
                  'man—for precisely the same reasons.')
        self.sentence_list = [sample] * 1

        # Warm-up forward pass (original note: pre-allocate GPU memory).
        tokenizer = self.base_tokenizer
        tokenized = [tokenizer.tokenize(text) for text in self.sentence_list]
        id_lists = [tokenizer.convert_tokens_to_ids(tokens) for tokens in tokenized]
        prepared = [
            tokenizer.prepare_for_model(ids, None, add_special_tokens=True, max_length=128)
            for ids in id_lists
        ]
        all_input_ids = torch.tensor([item['input_ids'] for item in prepared], dtype=torch.long)

        if self.use_gpu:
            all_input_ids = all_input_ids.cuda()

        with torch.no_grad():
            # Output is intentionally discarded; only the side effect of running matters.
            self.model(all_input_ids)[0].cpu().numpy()
 def test_model_from_pretrained(self):
     """Check that the first listed CTRL checkpoint loads to a non-None model."""
     first_checkpoints = CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]
     for checkpoint in first_checkpoints:
         self.assertIsNotNone(CTRLModel.from_pretrained(checkpoint))
 def test_model_from_pretrained(self):
     """Check that the first checkpoint in the archive map loads via CACHE_DIR."""
     checkpoint_names = list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())
     for checkpoint in checkpoint_names[:1]:
         loaded = CTRLModel.from_pretrained(checkpoint, cache_dir=CACHE_DIR)
         self.assertIsNotNone(loaded)
Ejemplo n.º 4
0
import torch
from transformers import CTRLTokenizer, CTRLModel
# Load the pretrained CTRL tokenizer and model under the 'ctrl' identifier.
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
model = CTRLModel.from_pretrained('ctrl')

# Batch size 1: only one sentence is encoded, so unsqueeze(0) adds the leading
# batch dimension in front of the token ids.
input_ids = torch.tensor(
    tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)
).unsqueeze(0)

# Per the original note: the first element of the output is the result; the
# second is a cache that is not needed here.
outputs = model(input_ids)










Ejemplo n.º 5
0
 def test_model_from_pretrained(self):
     """Check that the first checkpoint in the archive map loads to a non-None model.

     The checkpoint is downloaded into a scratch cache directory, which is
     removed again after each load attempt, whether or not the load succeeded.
     """
     cache_dir = "/tmp/transformers_test/"
     for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         try:
             model = CTRLModel.from_pretrained(model_name, cache_dir=cache_dir)
         finally:
             # Clean up even when loading raises, so a failed run does not
             # leave the cache behind; ignore_errors keeps a missing directory
             # from masking the original exception.
             shutil.rmtree(cache_dir, ignore_errors=True)
         self.assertIsNotNone(model)