def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
    model = CTRLLMHeadModel(config)
    model.eval()
    loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
    result = {"loss": loss, "lm_logits": lm_logits}
    self.parent.assertListEqual(list(result["loss"].size()), [])
    self.parent.assertListEqual(
        list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
    )
def test_lm_generate_ctrl(self):
    model = CTRLLMHeadModel.from_pretrained("ctrl")
    model.to(torch_device)
    input_ids = torch.tensor([[11859, 0, 1611, 8]], dtype=torch.long, device=torch_device)  # Legal the president is
    expected_output_ids = [
        11859, 0, 1611, 8, 5, 150, 26449, 2, 19, 348, 469, 3, 2595, 48, 20740, 246533, 246533, 19, 30, 5,
    ]  # Legal the president is a good guy and I don't want to lose my job. \n \n I have a
    output_ids = model.generate(input_ids, do_sample=False)  # greedy decoding
    self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
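# For illustration only: a hedged sketch of the same greedy generation outside the
# test harness, decoding the ids back to text with CTRLTokenizer. The prompt string
# is reconstructed from the "Legal the president is" comment above, and max_length=20
# is an assumed value matching the 20 expected ids.
import torch
from transformers import CTRLLMHeadModel, CTRLTokenizer

tokenizer = CTRLTokenizer.from_pretrained("ctrl")
model = CTRLLMHeadModel.from_pretrained("ctrl")
model.eval()

# "Legal" is one of CTRL's control codes; it steers the style of the continuation
input_ids = tokenizer.encode("Legal the president is", return_tensors="pt")
output_ids = model.generate(input_ids, do_sample=False, max_length=20)  # greedy decoding
print(tokenizer.decode(output_ids[0]))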
def get_models(params, datasets):
    model_params = params["model_params"]
    model_name = model_params.get("transformer_name", "ctrl")
    n_gpus = model_params.get("n_gpus", 0)
    config_params = model_params.get("config_params", None)

    if n_gpus == 0 or not torch.cuda.is_available():
        device = "cpu"
        device_ids = []
    else:
        device = "cuda"
        device_ids = range(min(n_gpus, torch.cuda.device_count()))
    print("gpu device ids ", device_ids)

    if model_name == "ctrl":
        config = AutoConfig.from_pretrained("ctrl")
        if config_params is not None:
            # override any config attributes supplied by the caller
            for k, v in config_params.items():
                if hasattr(config, k):
                    setattr(config, k, v)
        model = CTRLLMHeadModel(config).to(device)
    else:
        raise NotImplementedError()

    models = {"model": model, "device": device, "device_ids": device_ids}
    return models
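# A hypothetical usage sketch for get_models. The params layout below is inferred
# from the .get() lookups above and is not from the original source; "resid_pdrop"
# is a real CTRLConfig attribute, used here only as an example override.
params = {
    "model_params": {
        "transformer_name": "ctrl",
        "n_gpus": 1,
        "config_params": {"resid_pdrop": 0.2},  # example override of a CTRLConfig field
    }
}
models = get_models(params, datasets=None)  # `datasets` is accepted but unused above
model, device = models["model"], models["device"]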
def test_lm_generate_ctrl(self):
    model = CTRLLMHeadModel.from_pretrained("ctrl")
    input_ids = torch.tensor([[11859, 586, 20984, 8]], dtype=torch.long)  # Legal My neighbor is
    expected_output_ids = [
        11859, 586, 20984, 8, 13391, 3, 980, 8258, 72, 327, 148, 2, 53, 29, 226, 3, 780, 49, 3, 980,
    ]  # Legal My neighbor is refusing to pay rent after 2 years and we are having to force him to pay
    torch.manual_seed(0)  # seed the RNG so the sampled generation is reproducible
    output_ids = model.generate(input_ids)
    self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
    model = CTRLLMHeadModel(config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
    self.parent.assertEqual(result.loss.shape, ())
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
parser.add_argument(
    '--tf_checkpoint',
    type=str,
    required=True,
    help='location of the .data file of the TensorFlow checkpoint. This is NOT the model folder. '
         'This could be <path>/seqlen256_v1.ckpt/model.ckpt-413000.data-00000-of-00001'
)
parser.add_argument(
    '--pytorch_checkpoint',
    type=str,
    default='pytorch_model.bin',
    help='location of where to write the PyTorch checkpoint'
)
parser.add_argument(
    '--num_layers',
    type=int,
    default=48,
    help='number of layers in the model being converted'
)
args = parser.parse_args()

model = CTRLLMHeadModel(CTRLConfig())

if os.path.isfile(args.tf_checkpoint):
    print('INFO :: Found TensorFlow checkpoint')
else:
    print(
        'INFO :: TensorFlow checkpoint not found. Please verify the location of the .data file '
        'or raise a GitHub issue if the problem persists.'
    )
if os.path.isfile(args.pytorch_checkpoint):
    print(
        'PyTorch model already exists. Will not overwrite. Please delete the old checkpoint '
        'or specify a different file name.'
    )
    sys.exit(1)

# drop the trailing ".data-00000-of-00001" shard suffix so the reader gets the checkpoint prefix
chkpt_for_reader = '.'.join(args.tf_checkpoint.split('.')[:-1])
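# A sketch of reloading the converted checkpoint, assuming the conversion script
# saved a plain state_dict (i.e. torch.save(model.state_dict(), ...)); the file
# name matches the --pytorch_checkpoint default above.
import torch
from transformers import CTRLConfig, CTRLLMHeadModel

model = CTRLLMHeadModel(CTRLConfig())
state_dict = torch.load('pytorch_model.bin', map_location='cpu')
model.load_state_dict(state_dict)
model.eval()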