Example #1
        def create_and_check_lm_head_model(self, config, input_ids, input_mask,
                                           head_mask, token_type_ids, *args):
            model = CTRLLMHeadModel(config)
            model.eval()

            loss, lm_logits, _ = model(input_ids,
                                       token_type_ids=token_type_ids,
                                       labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}
            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()),
                [self.batch_size, self.seq_length, self.vocab_size])
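
A minimal sketch of the same check as a standalone script. It assumes a recent
transformers release, where passing return_dict=False restores the tuple-style
output unpacked above; the tiny config values are arbitrary and chosen only to
keep the example fast:

import torch
from transformers import CTRLConfig, CTRLLMHeadModel

config = CTRLConfig(vocab_size=100, n_positions=32, n_embd=32,
                    dff=64, n_layer=2, n_head=2)
model = CTRLLMHeadModel(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (1, 8))
# With labels supplied, the tuple is (loss, lm_logits, past_key_values).
loss, lm_logits, _ = model(input_ids, labels=input_ids, return_dict=False)
print(loss.shape)       # torch.Size([]) -- a scalar language-modeling loss
print(lm_logits.shape)  # torch.Size([1, 8, 100])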
Example #2
    def test_lm_generate_ctrl(self):
        model = CTRLLMHeadModel.from_pretrained("ctrl")
        model.to(torch_device)
        input_ids = torch.tensor([[11859, 0, 1611, 8]],
                                 dtype=torch.long,
                                 device=torch_device)  # Legal the president is
        expected_output_ids = [
            11859, 0, 1611, 8, 5, 150, 26449, 2, 19, 348,
            469, 3, 2595, 48, 20740, 246533, 246533, 19, 30, 5,
        ]  # Legal the president is a good guy and I don't want to lose my job. \n \n I have a

        output_ids = model.generate(input_ids, do_sample=False)
        self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
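
The hard-coded token ids above pin greedy decoding to one exact continuation.
A minimal sketch of the same call driven through the tokenizer instead,
assuming the pretrained "ctrl" checkpoint can be downloaded:

from transformers import CTRLTokenizer, CTRLLMHeadModel

tokenizer = CTRLTokenizer.from_pretrained("ctrl")
model = CTRLLMHeadModel.from_pretrained("ctrl")
model.eval()

# "Legal" is one of CTRL's control codes; it steers the domain of the output.
input_ids = tokenizer.encode("Legal the president is", return_tensors="pt")
output_ids = model.generate(input_ids, do_sample=False)  # greedy decoding
print(tokenizer.decode(output_ids[0]))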
Example #3
def get_models(params, datasets):
    model_params = params["model_params"]
    model_name = model_params.get("transformer_name", "ctrl")
    n_gpus = model_params.get("n_gpus", 0)
    config_params = model_params.get("config_params", None)

    if n_gpus == 0 or not torch.cuda.is_available():
        device = "cpu"
        device_ids = []
    else:
        device = "cuda"
        device_ids = list(range(min(n_gpus, torch.cuda.device_count())))

    print("gpu device ids ", device_ids)

    if model_name == "ctrl":
        config = AutoConfig.from_pretrained("ctrl")
        if config_params is not None:
            for k, v in config_params.items():
                if hasattr(config, k):
                    setattr(config, k, v)
        # print(config)
        model = CTRLLMHeadModel(config).to(device)
    else:
        raise NotImplementedError()

    models = {"model": model, "device": device, "device_ids": device_ids}
    return models
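
A minimal sketch of calling get_models. The params layout below is an
assumption inferred from the keys the function reads, and datasets is unused
by this snippet, so None is passed:

params = {
    "model_params": {
        "transformer_name": "ctrl",
        "n_gpus": 0,  # force CPU
        "config_params": {"n_layer": 2},  # hypothetical override to shrink the model
    }
}
models = get_models(params, datasets=None)
print(models["device"], models["device_ids"])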
Example #4
    def test_lm_generate_ctrl(self):
        model = CTRLLMHeadModel.from_pretrained("ctrl")
        input_ids = torch.tensor([[11859, 586, 20984, 8]],
                                 dtype=torch.long)  # Legal My neighbor is
        expected_output_ids = [
            11859, 586, 20984, 8, 13391, 3, 980, 8258, 72, 327,
            148, 2, 53, 29, 226, 3, 780, 49, 3, 980,
        ]  # Legal My neighbor is refusing to pay rent after 2 years and we are having to force him to pay
        torch.manual_seed(0)

        output_ids = model.generate(input_ids)
        self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
Example #5
    def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = CTRLLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
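
Example #5 is the same shape check as Example #1, updated for the
transformers 4.x API: with labels supplied, the forward pass returns a
ModelOutput whose fields are accessed by name. Reusing the tiny config and
inputs from the sketch under Example #1:

result = model(input_ids, labels=input_ids)  # return_dict defaults to True
print(result.loss.shape)    # torch.Size([])
print(result.logits.shape)  # torch.Size([1, 8, 100])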
Example #6
import argparse
import os
import sys

from transformers import CTRLConfig, CTRLLMHeadModel

parser = argparse.ArgumentParser()
parser.add_argument('--tf_checkpoint',
                    type=str,
                    required=True,
                    help='location of the .data file of the TensorFlow checkpoint. This is NOT the model folder. This could be <path>/seqlen256_v1.ckpt/model.ckpt-413000.data-00000-of-00001')
parser.add_argument('--pytorch_checkpoint',
                    type=str,
                    default='pytorch_model.bin',
                    help='location of where to write the PyTorch checkpoint')
parser.add_argument('--num_layers',
                    type=int,
                    default=48,
                    help='number of layers in the model being converted')

args = parser.parse_args()

model = CTRLLMHeadModel(CTRLConfig())

if os.path.isfile(args.tf_checkpoint):
    print('INFO :: Found TensorFlow checkpoint')
else:
    print(
        'INFO :: TensorFlow checkpoint not found. Please verify the location of the .data file or raise a GitHub issue if the problem persists.'
    )

if os.path.isfile(args.pytorch_checkpoint):
    print(
        'PyTorch model already exists. Will not overwrite. Please delete the old checkpoint or specify a different file name.'
    )
    sys.exit(1)

chkpt_for_reader = '.'.join(args.tf_checkpoint.split('.')[:-1])
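
A minimal sketch of running the converter and then loading its output. The
script file name is hypothetical, and it assumes the script finishes by saving
model.state_dict() to args.pytorch_checkpoint:

#   python convert_tf_checkpoint.py \
#       --tf_checkpoint <path>/seqlen256_v1.ckpt/model.ckpt-413000.data-00000-of-00001

import torch
from transformers import CTRLConfig, CTRLLMHeadModel

model = CTRLLMHeadModel(CTRLConfig())  # full-size CTRL config, as in the script
model.load_state_dict(torch.load("pytorch_model.bin"))
model.eval()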