Example #1
    # Requires: from transformers import (AutoModel, AutoModelForCausalLM,
    #                                     EncoderDecoderConfig, EncoderDecoderModel)
    def convert_to_huggingface(self):
        # Save the trained encoder and decoder weights so they can be reloaded
        # through the Hugging Face from_pretrained() machinery.
        self.encoder.save_pretrained('./tmp_encoder')
        self.decoder.save_pretrained('./tmp_decoder')

        # Reuse the combined encoder-decoder config stored with an earlier checkpoint.
        encoder_decoder_config = EncoderDecoderConfig.from_pretrained(
            './models/checkpoint-1500')

        encoder = AutoModel.from_pretrained('./tmp_encoder')

        # The decoder needs cross-attention layers to attend to the encoder output.
        decoder = AutoModelForCausalLM.from_pretrained(
            './tmp_decoder', add_cross_attention=True)

        huggingface_model = EncoderDecoderModel(config=encoder_decoder_config,
                                                encoder=encoder,
                                                decoder=decoder)

        return huggingface_model
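If the surrounding class is available, the converted model round-trips through save_pretrained; a minimal usage sketch (the wrapper instance and the output path below are hypothetical, not part of the original snippet):

wrapper = ...  # hypothetical instance of the class defining convert_to_huggingface()
huggingface_model = wrapper.convert_to_huggingface()
huggingface_model.save_pretrained('./hf_model')  # hypothetical output path
reloaded = EncoderDecoderModel.from_pretrained('./hf_model')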
Example #2
def encoder_decoder_example():
	import torch
	from transformers import EncoderDecoderConfig, EncoderDecoderModel
	from transformers import BertConfig, GPT2Config
	from transformers import BertTokenizer, GPT2Tokenizer

	pretrained_model_name = 'bert-base-uncased'
	#pretrained_model_name = 'gpt2'

	if 'bert' in pretrained_model_name:
		# Initialize bert-base-uncased style configurations for encoder and decoder.
		config_encoder, config_decoder = BertConfig(), BertConfig()
	elif 'gpt2' in pretrained_model_name:
		config_encoder, config_decoder = GPT2Config(), GPT2Config()
	else:
		print('Invalid model: {}.'.format(pretrained_model_name))
		return

	config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)

	if 'bert' in pretrained_model_name:
		# Initialize a Bert2Bert model from the bert-base-uncased style configurations.
		model = EncoderDecoderModel(config=config)
		#model = EncoderDecoderModel.from_encoder_decoder_pretrained(pretrained_model_name, pretrained_model_name)  # Initialize Bert2Bert from pre-trained checkpoints.
		tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
	elif 'gpt2' in pretrained_model_name:
		model = EncoderDecoderModel(config=config)
		tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name)

	#print('Configuration of the encoder & decoder:\n{}.\n{}.'.format(model.config.encoder, model.config.decoder))
	#print('Encoder type = {}, decoder type = {}.'.format(type(model.encoder), type(model.decoder)))

	if False:
		# Access the model configuration.
		config_encoder = model.config.encoder
		config_decoder = model.config.decoder

		# Set decoder config to causal LM.
		config_decoder.is_decoder = True
		config_decoder.add_cross_attention = True

	#--------------------
	input_ids = torch.tensor(tokenizer.encode('Hello, my dog is cute', add_special_tokens=True)).unsqueeze(0)  # Batch size 1.

	if False:
		# Forward.
		outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)

		# Train.
		outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
		loss, logits = outputs.loss, outputs.logits

		# Save the model, including its configuration.
		model.save_pretrained('my-model')

		#--------------------
		# Load model and config from pretrained folder.
		encoder_decoder_config = EncoderDecoderConfig.from_pretrained('my-model')
		model = EncoderDecoderModel.from_pretrained('my-model', config=encoder_decoder_config)

	#--------------------
	# Generate.
	#	REF [site] >>
	#		https://huggingface.co/transformers/internal/generation_utils.html
	#		https://huggingface.co/blog/how-to-generate
	generated = model.generate(input_ids, decoder_start_token_id=model.config.decoder.pad_token_id)
	#generated = model.generate(input_ids, max_length=50, num_beams=5, no_repeat_ngram_size=2, num_return_sequences=5, do_sample=True, top_k=0, temperature=0.7, early_stopping=True, decoder_start_token_id=model.config.decoder.pad_token_id)
	print('Generated = {}.'.format(tokenizer.decode(generated[0], skip_special_tokens=True)))
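For the BERT branch, the Hugging Face EncoderDecoderModel docs set the decoder start and pad tokens explicitly before generating; a minimal sketch, reusing the model and tokenizer from the function above (the max_length value is an arbitrary choice, and this applies to the Bert2Bert case only since the GPT-2 tokenizer has no cls token):

# Bert2Bert convention: use [CLS] as the decoder start token and [PAD] for padding.
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.pad_token_id = tokenizer.pad_token_id
generated = model.generate(input_ids, max_length=32)
print(tokenizer.decode(generated[0], skip_special_tokens=True))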
Example #3
from transformers import BertTokenizer, EncoderDecoderConfig, EncoderDecoderModel

SPECIAL_TOKENS = [
    "<bos>", "<eos>", "<persona>", "<speaker1>", "<speaker2>", "<pad>"
]

ATTR_TO_SPECIAL_TOKEN = {
    'bos_token': '<bos>',
    'eos_token': '<eos>',
    'pad_token': '<pad>',
    'additional_special_tokens': ['<speaker1>', '<speaker2>', '<persona>']
}

tokenizer = BertTokenizer.from_pretrained("prajjwal1/bert-tiny")
tokenizer.add_special_tokens(ATTR_TO_SPECIAL_TOKEN)

encoder_decoder_config = EncoderDecoderConfig.from_pretrained(
    './models/checkpoint-1200')
model = EncoderDecoderModel.from_pretrained('./models/checkpoint-1200',
                                            config=encoder_decoder_config)
model.get_encoder().resize_token_embeddings(len(tokenizer))
model.get_decoder().resize_token_embeddings(len(tokenizer))
print(type(model.get_encoder()), type(model.get_decoder()))
# model = SimpleEncoderDecoder(tokenizer)
# model = load()
# model.to('cpu')

# Create the ids of the encoded input vectors.
input_ids = tokenizer("I want to buy a car", return_tensors="pt").input_ids

# Create the decoder prefix: a single <bos> token (batch size 1, no special
# tokens added). The original snippet is truncated here; the call is completed
# following the same pattern as the encoder input above.
decoder_input_ids = tokenizer("<bos>",
                              add_special_tokens=False,
                              return_tensors="pt").input_ids
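From here, one greedy decoding step can be run by hand; a minimal sketch continuing the snippet above (an assumption about intent: the manual <bos> prefix suggests step-by-step decoding, and the loaded checkpoint is assumed to already have cross-attention configured):

# One manual greedy step: encode the input, feed the <bos> prefix to the
# decoder, and pick the most likely next token from the last position's logits.
outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
next_token_id = outputs.logits[:, -1, :].argmax(dim=-1)
print(tokenizer.decode(next_token_id))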
Example #4
from transformers import EncoderDecoderConfig

def load_config(model_name_or_path):
    # Load an EncoderDecoderConfig from a local directory or a Hub model id.
    return EncoderDecoderConfig.from_pretrained(model_name_or_path)
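A hedged usage sketch; the checkpoint path below is hypothetical:

config = load_config('./models/checkpoint-1500')  # hypothetical path
model = EncoderDecoderModel.from_pretrained('./models/checkpoint-1500', config=config)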
Example #5
import math
import os
from collections import defaultdict

import torch
from transformers import AutoTokenizer, EncoderDecoderConfig, EncoderDecoderModel

# Assumed to be defined elsewhere in the repository: USE_GPU and the
# project-specific helpers (check_args, build_loaders, build_obj_discriminator,
# build_img_discriminator, get_gan_losses, check_model, timeit).


def main(args):
    print(args)
    check_args(args)

    if USE_GPU:
        float_dtype = torch.cuda.FloatTensor
        long_dtype = torch.cuda.LongTensor
    else:
        float_dtype = torch.FloatTensor
        long_dtype = torch.LongTensor

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased-itokens")

    # add_tokens(tokenizer)

    vocab, train_loader, val_loader = build_loaders(args, tokenizer)
    model_kwargs = {}

    encoder_decoder_config = EncoderDecoderConfig.from_pretrained(
        "bert-base-uncased-itokens")
    model = EncoderDecoderModel.from_pretrained("bert-base-uncased-itokens",
                                                config=encoder_decoder_config)

    # modify_network(model, tokenizer)
    # model, model_kwargs = build_model(args, vocab)
    # model.type(float_dtype)
    model.cuda()
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    obj_discriminator, d_obj_kwargs = build_obj_discriminator(args, vocab)
    img_discriminator, d_img_kwargs = build_img_discriminator(args, vocab)
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    if obj_discriminator is not None:
        obj_discriminator.type(float_dtype)
        obj_discriminator.train()
        print(obj_discriminator)
        optimizer_d_obj = torch.optim.Adam(obj_discriminator.parameters(),
                                           lr=args.learning_rate)

    if img_discriminator is not None:
        img_discriminator.type(float_dtype)
        img_discriminator.train()
        print(img_discriminator)
        optimizer_d_img = torch.optim.Adam(img_discriminator.parameters(),
                                           lr=args.learning_rate)

    restore_path = None
    if args.restore_from_checkpoint:
        restore_path = '%s_with_model.pt' % args.checkpoint_name
        restore_path = os.path.join(args.output_dir, restore_path)
    if restore_path is not None and os.path.isfile(restore_path):
        print('Restoring from checkpoint:')
        print(restore_path)
        checkpoint = torch.load(restore_path)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optim_state'])

        if obj_discriminator is not None:
            obj_discriminator.load_state_dict(checkpoint['d_obj_state'])
            optimizer_d_obj.load_state_dict(checkpoint['d_obj_optim_state'])

        if img_discriminator is not None:
            img_discriminator.load_state_dict(checkpoint['d_img_state'])
            optimizer_d_img.load_state_dict(checkpoint['d_img_optim_state'])

        t = checkpoint['counters']['t']
        if 0 <= args.eval_mode_after <= t:
            model.eval()
        else:
            model.train()
        epoch = checkpoint['counters']['epoch']
    else:
        t, epoch = 0, 0
        checkpoint = {
            'args': args.__dict__,
            'vocab': vocab,
            'model_kwargs': model_kwargs,
            'd_obj_kwargs': d_obj_kwargs,
            'd_img_kwargs': d_img_kwargs,
            'losses_ts': [],
            'losses': defaultdict(list),
            'd_losses': defaultdict(list),
            'checkpoint_ts': [],
            'train_batch_data': [],
            'train_samples': [],
            'train_iou': [],
            'val_batch_data': [],
            'val_samples': [],
            'val_losses': defaultdict(list),
            'val_iou': [],
            'norm_d': [],
            'norm_g': [],
            'counters': {
                't': None,
                'epoch': None,
            },
            'model_state': None,
            'model_best_state': None,
            'optim_state': None,
            'd_obj_state': None,
            'd_obj_best_state': None,
            'd_obj_optim_state': None,
            'd_img_state': None,
            'd_img_best_state': None,
            'd_img_optim_state': None,
            'best_t': [],
        }

    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for batch in train_loader:
            # Debug leftover from the original snippet (disabled): printing the
            # first batch and exiting, which would skip the whole training loop.
            # print(batch)
            # exit()
            if t == args.eval_mode_after:
                print('switching to eval mode')
                model.eval()
                optimizer = torch.optim.Adam(model.parameters(),
                                             lr=args.learning_rate)
            t += 1
            if USE_GPU:
                for k in batch.keys():
                    batch[k] = batch[k].cuda().long()
            masks = None

            with timeit('forward', args.timing):
                output = model(**batch)
            # with timeit('loss', args.timing):
            #   # Skip the pixel loss if using GT boxes
            #   skip_pixel_loss = False
            #   total_loss, losses = calculate_model_losses(
            #                           args, skip_pixel_loss, model, imgs, imgs_pred)

            # if img_discriminator is not None:
            #   scores_fake = img_discriminator(imgs_pred)
            #   weight = args.discriminator_loss_weight * args.d_img_weight
            #   total_loss = add_loss(total_loss, gan_g_loss(scores_fake), losses,
            #                         'g_gan_img_loss', weight)

            losses = {}
            total_loss = output["loss"]
            losses['total_loss'] = total_loss.item()
            if not math.isfinite(losses['total_loss']):
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            optimizer.zero_grad()
            with timeit('backward', args.timing):
                total_loss.backward()
            optimizer.step()
            total_loss_d = None
            ac_loss_real = None
            ac_loss_fake = None
            d_losses = {}

            # if img_discriminator is not None:
            #   d_img_losses = LossManager()
            #   imgs_fake = imgs_pred.detach()
            #   scores_fake = img_discriminator(imgs_fake)
            #   scores_real = img_discriminator(imgs)

            #   d_img_gan_loss = gan_d_loss(scores_real, scores_fake)
            #   d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')

            #   optimizer_d_img.zero_grad()
            #   d_img_losses.total_loss.backward()
            #   optimizer_d_img.step()

            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                for name, val in losses.items():
                    print(' G [%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                checkpoint['losses_ts'].append(t)

                # if img_discriminator is not None:
                #   for name, val in d_img_losses.items():
                #     print(' D_img [%s]: %.4f' % (name, val))
                #     checkpoint['d_losses'][name].append(val)

            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, t, train_loader, model)
                t_losses = train_results[0]

                print('checking on val')
                val_results = check_model(args, t, val_loader, model)
                val_losses = val_results[0]

                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)

                checkpoint['model_state'] = model.state_dict()

                if obj_discriminator is not None:
                    checkpoint['d_obj_state'] = obj_discriminator.state_dict()
                    checkpoint['d_obj_optim_state'] = optimizer_d_obj.state_dict()

                if img_discriminator is not None:
                    checkpoint['d_img_state'] = img_discriminator.state_dict()
                    checkpoint['d_img_optim_state'] = optimizer_d_img.state_dict()

                checkpoint['optim_state'] = optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = [
                    'model_state', 'optim_state', 'model_best_state',
                    'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                    'd_img_state', 'd_img_optim_state', 'd_img_best_state'
                ]
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
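The loop relies on a timeit context manager that the snippet never defines; below is a minimal sketch consistent with how it is called (timeit('forward', args.timing)). The implementation is an assumption, not the original helper:

import time
from contextlib import contextmanager

@contextmanager
def timeit(msg, should_time=True):
    # Assumed helper: print how long the wrapped block took when should_time is set.
    if should_time:
        t0 = time.time()
    yield
    if should_time:
        print('%s took %.2f ms' % (msg, (time.time() - t0) * 1000.0))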
Example #6
from os.path import dirname, join

from transformers import EncoderDecoderConfig, EncoderDecoderModel

def get_from_pretrained(path):
    # The config.json is expected one directory above the weights path.
    conf_path = join(dirname(path), "config.json")
    conf = EncoderDecoderConfig.from_pretrained(conf_path)
    model = EncoderDecoderModel.from_pretrained(path, config=conf)
    return model
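A hedged usage sketch; the directory layout (config.json sitting one level above the checkpoint directory) is inferred from the helper, and the path below is hypothetical:

model = get_from_pretrained('./models/checkpoint-1500')  # hypothetical path; config.json in ./models/
model.eval()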