def convert_to_huggingface(self):
    """Re-assemble this model's encoder and decoder into one HF ``EncoderDecoderModel``.

    Both sub-models are round-tripped through temporary ``save_pretrained``
    directories so the generic ``Auto*`` loaders can reload them, then they are
    wired together under the configuration stored at ``./models/checkpoint-1500``.

    Returns:
        EncoderDecoderModel: the combined Hugging Face model.
    """
    # Persist the current sub-models so the Auto* loaders can pick them up.
    self.encoder.save_pretrained('./tmp_encoder')
    self.decoder.save_pretrained('./tmp_decoder')

    # NOTE(review): the config is read from a fixed checkpoint path while the
    # weights come from the temp dirs — confirm this mix is intended.
    combined_config = EncoderDecoderConfig.from_pretrained('./models/checkpoint-1500')
    reloaded_encoder = AutoModel.from_pretrained('./tmp_encoder')
    reloaded_decoder = AutoModelForCausalLM.from_pretrained('./tmp_decoder', add_cross_attention=True)

    return EncoderDecoderModel(
        config=combined_config,
        encoder=reloaded_encoder,
        decoder=reloaded_decoder,
    )
def encoder_decoder_example():
    """Demo of the Hugging Face EncoderDecoder API.

    Builds a Bert2Bert (or GPT2-to-GPT2) encoder-decoder model from fresh
    configurations, runs a forward/training pass (disabled via ``if False:``),
    saves and reloads the model, and finally generates text from a toy prompt.

    NOTE(review): ``torch``, ``BertTokenizer`` and ``GPT2Tokenizer`` are not
    imported inside this function — presumably they come from module-level
    imports outside this chunk; verify.
    """
    from transformers import EncoderDecoderConfig, EncoderDecoderModel
    from transformers import BertConfig, GPT2Config

    pretrained_model_name = 'bert-base-uncased'
    #pretrained_model_name = 'gpt2'

    if 'bert' in pretrained_model_name:
        # Initialize a BERT bert-base-uncased style configuration.
        config_encoder, config_decoder = BertConfig(), BertConfig()
    elif 'gpt2' in pretrained_model_name:
        config_encoder, config_decoder = GPT2Config(), GPT2Config()
    else:
        print('Invalid model, {}.'.format(pretrained_model_name))
        return

    config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)

    if 'bert' in pretrained_model_name:
        # Initialize a Bert2Bert model from the bert-base-uncased style configurations.
        model = EncoderDecoderModel(config=config)
        #model = EncoderDecoderModel.from_encoder_decoder_pretrained(pretrained_model_name, pretrained_model_name)  # Initialize Bert2Bert from pre-trained checkpoints.
        tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
    elif 'gpt2' in pretrained_model_name:
        model = EncoderDecoderModel(config=config)
        tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name)

    #print('Configuration of the encoder & decoder:\n{}.\n{}.'.format(model.config.encoder, model.config.decoder))
    #print('Encoder type = {}, decoder type = {}.'.format(type(model.encoder), type(model.decoder)))

    if False:  # Disabled demo branch: inspect/tweak the sub-configs.
        # Access the model configuration.
        config_encoder = model.config.encoder
        config_decoder = model.config.decoder
        # Set decoder config to causal LM.
        config_decoder.is_decoder = True
        config_decoder.add_cross_attention = True

    #--------------------
    input_ids = torch.tensor(tokenizer.encode('Hello, my dog is cute', add_special_tokens=True)).unsqueeze(0)  # Batch size 1.

    if False:  # Disabled demo branch: forward & training passes.
        # Forward.
        outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)

        # Train.
        outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
        loss, logits = outputs.loss, outputs.logits

    # Save the model, including its configuration.
    model.save_pretrained('my-model')

    #--------------------
    # Load model and config from pretrained folder.
    encoder_decoder_config = EncoderDecoderConfig.from_pretrained('my-model')
    model = EncoderDecoderModel.from_pretrained('my-model', config=encoder_decoder_config)

    #--------------------
    # Generate.
    # REF [site] >>
    #   https://huggingface.co/transformers/internal/generation_utils.html
    #   https://huggingface.co/blog/how-to-generate
    generated = model.generate(input_ids, decoder_start_token_id=model.config.decoder.pad_token_id)
    #generated = model.generate(input_ids, max_length=50, num_beams=5, no_repeat_ngram_size=2, num_return_sequences=5, do_sample=True, top_k=0, temperature=0.7, early_stopping=True, decoder_start_token_id=model.config.decoder.pad_token_id)
    print('Generated = {}.'.format(tokenizer.decode(generated[0], skip_special_tokens=True)))
# Top-level script: load a fine-tuned bert-tiny encoder-decoder checkpoint,
# register chat-style special tokens, and prepare encoder/decoder inputs.
# NOTE(review): this fragment is TRUNCATED at the end (the final tokenizer(...)
# call is cut mid-arguments) — the continuation lies outside this chunk.

# Special tokens used by the dialogue formatting (persona-chat style).
SPECIAL_TOKENS = [ "<bos>", "<eos>", "<persona>", "<speaker1>", "<speaker2>", "<pad>" ]
# Mapping consumed by tokenizer.add_special_tokens(); keys follow the
# transformers special-token attribute names.
ATTR_TO_SPECIAL_TOKEN = { 'bos_token': '<bos>', 'eos_token': '<eos>', 'pad_token': '<pad>', 'additional_special_tokens': ['<speaker1>', '<speaker2>', '<persona>'] }

tokenizer = BertTokenizer.from_pretrained("prajjwal1/bert-tiny")
tokenizer.add_special_tokens(ATTR_TO_SPECIAL_TOKEN)

encoder_decoder_config = EncoderDecoderConfig.from_pretrained('./models/checkpoint-1200')
model = EncoderDecoderModel.from_pretrained('./models/checkpoint-1200', config=encoder_decoder_config)
# The vocabulary grew when the special tokens were added, so both embedding
# tables must be resized to match the tokenizer.
model.get_encoder().resize_token_embeddings(len(tokenizer))
model.get_decoder().resize_token_embeddings(len(tokenizer))
print(type(model.get_encoder()), type(model.get_decoder()))
# model = SimpleEncoderDecoder(tokenizer)
# model = load()
# model.to('cpu')

# create ids of encoded input vectors
input_ids = tokenizer("I want to buy a car", return_tensors="pt").input_ids

# create BOS token
# NOTE(review): statement truncated here in this chunk — remaining arguments
# of this tokenizer(...) call are not visible.
decoder_input_ids = tokenizer("<bos>", add_special_tokens=False,
def load_config(model_name_or_path):
    """Fetch the ``EncoderDecoderConfig`` stored at *model_name_or_path*.

    Args:
        model_name_or_path: hub model id or local directory/config path.

    Returns:
        The loaded ``EncoderDecoderConfig``.
    """
    loaded_config = EncoderDecoderConfig.from_pretrained(model_name_or_path)
    return loaded_config
def main(args):
    """Training entry point: fine-tune an encoder-decoder model with optional
    object/image GAN discriminators, with checkpoint restore/save support.

    Args:
        args: parsed command-line namespace (learning_rate, num_iterations,
            eval_mode_after, print_every, checkpoint_every, output_dir,
            checkpoint_name, restore_from_checkpoint, gan_loss_type, timing, ...).

    NOTE(review): relies on module-level helpers not visible in this chunk
    (check_args, build_loaders, build_obj_discriminator, build_img_discriminator,
    get_gan_losses, check_model, timeit, USE_GPU, optim) — verify against the
    rest of the file.
    """
    print(args)
    check_args(args)

    # Pick CUDA vs CPU tensor types up front; discriminators are cast to these.
    if USE_GPU:
        float_dtype = torch.cuda.FloatTensor
        long_dtype = torch.cuda.LongTensor
    else:
        float_dtype = torch.FloatTensor
        long_dtype = torch.LongTensor

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased-itokens")
    # add_tokens(tokenizer)
    vocab, train_loader, val_loader = build_loaders(args, tokenizer)

    model_kwargs = {}
    encoder_decoder_config = EncoderDecoderConfig.from_pretrained("bert-base-uncased-itokens")
    model = EncoderDecoderModel.from_pretrained("bert-base-uncased-itokens", config=encoder_decoder_config)
    # modify_network(model, tokenizer)
    # model, model_kwargs = build_model(args, vocab)
    # model.type(float_dtype)
    # NOTE(review): unconditional .cuda() even when USE_GPU is False — confirm.
    model.cuda()
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # Optional GAN discriminators; each gets its own Adam optimizer.
    obj_discriminator, d_obj_kwargs = build_obj_discriminator(args, vocab)
    img_discriminator, d_img_kwargs = build_img_discriminator(args, vocab)
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    if obj_discriminator is not None:
        obj_discriminator.type(float_dtype)
        obj_discriminator.train()
        print(obj_discriminator)
        optimizer_d_obj = torch.optim.Adam(obj_discriminator.parameters(), lr=args.learning_rate)

    if img_discriminator is not None:
        img_discriminator.type(float_dtype)
        img_discriminator.train()
        print(img_discriminator)
        optimizer_d_img = torch.optim.Adam(img_discriminator.parameters(), lr=args.learning_rate)

    # Resolve the checkpoint path to resume from, if any.
    restore_path = None
    if args.restore_from_checkpoint:
        restore_path = '%s_with_model.pt' % args.checkpoint_name
        restore_path = os.path.join(args.output_dir, restore_path)
    if restore_path is not None and os.path.isfile(restore_path):
        # Resume: restore model/optimizer (and discriminator) states plus counters.
        print('Restoring from checkpoint:')
        print(restore_path)
        checkpoint = torch.load(restore_path)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optim_state'])
        if obj_discriminator is not None:
            obj_discriminator.load_state_dict(checkpoint['d_obj_state'])
            optimizer_d_obj.load_state_dict(checkpoint['d_obj_optim_state'])
        if img_discriminator is not None:
            img_discriminator.load_state_dict(checkpoint['d_img_state'])
            optimizer_d_img.load_state_dict(checkpoint['d_img_optim_state'])
        t = checkpoint['counters']['t']
        # Switch to eval mode if training had already passed eval_mode_after.
        if 0 <= args.eval_mode_after <= t:
            model.eval()
        else:
            model.train()
        epoch = checkpoint['counters']['epoch']
    else:
        # Fresh run: initialize counters and an empty checkpoint record.
        t, epoch = 0, 0
        checkpoint = {
            'args': args.__dict__, 'vocab': vocab, 'model_kwargs': model_kwargs,
            'd_obj_kwargs': d_obj_kwargs, 'd_img_kwargs': d_img_kwargs,
            'losses_ts': [], 'losses': defaultdict(list), 'd_losses': defaultdict(list),
            'checkpoint_ts': [], 'train_batch_data': [], 'train_samples': [],
            'train_iou': [], 'val_batch_data': [], 'val_samples': [],
            'val_losses': defaultdict(list), 'val_iou': [], 'norm_d': [], 'norm_g': [],
            'counters': {
                't': None,
                'epoch': None,
            },
            'model_state': None, 'model_best_state': None, 'optim_state': None,
            'd_obj_state': None, 'd_obj_best_state': None, 'd_obj_optim_state': None,
            'd_img_state': None, 'd_img_best_state': None, 'd_img_optim_state': None,
            'best_t': [],
        }

    # Main training loop, bounded by args.num_iterations total steps.
    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for batch in train_loader:
            print(batch)
            # NOTE(review): debug leftover — this terminates the process after
            # printing the first batch, so nothing below ever runs. Remove to train.
            exit()
            if t == args.eval_mode_after:
                print('switching to eval mode')
                model.eval()
                optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
            t += 1
            if USE_GPU:
                for k in batch.keys():
                    batch[k] = batch[k].cuda().long()
            masks = None

            with timeit('forward', args.timing):
                output = model(**batch)
            # with timeit('loss', args.timing):
            #     # Skip the pixel loss if using GT boxes
            #     skip_pixel_loss = False
            #     total_loss, losses = calculate_model_losses(
            #         args, skip_pixel_loss, model, imgs, imgs_pred)

            # if img_discriminator is not None:
            #     scores_fake = img_discriminator(imgs_pred)
            #     weight = args.discriminator_loss_weight * args.d_img_weight
            #     total_loss = add_loss(total_loss, gan_g_loss(scores_fake), losses,
            #                           'g_gan_img_loss', weight)

            losses = {}
            total_loss = output["loss"]
            losses['total_loss'] = total_loss.item()

            # Guard against NaN/inf losses: skip the backward pass entirely.
            if not math.isfinite(losses['total_loss']):
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            optimizer.zero_grad()
            with timeit('backward', args.timing):
                total_loss.backward()
            optimizer.step()

            total_loss_d = None
            ac_loss_real = None
            ac_loss_fake = None
            d_losses = {}

            # if img_discriminator is not None:
            #     d_img_losses = LossManager()
            #     imgs_fake = imgs_pred.detach()
            #     scores_fake = img_discriminator(imgs_fake)
            #     scores_real = img_discriminator(imgs)
            #     d_img_gan_loss = gan_d_loss(scores_real, scores_fake)
            #     d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')
            #     optimizer_d_img.zero_grad()
            #     d_img_losses.total_loss.backward()
            #     optimizer_d_img.step()

            # Periodic console logging of generator losses.
            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                for name, val in losses.items():
                    print(' G [%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                checkpoint['losses_ts'].append(t)
                # if img_discriminator is not None:
                #     for name, val in d_img_losses.items():
                #         print(' D_img [%s]: %.4f' % (name, val))
                #         checkpoint['d_losses'][name].append(val)

            # Periodic evaluation + checkpointing.
            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, t, train_loader, model)
                t_losses = train_results[0]

                print('checking on val')
                val_results = check_model(args, t, val_loader, model)
                val_losses = val_results[0]

                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)
                checkpoint['model_state'] = model.state_dict()

                if obj_discriminator is not None:
                    checkpoint['d_obj_state'] = obj_discriminator.state_dict()
                    checkpoint['d_obj_optim_state'] = optimizer_d_obj.state_dict()
                if img_discriminator is not None:
                    checkpoint['d_img_state'] = img_discriminator.state_dict()
                    checkpoint['d_img_optim_state'] = optimizer_d_img.state_dict()

                checkpoint['optim_state'] = optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to ', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = [
                    'model_state', 'optim_state', 'model_best_state',
                    'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                    'd_img_state', 'd_img_optim_state', 'd_img_best_state'
                ]
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
def get_from_pretrained(path):
    """Load an ``EncoderDecoderModel`` from *path*.

    The configuration is read from ``config.json`` in the directory that
    contains *path*, then used when loading the model weights.

    Args:
        path: checkpoint path whose parent directory holds ``config.json``.

    Returns:
        The loaded ``EncoderDecoderModel``.
    """
    config_file = join(dirname(path), "config.json")
    loaded_config = EncoderDecoderConfig.from_pretrained(config_file)
    return EncoderDecoderModel.from_pretrained(path, config=loaded_config)