class Tester: """ 测试 """ def __init__(self, _hparams): self.test_loader = get_test_loader(_hparams) self.encoder = CNN().to(DEVICE) self.decoder = RNN(fea_dim=_hparams.fea_dim, embed_dim=_hparams.embed_dim, hid_dim=_hparams.hid_dim, max_sen_len=_hparams.max_sen_len, vocab_pkl=_hparams.vocab_pkl).to(DEVICE) self.test_cap = _hparams.test_cap def testing(self, save_path, test_path): """ 测试 :param save_path: 模型的保存地址 :param test_path: 保存测试过程生成句子的路径 :return: """ print('*' * 20, 'test', '*' * 20) self.load_models(save_path) self.set_eval() sen_json = [] with torch.no_grad(): for val_step, (img, img_id) in tqdm(enumerate(self.test_loader)): img = img.to(DEVICE) features = self.encoder.forward(img) sens, _ = self.decoder.sample(features) sen_json.append({'image_id': int(img_id), 'caption': sens[0]}) with open(test_path, 'w') as f: json.dump(sen_json, f) result = coco_eval(self.test_cap, test_path) for metric, score in result: print(metric, score) def load_models(self, save_path): ckpt = torch.load(save_path, map_location={'cuda:2': 'cuda:0' }) # 映射是因为解决保存模型的卡与加载模型的卡不一致的问题 encoder_state_dict = ckpt['encoder_state_dict'] self.encoder.load_state_dict(encoder_state_dict) decoder_state_dict = ckpt['decoder_state_dict'] self.decoder.load_state_dict(decoder_state_dict) def set_eval(self): self.encoder.eval() self.decoder.eval()
n_layers=args.n_layers, dropout=args.dropout, bidirectional=args.bidirectional, nonlinearity=args.nonlinearity) # load the best model model.load_state_dict(torch.load(args.model_file)) model.eval() ## enable evaluation modes # set up output filename if args.sample_idx is not None: output_filename = 'sampled-SMILES-{}.smi'.format(args.sample_idx) else: output_filename = 'sampled-SMILES.smi' output_file = os.path.join(args.output_dir, output_filename) # sample a set of SMILES from the final, trained model sampled_count = 0 batch_size = 512 while sampled_count < args.mols_per_file: sampled_smiles, NLLs = model.sample(batch_size, return_smiles=True, return_nll=True) # write sampled SMILES with open(output_file, 'a+') as f: for sm, nll in zip(sampled_smiles, NLLs): _ = f.write(sm + '\t' + str(round(nll.detach().item(), 6)) + '\n') sampled_count += batch_size
def main(args): # hyperparameters batch_size = args.batch_size num_workers = 1 # Image Preprocessing transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), ]) # load COCOs dataset IMAGES_PATH = 'data/train2014' CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json' vocab = load_vocab() train_loader = get_coco_data_loader(path=IMAGES_PATH, json=CAPTION_FILE_PATH, vocab=vocab, transform=transform, batch_size=batch_size, shuffle=True, num_workers=num_workers) IMAGES_PATH = 'data/val2014' CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json' val_loader = get_coco_data_loader(path=IMAGES_PATH, json=CAPTION_FILE_PATH, vocab=vocab, transform=transform, batch_size=batch_size, shuffle=True, num_workers=num_workers) losses_val = [] losses_train = [] # Build the models ngpu = 1 initial_step = initial_epoch = 0 embed_size = args.embed_size num_hiddens = args.num_hidden learning_rate = 1e-3 num_epochs = 3 log_step = args.log_step save_step = 500 checkpoint_dir = args.checkpoint_dir encoder = CNN(embed_size) decoder = RNN(embed_size, num_hiddens, len(vocab), 1, rec_unit=args.rec_unit) # Loss criterion = nn.CrossEntropyLoss() if args.checkpoint_file: encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models( args.checkpoint_file, args.sample) initial_step, initial_epoch, losses_train, losses_val = meta encoder.load_state_dict(encoder_state_dict) decoder.load_state_dict(decoder_state_dict) else: params = list(decoder.parameters()) + list( encoder.linear.parameters()) + list(encoder.batchnorm.parameters()) optimizer = torch.optim.Adam(params, lr=learning_rate) if torch.cuda.is_available(): encoder.cuda() decoder.cuda() if args.sample: return utils.sample(encoder, decoder, vocab, val_loader) # Train the Models total_step = len(train_loader) try: for epoch in range(initial_epoch, num_epochs): for step, (images, captions, lengths) in enumerate(train_loader, start=initial_step): # Set mini-batch dataset images = utils.to_var(images, volatile=True) captions = utils.to_var(captions) targets = pack_padded_sequence(captions, lengths, batch_first=True)[0] # Forward, Backward and Optimize decoder.zero_grad() encoder.zero_grad() if ngpu > 1: # run on multiple GPU features = nn.parallel.data_parallel( encoder, images, range(ngpu)) outputs = nn.parallel.data_parallel( decoder, features, range(ngpu)) else: # run on single GPU features = encoder(images) outputs = decoder(features, captions, lengths) train_loss = criterion(outputs, targets) losses_train.append(train_loss.data[0]) train_loss.backward() optimizer.step() # Run validation set and predict if step % log_step == 0: encoder.batchnorm.eval() # run validation set batch_loss_val = [] for val_step, (images, captions, lengths) in enumerate(val_loader): images = utils.to_var(images, volatile=True) captions = utils.to_var(captions, volatile=True) targets = pack_padded_sequence(captions, lengths, batch_first=True)[0] features = encoder(images) outputs = decoder(features, captions, lengths) val_loss = criterion(outputs, targets) batch_loss_val.append(val_loss.data[0]) losses_val.append(np.mean(batch_loss_val)) # predict sampled_ids = decoder.sample(features) sampled_ids = sampled_ids.cpu().data.numpy()[0] sentence = utils.convert_back_to_text(sampled_ids, vocab) print('Sample:', sentence) true_ids = captions.cpu().data.numpy()[0] sentence = utils.convert_back_to_text(true_ids, vocab) print('Target:', sentence) print( 'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}' .format(epoch, step, losses_train[-1], losses_val[-1])) encoder.batchnorm.train() # Save the models if (step + 1) % save_step == 0: utils.save_models(encoder, decoder, optimizer, step, epoch, losses_train, losses_val, checkpoint_dir) utils.dump_losses( losses_train, losses_val, os.path.join(checkpoint_dir, 'losses.pkl')) except KeyboardInterrupt: pass finally: # Do final save utils.save_models(encoder, decoder, optimizer, step, epoch, losses_train, losses_val, checkpoint_dir) utils.dump_losses(losses_train, losses_val, os.path.join(checkpoint_dir, 'losses.pkl'))
class Trainer: """ 训练 """ def __init__(self, _hparams): utils.set_seed(_hparams.fixed_seed) self.train_loader = get_train_loader(_hparams) self.val_loader = get_val_loader(_hparams) self.encoder = CNN().to(DEVICE) self.decoder = RNN(fea_dim=_hparams.fea_dim, embed_dim=_hparams.embed_dim, hid_dim=_hparams.hid_dim, max_sen_len=_hparams.max_sen_len, vocab_pkl=_hparams.vocab_pkl).to(DEVICE) self.loss_fn = nn.CrossEntropyLoss() self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr) self.writer = SummaryWriter() self.max_sen_len = _hparams.max_sen_len self.val_cap = _hparams.val_cap self.ft_encoder_lr = _hparams.ft_encoder_lr self.ft_decoder_lr = _hparams.ft_decoder_lr self.best_CIDEr = 0 def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path): print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs', '*' * 20) self.encoder.fine_tune() self.optimizer = torch.optim.Adam([ { 'params': self.encoder.parameters(), 'lr': self.ft_encoder_lr }, { 'params': self.decoder.parameters(), 'lr': self.ft_decoder_lr }, ]) self.training(fine_tune_epochs, val_interval, save_path, val_path) self.encoder.froze() print('*' * 20, 'fine tune encoder complete', '*' * 20) def get_params(self): """ 模型需要优化的全部参数,此处encoder暂时设计不用训练,故不加参数 :return: """ return list(self.decoder.parameters()) def training(self, max_epochs, val_interval, save_path, val_path): """ 训练 :param val_path: 保存验证过程生成句子的路径 :param save_path: 保存模型的地址 :param val_interval: 验证的间隔 :param max_epochs: 最大训练的轮次 :return: """ print('*' * 20, 'train', '*' * 20) for epoch in range(max_epochs): self.set_train() epoch_loss = 0 epoch_steps = len(self.train_loader) for step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)): # batch_size * 3 * 224 * 224 img = img.to(DEVICE) cap = cap.to(DEVICE) self.optimizer.zero_grad() features = self.encoder.forward(img) outputs = self.decoder.forward(features, cap) outputs = pack_padded_sequence(outputs, cap_len - 1, batch_first=True)[0] targets = pack_padded_sequence(cap[:, 1:], cap_len - 1, batch_first=True)[0] train_loss = self.loss_fn(outputs, targets) epoch_loss += train_loss.item() train_loss.backward() self.optimizer.step() epoch_loss /= epoch_steps self.writer.add_scalar('epoch_loss', epoch_loss, epoch) print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch)) if (epoch + 1) % val_interval == 0: CIDEr = self.validating(epoch, val_path) if self.best_CIDEr <= CIDEr: self.best_CIDEr = CIDEr self.save_model(save_path, epoch) def save_model(self, save_path, train_epoch): """ 保存最好的模型 :param save_path: 保存模型文件的地址 :param train_epoch: 当前训练的轮次 :return: """ model_state_dict = { 'encoder_state_dict': self.encoder.state_dict(), 'decoder_state_dict': self.decoder.state_dict(), 'tran_epoch': train_epoch, } print('*' * 20, 'save model to: ', save_path, '*' * 20) torch.save(model_state_dict, save_path) def validating(self, train_epoch, val_path): """ 验证 :param val_path: 保存验证过程生成句子的路径 :param train_epoch: 当前训练的epoch :return: """ print('*' * 20, 'validate', '*' * 20) self.set_eval() sen_json = [] with torch.no_grad(): for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)): img = img.to(DEVICE) features = self.encoder.forward(img) sens, _ = self.decoder.sample(features) sen_json.append({'image_id': int(img_id), 'caption': sens[0]}) with open(val_path, 'w') as f: json.dump(sen_json, f) result = coco_eval(self.val_cap, val_path) scores = {} for metric, score in result: scores[metric] = score self.writer.add_scalar(metric, score, train_epoch) return scores['CIDEr'] def set_train(self): self.encoder.train() self.decoder.train() def set_eval(self): self.encoder.eval() self.decoder.eval()
def main(args): # hyperparameters batch_size = args.batch_size num_workers = 2 # Image Preprocessing transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), ]) vocab = load_vocab() loader = get_basic_loader(dir_path=os.path.join(args.image_path), transform=transform, batch_size=batch_size, shuffle=True, num_workers=num_workers) # Build the models embed_size = args.embed_size num_hiddens = args.num_hidden checkpoint_path = 'checkpoints' encoder = CNN(embed_size) decoder = RNN(embed_size, num_hiddens, len(vocab), 1, rec_unit=args.rec_unit) encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models( args.checkpoint_file) encoder.load_state_dict(encoder_state_dict) decoder.load_state_dict(decoder_state_dict) if torch.cuda.is_available(): encoder.cuda() decoder.cuda() # Train the Models try: results = [] for step, (images, image_ids) in enumerate(loader): images = utils.to_var(images, volatile=True) features = encoder(images) captions = decoder.sample(features) captions = captions.cpu().data.numpy() captions = [ utils.convert_back_to_text(cap, vocab) for cap in captions ] captions_formatted = [{ 'image_id': int(img_id), 'caption': cap } for img_id, cap in zip(image_ids, captions)] results.extend(captions_formatted) print('Sample:', captions_formatted) except KeyboardInterrupt: print('Ok bye!') finally: import json file_name = 'captions_model.json' with open(file_name, 'w') as f: json.dump(results, f)
track_loss(sched_file, model, dataset, epoch, counter, loss.item(), args.batch_size) # save SMILES? if args.sample_every_epochs is not None: sample_smiles(args.output_dir, args.sample_idx, model, args.sample_size, epoch, counter) if early_stop.stop: break # append information about final training step if args.log_every_epochs is not None or args.log_every_steps is not None: sched = pd.DataFrame({'epoch': [None], 'step': [early_stop.step_at_best], 'outcome': ['training loss'], 'value': [early_stop.best_loss]}) sched.to_csv(sched_file, index=False, mode='a', header=False) # load the best model model.load_state_dict(torch.load(model_file)) model.eval() ## enable evaluation modes # sample a set of SMILES from the final, trained model sampled_smiles = [] while len(sampled_smiles) < args.sample_size: sampled_smiles.extend(model.sample(args.batch_size, return_smiles=True)) # write sampled SMILES write_smiles(sampled_smiles, smiles_file)