# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))
if opt.netD != '':
    NetD.load_state_dict(torch.load(opt.netD))

optimizer_encoder = optim.RMSprop(params=NetE.parameters(), lr=lr, alpha=0.9,
                                  eps=1e-8, weight_decay=0, momentum=0, centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(), lr=lr, alpha=0.9,
                                  eps=1e-8, weight_decay=0, momentum=0, centered=False)
optimizer_discriminator = optim.RMSprop(params=NetD.parameters(), lr=lr, alpha=0.9,
                                        eps=1e-8, weight_decay=0, momentum=0, centered=False)

data, _ = next(iter(dataloader))
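# The `opt` namespace above is presumably built with argparse; a minimal sketch,
# assuming flag names inferred from the attributes used in these snippets
# (defaults here are illustrative assumptions, not the author's values):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--netE', default='', help='path to a pretrained encoder checkpoint (optional)')
parser.add_argument('--netG', default='', help='path to a pretrained decoder/generator checkpoint (optional)')
parser.add_argument('--netD', default='', help='path to a pretrained discriminator checkpoint (optional)')
parser.add_argument('--outf', default='./output', help='folder for sample images and checkpoints')
parser.add_argument('--niter', type=int, default=25, help='number of training epochs')
opt = parser.parse_args()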
NetE = Encoder(imageSize, nc, ngf, nz).to(device)
sampler = Sampler().to(device)  # lowercase instance name so it does not shadow the Sampler class
NetG = Decoder(nc, ngf, nz).to(device)
NetE.apply(weights_init)
NetG.apply(weights_init)

# load weights
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))

optimizer_encoder = optim.RMSprop(params=NetE.parameters(), lr=lr, alpha=0.9,
                                  eps=1e-8, weight_decay=0, momentum=0, centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(), lr=lr, alpha=0.9,
                                  eps=1e-8, weight_decay=0, momentum=0, centered=False)

data, _ = next(iter(dataloader))
fixed_batch = data.to(device)  # torch.autograd.Variable is deprecated; tensors work directly
vutils.save_image(fixed_batch, '%s/real_samples.png' % opt.outf, normalize=True)

margin = 0.6
equilibrium = 0.68

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        # input
        real_cpu = data[0]
        batch_size = real_cpu.size(0)
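# The Sampler module is not defined in these snippets. In an encoder/decoder model
# of this kind it is typically the VAE reparameterization step z = mu + sigma * eps;
# a minimal sketch under that assumption (the (mu, logvar) interface is hypothetical):
import torch
import torch.nn as nn

class Sampler(nn.Module):
    """Draw z ~ N(mu, sigma^2) via the reparameterization trick so gradients flow through mu and logvar."""
    def forward(self, mu, logvar):
        std = torch.exp(0.5 * logvar)  # convert log-variance to standard deviation
        eps = torch.randn_like(std)    # noise from N(0, I), same shape and device as std
        return mu + eps * std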
def main(args):
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing: augmentation plus the normalization expected by the pretrained ResNet
    transform = transforms.Compose([
        transforms.RandomCrop(args.im_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform,
                             args.batch_size, shuffle=True, num_workers=args.num_workers)

    # Configure the network
    encoder = Encoder(args.embed_size).to(device)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device)

    # Loss and optimizer; only the decoder and the encoder's new linear/bn layers are trained
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            # mini-batch
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(), os.path.join(args.model_path, 'decoder.ckpt'))
                torch.save(encoder.state_dict(), os.path.join(args.model_path, 'encoder.ckpt'))
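# Both trainers compute the loss against `pack_padded_sequence(...)[0]`. A
# self-contained illustration with toy data of what that flattening produces:
# the packed data is time-major and the padding positions are dropped entirely,
# so the loss never sees <pad> tokens.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

captions = torch.tensor([[1, 2, 3, 4],   # caption of length 4
                         [5, 6, 7, 0]])  # caption of length 3 plus one <pad>
lengths = [4, 3]                         # must be sorted in decreasing order
targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
print(targets)  # tensor([1, 5, 2, 6, 3, 7, 4])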
def train(args):
    # Preprocess the data, generating the vocab and data files
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])
    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # Image transforms with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'], Data, vocab, transform,
                             args['batch_size'], shuffle=True, num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab), args['num_layers']).cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training info
            if i % args['log_step'] == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'.format(
                    epoch, args['num_epochs'], i, total_step, loss.item(), np.exp(loss.item())))

            # Save the models
            if (i + 1) % args['save_step'] == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args['model_path'], 'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args['model_path'], 'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))

        # Also save the models once at the end of each epoch
        torch.save(decoder.state_dict(),
                   os.path.join(args['model_path'], 'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(encoder.state_dict(),
                   os.path.join(args['model_path'], 'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
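# Unlike main(args) above, train(args) reads its hyperparameters from a plain dict.
# A sketch of a call site: the keys below are exactly the ones the function reads,
# while every value (and every path) is an illustrative assumption:
args = {
    'cap_path': 'data/captions.txt',       # raw caption file consumed by preprocess()
    'vocab_path': 'data/vocab.pkl',        # pickled vocabulary written by preprocess()
    'data_path': 'data/data.pkl',          # pickled caption data written by preprocess()
    'model_path': 'models/',               # checkpoint directory
    'train_img_path': 'data/train_images/',
    'resize': 224,
    'batch_size': 128,
    'num_workers': 2,
    'embed_size': 256,
    'pooling_kernel': 2,
    'hidden_size': 512,
    'num_layers': 1,
    'learning_rate': 1e-3,
    'num_epochs': 5,
    'log_step': 10,
    'save_step': 1000,
}
train(args)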