예제 #1
0
# Restore saved weights for every network whose checkpoint path was given;
# an empty string means no checkpoint was supplied for that network.
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG))
if opt.netD != '':
    NetD.load_state_dict(torch.load(opt.netD))


def _make_rmsprop(network):
    """Return an RMSprop optimizer over all parameters of ``network``.

    Every optimizer in this script uses the same hyper-parameters; only the
    learning rate comes from the surrounding script (module-level ``lr``).
    """
    return optim.RMSprop(
        params=network.parameters(),
        lr=lr,
        alpha=0.9,
        eps=1e-8,
        weight_decay=0,
        momentum=0,
        centered=False,
    )


# One independent optimizer per network.
optimizer_encorder = _make_rmsprop(NetE)
optimizer_decoder = _make_rmsprop(NetG)
optimizer_discriminator = _make_rmsprop(NetD)

# Pull a single batch from the dataloader; its second element is discarded.
data, _ = next(iter(dataloader))
예제 #2
0
# Instantiate the encoder, sampler and decoder modules and move them to
# ``device``.
NetE = Encoder(imageSize, nc, ngf, nz).to(device)
# NOTE(review): this rebinds the name ``Sampler`` from the class to an
# instance, so the class can no longer be instantiated after this line.
# The name is kept because code outside this view may rely on it.
Sampler = Sampler().to(device)
NetG = Decoder(nc, ngf, nz).to(device)

# Apply the custom weight initializer to every sub-module.
NetE.apply(weights_init)
NetG.apply(weights_init)

# Load weights when a checkpoint path was supplied (empty string = none).
# ``map_location=device`` lets a checkpoint saved on a different device
# (e.g. a GPU) be restored on whatever device this run uses.
if opt.netE != '':
    NetE.load_state_dict(torch.load(opt.netE, map_location=device))
if opt.netG != '':
    NetG.load_state_dict(torch.load(opt.netG, map_location=device))

# One RMSprop optimizer per network, with identical hyper-parameters.
optimizer_encorder = optim.RMSprop(params=NetE.parameters(),
                                   lr=lr,
                                   alpha=0.9,
                                   eps=1e-8,
                                   weight_decay=0,
                                   momentum=0,
                                   centered=False)
optimizer_decoder = optim.RMSprop(params=NetG.parameters(),
                                  lr=lr,
                                  alpha=0.9,
                                  eps=1e-8,
                                  weight_decay=0,
                                  momentum=0,
                                  centered=False)

# Keep one batch fixed and write it out as a reference image grid.
# The deprecated ``Variable`` wrapper is not needed: tensors can be moved
# to the device directly.
data, _ = next(iter(dataloader))
fixed_batch = data.to(device)
vutils.save_image(fixed_batch,
                  '%s/real_samples.png' % opt.outf,
                  normalize=True)

# Training constants used later in the script.
# NOTE(review): their exact role is not visible in this excerpt.
margin = 0.6
equilibrium = 0.68

# Training loop: ``opt.niter`` passes over the dataloader.
# NOTE(review): the loop body continues beyond this excerpt.
for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        # Input: first element of the batch (presumably the image tensor,
        # with labels in the second element -- TODO confirm).
        real_cpu = data[0]
        # Size of the leading dimension of this batch.
        batch_size = real_cpu.size(0)
예제 #3
0
def main(args):
    """Train the image encoder / caption decoder pair and save checkpoints.

    Relies on the module-level ``device`` and on the project helpers
    ``get_loader``, ``Encoder`` and ``Decoder``.

    Args:
        args: Object exposing the attributes ``model_path``, ``im_size``,
            ``vocab_path``, ``image_dir``, ``caption_path``, ``batch_size``,
            ``num_workers``, ``embed_size``, ``hidden_size``, ``num_layers``,
            ``learning_rate``, ``num_epochs``, ``log_step`` and ``save_step``.

    Side effects:
        Creates ``args.model_path`` if needed and overwrites
        ``decoder.ckpt`` / ``encoder.ckpt`` in it every ``args.save_step``
        steps. Prints the loss every ``args.log_step`` steps.
    """
    # Create the model directory; ``exist_ok`` avoids the race between an
    # existence check and the creation.
    os.makedirs(args.model_path, exist_ok=True)

    # Image preprocessing, augmentation, normalization for using the
    # pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.im_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary.
    # NOTE: pickle can execute arbitrary code while loading; only point
    # ``vocab_path`` at files from a trusted source.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Configure the network
    encoder = Encoder(args.embed_size).to(device)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                      args.num_layers).to(device)

    # Loss and optimizer. Only the decoder plus the encoder's ``linear`` and
    # ``bn`` layers are handed to the optimizer; any other encoder
    # parameters are not updated by it.
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Checkpoint paths do not change during training, so build them once.
    decoder_ckpt = os.path.join(args.model_path, 'decoder.ckpt')
    encoder_ckpt = os.path.join(args.model_path, 'encoder.ckpt')

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Move the mini-batch to the training device.
            images = images.to(device)
            captions = captions.to(device)
            # Packing drops the padded positions so the targets line up
            # with the decoder's outputs.
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints (each save overwrites the last).
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(), decoder_ckpt)
                torch.save(encoder.state_dict(), encoder_ckpt)
예제 #4
0
def _save_encoder_decoder(encoder, decoder, model_path, epoch, step):
    """Write decoder and encoder state dicts to ``model_path``.

    Files are named ``decoder-<epoch>-<step>.ckpt`` and
    ``encoder-<epoch>-<step>.ckpt``.
    """
    torch.save(
        decoder.state_dict(),
        os.path.join(model_path, 'decoder-{}-{}.ckpt'.format(epoch, step)))
    torch.save(
        encoder.state_dict(),
        os.path.join(model_path, 'encoder-{}-{}.ckpt'.format(epoch, step)))


def train(args):
    """Preprocess the data, then train the encoder/decoder on CUDA.

    Relies on the project helpers ``preprocess``, ``get_loader``,
    ``Encoder`` and ``Decoder``.

    Args:
        args: Mapping with the keys ``cap_path``, ``vocab_path``,
            ``data_path``, ``model_path``, ``resize``, ``train_img_path``,
            ``batch_size``, ``num_workers``, ``embed_size``,
            ``pooling_kernel``, ``hidden_size``, ``num_layers``,
            ``learning_rate``, ``num_epochs``, ``log_step`` and
            ``save_step``.

    Side effects:
        Creates ``args['model_path']`` if needed, prints loss and perplexity
        every ``log_step`` steps, and writes numbered checkpoints every
        ``save_step`` steps and at the end of each epoch.
    """
    # Preprocess the data: generates the vocab and data files.
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    # ``makedirs`` with ``exist_ok`` also handles nested paths and avoids
    # the race between an existence check and the creation.
    os.makedirs(args['model_path'], exist_ok=True)

    # Image preprocessing with augmentation.
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # NOTE: pickle can execute arbitrary code while loading; both files are
    # expected to be the ones written by ``preprocess`` above.
    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)

    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    # Only the decoder plus the encoder's ``linear`` and ``bn`` layers are
    # handed to the optimizer.
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        # Keeps ``i + 1`` defined for the end-of-epoch save even when the
        # loader yields no batches.
        i = -1
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            # Packing drops the padded positions so the targets line up
            # with the decoder's outputs.
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training info.
            if i % args['log_step'] == 0:
                loss_value = loss.item()
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss_value, np.exp(loss_value)))

            # Save the model every ``save_step`` steps.
            if (i + 1) % args['save_step'] == 0:
                _save_encoder_decoder(encoder, decoder, args['model_path'],
                                      epoch + 1, i + 1)

        # Also save the model once at the end of every epoch.
        _save_encoder_decoder(encoder, decoder, args['model_path'],
                              epoch + 1, i + 1)