def __init__(self, in_shape):
    """Build a small convolutional autoencoder for inputs of shape (c, h, w).

    The encoder halves the spatial size twice via max-pooling; the decoder
    upsamples back with transposed convolutions and crops to (h, w).
    """
    super().__init__()
    c, h, w = in_shape
    encode_layers = [
        nn.Conv2d(c, 16, kernel_size=3, stride=1, padding=1),  # b, 16, 32, 32
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),                 # b, 16, 16, 16
        nn.Conv2d(16, 8, kernel_size=3, stride=1, padding=1),  # b, 8, 16, 16
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),                 # b, 8, 8, 8
    ]
    decode_layers = [
        nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2, padding=0),  # 16, 17, 17
        nn.ReLU(),
        nn.ConvTranspose2d(16, c, kernel_size=3, stride=2, padding=1),  # 3, 33, 33
        # Transposed convs overshoot the target size; crop back to (h, w).
        utils.CenterCrop(h, w),
        nn.Sigmoid(),
    ]
    self.encoder = nn.Sequential(*encode_layers)
    self.decoder = nn.Sequential(*decode_layers)
def __init__(self, in_shape, n_latent):
    """Build a VAE-style encoder/decoder pair.

    Args:
        in_shape: input shape tuple (c, h, w).
        n_latent: dimensionality of the latent code.
    """
    super().__init__()
    self.in_shape = in_shape
    self.n_latent = n_latent
    c, h, w = in_shape
    # Two stride-2 convs halve the spatial size twice, so the encoder's
    # feature map is z_dim x z_dim. (The original comment claimed three
    # downsamplings; the code performs two: h // 2**2.)
    self.z_dim = h // 2 ** 2
    self.encoder = nn.Sequential(
        nn.BatchNorm2d(c),
        nn.Conv2d(c, 32, kernel_size=4, stride=2, padding=1),   # 32, h/2, w/2
        nn.BatchNorm2d(32),
        nn.LeakyReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),  # 64, h/4, w/4
        nn.BatchNorm2d(64),
        nn.LeakyReLU(),
    )
    flat_dim = 64 * self.z_dim ** 2
    # Heads producing the latent Gaussian parameters, plus the projection
    # that re-expands a latent code for the decoder.
    self.z_mean = nn.Linear(flat_dim, n_latent)
    self.z_var = nn.Linear(flat_dim, n_latent)
    self.z_develop = nn.Linear(n_latent, flat_dim)
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=0),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        # NOTE(review): the final layer emits 1 channel even though the
        # encoder takes c channels — presumably grayscale output; confirm
        # against callers before "fixing".
        nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1),
        utils.CenterCrop(h, w),
        nn.Sigmoid(),
    )
def train(args):
    """Train a feed-forward style-transfer network (MXNet Gluon).

    Iterates over a content-image folder, pairs each batch with a style
    image from ``style_loader``, and minimizes a VGG-based perceptual loss
    (content + Gram-matrix style terms). Periodically logs aggregated
    losses and checkpoints parameters to ``args.save_model_dir``.

    Args:
        args: parsed CLI namespace; fields used here include seed, cuda,
            image_size, dataset, batch_size, style_folder, style_size,
            ngf, resume, lr, epochs, content_weight, style_weight,
            log_interval, and save_model_dir.
    """
    np.random.seed(args.seed)
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # dataloader
    transform = utils.Compose([utils.Scale(args.image_size),
                               utils.CenterCrop(args.image_size),
                               utils.ToTensor(ctx),
                               ])
    train_dataset = data.ImageFolder(args.dataset, transform)
    # last_batch='discard' drops a trailing partial batch so shapes stay fixed.
    train_loader = gluon.data.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         last_batch='discard')
    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
    print('len(style_loader):',style_loader.size())
    # models
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    style_model = net.Net(ngf=args.ngf)
    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    if args.resume is not None:
        print('Resuming, initializing using weight from {}.'.format(args.resume))
        style_model.collect_params().load(args.resume, ctx=ctx)
    print('style_model:',style_model)
    # optimizer and loss
    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for e in range(args.epochs):
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # prepare data
            # One style image per batch, cycled by batch index.
            style_image = style_loader.get(batch_id)
            # VGG expects mean-subtracted input; keep a separate preprocessed
            # copy for the style-transfer network itself.
            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
            style_image = utils.preprocess_batch(style_image)

            # Style targets: Gram matrices of VGG features (fixed per batch).
            features_style = vgg(style_v)
            gram_style = [net.gram_matrix(y) for y in features_style]

            # Content target: a single VGG feature level of the input batch.
            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
            f_xc_c = vgg(xc)[1]
            with autograd.record():
                style_model.setTarget(style_image)
                y = style_model(x)

                y = utils.subtract_imagenet_mean_batch(y)
                features_y = vgg(y)

                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)

                style_loss = 0.
                for m in range(len(features_y)):
                    gram_y = net.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    # Broadcast the single style Gram over the batch, then
                    # trim to the actual batch size.
                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
                    style_loss = style_loss + 2 * args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

                total_loss = content_loss + style_loss
                total_loss.backward()

            trainer.step(args.batch_size)
            # Block until async ops complete so the loss reads below are valid.
            mx.nd.waitall()

            agg_content_loss += content_loss[0]
            agg_style_loss += style_loss[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss.asnumpy()[0] / (batch_id + 1),
                    agg_style_loss.asnumpy()[0] / (batch_id + 1),
                    (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
                )
                print(mesg)

            if (batch_id + 1) % (4 * args.log_interval) == 0:
                # save model
                # NOTE(review): filename reads "Epoch_<e>iters_..." — there is
                # no separator before "iters"; confirm whether that is intended.
                save_model_filename = "Epoch_" + str(e) + "iters_" + str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    args.content_weight) + "_" + str(args.style_weight) + ".params"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                style_model.collect_params().save(save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)

    # save model
    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".params"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    style_model.collect_params().save(save_model_path)
    print("\nDone, trained model saved at", save_model_path)