def main():
    """Load a trained DCGan and generate images for 20 random facade pairs.

    Side effects: writes generated PNGs into the ``output`` directory and
    pops up a visualization window per image via ``visualize``.
    Relies on module-level imports not visible in this chunk
    (``sys``, ``os``, ``mx``, ``np``, ``shuffle``, ``patch_path``).
    """
    # Make the package importable when run as a script.
    sys.path.append(patch_path('..'))
    output_dir_path = patch_path('output')
    model_dir_path = patch_path('models')

    # Deferred imports: only needed when the script entry point actually runs.
    from mxnet_img_to_img.library.dcgan import DCGan
    from mxnet_img_to_img.data.facades_data_set import load_image_pairs
    from mxnet_img_to_img.library.image_utils import load_image, visualize, save_image

    img_pairs = load_image_pairs(patch_path('data/facades'))
    ctx = mx.cpu()  # inference on CPU
    gan = DCGan(model_ctx=ctx)
    gan.load_model(model_dir_path)

    # Sample 20 random pairs; only the source image of each pair is used.
    shuffle(img_pairs)
    for i, (source_img_path, _) in enumerate(img_pairs[:20]):
        # 64x64 matches the generator's training resolution used elsewhere
        # in this file (see the DCGan fit method).
        source_img = load_image(source_img_path, 64, 64)
        # generate() already saves its own copy of the output as '<i>.png'.
        target_img = gan.generate(source_image_path=source_img_path, filename=str(i) + '.png', output_dir_path=output_dir_path)
        # NOTE(review): generate() appears to return a NumPy uint8 array
        # (see its definition below), but mx.nd.concat expects NDArrays and
        # source_img here is still in the normalized [-1, 1] range while
        # target_img is already de-normalized — confirm this concat/rescale
        # pipeline actually produces a sensible side-by-side image.
        img = mx.nd.concat(source_img.as_in_context(gan.model_ctx), target_img, dim=2)
        visualize(img)
        # CHW -> HWC, then map [-1, 1] back to [0, 255] for saving.
        img = ((img.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
        save_image(img, os.path.join(output_dir_path, DCGan.model_name + '-generated-' + str(i) + '.png'))
def generate(self, source_image_path, filename, output_dir_path):
    """Generate one image conditioned on the image at *source_image_path*.

    The source image is reduced to a 1000-dim feature vector by the
    feature extractor, combined with a fresh random latent vector, and fed
    through the generator. The result is written to
    ``output_dir_path/filename`` and also returned as a uint8 NumPy array.
    """
    feats = self.fe.extract_image_features(image_path=source_image_path)
    conditioning = nd.array(feats, ctx=self.model_ctx).reshape((1, 1000, 1, 1))

    # Fresh noise for every call, so repeated calls yield different outputs.
    noise = nd.random_normal(loc=0, scale=1,
                             shape=(1, self.random_input_size, 1, 1),
                             ctx=self.model_ctx)

    generator_input = nd.concat(noise, conditioning, dim=1)
    generated = self.netG(generator_input)[0]

    # Undo the training-time normalization and convert to a savable array.
    result = inverted_transform(generated).asnumpy().astype(np.uint8)
    save_image(result, os.path.join(output_dir_path, filename))
    return result
def fit(self, image_pairs, model_dir_path, lr=0.0002, beta1=0.5, lambda1=100, epochs=100, batch_size=10):
    """Train the pix2pix (U-Net generator + patch discriminator) model.

    Args:
        image_pairs: list of (source_path, target_path) tuples.
        model_dir_path: directory for config, checkpoints and sample images.
        lr: Adam learning rate for both networks.
        beta1: Adam beta1 for both networks.
        lambda1: weight of the L1 reconstruction term in the generator loss.
        epochs: number of passes over the data.
        batch_size: mini-batch size.

    Side effects: saves config via np.save, checkpoints each epoch, logs
    progress, and writes one sample image per epoch into model_dir_path.
    """
    # Persist hyper-parameters needed to rebuild the model at load time.
    config = dict()
    config['image_width'] = self.img_width
    config['image_height'] = self.img_height
    config['pool_size'] = self.pool_size
    config['num_down_sampling'] = self.num_down_sampling
    np.save(self.get_config_file_path(model_dir_path), config)

    # Materialize the whole dataset in memory as two stacked NDArrays.
    # NOTE(review): fine for the small facades set, but this does not
    # scale to large datasets — confirm intended usage.
    img_in_list = []
    img_out_list = []
    for source_img_path, target_img_path in image_pairs:
        source_img = load_image(source_img_path, self.img_width, self.img_height)
        target_img = load_image(target_img_path, self.img_width, self.img_height)
        source_img = nd.expand_dims(source_img, axis=0)
        target_img = nd.expand_dims(target_img, axis=0)
        img_in_list.append(source_img)
        img_out_list.append(target_img)
    train_data = mx.io.NDArrayIter(data=[nd.concat(*img_in_list, dim=0), nd.concat(*img_out_list, dim=0)], batch_size=batch_size)

    ctx = self.model_ctx
    # Pixel2Pixel networks: discriminator sees source+output stacked on the
    # channel axis, hence in_channels=6 (3 + 3).
    self.netG = UnetGenerator(in_channels=3, num_downs=self.num_down_sampling)
    self.netD = Discriminator(in_channels=6)
    # Initialize parameters
    Pixel2PixelGan.network_init(self.netG, ctx)
    Pixel2PixelGan.network_init(self.netD, ctx)
    # Trainers for the generator and discriminator
    trainerG = gluon.Trainer(self.netG.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})
    trainerD = gluon.Trainer(self.netD.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})

    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L1Loss()
    # History buffer of generated images to stabilize discriminator training.
    image_pool = ImagePool(self.pool_size)
    metric = mx.metric.CustomMetric(self.facc)

    logging.basicConfig(level=logging.DEBUG)

    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        # NOTE(review): 'iter' shadows the builtin; left unchanged here.
        iter = 0
        # Initialized so the post-loop sample save has *something*; if the
        # iterator yields no batches, fake_out[0] would raise IndexError.
        fake_out = []
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            # Generator forward pass OUTSIDE autograd.record(): no gradient
            # flows into G during the discriminator update.
            fake_out = self.netG(real_in)
            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image (label 0)
                output = self.netD(fake_concat)
                fake_label = nd.zeros(shape=output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])
                # Train with real image (label 1)
                output = self.netD(nd.concat(real_in, real_out, dim=1))
                real_label = nd.ones(shape=output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                metric.update([real_label, ], [output, ])
                # Halved so D learns at roughly the same rate as G.
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                # Recorded forward pass so gradients flow into G this time.
                fake_out = self.netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = self.netD(fake_concat)
                real_label = nd.ones(shape=output.shape, ctx=ctx)
                # Adversarial term + weighted L1 reconstruction term.
                errG = GAN_loss(output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errG.backward()
            trainerG.step(batch.data[0].shape[0])

            # Print log information every ten batches
            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, iter, epoch))
            iter = iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        self.checkpoint(model_dir_path)

        # Save one generated image from the last batch of this epoch.
        fake_img = fake_out[0]
        # CHW -> HWC, then map [-1, 1] back to [0, 255].
        fake_img = ((fake_img.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
        save_image(fake_img, os.path.join(model_dir_path, Pixel2PixelGan.model_name + '-training-') + str(epoch) + '.png')
def fit(self, image_pairs, model_dir_path, epochs=100, batch_size=64, learning_rate=0.0002, beta1=0.5, image_pool_size=50, start_epoch=0, print_every=2):
    """Train the conditional DCGAN: generate target images from source-image features.

    Args:
        image_pairs: list of (source_path, target_path) tuples.
        model_dir_path: directory for config, checkpoints and sample images.
        epochs: total number of epochs (training runs from start_epoch).
        batch_size: mini-batch size.
        learning_rate: Adam learning rate for both networks.
        beta1: Adam beta1 for both networks.
        image_pool_size: capacity of the fake-image history pool.
        start_epoch: epoch to resume from (affects only the loop range).
        print_every: log every this-many batches.

    Side effects: saves config via np.save, checkpoints each epoch, logs
    progress, and writes one sample image per epoch into model_dir_path.
    """
    # Materialize the dataset: target images as 64x64 tensors, sources as
    # pre-extracted feature vectors from self.fe.
    img_in_list = []
    img_out_list = []
    total_pairs = len(image_pairs)
    for i, (source_img_path, target_img_path) in enumerate(image_pairs):
        logging.debug('extracting %d pair from %d pairs', i + 1, total_pairs)
        target_img = load_image(target_img_path, 64, 64)
        source_img = self.fe.extract_image_features(source_img_path)
        target_img = nd.expand_dims(target_img, axis=0)
        img_in_list.append(source_img)
        img_out_list.append(target_img)
    # data[0] = real target images, data[1] = source feature vectors.
    train_data = mx.io.NDArrayIter(data=[nd.concat(*img_out_list, dim=0), nd.concat(*img_in_list, dim=0)], batch_size=batch_size)

    # Persist the hyper-parameter needed to rebuild the model at load time.
    config = dict()
    config['random_input_size'] = self.random_input_size
    np.save(self.get_config_file_path(model_dir_path), config)

    # History buffer of generated images to stabilize discriminator training.
    image_pool = ImagePool(image_pool_size)
    loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

    # Build and initialize networks only when not resuming from a loaded model.
    if self.netG is None:
        self.netG, self.netD = self.create_model()
        self.netG.initialize(mx.init.Normal(0.02), ctx=self.model_ctx)
        self.netD.initialize(mx.init.Normal(0.02), ctx=self.model_ctx)

    trainerG = gluon.Trainer(self.netG.collect_params(), 'adam', {'learning_rate': learning_rate, 'beta1': beta1})
    trainerD = gluon.Trainer(self.netD.collect_params(), 'adam', {'learning_rate': learning_rate, 'beta1': beta1})

    # NOTE(review): labels are sized to the full batch_size; a final partial
    # batch would have bsize < batch_size. NDArrayIter pads by default, which
    # presumably makes this safe — verify the iterator's pad behavior.
    real_label = nd.ones((batch_size, ), ctx=self.model_ctx)
    fake_label = nd.zeros((batch_size, ), ctx=self.model_ctx)

    metric = mx.metric.CustomMetric(facc)

    logging.basicConfig(level=logging.DEBUG)

    # Holds the last batch of generated images for the per-epoch sample save;
    # if the iterator yields no batch, fake_images[0] would raise IndexError.
    fake_images = []
    for epoch in range(start_epoch, epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        # NOTE(review): 'iter' shadows the builtin; left unchanged here.
        iter = 0
        for batch in train_data:
            # Step 1: Update netD
            real_images = batch.data[0].as_in_context(self.model_ctx)
            # NOTE(review): this nd.array copy looks redundant after
            # as_in_context above — confirm it can be dropped.
            real_images = nd.array(real_images, ctx=self.model_ctx)
            bsize = real_images.shape[0]
            source_image_feats = batch.data[1].as_in_context(self.model_ctx)
            # Fresh latent noise per batch, one vector per sample.
            random_input = nd.random_normal(0, 1, shape=(real_images.shape[0], self.random_input_size, 1, 1), ctx=self.model_ctx)
            # Generator forward pass OUTSIDE autograd.record(): no gradient
            # flows into G during the discriminator update. Conditioning is
            # the 1000-dim feature vector reshaped to NCHW.
            fake_images = self.netG(nd.concat(random_input, source_image_feats.reshape((bsize, 1000, 1, 1)), dim=1))
            fake_concat = image_pool.query([fake_images, source_image_feats])
            with autograd.record():
                # train with real image (label 1)
                output = self.netD([real_images, source_image_feats])
                errD_real = loss(output, real_label)
                metric.update([real_label, ], [output, ])
                # train with fake image (label 0)
                output = self.netD(fake_concat)
                errD_fake = loss(output, fake_label)
                errD = errD_real + errD_fake
                errD.backward()
                metric.update([fake_label, ], [output, ])
            trainerD.step(bsize)

            # Step 2: Update netG
            with autograd.record():
                # Recorded forward pass so gradients flow into G this time;
                # reuses the same noise as the D step.
                fake_images = self.netG(nd.concat(random_input, source_image_feats.reshape((bsize, 1000, 1, 1)), dim=1))
                output = self.netD([fake_images, source_image_feats])
                # G tries to make D output "real" on fakes.
                errG = loss(output, real_label)
                errG.backward()
            trainerG.step(bsize)

            # Print log information every print_every batches
            if iter % print_every == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, iter, epoch))
            iter = iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        self.checkpoint(model_dir_path)

        # Save one generated image from the last batch of this epoch.
        fake_img = inverted_transform(fake_images[0]).asnumpy().astype(np.uint8)
        save_image(fake_img, os.path.join(model_dir_path, DCGan.model_name + '-training-') + str(epoch) + '.png')