Code example #1
0
def main():
    """Generate images with a trained DCGan for 20 randomly chosen facade pairs.

    For each pair: load the source image, let the GAN generate a target,
    show source and generated image side by side, and save the composite
    to the output directory.
    """
    sys.path.append(patch_path('..'))
    out_dir = patch_path('output')
    model_dir = patch_path('models')

    from mxnet_img_to_img.library.dcgan import DCGan
    from mxnet_img_to_img.data.facades_data_set import load_image_pairs
    from mxnet_img_to_img.library.image_utils import load_image, visualize, save_image

    pairs = load_image_pairs(patch_path('data/facades'))

    gan = DCGan(model_ctx=mx.cpu())
    gan.load_model(model_dir)

    shuffle(pairs)

    for idx, (src_path, _) in enumerate(pairs[:20]):
        src_img = load_image(src_path, 64, 64)
        gen_img = gan.generate(source_image_path=src_path,
                               filename=str(idx) + '.png',
                               output_dir_path=out_dir)
        # Stitch source and generated image along the width axis (dim=2 of CHW).
        composite = mx.nd.concat(src_img.as_in_context(gan.model_ctx),
                                 gen_img,
                                 dim=2)
        visualize(composite)
        # Map from [-1, 1] CHW back to uint8 HWC for saving.
        composite = ((composite.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
        save_image(
            composite,
            os.path.join(out_dir,
                         DCGan.model_name + '-generated-' + str(idx) + '.png'))
Code example #2
0
File: dcgan.py — Project: Ahanmr/mxnet-img2img
    def generate(self, source_image_path, filename, output_dir_path):
        """Generate one image conditioned on the source image's features.

        Extracts a 1000-d feature vector from the source image, concatenates
        it with random noise, runs the generator, saves the result under
        ``output_dir_path/filename`` and returns it as a uint8 numpy array.
        """
        feats = self.fe.extract_image_features(image_path=source_image_path)
        # Shape the feature vector as (batch=1, 1000, 1, 1) for concatenation
        # with the noise tensor along the channel axis.
        feats = nd.array(feats, ctx=self.model_ctx).reshape((1, 1000, 1, 1))

        noise = nd.random_normal(loc=0,
                                 scale=1,
                                 shape=(1, self.random_input_size, 1, 1),
                                 ctx=self.model_ctx)
        generated = self.netG(nd.concat(noise, feats, dim=1))[0]
        # inverted_transform maps the generator output back to pixel space.
        generated = inverted_transform(generated).asnumpy().astype(np.uint8)

        save_image(generated, os.path.join(output_dir_path, filename))
        return generated
Code example #3
0
File: pixel2pixel.py — Project: Ahanmr/mxnet-img2img
    def fit(self, image_pairs, model_dir_path, lr=0.0002, beta1=0.5, lambda1=100, epochs=100, batch_size=10):
        """Train the pix2pix GAN on paired (source, target) images.

        Args:
            image_pairs: iterable of (source_img_path, target_img_path) tuples.
            model_dir_path: directory for the saved config, checkpoints and
                per-epoch preview images.
            lr: Adam learning rate shared by generator and discriminator.
            beta1: Adam beta1 shared by both trainers.
            lambda1: weight of the L1 reconstruction term in the generator loss.
            epochs: number of passes over the training data.
            batch_size: mini-batch size for the NDArrayIter.
        """

        # Persist the architecture hyperparameters so load_model can rebuild
        # the same network later. np.save pickles the dict into a .npy file.
        config = dict()
        config['image_width'] = self.img_width
        config['image_height'] = self.img_height
        config['pool_size'] = self.pool_size
        config['num_down_sampling'] = self.num_down_sampling
        np.save(self.get_config_file_path(model_dir_path), config)

        # Load every image pair into memory up front and stack them into two
        # big NDArrays (all sources, all targets) for the iterator.
        img_in_list = []
        img_out_list = []
        for source_img_path, target_img_path in image_pairs:
            source_img = load_image(source_img_path, self.img_width, self.img_height)
            target_img = load_image(target_img_path, self.img_width, self.img_height)
            source_img = nd.expand_dims(source_img, axis=0)
            target_img = nd.expand_dims(target_img, axis=0)
            img_in_list.append(source_img)
            img_out_list.append(target_img)
        train_data = mx.io.NDArrayIter(data=[nd.concat(*img_in_list, dim=0), nd.concat(*img_out_list, dim=0)],
                                       batch_size=batch_size)

        ctx = self.model_ctx

        # Pixel2Pixel networks
        self.netG = UnetGenerator(in_channels=3, num_downs=self.num_down_sampling)
        # 6 input channels: source image (3) concatenated with real/fake target (3).
        self.netD = Discriminator(in_channels=6)

        # Initialize parameters
        Pixel2PixelGan.network_init(self.netG, ctx)
        Pixel2PixelGan.network_init(self.netD, ctx)

        # trainer for the generator and discrminator
        trainerG = gluon.Trainer(self.netG.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})
        trainerD = gluon.Trainer(self.netD.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})

        GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
        L1_loss = gluon.loss.L1Loss()

        # History buffer of generated images to stabilize discriminator training.
        image_pool = ImagePool(self.pool_size)
        metric = mx.metric.CustomMetric(self.facc)

        logging.basicConfig(level=logging.DEBUG)

        for epoch in range(epochs):
            tic = time.time()
            btic = time.time()
            train_data.reset()
            # NOTE(review): `iter` shadows the builtin of the same name within
            # this loop; it is only used as a batch counter here.
            iter = 0
            # NOTE(review): fake_out starts as []; if train_data yields no
            # batches, fake_out[0] at epoch end would raise IndexError.
            fake_out = []

            for batch in train_data:
                ############################
                # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
                ###########################
                real_in = batch.data[0].as_in_context(ctx)
                real_out = batch.data[1].as_in_context(ctx)

                # Generator forward pass happens outside autograd.record so
                # only the discriminator receives gradients in this phase.
                fake_out = self.netG(real_in)
                fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
                with autograd.record():
                    # Train with fake image
                    output = self.netD(fake_concat)
                    fake_label = nd.zeros(shape=output.shape, ctx=ctx)
                    errD_fake = GAN_loss(output, fake_label)
                    metric.update([fake_label, ], [output, ])

                    # Train with real image
                    output = self.netD(nd.concat(real_in, real_out, dim=1))
                    real_label = nd.ones(shape=output.shape, ctx=ctx)
                    errD_real = GAN_loss(output, real_label)
                    metric.update([real_label, ], [output, ])
                    errD = (errD_real + errD_fake) * 0.5
                    errD.backward()

                trainerD.step(batch.data[0].shape[0])

                ############################
                # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
                ###########################
                with autograd.record():
                    # Fresh generator forward pass, this time recorded so the
                    # generator parameters receive gradients.
                    fake_out = self.netG(real_in)
                    fake_concat = nd.concat(real_in, fake_out, dim=1)
                    output = self.netD(fake_concat)
                    real_label = nd.ones(shape=output.shape, ctx=ctx)
                    errG = GAN_loss(output, real_label) + L1_loss(real_out, fake_out) * lambda1
                    errG.backward()

                trainerG.step(batch.data[0].shape[0])

                # Print log infomation every ten batches
                if iter % 10 == 0:
                    name, acc = metric.get()
                    logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                    logging.info(
                        'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                        % (nd.mean(errD).asscalar(),
                           nd.mean(errG).asscalar(), acc, iter, epoch))
                iter = iter + 1
                btic = time.time()

            name, acc = metric.get()
            metric.reset()
            logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
            logging.info('time: %f' % (time.time() - tic))

            # Save a checkpoint of both networks after every epoch.
            self.checkpoint(model_dir_path)

            # Visualize one generated image for each epoch
            fake_img = fake_out[0]
            # Map generator output from [-1, 1] CHW to uint8 HWC for saving.
            fake_img = ((fake_img.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
            save_image(fake_img,
                       os.path.join(model_dir_path, Pixel2PixelGan.model_name + '-training-') + str(epoch) + '.png')
Code example #4
0
File: dcgan.py — Project: Ahanmr/mxnet-img2img
    def fit(self,
            image_pairs,
            model_dir_path,
            epochs=100,
            batch_size=64,
            learning_rate=0.0002,
            beta1=0.5,
            image_pool_size=50,
            start_epoch=0,
            print_every=2):
        """Train the conditional DCGAN on (source, target) image path pairs.

        Source images are reduced to feature vectors via self.fe; targets are
        loaded as 64x64 images. The generator is conditioned on the features
        concatenated with random noise.

        Args:
            image_pairs: iterable of (source_img_path, target_img_path) tuples.
            model_dir_path: directory for the saved config, checkpoints and
                per-epoch preview images.
            epochs: total number of epochs (upper bound of the epoch range).
            batch_size: mini-batch size for the NDArrayIter.
            learning_rate: Adam learning rate shared by both trainers.
            beta1: Adam beta1 shared by both trainers.
            image_pool_size: capacity of the fake-image history pool.
            start_epoch: epoch index to resume from.
            print_every: log every N batches.
        """

        # Preprocess: feature-extract every source image and load every target
        # image, accumulating both into lists for stacking below.
        img_in_list = []
        img_out_list = []
        total_pairs = len(image_pairs)
        for i, (source_img_path, target_img_path) in enumerate(image_pairs):
            logging.debug('extracting %d pair from %d pairs', i + 1,
                          total_pairs)

            target_img = load_image(target_img_path, 64, 64)
            source_img = self.fe.extract_image_features(source_img_path)
            target_img = nd.expand_dims(target_img, axis=0)
            img_in_list.append(source_img)
            img_out_list.append(target_img)

        # data[0] = real target images, data[1] = source image features.
        train_data = mx.io.NDArrayIter(data=[
            nd.concat(*img_out_list, dim=0),
            nd.concat(*img_in_list, dim=0)
        ],
                                       batch_size=batch_size)

        # Persist the noise-vector size so load_model can rebuild the network.
        # np.save pickles the dict into a .npy file.
        config = dict()
        config['random_input_size'] = self.random_input_size
        np.save(self.get_config_file_path(model_dir_path), config)

        # History buffer of generated images to stabilize discriminator training.
        image_pool = ImagePool(image_pool_size)

        loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

        # Build and initialize networks only if not already loaded (supports
        # resuming from start_epoch with pre-loaded weights).
        if self.netG is None:
            self.netG, self.netD = self.create_model()

            self.netG.initialize(mx.init.Normal(0.02), ctx=self.model_ctx)
            self.netD.initialize(mx.init.Normal(0.02), ctx=self.model_ctx)

        trainerG = gluon.Trainer(self.netG.collect_params(), 'adam', {
            'learning_rate': learning_rate,
            'beta1': beta1
        })
        trainerD = gluon.Trainer(self.netD.collect_params(), 'adam', {
            'learning_rate': learning_rate,
            'beta1': beta1
        })

        # NOTE(review): labels are sized to the full batch_size; if the last
        # batch is smaller, loss broadcasting against these is relied upon.
        real_label = nd.ones((batch_size, ), ctx=self.model_ctx)
        fake_label = nd.zeros((batch_size, ), ctx=self.model_ctx)

        metric = mx.metric.CustomMetric(facc)

        logging.basicConfig(level=logging.DEBUG)

        fake_images = []
        for epoch in range(start_epoch, epochs):
            tic = time.time()
            btic = time.time()
            train_data.reset()
            # NOTE(review): `iter` shadows the builtin of the same name within
            # this loop; it is only used as a batch counter here.
            iter = 0
            for batch in train_data:

                # Step 1: Update netD
                real_images = batch.data[0].as_in_context(self.model_ctx)
                # NOTE(review): this re-wrap looks redundant — the array is
                # already on model_ctx after as_in_context above.
                real_images = nd.array(real_images, ctx=self.model_ctx)
                bsize = real_images.shape[0]
                source_image_feats = batch.data[1].as_in_context(
                    self.model_ctx)
                random_input = nd.random_normal(0,
                                                1,
                                                shape=(real_images.shape[0],
                                                       self.random_input_size,
                                                       1, 1),
                                                ctx=self.model_ctx)

                # Generator forward pass outside autograd.record: only the
                # discriminator receives gradients in this phase.
                fake_images = self.netG(
                    nd.concat(random_input,
                              source_image_feats.reshape((bsize, 1000, 1, 1)),
                              dim=1))
                fake_concat = image_pool.query(
                    [fake_images, source_image_feats])

                with autograd.record():
                    # train with real image
                    output = self.netD([real_images, source_image_feats])
                    errD_real = loss(output, real_label)
                    metric.update([
                        real_label,
                    ], [
                        output,
                    ])

                    # train with fake image
                    output = self.netD(fake_concat)
                    errD_fake = loss(output, fake_label)
                    errD = errD_real + errD_fake
                    errD.backward()
                    metric.update([
                        fake_label,
                    ], [
                        output,
                    ])

                trainerD.step(bsize)

                # Step 2: Update netG
                with autograd.record():
                    # Fresh recorded forward pass so generator parameters get
                    # gradients; reuses the same noise as the D step.
                    fake_images = self.netG(
                        nd.concat(random_input,
                                  source_image_feats.reshape(
                                      (bsize, 1000, 1, 1)),
                                  dim=1))
                    output = self.netD([fake_images, source_image_feats])
                    # Generator wants the discriminator to label fakes as real.
                    errG = loss(output, real_label)
                    errG.backward()

                trainerG.step(bsize)

                # Print log infomation every ten batches
                if iter % print_every == 0:
                    name, acc = metric.get()
                    logging.info('speed: {} samples/s'.format(
                        batch_size / (time.time() - btic)))
                    logging.info(
                        'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                        % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(),
                           acc, iter, epoch))
                iter = iter + 1
                btic = time.time()

            name, acc = metric.get()
            metric.reset()
            logging.info('\nbinary training acc at epoch %d: %s=%f' %
                         (epoch, name, acc))
            logging.info('time: %f' % (time.time() - tic))

            # Save a checkpoint of both networks after every epoch.
            self.checkpoint(model_dir_path)

            # Visualize one generated image for each epoch
            fake_img = inverted_transform(fake_images[0]).asnumpy().astype(
                np.uint8)
            # fake_img = ((fake_img.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)

            save_image(
                fake_img,
                os.path.join(model_dir_path, DCGan.model_name + '-training-') +
                str(epoch) + '.png')