Example #1
        def sample_and_save(prefix='normal'):
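            # Draw a fixed batch of latent vectors (class-balanced labels when
            # conditional), run the generator in sub-batches of 100, and save
            # the result as a 10-wide image grid.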
            sample_bs = 100
            bs = 100
            if self.config.dataset == 'cifar100':
                bs = 1000
            z = utils.sample_normal(bs, self.config.nz, device=self.device)
            if self.config.conditional:
                num_rep = int(math.ceil(bs / self.num_classes))
                y = [[i] * num_rep for i in range(self.num_classes)]
                y = np.hstack(y)
                y = torch.from_numpy(y).long()
                y = y.to(self.device)
            else:
                y = None

            gen_list = []
            with torch.no_grad():
                for i in range(int(bs / sample_bs)):
                    z_cur = z[i * sample_bs:(i + 1) * sample_bs]
                    if self.config.conditional:
                        y_cur = y[i * sample_bs:(i + 1) * sample_bs]
                    else:
                        y_cur = None
                    gen = self.netG(z_cur, y_cur)
                    gen_list.append(gen)
            gen = torch.cat(gen_list, dim=0)
            vutils.save_image(gen * 0.5 + 0.5,
                              '{}/{}_{}.png'.format(img_path, prefix,
                                                    self.itr),
                              nrow=10)
Example #2
    def build_graph(self, graph):
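        # Convolutional encoder to a Gaussian latent (mean / log-variance),
        # reparameterized sample, and a transposed-convolution decoder that
        # emits Bernoulli logits over the 784 MNIST pixels; attaches the VAE
        # loss, summaries, and an Adam train op to the graph.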
        with graph.as_default():
            # TODO: MNIST sizes are hard-coded in
            self.x = x = tf.placeholder(tf.float32, [None, 784])
            x_reshape = tf.reshape(x, [-1, 28, 28, 1])

            net = slim.conv2d(x_reshape,
                              self.num_features,
                              kernel_size=self.kernel_size)
            net = slim.flatten(net)

            # Sample from the latent distribution
            self.q_z_mean = slim.fully_connected(net,
                                                 self.hidden_size,
                                                 activation_fn=None)
            tf.summary.histogram('q_z_mean', self.q_z_mean)
            self.q_z_log_var = slim.fully_connected(net,
                                                    self.hidden_size,
                                                    activation_fn=None)
            tf.summary.histogram('q_z_log_var', self.q_z_log_var)
            z = sample_normal(self.q_z_mean, self.q_z_log_var)
            self.representation = z

            # The decoder
            net = tf.reshape(z, [-1, 1, 1, self.hidden_size])
            net = slim.conv2d_transpose(net,
                                        self.num_features,
                                        kernel_size=self.kernel_size)
            net = slim.flatten(net)
            net = slim.fully_connected(net,
                                       x.get_shape().as_list()[-1],
                                       activation_fn=None)

            # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
            self.p_x = Bernoulli(logits=net)

            tf.summary.image('generated',
                             tf.reshape(self.p_x.mean(), [-1, 28, 28, 1]),
                             max_outputs=1)

            self.loss = self._vae_loss()
            tf.summary.scalar('loss', self.loss)

            learning_rate = tf.Variable(self.learning_rate)
            tf.summary.scalar('learning_rate', learning_rate)

            global_step = tf.Variable(0, trainable=False, name="global_step")
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=learning_rate) \
                .minimize(self.loss, var_list=slim.get_model_variables(), global_step=global_step)

        return graph
Example #3
    def build_graph(self, graph):
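        # Fully-connected variant of the VAE graph above: a single hidden
        # layer on each side of the Gaussian latent instead of convolutions.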
        with graph.as_default():
            # TODO: MNIST sizes are hard-coded in
            self.x = x = tf.placeholder(tf.float32, [None, 784])
            shape = x.get_shape().as_list()

            # This layer is sized at half the gap between the input size and
            # the latent size.
            # TODO: allow the architecture to be specified at the command line.
            net = slim.fully_connected(x, (shape[-1] - self.hidden_size) // 2)

            # Sample from the latent distribution
            self.q_z_mean = slim.fully_connected(net,
                                                 self.hidden_size,
                                                 activation_fn=None)
            tf.summary.histogram('q_z_mean', self.q_z_mean)
            self.q_z_log_var = slim.fully_connected(net,
                                                    self.hidden_size,
                                                    activation_fn=None)
            tf.summary.histogram('q_z_log_var', self.q_z_log_var)
            z = sample_normal(self.q_z_mean, self.q_z_log_var)
            self.representation = z

            # The decoder
            net = slim.fully_connected(z, (shape[-1] - self.hidden_size) // 2)
            # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
            # Do not include the batch size in creating the final layer
            net = slim.fully_connected(net, shape[-1], activation_fn=None)
            self.p_x = Bernoulli(logits=net)

            tf.summary.image('generated',
                             tf.reshape(self.p_x.mean(), [-1, 28, 28, 1]),
                             max_outputs=1)

            self.loss = self._vae_loss()
            tf.summary.scalar('loss', self.loss)

            learning_rate = tf.Variable(self.learning_rate)
            tf.summary.scalar('learning_rate', learning_rate)

            global_step = tf.Variable(0, trainable=False, name="global_step")
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=learning_rate) \
                .minimize(self.loss, var_list=slim.get_model_variables(), global_step=global_step)

        return graph
Example #4
    def create_samples(self, num_samples):
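        # Generate num_samples unconditional images in batches of 100, save
        # the first 100 as a 10x10 grid under the log directory, and return
        # all generated samples on the CPU.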

        print('Creating samples')
        bs = 100

        gen_list = []
        with torch.no_grad():
            for i in range(int(num_samples / bs)):
                z_cur = utils.sample_normal(bs,
                                            self.config.nz,
                                            device=self.device)
                y_cur = None

                gen = self.netG(z_cur, y_cur)
                gen_list.append(gen.detach().cpu())

        gen = torch.cat(gen_list, dim=0)
        vutils.save_image(gen[0:100] * 0.5 + 0.5,
                          '{}/samples.png'.format(self.logdir),
                          nrow=10)
        print('Sample creation done')

        return gen
Example #5
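# Encode the test split of a zero-shot image dataset with the trained model
# and report how often each embedding's k nearest neighbours share its label.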
model.eval()

transform = transforms.Compose(
    [transforms.CenterCrop((224, 224)),
     transforms.ToTensor()])
dataset = ZeroShotImageFolder(args.data_root, train=False, transform=transform)
dataloader = DataLoader(dataset, batch_size=50)
z_all = []
targets_all = []

for images, target in dataloader:
    with torch.no_grad():
        z_inv, mu_logvar, _ = model(images.cuda())
        z_var = utils.sample_normal(mu_logvar)
        z = z_inv + z_var
    z_all.append(z.detach())
    targets_all.append(target)

z_all = torch.cat(z_all, 0).cpu().numpy()
targets_all = torch.cat(targets_all, 0).numpy()

scores = []
for n in [1, 2, 4, 8]:
    nbrs = NearestNeighbors(n_neighbors=n).fit(z_all)
    indices = nbrs.kneighbors(z_all, n_neighbors=n + 1, return_distance=False)
    nbr_targets = targets_all[indices[:, 1:]]
    nbr_targets = np.any(nbr_targets == targets_all[:, np.newaxis], axis=-1)
    score = sum(nbr_targets) / nbr_targets.shape[0] * 100
    print("n = ", n, ", score = ", score)
Example #6
    def compute_inception_fid(self):
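        # Generate num_inception_imgs samples (optionally using the EMA
        # generator weights), compute IS and FID (plus intra-FID in the
        # conditional case), and checkpoint the model whenever the tracked
        # FID improves.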
        self.netG.eval()
        if self.config.use_ema:
            G_state = copy.deepcopy(self.netG.state_dict())
            self.netG.load_state_dict(self.ema.target_dict)

        # bs = self.config.batchSize
        bs = 32
        samples = []
        labels_gen = []
        num_batches = int(self.config.num_inception_imgs / bs)
        for batch in range(num_batches):
            with torch.no_grad():
                z = utils.sample_normal(bs, self.config.nz, device=self.device)
                if self.config.conditional:
                    y = utils.sample_cats(bs,
                                          self.num_classes,
                                          device=self.device)
                    labels_gen.append(y.cpu().numpy())
                else:
                    y = None

                gen = self.netG(z, y)
                gen = gen * 0.5 + 0.5
                gen = gen * 255.0
                gen = gen.cpu().numpy().astype(np.uint8)
                gen = np.transpose(gen, (0, 2, 3, 1))
                samples.extend(gen)

        if self.config.conditional:
            labels_gen = np.hstack(labels_gen)
            samples = (samples, labels_gen)
            IS_mean, IS_std, fid, intra_fid = self.inception_evaluator.compute_metrics(
                samples)
            self.log('IS: {} +/- {}'.format(IS_mean, IS_std))
            self.log('FID: {}'.format(fid))
            self.log('Intra FID: {}'.format(intra_fid))

            # Choosing the min FID model
            if self.best_intra_fid > intra_fid:
                self.best_is = IS_mean
                self.best_is_std = IS_std
                self.best_fid = fid
                self.best_intra_fid = intra_fid
                self.save_state('model_best.pth')
        else:
            IS_mean, IS_std, fid = self.inception_evaluator.compute_metrics(
                samples)
            self.log('IS: {} +/- {}'.format(IS_mean, IS_std))
            self.log('FID: {}'.format(fid))

            # Choosing the min FID model
            if self.best_fid > fid:
                self.best_is = IS_mean
                self.best_is_std = IS_std
                self.best_fid = fid
                self.save_state('model_best.pth')

        self.log('Best IS: {} +/- {}'.format(self.best_is, self.best_is_std))
        self.log('Best FID: {}'.format(self.best_fid))
        if self.config.conditional:
            self.log('Best intra FID: {}'.format(self.best_intra_fid))

        if self.config.use_ema:
            self.netG.load_state_dict(G_state)
Example #7
    def gan_updates(self, real_data, real_labels, real_indices):
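        # One GAN iteration: update the discriminator on real and fake batches
        # (with optional per-sample reweighting, ACGAN auxiliary losses, and
        # gradient-penalty / orthogonal regularization), then update the
        # generator every disc_iters discriminator steps.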

        self.optimizerD.zero_grad()

        batch_size = real_data.size(0)
        noise = utils.sample_normal(batch_size,
                                    self.config.nz,
                                    device=self.device)
        if self.config.conditional:
            fake_labels = utils.sample_cats(batch_size,
                                            self.num_classes,
                                            device=self.device)
        else:
            real_labels = None
            fake_labels = None

        fake_data = self.netG(noise, fake_labels)

        # Discriminator updates
        outD_real = self.netD(real_data, real_labels)
        outD_fake = self.netD(fake_data.detach(), fake_labels)
        if self.weight_update_flag and self.weight_update_type == 'discrete':
            self.disc_vector_cur[real_indices] = torch.squeeze(outD_real)

        if self.weight_update_flag:
            if self.weight_update_type == 'discrete':
                real_weights = self.weight_vector[real_indices].view(-1, 1)
            else:
                real_weights = self.netW(real_data, real_labels) + self.eps
                real_weights = (real_weights /
                                real_weights.sum()) * self.config.batchSize
        else:
            real_weights = torch.ones(real_data.size(0), 1).to(self.device)

        if self.config.conditioning == 'acgan':

            outD_real_cls = outD_real[1]
            outD_real = outD_real[0]
            outD_fake_cls = outD_fake[1]
            outD_fake = outD_fake[0]
            aux_loss_real = self.aux_loss_fn(outD_real_cls, real_labels)
            aux_loss_fake = self.aux_loss_fn(outD_fake_cls, fake_labels)

        errD_real, errD_fake = self.disc_loss_fn(outD_real, outD_fake,
                                                 real_weights)
        if self.config.conditioning == 'acgan':
            errD_real = errD_real + aux_loss_real
            errD_fake = errD_fake + aux_loss_fake

        errD_real.backward()
        errD_fake.backward()

        if self.config.regularization == 'gradient_penalty':
            if self.config.conditional:
                fake_data_consistent = self.netG(noise, real_labels)
                gp = losses.gradient_penalty(self.netD,
                                             real_data,
                                             fake_data_consistent,
                                             self.config.gp_lamb,
                                             device=self.device,
                                             labels=real_labels)
            else:
                gp = losses.gradient_penalty(self.netD,
                                             real_data,
                                             fake_data,
                                             self.config.gp_lamb,
                                             device=self.device)
            gp.backward()
        if self.config.regularization == 'ortho':
            losses.orthogonal_regularization(self.netD,
                                             self.config.ortho_strength)

        self.optimizerD.step()
        if self.config.lrdecay:
            self.schedulerD.step()
            self.schedulerG.step()

        disc_loss = errD_real.item() + errD_fake.item()

        # Generator updates
        if self.itr % self.config.disc_iters == 0:
            self.optimizerG.zero_grad()
            outD = self.netD(fake_data, fake_labels)

            if self.config.conditioning == 'acgan':
                outD_cls = outD[1]
                outD = outD[0]
                aux_loss = self.aux_loss_fn(outD_cls, fake_labels)

            errG = self.gen_loss_fn(outD)
            if self.config.conditioning == 'acgan':
                errG = errG + aux_loss
            errG.backward()

            self.optimizerG.step()
            gen_loss = errG.item()
            self.prev_gen_loss = gen_loss
        else:
            gen_loss = self.prev_gen_loss

        return disc_loss, gen_loss
Example #8
    def __init__(self, input_, cont_dim=2, discrete_dim=0,
                 filters=[32, 64], hidden_dim=1024, model_name="ConcreteVae"):
        """
        Constructs a Variational Autoencoder that supports continuous and
        discrete dimensions. Currently only one discrete dimension is
        supported.

        Args:
        input_        the input tensor
        cont_dim      the number of continuous latent dimensions
        discrete_dim  the number of categories in the discrete latent dimension
        filters       the number of filters for each convolution
        hidden_dim    the dimension of the fully-connected hidden layer between
                          the convolutions and the latent variable
        model_name    the name of the model
        """
        self.input_ = input_
        input_shape = input_.get_shape().as_list()
        print('Input shape {}'.format(input_shape))

        self.model_name = model_name

        # Build the encoder
        # According to karpathy, generative models work better when
        # they discard pooling layers in favor of larger strides
        # (https://cs231n.github.io/convolutional-networks/#pool)
        net = slim.conv2d(self.input_, filters[0], kernel_size=5, stride=2,
                          padding='SAME')
        net = slim.conv2d(net, filters[1], kernel_size=5, stride=2,
                          padding='SAME')
        # Use dropout to reduce overfitting
        # net = slim.dropout(net, 0.9)
        net = slim.flatten(net)

        # Sample from the latent distribution
        q_z_mean = slim.fully_connected(net, cont_dim, activation_fn=None)
        q_z_log_var = slim.fully_connected(net, cont_dim, activation_fn=None)
        # TODO: support multiple categorical variables
        q_category_logits = slim.fully_connected(net, discrete_dim,
                                                 activation_fn=None)
        q_category = tf.nn.softmax(q_category_logits)
        self.q_z_mean = q_z_mean
        self.q_z_log_var = q_z_log_var
        self.q_category = q_category

        self.continuous_z = sample_normal(q_z_mean, q_z_log_var)
        self.tau = tf.Variable(5.0, name="temperature")
        self.category = sample_gumbel(q_category_logits, self.tau)
        self.z = tf.concat([self.continuous_z, self.category], axis=1)

        # Build the decoder
        net = tf.reshape(self.z, [-1, 1, 1, cont_dim + discrete_dim])
        net = slim.conv2d_transpose(net, filters[1], kernel_size=5,
                                    stride=2, padding='SAME')
        net = slim.conv2d_transpose(net, filters[0], kernel_size=5,
                                    stride=2, padding='SAME')
        net = slim.conv2d_transpose(net, input_shape[3], kernel_size=5,
                                    padding='VALID')
        net = slim.flatten(net)
        # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
        # Do not include the batch size in creating the final layer
        self.logits = slim.fully_connected(net, np.prod(input_shape[1:]),
                                           activation_fn=None)
        print('Output shape {}'.format(self.logits.get_shape()))
        p_x = Bernoulli(logits=self.logits)
        self.p_x = p_x

        self.loss = self._vae_loss()
        self.learning_rate = tf.Variable(1e-3, name="learning_rate")
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate) \
            .minimize(self.loss, var_list=slim.get_model_variables())
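
A minimal usage sketch for the constructor above (TF1 graph mode). The
`ConcreteVae` class name, the 28x28x1 placeholder, and the random feed batch
are illustrative assumptions rather than code from the original project:

import numpy as np
import tensorflow as tf

# Hypothetical wiring; assumes the constructor above belongs to a `ConcreteVae`
# class and that the slim/Bernoulli imports it relies on are already in scope.
images = tf.placeholder(tf.float32, [None, 28, 28, 1])        # e.g. MNIST batch
vae = ConcreteVae(images, cont_dim=2, discrete_dim=10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(32, 28, 28, 1).astype(np.float32)  # stand-in data
    _, loss = sess.run([vae.optimizer, vae.loss],
                       feed_dict={images: batch})
    print('loss: {}'.format(loss))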