Ejemplo n.º 1
0
def get_code(encoder, path, layer, mask=None):
    '''
    Push an image through the given encoder network (e.g. AlexNet) and
    return its activations ("code") at the requested layer.

    Args:
        encoder: a caffe.Net whose 'data' blob defines the input shape.
        path: filesystem path of the image to encode.
        layer: name of the blob whose activations are returned.
        mask: optional array broadcastable to the preprocessed input;
            multiplied into it (e.g. for in-painting).

    Returns:
        (features, data): a copy of the activations at `layer`, and the
        preprocessed (BGR, mean-subtracted) input that produced them.
    '''

    # Set up the input for the net, matching the encoder's data blob shape.
    image_size = encoder.blobs['data'].shape[2:]    # e.g. (227, 227)
    images = np.zeros_like(encoder.blobs["data"].data, dtype='float32')

    # Load and resize the image to the network's input resolution.
    in_image = scipy.misc.imread(path)
    in_image = scipy.misc.imresize(in_image, (image_size[0], image_size[1]))
    images[0] = np.transpose(in_image, (2, 0, 1))   # HWC -> CHW

    data = images[:, ::-1]   # convert from RGB to BGR (reverse channel axis)

    # Subtract the ImageNet mean, center-cropped to the input size.
    image_mean = scipy.io.loadmat('misc/ilsvrc_2012_mean.mat')['image_mean']  # (256, 256, 3)
    topleft = util.compute_topleft(image_size, image_mean.shape[:2])
    image_mean = image_mean[topleft[0]:topleft[0] + image_size[0],
                            topleft[1]:topleft[1] + image_size[1]]   # crop the image mean
    data -= np.expand_dims(np.transpose(image_mean, (2, 0, 1)), 0)   # mean is already BGR

    if mask is not None:
        data *= mask

    # BUG FIX: the original re-created the encoder here via
    # caffe.Net(settings.encoder_definition, ...), silently discarding the
    # `encoder` argument it had already used for shapes. Use the net that
    # was passed in.

    # Extract the features at the requested layer.
    encoder.forward(data=data)
    features = encoder.blobs[layer].data.copy()

    return features, data
Ejemplo n.º 2
0
def get_code(generator, classifier, batch_size):
    '''
    Generate <batch_size> latent codes h whose generated images are
    confidently classified.

    Repeatedly samples h from a standard normal distribution (note: not
    uniform, despite the original comment), pushes it through the
    generator, center-crops the output to the classifier's input size,
    and accepts h when the classifier's top-class probability exceeds
    0.55.

    Returns:
        (h_list, class_list): the accepted codes and, for each, the
        index of the classifier's highest-probability class.
    '''
    gen_in = settings.generator_in_layer
    gen_out = settings.generator_out_layer
    h_shape = generator.blobs[gen_in].data.shape

    # Get the input and output sizes
    image_shape = classifier.blobs['data'].data.shape
    generator_output_shape = generator.blobs[gen_out].data.shape

    # Size difference between the classifier input and the generator output
    image_size = util.get_image_size(image_shape)
    generator_output_size = util.get_image_size(generator_output_shape)

    # The top left offset to crop the output image to get a 227x227 image
    topleft = util.compute_topleft(image_size, generator_output_size)

    h_list = []
    class_list = []
    i = 0
    print ("starting to generate h's")
    while len(h_list) < batch_size:
        print ("round %d" % i)
        # Sample h from a standard normal distribution
        h = np.random.normal(0, 1, h_shape)

        # Push h through the generator to get an image
        generator.blobs[gen_in].data[:] = h
        generated = generator.forward()
        x = generated[gen_out].copy()       # 256x256

        # Crop from 256x256 to 227x227
        cropped_x = x[:, :, topleft[0]:topleft[0]+image_size[0], topleft[1]:topleft[1]+image_size[1]]
        cropped_x_copy = cropped_x.copy()

        softmax_output = classifier.forward(data=cropped_x_copy, end='prob')
        probs = softmax_output['prob'][0]
        best_class = probs.argmax()     # highest-probability unit
        print (probs[best_class])
        # BUG FIX: softmax outputs lie in [0, 1], so the original
        # threshold `> 55` could never be met (infinite loop); use 0.55
        # to match the "prob_highest_class > 55%" intent.
        if probs[best_class] > 0.55:
            h_list.append(h)
            class_list.append(best_class)
            i += 1
    # BUG FIX: return all accepted codes with their classes; the original
    # returned only the last sampled h, dropping the rest of the batch
    # and the class labels promised by the docstring.
    return h_list, class_list
Ejemplo n.º 3
0
    def sampling(
            self,
            condition_net,
            image_encoder,
            image_generator,
            gen_in_layer,
            gen_out_layer,
            start_code,
            n_iters,
            lr,
            lr_end,
            threshold,
            layer,
            conditions,  #units=None, xy=0, 
            epsilon1=1,
            epsilon2=1,
            epsilon3=1e-10,
            inpainting=None,  # in-painting args
            output_dir=None,
            reset_every=0,
            save_every=1):

        # Get the input and output sizes
        image_shape = condition_net.blobs['data'].data.shape
        generator_output_shape = image_generator.blobs[
            gen_out_layer].data.shape
        encoder_input_shape = image_encoder.blobs['data'].data.shape

        # Calculate the difference between the input image of the condition net
        # and the output image from the generator
        image_size = util.get_image_size(image_shape)
        generator_output_size = util.get_image_size(generator_output_shape)
        encoder_input_size = util.get_image_size(encoder_input_shape)

        # The top left offset to crop the output image to get a 227x227 image
        topleft = util.compute_topleft(image_size, generator_output_size)
        topleft_DAE = util.compute_topleft(encoder_input_size,
                                           generator_output_size)

        src = image_generator.blobs[
            gen_in_layer]  # the input feature layer of the generator

        # Make sure the layer size and initial vector size match
        assert src.data.shape == start_code.shape

        # Variables to store the best sample
        last_xx = np.zeros(image_shape)  # best image
        last_prob = -sys.maxint  # highest probability

        h = start_code.copy()

        condition_idx = 1
        list_samples = []
        i = 0
        print('Captions to be conditioned :')
        for i in xrange(len(conditions)):
            print(conditions[i]['readable'])

        while True:
            step_size = lr + ((lr_end - lr) * i) / n_iters
            # condition = conditions[condition_idx]  # Select a class

            # 1. Compute the epsilon1 term ---

            d_prior = self.h_autoencoder_grad(h=h,
                                              encoder=image_generator,
                                              decoder=image_encoder,
                                              gen_out_layer=gen_out_layer,
                                              topleft=topleft_DAE,
                                              inpainting=inpainting)

            # 2. Compute the epsilon2 term ---
            # Push the code through the generator to get an image x
            image_generator.blobs["feat"].data[:] = h
            generated = image_generator.forward()
            x = generated[gen_out_layer].copy()  # 256x256

            # Crop from 256x256 to 227x227
            cropped_x = x[:, :, topleft[0]:topleft[0] + image_size[0],
                          topleft[1]:topleft[1] + image_size[1]]
            cropped_x_copy = cropped_x.copy()
            # pdb.set_trace()
            if inpainting is not None:
                cropped_x = util.apply_mask(img=cropped_x,
                                            mask=inpainting['mask'],
                                            context=inpainting['image'])

            # Forward pass the image x to the condition net up to an unit k at the given layer
            # Backprop the gradient through the condition net to the image layer to get a gradient image
            grad_caption = []
            # pdb.set_trace()
            for length in xrange(len(conditions)):
                condition_ids = conditions[length]['sentence']
                d_condition_x, prob, info = self.forward_backward_from_x_to_condition(
                    net=condition_net,
                    end=layer,
                    image=cropped_x,
                    condition=condition_ids)
                grad_caption.append(d_condition_x)

            # Average all the gradients of the captions
            d_condition_x = np.mean(grad_caption, axis=0)

            if inpainting is not None:
                # Mask out the class gradient image
                d_condition_x[:] *= inpainting["mask"]

                # An additional objective for matching the context image
                d_context_x256 = np.zeros_like(x.copy())
                d_context_x256[:, :, topleft[0]:topleft[0] + image_size[0],
                               topleft[1]:topleft[1] + image_size[1]] = (
                                   inpainting["image"] -
                                   cropped_x_copy) * inpainting["mask_neg"]
                d_context_h = self.backward_from_x_to_h(
                    generator=image_generator,
                    diff=d_context_x256,
                    start=gen_in_layer,
                    end=gen_out_layer)

            # Put the gradient back in the 256x256 format
            d_condition_x256 = np.zeros_like(x)
            d_condition_x256[:, :, topleft[0]:topleft[0] + image_size[0],
                             topleft[1]:topleft[1] +
                             image_size[1]] = d_condition_x.copy()

            # Backpropagate the above gradient all the way to h (through generator)
            # This gradient 'd_condition' is d log(p(y|h)) / dh (the epsilon2 term in Eq. 11 in the paper)
            d_condition = self.backward_from_x_to_h(generator=image_generator,
                                                    diff=d_condition_x256,
                                                    start=gen_in_layer,
                                                    end=gen_out_layer)

            self.print_progress(i, info, prob, d_condition)

            # 3. Compute the epsilon3 term ---
            noise = np.zeros_like(h)
            if epsilon3 > 0:
                noise = np.random.normal(0, epsilon3,
                                         h.shape)  # Gaussian noise

            # Update h according to Eq.11 in the paper
            d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

            # Plus the optional epsilon4 for matching the context region when in-painting
            if inpainting is not None:
                d_h += inpainting["epsilon4"] * d_context_h

            h += step_size / np.abs(d_h).mean() * d_h

            h = np.clip(h, a_min=0,
                        a_max=30)  # Keep the code within a realistic range

            # Reset the code every N iters (for diversity when running a long sampling chain)
            if reset_every > 0 and i % reset_every == 0 and i > 0:
                h = np.random.normal(0, 1, h.shape)

                # Experimental: For sample diversity, it's a good idea to randomly pick epsilon1 as well
                epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

            # Save every sample
            last_xx = cropped_x.copy()
            last_prob = prob

            # Filter samples based on threshold or every N iterations
            if save_every > 0 and i % save_every == 0 and prob > threshold:
                name = "%s/samples/%05d.jpg" % (output_dir, i)

                label = self.get_label(condition)
                list_samples.append((last_xx.copy(), name, label))

            # Stop if grad is 0
            if norm(d_h) == 0:
                print " d_h is 0"
                break

            # Randomly sample a class every N iterations
            if i > 0 and i % n_iters == 0:
                condition_idx += 1
                # pdb.set_trace()
                break

            i += 1  # Next iter

        # returning the last sample
        print "-------------------------"
        print "Last sample: prob [%s] " % last_prob

        return last_xx, list_samples
Ejemplo n.º 4
0
    def h_sampling(
            self,
            condition_net,
            image_encoder,
            image_generator,
            gen_in_layer,
            gen_out_layer,
            start_code,
            n_iters,
            lr,
            lr_end,
            threshold,
            layer,
            conditions,  #units=None, xy=0, 
            epsilon1=1,
            epsilon2=1,
            epsilon3=1e-10,
            inpainting=None,  # in-painting args
            output_dir=None,
            reset_every=0,
            save_every=1):
        '''
        Sample in latent space directly: the architecture is x <- h -> c,
        i.e. the condition net is driven by h itself rather than by the
        rendered image. Unlike the usual h -> x -> c sampling (227x227
        crops), this method yields full generator outputs (256x256).

        Returns a tuple: (last generated image, saved samples list, final h,
        and four arrays of per-iteration gradient min/max plus a count of
        codes stuck at the clipping boundary) used for diagnostic plots.
        '''

        # Get the input and output sizes
        generator_output_shape = image_generator.blobs[
            gen_out_layer].data.shape
        encoder_input_shape = image_encoder.blobs['data'].data.shape

        # Calculate the difference between the input image of the condition net
        # and the output image from the generator
        generator_output_size = util.get_image_size(generator_output_shape)
        encoder_input_size = util.get_image_size(encoder_input_shape)

        # The top left offset to crop the output image to get a 227x227 image
        topleft_DAE = util.compute_topleft(encoder_input_size,
                                           generator_output_size)

        src = image_generator.blobs[
            gen_in_layer]  # the input feature layer of the generator

        # Make sure the layer size and initial vector size match
        assert src.data.shape == start_code.shape

        # Variables to store the best sample
        last_xx = np.zeros(generator_output_shape)  # best image
        last_prob = -sys.maxint  # highest probability

        h = start_code.copy()
        h_shape = h.shape

        # Adam Parameters
        # NOTE(review): the Adam moments below are computed each iteration
        # but the Adam update itself is commented out further down — the
        # actual update is plain mean-abs-normalised ascent. eps and lr_end
        # are currently unused. Looks like leftover experimentation; confirm
        # before removing.
        mom1 = 0.9
        mom2 = 0.999
        eps = 1e-8
        t = 1
        m_t = np.zeros(h_shape)
        v_t = np.zeros(h_shape)

        condition_idx = 0
        list_samples = []
        i = 0

        # for d_h plots (per-iteration diagnostics returned to the caller)
        d_prior_mins = []
        d_prior_maxs = []
        d_condition_mins = []
        d_condition_maxs = []
        boundary_points = []

        while True:

            #step_size = lr + ((lr_end - lr) * i) / n_iters
            condition = conditions[condition_idx]  # Select a class

            # 1. Compute the epsilon1 term ---
            # compute gradient d log(p(h)) / dh per DAE results in Alain & Bengio 2014
            d_prior = self.h_autoencoder_grad(h=h,
                                              encoder=image_generator,
                                              decoder=image_encoder,
                                              gen_out_layer=gen_out_layer,
                                              topleft=topleft_DAE,
                                              inpainting=inpainting)

            # 2. Compute the epsilon2 term ---
            # Push the code through the generator to get an image x
            image_generator.blobs["feat"].data[:] = h
            generated = image_generator.forward()
            x = generated[gen_out_layer].copy()  # 256x256

            # Forward pass the latent code h to the condition net up to an unit k at the given layer
            # Backprop the gradient through the condition net to the latent layer to get a gradient latent code h
            d_condition, prob, info = self.forward_backward_from_h_to_condition(
                net=condition_net, end=layer, h_code=h, condition=condition)

            self.print_progress(i, info, condition, prob, d_condition)

            # 3. Compute the epsilon3 term --- (Gaussian exploration noise)
            noise = np.zeros_like(h)
            if epsilon3 > 0:
                noise = np.random.normal(0, epsilon3,
                                         h.shape)  # Gaussian noise

            # Update h according to Eq.11 in the paper
            d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

            # Record gradient ranges for the diagnostic plots.
            # NOTE(review): assumes d_prior/d_condition are batched with a
            # leading axis of 1 so [0] is 1-D — confirm against the helpers.
            d_prior_mins.append(min(d_prior[0]))
            d_prior_maxs.append(max(d_prior[0]))
            d_condition_mins.append(min(d_condition[0]))
            d_condition_maxs.append(max(d_condition[0]))

            ################ Adam ################
            # (moments maintained but the Adam step below is disabled)
            m_t = mom1 * m_t + (1 - mom1) * d_h
            v_t = mom2 * v_t + (1 - mom2) * (d_h**2)
            m_t_hat = m_t / (1 - mom1**t)
            v_t_hat = v_t / (1 - mom2**t)
            step_size = lr
            t += 1

            #h += step_size*m_t_hat/((np.sqrt(v_t_hat) + eps)*(np.abs(d_h).mean()))

            # Active update: fixed lr, normalised by the mean |gradient|.
            h += step_size / np.abs(d_h).mean() * d_h

            h = np.clip(h, a_min=0,
                        a_max=30)  # Keep the code within a realistic range
            # stochastic clipping (disabled experiment)
            #h[h>30] = np.random.uniform(0, 30)
            #h[h<0] = np.random.uniform(0, 30)

            # Count how many entries of h are pinned at the clip boundaries.
            boundary_points.append(
                np.count_nonzero(h == 30) + np.count_nonzero(h == 0))

            # Reset the code every N iters (for diversity when running a long sampling chain)
            if reset_every > 0 and i % reset_every == 0 and i > 0:
                h = np.random.normal(0, 1, h.shape)

                # Experimental: For sample diversity, it's a good idea to randomly pick epsilon1 as well
                epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

            # Save every sample
            last_xx = x.copy()
            last_prob = prob

            # Filter samples based on threshold or every N iterations
            if save_every > 0 and i % save_every == 0 and prob > threshold:
                name = "%s/samples/%05d.jpg" % (output_dir, i)

                label = self.get_label(condition)
                list_samples.append((last_xx.copy(), name, label))

            # Stop if grad is 0
            if norm(d_h) == 0:
                print " d_h is 0"
                break

            # Randomly sample a class every N iterations
            if i > 0 and i % n_iters == 0:
                condition_idx += 1

                if condition_idx == len(conditions):
                    break

            i += 1  # Next iter

        # returning the last sample
        print "-------------------------"
        print "Last sample: prob [%s] " % last_prob

        return last_xx, list_samples, h, np.array(d_prior_mins), np.array(
            d_prior_maxs), np.array(d_condition_mins), np.array(
                d_condition_maxs), np.array(boundary_points)
Ejemplo n.º 5
0
# Layer names for the generator's latent input and image output, taken
# from the project-wide settings module.
gen_in = settings.generator_in_layer
gen_out = settings.generator_out_layer

# Shape of the latent code h expected by the generator's input blob.
h_shape = generator.blobs[gen_in].data.shape

# Get the input and output sizes
image_shape = encoder.blobs['data'].data.shape
generator_output_shape = generator.blobs[gen_out].data.shape

# Calculate the difference between the input image of the condition net
# and the output image from the generator
image_size = util.get_image_size(image_shape)
generator_output_size = util.get_image_size(generator_output_shape)

# The top left offset to crop the output image to get a 227x227 image
topleft = util.compute_topleft(image_size, generator_output_size)

# ImageNet mean image, loaded as (256, 256, 3) HWC then reshaped to
# (1, 3, 256, 256) NCHW to match the caffe blob layout.
image_mean = scipy.io.loadmat('misc/ilsvrc_2012_mean.mat')[
    'image_mean']  # (256, 256, 3)
image_mean = np.expand_dims(np.transpose(image_mean, (2, 0, 1)), 0)
#image_mean = np.repeat(image_mean, 10, axis=0)

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.datasets as datasets
import torch.utils.data
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim