Ejemplo n.º 1
0
def create_class_visualization(target_y, model, **kwargs):
    """
    Synthesize an image that drives the model's score for one class upward.

    Starting from random noise, each iteration jitters the image, takes a
    gradient step on (class score - l2_reg * l2_loss(image)), undoes the
    jitter, clips the pixels and, every `blur_every` iterations, blurs them.
    Selected iterations are drawn with matplotlib and saved as "<t>.jpg".

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN, called as model(images) to obtain class scores

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    option_defaults = (
        ('l2_reg', 1e-3),
        ('learning_rate', 25),
        ('num_iterations', 100),
        ('blur_every', 10),
        ('max_jitter', 16),
        ('show_every', 25),
    )
    (l2_reg, learning_rate, num_iterations,
     blur_every, max_jitter, show_every) = [
         kwargs.pop(key, fallback) for key, fallback in option_defaults]

    # Random-noise starting point; [None] adds the leading batch axis.
    noise = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(noise)[None]

    session = get_session()
    for t in range(num_iterations):
        # Shift the image by a random offset before the update ...
        shift_x, shift_y = np.random.randint(0, max_jitter, 2)
        X = jitter(X, shift_x, shift_y)

        image_tensor = tf.convert_to_tensor(X)
        with tf.GradientTape() as tape:
            tape.watch(image_tensor)
            class_score = model(image_tensor)[0, target_y]
            objective = class_score - l2_reg * tf.nn.l2_loss(image_tensor)
        grad_value = session.run(tape.gradient(objective, image_tensor))
        # In-place add on purpose: keeps X's dtype unchanged.
        X += grad_value[0] * learning_rate

        # ... and shift it back afterwards.
        X = jitter(X, -shift_x, -shift_y)

        lower = -SQUEEZENET_MEAN / SQUEEZENET_STD
        upper = (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD
        X = np.clip(X, lower, upper)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Only the first, every show_every-th and the last iteration are drawn.
        if not (t == 0 or (t + 1) % show_every == 0
                or t == num_iterations - 1):
            continue
        plt.imshow(deprocess_image(X[0]))
        caption = '%s\nIteration %d / %d' % (class_names[target_y], t + 1,
                                             num_iterations)
        plt.title(caption)
        plt.gcf().set_size_inches(4, 4)
        plt.axis('off')
        plt.savefig("%s.jpg" % t)
    return X
def style_transfer(content_image,
                   style_image,
                   image_size,
                   style_size,
                   content_layer,
                   content_weight,
                   style_layers,
                   style_weights,
                   tv_weight,
                   init_random=False):
    """Run style transfer using the module-level `model` and `sess`.

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - image_size: size passed to load_image for the content image
    - style_size: size passed to load_image for the style image
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise

    Nothing is returned; the source images and the generated image are
    displayed with matplotlib.
    """
    # Content target: features of the content image at content_layer.
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer],
                              {model.image: content_img[None]})

    # Style targets: Gram matrices of the style image at each style layer.
    style_img = preprocess_image(load_image(style_image, size=style_size))
    gram_ops = [gram_matrix(feats[idx]) for idx in style_layers]
    style_targets = sess.run(gram_ops, {model.image: style_img[None]})

    # The image being optimized starts as noise or as the content image.
    if init_random:
        start_value = tf.random_uniform(content_img[None].shape, 0, 1)
    else:
        start_value = content_img[None]
    img_var = tf.Variable(start_value, name="image")

    # Total loss on the generated image's features.
    feats = model.extract_features(img_var)
    loss = (content_loss(content_weight, feats[content_layer], content_target)
            + style_loss(feats, style_layers, style_targets, style_weights)
            + tv_loss(img_var, tv_weight))

    # Optimization hyperparameters.
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    # Adam updates only img_var; its slot variables live in the "optimizer"
    # scope so they can be collected and initialized below.
    lr_var = tf.Variable(initial_lr, name="lr")
    with tf.variable_scope("optimizer") as opt_scope:
        adam = tf.train.AdamOptimizer(lr_var)
        train_op = adam.minimize(loss, var_list=[img_var])
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Op that clamps the image values to [-1.5, 1.5] when run.
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    # Show the two source images side by side.
    _, (content_ax, style_ax) = plt.subplots(1, 2)
    for axis, caption in ((content_ax, 'Content Source Img.'),
                          (style_ax, 'Style Source Img.')):
        axis.axis('off')
        axis.set_title(caption)
    content_ax.imshow(deprocess_image(content_img))
    style_ax.imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    def _show_generated(iteration):
        """Print the iteration number and display the current image."""
        print('Iteration {}'.format(iteration))
        img = sess.run(img_var)
        plt.imshow(deprocess_image(img[0], rescale=True))
        plt.axis('off')
        plt.show()

    for t in range(max_iter):
        sess.run(train_op)
        if t < decay_lr_at:
            # Clamp only while the initial learning rate is in use.
            sess.run(clamp_image_op)
        elif t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 100 == 0:
            _show_generated(t)
    _show_generated(t)
# Setup for the style-transfer checks: build a fresh graph/session, load the
# pretrained SqueezeNet and read the two test images plus reference answers.
# check_scipy is defined elsewhere; presumably it validates the installed
# SciPy -- TODO confirm.
check_scipy()

from cs231n.classifiers.squeezenet import SqueezeNet
import tensorflow as tf

tf.reset_default_graph()  # remove all existing variables in the graph
sess = get_session()  # start a new Session

# Load pretrained SqueezeNet model
SAVE_PATH = 'cs231n/datasets/squeezenet.ckpt'
# if not os.path.exists(SAVE_PATH):
#     raise ValueError("You need to download SqueezeNet!")
model = SqueezeNet(save_path=SAVE_PATH, sess=sess)

# Load data for testing
# Both images are loaded with size=192, preprocessed, and given a leading
# batch axis via [None].
content_img_test = preprocess_image(load_image('styles/tubingen.jpg',
                                               size=192))[None]
style_img_test = preprocess_image(
    load_image('styles/starry_night.jpg', size=192))[None]
# Reference values for the checks, stored in an .npz archive.
answers = np.load('style-transfer-checks-tf.npz')


def content_loss(content_weight, content_current, content_original):
    """
    Compute the content loss: a weighted sum of squared feature differences.

    Inputs:
    - content_weight: scalar weight applied to the loss
    - content_current: Tensor of features for the current image
    - content_original: features of the content image; must contain the same
      number of elements as content_current

    Returns:
    - scalar Tensor: content_weight * sum((content_current - content_original)^2)
    """
    # The sum runs over every element, so no flattening to 2-D is needed.
    # The previous reshape to [shape[1], shape[2] * shape[3]] only worked for
    # a batch of one; aligning the target to the current features' shape keeps
    # that case working and also accepts larger batches.
    target = tf.reshape(content_original, tf.shape(content_current))
    return content_weight * tf.reduce_sum((content_current - target) ** 2)


def content_loss_test(correct):
    content_layer = 3
def create_class_visualization(target_y, model, sess, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image; its
      `image` and `labels` placeholders and `classifier` scores are used
    - sess: TensorFlow session used to evaluate the loss and its gradient

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to save the intermediate result

    Returns:
    - X: the generated image batch after the last iteration

    Side effects: prints the loss and saves a snapshot to
    'reports/class_visualization_image_<target_y>_<t>.png' on the first,
    every show_every-th and the last iteration.
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # Start from a single image of random noise (leading batch axis added).
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build the loss and gradient tensors once, outside the loop, so the
    # gradient graph is not rebuilt at every iteration.
    N = X.shape[0]
    correct_scores = tf.gather_nd(
        model.classifier, tf.stack((tf.range(N), model.labels), axis=1))
    # NOTE(review): the penalty is l2_reg * ||image||_2 (unsquared norm), not
    # the squared norm -- left unchanged; confirm this is intended.
    loss = correct_scores - tf.scalar_mul(
        tf.constant(l2_reg), tf.norm(tf.reshape(model.image, [1, -1]), axis=1))
    grad = tf.gradients(loss, model.image)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Gradient ascent: step along the gradient of the regularized score.
        # tf.gradients returns a list, hence the [0].
        loss_result, grad_result = sess.run([loss, grad], {
            model.image: X,
            model.labels: [target_y]
        })
        X = X + learning_rate * grad_result[0]

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically save the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            print('save image in iteration {}/{}, loss is {}'.format(
                t, num_iterations, loss_result))
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' %
                      (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.savefig('reports/class_visualization_image_{}_{}.png'.format(
                target_y, t))
            # Close the figure so figures do not pile up across snapshots.
            plt.close()
    return X
# Plot the first five raw images in a single row, each titled with its class
# name, and save the figure under reports/.
plt.figure(figsize=(12, 6))
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(X_raw[i])
    plt.title(class_names[y[i]])
    plt.axis('off')
plt.gcf().tight_layout()
plt.savefig('reports/imageNet_images.png')

# ## Preprocess images
# The input to the pretrained model is expected to be normalized, so we first preprocess the images by subtracting the pixelwise mean and dividing by the pixelwise standard deviation.

# In[ ]:

# Stack the per-image results of preprocess_image into one array.
X = np.array([preprocess_image(img) for img in X_raw])

# # Saliency Maps
# Using this pretrained model, we will compute class saliency maps as described in Section 3.1 of [2].
#
# A **saliency map** tells us the degree to which each pixel in the image affects the classification score for that image.
# To compute it, we compute the gradient of the unnormalized score corresponding to the correct class (which is a scalar) with respect to the pixels of the image.
# If the image has shape `(H, W, 3)` then this gradient will also have shape `(H, W, 3)`;
# for each pixel in the image, this gradient tells us the amount by which the classification score will change if the pixel changes by a small amount.
# To compute the saliency map, we take the absolute value of this gradient, then take the maximum value over the 3 input channels;
# the final saliency map thus has shape `(H, W)` and all entries are nonnegative.
#
# You will need to use the `model.classifier` Tensor containing the scores for each input, and will need to feed in values for the `model.image` and `model.labels` placeholder when evaluating the gradient.
# Open the file `cs231n/classifiers/squeezenet.py` and read the documentation to make sure you understand how to use the model. For example usage, you can see the `loss` attribute.
#
# [2] Karen Simonyan, Andrea Vedaldi, and Andrew Zisserman. "Deep Inside Convolutional Networks: Visualising
Ejemplo n.º 6
0
#############################################################################
# Shallow feature reconstruction
# Load an image, extract its features at a shallow layer, then call
# invert_features (defined elsewhere) to reconstruct an image from them.
filename = 'kitten.jpg'
layer = 3  # layers start from 0 so these are features after 4 convolutions
# Resize the input to 64x64 before feeding it to the network.
img = imresize(imread(filename), (64, 64))

plt.imshow(img)
plt.gcf().set_size_inches(3, 3)
plt.title('Original image')
plt.axis('off')
plt.show()

# Preprocess the image before passing it to the network:
# subtract the mean, add a dimension, etc
img_pre = preprocess_image(img, data['mean_image'])

# Extract features from the image
# The forward pass stops at `layer`; the second return value is unused here.
feats, _ = model.forward(img_pre, end=layer)

# Invert the features
# Options forwarded to invert_features as keyword arguments.
kwargs = {
    'num_iterations': 400,
    'learning_rate': 5000,
    'l2_reg': 1e-8,
    'show_every': 100,
    'blur_every': 10,
}
X = invert_features(feats, layer, model, **kwargs)
#############################################################################
# Deep feature reconstruction
Ejemplo n.º 7
0
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    The update direction is the gradient of the class score minus
    2 * l2_reg * image; each step is rescaled so that its overall L2 norm
    equals learning_rate. Uses the module-level TensorFlow session `sess`.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image; its
      `image` and `labels` placeholders and `classifier` scores are used

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: Norm of the step taken at each iteration
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # Start from a single image of random noise (leading batch axis added).
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build the gradient tensor once, outside the loop, so the graph is not
    # rebuilt at each iteration. The L2 term is applied analytically:
    # d/dI (score - l2_reg * ||I||^2) = dscore/dI - 2 * l2_reg * I.
    score = model.classifier[0, target_y]
    grad = tf.gradients(score, model.image)
    grad = tf.squeeze(grad) - l2_reg * 2 * model.image

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        img_step = sess.run(grad, feed_dict={model.image: X,
                                             model.labels: np.array([target_y])})
        # Normalized gradient ascent. Skip the update when the gradient is
        # exactly zero; dividing by a zero norm would fill X with NaN/inf.
        step_norm = np.linalg.norm(img_step)
        if step_norm > 0:
            X += img_step * learning_rate / step_norm

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
Ejemplo n.º 8
0
def style_transfer(content_image,
                   style_image,
                   image_size,
                   style_size,
                   content_layer,
                   content_weight,
                   style_layers,
                   style_weights,
                   tv_weight,
                   init_random=False):
    """Run style transfer!
    
    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise

    Uses the module-level `model`. Nothing is returned; the source images and
    the final generated image are displayed with matplotlib.
    """
    # Extract features from the content image
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = extract_features(content_img[None], model)
    content_target = feats[content_layer]

    # Extract features from the style image
    style_img = preprocess_image(load_image(style_image, size=style_size))
    s_feats = extract_features(style_img[None], model)
    # Gram matrices of the style image at each requested layer
    style_targets = [gram_matrix(s_feats[idx]) for idx in style_layers]

    # Set up optimization hyperparameters
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    # Hand the schedule object itself to the optimizer so it is re-evaluated
    # against the optimizer's own step counter on every update. Evaluating it
    # once up front (against a step variable that is never incremented) would
    # freeze the rate at initial_lr, so the decay would never take effect.
    learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        [decay_lr_at], [initial_lr, decayed_lr])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

    # Show the content and style source images side by side
    f, axarr = plt.subplots(1, 2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Content Source Img.')
    axarr[1].set_title('Style Source Img.')
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    # Initialize generated image to content image (or to uniform noise)
    if init_random:
        initializer = tf.random_uniform_initializer(0, 1)
        img = initializer(shape=content_img[None].shape)
        img_var = tf.Variable(img)
        print("Intializing randomly.")
    else:
        img_var = tf.Variable(content_img[None])
        print("Initializing with content image.")

    for t in range(max_iter):
        with tf.GradientTape() as tape:
            tape.watch(img_var)
            feats = extract_features(img_var, model)
            # Compute loss
            c_loss = content_loss(content_weight, feats[content_layer],
                                  content_target)
            s_loss = style_loss(feats, style_layers, style_targets,
                                style_weights)
            t_loss = tv_loss(img_var, tv_weight)
            loss = c_loss + s_loss + t_loss
        # Compute gradient and take one Adam step on the image
        grad = tape.gradient(loss, img_var)
        optimizer.apply_gradients([(grad, img_var)])

        # Clamp the image values after every step
        img_var.assign(tf.clip_by_value(img_var, -1.5, 1.5))

        if t % 10 == 0:
            print('Iteration {}'.format(t))

    print('Iteration {}'.format(t))
    plt.imshow(deprocess_image(img_var[0].numpy(), rescale=True))
    plt.axis('off')
    plt.show()
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Performs gradient ascent on (class score - l2_reg * sum(image ** 2)),
    with random jitter, clipping and periodic blurring between steps.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 200)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    X = tf.Variable(X)
    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        # Record the forward pass so the class score can be differentiated
        # with respect to the image.
        with tf.GradientTape() as tg:
            tg.watch(X)
            scores = model.call(X)
            # Only one class is targeted, so this selects a single score.
            correct_scores = scores[:, target_y]

        dX = tg.gradient(correct_scores, X)
        # The objective is score - l2_reg * sum(X ** 2), so the regularizer
        # contributes -2 * l2_reg * X to the gradient. It must be subtracted:
        # adding it during gradient ascent would grow the image norm instead
        # of penalizing it.
        dX -= 2 * l2_reg * X

        # Gradient ascent step
        X += learning_rate * dX

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        if t % 20 == 0:  # print progress every 20 updates
            template = 'Training progress is at {}th iteration out of {} iterations.'
            print(template.format(t, num_iterations))

        # As a regularizer, clip and periodically blur
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' %
                      (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
Ejemplo n.º 10
0
def style_transfer(content_image, style_image, output_image, image_size, style_size, content_layer, content_weight,
                   style_layers, style_weights, tv_weight, init_random = False, sess=sess, model=model):
    """Run style transfer and write intermediate results to disk.

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - output_image: filename to write to; a trailing '.jpg' is stripped and
      '_iter<t>.jpg' is appended for each saved snapshot
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    - sess: TensorFlow session to run ops in (defaults to the module-level one)
    - model: feature-extraction model (defaults to the module-level one)

    Nothing is returned; a snapshot is written with cv2.imwrite every 25
    iterations.
    """
    # Content target: features of the content image at content_layer.
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer],
                              {model.image: content_img[None]})

    # Style targets: Gram matrices of the style image at each style layer.
    style_img = preprocess_image(load_image(style_image, size=style_size))
    gram_ops = [gram_matrix(feats[idx]) for idx in style_layers]
    style_targets = sess.run(gram_ops, {model.image: style_img[None]})

    # The image being optimized starts as noise or as the content image.
    if init_random:
        start_value = tf.random_uniform(content_img[None].shape, 0, 1)
    else:
        start_value = content_img[None]
    img_var = tf.Variable(start_value, name="image")

    # Total loss on the generated image's features.
    feats = model.extract_features(img_var)
    loss = (content_loss(content_weight, feats[content_layer], content_target)
            + style_loss(feats, style_layers, style_targets, style_weights)
            + tv_loss(img_var, tv_weight))

    # Optimization hyperparameters.
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 100

    # Adam updates only img_var; its slot variables live in the "optimizer"
    # scope so they can be collected and initialized below.
    lr_var = tf.Variable(initial_lr, name="lr")
    with tf.variable_scope("optimizer") as opt_scope:
        adam = tf.train.AdamOptimizer(lr_var)
        train_op = adam.minimize(loss, var_list=[img_var])
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Op that clamps the image values to [-1.5, 1.5] when run.
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    # Drop a trailing extension; it is re-added per snapshot below.
    if output_image.endswith('.jpg'):
        output_image = output_image[:-4]

    # max_iter + 1 steps, so a snapshot is also written at t == max_iter.
    for t in range(max_iter + 1):
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        elif t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 25 != 0:
            continue
        print('Iteration {}'.format(t))
        img = sess.run(img_var)
        snapshot_path = output_image + "_iter" + str(t) + ".jpg"
        cv2.imwrite(snapshot_path, deprocess_image(img[0]))
Ejemplo n.º 11
0
    - gram: Tensor of shape (N, C, C) giving the (optionally normalized)
      Gram matrices for the input image.
    """
    features_shape = tf.shape(features)
    features_T = tf.transpose(features, perm=[0, 3, 2, 1])
    mult = tf.batch_matmul(features_T, features)
    if normalize:
        mult = tf.scalar_mul(tf.reciprocal(tf.cast(features_shape[1]*features_shape[2]*features_shape[3], tf.float32)), mult)
    return mult


# Module-level style configuration: the layers used for the style loss, their
# weights, and the Gram-matrix targets computed once from a fixed style image.
style_layers = [1, 4, 6, 7]
style_weights = [2000000, 800, 12, 1]
style_feats = model.extract_features(model.image)
# TODO: make this a dynamic tensor
style_img = preprocess_image(load_image('./styles/van_gogh.jpg'))
# NOTE(review): the layer indices are repeated here rather than reusing
# style_layers; the two lists must be kept in sync.
style_feat_vars = [style_feats[idx] for idx in [1, 4, 6, 7]]
style_target_vars = []
# Compute list of TensorFlow Gram matrices
for style_feat_var in style_feat_vars:
	style_target_vars.append(gram_matrix(style_feat_var))
# Compute list of NumPy Gram matrices by evaluating the TensorFlow graph on the style image
style_targets = sess.run(style_target_vars, {model.image: style_img[None]})

def gan_style_loss(gan_output_image):
    # preprocess the gan image per the constants in cs231n/image_utils
    processed_gan_img = tf_preprocess_image(gan_output_image)
    gan_img_feats = model.extract_features(processed_gan_img)

    loss = tf.constant(0, tf.float32)
    for i in range(len(style_layers)):
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Each iteration performs one step of gradient ascent on the objective

        model(X)[0, target_y] - l2_reg * sum(X ** 2)

    with the image randomly jittered before the step and un-jittered after it,
    then clipped and periodically blurred.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: The generated image in preprocessed form, with a leading batch axis
      of size 1.
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    X = tf.Variable(X)
    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        ########################################################################
        # TODO: Compute the value of the gradient of the score for             #
        # class target_y with respect to the pixels of the image, and make a   #
        # gradient step on the image using the learning rate. You should use   #
        # the tf.GradientTape() and tape.gradient to compute gradients.        #
        #                                                                      #
        # Be very careful about the signs of elements in your code.            #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        with tf.GradientTape() as tape:
            # X is rebound to fresh values every iteration, so it has to be
            # watched explicitly on each new tape.
            tape.watch(X)
            scores = model(X)
            target_score = scores[0, target_y]
            # Scalar objective: class score minus a squared-L2 penalty on the
            # pixels. The penalty must be a scalar; an image-shaped term would
            # make tape.gradient differentiate the sum of a broadcast tensor
            # instead of the intended regularized score.
            objective = target_score - l2_reg * tf.reduce_sum(tf.square(X))

        grad = tape.gradient(objective, X)
        # Gradient *ascent* with a normalized direction, so the step length is
        # governed by learning_rate rather than by the raw gradient magnitude.
        dX = learning_rate * tf.math.l2_normalize(grad)
        X += dX

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        # As a regularizer, clip and periodically blur. The bounds are the
        # normalized counterparts of raw intensities 0 and 1 (assuming
        # preprocess_image computes (x - mean) / std -- confirm).
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' %
                      (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    The symbolic objective

        model.scores[0, target_y] - l2_reg * sum(model.image ** 2)

    and its gradient with respect to model.image are built once, outside the
    loop; every iteration then evaluates that gradient through `sess` (a name
    taken from the enclosing scope) and takes one gradient-ascent step.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: The generated image in preprocessed form, with a leading batch axis
      of size 1.
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build loss and gradient once so the gradient graph is not recreated on
    # every iteration. The L2 term is *subtracted* and applied to the image
    # pixels (not to the class scores): we maximize the target score while
    # keeping the image norm small.
    loss = model.scores[0, target_y] - l2_reg * tf.reduce_sum(
        tf.square(model.image))
    # tf.gradients returns one entry per differentiated input; take the only one.
    grad = tf.gradients(loss, model.image)[0]

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Evaluate the gradient at the current image and step *up* the
        # objective (gradient ascent, hence the plus sign).
        g = sess.run(grad,
                     feed_dict={
                         model.image: X,
                         model.labels: np.array([target_y])
                     })
        X += learning_rate * g

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' %
                      (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
Ejemplo n.º 14
0
      plt.gcf().set_size_inches(8, 8)
      plt.axis('off')
      filename = 'images/deepdream_%d.jpg' % (t+1)
      plt.savefig(filename)
  return X

def read_image(filename, max_size):
    """
    Load an image file and rescale it so that its longer edge equals max_size.

    Inputs:
    - filename: Path of the image to load (passed to imread).
    - max_size: Length, in pixels, given to the longer edge of the result.

    Returns the resized image produced by imresize.
    """
    pixels = imread(filename)
    height, width, _channels = pixels.shape
    scale = float(max_size)
    if height < width:
        # Landscape: width becomes max_size, height shrinks proportionally.
        target_shape = (int(height * scale / width), max_size)
    else:
        # Portrait or square: height becomes max_size.
        target_shape = (max_size, int(width * scale / height))
    return imresize(pixels, target_shape)

# Load the sample picture with its longer edge limited to max_size pixels.
filename, max_size = 'kitten.jpg', 256
img = read_image(filename, max_size)

# Preview the input image without axis decorations.
plt.imshow(img)
plt.axis('off')

# Bring the image into the network's input format (float conversion,
# transposition and mean subtraction), then run deepdream on it.
# NOTE(review): the positional 7 is presumably the layer to amplify -- confirm
# against deepdream's signature.
img_pre = preprocess_image(img, data['mean_image'], mean='pixel')
out = deepdream(img_pre, 7, model, learning_rate=2000)