def create_class_visualization(target_y, model, **kwargs):
    """
    Synthesize an image that maximizes the score of class target_y.

    Starting from random noise, each iteration jitters the image, takes one
    gradient-ascent step on (class score - l2_reg * l2_loss(image)), undoes
    the jitter, clips the result and periodically blurs it. Selected
    iterations are drawn with matplotlib and saved as "<t>.jpg".

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - The generated image batch after the last iteration.
    """
    l2_reg = kwargs.get('l2_reg', 1e-3)
    learning_rate = kwargs.get('learning_rate', 25)
    num_iterations = kwargs.get('num_iterations', 100)
    blur_every = kwargs.get('blur_every', 10)
    max_jitter = kwargs.get('max_jitter', 16)
    show_every = kwargs.get('show_every', 25)

    # Random-noise starting point with a leading batch axis.
    image = preprocess_image(255 * np.random.rand(224, 224, 3))[None]

    # NOTE(review): gradients are built with tf.GradientTape but evaluated
    # through a session; confirm which TensorFlow execution mode is intended.
    sess = get_session()

    for t in range(num_iterations):
        # Shift the image by a random offset before the update.
        ox, oy = np.random.randint(0, max_jitter, 2)
        image = jitter(image, ox, oy)

        watched = tf.convert_to_tensor(image)
        with tf.GradientTape() as tape:
            tape.watch(watched)
            class_score = model(watched)[0, target_y]
            objective = class_score - l2_reg * tf.nn.l2_loss(watched)
        ascent = sess.run(tape.gradient(objective, watched))

        # Gradient ascent: move the image in the direction of the gradient.
        image += ascent[0] * learning_rate

        # Undo the jitter, then regularize by clipping and occasional blur.
        image = jitter(image, -ox, -oy)
        image = np.clip(image,
                        -SQUEEZENET_MEAN / SQUEEZENET_STD,
                        (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            image = blur_image(image, sigma=0.5)

        # Save a snapshot on the first, every show_every-th and last step.
        if t == 0 or (t + 1) % show_every == 0 or t + 1 == num_iterations:
            plt.imshow(deprocess_image(image[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d'
                      % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.savefig("%s.jpg" % t)

    return image
def style_transfer(content_image, style_image, image_size, style_size,
                   content_layer, content_weight, style_layers,
                   style_weights, tv_weight, init_random=False):
    """Run style transfer and display the generated image.

    Uses the module-level `model` and `sess`.

    Inputs:
    - content_image: passed to load_image to load the content image
    - style_image: passed to load_image to load the style image
    - image_size: size argument used when loading the content image
    - style_size: size argument used when loading the style image
    - content_layer: index into the feature list used for the content loss
    - content_weight: weighting on content loss
    - style_layers: list of feature indices used for the style loss
    - style_weights: list of weights, one per entry of style_layers
    - tv_weight: weight of the total variation regularization term
    - init_random: start from uniform random noise instead of the content image
    """
    # Content target: features of the content image at content_layer.
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer],
                              {model.image: content_img[None]})

    # Style targets: Gram matrices of the style image at each style layer,
    # built as graph tensors first and then evaluated to NumPy values.
    style_img = preprocess_image(load_image(style_image, size=style_size))
    gram_tensors = [gram_matrix(feats[idx]) for idx in style_layers]
    style_targets = sess.run(gram_tensors, {model.image: style_img[None]})

    # The generated image is the only variable being optimized.
    if init_random:
        start_value = tf.random_uniform(content_img[None].shape, 0, 1)
    else:
        start_value = content_img[None]
    img_var = tf.Variable(start_value, name="image")

    # Total loss on the generated image.
    feats = model.extract_features(img_var)
    c_loss = content_loss(content_weight, feats[content_layer], content_target)
    s_loss = style_loss(feats, style_layers, style_targets, style_weights)
    t_loss = tv_loss(img_var, tv_weight)
    loss = c_loss + s_loss + t_loss

    # Optimization hyperparameters.
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    # Adam over img_var only; its slot variables live under "optimizer".
    lr_var = tf.Variable(initial_lr, name="lr")
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(
            loss, var_list=[img_var])
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Op that clamps the image values back into [-1.5, 1.5] when run.
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    # Show the two source images side by side.
    f, axarr = plt.subplots(1, 2)
    for axis, title, source in (
            (axarr[0], 'Content Source Img.', content_img),
            (axarr[1], 'Style Source Img.', style_img)):
        axis.axis('off')
        axis.set_title(title)
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    def report(iteration):
        # Print the iteration number and display the current generated image.
        print('Iteration {}'.format(iteration))
        img = sess.run(img_var)
        plt.imshow(deprocess_image(img[0], rescale=True))
        plt.axis('off')
        plt.show()

    for t in range(max_iter):
        sess.run(train_op)
        if t < decay_lr_at:
            # Clamp only while the large learning rate is in effect.
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 100 == 0:
            report(t)

    # Final result.
    report(t)
# Notebook setup cell: check the scipy install, reset the TensorFlow graph,
# open a session and load the pretrained SqueezeNet checkpoint.
check_scipy()
from cs231n.classifiers.squeezenet import SqueezeNet
import tensorflow as tf
tf.reset_default_graph()  # remove all existing variables in the graph
sess = get_session()  # start a new Session
# Load pretrained SqueezeNet model
SAVE_PATH = 'cs231n/datasets/squeezenet.ckpt'
# if not os.path.exists(SAVE_PATH):
#     raise ValueError("You need to download SqueezeNet!")
model = SqueezeNet(save_path=SAVE_PATH, sess=sess)
# Load data for testing (each image gets a leading batch axis via [None])
content_img_test = preprocess_image(load_image('styles/tubingen.jpg', size=192))[None]
style_img_test = preprocess_image(
    load_image('styles/starry_night.jpg', size=192))[None]
answers = np.load('style-transfer-checks-tf.npz')


def content_loss(content_weight, content_current, content_original):
    """
    Compute the weighted content loss.

    Both feature tensors are reshaped to a 2-D matrix and the result is
    content_weight times the sum of squared differences between them.

    Inputs:
    - content_weight: scalar multiplier applied to the loss
    - content_current: features of the current image
    - content_original: features of the source image

    Returns:
    - the weighted sum of squared differences
    """
    shapes = tf.shape(content_current)
    # The reshape uses only axes 1..3 of content_current, so axis 0 must have
    # size 1 -- presumably a batch of one image; confirm against callers.
    F_l = tf.reshape(content_current, [shapes[1], shapes[2] * shapes[3]])
    P_l = tf.reshape(content_original, [shapes[1], shapes[2] * shapes[3]])
    loss = content_weight * (tf.reduce_sum((F_l - P_l)**2))
    return loss


def content_loss_test(correct):
    content_layer = 3
def create_class_visualization(target_y, model, sess, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained
    model, saving intermediate results under 'reports/'.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image
    - sess: TensorFlow session used to evaluate the loss and its gradient

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to save the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # Single image of random noise, with a leading batch axis.
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build the objective and its gradient once, outside the loop, so the
    # gradient graph is not rebuilt on every iteration. Both are Tensors.
    N = X.shape[0]
    # Score of the labelled class for each image in the batch.
    correct_scores = tf.gather_nd(
        model.classifier, tf.stack((tf.range(N), model.labels), axis=1))
    # Subtract l2_reg times the L2 norm of the flattened image.
    loss = correct_scores - tf.scalar_mul(
        tf.constant(l2_reg),
        tf.norm(tf.reshape(model.image, [1, -1]), axis=1))
    grad = tf.gradients(loss, model.image)  # one-element list

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Gradient ascent step: we maximize the objective, hence the plus.
        loss_result, grad_result = sess.run([loss, grad], {
            model.image: X,
            model.labels: [target_y],
        })
        X = X + learning_rate * grad_result[0]

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically save the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            print('save image in iteration {}/{}, loss is {}'.format(
                t, num_iterations, loss_result))
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1,
                                                 num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.savefig('reports/class_visualization_image_{}_{}.png'.format(
                target_y, t))
            # Close the figure so repeated saves do not accumulate figures.
            plt.close()
    return X
# Plot the first five raw images with their class names in one row and save
# the figure to disk.
plt.figure(figsize=(12, 6))
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(X_raw[i])
    plt.title(class_names[y[i]])
    plt.axis('off')
plt.gcf().tight_layout()
plt.savefig('reports/imageNet_images.png')

# ## Preprocess images
# The input to the pretrained model is expected to be normalized, so we first preprocess the images by subtracting the pixelwise mean and dividing by the pixelwise standard deviation.

# In[ ]:

X = np.array([preprocess_image(img) for img in X_raw])

# # Saliency Maps
# Using this pretrained model, we will compute class saliency maps as described in Section 3.1 of [2].
#
# A **saliency map** tells us the degree to which each pixel in the image affects the classification score for that image.
# To compute it, we compute the gradient of the unnormalized score corresponding to the correct class (which is a scalar) with respect to the pixels of the image.
# If the image has shape `(H, W, 3)` then this gradient will also have shape `(H, W, 3)`;
# for each pixel in the image, this gradient tells us the amount by which the classification score will change if the pixel changes by a small amount.
# To compute the saliency map, we take the absolute value of this gradient, then take the maximum value over the 3 input channels;
# the final saliency map thus has shape `(H, W)` and all entries are nonnegative.
#
# You will need to use the `model.classifier` Tensor containing the scores for each input, and will need to feed in values for the `model.image` and `model.labels` placeholder when evaluating the gradient.
# Open the file `cs231n/classifiers/squeezenet.py` and read the documentation to make sure you understand how to use the model. For example usage, you can see the `loss` attribute.
#
# [2] Karen Simonyan, Andrea Vedaldi, and Andrew Zisserman. "Deep Inside Convolutional Networks: Visualising
############################################################################# # Shallow feature reconstruction filename = 'kitten.jpg' layer = 3 # layers start from 0 so these are features after 4 convolutions img = imresize(imread(filename), (64, 64)) plt.imshow(img) plt.gcf().set_size_inches(3, 3) plt.title('Original image') plt.axis('off') plt.show() # Preprocess the image before passing it to the network: # subtract the mean, add a dimension, etc img_pre = preprocess_image(img, data['mean_image']) # Extract features from the image feats, _ = model.forward(img_pre, end=layer) # Invert the features kwargs = { 'num_iterations': 400, 'learning_rate': 5000, 'l2_reg': 1e-8, 'show_every': 100, 'blur_every': 10, } X = invert_features(feats, layer, model, **kwargs) ############################################################################# # Deep feature reconstruction
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained
    model, using normalized gradient-ascent steps.

    Uses the module-level TensorFlow session `sess` to evaluate gradients.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: Length of each step (the gradient is rescaled to unit norm)
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # Single image of random noise, with a leading batch axis.
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build the gradient tensor once, outside the loop. The objective is
    # score(target_y) - l2_reg * ||image||^2; the derivative of the penalty
    # is written out analytically as -2 * l2_reg * image.
    loss = model.classifier[0, target_y]
    # tf.gradients returns a one-element list; take the tensor explicitly
    # instead of relying on stacking the list and squeezing it.
    grad = tf.gradients(loss, model.image)[0] - l2_reg * 2 * model.image

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        img_step = sess.run(grad, feed_dict={
            model.image: X,
            model.labels: np.array([target_y]),
        })
        # Normalized ascent step; skip it when the gradient vanishes so the
        # image is not filled with NaNs by a division by zero.
        step_norm = np.linalg.norm(img_step)
        if step_norm > 0:
            X += img_step * learning_rate / step_norm

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1,
                                                 num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def style_transfer(content_image, style_image, image_size, style_size,
                   content_layer, content_weight, style_layers,
                   style_weights, tv_weight, init_random=False):
    """Run style transfer!

    Uses the module-level `model`.

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    """
    # Extract features from the content image
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = extract_features(content_img[None], model)
    content_target = feats[content_layer]

    # Extract features from the style image and compute its Gram matrices
    style_img = preprocess_image(load_image(style_image, size=style_size))
    s_feats = extract_features(style_img[None], model)
    style_targets = [gram_matrix(s_feats[idx]) for idx in style_layers]

    # Set up optimization hyperparameters
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    # Hand the schedule object itself to the optimizer. Calling the schedule
    # once up front would freeze the learning rate at initial_lr, so the
    # decay at decay_lr_at would never take effect. The optimizer evaluates
    # the schedule with its own iteration counter on every update.
    learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        [decay_lr_at], [initial_lr, decayed_lr])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

    # Show the two source images side by side
    f, axarr = plt.subplots(1, 2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Content Source Img.')
    axarr[1].set_title('Style Source Img.')
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    # Initialize the generated image
    if init_random:
        initializer = tf.random_uniform_initializer(0, 1)
        img = initializer(shape=content_img[None].shape)
        img_var = tf.Variable(img)
        print("Intializing randomly.")
    else:
        img_var = tf.Variable(content_img[None])
        print("Initializing with content image.")

    for t in range(max_iter):
        with tf.GradientTape() as tape:
            tape.watch(img_var)
            feats = extract_features(img_var, model)
            # Compute loss
            c_loss = content_loss(content_weight, feats[content_layer],
                                  content_target)
            s_loss = style_loss(feats, style_layers, style_targets,
                                style_weights)
            t_loss = tv_loss(img_var, tv_weight)
            loss = c_loss + s_loss + t_loss
        # Compute gradient and update the generated image
        grad = tape.gradient(loss, img_var)
        optimizer.apply_gradients([(grad, img_var)])
        # Clamp the image values back into [-1.5, 1.5] after every step
        img_var.assign(tf.clip_by_value(img_var, -1.5, 1.5))
        if t % 10 == 0:
            print('Iteration {}'.format(t))

    # Show the final result
    print('Iteration {}'.format(t))
    plt.imshow(deprocess_image(img_var[0].numpy(), rescale=True))
    plt.axis('off')
    plt.show()
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 200)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]
    X = tf.Variable(X)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        # Gradient of the target-class score with respect to the image.
        with tf.GradientTape() as tg:
            tg.watch(X)
            scores1 = model.call(X)
            correct_scores = scores1[:, target_y]
        dX = tg.gradient(correct_scores, X)

        # The objective is score - l2_reg * ||X||^2, so the regularizer
        # contributes -2 * l2_reg * X to the ascent direction. Adding it
        # instead would reward large pixel values rather than penalize them.
        dX -= l2_reg * 2 * X

        # Gradient ascent step on the image.
        X += learning_rate * dX

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        if t % 20 == 0:
            # Print progress every 20 updates
            template = 'Training progress is at {}th iteration out of {} iterations.'
            print(template.format(t, num_iterations))

        # As a regularizer, clip and periodically blur
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1,
                                                 num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def style_transfer(content_image, style_image, output_image, image_size,
                   style_size, content_layer, content_weight, style_layers,
                   style_weights, tv_weight, init_random=False, sess=sess,
                   model=model):
    """Run style transfer and write snapshots of the result to disk.

    Every 25 iterations the generated image is written to
    "<output_image without a trailing .jpg>_iter<t>.jpg".

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - output_image: filename to write to
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    - sess: TensorFlow session; defaults to the module-level session that
      existed when this function was defined
    - model: feature-extraction model; defaults to the module-level model
      that existed when this function was defined
    """
    # Content target: features of the content image at content_layer.
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer],
                              {model.image: content_img[None]})

    # Style targets: Gram matrices of the style image, built as graph
    # tensors and then evaluated to NumPy values.
    style_img = preprocess_image(load_image(style_image, size=style_size))
    gram_tensors = [gram_matrix(feats[idx]) for idx in style_layers]
    style_targets = sess.run(gram_tensors, {model.image: style_img[None]})

    # The generated image is the only variable being optimized.
    if init_random:
        start_value = tf.random_uniform(content_img[None].shape, 0, 1)
    else:
        start_value = content_img[None]
    img_var = tf.Variable(start_value, name="image")

    # Total loss on the generated image.
    feats = model.extract_features(img_var)
    c_loss = content_loss(content_weight, feats[content_layer], content_target)
    s_loss = style_loss(feats, style_layers, style_targets, style_weights)
    t_loss = tv_loss(img_var, tv_weight)
    loss = c_loss + s_loss + t_loss

    # Optimization hyperparameters. With max_iter below decay_lr_at the
    # decay branch in the loop is never reached.
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 100

    # Adam over img_var only; its slot variables live under "optimizer".
    lr_var = tf.Variable(initial_lr, name="lr")
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(
            loss, var_list=[img_var])
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Op that clamps the image values back into [-1.5, 1.5] when run.
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    # Drop a trailing ".jpg" so the iteration suffix goes before it.
    output_stem = output_image
    if output_stem.endswith('.jpg'):
        output_stem = output_stem[:-4]

    for t in range(max_iter + 1):
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 25 == 0:
            print('Iteration {}'.format(t))
            img = sess.run(img_var)
            # NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR;
            # confirm the channel order returned by deprocess_image.
            snapshot_path = output_stem + "_iter" + str(t) + ".jpg"
            cv2.imwrite(snapshot_path, deprocess_image(img[0]))
- gram: Tensor of shape (N, C, C) giving the (optionally normalized) Gram matrices for the input image. """ features_shape = tf.shape(features) features_T = tf.transpose(features, perm=[0, 3, 2, 1]) mult = tf.batch_matmul(features_T, features) if normalize: mult = tf.scalar_mul(tf.reciprocal(tf.cast(features_shape[1]*features_shape[2]*features_shape[3], tf.float32)), mult) return mult style_layers = [1, 4, 6, 7] style_weights = [2000000, 800, 12, 1] style_feats = model.extract_features(model.image) # TODO: make this a dynamic tensor style_img = preprocess_image(load_image('./styles/van_gogh.jpg')) style_feat_vars = [style_feats[idx] for idx in [1, 4, 6, 7]] style_target_vars = [] # Compute list of TensorFlow Gram matrices for style_feat_var in style_feat_vars: style_target_vars.append(gram_matrix(style_feat_var)) # Compute list of NumPy Gram matrices by evaluating the TensorFlow graph on the style image style_targets = sess.run(style_target_vars, {model.image: style_img[None]}) def gan_style_loss(gan_output_image): # preprocess the gan image per the constants in cs231n/image_utils processed_gan_img = tf_preprocess_image(gan_output_image) gan_img_feats = model.extract_features(processed_gan_img) loss = tf.constant(0, tf.float32) for i in range(len(style_layers)):
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained
    model, using normalized gradient-ascent steps.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: Length of each step (the gradient is rescaled to unit norm)
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]
    X = tf.Variable(X)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        with tf.GradientTape() as tape:
            tape.watch(X)
            score = model(X)
            correct_score = score[0, target_y]
            # Scalar objective: class score minus the squared L2 norm of the
            # image. tf.nn.l2_normalize must not be used here: it returns a
            # rescaled copy of X, not a penalty value.
            objective = correct_score - l2_reg * tf.reduce_sum(X * X)
        grad = tape.gradient(objective, X)

        # Ascent step of length learning_rate along the unit-norm gradient.
        X += learning_rate * tf.math.l2_normalize(grad)

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        # As a regularizer, clip and periodically blur
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1,
                                                 num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Uses the module-level TensorFlow session `sess` to evaluate gradients.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result

    Returns:
    - X: the generated image batch after the last iteration
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    # Build the objective and its gradient once, outside the loop, so the
    # gradient graph is not rebuilt on every iteration. The objective is the
    # target-class score minus an L2 penalty on the image itself: the
    # penalty must be subtracted and must act on the pixels, not the scores.
    loss = model.scores[0, target_y] - l2_reg * tf.reduce_sum(
        model.image * model.image)
    grad = tf.gradients(loss, model.image)[0]  # tf.gradients returns a list

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Gradient ascent step: we maximize the objective, hence the plus.
        g = sess.run(grad, feed_dict={
            model.image: X,
            model.labels: np.array([target_y]),
        })
        X += learning_rate * g

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1,
                                                 num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
plt.gcf().set_size_inches(8, 8) plt.axis('off') filename = 'images/deepdream_%d.jpg' % (t+1) plt.savefig(filename) return X def read_image(filename, max_size): """ Read an image from disk and resize it so its larger side is max_size """ img = imread(filename) H, W, _ = img.shape if H >= W: img = imresize(img, (max_size, int(W * float(max_size) / H))) elif H < W: img = imresize(img, (int(H * float(max_size) / W), max_size)) return img filename = 'kitten.jpg' max_size = 256 img = read_image(filename, max_size) plt.imshow(img) plt.axis('off') # Preprocess the image by converting to float, transposing, # and performing mean subtraction. img_pre = preprocess_image(img, data['mean_image'], mean='pixel') out = deepdream(img_pre, 7, model, learning_rate=2000)