def generate_fooling_images():
    idx = 4
    Xi = X[idx][None]
    target_y = 89
    X_fooling = make_fooling_image(Xi, target_y, model)

    # Make sure that X_fooling is classified as target_y
    scores = sess.run(model.scores, {model.image: X_fooling})
    assert scores[0].argmax() == target_y, 'The network is not fooled!'

    # Show original image, fooling image, and difference
    orig_img = deprocess_image(Xi[0])
    fool_img = deprocess_image(X_fooling[0])  # Rescale

    plt.subplot(2, 2, 1)
    plt.imshow(orig_img)
    plt.axis('off')
    plt.title(class_names[y[idx]])
    plt.subplot(2, 2, 2)
    plt.imshow(fool_img)
    plt.title(class_names[target_y])
    plt.axis('off')
    plt.subplot(2, 2, 3)
    plt.title('Difference')
    plt.imshow(deprocess_image((Xi - X_fooling)[0]))
    plt.axis('off')
    plt.subplot(2, 2, 4)
    plt.title('Magnified difference (10x)')
    plt.imshow(deprocess_image(10 * (Xi - X_fooling)[0]))
    plt.axis('off')
def deepdream(X, layer, model, **kwargs):
    """
    Generate a DeepDream image.

    Inputs:
    - X: Starting image, of shape (1, 3, H, W)
    - layer: Index of layer at which to dream
    - model: A PretrainedCNN object

    Keyword arguments:
    - learning_rate: How much to update the image at each iteration
    - max_jitter: Maximum number of pixels for jitter regularization
    - num_iterations: How many iterations to run for
    - show_every: How often to show the generated image
    """
    X = X.copy()

    learning_rate = kwargs.pop('learning_rate', 5.0)
    max_jitter = kwargs.pop('max_jitter', 16)
    num_iterations = kwargs.pop('num_iterations', 100)
    show_every = kwargs.pop('show_every', 25)

    for t in range(num_iterations):
        # As a regularizer, add random jitter to the image
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, -1), oy, -2)

        ############################################################################
        # Compute the image gradient dX using the DeepDream method: run a forward  #
        # pass to the chosen layer, use the activations themselves as the upstream #
        # gradient, backpropagate to the image, and take a gradient ascent step.   #
        ############################################################################
        act, cache = model.forward(X, start=None, end=layer, mode='test')
        dX, _ = model.backward(act, cache)
        X += learning_rate * dX
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, -1), -oy, -2)

        # As a regularizer, clip the image
        mean_pixel = data['mean_image'].mean(axis=(1, 2), keepdims=True)
        X = np.clip(X, -mean_pixel, 255.0 - mean_pixel)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0:
            img = deprocess_image(X, data['mean_image'], mean='pixel')
            plt.imshow(img)
            plt.title('t = %d' % (t + 1))
            plt.gcf().set_size_inches(8, 8)
            plt.axis('off')
            plt.show()
    return X
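# Usage sketch (not part of the original notebook): a minimal call to deepdream(),
# assuming the TinyImageNet `data` dict, the pretrained `model`, and the plotting
# helpers from the surrounding cells are already loaded. The image index and the
# layer index below are arbitrary illustrative choices.
X0 = data['X_train'][0:1].copy()   # starting image, shape (1, 3, 64, 64)
dream = deepdream(X0, layer=7, model=model,
                  learning_rate=5.0, num_iterations=100, show_every=25)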
def create_class_visualization(target_y, model, **kwargs):
    """
    Perform optimization over the image to generate class visualizations.

    Inputs:
    - target_y: Integer in the range [0, 100) giving the target class
    - model: A PretrainedCNN that will be used for generation

    Keyword arguments:
    - learning_rate: Floating point number giving the learning rate
    - blur_every: An integer; how often to blur the image as a regularizer
    - l2_reg: Floating point number giving L2 regularization strength on the image;
      this is lambda in the equation above.
    - max_jitter: How much random jitter to add to the image as regularization
    - num_iterations: How many iterations to run for
    - show_every: How often to show the image
    """
    learning_rate = kwargs.pop('learning_rate', 10000)
    blur_every = kwargs.pop('blur_every', 1)
    l2_reg = kwargs.pop('l2_reg', 1e-6)
    max_jitter = kwargs.pop('max_jitter', 4)
    num_iterations = kwargs.pop('num_iterations', 100)
    show_every = kwargs.pop('show_every', 25)

    X = np.random.randn(1, 3, 64, 64)
    for t in range(num_iterations):
        # As a regularizer, add random jitter to the image
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, -1), oy, -2)

        ############################################################################
        # Compute the image gradient dX of the target_y class score with respect   #
        # to the image (as in the fooling-image code), add L2 regularization on    #
        # the image, and take a gradient ascent step.                              #
        ############################################################################
        scores, cache = model.forward(X, mode='test')
        dscores = np.zeros_like(scores)
        dscores[0, target_y] = 1
        dX, _ = model.backward(dscores, cache)
        dX -= l2_reg * X
        X += learning_rate * dX
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, -1), -oy, -2)

        # As a regularizer, clip the image
        X = np.clip(X, -data['mean_image'], 255.0 - data['mean_image'])

        # As a regularizer, periodically blur the image
        if t % blur_every == 0:
            X = blur_image(X)

        # Periodically show the image
        if t % show_every == 0:
            plt.imshow(deprocess_image(X, data['mean_image']))
            plt.gcf().set_size_inches(3, 3)
            plt.axis('off')
            plt.show()
    return X
def invert_features(target_feats, layer, model, **kwargs):
    """
    Perform feature inversion in the style of Mahendran and Vedaldi 2015, using
    L2 regularization and periodic blurring.

    Inputs:
    - target_feats: Image features of the target image, of shape (1, C, H, W);
      we will try to generate an image that matches these features
    - layer: The index of the layer from which the features were extracted
    - model: A PretrainedCNN that was used to extract features

    Keyword arguments:
    - learning_rate: The learning rate to use for gradient descent
    - num_iterations: The number of iterations to use for gradient descent
    - l2_reg: The strength of L2 regularization to use; this is lambda in the
      equation above.
    - blur_every: How often to blur the image as implicit regularization; set
      to 0 to disable blurring.
    - show_every: How often to show the generated image; set to 0 to disable
      showing intermediate results.

    Returns:
    - X: Generated image of shape (1, 3, 64, 64) that matches the target features.
    """
    learning_rate = kwargs.pop('learning_rate', 10000)
    num_iterations = kwargs.pop('num_iterations', 500)
    l2_reg = kwargs.pop('l2_reg', 1e-7)
    blur_every = kwargs.pop('blur_every', 1)
    show_every = kwargs.pop('show_every', 50)

    X = np.random.randn(1, 3, 64, 64)
    for t in range(num_iterations):
        ############################################################################
        # Compute the image gradient dX of the reconstruction loss with respect    #
        # to the image, including the L2 regularization penalty on pixel values,   #
        # then update the generated image with a gradient descent step.            #
        ############################################################################
        recons_feats, cache = model.forward(X, end=layer)
        dout = -2 * (target_feats - recons_feats)
        dX, _ = model.backward(dout, cache)
        dX += 2 * l2_reg * X
        X -= learning_rate * dX

        # As a regularizer, clip the image
        X = np.clip(X, -data['mean_image'], 255.0 - data['mean_image'])

        # As a regularizer, periodically blur the image
        if (blur_every > 0) and t % blur_every == 0:
            X = blur_image(X)

        if (show_every > 0) and (t % show_every == 0 or t + 1 == num_iterations):
            plt.imshow(deprocess_image(X, data['mean_image']))
            plt.gcf().set_size_inches(3, 3)
            plt.axis('off')
            plt.title('t = %d' % t)
            plt.show()
    return X
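# Usage sketch (not from the original notebook): invert the features of one
# validation image. Assumes `data` and `model` from the cells above are in scope;
# the image index and layer index are arbitrary illustrative choices.
img = data['X_val'][0:1]                     # image whose features we want to match
layer = 6                                    # arbitrary layer index
target_feats, _ = model.forward(img, end=layer)
X_inv = invert_features(target_feats, layer, model,
                        learning_rate=10000, num_iterations=500, show_every=50)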
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    for t in range(num_iterations):
        # As a regularizer, add random jitter to the image
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        # Maximize the target class score minus an L2 penalty on the image,
        # computing the gradient with respect to the image eagerly
        Y = tf.convert_to_tensor(X)
        with tf.GradientTape() as tape:
            tape.watch(Y)
            loss = model(Y)[0, target_y] - l2_reg * tf.nn.l2_loss(Y)
        dY = tape.gradient(loss, Y)
        X += dY.numpy()[0] * learning_rate

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.savefig("%s.jpg" % t)
    return X
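# Usage sketch (illustrative, not from the original notebook): assumes the
# SqueezeNet `model` and `class_names` used above are in scope. The target class
# index 76 is an arbitrary example.
target_y = 76
out = create_class_visualization(target_y, model,
                                 num_iterations=100, show_every=25)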
def create_class_visualization(target_y, model, **kwargs):
    """
    Perform optimization over the image to generate class visualizations.

    Inputs:
    - target_y: Integer in the range [0, 100) giving the target class
    - model: A PretrainedCNN that will be used for generation

    Keyword arguments:
    - learning_rate: Floating point number giving the learning rate
    - blur_every: An integer; how often to blur the image as a regularizer
    - l2_reg: Floating point number giving L2 regularization strength on the image;
      this is lambda in the equation above.
    - max_jitter: How much random jitter to add to the image as regularization
    - num_iterations: How many iterations to run for
    - show_every: How often to show the image
    """
    learning_rate = kwargs.pop('learning_rate', 10000)
    blur_every = kwargs.pop('blur_every', 1)
    l2_reg = kwargs.pop('l2_reg', 1e-6)
    max_jitter = kwargs.pop('max_jitter', 4)
    num_iterations = kwargs.pop('num_iterations', 100)
    show_every = kwargs.pop('show_every', 25)

    X = np.random.randn(1, 3, 64, 64)
    mode = 'test'
    for t in range(num_iterations):
        # As a regularizer, add random jitter to the image
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, -1), oy, -2)

        # Forward pass to get class scores, then backpropagate only the
        # target class score to get the image gradient
        scores, cache = model.forward(X, mode=mode)
        class_mask = np.zeros(scores.shape)
        class_mask[0, target_y] = 1
        scores = scores * class_mask
        dX, grads = model.backward(scores, cache)

        # Add L2 regularization on the image and take a gradient ascent step
        dX = dX - l2_reg * X
        X = X + learning_rate * dX

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, -1), -oy, -2)

        # As a regularizer, clip the image
        X = np.clip(X, -data['mean_image'], 255.0 - data['mean_image'])

        # As a regularizer, periodically blur the image
        if t % blur_every == 0:
            X = blur_image(X)

        # Periodically show the image
        if t % show_every == 0:
            plt.imshow(deprocess_image(X, data['mean_image']))
            plt.gcf().set_size_inches(3, 3)
            plt.axis('off')
            img_path = 'images/class_%d_%d.jpg' % (target_y, t)
            plt.savefig(img_path)
    return X
def show_saliency_maps(X, y, mask):
    mask = np.asarray(mask)
    Xm = X[mask]
    ym = y[mask]

    saliency = compute_saliency_maps(Xm, ym, model)
    print(saliency.shape)

    for i in range(mask.size):
        # Original image with its ground-truth class name
        plt.subplot(2, mask.size, i + 1)
        plt.imshow(deprocess_image(Xm[i]))
        plt.axis('off')
        plt.title(class_names[ym[i]])

        # Corresponding saliency map
        plt.subplot(2, mask.size, mask.size + i + 1)
        plt.title(mask[i])
        plt.imshow(saliency[i], cmap=plt.cm.hot)
        plt.axis('off')
    plt.gcf().set_size_inches(10, 4)
    plt.show()
def deepdream(X, layer, model, **kwargs):
    """
    Generate a DeepDream image.

    Inputs:
    - X: Starting image, of shape (1, 3, H, W)
    - layer: Index of layer at which to dream
    - model: A PretrainedCNN object

    Keyword arguments:
    - learning_rate: How much to update the image at each iteration
    - max_jitter: Maximum number of pixels for jitter regularization
    - num_iterations: How many iterations to run for
    - show_every: How often to show the generated image
    """
    X = X.copy()

    learning_rate = kwargs.pop('learning_rate', 5.0)
    max_jitter = kwargs.pop('max_jitter', 16)
    num_iterations = kwargs.pop('num_iterations', 100)
    show_every = kwargs.pop('show_every', 25)

    for t in range(num_iterations):
        # As a regularizer, add random jitter to the image
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, -1), oy, -2)

        # Forward to the target layer, use the activations as the upstream
        # gradient, and backpropagate to the image (gradient ascent step)
        activation, cache = model.forward(X, mode='test', start=0, end=layer)
        dX, grads = model.backward(activation, cache)
        X = X + learning_rate * dX

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, -1), -oy, -2)

        # As a regularizer, clip the image
        mean_pixel = data['mean_image'].mean(axis=(1, 2), keepdims=True)
        X = np.clip(X, -mean_pixel, 255.0 - mean_pixel)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0:
            img = deprocess_image(X, data['mean_image'], mean='pixel')
            plt.imshow(img)
            plt.title('t = %d' % (t + 1))
            plt.gcf().set_size_inches(8, 8)
            plt.axis('off')
            filename = 'images/deepdream_%d.jpg' % (t + 1)
            plt.savefig(filename)
    return X
def show_saliency_maps(mask):
    mask = np.asarray(mask)
    X = data['X_val'][mask]
    y = data['y_val'][mask]

    saliency = compute_saliency_maps(X, y, model)

    for i in range(mask.size):
        plt.subplot(2, mask.size, i + 1)
        plt.imshow(deprocess_image(X[i], data['mean_image']))
        plt.axis('off')
        plt.title(data['class_names'][y[i]][0])

        plt.subplot(2, mask.size, mask.size + i + 1)
        plt.title(mask[i])
        plt.imshow(saliency[i])
        plt.axis('off')
    plt.gcf().set_size_inches(10, 4)
    plt.show()
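# Usage sketch (not from the original notebook): visualize saliency maps for a
# handful of validation images; the indices below are arbitrary.
mask = np.asarray([0, 1, 2, 3, 4])
show_saliency_maps(mask)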
def style_transfer(content_image, style_image, image_size, style_size, content_layer,
                   content_weight, style_layers, style_weights, tv_weight, init_random=False):
    # Extract features from the content image
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer], {model.image: content_img[None]})

    # Extract features from the style image
    style_img = preprocess_image(load_image(style_image, size=style_size))
    style_feat_vars = [feats[idx] for idx in style_layers]
    style_target_vars = []
    # Compute list of TensorFlow Gram matrices
    for style_feat_var in style_feat_vars:
        style_target_vars.append(gram_matrix(style_feat_var))
    # Compute list of NumPy Gram matrices by evaluating the TensorFlow graph on the style image
    style_targets = sess.run(style_target_vars, {model.image: style_img[None]})

    # Initialize generated image to content image
    if init_random:
        img_var = tf.Variable(tf.random_uniform(content_img[None].shape, 0, 1), name="image")
    else:
        img_var = tf.Variable(content_img[None], name="image")

    # Extract features on generated image
    feats = model.extract_features(img_var)
    # Compute loss
    c_loss = content_loss(content_weight, feats[content_layer], content_target)
    s_loss = style_loss(feats, style_layers, style_targets, style_weights)
    t_loss = tv_loss(img_var, tv_weight)
    loss = c_loss + s_loss + t_loss

    # Set up optimization hyperparameters
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    # Create and initialize the Adam optimizer
    lr_var = tf.Variable(initial_lr, name="lr")
    # Create train_op that updates the generated image when run
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(loss, var_list=[img_var])
    # Initialize the generated image and optimization variables
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Create an op that will clamp the image values when run
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    f, axarr = plt.subplots(1, 2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Content Source Img.')
    axarr[1].set_title('Style Source Img.')
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    # Hardcoded handcrafted
    for t in range(max_iter):
        # Take an optimization step to update img_var
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 100 == 0:
            print('Iteration {}'.format(t))
            img = sess.run(img_var)
            plt.imshow(deprocess_image(img[0], rescale=True))
            plt.axis('off')
            plt.show()

    print('Iteration {}'.format(t))
    img = sess.run(img_var)
    plt.imshow(deprocess_image(img[0], rescale=True))
    plt.axis('off')
    plt.show()
def create_class_visualization(target_y, model, sess, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    ########################################################################
    # Compute the loss and the gradient of the loss with respect to the    #
    # input image, model.image. These are built outside the loop so that   #
    # the gradient graph is not rebuilt at each iteration.                 #
    #                                                                      #
    # Note: loss and grad are TensorFlow Tensors, not numpy arrays. The    #
    # loss is the score for the target label, target_y, minus an L2        #
    # regularization term on the image.                                    #
    ########################################################################
    (N, H, W, C) = X.shape
    correct_scores = tf.gather_nd(
        model.classifier,
        tf.stack((tf.range(N), model.labels), axis=1))
    loss = correct_scores - tf.scalar_mul(
        tf.constant(l2_reg),
        tf.norm(tf.reshape(model.image, [1, -1]), axis=1))
    grad = tf.gradients(loss, model.image)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Evaluate the gradient of the target class score with respect to the
        # image pixels, and take a gradient ascent step on the image
        [loss_result, grad_result] = sess.run([loss, grad], {
            model.image: X,
            model.labels: [target_y]
        })
        grad_result = grad_result[0]
        X = X + learning_rate * grad_result

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            print('save image in iteration {}/{}, loss is {}'.format(
                t, num_iterations, loss_result))
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            # plt.show()
            plt.savefig('reports/class_visualization_image_{}_{}.png'.format(target_y, t))
            plt.close()
    return X
# Run the following to generate a fooling image. Feel free to change the `idx`
# variable to explore other images.

# In[ ]:

idx = 0
Xi = X[idx][None]
target_y = 6
X_fooling = make_fooling_image(Xi, target_y, model)

# Make sure that X_fooling is classified as target_y
scores = sess.run(model.classifier, {model.image: X_fooling})
assert scores[0].argmax() == target_y, 'The network is not fooled!'

# Show original image, fooling image, and difference
orig_img = deprocess_image(Xi[0])
fool_img = deprocess_image(X_fooling[0])  # Rescale

plt.subplot(1, 4, 1)
plt.imshow(orig_img)
plt.axis('off')
plt.title(class_names[y[idx]])
plt.subplot(1, 4, 2)
plt.imshow(fool_img)
plt.title(class_names[target_y])
plt.axis('off')
plt.subplot(1, 4, 3)
plt.title('Difference')
plt.imshow(deprocess_image((Xi - X_fooling)[0]))
plt.axis('off')
plt.subplot(1, 4, 4)
plt.title('Magnified difference (10x)')
plt.imshow(deprocess_image(10 * (Xi - X_fooling)[0]))
plt.axis('off')
plt.show()
for i, names in enumerate(data['class_names']):
    print(i, ' '.join('"%s"' % name for name in names))

# Visualize some examples of the training data
classes_to_show = 7
examples_per_class = 5
class_idxs = np.random.choice(len(data['class_names']), size=classes_to_show, replace=False)
for i, class_idx in enumerate(class_idxs):
    train_idxs, = np.nonzero(data['y_train'] == class_idx)
    train_idxs = np.random.choice(train_idxs, size=examples_per_class, replace=False)
    for j, train_idx in enumerate(train_idxs):
        img = deprocess_image(data['X_train'][train_idx], data['mean_image'])
        plt.subplot(examples_per_class, classes_to_show, 1 + i + classes_to_show * j)
        if j == 0:
            plt.title(data['class_names'][class_idx][0])
        plt.imshow(img)
        plt.gca().axis('off')
plt.show()

model = PretrainedCNN(h5_file='cs231n/datasets/pretrained_model.h5')

batch_size = 100
# Test the model on training data
mask = np.random.randint(data['X_train'].shape[0], size=batch_size)
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    ########################################################################
    # Compute the loss and the gradient of the loss with respect to the    #
    # input image, model.image. These are built outside the loop so that   #
    # the gradient graph is not rebuilt at each iteration.                 #
    #                                                                      #
    # Note: loss and grad are TensorFlow Tensors, not numpy arrays. The    #
    # loss is the score for the target label, target_y, minus the          #
    # (subtracted) L2 regularization term on the image.                    #
    ########################################################################
    loss = model.scores[0, target_y] - l2_reg * tf.reduce_sum(model.image * model.image)
    grad = tf.gradients(loss, model.image)[0]

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Evaluate the gradient of the score for class target_y with respect
        # to the pixels of the image, and take a gradient ascent step
        g = sess.run(grad, feed_dict={
            model.image: X,
            model.labels: np.array([target_y])
        })
        X += learning_rate * g

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
# Find a correctly classified validation image
while True:
    i = np.random.randint(data['X_val'].shape[0])
    X = data['X_val'][i:i + 1]
    y = data['y_val'][i:i + 1]
    y_pred = model.loss(X)[0].argmax()
    if y_pred == y:
        break

target_y = 67
X_fooling = make_fooling_image(X, target_y, model)

# Make sure that X_fooling is classified as target_y
scores = model.loss(X_fooling)
assert scores[0].argmax() == target_y, 'The network is not fooled!'

# Show original image, fooling image, and difference
plt.subplot(1, 3, 1)
plt.imshow(deprocess_image(X, data['mean_image']))
plt.axis('off')
plt.title(data['class_names'][int(y)][0])
plt.subplot(1, 3, 2)
plt.imshow(deprocess_image(X_fooling, data['mean_image'], renorm=True))
plt.title(data['class_names'][target_y][0])
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title('Difference')
plt.imshow(deprocess_image(X - X_fooling, data['mean_image']))
plt.axis('off')
plt.show()
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]
    X = tf.Variable(X)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        ########################################################################
        # Compute the gradient of the score for class target_y (minus L2       #
        # regularization on the image) with respect to the pixels of the       #
        # image using tf.GradientTape, and take a normalized gradient ascent   #
        # step on the image.                                                   #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        with tf.GradientTape() as tape:
            tape.watch(X)
            score = model(X)
            correct_score = score[0, target_y]
            loss = correct_score - l2_reg * tf.nn.l2_loss(X)
        grad = tape.gradient(loss, X)
        dX = learning_rate * tf.math.l2_normalize(grad)
        X += dX
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        # As a regularizer, clip and periodically blur
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]

    ########################################################################
    # Compute the loss and the gradient of the loss with respect to the    #
    # input image, model.image; these are built outside the loop so that   #
    # the gradient graph is not rebuilt at each iteration. The loss is the #
    # score for the target label, with the gradient of the L2              #
    # regularization term folded directly into grad.                       #
    ########################################################################
    loss = model.classifier[0, target_y]
    grad = tf.gradients(loss, model.image)
    grad = tf.squeeze(grad) - l2_reg * 2 * model.image

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
        X = np.roll(np.roll(X, ox, 1), oy, 2)

        # Take a normalized gradient ascent step on the image
        img_step = sess.run(grad, feed_dict={model.image: X,
                                             model.labels: np.array([target_y])})
        X += img_step * learning_rate / np.linalg.norm(img_step)

        # Undo the jitter
        X = np.roll(np.roll(X, -ox, 1), -oy, 2)

        # As a regularizer, clip and periodically blur
        X = np.clip(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                    (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
# # Visualize Examples
# Run the following to visualize some example images from random classes in
# TinyImageNet-100-A. It selects classes and images randomly, so you can run it
# several times to see different images.

# In[ ]:

# Visualize some examples of the training data
classes_to_show = 7
examples_per_class = 5
class_idxs = np.random.choice(len(data['class_names']), size=classes_to_show, replace=False)
for i, class_idx in enumerate(class_idxs):
    train_idxs, = np.nonzero(data['y_train'] == class_idx)
    train_idxs = np.random.choice(train_idxs, size=examples_per_class, replace=False)
    for j, train_idx in enumerate(train_idxs):
        img = deprocess_image(data['X_train'][train_idx], data['mean_image'])
        plt.subplot(examples_per_class, classes_to_show, 1 + i + classes_to_show * j)
        if j == 0:
            plt.title(data['class_names'][class_idx][0])
        plt.imshow(img)
        plt.gca().axis('off')
plt.show()

# # Pretrained model
# We have trained a deep CNN for you on the TinyImageNet-100-A dataset that we
# will use for image visualization. The model has 9 convolutional layers (with
# spatial batch normalization) and 1 fully-connected hidden layer (with batch
# normalization).
#
# To get the model, run the script `get_pretrained_model.sh` from the
# `cs231n/datasets` directory. After doing so, run the following to load the
# model from disk.

# In[ ]:
def create_class_visualization(target_y, model, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image

    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 200)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # We use a single image of random noise as a starting point
    X = 255 * np.random.rand(224, 224, 3)
    X = preprocess_image(X)[None]
    X = tf.Variable(X)

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = np.random.randint(0, max_jitter, 2)
        X = jitter(X, ox, oy)

        ########################################################################
        # Compute the gradient of the score for class target_y with respect to #
        # the pixels of the image, subtract the gradient of the L2             #
        # regularization term, and take a gradient ascent step.                #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # 1) Define a gradient tape and watch the input image variable
        with tf.GradientTape() as tg:
            tg.watch(X)  # the input needs to be a tf tensor/variable
            # 2) Compute the scores for the input image and pick out the
            #    score for the target class
            scores1 = model.call(X)  # defined in the SqueezeNet class (squeezenet.py)
            correct_scores = scores1[:, target_y]
        # 3) Gradient of the target class score with respect to the image
        dX = tg.gradient(correct_scores, X)
        # Subtract the gradient of the L2 penalty (we maximize s_y - l2_reg * ||X||^2)
        dX -= 2 * l2_reg * X
        X += learning_rate * dX
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Undo the jitter
        X = jitter(X, -ox, -oy)

        # As a regularizer, clip and periodically blur
        if t % 20 == 0:
            # Print progress every 20 iterations
            template = 'Training progress is at {}th iteration out of {} iterations.'
            print(template.format(t, num_iterations))
        X = tf.clip_by_value(X, -SQUEEZENET_MEAN / SQUEEZENET_STD,
                             (1.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD)
        if t % blur_every == 0:
            X = blur_image(X, sigma=0.5)

        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess_image(X[0]))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return X
def style_transfer(content_image, style_image, image_size, style_size, content_layer,
                   content_weight, style_layers, style_weights, tv_weight, init_random=False):
    """Run style transfer!

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    """
    # Extract features from the content image
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = extract_features(content_img[None], model)
    content_target = feats[content_layer]

    # Extract features from the style image
    style_img = preprocess_image(load_image(style_image, size=style_size))
    s_feats = extract_features(style_img[None], model)
    style_targets = []
    # Compute list of TensorFlow Gram matrices
    for idx in style_layers:
        style_targets.append(gram_matrix(s_feats[idx]))

    # Set up optimization hyperparameters
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 200

    step = tf.Variable(0, trainable=False)
    boundaries = [decay_lr_at]
    values = [initial_lr, decayed_lr]
    learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)

    # Later, whenever we perform an optimization step, we pass in the step.
    learning_rate = learning_rate_fn(step)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Initialize the generated image and optimization variables
    f, axarr = plt.subplots(1, 2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Content Source Img.')
    axarr[1].set_title('Style Source Img.')
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()

    # Initialize generated image to content image
    if init_random:
        initializer = tf.random_uniform_initializer(0, 1)
        img = initializer(shape=content_img[None].shape)
        img_var = tf.Variable(img)
        print("Initializing randomly.")
    else:
        img_var = tf.Variable(content_img[None])
        print("Initializing with content image.")

    for t in range(max_iter):
        with tf.GradientTape() as tape:
            tape.watch(img_var)
            feats = extract_features(img_var, model)
            # Compute loss
            c_loss = content_loss(content_weight, feats[content_layer], content_target)
            s_loss = style_loss(feats, style_layers, style_targets, style_weights)
            t_loss = tv_loss(img_var, tv_weight)
            loss = c_loss + s_loss + t_loss
        # Compute gradient, take an optimization step, then clamp pixel values
        grad = tape.gradient(loss, img_var)
        optimizer.apply_gradients([(grad, img_var)])
        img_var.assign(tf.clip_by_value(img_var, -1.5, 1.5))

        if t % 10 == 0:
            print('Iteration {}'.format(t))
            # plt.imshow(deprocess_image(img_var[0].numpy(), rescale=True))
            # plt.axis('off')
            # plt.show()

    print('Iteration {}'.format(t))
    plt.imshow(deprocess_image(img_var[0].numpy(), rescale=True))
    plt.axis('off')
    plt.show()
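# Usage sketch (illustrative only): the filenames, sizes, layer indices, and
# weights below are placeholders, not values from the original notebook.
params = {
    'content_image': 'styles/content.jpg',   # hypothetical path
    'style_image': 'styles/style.jpg',       # hypothetical path
    'image_size': 192,
    'style_size': 512,
    'content_layer': 2,
    'content_weight': 5e-2,
    'style_layers': [0, 3, 5, 6],
    'style_weights': [20000, 500, 12, 1],
    'tv_weight': 5e-2,
    'init_random': False,
}
style_transfer(**params)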
def style_transfer(content_image, style_image, output_image, image_size, style_size,
                   content_layer, content_weight, style_layers, style_weights, tv_weight,
                   init_random=False, sess=sess, model=model):
    """Run style transfer!

    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - output_image: filename to write to
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    """
    # Extract features from the content image
    content_img = preprocess_image(load_image(content_image, size=image_size))
    feats = model.extract_features(model.image)
    content_target = sess.run(feats[content_layer], {model.image: content_img[None]})

    # Extract features from the style image
    style_img = preprocess_image(load_image(style_image, size=style_size))
    style_feat_vars = [feats[idx] for idx in style_layers]
    style_target_vars = []
    # Compute list of TensorFlow Gram matrices
    for style_feat_var in style_feat_vars:
        style_target_vars.append(gram_matrix(style_feat_var))
    # Compute list of NumPy Gram matrices by evaluating the TensorFlow graph on the style image
    style_targets = sess.run(style_target_vars, {model.image: style_img[None]})

    # Initialize generated image to content image
    if init_random:
        img_var = tf.Variable(tf.random_uniform(content_img[None].shape, 0, 1), name="image")
    else:
        img_var = tf.Variable(content_img[None], name="image")

    # Extract features on generated image
    feats = model.extract_features(img_var)
    # Compute loss
    c_loss = content_loss(content_weight, feats[content_layer], content_target)
    s_loss = style_loss(feats, style_layers, style_targets, style_weights)
    t_loss = tv_loss(img_var, tv_weight)
    loss = c_loss + s_loss + t_loss

    # Set up optimization hyperparameters
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180
    max_iter = 100

    # Create and initialize the Adam optimizer
    lr_var = tf.Variable(initial_lr, name="lr")
    # Create train_op that updates the generated image when run
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(loss, var_list=[img_var])
    # Initialize the generated image and optimization variables
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))

    # Create an op that will clamp the image values when run
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    if output_image[-4:] == '.jpg':
        output_image = output_image[:-4]

    for t in range(0, max_iter + 1):
        # Take an optimization step to update img_var
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 25 == 0:
            print('Iteration {}'.format(t))
            img = sess.run(img_var)
            cv2.imwrite(output_image + "_iter" + str(t) + ".jpg", deprocess_image(img[0]))