def generate(model, args):
    """Render a feature visualization for one neuron and return it as an image.

    The optimization only ever shows the model a random half-size crop of the
    parameterized image, while the full (uncropped) image is returned.

    Args:
        model: Model passed through to ``render.make_vis_T``.
        args: Mapping read with the keys ``'layer'`` (a string; the text before
            the first space is used as the layer id), ``'neuron'``, ``'size'``
            (converted with ``int``), ``'transforms'`` (truthy enables the
            transform stack) and ``'transform_min'`` (smallest random scale).

    Returns:
        dict: ``{'image': img}`` where ``img`` comes from ``Image.fromarray``
        applied to the first image of the batch scaled by 255 as uint8.
    """
    print('[GENERATE] Ran with layer {} and neuron {}'.format(
        args['layer'], args['neuron']))
    layer_id = args['layer'].split(' ')[0]
    layer_neuron = '{}:{}'.format(layer_id, args['neuron'])
    s = int(args['size'])
    min_scale = args['transform_min']
    # NOTE(review): args['transform_max'] previously only fed an unused local;
    # the scale list below always spans 20 steps of 0.1 starting at min_scale.

    # https://github.com/tensorflow/lucid/issues/148
    with tf.Graph().as_default(), tf.Session():
        t_img = param.image(s)
        crop_W = s // 2
        t_offset = tf.random.uniform((2, ), 0, s - crop_W, dtype="int32")
        t_img_crop = t_img[:,
                           t_offset[0]:t_offset[0] + crop_W,
                           t_offset[1]:t_offset[1] + crop_W]

        if args['transforms']:
            transforms = [
                transform.jitter(2),
                transform.random_scale(
                    [min_scale + n / 10. for n in range(20)]),
                transform.random_rotate(range(-10, 11)),
                transform.jitter(2),
            ]
            T = render.make_vis_T(model, layer_neuron, t_img_crop,
                                  transforms=transforms)
        else:
            T = render.make_vis_T(model, layer_neuron, t_img_crop)

        # initialize_all_variables() is deprecated; this is its replacement
        # and matches the other render helpers in this file.
        tf.global_variables_initializer().run()

        vis_op = T("vis_op")
        for _ in range(1024):
            vis_op.run()
        img = t_img.eval()[0]

    # https://github.com/tensorflow/lucid/issues/108
    # img = render.render_vis(model, layer_neuron)[-1][0]
    img = Image.fromarray(np.uint8(img * 255))
    return {'image': img}
def render_vis(model, objective_f, param_f=None, optimizer=None,
               transforms=None, steps=2560, relu_gradient_override=True,
               output_size=1024, output_path='image.jpg'):
    """Optimize a visualization and save the final image to ``output_path``.

    Variant of Lucid's ``render_vis``
    (https://github.com/tensorflow/lucid/blob/master/lucid/optvis/render.py).
    Every optimization step feeds 224 for the module-level ``_size`` tensor;
    the saved image is evaluated once with ``_size`` fed as ``output_size``.

    Args:
        model, objective_f, param_f, optimizer, transforms,
        relu_gradient_override: Forwarded positionally to
            ``render.make_vis_T``.
        steps: Number of optimization steps to run.
        output_size: Value fed for ``_size`` when rendering the saved image;
            the result is reshaped to ``(output_size, output_size, 3)``.
        output_path: Destination passed to ``PIL.Image.save``.

    Returns:
        None. The image is written to disk.
    """
    global _size
    with tf.Graph().as_default(), tf.Session() as sess:
        T = render.make_vis_T(model, objective_f, param_f, optimizer,
                              transforms, relu_gradient_override)
        vis_op = T('vis_op')
        t_image = T('input')
        tf.global_variables_initializer().run()

        progress = IncrementalBar('Creating image...', max=steps,
                                  suffix='%(percent)d%%')
        for _ in range(steps):
            sess.run(vis_op, feed_dict={_size: 224})
            progress.next()
        progress.finish()

        print('Saving image as {}.'.format(output_path))
        rendered = sess.run(t_image, feed_dict={_size: output_size})
        pixels = rendered.reshape(output_size, output_size, 3) * 255
        PIL.Image.fromarray(pixels.astype(np.uint8)).save(output_path)
def feature_inversion(img, model, layer=None, n_steps=512, cossim_pow=0.0):
    """Optimize an image against ``img`` at ``layer`` and display it.

    A two-image batch is built: entry 0 is the learned parameterization and
    entry 1 is the fed reference image. The objective sums
    ``dot_compare(layer, ...)`` with ``objectives.blur_input_each_step()``.

    Args:
        img: Reference image; first passed through ``imgToModelSize``.
        model: Model handed to ``imgToModelSize`` and ``render.make_vis_T``.
        layer: Layer argument for ``dot_compare``.
        n_steps: Number of optimization steps.
        cossim_pow: Forwarded to ``dot_compare``.

    Returns:
        None. The first image of the final batch is passed to ``show``.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        img = imgToModelSize(img, model)

        terms = [
            1.0 * dot_compare(layer, cossim_pow=cossim_pow),
            objectives.blur_input_each_step(),
        ]
        objective = objectives.Objective.sum(terms)

        t_input = tf.placeholder(tf.float32, img.shape)
        learned = param.image(img.shape[0], decorrelate=True, fft=True,
                              alpha=False)
        # Index 0: image being optimized; index 1: fed reference image.
        batch = tf.stack([learned[0], t_input])

        # The repeated 1 / 0 entries weight the draw towards "no change".
        scales = [0.9, 0.95, 1.05, 1.1] + [1]*4
        angles = list(range(-5, 5)) + [0]*5
        transforms = [
            transform.pad(8, mode='constant', constant_value=.5),
            transform.jitter(8),
            transform.random_scale(scales),
            transform.random_rotate(angles),
            transform.jitter(2),
        ]

        T = render.make_vis_T(model, objective, batch, transforms=transforms)
        vis_op, t_image = T("vis_op"), T("input")
        tf.global_variables_initializer().run()

        feed = {t_input: img}
        for _ in range(n_steps):
            sess.run([vis_op], feed)

        result = t_image.eval(feed_dict=feed)
        show(result[0])
def feature_inversion(model, layer, example_image, n_steps=512,
                      cossim_pow=1.0, input_blur_coeff=0.0):
    """Optimize an image against ``example_image`` at ``layer``.

    Builds a two-image batch (learned image, fed example) and optimizes the
    sum of ``dot_compare(layer, ...)`` and a blur term weighted by
    ``input_blur_coeff``, using ``transform.standard_transforms``.

    Args:
        model: Model; ``load_graphdef()`` is called on it and its
            ``image_shape`` defines the placeholder shape and image size.
        layer: Layer argument for ``dot_compare``.
        example_image: Array fed for the reference placeholder.
        n_steps: Number of optimization steps.
        cossim_pow: Forwarded to ``dot_compare``.
        input_blur_coeff: Multiplier applied to
            ``objectives.blur_input_each_step()``.

    Returns:
        The first entry of the evaluated input batch (the learned image).
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        model.load_graphdef()
        shape = model.image_shape

        objective = objectives.Objective.sum([
            dot_compare(layer, cossim_pow=cossim_pow),
            input_blur_coeff * objectives.blur_input_each_step(),
        ])

        t_input = tf.placeholder(tf.float32, shape)
        learned = param.image(shape[0])
        # Index 0: image being optimized; index 1: fed example image.
        batch = tf.stack([learned[0], t_input])

        T = render.make_vis_T(model, objective, batch,
                              transforms=transform.standard_transforms)
        vis_op, t_image = T("vis_op"), T("input")
        tf.global_variables_initializer().run()

        feed = {t_input: example_image}
        for _ in range(n_steps):
            sess.run([vis_op], feed)

        return t_image.eval(feed_dict=feed)[0]
def caricature(img, model, layer, n_steps=512, cossim_pow=0.0, verbose=True):
    """Optimize one image per layer against ``img`` and return the batch.

    The batch fed to the model holds the resized reference image at index 0
    followed by one learned image per requested layer; learned image ``k``
    (batch index ``k + 1``) is optimized with ``dot_compare`` on layer ``k``.

    Args:
        img: Reference image; resized with ``resize`` to
            ``model.image_shape[:2]``.
        model: Model handed to ``render.make_vis_T``.
        layer: A layer name, or a tuple/list of layer names.
        n_steps: Number of optimization steps.
        cossim_pow: Forwarded to ``dot_compare``.
        verbose: When truthy, entries 1.. of the result are displayed with
            ``lucid.misc.io.showing.images`` labelled by the layers.

    Returns:
        The evaluated input batch (reference image first).

    Raises:
        TypeError: If ``layer`` is not a str, tuple or list.
    """
    if not isinstance(layer, (str, tuple, list)):
        raise TypeError("layer must be str, tuple or list")
    layers = [layer] if isinstance(layer, str) else layer

    with tf.Graph().as_default(), tf.Session() as sess:
        img = resize(img, model.image_shape[:2])

        # Batch index 0 is the reference image, so objectives start at 1.
        objective = objectives.Objective.sum([
            1.0 * dot_compare(name, cossim_pow=cossim_pow, batch=idx)
            for idx, name in enumerate(layers, start=1)
        ])

        t_input = tf.placeholder(tf.float32, img.shape)
        learned = param.image(img.shape[0], decorrelate=True, fft=True,
                              alpha=False, batch=len(layers))
        batch = tf.concat([t_input[None], learned], 0)

        restore_size = transform.crop_or_pad_to(*model.image_shape[:2])
        transforms = transform.standard_transforms + [restore_size]

        T = render.make_vis_T(model, objective, batch, transforms=transforms)
        vis_op, t_image = T("vis_op"), T("input")
        tf.global_variables_initializer().run()

        feed = {t_input: img}
        for _ in range(n_steps):
            sess.run([vis_op], feed)
        result = t_image.eval(feed_dict=feed)

    if verbose:
        lucid.misc.io.showing.images(result[1:], layers)
    return result
def render_set(
        channel,
        n_iter,
        prefix,
        starting_pos=None,
        force=False,
        objective=None,
):
    """Train a CPPN visualization, then save its weights and a rendered image.

    Output paths are ``<save_model_dest>/<channel>_<prefix>.npy`` and
    ``<save_image_dest>/<channel>_<prefix>.png``. Relies on module-level
    ``save_model_dest``, ``save_image_dest``, ``model``, ``optimizer``,
    ``size_n``, ``cppn`` and ``create_session``.

    Args:
        channel: String used in the output file names and the log line.
        n_iter: Number of optimization steps.
        prefix: Suffix used in the output file names.
        starting_pos: Optional sequence of values assigned, in order, to
            ``tf.trainable_variables()`` before optimization.
        force: When false and the ``.npy`` file already exists, nothing is
            rendered.
        objective: Objective handed to ``render.make_vis_T``.

    Returns:
        ``True`` when the render was skipped because the weights file already
        exists; otherwise ``None``.
    """
    f_model = os.path.join(save_model_dest, channel + f"_{prefix}.npy")
    f_image = os.path.join(save_image_dest, channel + f"_{prefix}.png")

    if os.path.exists(f_model) and not force:
        return True

    print("Starting", channel, prefix)
    obj = objective

    # Add this to "sharpen" the image... too much and it gets crazy
    # obj += 0.001*objectives.total_variation()

    sess = create_session()
    try:
        t_size = tf.placeholder_with_default(size_n, [])
        T = render.make_vis_T(
            model,
            obj,
            param_f=lambda: cppn(t_size),
            transforms=[],
            optimizer=optimizer,
        )
        tf.global_variables_initializer().run()

        # Assign the starting weights
        if starting_pos is not None:
            for v, x in zip(tf.trainable_variables(), starting_pos):
                sess.run(tf.assign(v, x))

        fetches = [T("vis_op"), T("loss")]
        for _ in tqdm(range(n_iter)):
            sess.run(fetches)

        # Save trained variables
        train_vars = sess.graph.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)
        params = np.array(sess.run(train_vars), object)
        save(params, f_model)

        # Render the final image at a fixed size of 600
        images = T("input").eval({t_size: 600})
        img = images[0]
    finally:
        # Release the session even when building or training fails.
        sess.close()

    imsave(f_image, img)
def render_vis(model, objective_f, param_f=None, optimizer=None,
               transforms=None, thresholds=(512, ), print_objectives=None,
               verbose=True, relu_gradient_override=True,
               use_fixed_seed=False):
    """Run the visualization optimization and collect images at thresholds.

    Args:
        model, objective_f, param_f, optimizer, transforms,
        relu_gradient_override: Forwarded positionally to ``make_vis_T``.
        thresholds: Step indices at which the input image is captured; the
            loop runs for ``max(thresholds) + 1`` steps.
        print_objectives: Forwarded to ``make_print_objective_func``.
        verbose: When truthy, captured images are shown with
            ``display_videos`` and the step/loss pair is printed.
        use_fixed_seed: When truthy, ``tf.set_random_seed(0)`` is called.

    Returns:
        list: The images captured at the threshold steps. On
        ``KeyboardInterrupt`` the current image is displayed and whatever was
        captured so far is returned.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        if use_fixed_seed:
            # A fixed seed by itself does not guarantee reproducible results.
            tf.set_random_seed(0)

        T = make_vis_T(model, objective_f, param_f, optimizer, transforms,
                       relu_gradient_override)
        # Constructed here but not invoked anywhere in this function.
        print_objective_func = make_print_objective_func(print_objectives, T)

        loss = T("loss")
        vis_op = T("vis_op")
        t_image = T("input")
        tf.global_variables_initializer().run()

        images = []
        try:
            n_steps = max(thresholds) + 1
            bar = tqdm(range(n_steps))
            for i in bar:
                loss_, _ = sess.run([loss, vis_op])
                if i in thresholds:
                    print('HERE')
                    vis = t_image.eval()
                    images.append(vis)
                    if verbose:
                        display_videos(images)
                        print(i, loss_)
                bar.set_description(f"loss {loss_:.2f}")
        except KeyboardInterrupt:
            # Show where the optimization got to before being interrupted.
            vis = t_image.eval()
            showarray(visstd(vis))

        return images
def feature_inversion(img, model, layer, n_steps=512, cossim_pow=0.0,
                      verbose=True):
    """Optimize one image per layer against ``img`` and return the batch.

    The batch holds the model-sized reference image at index 0 followed by
    one learned image per layer; learned image ``k`` (batch index ``k + 1``)
    is optimized with ``dot_compare`` on layer ``k``.

    Args:
        img: Reference image; first passed through ``imgToModelSize``.
        model: Model handed to ``imgToModelSize`` and ``render.make_vis_T``.
        layer: A layer name, or a tuple/list of layer names.
        n_steps: Number of optimization steps.
        cossim_pow: Forwarded to ``dot_compare``.
        verbose: When truthy, entries 1.. of the result are displayed with
            ``lucid.misc.io.showing.images`` labelled by the layers.

    Returns:
        The evaluated input batch (reference image first).

    Raises:
        TypeError: If ``layer`` is not a str, tuple or list.
    """
    if not isinstance(layer, (str, tuple, list)):
        raise TypeError("layer must be str, tuple or list")
    layers = [layer] if isinstance(layer, str) else layer

    with tf.Graph().as_default(), tf.Session() as sess:
        img = imgToModelSize(img, model)

        # Batch index 0 is the reference image, so objectives start at 1.
        objective = objectives.Objective.sum([
            1.0 * dot_compare(name, cossim_pow=cossim_pow, batch=idx)
            for idx, name in enumerate(layers, start=1)
        ])

        t_input = tf.placeholder(tf.float32, img.shape)
        learned = param.image(img.shape[0], decorrelate=True, fft=True,
                              alpha=False, batch=len(layers))
        batch = tf.concat([t_input[None], learned], 0)

        # The repeated 1 / 0 entries weight the draw towards "no change".
        scales = [0.9, 0.95, 1.05, 1.1] + [1] * 4
        angles = list(range(-5, 5)) + [0] * 5
        transforms = [
            transform.pad(8, mode='constant', constant_value=.5),
            transform.jitter(8),
            transform.random_scale(scales),
            transform.random_rotate(angles),
            transform.jitter(2),
        ]

        T = render.make_vis_T(model, objective, batch, transforms=transforms)
        vis_op, t_image = T("vis_op"), T("input")
        tf.global_variables_initializer().run()

        feed = {t_input: img}
        for _ in range(n_steps):
            sess.run([vis_op], feed)
        result = t_image.eval(feed_dict=feed)

    if verbose:
        lucid.misc.io.showing.images(result[1:], layers)
    return result
def render_set(n, channel):
    """Render channel ``n`` of ``channel`` with a CPPN and save it as a JPEG.

    If ``<save_model_dest>/<channel>_<n>.npy`` does not exist, the network is
    optimized for ``training_steps`` steps and its trainable variables are
    saved there; otherwise the stored values are loaded. The final image is
    evaluated with those values fed for the trainable variables and written
    to ``<save_image_dest>/<channel>_<n>.jpg``.

    Relies on module-level ``model``, ``optimizer``, ``size_n``,
    ``image_size``, ``training_steps``, ``save_model_dest``,
    ``save_image_dest``, ``cppn`` and ``create_session``.

    Args:
        n: Channel index passed to ``objectives.channel``.
        channel: Layer string passed to ``objectives.channel`` and used in the
            output file names.

    Returns:
        None.
    """
    print("Starting", channel, n)
    obj = objectives.channel(channel, n)

    # Add this to "sharpen" the image... too much and it gets crazy
    # obj += 0.001*objectives.total_variation()

    sess = create_session()
    try:
        t_size = tf.placeholder_with_default(size_n, [])
        f_model = os.path.join(save_model_dest, channel + f"_{n}.npy")

        T = render.make_vis_T(
            model,
            obj,
            param_f=lambda: cppn(t_size),
            transforms=[],
            optimizer=optimizer,
        )
        tf.global_variables_initializer().run()
        train_vars = sess.graph.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)

        if not os.path.exists(f_model):
            fetches = [T("vis_op"), T("loss")]
            for _ in tqdm(range(training_steps)):
                sess.run(fetches)

            # Save trained variables
            params = np.array(sess.run(train_vars), object)
            save(params, f_model)
        else:
            params = load(f_model)

        # Feed the stored values for the variables so that the loaded and
        # freshly-trained paths render the same way.
        feed_dict = dict(zip(train_vars, params))
        feed_dict[t_size] = image_size
        images = T("input").eval(feed_dict)
        img = images[0]
    finally:
        # Release the session even when building or training fails.
        sess.close()

    f_image = os.path.join(save_image_dest, channel + f"_{n}.jpg")
    imageio.imwrite(f_image, img)
    print(f"Saved to {f_image}")
def render_vis_explore(model, objective_f, param_f=None, optimizer=None,
                       transforms=None, vis_every=100, thresholds=[],
                       print_objectives=None, verbose=True,
                       relu_gradient_override=True, use_fixed_seed=False):
    """Optimize indefinitely, capturing an image every ``vis_every`` steps.

    The loop has no stopping condition of its own; it ends when a
    ``KeyboardInterrupt`` is raised, at which point the current image is
    displayed and the captured images are returned.

    Args:
        model, objective_f, param_f, optimizer, transforms,
        relu_gradient_override: Forwarded positionally to ``make_vis_T``.
        vis_every: Capture interval in steps (steps are counted from 1).
        thresholds: Not read by this function.
        print_objectives: Forwarded to ``make_print_objective_func``.
        verbose: When truthy, ``display_videos`` is called on each capture.
        use_fixed_seed: When truthy, ``tf.set_random_seed(0)`` is called.

    Returns:
        list: The captured images.
    """
    with tf.Graph().as_default() as graph, \
            tf.Session() as sess, tqdm() as pbar:
        if use_fixed_seed:
            # A fixed seed by itself does not guarantee reproducible results.
            tf.set_random_seed(0)

        T = make_vis_T(model, objective_f, param_f, optimizer, transforms,
                       relu_gradient_override)
        # Constructed here but not invoked anywhere in this function.
        print_objective_func = make_print_objective_func(print_objectives, T)

        loss = T("loss")
        vis_op = T("vis_op")
        t_image = T("input")
        tf.global_variables_initializer().run()

        images = []
        step = 0
        try:
            while True:
                step += 1
                loss_, _ = sess.run([loss, vis_op])
                if step % vis_every == 0:
                    vis = t_image.eval()
                    images.append(vis)
                    if verbose:
                        display_videos(images)
                pbar.update(1)
        except KeyboardInterrupt:
            # Interrupting is the intended way to stop; show the last state.
            vis = t_image.eval()
            showarray(visstd(vis))

        return images
def render_set(n, channel, train_n):
    """Train a batch of independent images for one channel and return them.

    One ``objectives.channel`` term is summed per batch entry. After training,
    the variables are checkpointed to
    ``<save_model_dest>/<channel>_<n>_batches_<batch_size>.ckpt``.

    Relies on names not defined in this function: ``sess``, ``t_size``,
    ``batch_size``, ``model``, ``optimizer``, ``save_model_dest`` and
    ``create_network``.

    Args:
        n: Channel index passed to ``objectives.channel``.
        channel: Layer string passed to ``objectives.channel`` and used in the
            checkpoint file name.
        train_n: Number of optimization steps.

    Returns:
        The input batch evaluated with ``t_size`` fed as 600.
    """

    # Creates independent images
    def param_f():
        return create_network(batch_size)

    obj = sum(
        objectives.channel(channel, n, batch=idx)
        for idx in range(batch_size)
    )

    # Optional extras that give some visual similarity between the models:
    # obj += 10*objectives.input_diff(target_img)
    # This does as well but not as nice
    # obj += 0.01*objectives.alignment(channel, decay_ratio=3)
    # See more here
    # https://colab.research.google.com/github/tensorflow/lucid/blob/master/notebooks/differentiable-parameterizations/aligned_interpolation.ipynb#scrollTo=jOCYDhRrnPjp

    T = render.make_vis_T(
        model,
        obj,
        param_f=param_f,
        transforms=[],
        optimizer=optimizer,
    )
    saver = tf.train.Saver()
    tf.global_variables_initializer().run()

    for _ in tqdm(range(train_n)):
        sess.run([T("vis_op"), T("loss")])

    # Save trained variables
    checkpoint_name = channel + f"_{n}_batches_{batch_size}.ckpt"
    f_model = os.path.join(save_model_dest, checkpoint_name)
    saver.save(sess, f_model)

    # Return image
    return T("input").eval({t_size: 600})
def render_vis_with_loss(model, objective_f, size, optimizer=None,
                         transforms=[], thresholds=(256, ),
                         print_objectives=None, relu_gradient_override=True):
    """Render a visualization and return the last image with its loss.

    The image parameterization is created with ``param.image(size)`` and the
    optimization runs inside that tensor's graph. The default graph is reset
    afterwards, including when the render fails, so ops from this call do not
    accumulate.

    Args:
        model, objective_f, optimizer, transforms, relu_gradient_override:
            Forwarded to ``render.make_vis_T``. ``transforms`` defaults to an
            empty list, which this function never mutates.
        size: Passed to ``param.image``.
        thresholds: Step indices at which image and loss are recorded; the
            loop runs for ``max(thresholds) + 1`` steps.
        print_objectives: Forwarded to ``render.make_print_objective_func``.

    Returns:
        tuple: ``(image, loss)`` recorded at the last threshold reached.
    """
    param_f = param.image(size)
    images = []
    losses = []

    try:
        with param_f.graph.as_default(), tf.Session() as sess:
            T = render.make_vis_T(
                model, objective_f,
                param_f=param_f,
                optimizer=optimizer,
                transforms=transforms,
                relu_gradient_override=relu_gradient_override)
            # Constructed here but not invoked anywhere in this function.
            print_objective_func = render.make_print_objective_func(
                print_objectives, T)

            loss, vis_op, t_image = T("loss"), T("vis_op"), T("input")
            tf.global_variables_initializer().run()

            for i in range(max(thresholds) + 1):
                loss_, _ = sess.run([loss, vis_op])
                if i in thresholds:
                    images.append(t_image.eval())
                    losses.append(loss_)
    finally:
        # Must run after the graph context has exited; doing it in `finally`
        # keeps a failed render from leaving its ops in the default graph.
        tf.compat.v1.reset_default_graph()

    return images[-1], losses[-1]
def render_vis(model, objective_f, file_name, filter_idx, param_f=None,
               optimizer=None, transforms=None, thresholds=(512, ),
               verbose=True, relu_gradient_override=True,
               use_fixed_seed=False):
    """Render a visualization and save a titled plot of it to ``file_name``.

    At every step listed in ``thresholds`` the current image batch is stacked
    horizontally, plotted with the filter index and loss in the title, and
    written with ``plt.savefig`` (later thresholds overwrite earlier ones).

    Args:
        model, objective_f, param_f, optimizer, transforms,
        relu_gradient_override: Forwarded positionally to
            ``render.make_vis_T``.
        file_name: Destination for ``plt.savefig``.
        filter_idx: Value shown in the plot title.
        thresholds: Step indices at which the plot is saved; the loop runs
            for ``max(thresholds) + 1`` steps.
        verbose: Not read by this function.
        use_fixed_seed: When truthy, ``tf.set_random_seed(0)`` is called.

    Returns:
        The loss at the last threshold reached, or 0 if none was reached.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        if use_fixed_seed:
            # A fixed seed by itself does not guarantee reproducible results.
            tf.set_random_seed(0)

        T = render.make_vis_T(model, objective_f, param_f, optimizer,
                              transforms, relu_gradient_override)
        loss = T("loss")
        vis_op = T("vis_op")
        t_image = T("input")
        tf.global_variables_initializer().run()

        loss_p = 0
        try:
            for i in range(max(thresholds) + 1):
                loss_, _ = sess.run([loss, vis_op])
                if i not in thresholds:
                    continue
                vis = t_image.eval()
                loss_p = loss_
                plt.title("Filter {}, {:.5f}".format(filter_idx, loss_))
                plt.imshow(np.hstack(vis))
                plt.axis('off')
                plt.savefig(file_name)
                plt.clf()
        except KeyboardInterrupt:
            # Show the current state instead of saving it.
            vis = t_image.eval()
            show(np.hstack(vis))

        return loss_p
def render_activation_grid_less_naive(self, img, layer="mixed4d", W=42,
                                      n_groups=6, subsample_factor=1,
                                      n_steps=256):
    """Render a grid with one small visualization per spatial activation.

    The activations of ``layer`` for ``img`` are computed first. Every spatial
    position then gets its own ``W`` x ``W`` image, optimized towards that
    position's activation vector. Images are parameterized as a unique part
    plus a weighted mix of ``n_groups`` shared images, and each step only
    optimizes a random batch of 64 of them.

    Args:
        img: Input image; evaluated as ``img[None]`` through a
            ``[None, None, None, 3]`` placeholder.
        layer: Name of the layer whose activations are visualized.
        W: Side length of each per-position image.
        n_groups: Number of NMF groups (``ChannelReducer``) sharing
            parameters; the factorization is skipped when not positive.
        subsample_factor: Not read by this method.
        n_steps: Number of optimization steps.

    Returns:
        Array of the optimized images reshaped to the activation grid, with
        2 pixels cropped from every image border.
    """
    # Get the activations
    with tf.Graph().as_default(), tf.Session() as sess:
        t_input = tf.placeholder("float32", [None, None, None, 3])
        T = render.import_model(self.model, t_input, t_input)
        acts = T(layer).eval({t_input: img[None]})[0]
    acts_flat = acts.reshape([-1] + [acts.shape[2]])
    N = acts_flat.shape[0]

    # The trick to avoiding "decoherence" is to recognize which images are
    # for similar activation vectors and let them share parameters.
    if n_groups > 0:
        reducer = ChannelReducer(n_groups, "NMF")
        groups = reducer.fit_transform(acts_flat)
        groups /= groups.max(0)
    else:
        groups = np.zeros([])

    print(groups.shape)

    # The key trick to increasing memory efficiency is random sampling.
    # Even though we're visualizing lots of images, we only run a small
    # subset through the network at once. In order to do this, we'll need
    # to hold tensors in a tensorflow graph around the visualization process.
    with tf.Graph().as_default() as graph, tf.Session() as sess:
        # Using the groups, create a parameterization of images that
        # partly shares parameters between the images for similar activation
        # vectors. Each one still has a full set of unique parameters, and
        # could optimize to any image. We're just making it easier to find
        # solutions where things are the same.
        group_imgs_raw = param.fft_image([n_groups, W, W, 3])
        unique_imgs_raw = param.fft_image([N, W, W, 3])
        opt_imgs = param.to_valid_rgb(tf.stack([
            0.7 * unique_imgs_raw[i] +
            0.5 * sum(groups[i, j] * group_imgs_raw[j]
                      for j in range(n_groups))
            for i in range(N)
        ]), decorrelate=True)

        # Construct a random batch to optimize this step
        batch_size = 64
        rand_inds = tf.random_uniform([batch_size], 0, N, dtype=tf.int32)
        pres_imgs = tf.gather(opt_imgs, rand_inds)
        pres_acts = tf.gather(acts_flat, rand_inds)
        obj = objectives.Objective.sum([
            objectives.direction(layer, pres_acts[n], batch=n)
            for n in range(batch_size)
        ])

        # Actually do the optimization...
        T = render.make_vis_T(self.model, obj, param_f=pres_imgs)
        tf.global_variables_initializer().run()

        # Preview twice per run; clamp to 1 so that n_steps == 1 does not
        # turn the modulo below into a division by zero.
        preview_every = max(1, n_steps // 2)
        for i in range(n_steps):
            T("vis_op").run()
            if (i + 1) % preview_every == 0:
                show(pres_imgs.eval()[::4])

        vis_imgs = opt_imgs.eval()

    # Combine the images and display the resulting grid
    print("")
    vis_imgs_ = vis_imgs.reshape(list(acts.shape[:2]) + [W, W, 3])
    vis_imgs_cropped = vis_imgs_[:, :, 2:-2, 2:-2, :]
    show(np.hstack(np.hstack(vis_imgs_cropped)))
    return vis_imgs_cropped
def run(self, layer, class_, channel=None, style_template=None,
        transforms=False, opt_steps=500, gram_coeff=1e-14):
    """Optimize an activation-maximization image for one layer channel.

    Stores ``layer`` and ``channel`` on the instance, optimizes the objective
    built by ``self._channel`` (minus the L1 and total-variation terms
    weighted by ``self.L1`` and ``self.TV``), and creates the directory
    ``<self.savepath>/<class_>``.

    Args:
        layer: Layer name to visualize; ``"/convolution"`` is appended.
        class_: Class to consider; used as the output sub-directory name.
        channel: Channel index; ``None`` is treated as 0.
        style_template: Optional path to a template for comparison with the
            generated map. It is loaded with ``np.load`` and, if that fails,
            read with ``cv2.imread`` and padded.
        transforms: ``True`` enables pad + jitter by ``self.jitter``; any
            other value disables all transforms.
        opt_steps: Number of optimization steps.
        gram_coeff: Forwarded to ``self._channel``.

    Returns:
        tuple: ``(texture_images, images_array)`` where ``texture_images`` is
        an array with one 240x240 image per input channel taken after the
        last step, and ``images_array`` is a list with the last channel's
        240x240 image after every step.
    """
    self.layer = layer
    self.channel = channel if channel is not None else 0
    side = 240

    with tf.Graph().as_default(), tf.Session():
        if style_template is not None:
            try:
                gram_template = tf.constant(
                    np.load(style_template),
                    dtype=tf.float32)
            except Exception:
                # Not loadable as an array file: fall back to reading it as
                # an image. `Exception` (not a bare except) so that Ctrl-C
                # and SystemExit still propagate.
                image = cv2.imread(style_template)
                print(image.shape)
                gram_template = tf.constant(
                    np.pad(image, ((1, 1), (0, 0))),
                    dtype=tf.float32)
        else:
            gram_template = None

        obj = self._channel(self.layer + "/convolution",
                            self.channel,
                            gram=gram_template,
                            gram_coeff=gram_coeff)
        obj += -self.L1 * objectives.L1(constant=.5)
        obj += -self.TV * objectives.total_variation()

        # Equality (not truthiness) is kept on purpose: only True/1 enables
        # the transforms, any other value means "no transforms".
        if transforms == True:  # noqa: E712
            transforms = [
                transform.pad(self.jitter),
                transform.jitter(self.jitter),
            ]
        else:
            transforms = []

        T = render.make_vis_T(
            self.model,
            obj,
            param_f=lambda: self.image(side,
                                       channels=self.n_channels,
                                       fft=self.decorrelate,
                                       decorrelate=self.decorrelate),
            optimizer=None,
            transforms=transforms,
            relu_gradient_override=False)

        # initialize_all_variables() is deprecated; this is its replacement.
        tf.global_variables_initializer().run()

        vis_op = T("vis_op")
        t_image = T("input")

        images_array = []
        for _ in range(opt_steps):
            vis_op.run()
            images_array.append(
                t_image.eval()[:, :, :, -1].reshape((side, side)))

        # NOTE(review): the figure is created but nothing in this method
        # draws on it or saves it.
        plt.figure(figsize=(10, 10))

        texture_images = []
        for i in range(1, self.n_channels + 1):
            image = t_image.eval()[:, :, :, i - 1].reshape((side, side))
            print("channel: ", i, image.min(), image.max())
            texture_images.append(image)

        os.makedirs(os.path.join(self.savepath, class_), exist_ok=True)

        return np.array(texture_images), images_array
def get_visualizations_and_losses(model, objective_f, param_f=None,
                                  optimizer=None, transforms=None,
                                  threshold_start=0,
                                  thresholds=(64, 128, 256, 512, 1024),
                                  visualization_index=None,
                                  visualization_layer=None, minimum_loss=0,
                                  num_bins=100, max_bin_hits=10,
                                  bin_factor=10000000, loss_logger=None):
    """Render a visualization, aborting early when the loss looks stuck.

    Two abort conditions return an empty list:

    * The loss, scaled by ``bin_factor`` and truncated to an int, repeats a
      recently seen value more than ``max_bin_hits`` times in total (only
      checked when ``num_bins > 0``).
    * ``minimum_loss > 0`` and the absolute loss falls below it.

    Args:
        model, objective_f, param_f, optimizer, transforms: Forwarded
            positionally to ``render.make_vis_T``.
        threshold_start: First step index of the loop.
        thresholds: Step indices at which an image is captured; the loop ends
            after ``max(thresholds)``.
        visualization_index, visualization_layer: Identifiers copied into the
            results and the abort messages.
        minimum_loss: Lower bound on ``abs(loss)``; 0 disables the check.
        num_bins: How many recent binned losses are remembered.
        max_bin_hits: Allowed number of repeated binned losses.
        bin_factor: Multiplier applied to the loss before truncation.
        loss_logger: Optional callable ``(loss, step, tag)``.

    Returns:
        list: ``[step, image, loss, visualization_index,
        visualization_layer]`` entries, or ``[]`` when aborted.

    Raises:
        KeyboardInterrupt: Re-raised after printing the interrupted step.
    """

    def log(value, step, tag):
        # Forward to the optional logger callback.
        if loss_logger is not None:
            loss_logger(value, step, tag)

    with tf.Graph().as_default(), tf.Session() as sess:
        T = render.make_vis_T(model, objective_f, param_f, optimizer,
                              transforms)
        loss = T("loss")
        vis_op = T("vis_op")
        t_image = T("input")
        tf.global_variables_initializer().run()

        images = []
        try:
            # loss not changing much is indicator of failed image
            recent_bins = []
            repeats = 0
            for step in range(threshold_start, max(thresholds) + 1):
                threshold_loss, _ = sess.run([loss, vis_op])

                if num_bins > 0:
                    bin_loss = int(bin_factor * threshold_loss)
                    if bin_loss in recent_bins:
                        repeats += 1
                        if repeats > max_bin_hits:
                            print(
                                "\nLOSS CRISIS: feature={}:{}; bin=[{}], hits=[{}], threshold={}, loss=[{}]; aborting image"
                                .format(
                                    visualization_layer,
                                    visualization_index,
                                    bin_loss,
                                    repeats,
                                    step,
                                    threshold_loss,
                                ))
                            log(threshold_loss, step,
                                'RECURS_{}_{}'.format(repeats, max_bin_hits))
                            return []
                    else:
                        # truncate and append
                        recent_bins = recent_bins[-num_bins:]
                        recent_bins.append(bin_loss)

                if minimum_loss > 0 and abs(threshold_loss) < minimum_loss:
                    print(
                        "\nLOSS ANXIETY ({}): layer={}, index={}, threshold={}, loss={}"
                        .format(minimum_loss, visualization_layer,
                                visualization_index, step, threshold_loss))
                    log(threshold_loss, step, 'MIN_{}'.format(minimum_loss))
                    return []

                if step in thresholds:
                    vis = t_image.eval()
                    images.append([
                        step, vis,
                        float(threshold_loss), visualization_index,
                        visualization_layer
                    ])
                    log(threshold_loss, step, '')
        except KeyboardInterrupt:
            print("Interrupted optimization at step {:d}.".format(step + 1))
            raise

        return images
def render_icons(
        directions,
        model,
        layer,
        size=80,
        n_steps=128,
        verbose=False,
        S=None,
        num_attempts=2,
        cossim=True,
        alpha=False,
):
    """Render one icon per activation direction, keeping the best attempt.

    The whole batch is rendered ``num_attempts`` times. For every direction
    the image from the attempt with the highest final objective value is
    kept (the objective is maximized, so higher is better).

    Args:
        directions: Activation vectors; ``directions.shape[0]`` is the batch
            size and each row gets its own objective.
        model: Model handed to ``render.make_vis_T``.
        layer: Layer passed to the direction objectives.
        size: Image size passed to ``param.image``.
        n_steps: Optimization steps per attempt.
        verbose: Not read by this function.
        S: Forwarded to the direction objectives.
        num_attempts: How many independent renders to compare.
        cossim: ``True`` selects ``direction_neuron_cossim_S``; any other
            value selects ``direction_neuron_S``.
        alpha: When truthy, images get an alpha channel which is blended
            into the RGB result.

    Returns:
        tuple: ``(image_final, loss_final)``. ``image_final`` holds the
        selected image per direction. ``loss_final`` is never filled in and
        is always an empty list.
    """
    image_attempts = []
    loss_attempts = []

    # Render several attempts, and pull the one with the best score.
    for attempt in range(num_attempts):

        # Render an image for each activation vector
        param_f = lambda: param.image(
            size, batch=directions.shape[0], fft=True, decorrelate=True,
            alpha=alpha
        )

        if cossim is True:
            obj_list = [
                direction_neuron_cossim_S(layer, v, batch=n, S=S)
                for n, v in enumerate(directions)
            ]
        else:
            obj_list = [
                direction_neuron_S(layer, v, batch=n, S=S)
                for n, v in enumerate(directions)
            ]
        obj = objectives.Objective.sum(obj_list)

        # Work on a copy: appending to transform.standard_transforms itself
        # would grow the shared module-level list on every attempt and call.
        transforms = list(transform.standard_transforms)
        if alpha:
            transforms.append(transform.collapse_alpha_random())

        # This is the tensorflow optimization process
        print("attempt: ", attempt)
        with tf.Graph().as_default(), tf.Session() as sess:
            learning_rate = 0.05
            losses = []
            trainer = tf.train.AdamOptimizer(learning_rate)
            T = render.make_vis_T(model, obj, param_f, trainer, transforms)
            vis_op, t_image = T("vis_op"), T("input")
            losses_ = [obj_part(T) for obj_part in obj_list]
            tf.global_variables_initializer().run()

            for i in range(n_steps):
                loss, _ = sess.run([losses_, vis_op])
                losses.append(loss)
                if i % 100 == 0:
                    print(i)

            img = t_image.eval()
            img_rgb = img[:, :, :, :3]
            if alpha:
                print("alpha true")
                # Blend RGB with the alpha channel over a black background.
                k = 0.8
                bg_color = 0.0
                img_a = img[:, :, :, 3:]
                img_merged = img_rgb * ((1 - k) + k * img_a) + \
                    bg_color * k * (1 - img_a)
                image_attempts.append(img_merged)
            else:
                print("alpha false")
                image_attempts.append(img_rgb)

            loss_attempts.append(losses[-1])

    # Use only the icons with the best score
    loss_attempts = np.asarray(loss_attempts)
    # NOTE(review): loss_final is returned but nothing is ever appended.
    loss_final = []
    image_final = []
    print("merging best scores from attempts...")
    for i, _ in enumerate(directions):
        # note, this should be max, it is not a traditional loss
        mi = np.argmax(loss_attempts[:, i])
        image_final.append(image_attempts[mi][i])

    return (image_final, loss_final)
def render_icons(
        directions,
        model,
        layer,
        size=80,
        n_steps=128,
        verbose=False,
        S=None,
        num_attempts=3,
        cossim=True,
        alpha=False,
):
    """Render one icon per activation direction, keeping the best attempt.

    The whole batch is rendered ``num_attempts`` times with a heavy
    pad/jitter/scale/rotate transform stack plus a boundary-complexity
    penalty. For every direction the image from the attempt with the highest
    final objective value is kept.

    Args:
        directions: Activation vectors; ``len(directions)`` is the batch size.
        model: Model; ``load_graphdef()`` is called on it first.
        layer: Layer passed to the direction objectives.
        size: Image size passed to ``param.image``.
        n_steps: Optimization steps per attempt.
        verbose: Not read by this function.
        S: Forwarded to the direction objectives.
        num_attempts: How many independent renders to compare.
        cossim: ``True`` selects ``direction_neuron_cossim_S``; any other
            value selects ``direction_neuron_S``.
        alpha: When truthy, images get an alpha channel which is blended
            into the RGB result.

    Returns:
        tuple: ``(image_final, loss_final)``. ``image_final`` holds the
        selected image per direction. ``loss_final`` is never filled in and
        is always an empty list.
    """
    model.load_graphdef()

    depth = 4 if alpha else 3
    batch = len(directions)
    input_shape = (batch, size, size, depth)

    make_objective = (direction_neuron_cossim_S
                      if cossim is True else direction_neuron_S)

    image_attempts = []
    loss_attempts = []

    # Render several attempts, and pull the one with the best score.
    for attempt in range(num_attempts):

        # Render an image for each activation vector
        param_f = lambda: param.image(
            size, batch=batch, fft=True, decorrelate=True, alpha=alpha)

        obj_list = [
            make_objective(layer, v, batch=n, S=S)
            for n, v in enumerate(directions)
        ]
        obj_list.append(
            objectives.penalize_boundary_complexity(input_shape, w=5))
        obj = objectives.Objective.sum(obj_list)

        # Rebuilt per attempt because the scale/rotate arguments are
        # one-shot iterators.
        transforms = [
            transform.pad(16, mode='constant'),
            transform.jitter(4),
            transform.jitter(4),
            transform.jitter(8),
            transform.jitter(8),
            transform.jitter(8),
            transform.random_scale(0.998**n for n in range(20, 40)),
            transform.random_rotate(
                chain(range(-20, 20), range(-10, 10), range(-5, 5), 5 * [0])),
            transform.jitter(2),
            transform.crop_or_pad_to(size, size)
        ]
        if alpha:
            transforms.append(transform.collapse_alpha_random())

        # This is the tensorflow optimization process
        with tf.Graph().as_default(), tf.Session() as sess:
            learning_rate = 0.05
            trainer = tf.train.AdamOptimizer(learning_rate)
            T = render.make_vis_T(model, obj, param_f, trainer, transforms)
            vis_op, t_image = T("vis_op"), T("input")
            part_losses = [obj_part(T) for obj_part in obj_list]
            tf.global_variables_initializer().run()

            history = []
            for _ in range(n_steps):
                step_losses, _unused = sess.run([part_losses, vis_op])
                history.append(step_losses)

            img = t_image.eval()
            img_rgb = img[:, :, :, :3]
            if alpha:
                # Blend RGB with the alpha channel over a black background.
                k = 0.8
                bg_color = 0.0
                img_a = img[:, :, :, 3:]
                foreground = img_rgb * ((1 - k) + k * img_a)
                background = bg_color * k * (1 - img_a)
                image_attempts.append(foreground + background)
            else:
                image_attempts.append(img_rgb)

            loss_attempts.append(history[-1])

    # Use only the icons with the best score
    loss_attempts = np.asarray(loss_attempts)
    loss_final = []
    image_final = []
    for i, _ in enumerate(directions):
        # note, this should be max, it is not a traditional loss
        best = np.argmax(loss_attempts[:, i])
        image_final.append(image_attempts[best][i])

    return (image_final, loss_final)
def optimize_input(obj, model, param_f, transforms, lr=0.05, step_n=512,
                   num_output_channels=4, do_render=False, out_name="out"):
    """Optimize an input with Adam, optionally recording a video of it.

    Args:
        obj: Objective handed to ``render.make_vis_T``.
        model: Model handed to ``render.make_vis_T``.
        param_f: Image parameterization handed to ``render.make_vis_T``.
        transforms: Transform list handed to ``render.make_vis_T``.
        lr: Learning rate for ``tf.train.AdamOptimizer``.
        step_n: Number of optimization steps.
        num_output_channels: When 1, only the last channel is written to the
            video; otherwise the channels are laid out one after another in
            a single frame.
        do_render: When truthy, frames are written to ``<out_name>.mp4`` and
            the trained variables are saved to ``<out_name>.npy``.
        out_name: Base name for all output files.

    Returns:
        The input evaluated once more after optimization. The last channel
        of a separate evaluation is also saved to ``<out_name>.jpg``.
    """
    sess = create_session()
    try:
        # Set up optimization problem
        size = 84
        t_size = tf.placeholder_with_default(size, [])
        T = render.make_vis_T(
            model,
            obj,
            param_f=param_f,
            transforms=transforms,
            optimizer=tf.train.AdamOptimizer(lr),
        )
        tf.global_variables_initializer().run()

        if do_render:
            video_fn = out_name + '.mp4'
            writer = FFMPEG_VideoWriter(video_fn, (size, size * 4), 60.0)

        # Optimization loop
        try:
            for i in range(step_n):
                _, loss, img = sess.run(
                    [T("vis_op"), T("loss"), T("input")])

                if do_render:
                    if num_output_channels == 1:
                        # Single channel: replicate it into three channels.
                        img = img[..., -1:]
                        img = np.tile(img, 3)
                    else:
                        # Lay the four channels out along the first frame
                        # axis, then replicate into three channels.
                        img = img.transpose([0, 3, 1, 2])
                        img = img.reshape([size * 4, size, 1])
                        img = np.tile(img, 3)
                    writer.write_frame(_normalize_array(img))

                if i > 0 and i % 50 == 0:
                    clear_output()
                    print("%d / %d  score: %f" % (i, step_n, loss))
                    show(img)
        except KeyboardInterrupt:
            # Deliberate: interrupting stops the optimization early but the
            # results obtained so far are still saved below.
            pass
        finally:
            if do_render:
                print("closing...")
                writer.close()

        # Save trained variables
        if do_render:
            train_vars = sess.graph.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES)
            params = np.array(sess.run(train_vars), object)
            save(params, out_name + '.npy')

        # Save final image
        final_img = T("input").eval({t_size: 600})[..., -1:]
        save(final_img, out_name + '.jpg', quality=90)

        out = T("input").eval({t_size: size})
    finally:
        # Release the session even when setup or optimization fails.
        sess.close()
    return out