def feature_inversion(model, layer, example_image, n_steps=512, cossim_pow=1.0, input_blur_coeff=0.0):
    """Optimize a parameterized image against ``example_image`` at ``layer``.

    A two-image batch is built: slot 0 is a trainable ``param.image`` and
    slot 1 is ``example_image`` fed through a placeholder. The objective is
    the sum of ``dot_compare(layer, ...)`` and a blur term scaled by
    ``input_blur_coeff``.

    Args:
        model: model object exposing ``load_graphdef()`` and ``image_shape``.
        layer: layer name forwarded to ``dot_compare``.
        example_image: array fed into a placeholder of shape
            ``model.image_shape``.
        n_steps: number of times the optimization op is run.
        cossim_pow: forwarded to ``dot_compare``.
        input_blur_coeff: weight of ``objectives.blur_input_each_step()``.

    Returns:
        Element 0 of the evaluated input batch (the optimized slot).
    """
    with tf.Graph().as_default(), tf.Session() as session:
        model.load_graphdef()
        input_shape = model.image_shape

        match_term = dot_compare(layer, cossim_pow=cossim_pow)
        blur_term = input_blur_coeff * objectives.blur_input_each_step()
        objective = objectives.Objective.sum([match_term, blur_term])

        t_input = tf.placeholder(tf.float32, input_shape)
        trainable = param.image(input_shape[0])
        # Batch layout: [optimized image, example image].
        batch_t = tf.stack([trainable[0], t_input])

        T = render.make_vis_T(
            model,
            objective,
            batch_t,
            transforms=transform.standard_transforms,
        )
        vis_op = T("vis_op")
        t_image = T("input")

        tf.global_variables_initializer().run()
        feed = {t_input: example_image}
        for _ in range(n_steps):
            session.run([vis_op], feed)

        optimized_batch = t_image.eval(feed_dict=feed)
        return optimized_batch[0]
def render_atlas_tile(model,op_name,directions,icon_size=45,n_steps=127,transforms_amount=1,cossim_pow=0,L2_amount=2):
    """Render one icon per row of ``directions`` for the op ``op_name``.

    Args:
        model: model passed to ``render.render_vis``.
        op_name: layer/op name given to ``objectives.direction_neuron``.
        directions: array; one batch entry is rendered per row
            (``directions.shape[0]`` images).
        icon_size: size passed to ``param.image``.
        n_steps: last threshold; an intermediate result is also taken at
            ``n_steps // 2``.
        transforms_amount: index (0, 1 or 2) selecting one of the three
            transform presets defined below.
        cossim_pow: forwarded to ``objectives.direction_neuron``.
        L2_amount: weight of the squared ``L2("input", 0.5)`` penalty.

    Returns:
        The last entry of the list returned by ``render.render_vis``.
    """
    light_transforms = [
        transform.jitter(2),
    ]
    medium_transforms = [
        transform.pad(12, mode="constant", constant_value=.5),
        transform.jitter(8),
        transform.random_scale([1 + (i - 5) / 50. for i in range(11)]),
        transform.random_rotate(list(range(-10, 11)) + 5 * [0]),
        transform.jitter(4),
    ]
    heavy_transforms = [
        transform.pad(2, mode='constant', constant_value=.5),
        transform.jitter(4),
        transform.jitter(4),
        transform.jitter(8),
        transform.jitter(8),
        transform.jitter(8),
        transform.random_scale(
            [0.995**n for n in range(-5,80)]
            + [0.998**n for n in 2*list(range(20,40))]
        ),
        transform.random_rotate(
            list(range(-20,20)) + list(range(-10,10)) + list(range(-5,5)) + 5*[0]
        ),
        transform.jitter(2),
    ]
    transforms_options = [light_transforms, medium_transforms, heavy_transforms]

    def param_f():
        return param.image(icon_size, batch=directions.shape[0])

    # One direction objective per batch slot, minus the squared L2 penalty.
    per_direction = [
        objectives.direction_neuron(op_name, vec, batch=slot, cossim_pow=cossim_pow)
        for slot, vec in enumerate(directions)
    ]
    direction_term = objectives.Objective.sum(per_direction)
    l2_penalty = L2_amount * objectives.L2("input", 0.5) * objectives.L2("input", 0.5)
    obj = direction_term - l2_penalty

    snapshots = render.render_vis(
        model,
        obj,
        param_f,
        transforms=transforms_options[transforms_amount],
        thresholds=(n_steps//2, n_steps),
        verbose=False,
    )
    return snapshots[-1]
def caricature(img, model, layer, n_steps=512, cossim_pow=0.0, verbose=True):
    """Optimize one image per layer against ``img`` using ``dot_compare``.

    The batch fed to the model has ``img`` in slot 0 (through a placeholder)
    followed by one trainable image per requested layer.

    Args:
        img: input image; resized to ``model.image_shape[:2]``.
        model: model passed to ``render.make_vis_T``.
        layer: a layer name, or a tuple/list of layer names.
        n_steps: number of optimization steps.
        cossim_pow: forwarded to ``dot_compare``.
        verbose: when true, show ``result[1:]`` labelled with the layer names.

    Returns:
        The evaluated input batch (slot 0 is the fed image).

    Raises:
        TypeError: if ``layer`` is not a str, tuple or list.
    """
    if not isinstance(layer, (str, tuple, list)):
        raise TypeError("layer must be str, tuple or list")
    layers = [layer] if isinstance(layer, str) else layer

    with tf.Graph().as_default(), tf.Session() as session:
        resized = resize(img, model.image_shape[:2])

        # Batch slot 0 is the reference image, so layer k uses slot k + 1.
        objective = objectives.Objective.sum([
            1.0 * dot_compare(layer_name, cossim_pow=cossim_pow, batch=slot)
            for slot, layer_name in enumerate(layers, 1)
        ])

        t_input = tf.placeholder(tf.float32, resized.shape)
        trainable = param.image(
            resized.shape[0],
            decorrelate=True,
            fft=True,
            alpha=False,
            batch=len(layers),
        )
        batch_t = tf.concat([t_input[None], trainable], 0)

        crop_back = transform.crop_or_pad_to(*model.image_shape[:2])
        transforms = transform.standard_transforms + [crop_back]

        T = render.make_vis_T(model, objective, batch_t, transforms=transforms)
        vis_op = T("vis_op")
        t_image = T("input")

        tf.global_variables_initializer().run()
        feed = {t_input: resized}
        for _ in range(n_steps):
            session.run([vis_op], feed)

        result = t_image.eval(feed_dict=feed)

    if verbose:
        lucid.misc.io.showing.images(result[1:], layers)
    return result
def assert_gradient_ascent(objective, model, batch=None, alpha=False, shape=None):
    """Assert that optimizing ``objective`` for NUM_STEPS raises its value.

    Builds a fresh graph with a ``param.image`` input imported into ``model``
    under the ``import`` scope, maximizes the objective with Adam (learning
    rate 0.1) and checks that the loss after optimization is strictly larger
    than before.

    Args:
        objective: callable taking a layer-lookup function ``T`` and returning
            a loss tensor.
        model: model exposing ``import_graph`` and ``labels``.
        batch: batch size forwarded to ``param.image``.
        alpha: if true, request an alpha channel and collapse it with
            ``transform.collapse_alpha_random``.
        shape: optional ``[_, w, h, _]`` list; entries 1 and 2 size the image.
            Defaults to ``[1, 32, 32, 3]``.

    Raises:
        AssertionError: if the objective did not increase.
    """
    with tf.Graph().as_default() as graph, tf.Session() as sess:
        shape = shape or [1, 32, 32, 3]
        t_input = param.image(shape[1], h=shape[2], batch=batch, alpha=alpha)
        if alpha:
            t_input = transform.collapse_alpha_random()(t_input)
        model.import_graph(t_input, scope="import", forget_xy_shape=True)

        def T(layer):
            """Resolve a layer name to a tensor in the imported graph."""
            if layer == "input":
                return t_input
            if layer == "labels":
                return model.labels
            return graph.get_tensor_by_name("import/%s:0" % layer)

        loss_t = objective(T)
        # Adam minimizes, so negate the loss to perform ascent.
        opt_op = tf.train.AdamOptimizer(0.1).minimize(-loss_t)
        tf.global_variables_initializer().run()

        # Fetch both values as scalars. Previously the start value was a
        # one-element list and the end value a scalar, so the comparison only
        # worked through NumPy broadcasting.
        start_value = sess.run(loss_t)
        for _ in range(NUM_STEPS):
            sess.run(opt_op)
        end_value = sess.run(loss_t)

        print(start_value, end_value)
        assert start_value < end_value
def vis_traditional(
    self,
    feature_list=None,
    *,
    transforms=[transform.jitter(2)],
    l2_coeff=0.0,
    l2_layer_name=None,
):
    """Render one feature visualization per requested feature.

    Args:
        feature_list: a feature index, an iterable of feature indices, or
            ``None`` for every feature (``range(self.acts_reduced.shape[-1])``).
        transforms: transforms forwarded to ``render.render_vis``.
        l2_coeff: weight of an optional L2 penalty on ``l2_layer_name``.
        l2_layer_name: layer for the L2 penalty; required when ``l2_coeff``
            is non-zero.

    Returns:
        The last entry of the list returned by ``render.render_vis``; image
        ``i`` of the batch corresponds to ``feature_list[i]``.
    """
    if feature_list is None:
        feature_list = list(range(self.acts_reduced.shape[-1]))
    try:
        feature_list = list(feature_list)
    except TypeError:
        # A single, non-iterable feature index was given.
        feature_list = [feature_list]

    # The parameterization has exactly len(feature_list) batch slots, so the
    # slot index must be the position in the list, not the feature id. Using
    # the feature id addressed non-existent slots for any list other than
    # 0..n-1.
    obj = sum([
        objectives.direction_neuron(
            self.layer_name, self.channel_dirs[feature], batch=slot
        )
        for slot, feature in enumerate(feature_list)
    ])
    if l2_coeff != 0.0:
        assert (
            l2_layer_name is not None
        ), "l2_layer_name must be specified if l2_coeff is non-zero"
        obj -= objectives.L2(l2_layer_name) * l2_coeff
    param_f = lambda: param.image(64, batch=len(feature_list))
    return render.render_vis(
        self.model, obj, param_f=param_f, transforms=transforms
    )[-1]
def neuron_groups(img, filename, layer, n_groups=10, attr_classes=None, filenumber=0):
    """Factor a layer's activations into groups and save one icon per group.

    Activations of ``layer`` for ``img`` are reduced to ``n_groups`` factors
    with ``ChannelReducer(n_groups, "NMF")``; each factor is visualized with
    ``render.render_vis`` and written as ``<index>.png`` under
    ``../images/<filename>/``. Class attributions are written to
    ``attrs.txt`` in the same directory.

    Args:
        img: input image fed to the module-level ``model``.
        filename: name of the output sub-directory under ``../images/``.
        layer: layer name whose activations are factorized.
        n_groups: number of factors / icons.
        attr_classes: classes passed one by one to ``raw_class_group_attr``;
            ``None`` means no classes.
        filenumber: unused by this function.
    """
    # Compute activations
    dirname = '../images/' + filename+'/'
    if attr_classes is None:
        attr_classes = []
    with tf.Graph().as_default(), tf.Session():
        t_input = tf.placeholder_with_default(img, [None, None, 3])
        T = render.import_model(model, t_input, t_input)
        acts = T(layer).eval()

    # We'll use ChannelReducer (a wrapper around scikit learn's factorization tools)
    # to apply Non-Negative Matrix factorization (NMF).
    nmf = ChannelReducer(n_groups, "NMF")
    spatial_factors = nmf.fit_transform(acts)[0].transpose(2, 0, 1).astype("float32")
    channel_factors = nmf._reducer.components_.astype("float32")

    # Let's organize the channels based on their horizontal position in the image
    x_peak = np.argmax(spatial_factors.max(1), 1)
    ns_sorted = np.argsort(x_peak)
    spatial_factors = spatial_factors[ns_sorted]
    channel_factors = channel_factors[ns_sorted]

    # And create a feature visualization of each group
    param_f = lambda: param.image(80, batch=n_groups)
    obj = sum(objectives.direction(layer, channel_factors[i], batch=i)
              for i in range(n_groups))
    group_icons = render.render_vis(model, obj, param_f, verbose=False)[-1]

    # We'd also like to know about attribution
    # First, let's turn each group into a vector over activations
    group_vecs = [spatial_factors[i, ..., None] * channel_factors[i]
                  for i in range(n_groups)]
    attrs = np.asarray([raw_class_group_attr(img, layer, attr_class, group_vecs)
                        for attr_class in attr_classes])
    print( attrs )

    # Make sure the output directory exists. An already-existing directory is
    # fine; any other failure is raised here instead of being printed and then
    # resurfacing as a confusing error from open() below.
    os.makedirs(dirname, exist_ok=True)

    # Let's render the visualization!
    with open(dirname + '/attrs.txt', 'w') as f_w:
        f_w.write(str(attrs))
    for index, icon in enumerate(group_icons):
        imgdata=to_image_url(icon)
        print(imgdata)
        # NOTE(review): assumes to_image_url returns plain base64 text with no
        # "data:" prefix - confirm against its definition.
        imgdata = base64.b64decode(str(imgdata))
        print(imgdata)
        with open(dirname + str(index) + '.png', 'wb') as f_jpg:
            f_jpg.write(imgdata)
def arbitrary_channels_to_rgb(*args, **kwargs):
    """Arbitrary parametrization for testing.

    Builds a ``param.image`` with ``channels`` channels (10 when the keyword
    is missing or falsy), splits the channel axis into three consecutive
    bands and maps each band to the mean of its squared values, giving an
    R, G and B plane stacked on the last axis.
    """
    channels = kwargs.pop('channels', None) or 10
    full_im = param.image(*args, channels=channels, **kwargs)

    first_cut = channels // 3
    second_cut = 2 * channels // 3
    band_slices = (
        slice(None, first_cut),
        slice(first_cut, second_cut),
        slice(second_cut, None),
    )
    planes = [
        tf.reduce_mean(full_im[..., band]**2, axis=-1)
        for band in band_slices
    ]
    return tf.stack(planes, axis=-1)
def neuron_groups(model, img, layer, n_groups=6, attr_classes=[]):
    """Factor a layer's activations into groups and emit an HTML report.

    Activations of ``layer`` for ``img`` are reduced to ``n_groups``
    components with ``ChannelReducer(n_groups, "PCA")``. Each component is
    rendered as a feature visualization, converted to grayscale, and handed
    to ``generate_html('neuron_groups', ...)`` together with the spatial
    factors.

    Args:
        model: model to import and visualize.
        img: input image.
        layer: layer name whose activations are factorized.
        n_groups: number of components.
        attr_classes: classes passed one by one to ``raw_class_group_attr``.
    """
    # Compute activations
    with tf.Graph().as_default(), tf.Session():
        t_input = tf.placeholder_with_default(img, [None, None, 3])
        T = render.import_model(model, t_input, t_input)
        acts = T(layer).eval()

    # ChannelReducer wraps scikit-learn's factorization tools; this variant
    # uses the "PCA" reducer.
    reducer = ChannelReducer(n_groups, "PCA")
    print(layer, n_groups)
    reduced = reducer.fit_transform(acts)[0]
    spatial_factors = reduced.transpose(2, 0, 1).astype("float32")
    channel_factors = reducer._reducer.components_.astype("float32")

    # Order the groups by the horizontal position of their peak in the image.
    peak_x = np.argmax(spatial_factors.max(1), 1)
    order = np.argsort(peak_x)
    spatial_factors = spatial_factors[order]
    channel_factors = channel_factors[order]

    # Feature visualization of each group.
    def param_f():
        return param.image(80, batch=n_groups)

    obj = sum(
        objectives.direction(layer, channel_factors[k], batch=k)
        for k in range(n_groups)
    )
    group_icons = render.render_vis(model, obj, param_f, verbose=False)[-1]

    # Attribution: turn each group into a vector over activations first.
    group_vecs = [
        spatial_factors[k, ..., None] * channel_factors[k]
        for k in range(n_groups)
    ]
    attrs = np.asarray([
        raw_class_group_attr(img, layer, attr_class, model, group_vecs)
        for attr_class in attr_classes
    ])

    gray_scale_groups = [skimage.color.rgb2gray(icon) for icon in group_icons]

    # Assemble the payload and render the visualization.
    factor_urls = [
        _image_url(factor[..., None] / np.percentile(spatial_factors, 99) * [1, 0, 0])
        for factor in spatial_factors
    ]
    icon_urls = [_image_url(icon) for icon in gray_scale_groups]
    data = {
        "img": _image_url(img),
        "n_groups": n_groups,
        "spatial_factors": factor_urls,
        "group_icons": icon_urls,
    }
    generate_html('neuron_groups', data)
def __init__(self, pb_path, image_shape=[224, 224, 3], image_value_range=(-1, 1), input_name='input_1', threshold=2048, scale=224):
    """Create the wrapped model and remember the visualization settings.

    Args:
        pb_path: forwarded to ``self._create_model``.
        image_shape: forwarded to ``self._create_model``.
        image_value_range: forwarded to ``self._create_model``.
        input_name: forwarded to ``self._create_model``.
        threshold: stored as ``self.threshold``.
        scale: size passed to ``param.image`` by ``self.param_f``.
    """
    def make_image():
        # FFT-parameterized, colour-decorrelated image of the requested size.
        return param.image(scale, fft=True, decorrelate=True)

    self.model = self._create_model(
        pb_path,
        image_shape,
        image_value_range,
        input_name,
    )
    self.threshold = threshold
    self.param_f = make_image
def test_integration(decorrelate, fft):
    """Check that two optimization steps change the rendered image.

    Renders the ``mixed3a_pre_relu`` neuron objective (index 0) on a 16px
    image with the given ``decorrelate``/``fft`` settings, captures the
    result at thresholds 1 and 2, and asserts the two captures differ
    somewhere.
    """
    def make_param():
        return param.image(16, decorrelate=decorrelate, fft=fft)

    objective = objectives.neuron("mixed3a_pre_relu", 0)
    frames = render.render_vis(
        model,
        objective,
        param_f=make_param,
        thresholds=(1, 2),
        verbose=False,
        transforms=[],
    )
    first_frame, last_frame = frames[0], frames[-1]
    assert (first_frame != last_frame).any()
def feature_inversion(img, model, layer, n_steps=512, cossim_pow=0.0, verbose=True):
    """Optimize one image per layer against ``img`` using ``dot_compare``.

    The batch fed to the model has ``img`` in slot 0 (through a placeholder)
    followed by one trainable image per requested layer.

    Args:
        img: input image; converted with ``imgToModelSize(img, model)``.
        model: model passed to ``render.make_vis_T``.
        layer: a layer name, or a tuple/list of layer names.
        n_steps: number of optimization steps.
        cossim_pow: forwarded to ``dot_compare``.
        verbose: when true, show ``result[1:]`` labelled with the layer names.

    Returns:
        The evaluated input batch (slot 0 is the fed image).

    Raises:
        TypeError: if ``layer`` is not a str, tuple or list.
    """
    if not isinstance(layer, (str, tuple, list)):
        raise TypeError("layer must be str, tuple or list")
    layers = [layer] if isinstance(layer, str) else layer

    with tf.Graph().as_default(), tf.Session() as session:
        sized = imgToModelSize(img, model)

        # Batch slot 0 is the reference image, so layer k uses slot k + 1.
        objective = objectives.Objective.sum([
            1.0 * dot_compare(layer_name, cossim_pow=cossim_pow, batch=slot)
            for slot, layer_name in enumerate(layers, 1)
        ])

        t_input = tf.placeholder(tf.float32, sized.shape)
        trainable = param.image(
            sized.shape[0],
            decorrelate=True,
            fft=True,
            alpha=False,
            batch=len(layers),
        )
        batch_t = tf.concat([t_input[None], trainable], 0)

        scale_choices = [0.9, 0.95, 1.05, 1.1] + [1] * 4
        angle_choices = list(range(-5, 5)) + [0] * 5
        transforms = [
            transform.pad(8, mode='constant', constant_value=.5),
            transform.jitter(8),
            transform.random_scale(scale_choices),
            transform.random_rotate(angle_choices),
            transform.jitter(2),
        ]

        T = render.make_vis_T(model, objective, batch_t, transforms=transforms)
        vis_op = T("vis_op")
        t_image = T("input")

        tf.global_variables_initializer().run()
        feed = {t_input: sized}
        for _ in range(n_steps):
            session.run([vis_op], feed)

        result = t_image.eval(feed_dict=feed)

    if verbose:
        lucid.misc.io.showing.images(result[1:], layers)
    return result
def generate(model, args):
    """Render a feature visualization for one neuron and return it as an image.

    Args:
        model: model passed to ``render.make_vis_T``.
        args: mapping with the keys ``layer`` (text whose first
            space-separated token is the layer id), ``neuron``, ``size``,
            ``transform_min``, ``transform_max`` and ``transforms`` (truthy to
            enable the jitter/scale/rotate transforms).

    Returns:
        ``{'image': PIL image}`` holding the optimized full-size image.
    """
    print('[GENERATE] Ran with layer {} and neuron {}'.format(
        args['layer'], args['neuron']))

    layer_id = args['layer'].split(' ')[0]
    layer_neuron = '{}:{}'.format(layer_id, args['neuron'])
    s = int(args['size'])
    min_scale = args['transform_min']
    # NOTE(review): 'transform_max' is read but never applied - the scale list
    # below always spans min_scale .. min_scale + 1.9. Confirm whether the
    # upper bound was meant to limit that range. The lookup is kept so a
    # missing key still fails as before.
    max_scale = args['transform_max']

    # https://github.com/tensorflow/lucid/issues/148
    with tf.Graph().as_default(), tf.Session():
        t_img = param.image(s)
        # Optimize on a random half-size crop of the full image.
        crop_W = int(s / 2)
        t_offset = tf.random.uniform((2, ), 0, s - crop_W, dtype="int32")
        t_img_crop = t_img[:, t_offset[0]:t_offset[0] + crop_W,
                           t_offset[1]:t_offset[1] + crop_W]

        if args['transforms']:
            transforms = [
                transform.jitter(2),
                transform.random_scale(
                    [min_scale + n / 10. for n in range(20)]),
                transform.random_rotate(range(-10, 11)),
                transform.jitter(2)
            ]
            T = render.make_vis_T(model, layer_neuron, t_img_crop,
                                  transforms=transforms)
        else:
            T = render.make_vis_T(model, layer_neuron, t_img_crop)

        # tf.initialize_all_variables() is deprecated; use the replacement
        # the rest of this file already relies on.
        tf.global_variables_initializer().run()

        # Look the op up once instead of on each of the 1024 steps.
        vis_op = T("vis_op")
        for _ in range(1024):
            vis_op.run()

        img = t_img.eval()[0]

    # https://github.com/tensorflow/lucid/issues/108
    img = Image.fromarray(np.uint8(img * 255))
    return {'image': img}
def visualization(learning_rate, neuron, channel, contrast, NRO_IMG, SAVE_P):
    """Plot the change after two optimization steps and save the figure.

    Renders ``objectives.neuron(neuron, channel)`` on the module-level
    ``model`` with an Adam optimizer, capturing the image at thresholds 0 and
    2. The saved figure shows ``contrast * (second - first) + 0.5``.

    Args:
        learning_rate: Adam learning rate.
        neuron: first argument to ``objectives.neuron``.
        channel: second argument to ``objectives.neuron``.
        contrast: multiplier applied to the image difference; tune it until
            the difference is visible.
        NRO_IMG: passed to ``param.image`` as ``init_val``.
        SAVE_P: destination passed to ``plt.savefig``.
    """
    def make_param():
        # 256 is the image size.
        return param.image(256, fft=True, decorrelate=True, init_val=NRO_IMG)

    adam = tf.train.AdamOptimizer(learning_rate)
    objective = objectives.neuron(neuron, channel)
    imgs = render.render_vis(
        model,
        objective,
        optimizer=adam,
        transforms=[],
        param_f=make_param,
        thresholds=(0,2),
        verbose=False,
    )
    before = imgs[0][0]
    after = imgs[1][0]

    plt.figure()
    plt.imshow(before)
    plt.axis('off')
    # Drawn over the first image on the same axes.
    plt.imshow(contrast*(after-before) + 0.5)
    plt.savefig(SAVE_P, bbox_inches='tight')
def render_vis_with_loss(model, objective_f, size, optimizer=None, transforms=[], thresholds=(256, ), print_objectives=None, relu_gradient_override=True):
    """Run a feature visualization and return the final image with its loss.

    Args:
        model: model passed to ``render.make_vis_T``.
        objective_f: objective passed to ``render.make_vis_T``.
        size: size passed to ``param.image``.
        optimizer: optional optimizer forwarded to ``render.make_vis_T``.
        transforms: transforms forwarded to ``render.make_vis_T``.
        thresholds: step indices at which the image and loss are captured;
            optimization runs for ``max(thresholds) + 1`` steps.
        print_objectives: forwarded to ``render.make_print_objective_func``.
        relu_gradient_override: forwarded to ``render.make_vis_T``.

    Returns:
        ``(image, loss)`` captured at the last threshold that was reached.
    """
    t_param = param.image(size)
    captured = []

    with t_param.graph.as_default(), tf.Session() as session:
        T = render.make_vis_T(
            model,
            objective_f,
            param_f=t_param,
            optimizer=optimizer,
            transforms=transforms,
            relu_gradient_override=relu_gradient_override,
        )
        # Built for parity with render_vis; not invoked here.
        render.make_print_objective_func(print_objectives, T)

        loss_t = T("loss")
        vis_op = T("vis_op")
        t_image = T("input")

        tf.global_variables_initializer().run()
        last_step = max(thresholds)
        for step in range(last_step + 1):
            loss_value, _ = session.run([loss_t, vis_op])
            if step not in thresholds:
                continue
            captured.append((t_image.eval(), loss_value))

    tf.compat.v1.reset_default_graph()
    final_image, final_loss = captured[-1]
    return final_image, final_loss
# Baseline render: optimize a single channel objective directly in pixel
# space (no FFT, no colour decorrelation, no transforms) with Adam.
LEARNING_RATE = 0.05
optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

# Alternative objectives tried previously:
# objective = "mixed4b_pre_relu:452"
# objective = "mixed3b_pre_relu:10"
objective = "mixed5b_pre_relu:1"

# Steps at which render_vis captures an image.
thresholds = (1, 32, 128, 256)  # (1, 32, 128, 256, 2048)
imgs = render.render_vis(
    model,
    objective,
    optimizer=optimizer,
    transforms=[],
    param_f=lambda: param.image(64, fft=False, decorrelate=False),
    thresholds=thresholds,
    verbose=True)
fig([imgs])

# Transform settings. The list below is only defined here; it is not used by
# the render call above.
JITTER = 1
ROTATE = 5
SCALE = 1.1
transforms = [
    # Pad by twice the jitter amount before jittering.
    transform.pad(2 * JITTER),
    transform.jitter(JITTER),
    # 21 scale factors from SCALE**-1 to SCALE**1.
    transform.random_scale([SCALE**(n / 10.) for n in range(-10, 11)]),
    # Rotation choices from -ROTATE to +ROTATE inclusive.
    transform.random_rotate(range(-ROTATE, ROTATE + 1))
]
from lucid.modelzoo.vision_base import Model


class FrozenNetwork(Model):
    """Lucid model wrapper around the frozen graph at ``network_protobuf_path``."""

    model_path = network_protobuf_path
    image_shape = [256, 256, 3]
    image_value_range = (0, 1)
    input_name = 'input_1'


network = FrozenNetwork()
network.load_graphdef()

# Visualize one channel of the chosen layer on a 512px FFT-parameterized,
# colour-decorrelated image.
obj = objectives.channel(layer_name, neuron_index)
param_f = lambda: param.image(512, fft=True, decorrelate=True)
# NOTE(review): 2024 may be a typo for 2048 - confirm the intended step count.
renders = render.render_vis(network, obj, param_f, thresholds=(2024, ))

# Pick the last projection step image by sorted filename.
last_image_file = sorted(glob.glob("projection/out/*step*.png"))[-1]
stylegan_render = imageio.imread(last_image_file)

# First image of the only captured threshold, converted to uint8.
lucid_render = renders[0][0]
lucid_render = (np.clip(lucid_render, 0, 1) * 255).astype(np.uint8)
h, w = lucid_render.shape[:2]

# Side-by-side canvas: Lucid render on the left, the projection image
# (resized to match) on the right.
canvas = PIL.Image.new('RGB', (w * 2, h), 'white')
canvas.paste(Image.fromarray(lucid_render), (0, 0))
canvas.paste(
    Image.fromarray(stylegan_render).resize((w, h), PIL.Image.LANCZOS),
    (w, 0))
# The output name uses the part of the layer name before the first "/".
canvas.save("projection/combined_%s_%03d.png" %
            (layer_name.split("/")[0], neuron_index))
def param_f():
    """Build a ``param.image`` of size ``W`` with one image per row of ``acts_flat``."""
    n_images = acts_flat.shape[0]
    return param.image(W, batch=n_images)
def get_vm_model_image_losses(args, layer=None):
    """Render channel visualizations for 64 units of ``layer`` and sum losses.

    Units are processed in batches of 16: each batch builds one objective
    made of 16 ``objectives.channel`` terms (unit ``start_idx + k`` on batch
    slot ``k``) and calls ``render_vis`` once.

    Args:
        args: namespace providing at least ``model_type``, ``prep_type``,
            ``setting_name``, ``cfg_kwargs`` (a JSON string), ``from_scratch``
            and ``load_from_ckpt``. The ``load_*`` fields and
            ``model_cache_dir`` are read when no checkpoint path is given.
        layer: layer name; defaults to ``"encode_9"``.

    Returns:
        ``(layer, images, all_losses)``: images from all batches concatenated
        on axis 0, and the per-batch losses summed on axis 0.

    Raises:
        NotImplementedError: if ``args.model_type`` is neither ``'vm_model'``
            nor ``'simclr_model'``.
    """
    # Variable-name prefix that identifies the model's own variables.
    model_name_scope = None
    if args.model_type == 'vm_model':
        model_name_scope = 'encode'
    elif args.model_type == 'simclr_model':
        model_name_scope = 'base_model'
    else:
        raise NotImplementedError('Model type %s not supported!' % args.model_type)

    def model(t_image):
        """Build the network on ``t_image``, load or init weights, return outputs."""
        # Rescale the input by 255 before it is fed to the network.
        t_image = t_image * 255
        ending_points, _ = get_network_outputs(
            {'images': t_image},
            prep_type=args.prep_type,
            model_type=args.model_type,
            setting_name=args.setting_name,
            module_name=['encode'],
            **json.loads(args.cfg_kwargs))

        # Restrict the saver to variables under the model's scope.
        all_vars = tf.global_variables()
        var_list = [x for x in all_vars if x.name.startswith(model_name_scope)]
        saver = tf.train.Saver(var_list=var_list)

        if not args.from_scratch:
            # Resolve the checkpoint: fetch it through tf_model_loader unless
            # an explicit path was given.
            if not args.load_from_ckpt:
                model_ckpt_path = tf_model_loader.load_model_from_mgdb(
                    db=args.load_dbname,
                    col=args.load_colname,
                    exp=args.load_expId,
                    port=args.load_port,
                    cache_dir=args.model_cache_dir,
                    step_num=args.load_step,
                )
            else:
                model_ckpt_path = args.load_from_ckpt
            saver.restore(tf.get_default_session(), model_ckpt_path)
        else:
            # No checkpoint requested: run the global and local initializers.
            SESS = tf.get_default_session()
            init_op_global = tf.global_variables_initializer()
            SESS.run(init_op_global)
            init_op_local = tf.local_variables_initializer()
            SESS.run(init_op_local)

        # get_collection_ref returns the live collection list, so removals
        # below change the graph's TRAINABLE_VARIABLES collection itself.
        all_train_ref = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)

        def _remove_others(vars_ref):
            # Drops every variable whose op name starts with the model scope
            # from the trainable collection. Iterates over a copy because the
            # list is mutated in place.
            cp_vars_ref = copy.copy(vars_ref)
            for each_v in cp_vars_ref:
                if each_v.op.name.startswith(model_name_scope):
                    vars_ref.remove(each_v)

        _remove_others(all_train_ref)
        return ending_points

    layer = layer or "encode_9"
    batch_size = 16
    param_f = lambda: param.image(224, batch=batch_size)
    num_of_units = 64
    images = []
    all_losses = []
    for start_idx in range(0, num_of_units, batch_size):
        # Unit (start_idx + k) is optimized on batch slot k.
        obj = objectives.channel(layer, 0 + start_idx, 0)
        for idx in range(1, batch_size):
            obj += objectives.channel(layer, idx + start_idx, idx)
        image, losses = render_vis(model, obj, param_f, model_name_scope=model_name_scope)
        images.append(image)
        all_losses.append(losses)
    images = np.concatenate(images, axis=0)
    all_losses = np.sum(all_losses, axis=0)
    return layer, images, all_losses
content_weight = 100.0 # Style Gram matrix weighted average decay coefficient style_decay = 0.95 sess = create_session(timeout_sec=0) # t_fragments is used to feed rasterized UV coordinates for the current view. # Channels: [U, V, _, Alpha]. Alpha is 1 for pixels covered by the object, and # 0 for background. t_fragments = tf.placeholder(tf.float32, [None, None, 4]) t_uv = t_fragments[...,:2] t_alpha = t_fragments[...,3:] # Texture atlas to optimize t_texture = param.image(TEXTURE_SIZE, fft=True, decorrelate=True)[0] # Variable to store the original mesh texture used to render content views content_var = tf.Variable(tf.zeros([TEXTURE_SIZE, TEXTURE_SIZE, 3]), trainable=False) # Sample current and original textures with provided pixel data t_joined_texture = tf.concat([t_texture, content_var], -1) t_joined_frame = sample_bilinear(t_joined_texture, t_uv) * t_alpha t_frame_current, t_frame_content = t_joined_frame[...,:3], t_joined_frame[...,3:] t_joined_frame = tf.stack([t_frame_current, t_frame_content], 0) # Feeding the rendered frames to the Neural Network t_input = tf.placeholder_with_default(t_joined_frame, [None, None, None, 3]) model.import_graph(t_input) # style loss