def build(self, input_shape):
    self.input_spec = [InputSpec(shape=input_shape)]
    self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']}

    for l in range(self.nb_layers):
        for c in ['i', 'f', 'c', 'o']:
            act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation
            self.conv_layers[c].append(
                Convolution2D(self.R_stack_sizes[l], self.R_filt_sizes[l],
                              padding='same', data_format="channels_last",
                              activation=act))

        act = 'relu' if l == 0 else self.A_activation
        self.conv_layers['ahat'].append(
            Convolution2D(self.stack_sizes[l], self.Ahat_filt_sizes[l],
                          padding='same', data_format="channels_last",
                          activation=act))

        if l < self.nb_layers - 1:
            self.conv_layers['a'].append(
                Convolution2D(self.stack_sizes[l + 1], self.A_filt_sizes[l],
                              padding='same', data_format="channels_last",
                              activation=self.A_activation))

    self.upsample = UpSampling2D(data_format="channels_last")  # upsampling
    self.pool = MaxPooling2D(data_format="channels_last")      # downsampling

    self._trainable_weights = []
    nb_row, nb_col = (input_shape[-3], input_shape[-2])

    # Build each internal conv layer with the input shape it will receive.
    for c in sorted(self.conv_layers.keys()):
        for l in range(len(self.conv_layers[c])):
            ds_factor = 2 ** l
            if c == 'ahat':
                nb_channels = self.R_stack_sizes[l]
            elif c == 'a':
                nb_channels = 2 * self.stack_sizes[l]
            else:  # i, c, o, f
                nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l]
                if l < self.nb_layers - 1:
                    nb_channels += self.R_stack_sizes[l + 1]
            # Spatial dimensions shrink by a factor of 2 per layer.
            in_shape = (input_shape[0], nb_row // ds_factor,
                        nb_col // ds_factor, nb_channels)
            self.conv_layers[c][l].build(in_shape)
            self._trainable_weights += self.conv_layers[c][l].trainable_weights

    self.states = [None] * self.nb_layers * 3  # ['r', 'c', 'e']
    if self.extrap_start_time is not None:
        self.t_extrap = K.variable(np.array(self.extrap_start_time), 'int32')
        self.states += [None] * 2
def get_initial_states(self, x):
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    # Collapse x down to (samples, nb_channels) so the initial states pick up
    # the batch dimension from the actual input tensor.
    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -2
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        # Pass the prediction through the states so it can stand in for the
        # actual frame at t+1 when extrapolating.
        states_to_pass.append('ahat')
        nlayers_to_pass['ahat'] = 1

    for u in states_to_pass:  # 'r', 'c' and 'e' states
        for l in range(nlayers_to_pass[u]):  # initialize every state with zeros
            # Each layer operates at half the spatial resolution of the layer
            # below it, hence the downsampling factor.
            ds_factor = 2 ** l
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[l]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[l]
            elif u == 'ahat':
                stack_size = self.stack_sizes[l]
            output_size = nb_row * nb_col * stack_size  # flattened size
            reducer = K.zeros((input_shape[self.channel_axis],
                               output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            output_shp = [-1, nb_row, nb_col, stack_size]
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]

    if self.extrap_start_time is not None:
        # The last state will correspond to the current timestep.
        initial_states += [K.variable(0, 'int32')]
    return initial_states
# We are going to use VGG16 pretrained on ImageNet. To match the VGG16 paper,
# "Very Deep Convolutional Networks for Large-Scale Image Recognition"
# (Simonyan and Zisserman, 2015), we need to subtract the mean RGB values
# from the channels. Those values have been computed on the ImageNet dataset.
# We also need to flip the ordering of the channels to BGR.
content_array[:, :, :, 0] -= 103.939
content_array[:, :, :, 1] -= 116.779
content_array[:, :, :, 2] -= 123.68
style_array[:, :, :, 0] -= 103.939
style_array[:, :, :, 1] -= 116.779
style_array[:, :, :, 2] -= 123.68
content_array = content_array[:, :, :, ::-1]
style_array = style_array[:, :, :, ::-1]

# Create the backend variables, in our case TensorFlow.
content_image = K.variable(content_array)
style_image = K.variable(style_array)
combination_image = K.placeholder((1, height, width, 3))

# Concatenate all tensors into a single batch.
input_tensor = K.concatenate([content_image, style_image, combination_image],
                             axis=0)

# Load the VGG16 model from Keras. We are only interested in getting the
# features from the different layers, hence we omit the dense layers at the top.
model = applications.VGG16(input_tensor=input_tensor,
                           weights='imagenet',
                           include_top=False)

# Store the layers of the model. We'll need that to refer to the layers we
# want to use.
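# To view the optimized combination image later, the transform above has to be
# undone: flip BGR back to RGB, add the ImageNet means back, and clip to valid
# pixel values. This helper is not part of the original script, only a minimal
# sketch of the inverse transform, assuming a float array of shape
# (height, width, 3) produced by the preprocessing above.
import numpy as np

def deprocess_array(x):
    x = x.reshape((height, width, 3)).astype('float64')
    x = x[:, :, ::-1]       # BGR -> RGB
    x[:, :, 0] += 103.939   # add the subtracted means back, channel by channel
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    return np.clip(x, 0, 255).astype('uint8')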
content_array[:, :, :, 0] -= 103.939
content_array[:, :, :, 1] -= 116.779
content_array[:, :, :, 2] -= 123.68
content_array = content_array[:, :, :, ::-1]

style_array[:, :, :, 0] -= 103.939
style_array[:, :, :, 1] -= 116.779
style_array[:, :, :, 2] -= 123.68
style_array = style_array[:, :, :, ::-1]

content_image = backend.variable(content_array)
style_image = backend.variable(style_array)
combination_image = backend.placeholder((1, height, width, 3))

input_tensor = backend.concatenate(
    [content_image, style_image, combination_image], axis=0)

model = applications.VGG16(input_tensor=input_tensor,
                           weights='imagenet',
                           include_top=False)
def main(_):
    # Disable all training-specific operations.
    K.set_learning_phase(0)

    model = applications.inception_v3.InceptionV3(weights='imagenet',
                                                  include_top=False)
    layer_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }
    layer_dict = dict([(layer.name, layer) for layer in model.layers])

    loss = K.variable(0.)
    for layer_name in layer_contributions:
        coeff = layer_contributions[layer_name]
        activation = layer_dict[layer_name].output
        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        # Avoid artifacts by only involving non-border pixels.
        loss += coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling

    # Start the gradient-ascent process.
    dream = model.input
    grads_list = K.gradients(loss, dream)
    grads = grads_list[0]
    # Trick: normalize the gradients.
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)
    fetch_loss_and_grads = K.function(inputs=[dream], outputs=[loss, grads])

    def gradient_ascent(x, iterations, step_rate, max_loss=None):
        for i in range(iterations):
            loss_value, grads_value = fetch_loss_and_grads([x])
            if max_loss is not None and loss_value > max_loss:
                break
            print('@{:4d}: {:.4f}'.format(i, loss_value))
            x += step_rate * grads_value
        return x

    img = preprocess_img(FLAGS.img_path)

    original_shape = img.shape[1:3]
    successive_shapes = [original_shape]
    for i in range(1, NUM_OCTAVES):
        shape = tuple([int(dim / (OCTAVES_SCLAE ** i)) for dim in original_shape])
        successive_shapes.append(shape)
    # Process the octaves from smallest to largest.
    successive_shapes = successive_shapes[::-1]

    original_img = np.copy(img)
    shrunk_original_img = resize_img(img, successive_shapes[0])

    for shape in successive_shapes:
        print('Preprocess image with shape: {}'.format(shape))
        img = resize_img(img, shape)
        img = gradient_ascent(img,
                              iterations=FLAGS.iterations,
                              step_rate=FLAGS.step_rate,
                              max_loss=MAX_LOSS)

        same_size_original = resize_img(original_img, shape)
        if FLAGS.repair_lost_detail:
            # Re-inject the detail that was lost when the image was shrunk.
            upscale_shrunk_original_img = resize_img(shrunk_original_img, shape)
            lost_detail = same_size_original - upscale_shrunk_original_img
            img += lost_detail
        shrunk_original_img = same_size_original
        save_img(img, filename='dream_at_scale_{}.png'.format(str(shape)))

    save_img(img, filename='dream.png')
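# preprocess_img, resize_img and save_img are project helpers defined outside
# this snippet. The sketch below only illustrates what they are assumed to do,
# based on how they are called above (InceptionV3-style preprocessing, simple
# spatial resizing, and writing a PNG); it is not the original implementation.
import numpy as np
import scipy.ndimage
import imageio
from keras.preprocessing import image
from keras.applications import inception_v3

def preprocess_img(img_path):
    # Load an image and scale it to the [-1, 1] range expected by InceptionV3.
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    return inception_v3.preprocess_input(x)

def resize_img(x, size):
    # Zoom a (1, rows, cols, 3) batch to the requested spatial size.
    factors = (1, float(size[0]) / x.shape[1], float(size[1]) / x.shape[2], 1)
    return scipy.ndimage.zoom(x, factors, order=1)

def deprocess_img(x):
    # Undo the InceptionV3 preprocessing and convert back to uint8 pixels.
    x = x.reshape((x.shape[1], x.shape[2], 3))
    return np.clip((x / 2. + 0.5) * 255., 0, 255).astype('uint8')

def save_img(x, filename):
    imageio.imwrite(filename, deprocess_img(x))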
def __init__(self, hp_lambda, **kwargs):
    super(GradientReversal, self).__init__(**kwargs)
    self._hp_lambda = hp_lambda
    self.hp_lambda = K.variable(hp_lambda)
    self.supports_masking = False
    self.op = ReverseGradient(self.hp_lambda)
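# ReverseGradient is defined elsewhere in the project. A typical gradient
# reversal op (as used in domain-adversarial training, Ganin and Lempitsky,
# 2015) acts as the identity in the forward pass and multiplies the gradient
# by -hp_lambda in the backward pass. A minimal sketch of that behaviour using
# only the Keras backend (an assumption, not necessarily this project's
# implementation):
import keras.backend as K

def reverse_gradient(x, hp_lambda):
    # Forward value: -hp_lambda * x + (1 + hp_lambda) * x == x.
    # Backward pass: the stop_gradient term contributes no gradient, so the
    # gradient flowing through this expression is scaled by -hp_lambda.
    return -hp_lambda * x + K.stop_gradient((1.0 + hp_lambda) * x)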
def main(_):
    width, height = preprocessing.image.load_img(FLAGS.target_img_path).size
    gen_img_height = 400
    gen_img_width = int(width * gen_img_height / height)

    target_x = preprocess_img(FLAGS.target_img_path,
                              target_size=(gen_img_height, gen_img_width))
    target_img = K.constant(target_x)
    style_x = preprocess_img(FLAGS.style_img_path,
                             target_size=(gen_img_height, gen_img_width))
    style_img = K.constant(style_x)
    combination_img = K.placeholder(shape=(1, gen_img_height, gen_img_width, 3))

    input_tensor = K.concatenate([target_img, style_img, combination_img],
                                 axis=0)

    model = applications.vgg19.VGG19(input_tensor=input_tensor,
                                     weights='imagenet',
                                     include_top=False)
    model.summary()

    outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
    content_layer = 'block5_conv2'
    style_layers = [
        'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1',
        'block5_conv1'
    ]
    total_variation_weight = 1e-4
    style_weight = 1.0
    content_weight = 0.025

    loss = K.variable(0.)
    layer_features = outputs_dict[content_layer]
    target_img_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(target_img_features,
                                          combination_features)
    for layer_name in style_layers:
        layer_features = outputs_dict[layer_name]
        style_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_features, combination_features,
                        target_size=(gen_img_height, gen_img_width))
        loss += (style_weight / len(style_layers)) * sl
    loss += total_variation_weight * total_variation_loss(
        combination_img, target_size=(gen_img_height, gen_img_width))

    # Set up gradient descent.
    grads_list = K.gradients(loss, combination_img)
    grads = grads_list[0]

    fetch_loss_and_grads = K.function(inputs=[combination_img],
                                      outputs=[loss, grads])
    lossAndGradsCache = LossAndGradsCache(fetch_loss_and_grads,
                                          target_size=(gen_img_height,
                                                       gen_img_width))

    x = preprocess_img(FLAGS.target_img_path,
                       target_size=(gen_img_height, gen_img_width))
    x = x.flatten()
    for i in range(FLAGS.iterations):
        start_time = time.time()
        x, min_val, info = fmin_l_bfgs_b(lossAndGradsCache.loss,
                                         x,
                                         fprime=lossAndGradsCache.grads,
                                         maxfun=20)
        print('@{:4d}: {:.4f}'.format(i + 1, min_val))

        x_copy = x.copy().reshape((gen_img_height, gen_img_width, 3))
        print(np.min(x_copy), np.mean(x_copy), np.max(x_copy))

        img = deprocess_img(x_copy)
        os.makedirs('out', exist_ok=True)
        filename = 'out/result_{:04d}.png'.format(i + 1)
        imsave(filename, img)
        print('Iteration took {:.1f}s'.format(time.time() - start_time))
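# content_loss, style_loss and total_variation_loss are imported from elsewhere
# in the project. The sketch below shows the usual Gatys et al. formulation
# these helpers are assumed to follow (an assumption, not the project's exact
# code); target_size is taken to carry (gen_img_height, gen_img_width).
import keras.backend as K

def gram_matrix(x):
    # Channel-wise feature correlations of a (rows, cols, channels) feature map.
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(features, K.transpose(features))

def content_loss(base, combination):
    return K.sum(K.square(combination - base))

def style_loss(style, combination, target_size):
    channels = 3
    size = target_size[0] * target_size[1]
    s = gram_matrix(style)
    c = gram_matrix(combination)
    return K.sum(K.square(s - c)) / (4. * (channels ** 2) * (size ** 2))

def total_variation_loss(x, target_size):
    # Encourages spatial smoothness in the generated image.
    h, w = target_size
    a = K.square(x[:, :h - 1, :w - 1, :] - x[:, 1:, :w - 1, :])
    b = K.square(x[:, :h - 1, :w - 1, :] - x[:, :h - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))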