def build_model(self):
    """Build the critic network mapping (state, action) pairs to Q-values."""
    reg = 1e-5  # L2 penalty shared by every Dense kernel in this network

    def dense_bn_lrelu(units, x):
        # Dense -> BatchNorm -> LeakyReLU block with the common L2 penalty.
        x = layers.Dense(units=units,
                         kernel_regularizer=regularizers.l2(reg))(x)
        x = layers.BatchNormalization()(x)
        return layers.LeakyReLU(1e-2)(x)

    input_states = layers.Input(shape=(self.state_size, ),
                                name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ),
                                 name='input_actions')

    # State pathway: two hidden blocks (32 -> 64 units).
    state_branch = dense_bn_lrelu(32, input_states)
    state_branch = dense_bn_lrelu(64, state_branch)

    # Action pathway: one hidden block (64 units, matching the state output).
    action_branch = dense_bn_lrelu(64, input_actions)

    # Merge both pathways and process with one more hidden block.
    merged = layers.add([state_branch, action_branch])
    merged = dense_bn_lrelu(32, merged)

    # Linear head producing a single Q-value; small uniform init keeps the
    # initial Q estimates near zero.
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(reg),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                      maxval=5e-3),
        name='output_Q_values')(merged)

    self.model = models.Model(inputs=[input_states, input_actions],
                              outputs=Q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=1e-2), loss='mse')

    # Expose dQ/da so the actor can perform its policy-gradient update.
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def gradient_penalty(y_true, y_pred, interpolate, lamb):
    """WGAN-GP penalty: lamb * E[(||d y_pred / d interpolate||_2 - 1)^2]."""
    grads = K.gradients(y_pred, interpolate)[0]
    # Per-sample L2 norm: sum squared gradients over every non-batch axis.
    reduce_axes = np.arange(1, len(grads.shape))
    sq_sum = K.sum(K.square(grads), axis=reduce_axes)
    l2_norm = K.sqrt(sq_sum)
    # Penalize deviation of each sample's gradient norm from 1.
    return K.mean(lamb * K.square(1 - l2_norm))
def build_model(self):
    """Build the critic: (state, action) inputs -> scalar Q-value output."""
    inputStates = layers.Input(shape=(self.state_size, ), name='inputStates')
    inputActions = layers.Input(shape=(self.action_size, ),
                                name='inputActions')

    # State pathway: two Dense -> BatchNorm -> LeakyReLU -> Dropout stages.
    state_net = layers.Dense(units=128, activation='linear')(inputStates)
    state_net = layers.BatchNormalization()(state_net)
    state_net = layers.LeakyReLU(0.01)(state_net)
    state_net = layers.Dropout(0.3)(state_net)
    state_net = layers.Dense(units=256, activation='linear')(state_net)
    state_net = layers.BatchNormalization()(state_net)
    state_net = layers.LeakyReLU(0.01)(state_net)
    state_net = layers.Dropout(0.3)(state_net)

    # Action pathway.
    # NOTE(review): here LeakyReLU comes BEFORE BatchNormalization — the
    # opposite order of the state pathway. Preserved as-is; confirm intent.
    action_net = layers.Dense(units=256, activation='linear')(inputActions)
    action_net = layers.LeakyReLU(0.01)(action_net)
    action_net = layers.BatchNormalization()(action_net)
    action_net = layers.Dropout(0.5)(action_net)

    # Merge both pathways, then one more hidden stage.
    merged = layers.add([state_net, action_net])
    merged = layers.Dense(units=256, activation='linear')(merged)
    merged = layers.BatchNormalization()(merged)
    merged = layers.LeakyReLU(0.01)(merged)

    # Linear output head producing the Q-value.
    Qvalues = layers.Dense(units=1, activation=None,
                           name='outputQvalues')(merged)

    self.model = models.Model(inputs=[inputStates, inputActions],
                              outputs=Qvalues)
    self.model.compile(optimizer=optimizers.Adam(), loss='mse')

    # Gradient of Q w.r.t. the actions, consumed by the actor's update.
    actionGradients = K.gradients(Qvalues, inputActions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=actionGradients)
def __init__(self, input_tensor, losses, input_range=(0, 255),
             wrt_tensor=None, norm_grads=True):
    """Creates an optimizer that minimizes weighted loss function.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)`
            if `image_data_format= channels_first` or `(samples, image_dims..., channels)`
            if `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        input_range: Specifies the input range as a `(min, max)` tuple. This is used to rescale the
            final optimized input to the given range. (Default value=(0, 255))
        wrt_tensor: Short for, with respect to. This instructs the optimizer that the aggregate loss from `losses`
            should be minimized with respect to `wrt_tensor`.
            `wrt_tensor` can be any tensor that is part of the model graph. Default value is set to None
            which means that loss will simply be minimized with respect to `input_tensor`.
        norm_grads: True to normalize gradients. Normalization avoids very small or large gradients
            and ensures a smooth gradient descent process. If you want the actual gradient
            (for example, visualizing attention), set this to false.
    """
    self.input_tensor = input_tensor
    self.input_range = input_range
    self.loss_names = []
    self.loss_functions = []
    # Default: minimize with respect to the input tensor itself.
    self.wrt_tensor = self.input_tensor if wrt_tensor is None else wrt_tensor

    # Accumulate the weighted sum of the individual symbolic losses.
    overall_loss = None
    for loss, weight in losses:
        # Perf optimization. Don't build loss function with 0 weight.
        if weight != 0:
            loss_fn = weight * loss.build_loss()
            overall_loss = loss_fn if overall_loss is None else overall_loss + loss_fn
            # Track names/tensors so each term can be reported separately.
            self.loss_names.append(loss.name)
            self.loss_functions.append(loss_fn)

    # Compute gradient of overall with respect to `wrt` tensor.
    grads = K.gradients(overall_loss, self.wrt_tensor)[0]
    if norm_grads:
        # Normalize by the RMS magnitude (epsilon avoids division by zero).
        grads = grads / (K.sqrt(K.mean(K.square(grads))) + K.epsilon())

    # The main function to compute various quantities in optimization loop.
    self.compute_fn = K.function(
        [self.input_tensor, K.learning_phase()],
        self.loss_functions + [overall_loss, grads, self.wrt_tensor])
def gradCAM(model, dog_x, class_idx):
    """Grad-CAM heatmap for class `class_idx` given a preprocessed input."""
    class_score = model.output[:, class_idx]
    print('model_class_output shape', class_score.shape)

    conv_layer = model.get_layer('block5_conv3')
    grads = K.gradients(class_score, conv_layer.output)[0]
    # Average gradients over batch and spatial axes: one weight per channel.
    channel_weights = K.mean(grads, axis=(0, 1, 2))

    fetch = K.function(inputs=[model.input],
                       outputs=[channel_weights, conv_layer.output[0]])
    weight_values, feature_map = fetch([dog_x])

    # Scale each feature-map channel by its pooled gradient importance.
    for channel in range(conv_layer.filters):
        feature_map[:, :, channel] *= weight_values[channel]

    # Collapse channels into a single spatial heatmap and normalize it.
    heatmap = np.mean(feature_map, axis=-1)
    return normalize_heatmap(heatmap)
def generate_pattern(model, layer_name, filter_index, steps, learning_rate,
                     size=224):
    """Visualize one conv filter by gradient ascent on the input image."""
    activation = model.get_layer(layer_name).output
    loss = K.mean(activation[:, :, :, filter_index])

    # Gradient of the filter activation w.r.t. the model input, normalized
    # by its RMS magnitude (the usual gradient-normalization trick).
    grads = K.gradients(loss, model.input)[0]
    grads /= (K.sqrt(K.mean(K.square(grads))) + EPSILON)

    # One function call fetches both loss and normalized gradients.
    iterate = K.function(inputs=[model.input], outputs=[loss, grads])

    # Start from a gray image with random noise and climb the loss surface.
    input_img_data = np.random.random((1, size, size, 3)) * 20 + 128
    for step in range(steps):
        loss_value, grads_value = iterate([input_img_data])
        print('@{:-4d}: {:.4f}'.format(step, loss_value))
        # Gradient ascent: move the image in the loss-increasing direction.
        input_img_data += grads_value * learning_rate

    return tensor_to_image(input_img_data[0])
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    def hidden_block(x, units):
        # Regularized Dense -> BatchNorm -> LeakyReLU unit shared by both
        # pathways (bias-free Dense; L2 on kernel, L1 on activations).
        x = layers.Dense(
            units=units,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(x)
        x = layers.BatchNormalization()(x)
        return layers.LeakyReLU(1e-2)(x)

    # State pathway: widening stack of 32 -> 64 -> 128 units.
    net_states = states
    for units in (32, 64, 128):
        net_states = hidden_block(net_states, units)

    # Action pathway: mirrors the state pathway layer for layer.
    net_actions = actions
    for units in (32, 64, 128):
        net_actions = hidden_block(net_actions, units)

    # Combine state and action pathways.
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Final output layer producing the action value (Q value).
    Q_values = layers.Dense(units=1, name='q_values')(net)

    self.model = models.Model(inputs=[states, actions], outputs=Q_values)
    self.model.compile(optimizer=optimizers.Adam(), loss='mse')

    # Gradient of Q w.r.t. the actions, fetched by the actor model.
    action_gradients = K.gradients(Q_values, actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
# this is the placeholder for the input images input_img = model.input # get the symbolic outputs of each "key" layer. layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) kept_filters = [] for filter_index in range(0, 50): print('start') # we build a loss function that maximizes the activation # of the nth filter of the layer considered layer_output = layer_dict[layer_name].output loss = K.mean(layer_output[:, :, :, filter_index]) # we compute the gradient of the input picture wrt this loss grads = K.gradients(loss, input_img)[0] # normalization trick: we normalize the gradient by its L2 norm grads = normalize(grads) # this function returns the loss and grads given the input picture iterate = K.function([input_img], [loss, grads]) # step size for gradient ascent step = 1. # we start from a gray image with some random noise input_img_data = np.random.random((1, resolution, resolution, 1)) input_img_data = (input_img_data - 0.5) * 20 + 128 # we run gradient ascent for 20 steps
def build_model(self):
    """Build the DDPG critic network: (state, action) -> Q-value."""
    input_states = layers.Input(shape=(self.state_size, ),
                                name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ),
                                 name='input_actions')

    # State pathway: 400 -> 300 units with ReLU activations.
    state_path = layers.Dense(units=400)(input_states)
    state_path = layers.Activation("relu")(state_path)
    state_path = layers.Dense(units=300)(state_path)
    state_path = layers.Activation("relu")(state_path)

    # Action pathway: a single 300-unit layer so its output can be summed
    # with the state pathway's output.
    action_path = layers.Dense(units=300)(input_actions)
    action_path = layers.Activation("relu")(action_path)

    # Combine the two pathways, then a uniformly-initialized hidden layer.
    merged = layers.Add()([state_path, action_path])
    merged = layers.Activation('relu')(merged)
    merged = layers.Dense(units=200,
                          kernel_initializer=initializers.RandomUniform(
                              minval=-0.5, maxval=0.5))(merged)
    merged = layers.Activation('relu')(merged)

    # Final output layer producing the action value (Q value).
    Q_values = layers.Dense(units=1, name='q_values')(merged)

    self.model = models.Model(inputs=[input_states, input_actions],
                              outputs=Q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=0.0001), loss='mse')

    # Gradient of Q w.r.t. the action input, fetched by the actor.
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
# NOTE(review): this chunk appears to begin inside a loop over
# `feature_layers` — the slices below take the style-reference (row 1) and
# combination (row 2) entries of a batched layer activation tensor.
style_features = layer_features[1, :, :, :]
combination_features = layer_features[2, :, :, :]
sl = style_loss(style_features, combination_features)
loss += (style_weight / len(feature_layers)) * sl

# Using only these losses results in noisy output. Add a regularization term
# in the form of total variation loss for spatial smoothness.


def total_variation_loss(x):
    # Squared differences between vertically (a) and horizontally (b)
    # adjacent pixels, summed after raising to the 1.25 power.
    # NOTE(review): relies on module-level `height`/`width` — confirm they
    # match the shape of `x`.
    a = K.square(x[:, :height-1, :width-1, :] - x[:, 1:, :width-1, :])
    b = K.square(x[:, :height-1, :width-1, :] - x[:, :height-1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))


loss += total_variation_weight * total_variation_loss(combination_image)

# Define the gradients of the combination image w.r.t. the loss function
grads = K.gradients(loss, combination_image)

# Class that calculates loss and gradients on one pass. We do this because
# the optimizer requires two separate functions for those but that is
# inefficient to compute twice.
outputs = [loss]
outputs += grads
f_outputs = K.function([combination_image], outputs)
#print(f_outputs)


def eval_loss_and_grads(x):
    # scipy-compatible adapter: flat vector in, (loss, float64 gradient) out.
    x = x.reshape((1, height, width, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    grad_values = outs[1].flatten().astype('float64')
    return loss_value, grad_values


# NOTE(review): the body of this class continues in the next chunk.
class Evaluator(object):
# In[ ]: def total_variation_loss(x): a = backend.square(x[:, :height - 1, :width - 1, :] - x[:, 1:, :width - 1, :]) b = backend.square(x[:, :height - 1, :width - 1, :] - x[:, :height - 1, 1:, :]) return backend.sum(backend.pow(a + b, 1.25)) loss += total_variation_weight * total_variation_loss(combination_image) # In[ ]: grads = backend.gradients(loss, combination_image) # In[ ]: outputs = [loss] outputs += grads f_outputs = backend.function([combination_image], outputs) def eval_loss_and_grads(x): x = x.reshape((1, height, width, 3)) outs = f_outputs([x]) loss_value = outs[0] grad_values = outs[1].flatten().astype('float64') return loss_value, grad_values
def main(_):
    """Run DeepDream gradient ascent over successive octave scales."""
    # disable all training specific operations
    K.set_learning_phase(0)
    model = applications.inception_v3.InceptionV3(weights='imagenet',
                                                  include_top=False)

    # Weight of each InceptionV3 mixed layer in the dream loss.
    layer_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }
    layer_dict = dict([(layer.name, layer) for layer in model.layers])

    # Loss = weighted sum of squared activations, each term normalized by
    # the layer's total element count.
    loss = K.variable(0.,)
    for layer_name in layer_contributions:
        coeff = layer_contributions[layer_name]
        activation = layer_dict[layer_name].output
        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        # avoid artifacts by only involving non-border pixels
        loss += coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling

    # start the gradient-ascent process
    dream = model.input
    grads_list = K.gradients(loss, dream)
    grads = grads_list[0]
    # trick: normalize gradients by their mean absolute value
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)
    fetch_loss_and_grads = K.function(inputs=[dream], outputs=[loss, grads])

    def gradient_ascent(x, iterations, step_rate, max_loss=None):
        # Climb the loss surface; stop early once loss exceeds max_loss.
        for i in range(iterations):
            loss_value, grads_value = fetch_loss_and_grads([x])
            if max_loss is not None and loss_value > max_loss:
                break
            print('@{:4d}: {:.4f}'.format(i, loss_value))
            x += step_rate * grads_value
        return x

    img = preprocess_img(FLAGS.img_path)
    original_shape = img.shape[1:3]

    # Build the list of octave shapes, scaled down by OCTAVES_SCLAE per step.
    successive_shapes = [original_shape]
    for i in range(1, NUM_OCTAVES):
        shape = tuple([int(dim / (OCTAVES_SCLAE ** i))
                       for dim in original_shape])
        successive_shapes.append(shape)
    # reverse so processing runs from smallest to largest scale
    successive_shapes = successive_shapes[::-1]

    original_img = np.copy(img)
    shrunk_original_img = resize_img(img, successive_shapes[0])
    for shape in successive_shapes:
        print('Preprocess image with shape: {}'.format(shape))
        img = resize_img(img, shape)
        img = gradient_ascent(img,
                              iterations=FLAGS.iterations,
                              step_rate=FLAGS.step_rate,
                              max_loss=MAX_LOSS)
        same_size_original = resize_img(original_img, shape)
        if FLAGS.repair_lost_detail:
            # Re-inject detail lost by downscaling: the difference between
            # the original at this scale and the upscaled smaller copy.
            upscale_shrunk_original_img = resize_img(shrunk_original_img,
                                                     shape)
            lost_detail = same_size_original - upscale_shrunk_original_img
            img += lost_detail
        shrunk_original_img = same_size_original
        save_img(img, filename='dream_at_scale_{}.png'.format(str(shape)))
    save_img(img, filename='dream.png')
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    l2_penalty = 1e-5  # shared L2 kernel regularization strength

    def regularized_block(x, units):
        # Dense (L2-regularized kernel) -> BatchNorm -> LeakyReLU.
        x = layers.Dense(units=units,
                         kernel_regularizer=regularizers.l2(l2_penalty))(x)
        x = layers.BatchNormalization()(x)
        return layers.LeakyReLU(1e-2)(x)

    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # State pathway: 300 -> 400 units.
    net_states = regularized_block(states, 300)
    net_states = regularized_block(net_states, 400)

    # Action pathway: a single 400-unit block matching the state output.
    net_actions = regularized_block(actions, 400)

    # Merge state and action pathways, then one more hidden block.
    net = layers.add([net_states, net_actions])
    net = regularized_block(net, 200)

    # Linear Q-value head with a small uniform initialization.
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(l2_penalty),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                      maxval=5e-3),
        name='q_values')(net)

    self.model = models.Model(inputs=[states, actions], outputs=Q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=1e-2), loss='mse')

    # Gradient of Q w.r.t. the actions, used by the actor model.
    action_gradients = K.gradients(Q_values, actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def main(_):
    """Neural style transfer: optimize a combination image via L-BFGS."""
    # Generated image keeps the target's aspect ratio at a fixed 400px height.
    width, height = preprocessing.image.load_img(FLAGS.target_img_path).size
    gen_img_height = 400
    gen_img_width = int(width * gen_img_height / height)

    target_x = preprocess_img(FLAGS.target_img_path,
                              target_size=(gen_img_height, gen_img_width))
    target_img = K.constant(target_x)
    style_x = preprocess_img(FLAGS.style_img_path,
                             target_size=(gen_img_height, gen_img_width))
    style_img = K.constant(style_x)
    # The image being optimized; supplied at run time via the placeholder.
    combination_img = K.placeholder(shape=(1, gen_img_height,
                                           gen_img_width, 3))

    # Batch of 3 rows: 0 = content target, 1 = style image, 2 = combination.
    input_tensor = K.concatenate([
        target_img,
        style_img,
        combination_img
    ], axis=0)
    model = applications.vgg19.VGG19(input_tensor=input_tensor,
                                     weights='imagenet',
                                     include_top=False)
    model.summary()
    outputs_dict = dict([(layer.name, layer.output)
                         for layer in model.layers])

    content_layer = 'block5_conv2'
    style_layers = [
        'block1_conv1',
        'block2_conv1',
        'block3_conv1',
        'block4_conv1',
        'block5_conv1'
    ]
    total_variation_weight = 1e-4
    style_weight = 1.0
    content_weight = 0.025

    # Total loss = content term + averaged style terms + TV smoothing term.
    loss = K.variable(0.)
    layer_features = outputs_dict[content_layer]
    target_img_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(target_img_features,
                                          combination_features)
    for layer_name in style_layers:
        layer_features = outputs_dict[layer_name]
        style_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_features, combination_features,
                        target_size=(gen_img_height, gen_img_width))
        loss += (style_weight / len(style_layers)) * sl
    loss += total_variation_weight * total_variation_loss(
        combination_img, target_size=(gen_img_height, gen_img_width))

    # setup gradient-descent
    grads_list = K.gradients(loss, combination_img)
    grads = grads_list[0]
    fetch_loss_and_grads = K.function(inputs=[combination_img],
                                      outputs=[loss, grads])
    lossAndGradsCache = LossAndGradsCache(fetch_loss_and_grads,
                                          target_size=(gen_img_height,
                                                       gen_img_width))

    # Start from the content image and refine it iteratively.
    x = preprocess_img(FLAGS.target_img_path,
                       target_size=(gen_img_height, gen_img_width))
    x = x.flatten()
    for i in range(FLAGS.iterations):
        start_time = time.time()
        # maxfun=20 bounds function evaluations per outer iteration.
        x, min_val, info = fmin_l_bfgs_b(lossAndGradsCache.loss,
                                         x,
                                         fprime=lossAndGradsCache.grads,
                                         maxfun=20)
        print('@{:4d}: {:.4f}'.format(i + 1, min_val))
        x_copy = x.copy().reshape((gen_img_height, gen_img_width, 3))
        print(np.min(x_copy), np.mean(x_copy), np.max(x_copy))
        img = deprocess_img(x_copy)
        os.makedirs('out', exist_ok=True)
        filename = 'out/result_{:04d}.png'.format(i + 1)
        imsave(filename, img)
        print('Iteration took {:.1f}s'.format(time.time() - start_time))