import time

from keras import backend as K
from keras.optimizers import Adam

# (style_losses, content_losses, total_var_loss and pastiche_image are
# defined earlier in the script.)
weighted_style_losses = []
weighted_content_losses = []
total_loss = K.variable(0.)

# Sum the weighted style, content and total-variation terms into one scalar.
for loss in style_losses:
    weighted_loss = args.style_weight * K.mean(loss)
    weighted_style_losses.append(weighted_loss)
    total_loss += weighted_loss
for loss in content_losses:
    weighted_loss = args.content_weight * K.mean(loss)
    weighted_content_losses.append(weighted_loss)
    total_loss += weighted_loss
weighted_tv_loss = args.tv_weight * K.mean(total_var_loss)
total_loss += weighted_tv_loss

# The pastiche image itself is the only trainable parameter.
opt = Adam(lr=args.lr)
updates = opt.get_updates([pastiche_image], {}, total_loss)

# List of outputs to monitor during optimization.
outputs = [total_loss] + weighted_content_losses + weighted_style_losses \
    + [weighted_tv_loss]

# Function that makes a step after backpropagating to the image.
make_step = K.function([], outputs, updates)

# Perform optimization steps and report progress.
start_time = time.time()
for i in range(args.num_iterations):
    out = make_step([])
    if (i + 1) % args.print_and_save == 0:
        print('Iteration %d/%d' % (i + 1, args.num_iterations))
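# --------------------------------------------------------------------------- #
# For reference, a minimal sketch of how the per-layer style losses summed
# above are commonly built (Gram-matrix matching, as in Gatys et al.). The
# helper names `gram_matrix` and `style_loss_layer` are illustrative
# assumptions, not part of the original script; `K` is the Keras backend
# imported above.
def gram_matrix(x):
    # (h, w, c) feature map -> (c, h*w), then the channel-correlation matrix.
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(features, K.transpose(features))


def style_loss_layer(style_features, pastiche_features):
    # Squared distance between Gram matrices at a single feature layer;
    # the training loop above applies K.mean to each such term.
    return K.square(gram_matrix(style_features)
                    - gram_matrix(pastiche_features))
# --------------------------------------------------------------------------- #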
def __init__(self,
             env,
             sess,
             actor_noise,
             obs_normalizer=None,
             action_processor=None,
             predictor_type="cnn",
             use_batch_norm=False,
             load_root_model=False,
             config=DEFAULT_CONFIG):
    self.config = config
    assert self.config['max step'] > self.config['batch size'], \
        'Max step must be bigger than batch size'
    self.episode = self.config["episode"]
    self.actor_learning_rate = self.config["actor learning rate"]
    self.critic_learning_rate = self.config["critic learning rate"]
    self.tau = self.config["tau"]
    self.gamma = self.config["gamma"]
    self.batch_size = self.config['batch size']
    self.action_processor = action_processor

    np.random.seed(self.config['seed'])
    if env:
        env.seed(self.config['seed'])

    self.sess = sess
    # If env is None, then DDPG is used for prediction only.
    self.env = env
    self.actor_noise = actor_noise

    # Shared state input. Dict/Tuple observation spaces expose the relevant
    # sub-space through the obs_normalizer key.
    has_complex_state = (
        isinstance(self.env.observation_space, gym.spaces.Dict)
        or isinstance(self.env.observation_space, gym.spaces.Tuple))
    if obs_normalizer and has_complex_state:
        state_shape = self.env.observation_space.spaces[obs_normalizer].shape
    else:
        state_shape = self.env.observation_space.shape
    state_input = Input(shape=state_shape, name="state_input")
    target_state_input = Input(shape=state_shape, name="target_state_input")
    self.obs_normalizer = obs_normalizer

    # Shapes
    action_dim = env.action_space.shape[0]
    nb_assets = state_input.shape[1]
    window_length = state_input.shape[2]
    nb_features = state_input.shape[3]

    # Paths
    self.model_save_path = get_model_path(window_length=window_length,
                                          predictor_type=predictor_type,
                                          use_batch_norm=use_batch_norm)
    self.summary_path = get_result_path(
        window_length=window_length,
        predictor_type=predictor_type,
        use_batch_norm=use_batch_norm) + "/" + datetime.now().strftime(
            "%Y-%m-%d-%H%M%S")
    self.root_model_save_path = get_root_model_path(window_length,
                                                    predictor_type,
                                                    use_batch_norm)

    # Feature extraction: a shared "root" network feeds both actor and critic.
    self.predictor_type = predictor_type
    self.use_batch_norm = use_batch_norm
    root_net = RootNetwork(inputs=state_input,
                           predictor_type=self.predictor_type,
                           use_batch_norm=self.use_batch_norm).net
    self.root_model = Model(state_input, root_net)
    if load_root_model:
        try:
            self.root_model.load_weights(self.root_model_save_path)
            # Freeze the pretrained root layers.
            for layer in self.root_model.layers:
                layer.trainable = False
        except Exception:
            print("ERROR while loading root model ",
                  self.root_model_save_path)
    variable_summaries(root_net, "Root_Output")

    target_root_net = RootNetwork(inputs=target_state_input,
                                  predictor_type=predictor_type,
                                  use_batch_norm=use_batch_norm).net
    self.target_root_model = Model(target_state_input, target_root_net)
    if load_root_model:
        try:
            self.target_root_model.load_weights(self.root_model_save_path)
            for layer in self.target_root_model.layers:
                layer.trainable = False
        except Exception:
            print("ERROR while loading root model ",
                  self.root_model_save_path)
    self.target_root_model.set_weights(self.root_model.get_weights())
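    # ------------------------------------------------------------------ #
    # Illustration only: a minimal sketch of what a CNN-style RootNetwork
    # over inputs of shape (nb_assets, window_length, nb_features) could
    # look like. This helper is a hypothetical stand-in, not the project's
    # actual RootNetwork implementation, and is never called here.
    def _root_network_sketch(inputs, use_batch_norm=False):
        from keras.layers import Activation, BatchNormalization, Conv2D, Flatten
        net = Conv2D(32, (1, 3))(inputs)  # convolve along the window axis
        if use_batch_norm:
            net = BatchNormalization()(net)
        net = Activation('relu')(net)
        net = Conv2D(32, (1, 3))(net)
        net = Activation('relu')(net)
        return Flatten()(net)
    # ------------------------------------------------------------------ #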
    # ===================================================================== #
    #                              Actor Model                              #
    # Chain rule: find the gradient of the error w.r.t. the actor network   #
    # params, de/dA, that moves the critic's value prediction closest to    #
    # its target. Calculate de/dA as de/dC * dC/dA, where e is the error,   #
    # C the critic, and A the actor.                                        #
    # ===================================================================== #
    self.actor_state_input, self.actor_model = Actor(
        state_input=state_input,
        root_net=root_net,
        action_dim=action_dim).references()
    _, self.target_actor_model = Actor(state_input=target_state_input,
                                       root_net=target_root_net,
                                       action_dim=action_dim).references()

    # Policy gradient: descend the mean of -de/dC * actor output, plus any
    # regularizer losses collected on the actor model.
    action_grad = Input(shape=(action_dim, ))
    loss = K.mean(-action_grad * self.actor_model.output)
    for regularizer_loss in self.actor_model.losses:
        loss += regularizer_loss
    optimizer = Adam(lr=self.actor_learning_rate)
    updates_op = optimizer.get_updates(
        params=self.actor_model.trainable_weights, loss=loss)
    # Callable for the training loop: one actor update step.
    self.optimize = K.function(
        inputs=[self.actor_state_input, action_grad, K.learning_phase()],
        outputs=[loss],
        updates=updates_op)

    # ===================================================================== #
    #                              Critic Model                             #
    # ===================================================================== #
    self.critic_state_input, self.critic_action_input, self.critic_model = Critic(
        state_input=state_input,
        root_net=root_net,
        action_dim=action_dim,
        lr=self.critic_learning_rate).references()
    array_variable_summaries(self.critic_model.layers[-1].weights,
                             "Critic_Output")
    _, _, self.target_critic_model = Critic(
        state_input=target_state_input,
        root_net=target_root_net,
        action_dim=action_dim,
        lr=self.critic_learning_rate).references()

    # de/dC: gradient of the critic's output w.r.t. the action, fed back
    # into the actor update above.
    self.critic_grads = K.gradients(self.critic_model.outputs,
                                    self.critic_action_input)
    # Callable for the training loop: evaluate de/dC.
    self.compute_critic_gradient = K.function(
        inputs=[
            self.critic_model.output, self.critic_action_input,
            self.critic_state_input
        ],
        outputs=self.critic_grads)
    tf.summary.histogram("Critic_Grad", self.critic_grads)

    # Update target networks.
    self.update_target()
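    # ------------------------------------------------------------------ #
    # Illustration only: a hedged sketch of how the two K.functions above
    # are typically wired together inside the training loop (the names
    # `states`, `actions` and `q_values` are placeholders, not variables
    # defined here):
    #
    #   actions = self.actor_model.predict(states)
    #   q_values = self.critic_model.predict([states, actions])
    #   grads = self.compute_critic_gradient([q_values, actions, states])[0]
    #   actor_loss = self.optimize([states, grads, 1])  # 1 = training phase
    # ------------------------------------------------------------------ #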
    # Episode-level summaries.
    with tf.variable_scope("Global"):
        self.episode_reward = tf.Variable(0., name="episode_reward")
        tf.summary.scalar("Reward", self.episode_reward)
        self.episode_min_reward = tf.Variable(0., name="episode_min_reward")
        tf.summary.scalar("Min_Reward", self.episode_min_reward)
        self.episode_ave_max_q = tf.Variable(0., name="episode_ave_max_q")
        tf.summary.scalar("Qmax_Value", self.episode_ave_max_q)
        self.loss_critic = tf.Variable(0., name="loss_critic")
        tf.summary.scalar("Loss_critic", self.loss_critic)
        self.loss_actor = tf.Variable(0., name="loss_actor")
        tf.summary.scalar("Loss_actor", self.loss_actor)
        self.ep_base_action = tf.Variable(initial_value=self.env.sim.w0,
                                          name="ep_base_action")
        tf.summary.histogram("Action_base", self.ep_base_action)
        self.ep_action = tf.Variable(initial_value=self.env.sim.w0,
                                     name="ep_action")
        tf.summary.histogram("Action", self.ep_action)
    self.merged = tf.summary.merge_all()
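# A minimal sketch of the soft target update that `update_target` (called at
# the end of __init__) is assumed to perform -- the standard DDPG rule
# theta_target <- tau * theta + (1 - tau) * theta_target. The name
# `_update_target_sketch` is illustrative; the project's own method may differ.
def _update_target_sketch(self):
    for model, target in ((self.actor_model, self.target_actor_model),
                          (self.critic_model, self.target_critic_model),
                          (self.root_model, self.target_root_model)):
        # Blend each weight tensor of the online model into its target twin.
        target.set_weights([
            self.tau * w + (1. - self.tau) * tw
            for w, tw in zip(model.get_weights(), target.get_weights())
        ])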