    weighted_style_losses = []
    weighted_content_losses = []

    total_loss = K.variable(0.)
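    # accumulate the weighted style, content, and total-variation terms into one scalar objective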
    for loss in style_losses:
        weighted_loss = args.style_weight * K.mean(loss)
        weighted_style_losses.append(weighted_loss)
        total_loss += weighted_loss
    for loss in content_losses:
        weighted_loss = args.content_weight * K.mean(loss)
        weighted_content_losses.append(weighted_loss)
        total_loss += weighted_loss
    weighted_tv_loss = args.tv_weight * K.mean(total_var_loss)
    total_loss += weighted_tv_loss

    opt = Adam(lr=args.lr)
    updates = opt.get_updates([pastiche_image], {}, total_loss)
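    # these ops apply one Adam step directly to the pastiche image (the variable being optimized)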
    # List of outputs
    outputs = [
        total_loss
    ] + weighted_content_losses + weighted_style_losses + [weighted_tv_loss]

    # Function that makes a step after backpropping to the image
    make_step = K.function([], outputs, updates)

    # Perform optimization steps and save the results
    start_time = time.time()

    for i in range(args.num_iterations):
        out = make_step([])
        if (i + 1) % args.print_and_save == 0:
            print('Iteration %d/%d' % (i + 1, args.num_iterations))
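The loop above references total_var_loss, whose definition lies outside this excerpt. Below is a minimal sketch of a total-variation loss on the pastiche image, assuming a 4-D (batch, height, width, channels) tensor and the same Keras backend alias K; the helper name total_variation_loss is hypothetical, not taken from the original script.

from keras import backend as K

def total_variation_loss(x):
    # squared differences between neighbouring pixels along height and width
    dh = K.square(x[:, 1:, :-1, :] - x[:, :-1, :-1, :])
    dw = K.square(x[:, :-1, 1:, :] - x[:, :-1, :-1, :])
    # summing them penalises high-frequency noise in the generated image
    return K.sum(K.pow(dh + dw, 1.25))

# e.g. total_var_loss = total_variation_loss(pastiche_image)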
Example #2
File: ddpg.py  Project: ajmal017/rewave
    def __init__(self,
                 env,
                 sess,
                 actor_noise,
                 obs_normalizer=None,
                 action_processor=None,
                 predictor_type="cnn",
                 use_batch_norm=False,
                 load_root_model=False,
                 config=DEFAULT_CONFIG):

        self.config = config
        assert self.config['max step'] > self.config[
            'batch size'], 'Max step must be bigger than batch size'

        self.episode = self.config["episode"]

        self.actor_learning_rate = self.config["actor learning rate"]
        self.critic_learning_rate = self.config["critic learning rate"]
        self.tau = self.config["tau"]
        self.gamma = self.config["gamma"]
        self.batch_size = self.config['batch size']

        self.action_processor = action_processor

        np.random.seed(self.config['seed'])
        if env:
            env.seed(self.config['seed'])

        self.sess = sess
        # if env is None, then DDPG just predicts
        self.env = env

        self.actor_noise = actor_noise
        # share state input
        has_complex_state = isinstance(
            self.env.observation_space, (gym.spaces.Dict, gym.spaces.Tuple))
        if obs_normalizer and has_complex_state:
            state_input = Input(
                shape=self.env.observation_space.spaces[obs_normalizer].shape,
                name="state_input")
        else:
            state_input = Input(shape=self.env.observation_space.shape,
                                name="state_input")

        # mirror the state_input logic so flat observation spaces also work
        if obs_normalizer and has_complex_state:
            target_shape = self.env.observation_space.spaces[obs_normalizer].shape
        else:
            target_shape = self.env.observation_space.shape
        target_state_input = Input(shape=target_shape, name="target_state_input")
        self.obs_normalizer = obs_normalizer

        # shape
        action_dim = env.action_space.shape[0]
        nb_assets = state_input.shape[1]
        window_length = state_input.shape[2]
        nb_features = state_input.shape[3]
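        # the state tensor is laid out as (batch, nb_assets, window_length, nb_features)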

        # paths
        self.model_save_path = get_model_path(window_length=window_length,
                                              predictor_type=predictor_type,
                                              use_batch_norm=use_batch_norm)
        self.summary_path = get_result_path(
            window_length=window_length,
            predictor_type=predictor_type,
            use_batch_norm=use_batch_norm) + "/" + datetime.now().strftime(
                "%Y-%m-%d-%H%M%S")
        self.root_model_save_path = get_root_model_path(
            window_length, predictor_type, use_batch_norm)

        # feature extraction
        self.predictor_type = predictor_type
        self.use_batch_norm = use_batch_norm
        root_net = RootNetwork(inputs=state_input,
                               predictor_type=self.predictor_type,
                               use_batch_norm=self.use_batch_norm).net
        self.root_model = Model(state_input, root_net)
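        # root_net is the shared feature extractor reused by both the actor and the critic below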

        if load_root_model:
            try:
                # reuse pretrained feature-extraction weights and freeze them
                self.root_model.load_weights(self.root_model_save_path)
                for layer in self.root_model.layers:
                    layer.trainable = False
            except Exception:
                print("ERROR while loading root model ",
                      self.root_model_save_path)
        variable_summaries(root_net, "Root_Output")

        #array_variable_summaries(self.root_model.layers[1].weights, "Root_Input_1")
        #array_variable_summaries(self.root_model.layers[2].weights, "Root_Input_2")
        #array_variable_summaries(self.root_model.layers[-1].weights, "Root_Output_2")

        target_root_net = RootNetwork(inputs=target_state_input,
                                      predictor_type=predictor_type,
                                      use_batch_norm=use_batch_norm).net
        self.target_root_model = Model(target_state_input, target_root_net)

        if load_root_model:
            try:
                self.target_root_model.load_weights(self.root_model_save_path)
                for layer in self.target_root_model.layers:
                    layer.trainable = False
            except Exception:
                print("ERROR while loading root model ",
                      self.root_model_save_path)

        # start the target feature extractor from the online network's weights
        self.target_root_model.set_weights(self.root_model.get_weights())

        # ===================================================================== #
        #                               Actor Model                             #
        # Chain rule: find the gradient of the actor network params that moves  #
        # the critic's value prediction in the desired direction, i.e. de/dA.   #
        # Compute de/dA = de/dC * dC/dA, where e is error, C critic, A actor.   #
        # ===================================================================== #

        self.actor_state_input, self.actor_model = Actor(
            state_input=state_input, root_net=root_net,
            action_dim=action_dim).references()
        _, self.target_actor_model = Actor(state_input=target_state_input,
                                           root_net=target_root_net,
                                           action_dim=action_dim).references()
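        # the target actor mirrors the online actor but is built on the target feature extractor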

        # summary
        #array_variable_summaries(self.actor_model.layers[-1].weights, "Actor_Output")

        #actor_model_weights = self.actor_model.trainable_weights

        #self.actor_grads = K.gradients(self.actor_model.output,actor_model_weights)  # dC/dA (from actor)

        # grads = zip(self.actor_grads, actor_model_weights)

        # policy gradient: feed dQ/da from the critic and minimise -mean(dQ/da * a)
        action_grad = Input(shape=(action_dim, ))
        loss = K.mean(-action_grad * self.actor_model.output)

        # include any regularization losses registered on the actor's layers
        for regularizer_loss in self.actor_model.losses:
            loss += regularizer_loss

        optimizer = Adam(lr=self.actor_learning_rate)

        updates_op = optimizer.get_updates(
            params=self.actor_model.trainable_weights,
            # constraints=self.model.constraints,
            loss=loss)

        self.optimize = K.function(
            inputs=[self.actor_state_input, action_grad,
                    K.learning_phase()],
            outputs=[loss],
            updates=updates_op)  # calling function for the loop
        """
        self.actor_grads = tf.gradients(self.actor_model.output,
                                        actor_model_weights, -self.actor_critic_grad)  # dC/dA (from actor)

        tf.summary.histogram("Actor_Critic_Grad", self.actor_critic_grad)

        
        grads = zip(self.actor_grads, actor_model_weights)

        self.optimize = tf.train.AdamOptimizer(self.actor_learning_rate).apply_gradients(grads)
        """
        # ===================================================================== #
        #                              Critic Model                             #
        # ===================================================================== #

        self.critic_state_input, self.critic_action_input, self.critic_model = Critic(
            state_input=state_input,
            root_net=root_net,
            action_dim=action_dim,
            lr=self.critic_learning_rate).references()
        array_variable_summaries(self.critic_model.layers[-1].weights,
                                 "Critic_Output")

        _, _, self.target_critic_model = Critic(
            state_input=target_state_input,
            root_net=target_root_net,
            action_dim=action_dim,
            lr=self.critic_learning_rate).references()
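        # target critic: the slowly-updated copy of the critic used when forming TD targets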
        """
        self.critic_grads = tf.gradients(self.critic_model.output,
                                         self.critic_action_input)  # where we calcaulte de/dC for feeding above

        """

        #self.actor_critic_grad = tf.placeholder(tf.float32,[None, self.env.action_space.shape[0]])  # where we will feed de/dC (from critic)
        # summary

        self.critic_grads = K.gradients(
            self.critic_model.outputs, self.critic_action_input
        )  # where we calculate de/dC for feeding above

        self.compute_critic_gradient = K.function(
            inputs=[
                self.critic_model.output, self.critic_action_input,
                self.critic_state_input
            ],
            outputs=self.critic_grads)  # calling function for the loop

        tf.summary.histogram("Critic_Grad", self.critic_grads)

        # Update target networks
        self.update_target()

        # summary
        #self.summary_ops, self.summary_vars = build_summaries(action_dim=action_dim)
        with tf.variable_scope("Global"):
            self.episode_reward = tf.Variable(0., name="episode_reward")
            tf.summary.scalar("Reward", self.episode_reward)
            self.episode_min_reward = tf.Variable(0.,
                                                  name="episode_min_reward")
            tf.summary.scalar("Min_Reward", self.episode_min_reward)
            self.episode_ave_max_q = tf.Variable(0., name="episode_ave_max_q")
            tf.summary.scalar("Qmax_Value", self.episode_ave_max_q)
            self.loss_critic = tf.Variable(0., name="loss_critic")
            tf.summary.scalar("Loss_critic", self.loss_critic)
            self.loss_actor = tf.Variable(0., name="loss_actor")
            tf.summary.scalar("Loss_actor", self.loss_actor)
            self.ep_base_action = tf.Variable(initial_value=self.env.sim.w0,
                                              name="ep_base_action")
            tf.summary.histogram("Action_base", self.ep_base_action)
            self.ep_action = tf.Variable(initial_value=self.env.sim.w0,
                                         name="ep_action")
            tf.summary.histogram("Action", self.ep_action)

        self.merged = tf.summary.merge_all()
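update_target() is called above but its body is not part of this excerpt. A minimal sketch of the usual tau-weighted soft update for the Keras models built here; soft_update is a hypothetical helper, not the project's actual method.

def soft_update(target_model, source_model, tau):
    # theta_target <- tau * theta_source + (1 - tau) * theta_target
    mixed = [tau * src + (1.0 - tau) * tgt
             for src, tgt in zip(source_model.get_weights(),
                                 target_model.get_weights())]
    target_model.set_weights(mixed)

# e.g. soft_update(self.target_actor_model, self.actor_model, self.tau)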