def gradients_and_summaries(self):
        """Create gradient/update ops and merged summaries for three losses.

        For each of ``sf_loss``, ``aux_loss`` and ``option_loss`` the pair
        returned by ``self.take_gradient`` is stored as ``grads_*`` and
        ``apply_grads_*``.  Three merged summary ops are then built:
        ``merged_summary_sf``, ``merged_summary_aux`` and
        ``merged_summary_option``.
        """
        trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      self.scope)

        # --- Gradients and update ops -------------------------------------
        sf_pair = self.take_gradient(self.sf_loss)
        self.grads_sf, self.apply_grads_sf = sf_pair
        aux_pair = self.take_gradient(self.aux_loss)
        self.grads_aux, self.apply_grads_aux = aux_pair
        option_pair = self.take_gradient(self.option_loss)
        self.grads_option, self.apply_grads_option = option_pair

        # --- Summaries ----------------------------------------------------
        sf_extras = [
            tf.summary.scalar('avg_sf_loss', self.sf_loss),
            gradient_summaries(zip(self.grads_sf, trainable)),
        ]
        self.merged_summary_sf = tf.summary.merge(
            self.summaries_sf + sf_extras)

        aux_extras = [
            tf.summary.scalar('aux_loss', self.aux_loss),
            gradient_summaries(zip(self.grads_aux, trainable)),
        ]
        self.merged_summary_aux = tf.summary.merge(
            self.image_summaries + self.summaries_aux + aux_extras)

        option_extras = [
            tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
            tf.summary.scalar('avg_policy_loss', self.policy_loss),
            tf.summary.scalar('avg_eigen_critic_loss',
                              self.eigen_critic_loss),
            gradient_summaries(zip(self.grads_option, trainable)),
        ]
        self.merged_summary_option = tf.summary.merge(
            self.summaries_option + option_extras)
    def gradients_and_summaries(self):
        """Create gradient/update ops and merged summaries for five losses.

        ``self.take_gradient`` is called once per loss (sf, aux, critic,
        option, term) and each returned pair is stored as ``grads_*`` /
        ``apply_grads_*``.  One merged summary op per loss is then built
        from the matching ``summaries_*`` list plus loss scalars and the
        output of ``gradient_summaries``.
        """
        trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      self.scope)

        # --- Gradients and update ops -------------------------------------
        sf_pair = self.take_gradient(self.sf_loss)
        self.grads_sf, self.apply_grads_sf = sf_pair
        aux_pair = self.take_gradient(self.aux_loss)
        self.grads_aux, self.apply_grads_aux = aux_pair
        critic_pair = self.take_gradient(self.critic_loss)
        self.grads_critic, self.apply_grads_critic = critic_pair
        option_pair = self.take_gradient(self.option_loss)
        self.grads_option, self.apply_grads_option = option_pair
        term_pair = self.take_gradient(self.term_loss)
        self.grads_term, self.apply_grads_term = term_pair

        # --- Summaries ----------------------------------------------------
        sf_extras = [
            tf.summary.scalar('avg_sf_loss', self.sf_loss),
            gradient_summaries(zip(self.grads_sf, trainable)),
        ]
        self.merged_summary_sf = tf.summary.merge(
            self.summaries_sf + sf_extras)

        aux_extras = [
            tf.summary.scalar('aux_loss', self.aux_loss),
            gradient_summaries(zip(self.grads_aux, trainable)),
        ]
        self.merged_summary_aux = tf.summary.merge(
            self.image_summaries + self.summaries_aux + aux_extras)

        # A learning-rate ('LR') scalar was disabled in the original code.
        option_extras = [
            tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
            tf.summary.scalar('avg_policy_loss', self.policy_loss),
            tf.summary.scalar('random_option_prob', self.random_option_prob),
            tf.summary.scalar('avg_eigen_critic_loss',
                              self.eigen_critic_loss),
            gradient_summaries(zip(self.grads_option, trainable)),
        ]
        self.merged_summary_option = tf.summary.merge(
            self.summaries_option + option_extras)

        term_extras = [
            tf.summary.scalar('avg_termination_loss', self.term_loss),
            tf.summary.scalar('avg_termination_error',
                              tf.reduce_mean(self.term_err)),
            gradient_summaries(zip(self.grads_term, trainable)),
        ]
        self.merged_summary_term = tf.summary.merge(
            self.summaries_term + term_extras)

        critic_extras = [
            tf.summary.scalar('avg_critic_loss', self.critic_loss),
            gradient_summaries(zip(self.grads_critic, trainable)),
        ]
        self.merged_summary_critic = tf.summary.merge(
            self.summaries_critic + critic_extras)
Esempio n. 3
0
  def __init__(self, scope, config, action_size):
    """Build a single-layer successor-feature network under `scope`.

    Args:
      scope: Variable-scope name.  For any scope other than 'global' the
        loss, clipped gradients, summaries and the apply-gradients op are
        created as well.
      config: Object supplying input_size, conv_layers, fc_layers,
        sf_layers, deconv_layers, nb_options, num_agents, lr,
        network_optimizer and gradient_clip_value.
      action_size: Stored on the instance; not otherwise used here.
    """
    self._scope = scope
    self._config = config
    self._action_size = action_size
    self.option = 0

    # Flattened input size: product of the first two input dimensions.
    self.nb_states = config.input_size[0] * config.input_size[1]

    self._conv_layers = config.conv_layers
    self._fc_layers = config.fc_layers
    self._sf_layers = config.sf_layers
    self._deconv_layers = config.deconv_layers
    self._nb_options = config.nb_options
    self._nb_envs = config.num_agents

    self._network_optimizer = config.network_optimizer(
      config.lr, name='network_optimizer')

    with tf.variable_scope(scope):
      flat_shape = [None, self.nb_states]
      self.observation = tf.placeholder(
        shape=flat_shape, dtype=tf.float32, name="Inputs")

      # One linear layer (no activation) mapping the observation to the SF.
      self.sf = layers.fully_connected(
        self.observation,
        num_outputs=self.nb_states,
        activation_fn=None,
        variables_collections=tf.get_collection("variables"),
        outputs_collections="activations",
        scope="sf")

      # The 'global' network only holds variables; nothing more to build.
      if scope == 'global':
        return

      self.target_sf = tf.placeholder(
        shape=[None, self.nb_states], dtype=tf.float32, name="target_SF")

      with tf.name_scope('sf_loss'):
        td_error = self.target_sf - self.sf
        self.sf_loss = tf.reduce_mean(tf.square(td_error))

      self.loss = self.sf_loss
      loss_scalars = [tf.summary.scalar('avg_sf_loss', self.sf_loss)]

      # Gradients are taken w.r.t. this scope's trainable variables and
      # clipped by global norm.
      scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
      raw_grads = tf.gradients(self.loss, scope_vars)
      self.var_norms = tf.global_norm(scope_vars)
      clipped_grads, self.grad_norms = tf.clip_by_global_norm(
        raw_grads, self._config.gradient_clip_value)

      norm_scalars = [
        tf.summary.scalar('gradient_norm', tf.global_norm(raw_grads)),
        tf.summary.scalar('cliped_gradient_norm',
                          tf.global_norm(clipped_grads)),
        gradient_summaries(zip(clipped_grads, scope_vars)),
      ]
      self.merged_summary = tf.summary.merge(loss_scalars + norm_scalars)

      # Clipped local gradients are applied to the 'global' scope variables.
      shared_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
      self.apply_grads = self._network_optimizer.apply_gradients(
        zip(clipped_grads, shared_vars))
  def __init__(self, scope, config, action_size):
    """Build the feature encoder, successor-feature head and aux decoder.

    The graph is created inside `tf.variable_scope(scope)`:
      * "fc"        - fully connected encoder producing `self.fi`.
      * "sf"        - successor-feature head on a stop-gradient copy of the
                      normalised features, producing `self.sf`.
      * "action_fc" - linear embedding of the action placeholder.
      * "aux_fc"    - decoder of `fi + action embedding`, reshaped to the
                      observation shape as `self.next_obs`.

    For any scope other than 'global', target placeholders, the SF and aux
    losses, clipped gradients, merged summaries and apply-gradient ops
    (targeting the 'global' scope variables) are created as well.

    Args:
      scope: Variable-scope name; 'global' builds the forward graph only.
      config: Object supplying input_size, history_size, fc_layers,
        sf_layers, aux_fc_layers, nb_options, num_agents, lr,
        network_optimizer, aux_coef and gradient_clip_norm_value.
      action_size: Stored as `self.action_size`.
    """
    self._scope = scope
    self.nb_states = config.input_size[0] * config.input_size[1]
    self.fc_layers = config.fc_layers
    self.sf_layers = config.sf_layers
    self.aux_fc_layers = config.aux_fc_layers
    self.action_size = action_size
    self.nb_options = config.nb_options
    self.nb_envs = config.num_agents
    self.config = config

    self.network_optimizer = config.network_optimizer(
      self.config.lr, name='network_optimizer')

    with tf.variable_scope(scope):
      self.observation = tf.placeholder(shape=[None, config.input_size[0], config.input_size[1], config.history_size],
                                        dtype=tf.float32, name="Inputs")

      # Summary buffers; each list is merged into its own summary op below.
      self.image_summaries = []
      self.image_summaries.append(tf.summary.image('input', self.observation, max_outputs=30))

      self.summaries_sf = []
      self.summaries_aux = []

      out = self.observation
      out = layers.flatten(out, scope="flatten")

      # Encoder: linear layers with ReLU between them (none after the last).
      with tf.variable_scope("fc"):
        for i, nb_filt in enumerate(self.fc_layers):
          out = layers.fully_connected(out, num_outputs=nb_filt,
                                       activation_fn=None,
                                       variables_collections=tf.get_collection("variables"),
                                       outputs_collections="activations", scope="fc_{}".format(i))

          if i < len(self.fc_layers) - 1:
            out = tf.nn.relu(out)
          self.summaries_sf.append(tf.contrib.layers.summarize_activation(out))
          self.summaries_aux.append(tf.contrib.layers.summarize_activation(out))
        self.fi = out

      # SF head: stop_gradient keeps the SF loss from updating the encoder.
      with tf.variable_scope("sf"):
        out = self.layer_norm_fn(self.fi, relu=True)
        out = tf.stop_gradient(out)
        for i, nb_filt in enumerate(self.sf_layers):
          out = layers.fully_connected(out, num_outputs=nb_filt,
                                       activation_fn=None,
                                       variables_collections=tf.get_collection("variables"),
                                       outputs_collections="activations", scope="sf_{}".format(i))
          if i < len(self.sf_layers) - 1:
            out = tf.nn.relu(out)
          self.summaries_sf.append(tf.contrib.layers.summarize_activation(out))
        self.sf = out

      with tf.variable_scope("action_fc"):
        self.actions_placeholder = tf.placeholder(shape=[None], dtype=tf.float32, name="Actions")
        # NOTE(review): `i` is the index left over from the preceding loops,
        # so this scope name depends on the number of layers.  Left unchanged
        # because altering it would rename the variables of this layer.
        actions = layers.fully_connected(self.actions_placeholder[..., None], num_outputs=self.fc_layers[-1],
                                         activation_fn=None,
                                         variables_collections=tf.get_collection("variables"),
                                         outputs_collections="activations", scope="action_fc{}".format(i))

      # Decoder: features plus action embedding -> predicted next observation.
      with tf.variable_scope("aux_fc"):
        out = tf.add(self.fi, actions)
        for i, nb_filt in enumerate(self.aux_fc_layers):
          out = layers.fully_connected(out, num_outputs=nb_filt,
                                       activation_fn=None,
                                       variables_collections=tf.get_collection("variables"),
                                       outputs_collections="activations", scope="aux_fc_{}".format(i))
          if i < len(self.aux_fc_layers) - 1:
            out = tf.nn.relu(out)
          self.summaries_aux.append(tf.contrib.layers.summarize_activation(out))
        self.next_obs = tf.reshape(out, (-1, config.input_size[0], config.input_size[1], config.history_size))

        self.image_summaries.append(tf.summary.image('next_obs', self.next_obs, max_outputs=30))

      if scope != 'global':
        self.target_sf = tf.placeholder(shape=[None, self.sf_layers[-1]], dtype=tf.float32, name="target_SF")
        self.target_next_obs = tf.placeholder(
          shape=[None, config.input_size[0], config.input_size[1], config.history_size], dtype=tf.float32,
          name="target_next_obs")
        self.image_summaries.append(tf.summary.image('target_next_obs', self.target_next_obs, max_outputs=30))

        # Externally fed SF matrix and its singular value decomposition.
        self.matrix_sf = tf.placeholder(shape=[self.nb_states, self.sf_layers[-1]],
                                        dtype=tf.float32, name="matrix_sf")
        self.s, self.u, self.v = tf.svd(self.matrix_sf)

        with tf.name_scope('sf_loss'):
          sf_td_error = self.target_sf - self.sf
          self.sf_loss = tf.reduce_mean(huber_loss(sf_td_error))

        with tf.name_scope('aux_loss'):
          aux_error = self.next_obs - self.target_next_obs
          self.aux_loss = tf.reduce_mean(self.config.aux_coef * huber_loss(aux_error))

        local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)

        gradients_sf = tf.gradients(self.sf_loss, local_vars)
        gradients_aux = tf.gradients(self.aux_loss, local_vars)
        self.var_norms = tf.global_norm(local_vars)
        grads_sf, self.grad_norms_sf = tf.clip_by_global_norm(gradients_sf, self.config.gradient_clip_norm_value)
        grads_aux, self.grad_norms_aux = tf.clip_by_global_norm(gradients_aux, self.config.gradient_clip_norm_value)

        self.merged_summary_sf = tf.summary.merge(
          self.summaries_sf + [tf.summary.scalar('avg_sf_loss', self.sf_loss)] + [
            tf.summary.scalar('gradient_norm_sf', tf.global_norm(gradients_sf)),
            tf.summary.scalar('cliped_gradient_norm_sf', tf.global_norm(grads_sf)),
            gradient_summaries(zip(grads_sf, local_vars))])
        # The aux gradient norms get their own '_aux' tags; they previously
        # reused the '_sf' tags, which collided with the SF scalars above.
        self.merged_summary_aux = tf.summary.merge(self.image_summaries + self.summaries_aux +
                                                   [tf.summary.scalar('aux_loss', self.aux_loss)] + [
                                                     tf.summary.scalar('gradient_norm_aux',
                                                                       tf.global_norm(gradients_aux)),
                                                     tf.summary.scalar('cliped_gradient_norm_aux',
                                                                       tf.global_norm(grads_aux)),
                                                     gradient_summaries(zip(grads_aux, local_vars))])
        # Clipped local gradients are applied to the 'global' scope variables.
        global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads_sf = self.network_optimizer.apply_gradients(zip(grads_sf, global_vars))
        self.apply_grads_aux = self.network_optimizer.apply_gradients(zip(grads_aux, global_vars))
Esempio n. 5
0
    def __init__(self, scope, config, action_size):
        """Build the encoder, successor-feature head and next-frame decoder.

        Everything is created inside ``tf.variable_scope(scope)``.  For any
        scope other than ``'global'`` the target placeholders, losses,
        clipped gradients, merged summaries and apply-gradient ops are
        created as well.

        Args:
            scope: Variable-scope name; ``'global'`` builds the forward
                graph only.
            config: Object supplying ``input_size``, ``history_size``,
                ``fc_layers``, ``sf_layers``, ``aux_fc_layers``, ``lr``,
                ``network_optimizer``, ``aux_coef`` and
                ``gradient_clip_norm_value``.
            action_size: Not used by this constructor.
        """
        self._scope = scope
        self.config = config

        # Size of the flattened input space.
        self.nb_states = config.input_size[0] * config.input_size[1]

        # Buffers holding the individual summary ops until they are merged.
        self.image_summaries = []
        self.summaries_sf = []
        self.summaries_aux = []

        # Optimizer instance shared by every apply-gradients op below.
        self.network_optimizer = config.network_optimizer(
            config.lr, name='network_optimizer')

        frame_dims = [config.input_size[0], config.input_size[1],
                      config.history_size]

        with tf.variable_scope(scope):
            self.observation = tf.placeholder(shape=[None] + frame_dims,
                                              dtype=tf.float32,
                                              name="Inputs")
            hidden = layers.flatten(self.observation, scope="flatten")

            # State-space encoder into the latent fi(s).
            with tf.variable_scope("fi"):
                last_fi = len(self.config.fc_layers) - 1
                for idx, width in enumerate(self.config.fc_layers):
                    hidden = layers.fully_connected(
                        hidden,
                        num_outputs=width,
                        activation_fn=None,
                        variables_collections=tf.get_collection("variables"),
                        outputs_collections="activations",
                        scope="fc_fi_{}".format(idx))
                    # ReLU between layers only; the last layer stays linear.
                    if idx < last_fi:
                        hidden = tf.nn.relu(hidden)
                    self.summaries_aux.append(
                        tf.contrib.layers.summarize_activation(hidden))
                self.fi = hidden

            # Successor-representation head mapping to the latent psi(s).
            with tf.variable_scope("succ_feat"):
                # stop_gradient keeps the SF loss from updating the encoder.
                hidden = tf.stop_gradient(tf.nn.relu(self.fi))
                last_sf = len(self.config.sf_layers) - 1
                for idx, width in enumerate(self.config.sf_layers):
                    hidden = layers.fully_connected(
                        hidden,
                        num_outputs=width,
                        activation_fn=None,
                        variables_collections=tf.get_collection("variables"),
                        outputs_collections="activations",
                        scope="sf_{}".format(idx))
                    if idx < last_sf:
                        hidden = tf.nn.relu(hidden)
                    self.summaries_sf.append(
                        tf.contrib.layers.summarize_activation(hidden))
                self.sf = hidden

            # Embed the action taken, for use in next-frame prediction.
            with tf.variable_scope("action_fc"):
                self.actions_placeholder = tf.placeholder(shape=[None],
                                                          dtype=tf.float32,
                                                          name="Actions")
                action_embedding = layers.fully_connected(
                    self.actions_placeholder[..., None],
                    num_outputs=self.config.fc_layers[-1],
                    activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="action_fc")

            # Decoder from the latent fi(s) plus action to the next state.
            with tf.variable_scope("aux_fc"):
                hidden = tf.nn.relu(tf.add(self.fi, action_embedding))
                last_aux = len(self.config.aux_fc_layers) - 1
                for idx, width in enumerate(self.config.aux_fc_layers):
                    hidden = layers.fully_connected(
                        hidden,
                        num_outputs=width,
                        activation_fn=None,
                        variables_collections=tf.get_collection("variables"),
                        outputs_collections="activations",
                        scope="aux_fc_{}".format(idx))
                    if idx < last_aux:
                        hidden = tf.nn.relu(hidden)
                    self.summaries_aux.append(
                        tf.contrib.layers.summarize_activation(hidden))
                self.next_obs = tf.reshape(hidden, tuple([-1] + frame_dims))

            # The 'global' network only holds variables; stop here.
            if scope == 'global':
                return

            # Target successor representation at the next time step.
            self.target_sf = tf.placeholder(
                shape=[None, self.config.sf_layers[-1]],
                dtype=tf.float32,
                name="target_SF")

            # Target observation at the next time step, used for the
            # self-supervised next-frame prediction.
            self.target_next_obs = tf.placeholder(
                shape=[None] + frame_dims,
                dtype=tf.float32,
                name="target_next_obs")

            # Predicted and actual next frame, concatenated along axis 2,
            # as one image summary.
            side_by_side = tf.concat([self.next_obs, self.target_next_obs], 2)
            self.image_summaries.append(
                tf.summary.image('next', side_by_side, max_outputs=30))

            # Losses.
            with tf.name_scope('sf_loss'):
                # TD error of the successor representation.
                td_error = self.target_sf - self.sf
                self.sf_loss = tf.reduce_mean(huber_loss(td_error))

            with tf.name_scope('aux_loss'):
                # Next-frame prediction error, scaled by aux_coef.
                frame_error = self.next_obs - self.target_next_obs
                self.aux_loss = tf.reduce_mean(
                    self.config.aux_coef * huber_loss(frame_error))

            scope_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            clip_norm = self.config.gradient_clip_norm_value

            raw_sf = tf.gradients(self.sf_loss, scope_vars)
            clipped_sf, _ = tf.clip_by_global_norm(raw_sf, clip_norm)

            raw_aux = tf.gradients(self.aux_loss, scope_vars)
            clipped_aux, _ = tf.clip_by_global_norm(raw_aux, clip_norm)

            sf_extras = [
                tf.summary.scalar('avg_sf_loss', self.sf_loss),
                gradient_summaries(zip(clipped_sf, scope_vars)),
            ]
            self.merged_summary_sf = tf.summary.merge(
                self.summaries_sf + sf_extras)

            aux_extras = [
                tf.summary.scalar('aux_loss', self.aux_loss),
                gradient_summaries(zip(clipped_aux, scope_vars)),
            ]
            self.merged_summary_aux = tf.summary.merge(
                self.image_summaries + self.summaries_aux + aux_extras)

            # Clipped local gradients are applied to the 'global' variables.
            shared_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads_sf = self.network_optimizer.apply_gradients(
                zip(clipped_sf, shared_vars))
            self.apply_grads_aux = self.network_optimizer.apply_gradients(
                zip(clipped_aux, shared_vars))
    def gradients_and_summaries(self):
        """Create clipped gradients, merged summaries and apply ops.

        Gradients of ``sf_loss``, ``aux_loss``, ``option_loss`` and
        ``critic_loss`` are taken w.r.t. the trainable variables of
        ``self.scope`` and clipped by global norm using
        ``config.gradient_clip_norm_value``.  The clipped gradients are
        applied to the trainable variables of the ``'global'`` scope.
        """
        scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       self.scope)

        # Unclipped gradients, one list per loss.
        raw_sf = tf.gradients(self.sf_loss, scope_vars)
        raw_aux = tf.gradients(self.aux_loss, scope_vars)
        raw_option = tf.gradients(self.option_loss, scope_vars)
        raw_primitive = tf.gradients(self.critic_loss, scope_vars)

        self.var_norms = tf.global_norm(scope_vars)

        # Global-norm clipping; the pre-clip norms are kept on the instance.
        clip_norm = self.config.gradient_clip_norm_value
        clipped_sf, self.grad_norms_sf = tf.clip_by_global_norm(
            raw_sf, clip_norm)
        clipped_aux, self.grad_norms_aux = tf.clip_by_global_norm(
            raw_aux, clip_norm)
        clipped_option, self.grad_norms_option = tf.clip_by_global_norm(
            raw_option, clip_norm)
        clipped_primitive, self.grad_norms_primitive_option = \
            tf.clip_by_global_norm(raw_primitive, clip_norm)

        sf_extras = [
            tf.summary.scalar('avg_sf_loss', self.sf_loss),
            tf.summary.scalar('gradient_norm_sf', tf.global_norm(raw_sf)),
            tf.summary.scalar('cliped_gradient_norm_sf',
                              tf.global_norm(clipped_sf)),
            gradient_summaries(zip(clipped_sf, scope_vars)),
        ]
        self.merged_summary_sf = tf.summary.merge(
            self.summaries_sf + sf_extras)

        aux_extras = [
            tf.summary.scalar('aux_loss', self.aux_loss),
            tf.summary.scalar('gradient_norm_aux', tf.global_norm(raw_aux)),
            tf.summary.scalar('cliped_gradient_norm_aux',
                              tf.global_norm(clipped_aux)),
            gradient_summaries(zip(clipped_aux, scope_vars)),
        ]
        self.merged_summary_aux = tf.summary.merge(
            self.image_summaries + self.summaries_aux + aux_extras)

        option_extras = [
            tf.summary.scalar('avg_critic_loss', self.critic_loss),
            tf.summary.scalar('avg_termination_loss', self.term_loss),
            tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
            tf.summary.scalar('avg_policy_loss', self.policy_loss),
            tf.summary.scalar('gradient_norm_option',
                              tf.global_norm(raw_option)),
            tf.summary.scalar('cliped_gradient_norm_option',
                              tf.global_norm(clipped_option)),
            gradient_summaries(zip(clipped_option, scope_vars)),
        ]
        # Concatenation yields a new list, so self.summaries_option itself
        # is not mutated by the append below.
        option_summaries = self.summaries_option + option_extras
        if self.config.eigen:
            option_summaries.append(
                tf.summary.scalar('avg_eigen_critic_loss',
                                  self.eigen_critic_loss))
        self.merged_summary_option = tf.summary.merge(option_summaries)

        # Clipped local gradients are applied to the 'global' variables.
        shared_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        'global')
        optimizer = self.network_optimizer
        self.apply_grads_sf = optimizer.apply_gradients(
            zip(clipped_sf, shared_vars))
        self.apply_grads_aux = optimizer.apply_gradients(
            zip(clipped_aux, shared_vars))
        self.apply_grads_option = optimizer.apply_gradients(
            zip(clipped_option, shared_vars))
        self.apply_grads_primitive_option = optimizer.apply_gradients(
            zip(clipped_primitive, shared_vars))