def gradients_and_summaries(self):
    """Create gradient/update ops and one merged summary op per loss.

    For the successor-feature (``sf``), auxiliary (``aux``) and option
    losses, the pair returned by ``self.take_gradient`` is stored on
    ``self.grads_*`` / ``self.apply_grads_*``. Each merged summary combines
    the previously collected summaries with loss scalars and the output of
    ``gradient_summaries`` for the trainable variables under ``self.scope``.
    """
    scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   self.scope)

    # Gradients and update ops.
    sf_pair = self.take_gradient(self.sf_loss)
    self.grads_sf, self.apply_grads_sf = sf_pair
    aux_pair = self.take_gradient(self.aux_loss)
    self.grads_aux, self.apply_grads_aux = aux_pair
    option_pair = self.take_gradient(self.option_loss)
    self.grads_option, self.apply_grads_option = option_pair

    # Summaries: successor features.
    sf_extra = [
        tf.summary.scalar('avg_sf_loss', self.sf_loss),
        gradient_summaries(zip(self.grads_sf, scope_vars)),
    ]
    self.merged_summary_sf = tf.summary.merge(self.summaries_sf + sf_extra)

    # Summaries: auxiliary (next-observation) task, including images.
    aux_extra = [
        tf.summary.scalar('aux_loss', self.aux_loss),
        gradient_summaries(zip(self.grads_aux, scope_vars)),
    ]
    self.merged_summary_aux = tf.summary.merge(
        self.image_summaries + self.summaries_aux + aux_extra)

    # Summaries: option losses.
    option_extra = [
        tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
        tf.summary.scalar('avg_policy_loss', self.policy_loss),
        tf.summary.scalar('avg_eigen_critic_loss', self.eigen_critic_loss),
        gradient_summaries(zip(self.grads_option, scope_vars)),
    ]
    self.merged_summary_option = tf.summary.merge(
        self.summaries_option + option_extra)
def gradients_and_summaries(self):
    """Create gradient/update ops and one merged summary op per loss.

    Handles five losses: successor features (``sf``), auxiliary (``aux``),
    critic, option and termination (``term``). For each one the pair
    returned by ``self.take_gradient`` is stored on ``self.grads_*`` /
    ``self.apply_grads_*``, and a merged summary op is built from the
    previously collected summaries, loss scalars and the output of
    ``gradient_summaries`` for the trainable variables under ``self.scope``.
    """
    scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   self.scope)

    # Gradients and update ops.
    sf_pair = self.take_gradient(self.sf_loss)
    self.grads_sf, self.apply_grads_sf = sf_pair
    aux_pair = self.take_gradient(self.aux_loss)
    self.grads_aux, self.apply_grads_aux = aux_pair
    critic_pair = self.take_gradient(self.critic_loss)
    self.grads_critic, self.apply_grads_critic = critic_pair
    option_pair = self.take_gradient(self.option_loss)
    self.grads_option, self.apply_grads_option = option_pair
    term_pair = self.take_gradient(self.term_loss)
    self.grads_term, self.apply_grads_term = term_pair

    # Summaries: successor features.
    sf_extra = [
        tf.summary.scalar('avg_sf_loss', self.sf_loss),
        gradient_summaries(zip(self.grads_sf, scope_vars)),
    ]
    self.merged_summary_sf = tf.summary.merge(self.summaries_sf + sf_extra)

    # Summaries: auxiliary task, including images.
    aux_extra = [
        tf.summary.scalar('aux_loss', self.aux_loss),
        gradient_summaries(zip(self.grads_aux, scope_vars)),
    ]
    self.merged_summary_aux = tf.summary.merge(
        self.image_summaries + self.summaries_aux + aux_extra)

    # Summaries: option losses.
    # NOTE: a learning-rate scalar ('LR') used to be listed here but is
    # disabled in the original code.
    option_extra = [
        tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
        tf.summary.scalar('avg_policy_loss', self.policy_loss),
        tf.summary.scalar('random_option_prob', self.random_option_prob),
        tf.summary.scalar('avg_eigen_critic_loss', self.eigen_critic_loss),
        gradient_summaries(zip(self.grads_option, scope_vars)),
    ]
    self.merged_summary_option = tf.summary.merge(
        self.summaries_option + option_extra)

    # Summaries: termination.
    term_extra = [
        tf.summary.scalar('avg_termination_loss', self.term_loss),
        tf.summary.scalar('avg_termination_error',
                          tf.reduce_mean(self.term_err)),
        gradient_summaries(zip(self.grads_term, scope_vars)),
    ]
    self.merged_summary_term = tf.summary.merge(
        self.summaries_term + term_extra)

    # Summaries: critic.
    critic_extra = [
        tf.summary.scalar('avg_critic_loss', self.critic_loss),
        gradient_summaries(zip(self.grads_critic, scope_vars)),
    ]
    self.merged_summary_critic = tf.summary.merge(
        self.summaries_critic + critic_extra)
def __init__(self, scope, config, action_size):
    """Build a single-layer successor-feature network under ``scope``.

    Args:
        scope: Name of the variable scope to build in. When it is not
            ``'global'``, the loss, clipped gradients, summaries and the
            op applying those gradients to the ``'global'`` trainable
            variables are created as well.
        config: Settings object; the attributes read here are
            ``input_size``, ``conv_layers``, ``fc_layers``, ``sf_layers``,
            ``deconv_layers``, ``nb_options``, ``num_agents``, ``lr``,
            ``network_optimizer`` and ``gradient_clip_value``.
        action_size: Stored on the instance; not otherwise used here.
    """
    self._scope = scope
    self.nb_states = config.input_size[0] * config.input_size[1]
    self._conv_layers = config.conv_layers
    self._fc_layers = config.fc_layers
    self._action_size = action_size
    self._nb_options = config.nb_options
    self._nb_envs = config.num_agents
    self._config = config
    self.option = 0
    self._sf_layers = config.sf_layers
    self._deconv_layers = config.deconv_layers
    self._network_optimizer = config.network_optimizer(
        self._config.lr, name='network_optimizer')

    with tf.variable_scope(scope):
        self.observation = tf.placeholder(
            shape=[None, self.nb_states], dtype=tf.float32, name="Inputs")
        # Linear map from the observation to the successor features.
        self.sf = layers.fully_connected(
            self.observation,
            num_outputs=self.nb_states,
            activation_fn=None,
            variables_collections=tf.get_collection("variables"),
            outputs_collections="activations",
            scope="sf")

        if scope == 'global':
            # The global network only holds the shared variables.
            return

        self.target_sf = tf.placeholder(
            shape=[None, self.nb_states], dtype=tf.float32,
            name="target_SF")

        with tf.name_scope('sf_loss'):
            td_error = self.target_sf - self.sf
            self.sf_loss = tf.reduce_mean(tf.square(td_error))

        self.loss = self.sf_loss
        scalar_summaries = [tf.summary.scalar('avg_sf_loss', self.sf_loss)]

        worker_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        raw_grads = tf.gradients(self.loss, worker_vars)
        self.var_norms = tf.global_norm(worker_vars)
        clipped_grads, self.grad_norms = tf.clip_by_global_norm(
            raw_grads, self._config.gradient_clip_value)

        norm_summaries = [
            tf.summary.scalar('gradient_norm', tf.global_norm(raw_grads)),
            tf.summary.scalar('cliped_gradient_norm',
                              tf.global_norm(clipped_grads)),
            gradient_summaries(zip(clipped_grads, worker_vars)),
        ]
        self.merged_summary = tf.summary.merge(
            scalar_summaries + norm_summaries)

        # Gradients computed on this scope's variables are applied to the
        # variables collected under the 'global' scope.
        shared_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads = self._network_optimizer.apply_gradients(
            zip(clipped_grads, shared_vars))
def __init__(self, scope, config, action_size):
    """Build the encoder, successor-feature head and next-frame decoder.

    The graph flattens the observation, encodes it with fully connected
    layers into ``self.fi``, predicts successor features ``self.sf`` from a
    gradient-stopped, normalised copy of ``self.fi``, and predicts the next
    observation ``self.next_obs`` from ``self.fi`` plus an embedding of the
    action. When ``scope`` is not ``'global'``, losses, clipped gradients,
    summaries and the ops applying the gradients to the ``'global'``
    trainable variables are created too.

    Args:
        scope: Name of the variable scope to build in.
        config: Settings object; the attributes read here are
            ``input_size``, ``history_size``, ``fc_layers``, ``sf_layers``,
            ``aux_fc_layers``, ``nb_options``, ``num_agents``, ``lr``,
            ``network_optimizer``, ``aux_coef`` and
            ``gradient_clip_norm_value``.
        action_size: Stored on the instance; not otherwise used here.
    """
    self._scope = scope
    self.nb_states = config.input_size[0] * config.input_size[1]
    self.fc_layers = config.fc_layers
    self.sf_layers = config.sf_layers
    self.aux_fc_layers = config.aux_fc_layers
    self.action_size = action_size
    self.nb_options = config.nb_options
    self.nb_envs = config.num_agents
    self.config = config
    self.network_optimizer = config.network_optimizer(
        self.config.lr, name='network_optimizer')

    with tf.variable_scope(scope):
        self.observation = tf.placeholder(
            shape=[None, config.input_size[0], config.input_size[1],
                   config.history_size],
            dtype=tf.float32, name="Inputs")

        self.image_summaries = []
        self.image_summaries.append(
            tf.summary.image('input', self.observation, max_outputs=30))
        self.summaries_sf = []
        self.summaries_aux = []

        out = self.observation
        out = layers.flatten(out, scope="flatten")

        # Encoder: observation -> latent features fi.
        with tf.variable_scope("fc"):
            for i, nb_filt in enumerate(self.fc_layers):
                out = layers.fully_connected(
                    out, num_outputs=nb_filt, activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="fc_{}".format(i))
                # The last encoder layer is left linear.
                if i < len(self.fc_layers) - 1:
                    out = tf.nn.relu(out)
                self.summaries_sf.append(
                    tf.contrib.layers.summarize_activation(out))
                self.summaries_aux.append(
                    tf.contrib.layers.summarize_activation(out))
            self.fi = out

        # Successor-feature head; stop_gradient keeps the SF loss from
        # updating the encoder.
        with tf.variable_scope("sf"):
            out = self.layer_norm_fn(self.fi, relu=True)
            out = tf.stop_gradient(out)
            for i, nb_filt in enumerate(self.sf_layers):
                out = layers.fully_connected(
                    out, num_outputs=nb_filt, activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="sf_{}".format(i))
                if i < len(self.sf_layers) - 1:
                    out = tf.nn.relu(out)
                self.summaries_sf.append(
                    tf.contrib.layers.summarize_activation(out))
            self.sf = out

        # Embedding of the action, sized to match fi so they can be added.
        with tf.variable_scope("action_fc"):
            self.actions_placeholder = tf.placeholder(
                shape=[None], dtype=tf.float32, name="Actions")
            # NOTE(review): ``i`` is the index left over from the loop
            # above, so the scope name depends on len(sf_layers). Kept
            # as-is because changing it would rename the variables.
            actions = layers.fully_connected(
                self.actions_placeholder[..., None],
                num_outputs=self.fc_layers[-1], activation_fn=None,
                variables_collections=tf.get_collection("variables"),
                outputs_collections="activations",
                scope="action_fc{}".format(i))

        # Decoder: fi + action embedding -> predicted next observation.
        with tf.variable_scope("aux_fc"):
            out = tf.add(self.fi, actions)
            for i, nb_filt in enumerate(self.aux_fc_layers):
                out = layers.fully_connected(
                    out, num_outputs=nb_filt, activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="aux_fc_{}".format(i))
                if i < len(self.aux_fc_layers) - 1:
                    out = tf.nn.relu(out)
                self.summaries_aux.append(
                    tf.contrib.layers.summarize_activation(out))
            self.next_obs = tf.reshape(
                out, (-1, config.input_size[0], config.input_size[1],
                      config.history_size))
            self.image_summaries.append(
                tf.summary.image('next_obs', self.next_obs, max_outputs=30))

        if scope != 'global':
            self.target_sf = tf.placeholder(
                shape=[None, self.sf_layers[-1]], dtype=tf.float32,
                name="target_SF")
            self.target_next_obs = tf.placeholder(
                shape=[None, config.input_size[0], config.input_size[1],
                       config.history_size],
                dtype=tf.float32, name="target_next_obs")
            self.image_summaries.append(
                tf.summary.image('target_next_obs', self.target_next_obs,
                                 max_outputs=30))

            # SVD of an externally fed successor-feature matrix.
            self.matrix_sf = tf.placeholder(
                shape=[self.nb_states, self.sf_layers[-1]],
                dtype=tf.float32, name="matrix_sf")
            self.s, self.u, self.v = tf.svd(self.matrix_sf)

            with tf.name_scope('sf_loss'):
                sf_td_error = self.target_sf - self.sf
                self.sf_loss = tf.reduce_mean(huber_loss(sf_td_error))

            with tf.name_scope('aux_loss'):
                aux_error = self.next_obs - self.target_next_obs
                self.aux_loss = tf.reduce_mean(
                    self.config.aux_coef * huber_loss(aux_error))

            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            gradients_sf = tf.gradients(self.sf_loss, local_vars)
            gradients_aux = tf.gradients(self.aux_loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads_sf, self.grad_norms_sf = tf.clip_by_global_norm(
                gradients_sf, self.config.gradient_clip_norm_value)
            grads_aux, self.grad_norms_aux = tf.clip_by_global_norm(
                gradients_aux, self.config.gradient_clip_norm_value)

            self.merged_summary_sf = tf.summary.merge(
                self.summaries_sf
                + [tf.summary.scalar('avg_sf_loss', self.sf_loss)]
                + [
                    tf.summary.scalar('gradient_norm_sf',
                                      tf.global_norm(gradients_sf)),
                    tf.summary.scalar('cliped_gradient_norm_sf',
                                      tf.global_norm(grads_sf)),
                    gradient_summaries(zip(grads_sf, local_vars)),
                ])

            # The aux gradient-norm scalars previously reused the "_sf"
            # names, so they were not identifiable as aux norms in
            # TensorBoard; they now carry the "_aux" suffix.
            self.merged_summary_aux = tf.summary.merge(
                self.image_summaries
                + self.summaries_aux
                + [tf.summary.scalar('aux_loss', self.aux_loss)]
                + [
                    tf.summary.scalar('gradient_norm_aux',
                                      tf.global_norm(gradients_aux)),
                    tf.summary.scalar('cliped_gradient_norm_aux',
                                      tf.global_norm(grads_aux)),
                    gradient_summaries(zip(grads_aux, local_vars)),
                ])

            # Gradients computed on this scope's variables are applied to
            # the variables collected under the 'global' scope.
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads_sf = self.network_optimizer.apply_gradients(
                zip(grads_sf, global_vars))
            self.apply_grads_aux = self.network_optimizer.apply_gradients(
                zip(grads_aux, global_vars))
def __init__(self, scope, config, action_size):
    """Build the encoder, successor-feature head and next-frame decoder.

    Args:
        scope: Name of the variable scope to build in. When it is not
            ``'global'``, losses, clipped gradients, summaries and the ops
            applying the gradients to the ``'global'`` trainable variables
            are created as well.
        config: Settings object; the attributes read here are
            ``input_size``, ``history_size``, ``fc_layers``, ``sf_layers``,
            ``aux_fc_layers``, ``lr``, ``network_optimizer``, ``aux_coef``
            and ``gradient_clip_norm_value``.
        action_size: Accepted for interface compatibility; unused here.
    """
    self._scope = scope
    # Size of the flattened input space.
    self.nb_states = config.input_size[0] * config.input_size[1]
    self.config = config

    # Buffers holding the summaries.
    self.image_summaries = []
    self.summaries_sf = []
    self.summaries_aux = []

    # Optimizer instance.
    self.network_optimizer = config.network_optimizer(
        self.config.lr, name='network_optimizer')

    with tf.variable_scope(scope):
        self.observation = tf.placeholder(
            shape=[None, config.input_size[0], config.input_size[1],
                   config.history_size],
            dtype=tf.float32,
            name="Inputs")
        net = layers.flatten(self.observation, scope="flatten")

        # State encoder into the latent fi(s).
        with tf.variable_scope("fi"):
            last_fi = len(self.config.fc_layers) - 1
            for idx, width in enumerate(self.config.fc_layers):
                net = layers.fully_connected(
                    net,
                    num_outputs=width,
                    activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="fc_fi_{}".format(idx))
                # The final encoder layer stays linear.
                if idx < last_fi:
                    net = tf.nn.relu(net)
                self.summaries_aux.append(
                    tf.contrib.layers.summarize_activation(net))
            self.fi = net

        # Successor representation mapping to the latent psi(s); the
        # stop_gradient keeps this head from updating the encoder.
        with tf.variable_scope("succ_feat"):
            net = tf.stop_gradient(tf.nn.relu(self.fi))
            last_sf = len(self.config.sf_layers) - 1
            for idx, width in enumerate(self.config.sf_layers):
                net = layers.fully_connected(
                    net,
                    num_outputs=width,
                    activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="sf_{}".format(idx))
                if idx < last_sf:
                    net = tf.nn.relu(net)
                self.summaries_sf.append(
                    tf.contrib.layers.summarize_activation(net))
            self.sf = net

        # Embed the action taken in the environment for next-frame
        # prediction.
        with tf.variable_scope("action_fc"):
            self.actions_placeholder = tf.placeholder(
                shape=[None], dtype=tf.float32, name="Actions")
            action_embedding = layers.fully_connected(
                self.actions_placeholder[..., None],
                num_outputs=self.config.fc_layers[-1],
                activation_fn=None,
                variables_collections=tf.get_collection("variables"),
                outputs_collections="activations",
                scope="action_fc")

        # Decoder from the latent space fi(s) to the next state.
        with tf.variable_scope("aux_fc"):
            net = tf.add(self.fi, action_embedding)
            net = tf.nn.relu(net)
            last_aux = len(self.config.aux_fc_layers) - 1
            for idx, width in enumerate(self.config.aux_fc_layers):
                net = layers.fully_connected(
                    net,
                    num_outputs=width,
                    activation_fn=None,
                    variables_collections=tf.get_collection("variables"),
                    outputs_collections="activations",
                    scope="aux_fc_{}".format(idx))
                if idx < last_aux:
                    net = tf.nn.relu(net)
                self.summaries_aux.append(
                    tf.contrib.layers.summarize_activation(net))
            self.next_obs = tf.reshape(
                net,
                (-1, config.input_size[0], config.input_size[1],
                 config.history_size))

        if scope == 'global':
            # The global network only holds the shared variables.
            return

        # Target successor representation at the next time step.
        self.target_sf = tf.placeholder(
            shape=[None, self.config.sf_layers[-1]],
            dtype=tf.float32,
            name="target_SF")

        # Target observation at the next time step, for self-supervised
        # prediction of the next frame.
        self.target_next_obs = tf.placeholder(
            shape=[None, config.input_size[0], config.input_size[1],
                   config.history_size],
            dtype=tf.float32,
            name="target_next_obs")

        # Predicted and actual next frame, concatenated along axis 2, for
        # TensorBoard.
        comparison = tf.concat([self.next_obs, self.target_next_obs], 2)
        self.image_summaries.append(
            tf.summary.image('next', comparison, max_outputs=30))

        # Losses.
        with tf.name_scope('sf_loss'):
            # TD error of the successor representations.
            td_error = self.target_sf - self.sf
            self.sf_loss = tf.reduce_mean(huber_loss(td_error))

        with tf.name_scope('aux_loss'):
            # Huber loss on the next-frame prediction, scaled by aux_coef.
            frame_error = self.next_obs - self.target_next_obs
            self.aux_loss = tf.reduce_mean(
                self.config.aux_coef * huber_loss(frame_error))

        worker_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope)

        # The global norms returned by the clipping calls are not used.
        raw_sf = tf.gradients(self.sf_loss, worker_vars)
        clipped_sf, _ = tf.clip_by_global_norm(
            raw_sf, self.config.gradient_clip_norm_value)
        raw_aux = tf.gradients(self.aux_loss, worker_vars)
        clipped_aux, _ = tf.clip_by_global_norm(
            raw_aux, self.config.gradient_clip_norm_value)

        sf_extra = [
            tf.summary.scalar('avg_sf_loss', self.sf_loss),
            gradient_summaries(zip(clipped_sf, worker_vars)),
        ]
        self.merged_summary_sf = tf.summary.merge(
            self.summaries_sf + sf_extra)

        aux_extra = [
            tf.summary.scalar('aux_loss', self.aux_loss),
            gradient_summaries(zip(clipped_aux, worker_vars)),
        ]
        self.merged_summary_aux = tf.summary.merge(
            self.image_summaries + self.summaries_aux + aux_extra)

        # Gradients computed on this scope's variables are applied to the
        # variables collected under the 'global' scope.
        shared_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads_sf = self.network_optimizer.apply_gradients(
            zip(clipped_sf, shared_vars))
        self.apply_grads_aux = self.network_optimizer.apply_gradients(
            zip(clipped_aux, shared_vars))
def gradients_and_summaries(self):
    """Create clipped gradients, merged summaries and update ops.

    Gradients of the successor-feature, auxiliary, option and critic losses
    are taken with respect to the trainable variables under ``self.scope``,
    clipped by global norm with ``config.gradient_clip_norm_value``, and
    applied to the trainable variables collected under the ``'global'``
    scope. Merged summary ops are built for the sf, aux and option losses;
    the eigen-critic loss scalar is included only when ``config.eigen`` is
    truthy.
    """
    worker_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    self.scope)

    raw_sf = tf.gradients(self.sf_loss, worker_vars)
    raw_aux = tf.gradients(self.aux_loss, worker_vars)
    raw_option = tf.gradients(self.option_loss, worker_vars)
    raw_primitive = tf.gradients(self.critic_loss, worker_vars)

    self.var_norms = tf.global_norm(worker_vars)

    clip_value = self.config.gradient_clip_norm_value
    clipped_sf, self.grad_norms_sf = tf.clip_by_global_norm(
        raw_sf, clip_value)
    clipped_aux, self.grad_norms_aux = tf.clip_by_global_norm(
        raw_aux, clip_value)
    clipped_option, self.grad_norms_option = tf.clip_by_global_norm(
        raw_option, clip_value)
    clipped_primitive, self.grad_norms_primitive_option = (
        tf.clip_by_global_norm(raw_primitive, clip_value))

    # Successor-feature summaries.
    sf_items = self.summaries_sf + [
        tf.summary.scalar('avg_sf_loss', self.sf_loss)
    ]
    sf_items = sf_items + [
        tf.summary.scalar('gradient_norm_sf', tf.global_norm(raw_sf)),
        tf.summary.scalar('cliped_gradient_norm_sf',
                          tf.global_norm(clipped_sf)),
        gradient_summaries(zip(clipped_sf, worker_vars)),
    ]
    self.merged_summary_sf = tf.summary.merge(sf_items)

    # Auxiliary-task summaries, including images.
    aux_items = self.image_summaries + self.summaries_aux + [
        tf.summary.scalar('aux_loss', self.aux_loss)
    ]
    aux_items = aux_items + [
        tf.summary.scalar('gradient_norm_aux', tf.global_norm(raw_aux)),
        tf.summary.scalar('cliped_gradient_norm_aux',
                          tf.global_norm(clipped_aux)),
        gradient_summaries(zip(clipped_aux, worker_vars)),
    ]
    self.merged_summary_aux = tf.summary.merge(aux_items)

    # Option summaries.
    option_items = self.summaries_option + [
        tf.summary.scalar('avg_critic_loss', self.critic_loss),
        tf.summary.scalar('avg_termination_loss', self.term_loss),
        tf.summary.scalar('avg_entropy_loss', self.entropy_loss),
        tf.summary.scalar('avg_policy_loss', self.policy_loss),
        tf.summary.scalar('gradient_norm_option',
                          tf.global_norm(raw_option)),
        tf.summary.scalar('cliped_gradient_norm_option',
                          tf.global_norm(clipped_option)),
        gradient_summaries(zip(clipped_option, worker_vars)),
    ]
    if self.config.eigen:
        option_items.append(
            tf.summary.scalar('avg_eigen_critic_loss',
                              self.eigen_critic_loss))
    self.merged_summary_option = tf.summary.merge(option_items)

    # Gradients computed on this scope's variables are applied to the
    # variables collected under the 'global' scope.
    shared_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    'global')
    optimizer = self.network_optimizer
    self.apply_grads_sf = optimizer.apply_gradients(
        zip(clipped_sf, shared_vars))
    self.apply_grads_aux = optimizer.apply_gradients(
        zip(clipped_aux, shared_vars))
    self.apply_grads_option = optimizer.apply_gradients(
        zip(clipped_option, shared_vars))
    self.apply_grads_primitive_option = optimizer.apply_gradients(
        zip(clipped_primitive, shared_vars))