def build_graph(self): """ Builds computational graph for policy """ with tf.variable_scope(self.name): # build the actual policy network self.obs_var, self.mean_var = create_mlp(name='mean_network', output_dim=self.action_dim, hidden_sizes=self.hidden_sizes, hidden_nonlinearity=self.hidden_nonlinearity, output_nonlinearity=self.output_nonlinearity, input_dim=(None, self.obs_dim,) ) with tf.variable_scope("log_std_network"): log_std_var = tf.get_variable(name='log_std_var', shape=(1, self.action_dim,), dtype=tf.float32, initializer=tf.constant_initializer(self.init_log_std), trainable=self.learn_std ) self.log_std_var = tf.maximum(log_std_var, self.min_log_std, name='log_std') # symbolically define sampled action and distribution self.action_var = self.mean_var + tf.random_normal(shape=tf.shape(self.mean_var)) * tf.exp(log_std_var) self._dist = DiagonalGaussian(self.action_dim) # save the policy's trainable variables in dicts current_scope = tf.get_default_graph().get_name_scope() trainable_policy_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope) self.policy_params = OrderedDict([(remove_scope_from_name(var.name, current_scope), var) for var in trainable_policy_vars])
def build_graph(self):
    """
    Builds the TF computational graph for a categorical (discrete-action)
    MLP policy.

    Creates (under ``self.name`` variable scope):
      - ``self.obs_var`` / ``self.prob_var``: observation placeholder and the
        action-probability network output (MLP built by ``create_mlp``).
      - ``self.action_var``: one action index sampled from the categorical
        distribution over ``self.prob_var``.
      - ``self._dist``: a ``Categorical`` distribution helper.
      - ``self.policy_params``: OrderedDict mapping unscoped variable names
        to the trainable variables of this policy.
    """
    with tf.variable_scope(self.name):
        # Probability network over the discrete action set.
        self.obs_var, self.prob_var = create_mlp(
            name='prob_network',
            output_dim=self.action_dim,
            hidden_sizes=self.hidden_sizes,
            hidden_nonlinearity=self.hidden_nonlinearity,
            output_nonlinearity=self.output_nonlinearity,
            input_dim=(None, self.obs_dim,),
        )

        # Symbolically sample one action; tf.random.categorical expects
        # (unnormalized) log-probabilities, hence the tf.log.
        self.action_var = tf.random.categorical(tf.log(self.prob_var), 1)
        self._dist = Categorical(self.action_dim)

        # Collect this policy's trainable variables, keyed by name with the
        # enclosing scope prefix stripped.
        scope_name = tf.get_default_graph().get_name_scope()
        policy_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name)
        self.policy_params = OrderedDict(
            (remove_scope_from_name(v.name, scope_name), v)
            for v in policy_vars)
def _create_placeholders_for_vars(self, scope,
                                  graph_keys=tf.GraphKeys.TRAINABLE_VARIABLES):
    """
    Creates one float32 placeholder per variable collected under *scope*.

    Args:
        scope (str): variable scope to collect from; its first path component
            is stripped from each variable name to form the dict key.
        graph_keys: TF collection to search (default: trainable variables).

    Returns:
        OrderedDict: unscoped variable name -> placeholder named
        ``"<name>_ph"`` with the variable's shape.
    """
    root_scope = scope.split('/')[0]
    placeholders = OrderedDict()
    for var in tf.get_collection(graph_keys, scope=scope):
        key = remove_scope_from_name(var.name, root_scope)
        placeholders[key] = tf.placeholder(
            tf.float32, shape=var.shape, name="%s_ph" % key)
    return placeholders