import tensorflow as tf

# The helper `linear` (fully connected layer), `assert_shape`, and layer-size
# globals such as l1, l2, num_hidden, dim_hidden, and action_scale are assumed
# to be defined elsewhere in the codebase.


def policy(obs, dimA, reuse=False):
    """Deterministic policy network: two ReLU hidden layers, tanh-squashed output."""
    with tf.variable_scope('policy', reuse=reuse):
        h1 = tf.nn.relu(linear(obs, dout=l1, name='h1'))
        h2 = tf.nn.relu(linear(h1, dout=l2, name='h2'))
        h3 = tf.identity(linear(h2, dout=dimA), name='h3')
        action = tf.nn.tanh(h3, name='h4-action') * action_scale
    return action

def qfunction(obs, act, reuse=False):
    """State-action value network: the action is concatenated after the first hidden layer."""
    with tf.variable_scope('qfunc', reuse=reuse):
        h1 = tf.nn.relu(linear(obs, dout=l1, name='h1'))
        h1a = tf.concat([h1, act], axis=1)
        h2 = tf.nn.relu(linear(h1a, dout=l2, name='h2'))
        qs = linear(h2, dout=1)
        q = tf.squeeze(qs, [1], name='h3-q')
    return q

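# Hypothetical wiring sketch (not from the source): it shows why qfunction
# takes a reuse flag. Building the critic once and calling it again with
# reuse=True evaluates Q(s, pi(s)) with the same 'qfunc' weights for the actor
# objective. dimO and dimA are assumed observation/action dimensions.
def build_actor_critic_example(dimO, dimA):
    obs_ph = tf.placeholder(tf.float32, [None, dimO], name='obs')
    act_ph = tf.placeholder(tf.float32, [None, dimA], name='act')
    pi = policy(obs_ph, dimA)                  # deterministic action pi(s)
    q_act = qfunction(obs_ph, act_ph)          # critic input: Q(s, a)
    q_pi = qfunction(obs_ph, pi, reuse=True)   # actor objective: Q(s, pi(s))
    return pi, q_act, q_pi
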
def inner(obs, dU, reuse=False):
    """Policy network: num_hidden ReLU layers, then a tanh output rescaled to the action bounds."""
    out = obs
    with tf.variable_scope('policy', reuse=reuse):
        for i in range(num_hidden):
            out = tf.nn.relu(
                linear(out, dout=dim_hidden, name='layer_%d' % i))
        out = linear(out, dout=dU, init_scale=0.01)
        pol = tf.nn.tanh(out) * (diff / 2) + mid
    return pol

def compute_params_tensor(self, logit):
    """Map the final hidden layer to the mean and (clamped) std of a diagonal Gaussian."""
    mu = linear(logit, dout=self.dU, name='mu')
    sigma = tf.exp(linear(logit, dout=self.dU, name='logsig'))
    # params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    if self.min_var > 0:
        sigma = tf.maximum(self.min_var, sigma)
    if self.mean_clamp:
        mu = tf.nn.tanh(mu) * self.mean_clamp
    dist_params = [mu, sigma]
    self.params = dist_params

def inner(state, action, reuse=False):
    """Q-network: the action is concatenated after the state hidden layers."""
    sout = state
    dU = int(action.get_shape()[1])
    with tf.variable_scope('q_function', reuse=reuse) as vs:
        for i in range(num_hidden):
            sout = tf.nn.relu(
                linear(sout, dout=dim_hidden, name='layer_%d' % i))
        sa = tf.concat([sout, action], axis=1)
        assert_shape(sa, [None, dim_hidden + dU])
        out = tf.nn.relu(linear(sa, dout=dim_hidden, name='sa1'))
        out = linear(out, dout=1, init_scale=0.01, name='sa2')
    return out

def inner(obs, dU, reuse=False):
    """Stochastic policy network: ReLU hidden layers feeding a DiagGauss distribution over actions."""
    out = obs
    dist = DiagGauss(dU, mean_clamp=mean_clamp, min_var=min_std)
    with tf.variable_scope('policy', reuse=reuse) as vs:
        for i in range(num_hidden):
            out = tf.nn.relu(
                linear(out, dout=dim_hidden, name='layer_%d' % i))
        dist.compute_params_tensor(out)
    return dist

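# Hypothetical usage sketch (not from the source): the DiagGauss returned above
# exposes params = [mu, sigma], so a stochastic action can be drawn with the
# reparameterisation trick. `obs_ph` and `dU` are assumptions for this example.
def sample_action_example(obs_ph, dU):
    dist = inner(obs_ph, dU)
    mu, sigma = dist.params
    noise = tf.random_normal(tf.shape(mu))   # epsilon ~ N(0, I)
    return mu + sigma * noise                # a = mu + sigma * epsilon
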
def linear_value_fn(state):
    value = linear(state, dout=1)
    return value

def linear_q_fn(state, action, reuse=False):
    """Q-function that is linear and separable in the state and action features."""
    with tf.variable_scope('q_function', reuse=reuse):
        a1 = linear(state, dout=1, name='state')
        a2 = linear(action, dout=1, name='act')
    return a1 + a2

def compute_params_tensor(self, logit):
    """Map the final hidden layer to categorical action probabilities."""
    probs = tf.nn.softmax(linear(logit, dout=self.dU))
    self.params = [probs]

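# Hypothetical sketch of the assumed `linear` helper, which is not defined in
# this listing: a fully connected layer x W + b whose calls above use the
# `dout`, `name`, and `init_scale` keyword arguments. The initializer choice
# here is an assumption.
def linear(x, dout, name='linear', init_scale=1.0):
    din = int(x.get_shape()[1])
    with tf.variable_scope(name):
        w = tf.get_variable(
            'w', [din, dout],
            initializer=tf.random_uniform_initializer(-init_scale, init_scale))
        b = tf.get_variable(
            'b', [dout], initializer=tf.constant_initializer(0.0))
    return tf.matmul(x, w) + b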