Example #1
def policy(obs, dimA, reuse=False):
    with tf.variable_scope('policy', reuse=reuse):
        # two hidden ReLU layers on top of the observation
        h1 = tf.nn.relu(linear(obs, dout=l1, name='h1'))
        h2 = tf.nn.relu(linear(h1, dout=l2, name='h2'))
        # linear head with one output per action dimension
        h3 = tf.identity(linear(h2, dout=dimA), name='h3')
        # squash to [-1, 1] and scale by action_scale
        action = tf.nn.tanh(h3, name='h4-action') * action_scale
        return action
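
All of these examples call a `linear` helper that is not shown on this page. As a rough sketch only, a TensorFlow 1.x fully connected layer with the `dout`, `name`, and `init_scale` arguments used in these snippets might look like the following; the initializer choice and default values here are assumptions, not the project's actual implementation.

import tensorflow as tf

def linear(x, dout, name='linear', init_scale=1.0):
    # Sketch of a fully connected layer: y = x W + b, with variables scoped under `name`.
    din = int(x.get_shape()[1])
    with tf.variable_scope(name):
        W = tf.get_variable('W', [din, dout],
                            initializer=tf.random_uniform_initializer(-init_scale, init_scale))
        b = tf.get_variable('b', [dout],
                            initializer=tf.constant_initializer(0.0))
        return tf.matmul(x, W) + b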
Example #2
def qfunction(obs, act, reuse=False):
    with tf.variable_scope('qfunc', reuse=reuse):
        # embed the observation, then concatenate the action along the feature axis
        h1 = tf.nn.relu(linear(obs, dout=l1, name='h1'))
        h1a = tf.concat([h1, act], axis=1)
        h2 = tf.nn.relu(linear(h1a, dout=l2, name='h2'))
        # scalar Q-value per batch element
        qs = linear(h2, dout=1)
        q = tf.squeeze(qs, [1], name='h3-q')
        return q
Example #3
def inner(obs, dU, reuse=False):
    out = obs
    with tf.variable_scope('policy', reuse=reuse):
        # stack of fully connected ReLU layers
        for i in range(num_hidden):
            out = tf.nn.relu(
                linear(out, dout=dim_hidden, name='layer_%d' % i))
        # output layer with small initial weights
        out = linear(out, dout=dU, init_scale=0.01)
        # map the tanh output into [mid - diff/2, mid + diff/2]
        pol = tf.nn.tanh(out) * (diff / 2) + mid
    return pol
Example #4
    def compute_params_tensor(self, logit):
        # mean head and exponentiated log-std head for a diagonal Gaussian
        mu = linear(logit, dout=self.dU, name='mu')
        sigma = tf.exp(linear(logit, dout=self.dU, name='logsig'))
        # floor the standard deviation to keep the distribution well-behaved
        if self.min_var > 0:
            sigma = tf.maximum(self.min_var, sigma)

        # optionally bound the mean with a scaled tanh
        if self.mean_clamp:
            mu = tf.nn.tanh(mu) * self.mean_clamp

        dist_params = [mu, sigma]
        self.params = dist_params
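
Example #4 stores `[mu, sigma]` in `self.params` but does not show how the distribution is used afterwards. As a hypothetical illustration only, sampling from such a diagonal Gaussian is commonly done with the reparameterization trick; the `sample_tensor` method below is an assumed addition, not part of the class shown here.

    def sample_tensor(self):
        # Hypothetical method: reparameterized sample, action = mu + sigma * eps with eps ~ N(0, I).
        mu, sigma = self.params
        eps = tf.random_normal(tf.shape(mu))
        return mu + sigma * eps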
Example #5
def inner(state, action, reuse=False):
    sout = state
    dU = int(action.get_shape()[1])
    with tf.variable_scope('q_function', reuse=reuse) as vs:
        # embed the state with a stack of ReLU layers
        for i in range(num_hidden):
            sout = tf.nn.relu(
                linear(sout, dout=dim_hidden, name='layer_%d' % i))
        # concatenate the state embedding with the action along the feature axis
        sa = tf.concat([sout, action], axis=1)
        assert_shape(sa, [None, dim_hidden + dU])
        out = tf.nn.relu(linear(sa, dout=dim_hidden, name='sa1'))
        # scalar Q-value output with small initial weights
        out = linear(out, dout=1, init_scale=0.01, name='sa2')
    return out
Example #6
def inner(obs, dU, reuse=False):
    out = obs
    dist = DiagGauss(dU, mean_clamp=mean_clamp, min_var=min_std)
    with tf.variable_scope('policy', reuse=reuse) as vs:
        # feature layers, then let the distribution object build its parameter heads
        for i in range(num_hidden):
            out = tf.nn.relu(
                linear(out, dout=dim_hidden, name='layer_%d' % i))
        dist.compute_params_tensor(out)
    return dist
Example #7
def linear_value_fn(state):
    # state value as a single linear layer on top of the state features
    value = linear(state, dout=1)
    return value
Example #8
def linear_q_fn(state, action, reuse=False):
    with tf.variable_scope('q_function', reuse=reuse):
        # Q(s, a) modeled as a sum of separate linear terms in the state and the action
        a1 = linear(state, dout=1, name='state')
        a2 = linear(action, dout=1, name='act')
    return a1 + a2
Example #9
def compute_params_tensor(self, logit):
    # categorical distribution: softmax over a linear projection to dU classes
    probs = tf.nn.softmax(linear(logit, dout=self.dU))
    self.params = [probs]
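
A pattern shared by most of these builders is the `reuse` flag: calling the same function twice with `reuse=True` on the second call makes both outputs share one set of variables under the same scope. A minimal usage sketch with the `policy` function from Example #1 is shown below; the placeholder names and the `dimO`/`dimA` sizes are assumed for illustration.

import tensorflow as tf

# Assumed placeholder setup; dimO and dimA are illustrative observation/action sizes.
obs_ph = tf.placeholder(tf.float32, [None, dimO], name='obs')
next_obs_ph = tf.placeholder(tf.float32, [None, dimO], name='next_obs')

act = policy(obs_ph, dimA)                        # first call creates the 'policy' variables
next_act = policy(next_obs_ph, dimA, reuse=True)  # second call reuses those same variables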