Ejemplo n.º 1
0
    def __init__(self, state_size, action_size, global_step, rlConfig,
                 **kwargs):
        """Create the actor and critic networks.

        Both networks are ``tfl.Sequential`` stacks. Each gets one hidden
        affine layer per entry of ``self.layer_sizes`` (128, 128), built under
        its own "<net>/layer_<i>" variable scope with a fresh
        ``tfl.leaky_softplus()``. The actor then receives an affine layer of
        width ``action_size`` that is given ``tf.nn.log_softmax`` as its third
        argument (presumably the output nonlinearity). The critic receives a
        width-1 affine layer whose trailing axis is squeezed away.

        Args:
            state_size: Input width of the first hidden layer of both networks.
            action_size: Output width of the actor's final layer.
            global_step: Accepted but not used by this constructor.
            rlConfig: Stored unchanged on ``self.rlConfig``.
            **kwargs: Accepted and ignored.
        """
        self.action_size = action_size
        self.layer_sizes = [128, 128]
        self.layers = []

        self.actor = tfl.Sequential()
        self.critic = tfl.Sequential()

        # Hidden layers are created depth by depth, actor before critic, so
        # both networks share the same sequence of layer widths.
        towers = (('actor', self.actor), ('critic', self.critic))
        fan_in = state_size
        for depth, fan_out in enumerate(self.layer_sizes):
            for net, tower in towers:
                with tf.variable_scope("%s/layer_%d" % (net, depth)):
                    hidden = tfl.makeAffineLayer(fan_in, fan_out,
                                                 tfl.leaky_softplus())
                    tower.append(hidden)
            fan_in = fan_out

        with tf.variable_scope('actor'):
            policy_head = tfl.makeAffineLayer(fan_in, action_size,
                                              tf.nn.log_softmax)
            self.actor.append(policy_head)

        def drop_last_axis(x):
            # Removes the size-1 trailing axis left by the width-1 layer.
            return tf.squeeze(x, [-1])

        with tf.variable_scope('critic'):
            value_head = tfl.makeAffineLayer(fan_in, 1)
            self.critic.append(util.compose(drop_last_axis, value_head))

        self.rlConfig = rlConfig
Ejemplo n.º 2
0
  def __init__(self, state_size, action_size, global_step, rlConfig, epsilon=0.04, **kwargs):
    """Create actor and critic networks with an epsilon-smoothed policy output.

    Each network is a ``tfl.Sequential`` with one hidden affine layer per entry
    of ``self.layer_sizes`` (128, 128), using ``tfl.leaky_relu``. The actor's
    final affine layer is given ``tf.nn.softmax``, and its output is then mixed
    with a uniform term: ``(1 - epsilon) * probs + epsilon / action_size``.
    The critic ends in a width-1 affine layer whose trailing axis is squeezed.

    Args:
      state_size: Input width of the first hidden layer of both networks.
      action_size: Output width of the actor's final layer.
      global_step: Accepted but not used by this constructor.
      rlConfig: Stored unchanged on ``self.rlConfig``.
      epsilon: Mixing weight stored on ``self.epsilon``.
      **kwargs: Accepted and ignored.
    """
    self.action_size = action_size
    self.layer_sizes = [128, 128]
    self.layers = []

    with tf.name_scope('epsilon'):
      # Stored exactly as passed in; a step-dependent decay term that used to
      # be added here is disabled.
      self.epsilon = epsilon

    self.actor = tfl.Sequential()
    self.critic = tfl.Sequential()

    # Build hidden layers depth by depth, actor first, so both networks share
    # the same sequence of widths.
    towers = (('actor', self.actor), ('critic', self.critic))
    fan_in = state_size
    for depth, fan_out in enumerate(self.layer_sizes):
      for net, tower in towers:
        with tf.variable_scope("%s/layer_%d" % (net, depth)):
          hidden = tfl.makeAffineLayer(fan_in, fan_out, tfl.leaky_relu)
          tower.append(hidden)
      fan_in = fan_out

    def mix_with_uniform(probs):
      # self.epsilon is looked up when this runs, not when it is defined.
      return (1.0 - self.epsilon) * probs + self.epsilon / action_size

    with tf.variable_scope('actor'):
      policy_head = tfl.makeAffineLayer(fan_in, action_size, tf.nn.softmax)
      self.actor.append(util.compose(mix_with_uniform, policy_head))

    def drop_last_axis(x):
      # Removes the size-1 trailing axis left by the width-1 layer.
      return tf.squeeze(x, [-1])

    with tf.variable_scope('critic'):
      value_head = tfl.makeAffineLayer(fan_in, 1)
      self.critic.append(util.compose(drop_last_axis, value_head))

    self.rlConfig = rlConfig
Ejemplo n.º 3
0
    def __init__(self,
                 state_size,
                 action_size,
                 global_step,
                 rlConfig,
                 epsilon=0.04,
                 temperature=0.01,
                 **kwargs):
        """Create the Q-network and its exploration parameters.

        ``self.q_net`` is a ``tfl.Sequential`` with one ``tfl.FCLayer`` per
        entry of ``self.layer_sizes`` (128, 128) using ``tfl.leaky_relu``,
        followed by an output ``tfl.FCLayer`` of width ``action_size`` with no
        nonlinearity.

        Args:
            state_size: Input width of the first hidden layer.
            action_size: Width of the output layer.
            global_step: Step value used in the epsilon schedule; also stored
                on ``self.global_step``.
            rlConfig: Stored unchanged on ``self.rlConfig``.
            epsilon: Base value of the epsilon schedule.
            temperature: Stored unchanged on ``self.temperature``.
            **kwargs: Accepted and ignored.
        """
        self.action_size = action_size
        self.layer_sizes = [128, 128]

        self.q_net = tfl.Sequential()

        fan_in = state_size
        for depth, fan_out in enumerate(self.layer_sizes):
            with tf.variable_scope("layer_%d" % depth):
                hidden = tfl.FCLayer(fan_in, fan_out, tfl.leaky_relu)
                self.q_net.append(hidden)
            fan_in = fan_out

        with tf.variable_scope("q_out"):
            # The output layer is purely affine: no activation is passed.
            q_head = tfl.FCLayer(fan_in, action_size)
            self.q_net.append(q_head)

        self.rlConfig = rlConfig

        with tf.name_scope('epsilon'):
            # epsilon + 0.5 * exp(-step / 1000): the added term is 0.5 at
            # step 0 and shrinks exponentially as global_step grows.
            step = tf.cast(global_step, tf.float32)
            decay = tf.exp(-step / 1000.0)
            self.epsilon = epsilon + 0.5 * decay

        with tf.name_scope('temperature'):
            # Stored as given; no schedule is applied.
            self.temperature = temperature

        self.global_step = global_step
Ejemplo n.º 4
0
 def __init__(self, state_size, action_size, global_step, rlConfig, **kwargs):
   """Create the Q-network from the layer sizes in ``self.q_layers``.

   ``Default.__init__`` is called first with ``**kwargs``. ``self.q_layers``
   is not assigned here, so it is presumably provided by that call or by the
   class. ``self.q_net`` is a ``tfl.Sequential`` with one ``tfl.FCLayer`` per
   entry of ``self.q_layers``, each given a fresh ``tfl.leaky_softplus()``,
   followed by an output ``tfl.FCLayer`` of width ``action_size`` with no
   nonlinearity.

   Args:
     state_size: Input width of the first hidden layer.
     action_size: Width of the output layer.
     global_step: Stored unchanged on ``self.global_step``.
     rlConfig: Stored unchanged on ``self.rlConfig``.
     **kwargs: Forwarded to ``Default.__init__``.
   """
   Default.__init__(self, **kwargs)
   self.action_size = action_size

   self.q_net = tfl.Sequential()

   fan_in = state_size
   for depth, fan_out in enumerate(self.q_layers):
     with tf.variable_scope("layer_%d" % depth):
       hidden = tfl.FCLayer(fan_in, fan_out, tfl.leaky_softplus())
       self.q_net.append(hidden)
     fan_in = fan_out

   with tf.variable_scope("q_out"):
     # The output layer is purely affine: no activation is passed.
     q_head = tfl.FCLayer(fan_in, action_size)
     self.q_net.append(q_head)

   self.rlConfig = rlConfig

   self.global_step = global_step
Ejemplo n.º 5
0
  def __init__(self, state_size, action_size, global_step, rlConfig, **kwargs):
    """Create actor and critic networks from per-network layer-size lists.

    ``Default.__init__`` is called first with ``**kwargs``. ``self.actor_layers``,
    ``self.critic_layers`` and ``self.epsilon`` are not assigned here, so they
    are presumably provided by that call or by the class.

    Each network is a ``tfl.Sequential`` built under its own variable scope
    ("actor" / "critic"), with one hidden affine layer per entry of the
    matching ``*_layers`` list. The actor then gets a softmax affine layer of
    width ``action_size`` followed by epsilon smoothing,
    ``(1 - epsilon) * p + epsilon / action_size``. The critic gets a width-1
    affine layer followed by a squeeze of the trailing axis.

    Args:
      state_size: Input width of the first hidden layer of both networks.
      action_size: Output width of the actor's softmax layer.
      global_step: Accepted but not used by this constructor.
      rlConfig: Stored unchanged on ``self.rlConfig``.
      **kwargs: Forwarded to ``Default.__init__``.
    """
    Default.__init__(self, **kwargs)

    self.action_size = action_size

    # Width of the last hidden layer of each network. It is recorded per
    # network because the two layer lists may end in different sizes; reusing
    # the loop variable after the loop would hand the critic's final width to
    # the actor's output layer.
    last_hidden_size = {}

    for name in ['actor', 'critic']:
      net = tfl.Sequential()
      with tf.variable_scope(name):
        prev_size = state_size
        for i, next_size in enumerate(getattr(self, name + "_layers")):
          with tf.variable_scope("layer_%d" % i):
            net.append(tfl.makeAffineLayer(prev_size, next_size, tfl.leaky_softplus()))
          prev_size = next_size
      setattr(self, name, net)
      # Equals state_size when the layer list is empty.
      last_hidden_size[name] = prev_size

    with tf.variable_scope('actor'):
      self.actor.append(
          tfl.makeAffineLayer(last_hidden_size['actor'], action_size, tf.nn.softmax))
      # self.epsilon is looked up when the lambda runs, not when it is defined.
      self.actor.append(lambda p: (1. - self.epsilon) * p + self.epsilon / action_size)

    with tf.variable_scope('critic'):
      self.critic.append(tfl.makeAffineLayer(last_hidden_size['critic'], 1))
      # Removes the size-1 trailing axis left by the width-1 layer.
      self.critic.append(lambda x: tf.squeeze(x, [-1]))

    self.rlConfig = rlConfig