Ejemplo n.º 1
0
 def _infer(self, **kwargs):
     """Evaluate action, log-policy and value for one observation input.

     Feeds ``kwargs['obs_t']`` into ``self.step_obs_ph`` unchanged and runs
     ``self.action``, ``self.log_policy`` and ``self.value`` in the session
     returned by ``tf.get_default_session()``.

     Returns:
         An ``ActionOutput`` built positionally from the three fetched
         values, in the order action, log-policy, value.

     Raises:
         KeyError: if ``obs_t`` is not among the keyword arguments.
     """
     inputs = {self.step_obs_ph: kwargs['obs_t']}
     session = tf.get_default_session()
     fetches = [self.action, self.log_policy, self.value]
     fetched = session.run(fetches, feed_dict=inputs)
     return ActionOutput(*fetched)
Ejemplo n.º 2
0
 def _infer(self, **kwargs):
     """Evaluate action, log-prob and value for a single observation.

     ``kwargs['obs_t']`` is wrapped in a one-element list and converted with
     ``np.array`` before being fed to ``self.obs_t_ph``. The fetched values
     are passed to ``ActionOutput`` as-is (no indexing into the results).

     Returns:
         An ``ActionOutput`` built positionally from the fetched action,
         log-prob and value.

     Raises:
         KeyError: if ``obs_t`` is not among the keyword arguments.
     """
     session = tf.get_default_session()
     inputs = {self.obs_t_ph: np.array([kwargs['obs_t']])}
     fetches = [self.action, self.log_prob, self.value]
     fetched = session.run(fetches, feed_dict=inputs)
     return ActionOutput(*fetched)
Ejemplo n.º 3
0
 def _infer(self, **kwargs):
     """Evaluate action and value for a single observation.

     ``kwargs['obs_t']`` is wrapped in a one-element list and converted with
     ``np.array`` before being fed to ``self.obs_t_ph``. Only ``self.action``
     and ``self.value`` are fetched; the first element of each result is
     returned.

     Returns:
         An ``ActionOutput`` with ``action`` and ``value`` set to element 0
         of the fetched results and ``log_prob`` set to ``None``.

     Raises:
         KeyError: if ``obs_t`` is not among the keyword arguments.
     """
     inputs = {self.obs_t_ph: np.array([kwargs['obs_t']])}
     session = tf.get_default_session()
     fetches = [self.action, self.value]
     batch_action, batch_value = session.run(fetches, feed_dict=inputs)
     return ActionOutput(
         action=batch_action[0],
         log_prob=None,
         value=batch_value[0],
     )
Ejemplo n.º 4
0
def make_output(num_actions=4, batch_size=1, batch=False):
    """Build an ``ActionOutput`` filled with ``np.random.random`` values.

    Args:
        num_actions: Length of the action vector (last axis of ``action``).
        batch_size: Leading dimension used when ``batch`` is true; ignored
            otherwise.
        batch: When true, ``action`` has shape ``(batch_size, num_actions)``
            and ``log_prob`` / ``value`` have shape ``(batch_size,)``. When
            false, ``action`` has shape ``(num_actions,)`` and ``log_prob`` /
            ``value`` are single draws from ``np.random.random()``.

    Returns:
        ``ActionOutput(action, log_prob, value)``.
    """
    # Arguments are evaluated left to right, so the random draws happen in
    # the order action, log_prob, value in both branches.
    if not batch:
        return ActionOutput(
            np.random.random((num_actions,)),
            np.random.random(),
            np.random.random(),
        )

    per_sample = (batch_size,)
    return ActionOutput(
        np.random.random((batch_size, num_actions)),
        np.random.random(per_sample),
        np.random.random(per_sample),
    )
Ejemplo n.º 5
0
 def test_properties(self):
     """Verify ActionOutput exposes its three constructor arguments."""
     result = ActionOutput('action', 'log_prob', 'value')
     # Each positional argument is a string equal to the attribute name it
     # should be readable under; checked in the order action, value, log_prob.
     for field in ('action', 'value', 'log_prob'):
         self.assertEqual(getattr(result, field), field)