Example #1
0
    def __init__(self, env):
        """Wrap an OpenAI Gym environment with TensorFlow state variables.

        Creates non-trainable variables inside the 'environment' name scope
        holding the observation, action, reward, done flag and step counter.
        Shapes and dtypes of the observation and action variables are derived
        from the environment's spaces via `_parse_shape` and `_parse_dtype`.

        Args:
          env: OpenAI Gym environment.
        """
        self._env = env

        observation_space = self._env.observation_space
        observ_shape = self._parse_shape(observation_space)
        observ_dtype = self._parse_dtype(observation_space)

        action_space = self._env.action_space
        action_shape = self._parse_shape(action_space)
        action_dtype = self._parse_dtype(action_space)

        with tf.name_scope('environment'):
            initial_observ = tf.zeros(observ_shape, observ_dtype)
            self._observ = tf.Variable(
                initial_observ, name='observ', trainable=False)

            initial_action = tf.zeros(action_shape, action_dtype)
            self._action = tf.Variable(
                initial_action, name='action', trainable=False)

            self._reward = tf.Variable(
                0.0, dtype=tf.float32, name='reward', trainable=False)
            # The done flag starts out as True.
            self._done = tf.Variable(
                True, dtype=tf.bool, name='done', trainable=False)
            self._step = tf.Variable(
                0, dtype=tf.int32, name='step', trainable=False)
Example #2
0
 def test_non_default_graph(self):
     """Weights are counted in an explicitly passed, non-default graph.

     Two variables are created in a fresh graph; only the trainable one
     (5 * 3 = 15 elements) is expected to be counted.
     """
     graph = tf.Graph()
     with graph.as_default():
         tf.Variable(tf.zeros((5, 3)), trainable=True)
         tf.Variable(tf.zeros((8, 2)), trainable=False)
     # get_default_graph must be *called*; comparing the graph against the
     # function object itself would make this assertion pass trivially.
     self.assertNotEqual(graph, tf.get_default_graph())
     self.assertEqual(15, count_weights(graph=graph))
Example #3
0
 def test_restrict_invalid_scope(self):
     """Restricting the count to 'bar' is expected to yield zero.

     The only 'bar' scope created here is nested inside 'foo'.
     """
     root_init = tf.zeros((3, 2))
     tf.Variable(root_init, trainable=True)
     with tf.variable_scope('foo'):
         foo_init = tf.zeros((5, 2))
         tf.Variable(foo_init, trainable=True)
         with tf.variable_scope('bar'):
             bar_init = tf.zeros((1, 2))
             tf.Variable(bar_init, trainable=True)
     num_weights = count_weights('bar')
     self.assertEqual(0, num_weights)
Example #4
0
 def test_trainable_and_non_trainable(self):
     """Only trainable variables contribute when both kinds are present."""
     # (shape, trainable) pairs, created in this order.
     variable_specs = [
         ((5, 3), True),
         ((8, 2), False),
         ((1, 1), True),
         ((5, ), True),
         ((3, 1), False),
     ]
     for shape, trainable in variable_specs:
         tf.Variable(tf.zeros(shape), trainable=trainable)
     expected = 15 + 1 + 5
     self.assertEqual(expected, count_weights())
Example #5
0
    def __init__(self, batch_env):
        """Keep the state of a batch of environments in TensorFlow variables.

        Creates non-trainable variables inside the 'env_temporary' variable
        scope for observations, actions, rewards and done flags. Each has a
        leading dimension of `len(batch_env)`.

        Args:
          batch_env: Batch environment.
        """
        self._batch_env = batch_env

        observation_space = self._batch_env.observation_space
        observ_shape = self._parse_shape(observation_space)
        observ_dtype = self._parse_dtype(observation_space)

        action_space = self._batch_env.action_space
        action_shape = self._parse_shape(action_space)
        action_dtype = self._parse_dtype(action_space)

        # Leading batch dimension shared by all variables below.
        batch_dims = (len(self._batch_env), )

        with tf.variable_scope('env_temporary'):
            initial_observ = tf.zeros(batch_dims + observ_shape, observ_dtype)
            self._observ = tf.Variable(
                initial_observ, name='observ', trainable=False)

            initial_action = tf.zeros(batch_dims + action_shape, action_dtype)
            self._action = tf.Variable(
                initial_action, name='action', trainable=False)

            initial_reward = tf.zeros(batch_dims, tf.float32)
            self._reward = tf.Variable(
                initial_reward, name='reward', trainable=False)

            # All done flags start out as True.
            initial_done = tf.cast(tf.ones(batch_dims), tf.bool)
            self._done = tf.Variable(
                initial_done, name='done', trainable=False)
Example #6
0
 def test_exclude_by_regex(self):
     """Variables matched by the `exclude` regex are left out of the count.

     Creates variables of 6, 10 and 2 elements at the root, in 'foo' and in
     'foo/bar' respectively, then checks the count for several patterns.
     """
     root_init = tf.zeros((3, 2))
     tf.Variable(root_init, trainable=True)
     with tf.variable_scope('foo'):
         foo_init = tf.zeros((5, 2))
         tf.Variable(foo_init, trainable=True)
         with tf.variable_scope('bar'):
             bar_init = tf.zeros((1, 2))
             tf.Variable(bar_init, trainable=True)
     # (exclude pattern, expected count), checked in this order.
     cases = (
         (r'.*', 0),
         (r'(^|/)foo/.*', 6),
         (r'.*/bar/.*', 16),
     )
     for pattern, expected in cases:
         self.assertEqual(expected, count_weights(exclude=pattern))
Example #7
0
    def __init__(self, shape, dtype):
        """Set up the accumulators for a mean of the given shape and dtype.

        Note that a float mean of zero submitted elements is NaN, while
        computing the integer mean of zero elements raises a division by zero
        error.

        Args:
          shape: Shape of the mean to compute.
          dtype: Data type of the mean to compute.
        """
        self._dtype = dtype

        def _initial_sum():
            # Passed as a callable so the initial value is built lazily.
            return tf.zeros(shape, dtype)

        self._sum = tf.Variable(_initial_sum, trainable=False)
        self._count = tf.Variable(lambda: 0, trainable=False)
Example #8
0
    def __init__(self, template, capacity, max_length, scope):
        """Create a memory that stores episodes.

        Each transition tuple consists of quantities specified by the
        template. These quantities would typically be observations, actions,
        rewards, and done indicators.

        Args:
          template: List of tensors to derive shapes and dtypes of each
            transition.
          capacity: Number of episodes, or rows, held by the memory.
          max_length: Allocated sequence length for the episodes.
          scope: Variable scope to use for internal variables.
        """
        self._capacity = capacity
        self._max_length = max_length
        with tf.variable_scope(scope) as memory_scope:
            self._scope = memory_scope
            # One length entry per episode row, initialised to zero.
            self._length = tf.Variable(
                tf.zeros(capacity, tf.int32), trainable=False)
            # One zero-filled buffer per template element, shaped
            # [capacity, max_length] + element shape.
            buffers = []
            for elem in template:
                buffer_shape = [capacity, max_length] + elem.shape.as_list()
                initial_buffer = tf.zeros(buffer_shape, elem.dtype)
                buffers.append(tf.Variable(initial_buffer, trainable=False))
            self._buffers = buffers
Example #9
0
def reinit_nested_vars(variables, indices=None):
  """Reset all variables in a nested tuple to zeros.

  Tuples and lists are traversed recursively and the resulting operations are
  grouped. Without `indices` a variable is overwritten entirely; with
  `indices` only the selected rows along the first dimension are zeroed.

  Args:
    variables: Nested tuple or list of variables.
    indices: Indices along the first dimension to reset, defaults to all.

  Returns:
    Operation.
  """
  if isinstance(variables, (tuple, list)):
    return tf.group(
        *[reinit_nested_vars(variable, indices) for variable in variables])
  if indices is None:
    return variables.assign(tf.zeros_like(variables))
  # Build the replacement rows in the variable's own dtype. tf.zeros defaults
  # to float32, which would not match the variable in scatter_update for
  # integer, boolean or float64 variables.
  row_shape = variables.shape[1:].as_list()
  zeros = tf.zeros(
      [tf.shape(indices)[0]] + row_shape, dtype=variables.dtype.base_dtype)
  return tf.scatter_update(variables, indices, zeros)
Example #10
0
def simulate(batch_env, algo, log=True, reset=False):
    """Simulation step of a vectorized algorithm with in-graph environments.

    Integrates the operations implemented by the algorithm and the
    environments into a combined operation. The pieces are chained with
    control dependencies in this order: begin episode, step, end episode,
    summaries.

    Args:
      batch_env: In-graph batch environment.
      algo: Algorithm instance implementing required operations.
      log: Tensor indicating whether to compute and return summaries.
      reset: Tensor causing all environments to reset.

    Returns:
      Tuple of tensors containing done flags for the current episodes,
      possibly intermediate scores for the episodes, and a summary tensor.
    """
    def _define_begin_episode(agent_indices):
        """Reset environments, intermediate scores and durations for new episodes.

        Args:
          agent_indices: Tensor containing batch indices starting an episode.

        Returns:
          Summary tensor.
        """
        assert agent_indices.shape.ndims == 1
        zero_scores = tf.zeros_like(agent_indices, tf.float32)
        zero_durations = tf.zeros_like(agent_indices)
        reset_ops = [
            batch_env.reset(agent_indices),
            tf.scatter_update(score, agent_indices, zero_scores),
            tf.scatter_update(length, agent_indices, zero_durations)
        ]
        # Notify the algorithm only after the resets above have run.
        with tf.control_dependencies(reset_ops):
            return algo.begin_episode(agent_indices)

    def _define_step():
        """Request actions from the algorithm and apply them to the environments.

        Increments the lengths of all episodes and increases their scores by
        the current reward. After stepping the environments, provides the full
        transition tuple to the algorithm.

        Returns:
          Summary tensor.
        """
        prevob = batch_env.observ + 0  # Ensure a copy of the variable value.
        action, step_summary = algo.perform(prevob)
        action.set_shape(batch_env.action.shape)
        # Update score and length only after the environments were stepped.
        with tf.control_dependencies([batch_env.simulate(action)]):
            add_score = score.assign_add(batch_env.reward)
            inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
        # Hand the transition to the algorithm after the counters are updated.
        with tf.control_dependencies([add_score, inc_length]):
            experience_summary = algo.experience(prevob, batch_env.action,
                                                 batch_env.reward,
                                                 batch_env.done,
                                                 batch_env.observ)
        return tf.summary.merge([step_summary, experience_summary])

    def _define_end_episode(agent_indices):
        """Notify the algorithm of ending episodes.

        Also updates the mean score and length counters used for summaries.

        Args:
          agent_indices: Tensor holding batch indices that end their episodes.

        Returns:
          Summary tensor.
        """
        assert agent_indices.shape.ndims == 1
        submit_score = mean_score.submit(tf.gather(score, agent_indices))
        submit_length = mean_length.submit(
            tf.cast(tf.gather(length, agent_indices), tf.float32))
        with tf.control_dependencies([submit_score, submit_length]):
            return algo.end_episode(agent_indices)

    def _define_summaries():
        """Reset the average score and duration, and return them as summary.

        Each summary is only produced when `log` is true and the respective
        mean has a non-zero count. Otherwise the false branch `str` is used,
        which returns an empty string when called without arguments.

        Returns:
          Summary string.
        """
        score_summary = tf.cond(
            tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
            lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
        length_summary = tf.cond(
            tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
            lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
        return tf.summary.merge([score_summary, length_summary])

    with tf.name_scope('simulate'):
        log = tf.convert_to_tensor(log)
        reset = tf.convert_to_tensor(reset)
        # Non-trainable per-environment accumulators for the running episode
        # score and length; the nested functions above close over them.
        with tf.variable_scope('simulate_temporary'):
            score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32),
                                False,
                                name='score')
            length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32),
                                 False,
                                 name='length')
        mean_score = streaming_mean.StreamingMean((), tf.float32)
        mean_length = streaming_mean.StreamingMean((), tf.float32)
        # Environments that begin an episode: all of them when `reset` is
        # true, otherwise those whose done flag is set.
        agent_indices = tf.cond(
            reset, lambda: tf.range(len(batch_env)),
            lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
        # Skip the begin-episode ops when no index was selected; `str` is the
        # false branch and returns an empty string.
        begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0], tf.bool),
                                lambda: _define_begin_episode(agent_indices),
                                str)
        with tf.control_dependencies([begin_episode]):
            step = _define_step()
        with tf.control_dependencies([step]):
            # Re-read the done flags after the step to find ending episodes.
            agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
            end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0], tf.bool),
                                  lambda: _define_end_episode(agent_indices),
                                  str)
        with tf.control_dependencies([end_episode]):
            summary = tf.summary.merge(
                [_define_summaries(), begin_episode, step, end_episode])
        # Read the outputs only after the summary, and with it the whole
        # chain above, has been evaluated. This rebinds the local name
        # `score` from the variable to an identity tensor of it.
        with tf.control_dependencies([summary]):
            done, score = tf.identity(batch_env.done), tf.identity(score)
        return done, score, summary
Example #11
0
 def test_include_scopes(self):
     """Variables inside a variable scope are counted along with root ones."""
     root_init = tf.zeros((3, 2))
     tf.Variable(root_init, trainable=True)
     with tf.variable_scope('foo'):
         scoped_init = tf.zeros((5, 2))
         tf.Variable(scoped_init, trainable=True)
     expected = 6 + 10
     self.assertEqual(expected, count_weights())
Example #12
0
 def test_ignore_non_trainable(self):
     """With only non-trainable variables present the count is zero."""
     for shape in ((5, 3), (1, 1), (5, )):
         tf.Variable(tf.zeros(shape), trainable=False)
     num_weights = count_weights()
     self.assertEqual(0, num_weights)
Example #13
0
 def test_count_trainable(self):
     """The count equals the total number of elements of trainable variables."""
     for shape in ((5, 3), (1, 1), (5, )):
         tf.Variable(tf.zeros(shape), trainable=True)
     expected = 15 + 1 + 5
     self.assertEqual(expected, count_weights())