Esempio n. 1
0
 def reset(self, indices=None, max_frames=None, name=None):
     '''Build the op that resets the selected environment instances.

     Each selected instance gets a random number of no-op start steps drawn
     from ``tf.random_uniform`` with ``minval=1, maxval=31`` and a cap on the
     number of frames in the episode.

     Args:
         indices: which instances to reset; one no-op count is drawn per entry.
             NOTE(review): the default ``None`` is handed to ``tf.shape``
             unchanged, so callers presumably always pass it -- confirm.
         max_frames: episode frame cap. When ``None``,
             ``self.env_default_timestep_cutoff`` is used.
         name: optional variable-scope name; defaults to ``'MazeReset'``.

     Returns:
         Whatever ``gym_tensorflow_module.environment_reset`` returns.
     '''
     with tf.variable_scope(name, default_name='MazeReset'):
         batch_shape = tf.shape(indices)
         noop_counts = tf.random_uniform(batch_shape, minval=1, maxval=31, dtype=tf.int32)
         # Fall back to the environment-wide cutoff only when the caller gave none.
         frame_limit = self.env_default_timestep_cutoff if max_frames is None else max_frames
         return gym_tensorflow_module.environment_reset(
             self.instances,
             indices,
             noops=noop_counts,
             max_frames=frame_limit,
         )
Esempio n. 2
0
 def reset(self, indices=None, max_frames=None, name=None):
     '''Build the op that resets the selected Atari instances.

     Each selected instance gets a random number of no-op start steps drawn
     from ``tf.random_uniform`` with ``minval=1, maxval=31`` and a cap on the
     number of frames in the episode.

     Args:
         indices: which instances to reset. When ``None``, every instance
             ``0 .. self.batch_size - 1`` is reset.
         max_frames: episode frame cap. ``None`` means
             ``100000 * self.frameskip``. A value that is not a
             ``collections.abc.Sequence`` (e.g. a plain int) is broadcast to
             one entry per index; a sequence is passed through unchanged.
         name: optional variable-scope name; defaults to ``'AtariReset'``.

     Returns:
         Whatever ``gym_tensorflow_module.environment_reset`` returns.
     '''
     # The ``collections.Sequence`` alias was removed in Python 3.10; the ABC
     # lives in ``collections.abc``.
     from collections.abc import Sequence

     if indices is None:
         indices = np.arange(self.batch_size)
     with tf.variable_scope(name, default_name='AtariReset'):
         noops = tf.random_uniform(tf.shape(indices), minval=1, maxval=31, dtype=tf.int32)
         if max_frames is None:
             # Leave the default as a scalar; the broadcast below expands it,
             # so the per-index tensor is only built once.
             max_frames = 100000 * self.frameskip
         if not isinstance(max_frames, Sequence):
             max_frames = tf.ones_like(indices, dtype=tf.int32) * max_frames
         return gym_tensorflow_module.environment_reset(self.instances, indices, noops=noops, max_frames=max_frames)