예제 #1
0
    def __init__(self, env, directory=None):
        """
        Create a TraceRecordingWrapper around env, writing into directory.

        Args:
            env: The environment to wrap; handed to the parent class
                constructor.
            directory: Passed through to TraceRecording as the place to write
                traces. The default of None is passed through unchanged, so
                TraceRecording decides what to do in that case.
        """
        super(TraceRecordingWrapper, self).__init__(env)
        # Define the attribute before registering with the closer, so it
        # exists even if building the TraceRecording below raises.
        self.recording = None
        trace_record_closer.register(self)

        # Honor the caller's directory rather than always passing None.
        self.recording = TraceRecording(directory)
        # Expose whichever directory the recording reports.
        self.directory = self.recording.directory
예제 #2
0
    def __init__(self,
                 env,
                 directory=None,
                 episode_filter=None,
                 frame_filter=None,
                 reward_classes=None):
        """
        Wrap env so that a TraceRecording is attached to it.

        Args:
            env: The environment to wrap; handed to the parent class
                constructor.
            directory, episode_filter, frame_filter, reward_classes:
                Forwarded positionally, in this order, to TraceRecording.
                Their meaning is defined by TraceRecording, not here.
        """
        super(TraceRecordingWrapper, self).__init__(env)

        # The attribute must exist before the closer learns about this
        # object, in case building the recorder below fails.
        self.recording = None
        trace_record_closer.register(self)

        recorder = TraceRecording(
            directory,
            episode_filter,
            frame_filter,
            reward_classes,
        )
        self.recording = recorder
        # Mirror the directory that the recorder reports.
        self.directory = recorder.directory
예제 #3
0
class TraceRecordingWrapper(gym.Wrapper):
    """
    A Wrapper that records a trace of every action, observation, and reward
    generated by an environment.

    For an episode of length N, the trace consists of:
      N actions
      N + 1 observations (including the initial observation from `env.reset()`)
      N rewards

    Usage:

      from gym_recording.wrappers import TraceRecordingWrapper
      if args.record_trace:
        env = TraceRecordingWrapper(env, '/tmp/mytraces')

    It'll save a numbered series of json-encoded files, with large arrays
    stored in binary, along with a manifest in /tmp/mytraces/openaigym.traces.*.
    See gym_recording.recording for more on the file format.

    Later you can load the recorded traces:

      import gym_recording.playback

      def episode_cb(observations, actions, rewards):
          ... do something with the episode ...

      gym_recording.playback.scan_recorded_traces('/tmp/mytraces', episode_cb)

    For an episode of length N, episode_cb receives 3 numpy arrays:
      observations.shape = [N + 1, observation_dim]
      actions.shape = [N, action_dim]
      rewards.shape = [N]
    """

    def __init__(self,
                 env,
                 directory=None,
                 episode_filter=None,
                 frame_filter=None,
                 reward_classes=None):
        """
        Create a TraceRecordingWrapper around env, writing into directory.

        Args:
            env: The environment to wrap.
            directory, episode_filter, frame_filter, reward_classes:
                Forwarded positionally, in this order, to TraceRecording.
                Their meaning is defined by TraceRecording, not here.
        """
        super(TraceRecordingWrapper, self).__init__(env)
        # Set before registering so close() finds the attribute (and skips
        # the recording) even if constructing the TraceRecording fails.
        self.recording = None
        trace_record_closer.register(self)

        self.recording = TraceRecording(directory, episode_filter,
                                        frame_filter, reward_classes)
        self.directory = self.recording.directory

    def step(self, action):
        """
        Step the wrapped environment and record the transition.

        Returns the (observation, reward, done, info) tuple produced by the
        wrapped environment, unchanged.
        """
        observation, reward, done, info = self.env.step(action)
        self.recording.add_step(action, observation, reward)
        return observation, reward, done, info

    def reset(self, **kwargs):
        """
        End the current recorded episode, reset the wrapped environment and
        record the new initial observation.

        Any keyword arguments are forwarded to the wrapped environment's
        reset(). Returns the observation produced by that reset.
        """
        self.recording.end_episode()
        observation = self.env.reset(**kwargs)
        self.recording.add_reset(observation)
        return observation

    def close(self):
        """
        Flush any buffered data to disk and close. It should get called
        automatically at program exit time, but you can free up memory by
        calling it explicitly when you're done.
        """
        try:
            if self.recording is not None:
                self.recording.close()
        finally:
            # Close the environment even when closing the recording raises,
            # so a failed flush does not leak the wrapped environment.
            if self.env is not None:
                self.env.close()