예제 #1
0
 def test_num_steps_can_sample(self):
     """Three added samples must be reported by num_steps_can_sample().

     Two samples are added (the second one flagged terminal), the episode
     is terminated, and one more sample is added afterwards; the buffer is
     then expected to report 3 sampleable steps.
     """
     replay_buffer = SimpleReplayBuffer(10000, 1, 1)

     # First episode: a non-terminal step followed by a terminal one.
     for is_terminal in (False, True):
         replay_buffer.add_sample(1, 1, 1, is_terminal, 1)
     replay_buffer.terminate_episode()

     # One extra step added after the episode has been terminated.
     replay_buffer.add_sample(1, 1, 1, False, 1)

     self.assertEqual(replay_buffer.num_steps_can_sample(), 3)
예제 #2
0
    # Copy the recorded path with index `path_num` out of `d` into the
    # replay buffer, one (o, a, r, terminal, next_o) transition at a time.
    obs = d['obs'][path_num]
    acs = d['acs'][path_num]
    env_infos = d['info'][path_num]

    # Each transition pairs obs[j] with obs[j + 1], so ep_len observations
    # yield ep_len - 1 transitions.
    # NOTE(review): assumes acs and env_infos hold at least ep_len - 1
    # entries each -- confirm against the code that produced `d`.
    ep_len = len(obs)
    for j in range(ep_len - 1):
        # Repack the observation dict under the keys used for the buffer:
        # 'observation' -> 'obs', 'desired_goal' -> 'obs_task_params'.
        o = {
            'obs': obs[j]['observation'],
            'obs_task_params': obs[j]['desired_goal']
        }
        a = acs[j]
        r = 0.  # the demos don't come with reward
        terminal = 0  # none of the robotic environments in gym have terminal 1 ever
        # Same repacking for the successor observation.
        next_o = {
            'obs': obs[j + 1]['observation'],
            'obs_task_params': obs[j + 1]['desired_goal']
        }
        env_info = env_infos[j]
        # No agent info is stored for these transitions, so an empty dict
        # is passed.
        buffer.add_sample(o,
                          a,
                          r,
                          terminal,
                          next_o,
                          agent_info={},
                          env_info=env_info)
    # Called once per path, after all of its transitions have been added.
    buffer.terminate_episode()

# Write the filled replay buffer to 'extra_data.pkl' inside
# rlkit_buffer_save_dir, wrapped in a dict under the 'replay_buffer' key.
file_name = os.path.join(rlkit_buffer_save_dir, 'extra_data.pkl')
payload = {'replay_buffer': buffer}
joblib.dump(payload, file_name, compress=3)