def test_num_steps_can_sample(self):
    """Check that three added samples are all reported as sampleable.

    Two samples are added, the episode is terminated, and a third sample
    is added afterwards; ``num_steps_can_sample()`` must then report 3.
    """
    replay = SimpleReplayBuffer(10000, 1, 1)

    # First episode: two samples; the fourth argument (presumably the
    # terminal flag -- confirm against SimpleReplayBuffer) is False for
    # the first sample and True for the second.
    for fourth_arg in (False, True):
        replay.add_sample(1, 1, 1, fourth_arg, 1)
    replay.terminate_episode()

    # One more sample added after the episode boundary.
    replay.add_sample(1, 1, 1, False, 1)

    self.assertEqual(replay.num_steps_can_sample(), 3)
# Convert one recorded demonstration path into replay-buffer transitions,
# then pickle the buffer to disk.
# NOTE(review): `d`, `path_num`, `buffer` and `rlkit_buffer_save_dir` are
# defined outside this fragment. If this code runs inside a per-path loop,
# the save step at the bottom presumably belongs after that loop -- confirm.
obs = d['obs'][path_num]
acs = d['acs'][path_num]
env_infos = d['info'][path_num]

# The demos don't come with reward, and none of the robotic environments in
# gym ever have terminal = 1, so both values are the same for every step.
zero_reward = 0.
not_terminal = 0

# Step j is paired with step j + 1, so there is one fewer transition than
# there are observations.
ep_len = len(obs)
for step in range(ep_len - 1):
    current_obs = {
        'obs': obs[step]['observation'],
        'obs_task_params': obs[step]['desired_goal'],
    }
    action = acs[step]
    following_obs = {
        'obs': obs[step + 1]['observation'],
        'obs_task_params': obs[step + 1]['desired_goal'],
    }
    buffer.add_sample(
        current_obs,
        action,
        zero_reward,
        not_terminal,
        following_obs,
        agent_info={},
        env_info=env_infos[step],
    )
buffer.terminate_episode()

# Save the buffer under the 'replay_buffer' key.
file_name = os.path.join(rlkit_buffer_save_dir, 'extra_data.pkl')
joblib.dump({'replay_buffer': buffer}, file_name, compress=3)