def step(self, action):
    """Step the wrapped environment and post-process the transition by mode.

    Args:
        action: Action forwarded unchanged to ``self.env.step``.

    Returns:
        The ``(obs, reward, done, info)`` tuple from the wrapped environment,
        adjusted as follows:

        * ``self.mode == 'Bob'``: a reward numerically equal to zero ends the
          episode (``done = True``), sets ``info['is_success'] = True`` and
          removes a truthy ``'TimeLimit.truncated'`` entry from ``info``.
        * ``self.mode == 'Alice'``: ``info`` is wrapped in ``AttrDict`` and
          the reward is added to ``self.total_rewards``. When the wrapped
          environment reports ``done``, the flag is kept only if
          ``'TimeLimit.truncated'`` is truthy; the accumulated return is
          stored in ``info.episodic_return`` and the accumulator is reset.
        * any other mode: the transition is returned unchanged.
    """
    obs, reward, done, info = self.env.step(action)

    if self.mode == 'Bob':
        # First visit done for Bob: a zero reward is treated as success.
        if np.allclose(reward, 0.):
            done = True
            info['is_success'] = True
            # The episode ended through success, so drop the truncation flag
            # if it was raised on the same step.
            if info.get('TimeLimit.truncated'):
                del info['TimeLimit.truncated']
    elif self.mode == 'Alice':
        info = AttrDict(info)
        self.total_rewards += reward
        if done:
            # A done signal from the wrapped env is suppressed unless it came
            # from the time limit.
            done = False
            info.done_observation = obs
            # NOTE(review): terminal_state is left unset when the env finished
            # without truncation (the original assignment to True was
            # commented out) -- confirm downstream readers tolerate that.
            if info.get('TimeLimit.truncated'):
                done = True
                info.terminal_state = False
            info.episodic_return = self.total_rewards
            self.total_rewards = 0
        else:
            info.terminal_state = False
            info.episodic_return = None

    return obs, reward, done, info
def step(self, action):
    """Step the wrapped environment and annotate ``info`` with return data.

    The reward of every step is added to ``self.total_rewards``. The ``info``
    mapping is wrapped in ``AttrDict`` and always receives ``terminal_state``
    and ``episodic_return``; on a finished episode it also receives
    ``done_observation`` and the accumulator is reset to zero.

    Args:
        action: Action forwarded unchanged to ``self.env.step``.

    Returns:
        ``(obs, reward, done, info)`` with ``obs``, ``reward`` and ``done``
        exactly as produced by the wrapped environment.
    """
    obs, reward, done, raw_info = self.env.step(action)
    info = AttrDict(raw_info)
    self.total_rewards += reward

    # Mid-episode step: nothing to report yet.
    if not done:
        info.terminal_state = False
        info.episodic_return = None
        return obs, reward, done, info

    # Episode finished: keep the final observation and publish the return.
    info.done_observation = obs
    # A truthy 'TimeLimit.truncated' entry means the state is not terminal.
    info.terminal_state = not info.get('TimeLimit.truncated')
    info.episodic_return = self.total_rewards
    self.total_rewards = 0
    return obs, reward, done, info