Esempio n. 1
0
    def step(self, action):
        """Step the wrapped environment and post-process the transition.

        The post-processing depends on ``self.mode``:

        * ``'Bob'``: when the reward is (numerically) zero, the episode is
          marked finished and successful (``done = True``,
          ``info['is_success'] = True``) and a truthy
          ``'TimeLimit.truncated'`` entry is removed from ``info``.
        * ``'Alice'``: ``info`` is wrapped in an ``AttrDict`` and the reward
          is accumulated into ``self.total_rewards``. A ``done`` reported by
          the environment is only propagated when the episode was truncated
          by the time limit; in either case the accumulated return is
          reported in ``info.episodic_return`` and the accumulator is reset.

        Args:
            action: Action forwarded unchanged to ``self.env.step``.

        Returns:
            The tuple ``(obs, reward, done, info)``. For any mode other than
            ``'Bob'`` or ``'Alice'`` the method falls through and returns
            ``None``.
        """
        obs, reward, done, info = self.env.step(action)

        if self.mode == 'Bob':
            # First visit done for Bob: a zero reward ends the episode as a
            # success.
            if np.allclose(reward, 0.):
                done = True
                info['is_success'] = True
                # Only a truthy flag is dropped; a falsy entry is left alone.
                # NOTE(review): presumably so the success is not treated as a
                # time-limit truncation downstream -- confirm with callers.
                if info.get('TimeLimit.truncated'):
                    del info['TimeLimit.truncated']

            return obs, reward, done, info

        elif self.mode == 'Alice':
            info = AttrDict(info)
            self.total_rewards += reward
            if done:
                # Suppress the environment's own termination signal; only a
                # time-limit truncation ends the episode for Alice.
                done = False
                info.done_observation = obs
                # terminal_state is not set on the non-truncated path.
                if info.get('TimeLimit.truncated'):
                    done = True
                    info.terminal_state = False
                info.episodic_return = self.total_rewards
                self.total_rewards = 0
            else:
                info.terminal_state = False
                info.episodic_return = None

            return obs, reward, done, info
Esempio n. 2
0
 def step(self, action):
     """Step the wrapped env and annotate ``info`` with episode bookkeeping.

     The reward is added to ``self.total_rewards``. While the episode is
     running, ``info.terminal_state`` is ``False`` and
     ``info.episodic_return`` is ``None``. On the final step ``info`` gains
     ``done_observation`` (the last observation), ``terminal_state``
     (``False`` if ``'TimeLimit.truncated'`` is truthy, else ``True``) and
     ``episodic_return`` (the accumulated reward); the accumulator is then
     reset to 0.

     Returns the tuple ``(obs, reward, done, info)`` with ``info`` wrapped
     in an ``AttrDict``.
     """
     observation, reward, done, raw_info = self.env.step(action)
     info = AttrDict(raw_info)
     self.total_rewards += reward

     # Episode still running: nothing to report yet.
     if not done:
         info.terminal_state = False
         info.episodic_return = None
         return observation, reward, done, info

     # Episode finished: a time-limit truncation is not a true terminal state.
     info.done_observation = observation
     info.terminal_state = not info.get('TimeLimit.truncated')
     info.episodic_return = self.total_rewards
     self.total_rewards = 0
     return observation, reward, done, info