def step(self, action):
    """Step the wrapped env and post-process the result for the active mode.

    In ``'Bob'`` mode the episode is ended as soon as the reward is
    (numerically) zero: ``done`` is forced to True, ``info['is_success']``
    is set, and a truthy ``'TimeLimit.truncated'`` entry is removed from
    ``info``.

    In ``'Alice'`` mode ``info`` is wrapped in an ``AttrDict``, the reward is
    accumulated into ``self.total_rewards``, and the inner env's ``done`` is
    suppressed unless the step was truncated by the time limit.

    Args:
        action: Forwarded unchanged to ``self.env.step``.

    Returns:
        The tuple ``(obs, reward, done, info)`` with ``done`` and ``info``
        adjusted as described above.

    Raises:
        ValueError: If ``self.mode`` is neither ``'Bob'`` nor ``'Alice'``.
    """
    obs, reward, done, info = self.env.step(action)

    if self.mode == 'Bob':
        # First visit done for Bob
        if np.allclose(reward, 0.):
            done = True
            info['is_success'] = True
            # Only a truthy flag is removed; a falsy entry is left in place.
            if info.get('TimeLimit.truncated'):
                del info['TimeLimit.truncated']
        return obs, reward, done, info

    if self.mode == 'Alice':
        info = AttrDict(info)
        self.total_rewards += reward
        if done:
            # The inner env's done is overridden; it is only restored below
            # when the step was truncated by the time limit.
            done = False
            info.done_observation = obs
            # NOTE(review): when done is not caused by truncation,
            # info.terminal_state is never assigned on this path -- confirm
            # that consumers of info tolerate the missing entry.
            if info.get('TimeLimit.truncated'):
                done = True
                info.terminal_state = False
            info.episodic_return = self.total_rewards
            self.total_rewards = 0
        else:
            info.terminal_state = False
            info.episodic_return = None
        return obs, reward, done, info

    # Previously an unknown mode silently returned None, which only surfaced
    # later as an unpacking error in the caller.
    raise ValueError(
        "Unsupported mode {!r}; expected 'Bob' or 'Alice'".format(self.mode))
def step(self, action):
    """Step the wrapped env and attach episode bookkeeping to ``info``.

    The reward is added to ``self.total_rewards`` on every call. ``info`` is
    returned as an ``AttrDict`` that always carries ``terminal_state`` and
    ``episodic_return``; on a finished episode it also carries
    ``done_observation``, and the running reward total is reset to 0.

    Args:
        action: Forwarded unchanged to ``self.env.step``.

    Returns:
        The tuple ``(obs, reward, done, info)``; ``obs``, ``reward`` and
        ``done`` are passed through as received from the wrapped env.
    """
    obs, reward, done, raw_info = self.env.step(action)
    info = AttrDict(raw_info)
    self.total_rewards += reward

    # Mid-episode step: nothing to report yet.
    if not done:
        info.terminal_state = False
        info.episodic_return = None
        return obs, reward, done, info

    info.done_observation = obs
    # A time-limit truncation ends the episode but is not a terminal state.
    info.terminal_state = not info.get('TimeLimit.truncated')
    info.episodic_return = self.total_rewards
    self.total_rewards = 0
    return obs, reward, done, info
def __init__(
        self,
        module_list: Iterable,  # list of mrl.Modules (possibly nested)
        config: AttrDict):  # hyperparameters and module settings
    """Build the agent from its modules and config, then load or save it.

    The agent folder is ``config.parent_folder`` joined with the agent name
    (``config['agent_name']`` if set, otherwise ``'agent_'`` plus a short
    timestamp). If that folder already exists the agent is loaded from it
    once all modules are registered; otherwise the folder is created and
    the freshly built agent is saved.

    Args:
        module_list: Modules to attach; flattened with ``flatten_modules``.
            Each module must have a truthy ``module_name``, under which it
            is set as an attribute and stored in ``self.module_dict``.
        config: Settings object; ``env_steps`` and ``opt_steps`` are reset
            to 0 on it.
    """
    self.config = config

    root_dir = config.parent_folder
    assert root_dir, "Setting the agent's parent folder is required!"

    # Fall back to a timestamped name only when none was configured.
    chosen_name = config.get('agent_name')
    if not chosen_name:
        chosen_name = 'agent_' + short_timestamp()
    self.agent_name = chosen_name
    self.agent_folder = os.path.join(root_dir, self.agent_name)

    # An existing folder means there is a checkpoint to resume from.
    resume = os.path.exists(self.agent_folder)
    if resume:
        print('Detected existing agent! Loading agent from checkpoint...')
    else:
        os.makedirs(self.agent_folder, exist_ok=True)

    # modules which define _process_experience
    self._process_experience_registry = []
    # modules which define _optimize
    self._optimize_registry = []

    self.config.env_steps = 0
    self.config.opt_steps = 0

    flat_modules = flatten_modules(module_list)
    self.module_dict = AttrDict()

    # First pass: expose every module by name on the agent.
    for mod in flat_modules:
        assert mod.module_name
        setattr(self, mod.module_name, mod)
        self.module_dict[mod.module_name] = mod

    # Second pass: register only after all modules have been attached.
    for mod in flat_modules:
        self._register_module(mod)

    self.training = True

    if not resume:
        self.save()
        return

    self.load()
    print('Successfully loaded saved agent!')