class BotAgent:
    """An agent based on a GOFAI bot.

    The agent wraps a ``Bot`` built for the given environment and relays
    the bot's chosen action to the caller.
    """

    def __init__(self, env):
        """An agent based on a GOFAI bot.

        Stores ``env`` and builds the initial bot through ``on_reset``.
        """
        self.env = env
        self.on_reset()

    def on_reset(self):
        """Replace ``self.bot`` with a new ``Bot`` created from ``self.env``."""
        self.bot = Bot(self.env)

    def act(self, obs=None, update_internal_state=True, *args, **kwargs):
        """Ask the bot for an action and return it as ``{'action': ...}``.

        ``obs`` and any extra positional/keyword arguments are accepted but
        not used by this method. When ``update_internal_state`` is truthy,
        the selected action is also passed to the bot's ``take_action``
        before returning.
        """
        chosen = self.bot.get_action()
        if not update_internal_state:
            # Caller only wants to peek at the action; leave the bot untouched.
            return {'action': chosen}
        self.bot.take_action(chosen)
        return {'action': chosen}

    def analyze_feedback(self, reward, done):
        """Accept ``reward`` and ``done`` and do nothing with them."""
        return None
# To make things simple, only allow random left/right/fwd moves, and opening of doors if action in (mission.actions.left, mission.actions.right, mission.actions.forward): break fwd_pos = mission.agent_pos + mission.dir_vec fwd_cell = mission.grid.get(*fwd_pos) if action == mission.actions.toggle and fwd_cell is not None and fwd_cell.type == 'door': break before_optimal_actions.append(action) else: optimal_actions.append(action) expert.take_action(action) obs, reward, done, info = mission.step(action) total_reward += reward episode_steps += 1 if done: if reward > 0: num_success += 1 total_steps += episode_steps if reward <= 0: assert episode_steps == mission.max_steps # Is there another reason for this to happen ? if options.verbose: print('FAILURE on %s, seed %d, reward %.2f' % (level_name, mission_seed, reward)) break