Exemple #1
0
class BotAgent:
    """An agent based on a GOFAI bot."""

    def __init__(self, env):
        """Remember ``env`` and build the first bot for it."""
        self.env = env
        self.on_reset()

    def on_reset(self):
        """Replace ``self.bot`` with a freshly constructed ``Bot`` for ``self.env``."""
        self.bot = Bot(self.env)

    def act(self, obs=None, update_internal_state=True, *args, **kwargs):
        """Return the bot's next planned action as ``{'action': <action>}``.

        ``obs``, ``update_internal_state`` and any extra arguments are
        accepted but not used; the action comes solely from
        ``self.bot.replan()``.
        """
        return {'action': self.bot.replan()}

    def analyze_feedback(self, reward, done):
        """Accept ``reward`` and ``done`` without doing anything with them."""
        return None
Exemple #2
0
class InteractiveIIL:
    """Interactive imitation-learning loop mixing a teacher bot and a learner.

    In every episode each step is executed either by the teacher (``Bot``) or
    by the learner (``ModelAgent``); the teacher is picked with probability
    ``initial_decay ** episode``.  For every visited observation the
    teacher's action is recorded in ``self.data``, and once the episode ends
    ``_train_epoch`` is run over everything recorded so far.
    """

    def __init__(self, args):
        """Build the environment, model, agents and optimizer from ``args``.

        Reads ``args.seed``, ``args.env``, ``args.model``, ``args.lr``,
        ``args.optim_eps`` and, when present, ``args.pretrained_model``.
        ``args.batch_size`` is read later by ``_train_epoch``.
        """
        self.args = args

        # seeding
        utils.seed(args.seed)

        self.env = gym.make(id=args.env)

        self.episodes = 300  # args.episodes
        self.horizon = self.env.max_steps
        self.initial_decay = 0.99  # args.decay

        self.observation_preprocessor = utils.ObssPreprocessor(
            model_name=args.model,
            obs_space=self.env.observation_space,
            load_vocab_from=getattr(self.args, 'pretrained_model', None))
        # TODO: for now I am only running the small model
        self.model = models.ACModel(obs_space=self.env.observation_space,
                                    action_space=self.env.action_space)
        self.learner = ModelAgent(
            model_or_name=self.model,
            obss_preprocessor=self.observation_preprocessor,
            argmax=True)
        self.teacher = Bot(self.env)

        # Every recorded [observation, teacher_action, done] triple, kept
        # across episodes.
        self.data = []

        self.observation_preprocessor.vocab.save()
        utils.save_model(self.model, args.model)

        self.model.train()
        if torch.cuda.is_available():
            self.model.cuda()

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        if self.device.type == 'cpu':
            print('running on cpu...')

    @staticmethod
    def _to_env_action(act_result):
        """Reduce whatever an agent's ``act`` returned to a bare action value.

        A ``{'action': value}`` dict is unwrapped, and a value exposing
        ``.item()`` (e.g. a one-element tensor or NumPy scalar) is converted
        through it.  Anything else is returned unchanged, so an ``act`` that
        already yields a plain action keeps working.
        """
        if isinstance(act_result, dict):
            act_result = act_result['action']
        item = getattr(act_result, 'item', None)
        return item() if callable(item) else act_result

    def train(self):
        """Collect ``self.episodes`` episodes, retraining after each one."""
        for episode in range(self.episodes):
            # Probability that the teacher drives a step; decays per episode.
            alpha = self.initial_decay**episode

            observation = self.env.reset()
            # NOTE(review): assumes a Bot plans for the episode it was built
            # in, so it is rebuilt after every reset instead of reusing the
            # one created in __init__ - confirm against Bot.
            self.teacher = Bot(self.env)
            last_action = None

            done = False
            while not done:
                # Draw an index (0 = teacher, 1 = learner) rather than the
                # agent objects themselves, so NumPy never has to coerce
                # them into an array or compare them with ``==``.
                use_teacher = np.random.choice(2, p=[alpha, 1. - alpha]) == 0
                # The teacher is consulted on every step: its action is the
                # label stored with the observation, whoever acts.
                optimal_action = self.teacher.replan(action_taken=last_action)
                if use_teacher:
                    action = optimal_action
                else:
                    # The learner's ``act`` presumably returns a dict such as
                    # {'action': tensor}; ``env.step`` needs the bare action.
                    action = self._to_env_action(
                        self.learner.act(observation))

                next_observation, _reward, done, _info = self.env.step(action)

                self.data.append([observation, optimal_action, done])
                last_action = action
                observation = next_observation

            self._train_epoch()

    def _train_epoch(self):
        """Feed all of ``self.data`` to ``_train_batch`` in shuffled batches.

        Batches hold ``self.args.batch_size`` samples; the last one may be
        shorter.
        """
        batch_size = self.args.batch_size
        data_set_size = len(self.data)

        # Shuffle a list of indexes instead of ``self.data`` itself so the
        # recorded order of the samples is left untouched.
        randomized_indexes = np.arange(data_set_size)
        np.random.shuffle(randomized_indexes)

        for start in range(0, data_set_size, batch_size):
            batch = [
                self.data[i]
                for i in randomized_indexes[start:start + batch_size]
            ]
            self._train_batch(batch)

    def _train_batch(self, batch):
        """Placeholder for the per-batch update; currently does nothing."""
        pass
Exemple #3
0
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        optimal_actions = []
        before_optimal_actions = []
        non_optimal_steps = options.non_optimal_steps or int(
            mission.max_steps // 3)
        rng = Random(mission_seed)

        try:
            episode_steps = 0
            last_action = None
            while True:
                # vis_mask = expert.vis_mask
                # expert = Bot(mission)
                # expert.vis_mask = vis_mask
                action = expert.replan(last_action)
                # action = expert.replan(last_action)
                if options.advise_mode and episode_steps < non_optimal_steps:
                    if rng.random() < options.bad_action_proba:
                        while True:
                            action = bad_agent.act(
                                mission.gen_obs())['action'].item()
                            fwd_pos = mission.agent_pos + mission.dir_vec
                            fwd_cell = mission.grid.get(*fwd_pos)
                            # The current bot can't recover from two kinds of behaviour:
                            # - opening a box (because the box just disappears)
                            # - closing a door (because its path-finding mechanism gets confused)
                            opening_box = (action == mission.actions.toggle
                                           and fwd_cell
                                           and fwd_cell.type == 'box')
                            closing_door = (action == mission.actions.toggle
Exemple #4
0
 def replan(self, action_taken=None):
     """Plan from scratch and return the resulting action.

     An entirely new ``Bot`` is created for ``self.mission`` on every call;
     ``action_taken`` is accepted but not used.
     """
     return Bot(self.mission).replan()