Esempio n. 1
0
class BotAgent:
    """Agent whose actions are produced by a GOFAI ``Bot`` bound to ``env``."""

    def __init__(self, env):
        """Remember ``env`` and create the first bot for it."""
        self.env = env
        self.on_reset()

    def on_reset(self):
        """Throw away the current bot and build a new one for ``self.env``."""
        self.bot = Bot(self.env)

    def act(self, obs=None, update_internal_state=True, *args, **kwargs):
        """Return the bot's next action wrapped in a dict.

        ``obs`` and any extra positional/keyword arguments are accepted but
        ignored.  The bot is only told that the action was taken when
        ``update_internal_state`` is truthy.

        :return: ``{'action': <action returned by the bot>}``
        """
        result = {'action': self.bot.get_action()}
        if update_internal_state:
            self.bot.take_action(result['action'])
        return result

    def analyze_feedback(self, reward, done):
        """Ignore environment feedback; the bot does not learn from it."""
        return None
Esempio n. 2
0
    def __init__(self, args):
        """Build the environment, model, agents and optimiser from ``args``.

        Reads ``args.seed``, ``args.env``, ``args.model``, ``args.lr`` and
        ``args.optim_eps``; ``args.pretrained_model`` is optional.  As a side
        effect the preprocessor vocabulary and the new model are saved.
        """
        self.args = args

        # Seed before anything else is constructed.
        utils.seed(args.seed)

        env = gym.make(id=args.env)
        self.env = env

        # Hard-coded for now; args.episodes and args.decay are not consulted.
        self.episodes = 300
        self.horizon = env.max_steps
        self.initial_decay = 0.99

        preprocessor = utils.ObssPreprocessor(
            model_name=args.model,
            obs_space=env.observation_space,
            load_vocab_from=getattr(args, 'pretrained_model', None),
        )
        self.observation_preprocessor = preprocessor

        # TODO: for now I am only running the small model
        model = models.ACModel(
            obs_space=env.observation_space,
            action_space=env.action_space,
        )
        self.model = model

        # The learner acts greedily (argmax=True); the teacher is a bot.
        self.learner = ModelAgent(
            model_or_name=model,
            obss_preprocessor=preprocessor,
            argmax=True,
        )
        self.teacher = Bot(env)

        # Collected [observation, optimal_action, done] samples.
        self.data = []

        preprocessor.vocab.save()
        utils.save_model(model, args.model)

        model.train()
        if torch.cuda.is_available():
            model.cuda()

        optimizer = torch.optim.Adam(
            model.parameters(), self.args.lr, eps=self.args.optim_eps)
        self.optimizer = optimizer
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=100, gamma=0.9)

        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        if self.device.type == 'cpu':
            print('running on cpu...')
 def __init__(self, start_loc='all',
              include_holdout_obj=True, num_meta_tasks=2,
              persist_agent=True, persist_goal=True, persist_objs=True,
              feedback_type=None, feedback_always=False, feedback_freq=False, intermediate_reward=False,
              cartesian_steps=1, **kwargs):
     """
     Store the meta-task settings and, when requested, build the feedback teachers.

     :param start_loc: which part of the grid to start the agent in.  ['top', 'bottom', 'all']
     :param include_holdout_obj: If true, uses all objects. If False, doesn't use grey objects or boxes
     :param num_meta_tasks: stored on the instance as ``num_meta_tasks``
     :param persist_agent: Whether to keep agent position the same across runs within a meta-task
     :param persist_goal: Whether to keep the goal (i.e. textual mission string) the same across runs in a meta-task
     :param persist_objs: Whether to keep object positions the same across runs within a meta-task
     :param feedback_type: Names of the teacher feedback types to enable, or None for no teacher.
         An iterable of names; a single name may also be given as a plain string.  Supported names:
         'PostActionAdvice', 'PreActionAdvice', 'CartesianCorrections', 'SubgoalCorrections'
     :param feedback_always: Whether to give that feedback type every time (rather than just when the agent needs help)
     :param feedback_freq: forwarded to every teacher as ``feedback_frequency``
     :param intermediate_reward: stored on the instance as ``intermediate_reward``
     :param cartesian_steps: forwarded to every teacher as ``cartesian_steps``
     :param kwargs: Additional arguments passed to the parent class
     :raises NotImplementedError: if a requested feedback type is not one of the supported names
     """
     assert start_loc in ['top', 'bottom', 'all']
     # A bare string would otherwise be iterated character by character
     # below and rejected on its first letter.
     if isinstance(feedback_type, str):
         feedback_type = [feedback_type]
     self.start_loc = start_loc
     self.intermediate_reward = intermediate_reward
     self.include_holdout_obj = include_holdout_obj
     self.persist_agent = persist_agent
     self.persist_goal = persist_goal
     self.persist_objs = persist_objs
     self.num_meta_tasks = num_meta_tasks
     self.task = {}
     self.itr = 0
     self.feedback_type = feedback_type
     # Attributes above are assigned before the parent initialiser runs.
     super().__init__(**kwargs)
     if feedback_type is not None:
         # Every teacher class takes the same constructor arguments, so a
         # name -> class table replaces the if/elif chain.
         teacher_classes = {
             'PostActionAdvice': PostActionAdvice,
             'PreActionAdvice': PreActionAdvice,
             'CartesianCorrections': CartesianCorrections,
             'SubgoalCorrections': SubgoalCorrections,
         }
         self.oracle = {}
         teachers = {}
         for ft in feedback_type:
             if ft not in teacher_classes:
                 raise NotImplementedError(
                     'Unknown feedback type: %r' % (ft,))
             teachers[ft] = teacher_classes[ft](
                 Bot, self, feedback_always=feedback_always,
                 feedback_frequency=feedback_freq, cartesian_steps=cartesian_steps)
             # One separate bot per feedback type, kept alongside its teacher.
             self.oracle[ft] = Bot(self)
         teacher = BatchTeacher(teachers)
     else:
         teacher = None
     self.teacher = teacher
Esempio n. 4
0
class BotAgent:
    """Agent whose actions are produced by a GOFAI ``Bot`` bound to ``env``."""

    def __init__(self, env, forget=False):
        """Remember ``env`` and the ``forget`` flag, then create the first bot."""
        self.env = env
        self.forget = forget
        self.on_reset()

    def on_reset(self):
        """Replace the current bot with a new one built with the stored settings."""
        self.bot = Bot(self.env, forget=self.forget)

    def act(self, *args, **kwargs):
        """Advance the bot by one step and return its action in a dict.

        All arguments are accepted but ignored.

        :return: ``{'action': <value returned by the bot's step()>}``
        """
        next_action = self.bot.step()
        return {'action': next_action}

    def analyze_feedback(self, reward, done):
        """Ignore environment feedback; the bot does not learn from it."""
        return None
Esempio n. 5
0
 def on_reset(self):
     """Replace ``self.bot`` with a new ``Bot`` built for ``self.env``."""
     current_env = self.env
     self.bot = Bot(current_env)
Esempio n. 6
0
    # Per-level accumulators, initialised before the runs of this level.
    num_success = 0
    total_reward = 0
    # NOTE: a list here, unlike the scalar counters around it.
    total_steps = []
    total_bfs = 0
    total_episode_steps = 0
    total_bfs_steps = 0

    for run_no in range(options.num_runs):
        level = level_dict[level_name]

        # Each run uses its own seed: the base seed plus the run index.
        mission_seed = options.seed + run_no
        mission = level(seed=mission_seed)
        # `run_no % 1` is always 0, so this prints on every run.
        if not run_no % 1:
            print(run_no, mission.mission)
        # A new bot is built for every mission.
        expert = Bot(mission)

        if options.verbose:
            print(
                '%s/%s: %s, seed=%d' %
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        optimal_actions = []
        before_optimal_actions = []
        # Use the configured value when it is truthy, otherwise fall back to
        # a third of the mission's max_steps.
        non_optimal_steps = options.non_optimal_steps or int(
            mission.max_steps // 3)
        # Run-local Random instance constructed from the mission seed.
        rng = Random(mission_seed)

        try:
            episode_steps = 0
            # No action has been taken yet in this episode.
            last_action = None
Esempio n. 7
0
        bad_agent = RandomAgent(seed=options.random_agent_seed)

# Timestamp taken before any level is run.
start_time = time.time()

for level_name in level_list:

    # Per-level accumulators, reset for each level.
    num_success = 0
    total_reward = 0
    total_steps = 0

    for run_no in range(options.num_runs):
        level = level_dict[level_name]

        # Each run uses its own seed: the base seed plus the run index.
        mission_seed = options.seed + run_no
        mission = level(seed=mission_seed)
        # A new bot is built for every mission.
        expert = Bot(mission)

        if options.verbose:
            print(
                '%s/%s: %s, seed=%d' %
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        optimal_actions = []
        before_optimal_actions = []
        # Use the configured value when it is truthy, otherwise fall back to
        # a third of the mission's max_steps.
        non_optimal_steps = options.non_optimal_steps or int(
            mission.max_steps // 3)
        # Run-local Random instance constructed from the mission seed.
        rng = Random(mission_seed)

        try:
            # Step counter for the current episode.
            episode_steps = 0
            while True:
Esempio n. 8
0
 def on_reset(self):
     """Replace ``self.bot`` with a new ``Bot`` for ``self.env``, passing on ``self.forget``."""
     current_env = self.env
     self.bot = Bot(current_env, forget=self.forget)
Esempio n. 9
0
class InteractiveIIL:
    """Interactive imitation-learning loop with a bot teacher and a model learner.

    In every episode each step is executed either by the teacher or by the
    learner; the teacher is picked with probability
    ``initial_decay ** episode``.  The teacher's action is always recorded as
    the label, and after each episode the model is trained on all samples
    collected so far.
    """

    def __init__(self, args):
        """Build the environment, model, agents and optimiser from ``args``.

        Reads ``args.seed``, ``args.env``, ``args.model``, ``args.lr`` and
        ``args.optim_eps``; ``args.pretrained_model`` is optional.  As a side
        effect the preprocessor vocabulary and the new model are saved.
        """
        self.args = args

        # seeding
        utils.seed(args.seed)

        self.env = gym.make(id=args.env)

        # Hard-coded for now; args.episodes and args.decay are not consulted.
        self.episodes = 300  # args.episodes
        self.horizon = self.env.max_steps
        self.initial_decay = 0.99  # args.decay

        self.observation_preprocessor = utils.ObssPreprocessor(
            model_name=args.model,
            obs_space=self.env.observation_space,
            load_vocab_from=getattr(self.args, 'pretrained_model', None))
        # TODO: for now I am only running the small model
        self.model = models.ACModel(obs_space=self.env.observation_space,
                                    action_space=self.env.action_space)
        self.learner = ModelAgent(
            model_or_name=self.model,
            obss_preprocessor=self.observation_preprocessor,
            argmax=True)
        # Rebuilt at the start of every episode in train().
        self.teacher = Bot(self.env)

        # Collected [observation, optimal_action, done] samples.
        self.data = []

        self.observation_preprocessor.vocab.save()
        utils.save_model(self.model, args.model)

        self.model.train()
        if torch.cuda.is_available():
            self.model.cuda()

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        if self.device.type == 'cpu':
            print('running on cpu...')

    def train(self):
        """Run ``self.episodes`` episodes, training on the data after each one."""
        for episode in range(self.episodes):
            # Probability that the teacher (not the learner) acts this episode.
            alpha = self.initial_decay**episode

            observation = self.env.reset()
            # The bot is constructed for one mission; after a reset it must
            # be rebuilt, otherwise it keeps planning for the previous one.
            self.teacher = Bot(self.env)
            last_action = None

            done = False
            while not done:
                # One uniform draw decides who acts.  This avoids handing
                # arbitrary agent objects to np.random.choice.
                teacher_acts = np.random.random() < alpha
                optimal_action = self.teacher.replan(action_taken=last_action)
                if teacher_acts:
                    action = optimal_action
                else:
                    # NOTE(review): if the learner's act() returns a dict
                    # rather than a bare action, it must be unpacked before
                    # env.step() -- confirm against ModelAgent.
                    action = self.learner.act(observation)

                next_observation, reward, done, info = self.env.step(action)

                # The label is always the teacher's action, whoever acted.
                self.data.append([observation, optimal_action, done])
                last_action = action
                observation = next_observation

            self._train_epoch()

    def _train_epoch(self):
        """Feed all collected samples to ``_train_batch`` in shuffled mini-batches.

        Batches have ``self.args.batch_size`` samples; the last one may be
        shorter.  Nothing is called when no data has been collected.
        """
        batch_size = self.args.batch_size
        order = np.random.permutation(len(self.data))

        for start in range(0, len(order), batch_size):
            batch = [self.data[i] for i in order[start:start + batch_size]]
            self._train_batch(batch)

    def _train_batch(self, batch):
        """Placeholder for a single optimisation step on ``batch``; does nothing yet."""
        pass
Esempio n. 10
0
    level_list = [options.level]

# Timestamp taken before any level is run.
start_time = time.time()

for level_name in level_list:

    # Per-level accumulators, reset for each level.
    num_success = 0
    total_reward = 0
    total_steps = 0

    for run_no in range(options.num_runs):
        level = level_dict[level_name]

        # Each run uses its own seed: the base seed plus the run index.
        mission_seed = options.seed + run_no
        mission = level(seed=mission_seed)
        # A new bot is built for every mission, with the configured forget flag.
        expert = Bot(mission, forget=options.forget)

        if options.verbose:
            print(
                '%s/%s: %s, seed=%d' %
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        try:
            # Step counter for the current episode.
            episode_steps = 0
            while True:
                # The bot picks the action and the mission executes it.
                action = expert.step()
                obs, reward, done, info = mission.step(action)

                total_reward += reward
                episode_steps += 1
Esempio n. 11
0
 def replan(self, action_taken=None):
     """Plan from scratch and return the resulting action.

     ``action_taken`` is accepted but not used: a brand-new ``Bot`` is
     built for ``self.mission`` on every call and asked to replan.
     """
     return Bot(self.mission).replan()