def __init__(self, start_loc='all',
             include_holdout_obj=True, num_meta_tasks=2,
             persist_agent=True, persist_goal=True, persist_objs=True,
             feedback_type=None, feedback_always=False, feedback_freq=False, intermediate_reward=False,
             cartesian_steps=1, **kwargs):
    """
    Store the meta-task configuration and, if requested, build the teachers.

    :param start_loc: which part of the grid to start the agent in.  ['top', 'bottom', 'all']
    :param include_holdout_obj: If true, uses all objects. If False, doesn't use grey objects or boxes
    :param num_meta_tasks: stored on the instance as ``self.num_meta_tasks``
    :param persist_agent: Whether to keep agent position the same across runs within a meta-task
    :param persist_goal: Whether to keep the goal (i.e. textual mission string) the same across runs in a meta-task
    :param persist_objs: Whether to keep object positions the same across runs within a meta-task
    :param feedback_type: Teacher feedback type(s): ``None`` for no teacher, a single name, or an
        iterable of names.  Supported names are 'PostActionAdvice', 'PreActionAdvice',
        'CartesianCorrections' and 'SubgoalCorrections'.
    :param feedback_always: Whether to give that feedback type every time (rather than just when the agent needs help)
    :param feedback_freq: forwarded to every teacher as ``feedback_frequency``
    :param intermediate_reward: stored on the instance as ``self.intermediate_reward``
    :param cartesian_steps: forwarded to every teacher as ``cartesian_steps``
    :param kwargs: Additional arguments passed to the parent class
    :raises NotImplementedError: if a requested feedback type is not one of the supported names
    """
    assert start_loc in ['top', 'bottom', 'all'], \
        "start_loc must be one of 'top', 'bottom', 'all'; got %r" % (start_loc,)
    # A lone name would otherwise be iterated character by character below
    # and rejected with an unhelpful NotImplementedError.
    if isinstance(feedback_type, str):
        feedback_type = [feedback_type]

    self.start_loc = start_loc
    self.intermediate_reward = intermediate_reward
    self.include_holdout_obj = include_holdout_obj
    self.persist_agent = persist_agent
    self.persist_goal = persist_goal
    self.persist_objs = persist_objs
    self.num_meta_tasks = num_meta_tasks
    self.task = {}
    self.itr = 0
    self.feedback_type = feedback_type
    # The attributes above are assigned before the parent constructor runs.
    super().__init__(**kwargs)

    if feedback_type is None:
        self.teacher = None
        return

    # Every teacher class is constructed with the same arguments, so a
    # name -> class table replaces four copy-pasted branches.
    teacher_classes = {
        'PostActionAdvice': PostActionAdvice,
        'PreActionAdvice': PreActionAdvice,
        'CartesianCorrections': CartesianCorrections,
        'SubgoalCorrections': SubgoalCorrections,
    }
    self.oracle = {}
    teachers = {}
    for ft in feedback_type:
        try:
            teacher_cls = teacher_classes[ft]
        except (KeyError, TypeError):
            raise NotImplementedError(
                "Unknown feedback type %r; expected one of %s"
                % (ft, sorted(teacher_classes))) from None
        teachers[ft] = teacher_cls(Bot, self, feedback_always=feedback_always,
                                   feedback_frequency=feedback_freq,
                                   cartesian_steps=cartesian_steps)
        # One separate bot per feedback type, keyed like the teachers.
        self.oracle[ft] = Bot(self)
    self.teacher = BatchTeacher(teachers)
Beispiel #2
0
    def __init__(self, args):
        """Set up the environment, model, learner, expert bot and optimiser.

        :param args: parsed options; this method reads ``seed``, ``env``,
            ``model``, ``lr``, ``optim_eps`` and, if present,
            ``pretrained_model``.
        """
        self.args = args

        # seeding
        utils.seed(args.seed)

        env = gym.make(id=args.env)
        self.env = env

        # Fixed values; the trailing comments name the options they shadow.
        self.episodes = 300  # args.episodes
        self.horizon = env.max_steps
        self.initial_decay = 0.99  # args.decay

        preprocessor = utils.ObssPreprocessor(
            model_name=args.model,
            obs_space=env.observation_space,
            load_vocab_from=getattr(args, 'pretrained_model', None))
        self.observation_preprocessor = preprocessor

        # TODO: for now I am only running the small model
        model = models.ACModel(obs_space=env.observation_space,
                               action_space=env.action_space)
        self.model = model

        self.learner = ModelAgent(model_or_name=model,
                                  obss_preprocessor=preprocessor,
                                  argmax=True)
        self.teacher = Bot(env)

        self.data = []

        # Save the vocabulary and the model before training mode is enabled.
        preprocessor.vocab.save()
        utils.save_model(model, args.model)

        model.train()
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            # Moved before the optimiser is built from model.parameters().
            model.cuda()

        optimizer = torch.optim.Adam(model.parameters(),
                                     args.lr,
                                     eps=args.optim_eps)
        self.optimizer = optimizer
        self.scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                         step_size=100,
                                                         gamma=0.9)
        self.device = torch.device("cuda" if use_cuda else "cpu")

        if not use_cuda:
            print('running on cpu...')
Beispiel #3
0
 def on_reset(self):
     """Replace ``self.bot`` with a new ``Bot`` built from ``self.env``."""
     env = self.env
     self.bot = Bot(env)
Beispiel #4
0
    # Accumulators initialised before the run loop starts.
    num_success = 0
    total_reward = 0
    total_steps = []
    total_bfs = 0
    total_episode_steps = 0
    total_bfs_steps = 0

    # One mission per run, each built from its own seed.
    for run_no in range(options.num_runs):
        level = level_dict[level_name]

        # Offset the base seed by the run index so every run gets a distinct seed.
        mission_seed = options.seed + run_no
        mission = level(seed=mission_seed)
        # NOTE(review): `run_no % 1` is always 0, so this prints on every
        # run -- confirm the intended print interval.
        if not run_no % 1:
            print(run_no, mission.mission)
        expert = Bot(mission)

        if options.verbose:
            print(
                '%s/%s: %s, seed=%d' %
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        optimal_actions = []
        before_optimal_actions = []
        # Falls back to a third of the mission's step limit (floor division)
        # when options.non_optimal_steps is falsy (e.g. None or 0).
        non_optimal_steps = options.non_optimal_steps or int(
            mission.max_steps // 3)
        # Random generator seeded with the same per-run seed as the mission.
        rng = Random(mission_seed)

        try:
            episode_steps = 0
            last_action = None
Beispiel #5
0
 def on_reset(self):
     """Replace ``self.bot`` with a new ``Bot``, passing ``self.forget`` through."""
     bot_options = {'forget': self.forget}
     self.bot = Bot(self.env, **bot_options)
Beispiel #6
0
    # Single-element list holding the level taken from the options.
    level_list = [options.level]

# Timestamp taken before any level is processed.
start_time = time.time()

for level_name in level_list:

    # Counters are reset at the start of every level.
    num_success = 0
    total_reward = 0
    total_steps = 0

    for run_no in range(options.num_runs):
        level = level_dict[level_name]

        # Offset the base seed by the run index so every run gets a distinct seed.
        mission_seed = options.seed + run_no
        mission = level(seed=mission_seed)
        expert = Bot(mission, forget=options.forget)

        if options.verbose:
            print(
                '%s/%s: %s, seed=%d' %
                (run_no + 1, options.num_runs, mission.surface, mission_seed))

        try:
            episode_steps = 0
            # Ask the bot for an action, apply it to the mission, and
            # accumulate the reward and the step count.
            while True:
                action = expert.step()
                obs, reward, done, info = mission.step(action)

                total_reward += reward
                episode_steps += 1
Beispiel #7
0
 def replan(self, action_taken=None):
     """Return the action planned by a brand-new bot for ``self.mission``.

     A new ``Bot`` is built on every call.  ``action_taken`` is accepted
     but not used.
     """
     return Bot(self.mission).replan()