Example #1
    class Thor(threading.Thread):
        def __init__(self, queue, train_eval="train"):
            threading.Thread.__init__(self)
            self.action_queue = queue
            self.mask_rcnn = None
            self.env = None
            self.train_eval = train_eval
            self.controller_type = "oracle"

        def run(self):
            while True:
                action, reset, task_file = self.action_queue.get()
                try:
                    if reset:
                        self.reset(task_file)
                    else:
                        self.step(action)
                finally:
                    self.action_queue.task_done()

        def init_env(self, config):
            self.config = config

            screen_height = config['env']['thor']['screen_height']
            screen_width = config['env']['thor']['screen_width']
            smooth_nav = config['env']['thor']['smooth_nav']
            save_frames_to_disk = config['env']['thor']['save_frames_to_disk']

            if not self.env:
                self.env = ThorEnv(player_screen_height=screen_height,
                                   player_screen_width=screen_width,
                                   smooth_nav=smooth_nav,
                                   save_frames_to_disk=save_frames_to_disk)
            self.controller_type = self.config['controller']['type']
            self._done = False
            self._res = ()
            self._feedback = ""
            self.expert = HandCodedThorAgent(self.env, max_steps=200)
            self.prev_command = ""
            self.load_mask_rcnn()

        def load_mask_rcnn(self):
            # load pretrained MaskRCNN model if required
            if 'mrcnn' in self.config['controller']['type'] and not self.mask_rcnn:
                model_path = os.path.join(
                    os.environ['ALFRED_ROOT'],
                    self.config['mask_rcnn']['pretrained_model_path'])
                self.mask_rcnn = load_pretrained_model(model_path)

        def set_task(self, task_file):
            self.task_file = task_file
            self.traj_root = os.path.dirname(task_file)
            with open(task_file, 'r') as f:
                self.traj_data = json.load(f)

        def reset(self, task_file):
            assert self.env
            assert self.controller_type

            self.set_task(task_file)

            # scene setup
            scene_num = self.traj_data['scene']['scene_num']
            object_poses = self.traj_data['scene']['object_poses']
            dirty_and_empty = self.traj_data['scene']['dirty_and_empty']
            object_toggles = self.traj_data['scene']['object_toggles']
            scene_name = 'FloorPlan%d' % scene_num
            self.env.reset(scene_name)
            self.env.restore_scene(object_poses, object_toggles,
                                   dirty_and_empty)

            # recording
            save_frames_path = self.config['env']['thor']['save_frames_path']
            self.env.save_frames_path = os.path.join(
                save_frames_path, self.traj_root.replace('../', ''))

            # initialize to start position
            self.env.step(dict(self.traj_data['scene']['init_action']))

            # print goal instr
            task_desc = get_templated_task_desc(self.traj_data)
            print("Task: %s" % task_desc)

            # print("Task: %s" % (self.traj_data['turk_annotations']['anns'][0]['task_desc']))

            # setup task for reward
            class args:  # stand-in namespace for the argparse args that set_task() expects
                pass

            args.reward_config = os.path.join(os.environ['ALFRED_ROOT'],
                                              'agents/config/rewards.json')
            self.env.set_task(self.traj_data, args, reward_type='dense')

            # set controller
            self.controller_type = self.config['controller']['type']
            self.goal_desc_human_anns_prob = self.config['env']['goal_desc_human_anns_prob']
            load_receps = self.config['controller']['load_receps']
            debug = self.config['controller']['debug']

            if self.controller_type == 'oracle':
                self.controller = OracleAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'oracle_astar':
                self.controller = OracleAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'mrcnn':
                self.controller = MaskRCNNAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            elif self.controller_type == 'mrcnn_astar':
                self.controller = MaskRCNNAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            else:
                raise NotImplementedError()

            # zero steps
            self.steps = 0

            # reset expert state
            self.expert.reset(task_file)
            self.prev_command = ""

            # return intro text
            self._feedback = self.controller.feedback
            self._res = self.get_info()

            return self._feedback

        def step(self, action):
            if not self._done:
                # take action
                self.prev_command = str(action)
                self._feedback = self.controller.step(action)
                self._res = self.get_info()
                if self.env.save_frames_to_disk:
                    self.record_action(action)
            self.steps += 1

        def get_results(self):
            return self._res

        def record_action(self, action):
            txt_file = os.path.join(self.env.save_frames_path, 'action.txt')
            with open(txt_file, 'a+') as f:
                f.write("%s\r\n" % str(action))

        def get_info(self):
            won = self.env.get_goal_satisfied()
            pcs = self.env.get_goal_conditions_met()
            goal_condition_success_rate = pcs[0] / float(pcs[1])
            acs = self.controller.get_admissible_commands()

            # expert action
            if self.train_eval == "train":
                game_state = {
                    'admissible_commands': acs,
                    'feedback': self._feedback,
                    'won': won
                }
                expert_actions = ["look"]
                try:
                    if not self.prev_command:
                        self.expert.observe(game_state['feedback'])
                    else:
                        next_action = self.expert.act(game_state, 0, won,
                                                      self.prev_command)
                        if next_action in acs:
                            expert_actions = [next_action]
                except HandCodedAgentTimeout:
                    print("Expert Timeout")
                except Exception as e:
                    print(e)
                    traceback.print_exc()
            else:
                expert_actions = []

            training_method = self.config["general"]["training_method"]
            if training_method == "dqn":
                max_nb_steps_per_episode = self.config["rl"]["training"][
                    "max_nb_steps_per_episode"]
            elif training_method == "dagger":
                max_nb_steps_per_episode = self.config["dagger"]["training"][
                    "max_nb_steps_per_episode"]
            else:
                raise NotImplementedError
            self._done = won or self.steps > max_nb_steps_per_episode
            return (self._feedback, self._done, acs, won,
                    goal_condition_success_rate, expert_actions)

        def get_last_frame(self):
            return self.env.last_event.frame[:, :, ::-1]  # reverse channel order (RGB -> BGR)

        def get_exploration_frames(self):
            return self.controller.get_exploration_frames()
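
A minimal, hypothetical driver for the worker above (not part of the original source). It only illustrates the (action, reset, task_file) protocol that run() expects on the queue and the 6-tuple returned by get_results(); the config path, the task file path, and the sample command are placeholders, and Thor is assumed to be importable at module level.

    import queue
    import yaml

    # Placeholder config: a dict with the 'env', 'controller', 'mask_rcnn', 'general'
    # sections that init_env()/reset() read; the file path is an assumption.
    with open("configs/base_config.yaml") as f:
        config = yaml.safe_load(f)

    action_queue = queue.Queue()
    thor = Thor(action_queue, train_eval="train")
    thor.daemon = True          # run() loops forever, so let it die with the main process
    thor.start()
    thor.init_env(config)

    # reset to a new task: queue items are (action, reset, task_file) tuples
    action_queue.put((None, True, "path/to/traj_data.json"))
    action_queue.join()         # blocks until task_done() is called inside run()
    feedback, done, commands, won, gc_rate, expert_actions = thor.get_results()

    # take one text action against the same task
    action_queue.put(("go to fridge 1", False, None))
    action_queue.join()
    feedback, done, commands, won, gc_rate, expert_actions = thor.get_results()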
Example #2
    @classmethod
    def run_rollouts(cls, model, task_queue, results, args, validation=False):
        env = ThorEnv()

        while True:
            if validation:
                task, seen = task_queue.get()
            else:
                task = task_queue.get()
            if task is None:
                break

            # reset model
            model.reset()

            # setup scene
            traj_data = model.load_task_json(task)
            r_idx = task['repeat_idx']
            cls.setup_scene(env, traj_data, r_idx, args)

            feat = model.featurize([traj_data],
                                   load_frames=False,
                                   load_mask=False)

            curr_rollout = []
            done = False
            fails = 0
            total_reward = 0
            num_steps = 0
            while not done and num_steps < args.max_steps:

                # extract visual features
                curr_image = Image.fromarray(np.uint8(env.last_event.frame))
                feat['frames'] = model.resnet.featurize([curr_image],
                                                        batch=1).unsqueeze(0)

                # forward model
                out = model.step(feat)
                pred = model.sample_pred(out, greedy=validation)

                # monitor resource usage
                monitor = start_monitor(
                    path=args.dout,
                    note=("validation" if validation else "rollout") +
                         f" step={num_steps}")

                # # check if <<stop>> was predicted
                # if pred['action_low'] == "<<stop>>":
                #     print("\tpredicted STOP")
                #     break

                # get action and mask
                action = pred['action_low']
                mask = pred['action_low_mask'] if cls.has_interaction(action) else None

                # use predicted action and mask (if available) to interact with the env
                t_success, _, _, err, _ = env.va_interact(
                    action,
                    interact_mask=mask,
                    smooth_nav=args.smooth_nav,
                    debug=args.debug)

                if not t_success:
                    fails += 1
                    if fails >= args.max_fails:
                        break

                # next time-step
                reward, done = env.get_transition_reward()
                total_reward += reward
                num_steps += 1

                if not validation:
                    # store one rollout time-step as numpy arrays; the goal/instruction
                    # encoding is a PackedSequence, so its fields are saved separately
                    curr_rollout.append({
                        'frames': feat['frames'].cpu().detach().numpy(),
                        'lang_goal_instr_data': feat['lang_goal_instr'].data.cpu().detach().numpy(),
                        'lang_goal_instr_batch': feat['lang_goal_instr'].batch_sizes.cpu().detach().numpy(),
                        'lang_goal_instr_sorted': (
                            feat['lang_goal_instr'].sorted_indices.cpu().detach().numpy()
                            if feat['lang_goal_instr'].sorted_indices is not None else None),
                        'lang_goal_instr_unsorted': (
                            feat['lang_goal_instr'].unsorted_indices.cpu().detach().numpy()
                            if feat['lang_goal_instr'].unsorted_indices is not None else None),
                        'action_dist': pred['action_low_dist'].cpu().detach().numpy(),
                        'action_mask_dist': pred['action_low_mask_dist'].cpu().detach().numpy(),
                        'action_idx': pred['action_low_idx'].cpu().detach().numpy(),
                        'action_mask_idx': pred['action_low_mask_idx'].cpu().detach().numpy(),
                        'reward': np.array([reward])
                    })

                stop_monitor(monitor)

            if validation:
                # check if goal was satisfied
                goal_satisfied = env.get_goal_satisfied()

                # goal_conditions
                pcs = env.get_goal_conditions_met()
                goal_condition_success_rate = pcs[0] / float(pcs[1])

                # SPL
                path_len_weight = len(traj_data['plan']['low_actions'])
                s_spl = (1 if goal_satisfied else 0) * min(
                    1., path_len_weight / float(num_steps))
                pc_spl = goal_condition_success_rate * min(
                    1., path_len_weight / float(num_steps))

                # path length weighted SPL
                plw_s_spl = s_spl * path_len_weight
                plw_pc_spl = pc_spl * path_len_weight

                # log success/fails
                log_entry = {
                    'trial': traj_data['task_id'],
                    'type': traj_data['task_type'],
                    'repeat_idx': int(r_idx),
                    'seen': seen,
                    'goal_instr': traj_data['turk_annotations']['anns'][r_idx]['task_desc'],
                    'goal_satisfied': goal_satisfied,
                    'completed_goal_conditions': int(pcs[0]),
                    'total_goal_conditions': int(pcs[1]),
                    'goal_condition_success': float(goal_condition_success_rate),
                    'success_spl': float(s_spl),
                    'path_len_weighted_success_spl': float(plw_s_spl),
                    'goal_condition_spl': float(pc_spl),
                    'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
                    'path_len_weight': int(path_len_weight),
                    'reward': float(total_reward)
                }
                results.put(log_entry)
            else:
                results.put(curr_rollout)
        env.stop()
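
A hypothetical, training-mode driver for run_rollouts (not part of the original source). Trainer stands in for whichever class owns the method, and model, tasks, and args for its usual inputs; the real project presumably launches this in worker processes, but a direct call is enough to show the queue protocol.

    from multiprocessing import Queue

    task_queue, results = Queue(), Queue()

    for task in tasks:              # each task dict is expected to carry 'repeat_idx'
        task_queue.put(task)
    task_queue.put(None)            # sentinel that breaks the worker's while-loop

    Trainer.run_rollouts(model, task_queue, results, args, validation=False)

    rollouts = []
    while not results.empty():      # one list of per-step dicts per finished episode
        rollouts.append(results.get())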