Example #1: run an interactive agent session in ThorEnv (main)
def main(args):

    # start THOR
    env = ThorEnv()

    # load traj_data
    root = args.problem
    json_file = os.path.join(root, 'traj_data.json')
    with open(json_file, 'r') as f:
        traj_data = json.load(f)

    # setup scene
    setup_scene(env, traj_data, 0, args)

    # choose controller
    if args.controller == "oracle":
        AgentModule = OracleAgent
        agent = AgentModule(env, traj_data, traj_root=root, load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "oracle_astar":
        AgentModule = OracleAStarAgent
        agent = AgentModule(env, traj_data, traj_root=root, load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "mrcnn":
        AgentModule = MaskRCNNAgent
        mask_rcnn = load_pretrained_model('./agents/detector/models/mrcnn.pth')
        agent = AgentModule(env, traj_data, traj_root=root,
                            pretrained_model=mask_rcnn,
                            load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "mrcnn_astar":
        AgentModule = MaskRCNNAStarAgent
        mask_rcnn = load_pretrained_model('./agents/detector/models/mrcnn.pth')
        agent = AgentModule(env, traj_data, traj_root=root,
                            pretrained_model=mask_rcnn,
                            load_receps=args.load_receps, debug=args.debug)
    else:
        raise NotImplementedError()

    print(agent.feedback)
    while True:
        cmd = input()
        agent.step(cmd)
        if not args.debug:
            print(agent.feedback)

        done = env.get_goal_satisfied()
        if done:
            print("You won!")
            break
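A command-line driver for this entry point is not shown above. A minimal sketch is given below; the flag names simply mirror the attributes the function reads (problem, controller, load_receps, debug) and are assumptions, as are any defaults, and the repo-level imports (ThorEnv, the agent classes, load_pretrained_model, setup_scene) are taken to be in scope.

# Hypothetical argparse wrapper for main(); flag names follow the args.* attributes used above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('problem', help="trajectory directory containing traj_data.json")
    parser.add_argument('--controller', default='oracle',
                        choices=['oracle', 'oracle_astar', 'mrcnn', 'mrcnn_astar'])
    parser.add_argument('--load_receps', action='store_true')
    parser.add_argument('--debug', action='store_true')
    main(parser.parse_args())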
Example #2: replay saved trajectories and write replay certificates (replay_check)
def replay_check(args, thread_num=0):
    env = ThorEnv(x_display='0.%d' % (thread_num % args.total_gpu))

    # replay certificate filenames
    replay_certificate_filenames = [
        "replay.certificate.%d" % idx for idx in range(args.num_replays)
    ]

    # Clear existing failures in file recording.
    if args.failure_filename is not None:
        with open(args.failure_filename, 'w') as f:
            f.write('')

    continue_check = True
    total_checks, total_failures, crash_fails, unsat_fails, json_fails, nondet_fails = 0, 0, 0, 0, 0, 0
    errors = {}  # map from error strings to counts, to be shown after every failure.
    total_threads = args.total_gpu * args.num_threads
    current_threads = args.gpu_id * args.num_threads + thread_num

    while continue_check:

        # Crawl the directory of trajectories and vet ones with no certificate.
        failure_list = []
        valid_dirs = []
        count = 0
        for dir_name, subdir_list, file_list in os.walk(args.data_path):
            if "trial_" in dir_name and (not "raw_images" in dir_name) and (
                    not "pddl_states" in dir_name):
                json_file = os.path.join(dir_name, JSON_FILENAME)
                if not os.path.isfile(json_file):
                    continue

                # If we're just stripping certificates, do that and continue.
                if args.remove_certificates:
                    for cidx in range(args.num_replays):
                        certificate_file = os.path.join(
                            dir_name, replay_certificate_filenames[cidx])
                        if os.path.isfile(certificate_file):
                            os.system("rm %s" % certificate_file)
                    continue

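                # shard work across workers: this one only claims directories whose index
                # maps to its global thread id (gpu_id * num_threads + thread_num)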
                if count % total_threads == current_threads:
                    valid_dirs.append(dir_name)
                count += 1

        print(len(valid_dirs))
        np.random.shuffle(valid_dirs)
        for ii, dir_name in enumerate(valid_dirs):

            if not os.path.exists(dir_name):
                continue

            json_file = os.path.join(dir_name, JSON_FILENAME)
            if not os.path.isfile(json_file):
                continue

            cidx = 0
            certificate_file = os.path.join(dir_name,
                                            replay_certificate_filenames[cidx])
            already_checked = False
            while os.path.isfile(certificate_file):
                cidx += 1
                if cidx == args.num_replays:
                    already_checked = True
                    break
                certificate_file = os.path.join(
                    dir_name, replay_certificate_filenames[cidx])
            if already_checked:
                continue

            print(ii)
            if not os.path.isfile(certificate_file):
                total_checks += 1. / args.num_replays
                failed = False

                with open(json_file) as f:
                    print("check %d/%d for file '%s'" %
                          (cidx + 1, args.num_replays, json_file))
                    try:
                        traj_data = json.load(f)
                        env.set_task(traj_data, args, reward_type='dense')
                    except json.decoder.JSONDecodeError:
                        failed = True
                        json_fails += 1

                if not failed:
                    steps_taken = None
                    try:
                        steps_taken = replay_json(env, json_file)
                    except Exception as e:
                        import traceback
                        traceback.print_exc()
                        failed = True
                        crash_fails += 1

                        if str(e) not in errors:
                            errors[str(e)] = 0
                        errors[str(e)] += 1
                        print("%%%%%%%%%%")
                        es = sum([errors[er] for er in errors])
                        print("\terrors (%d):" % es)
                        for er, v in sorted(errors.items(),
                                            key=lambda kv: kv[1],
                                            reverse=True):
                            # if v / es < 0.01:  # stop showing below 1% of errors.
                            #     break
                            print("\t(%.2f) (%d)\t%s" % (v / es, v, er))
                        print("%%%%%%%%%%")

                        if cidx > 1:
                            print(
                                "WARNING: replay that has succeeded before has failed at attempt %d"
                                % cidx)
                            nondet_fails += 1

                    if steps_taken is not None:  # executed without crashing, so now we need to verify completion.
                        goal_satisfied = env.get_goal_satisfied()

                        if goal_satisfied:
                            with open(certificate_file, 'w') as f:
                                f.write('%d' % steps_taken)
                        else:
                            failed = True
                            unsat_fails += 1
                            print("Goal was not satisfied after execution!")

                if failed:
                    # Mark one failure and count the remainder of checks for this instance into the total.
                    total_failures += 1
                    total_checks += args.num_replays - (
                        (cidx + 1) / float(args.num_replays))

                    failure_list.append(json_file)
                    if args.failure_filename is not None:
                        with open(args.failure_filename, 'a') as f:
                            f.write("%s\n" % json_file)
                    # If we're deleting bad trajectories, do that here.
                    if args.move_failed_trajectories is not None:
                        print("Relocating failed trajectory '%s' to '%s'" %
                              (dir_name,
                               os.path.join(args.move_failed_trajectories)))
                        try:
                            shutil.move(dir_name,
                                        args.move_failed_trajectories)
                        except shutil.Error as e:
                            print(
                                "WARNING: failed to perform move; error follows; deleting instead"
                            )
                            print(repr(e))
                            shutil.rmtree(dir_name)
                    if args.remove_failed_trajectories:
                        print("Removing failed trajectory '%s'" % dir_name)
                        shutil.rmtree(dir_name)

                print("-------------------------")
                print("Success Rate: %.2f/%.2f = %.3f" %
                      (total_checks - total_failures, total_checks,
                       float(total_checks - total_failures) /
                       float(total_checks)))
                if total_failures > 0:
                    print("Non-deterministic failure: %d/%d = %.3f" %
                          (nondet_fails, total_failures,
                           float(nondet_fails) / total_failures))
                    print("Failures by crash: %d/%d = %.3f" %
                          (crash_fails, total_failures,
                           float(crash_fails) / total_failures))
                    print("Failures by unsatisfied: %d/%d = %.3f" %
                          (unsat_fails, total_failures,
                           float(unsat_fails) / total_failures))
                    print("Failures by json decode error: %d/%d = %.3f" %
                          (json_fails, total_failures,
                           float(json_fails) / total_failures))
                print("-------------------------")

        if not args.in_parallel:
            continue_check = False
        else:
            time.sleep(60)
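The function is written to be sharded across GPUs and threads (thread_num, args.total_gpu, args.num_threads) and to keep polling when args.in_parallel is set. A minimal launcher sketch, assuming args was already parsed elsewhere with the fields used above:

# Hypothetical launcher; assumes args carries the fields referenced in replay_check.
import threading

workers = [
    threading.Thread(target=replay_check, args=(args, n))
    for n in range(args.num_threads)
]
for w in workers:
    w.start()
for w in workers:
    w.join()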
Example #3: threaded ThorEnv worker driven by an action queue (Thor)
    class Thor(threading.Thread):
        def __init__(self, queue, train_eval="train"):
            threading.Thread.__init__(self)
            self.action_queue = queue
            self.mask_rcnn = None
            self.env = None
            self.train_eval = train_eval
            self.controller_type = "oracle"

        def run(self):
            while True:
                action, reset, task_file = self.action_queue.get()
                try:
                    if reset:
                        self.reset(task_file)
                    else:
                        self.step(action)
                finally:
                    self.action_queue.task_done()

        def init_env(self, config):
            self.config = config

            screen_height = config['env']['thor']['screen_height']
            screen_width = config['env']['thor']['screen_width']
            smooth_nav = config['env']['thor']['smooth_nav']
            save_frames_to_disk = config['env']['thor']['save_frames_to_disk']

            if not self.env:
                self.env = ThorEnv(player_screen_height=screen_height,
                                   player_screen_width=screen_width,
                                   smooth_nav=smooth_nav,
                                   save_frames_to_disk=save_frames_to_disk)
            self.controller_type = self.config['controller']['type']
            self._done = False
            self._res = ()
            self._feedback = ""
            self.expert = HandCodedThorAgent(self.env, max_steps=200)
            self.prev_command = ""
            self.load_mask_rcnn()

        def load_mask_rcnn(self):
            # load pretrained MaskRCNN model if required
            if 'mrcnn' in self.config['controller'][
                    'type'] and not self.mask_rcnn:
                model_path = os.path.join(
                    os.environ['ALFRED_ROOT'],
                    self.config['mask_rcnn']['pretrained_model_path'])
                self.mask_rcnn = load_pretrained_model(model_path)

        def set_task(self, task_file):
            self.task_file = task_file
            self.traj_root = os.path.dirname(task_file)
            with open(task_file, 'r') as f:
                self.traj_data = json.load(f)

        def reset(self, task_file):
            assert self.env
            assert self.controller_type

            self.set_task(task_file)

            # scene setup
            scene_num = self.traj_data['scene']['scene_num']
            object_poses = self.traj_data['scene']['object_poses']
            dirty_and_empty = self.traj_data['scene']['dirty_and_empty']
            object_toggles = self.traj_data['scene']['object_toggles']
            scene_name = 'FloorPlan%d' % scene_num
            self.env.reset(scene_name)
            self.env.restore_scene(object_poses, object_toggles,
                                   dirty_and_empty)

            # recording
            save_frames_path = self.config['env']['thor']['save_frames_path']
            self.env.save_frames_path = os.path.join(
                save_frames_path, self.traj_root.replace('../', ''))

            # initialize to start position
            self.env.step(dict(self.traj_data['scene']['init_action']))

            # print goal instr
            task_desc = get_templated_task_desc(self.traj_data)
            print("Task: %s" % task_desc)

            # print("Task: %s" % (self.traj_data['turk_annotations']['anns'][0]['task_desc']))

            # setup task for reward
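            # a bare class serves as a lightweight namespace for the args object set_task expects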
            class args:
                pass

            args.reward_config = os.path.join(os.environ['ALFRED_ROOT'],
                                              'agents/config/rewards.json')
            self.env.set_task(self.traj_data, args, reward_type='dense')

            # set controller
            self.controller_type = self.config['controller']['type']
            self.goal_desc_human_anns_prob = self.config['env'][
                'goal_desc_human_anns_prob']
            load_receps = self.config['controller']['load_receps']
            debug = self.config['controller']['debug']

            if self.controller_type == 'oracle':
                self.controller = OracleAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'oracle_astar':
                self.controller = OracleAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'mrcnn':
                self.controller = MaskRCNNAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            elif self.controller_type == 'mrcnn_astar':
                self.controller = MaskRCNNAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            else:
                raise NotImplementedError()

            # zero steps
            self.steps = 0

            # reset expert state
            self.expert.reset(task_file)
            self.prev_command = ""

            # return intro text
            self._feedback = self.controller.feedback
            self._res = self.get_info()

            return self._feedback

        def step(self, action):
            if not self._done:
                # take action
                self.prev_command = str(action)
                self._feedback = self.controller.step(action)
                self._res = self.get_info()
                if self.env.save_frames_to_disk:
                    self.record_action(action)
            self.steps += 1

        def get_results(self):
            return self._res

        def record_action(self, action):
            txt_file = os.path.join(self.env.save_frames_path, 'action.txt')
            with open(txt_file, 'a+') as f:
                f.write("%s\r\n" % str(action))

        def get_info(self):
            won = self.env.get_goal_satisfied()
            pcs = self.env.get_goal_conditions_met()
            goal_condition_success_rate = pcs[0] / float(pcs[1])
            acs = self.controller.get_admissible_commands()

            # expert action
            if self.train_eval == "train":
                game_state = {
                    'admissible_commands': acs,
                    'feedback': self._feedback,
                    'won': won
                }
                expert_actions = ["look"]
                try:
                    if not self.prev_command:
                        self.expert.observe(game_state['feedback'])
                    else:
                        next_action = self.expert.act(game_state, 0, won,
                                                      self.prev_command)
                        if next_action in acs:
                            expert_actions = [next_action]
                except HandCodedAgentTimeout:
                    print("Expert Timeout")
                except Exception as e:
                    print(e)
                    traceback.print_exc()
            else:
                expert_actions = []

            training_method = self.config["general"]["training_method"]
            if training_method == "dqn":
                max_nb_steps_per_episode = self.config["rl"]["training"][
                    "max_nb_steps_per_episode"]
            elif training_method == "dagger":
                max_nb_steps_per_episode = self.config["dagger"]["training"][
                    "max_nb_steps_per_episode"]
            else:
                raise NotImplementedError
            self._done = won or self.steps > max_nb_steps_per_episode
            return (self._feedback, self._done, acs, won,
                    goal_condition_success_rate, expert_actions)

        def get_last_frame(self):
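            # reverse the channel axis (RGB -> BGR), presumably for OpenCV-style consumers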
            return self.env.last_event.frame[:, :, ::-1]

        def get_exploration_frames(self):
            return self.controller.get_exploration_frames()
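The thread consumes (action, reset, task_file) tuples from its queue, and init_env must be called before any work is queued. A minimal sketch of driving it follows; the config dict and the task path are placeholders, and Thor is assumed to be reachable in scope (in the source it is defined as a nested helper class).

# Hypothetical driver; the config contents and the task path are placeholders.
import queue

q = queue.Queue()
thor = Thor(q, train_eval="train")
thor.daemon = True
thor.start()

thor.init_env(config)                            # config dict loaded elsewhere
q.put((None, True, "path/to/traj_data.json"))    # reset=True loads and resets the task
q.join()

q.put(("go to fridge 1", False, None))           # reset=False executes a text command
q.join()
feedback, done, admissible, won, gc_rate, expert = thor.get_results()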
Example #4: collect policy rollouts or validation metrics in ThorEnv (run_rollouts)
    def run_rollouts(cls, model, task_queue, results, args, validation=False):
        env = ThorEnv()

        while True:
            if validation:
                task, seen = task_queue.get()
            else:
                task = task_queue.get()
            if task is None:
                break

            # reset model
            model.reset()

            # setup scene
            traj_data = model.load_task_json(task)
            r_idx = task['repeat_idx']
            cls.setup_scene(env, traj_data, r_idx, args)

            feat = model.featurize([traj_data],
                                   load_frames=False,
                                   load_mask=False)

            curr_rollout = []
            done = False
            fails = 0
            total_reward = 0
            num_steps = 0
            while not done and num_steps < args.max_steps:

                # extract visual features
                curr_image = Image.fromarray(np.uint8(env.last_event.frame))
                feat['frames'] = model.resnet.featurize([curr_image],
                                                        batch=1).unsqueeze(0)

                # forward model
                out = model.step(feat)
                pred = model.sample_pred(out, greedy=validation)

                # monitor resource usage
                monitor = start_monitor(
                    path=args.dout,
                    note=("validation" if validation else "rollout") +
                         f" step={num_steps}")

                # # check if <<stop>> was predicted
                # if pred['action_low'] == "<<stop>>":
                #     print("\tpredicted STOP")
                #     break

                # get action and mask
                action = pred['action_low']
                mask = pred['action_low_mask'] if cls.has_interaction(
                    action) else None

                # use predicted action and mask (if available) to interact with the env
                t_success, _, _, err, _ = env.va_interact(
                    action,
                    interact_mask=mask,
                    smooth_nav=args.smooth_nav,
                    debug=args.debug)

                if not t_success:
                    fails += 1
                    if fails >= args.max_fails:
                        break

                # next time-step
                reward, done = env.get_transition_reward()
                total_reward += reward
                num_steps += 1

                if not validation:
                    lang = feat['lang_goal_instr']
                    curr_rollout.append({
                        'frames': feat['frames'].cpu().detach().numpy(),
                        'lang_goal_instr_data': lang.data.cpu().detach().numpy(),
                        'lang_goal_instr_batch': lang.batch_sizes.cpu().detach().numpy(),
                        'lang_goal_instr_sorted': lang.sorted_indices.cpu().detach().numpy()
                        if lang.sorted_indices is not None else None,
                        'lang_goal_instr_unsorted': lang.unsorted_indices.cpu().detach().numpy()
                        if lang.unsorted_indices is not None else None,
                        'action_dist': pred['action_low_dist'].cpu().detach().numpy(),
                        'action_mask_dist': pred['action_low_mask_dist'].cpu().detach().numpy(),
                        'action_idx': pred['action_low_idx'].cpu().detach().numpy(),
                        'action_mask_idx': pred['action_low_mask_idx'].cpu().detach().numpy(),
                        'reward': np.array([reward])
                    })

                stop_monitor(monitor)

            if validation:
                # check if goal was satisfied
                goal_satisfied = env.get_goal_satisfied()

                # goal_conditions
                pcs = env.get_goal_conditions_met()
                goal_condition_success_rate = pcs[0] / float(pcs[1])

                # SPL
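                # SPL = success * (expert path length / max(expert path length, agent steps));
                # for num_steps > 0 this equals the min(1., L / num_steps) form used below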
                path_len_weight = len(traj_data['plan']['low_actions'])
                s_spl = (1 if goal_satisfied else 0) * min(
                    1., path_len_weight / float(num_steps))
                pc_spl = goal_condition_success_rate * min(
                    1., path_len_weight / float(num_steps))

                # path length weighted SPL
                plw_s_spl = s_spl * path_len_weight
                plw_pc_spl = pc_spl * path_len_weight

                # log success/fails
                log_entry = {
                    'trial': traj_data['task_id'],
                    'type': traj_data['task_type'],
                    'repeat_idx': int(r_idx),
                    'seen': seen,
                    'goal_instr': traj_data['turk_annotations']['anns'][r_idx]['task_desc'],
                    'goal_satisfied': goal_satisfied,
                    'completed_goal_conditions': int(pcs[0]),
                    'total_goal_conditions': int(pcs[1]),
                    'goal_condition_success': float(goal_condition_success_rate),
                    'success_spl': float(s_spl),
                    'path_len_weighted_success_spl': float(plw_s_spl),
                    'goal_condition_spl': float(pc_spl),
                    'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
                    'path_len_weight': int(path_len_weight),
                    'reward': float(total_reward)
                }
                results.put(log_entry)
            else:
                results.put(curr_rollout)
        env.stop()
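The worker drains task_queue until it sees a None sentinel and pushes one rollout (or, in validation, a metrics dict) onto results per task. A minimal sketch of feeding it in training mode is below; Trainer stands in for whatever class owns this classmethod, and train_tasks, model, and args are assumed to exist.

# Hypothetical dispatch (training mode); Trainer, train_tasks, model and args are placeholders.
from queue import Queue

task_queue, results = Queue(), Queue()
for task in train_tasks:          # each task dict must carry a 'repeat_idx' key
    task_queue.put(task)
task_queue.put(None)              # sentinel: the worker exits its while-loop

Trainer.run_rollouts(model, task_queue, results, args)
rollouts = [results.get() for _ in range(results.qsize())]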