Example #1
    def run(cls, model, resnet, task_queue, args, lock, successes, failures,
            results):
        '''
        evaluation loop
        '''
        # start THOR
        env = ThorEnv()

        # make subgoals list
        subgoals_to_evaluate = (cls.ALL_SUBGOALS if args.subgoals.lower() == "all"
                                else args.subgoals.split(','))
        subgoals_to_evaluate = [
            sg for sg in subgoals_to_evaluate if sg in cls.ALL_SUBGOALS
        ]
        print("Subgoals to evaluate: %s" % str(subgoals_to_evaluate))

        # create empty stats per subgoal
        for sg in subgoals_to_evaluate:
            successes[sg] = list()
            failures[sg] = list()

        while True:
            if task_queue.qsize() == 0:
                break

            task = task_queue.get()

            try:
                traj = model.load_task_json(model.args, task)[0]
                if args.modular_subgoals:
                    filtered_traj_by_subgoal = {
                        subgoal: model.load_task_json(task, subgoal)
                        for subgoal in subgoals_to_evaluate
                    }
                else:
                    filtered_traj_by_subgoal = None
                r_idx = task['repeat_idx']
                subgoals_and_idxs = [
                    (sg['discrete_action']['action'], sg['high_idx'])
                    for sg in traj['plan']['high_pddl']
                    if sg['discrete_action']['action'] in subgoals_to_evaluate
                ]
                for subgoal, eval_idx in subgoals_and_idxs:
                    print("No. of trajectories left: %d" %
                          (task_queue.qsize()))
                    if filtered_traj_by_subgoal is not None:
                        subgoal_filtered_traj_data = filtered_traj_by_subgoal[subgoal]
                    else:
                        subgoal_filtered_traj_data = None
                    cls.evaluate(env, model, eval_idx, r_idx, resnet, traj,
                                 args, lock, successes, failures, results,
                                 subgoal_filtered_traj_data)
            except Exception as e:
                import traceback
                traceback.print_exc()
                print("Error: " + repr(e))

        # stop THOR
        env.stop()
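
This run is written as a classmethod that drains a shared task queue, so several workers can evaluate in parallel. A minimal sketch of the launch side, assuming a multiprocessing setup; Eval, model, resnet, args, and tasks are stand-ins for objects the snippet does not show:

import multiprocessing as mp

# Hypothetical driver for the worker above.
manager = mp.Manager()
task_queue = manager.Queue()
for task in tasks:
    task_queue.put(task)

lock = manager.Lock()
successes, failures, results = manager.dict(), manager.dict(), manager.dict()

workers = [mp.Process(target=Eval.run,
                      args=(model, resnet, task_queue, args, lock,
                            successes, failures, results))
           for _ in range(args.num_threads)]
for w in workers:
    w.start()
for w in workers:
    w.join()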
Example #2
    def run(cls, model, resnet, task_queue, args, lock, successes, failures,
            results):
        '''
        evaluation loop
        '''
        # start THOR
        env = ThorEnv()

        while True:
            if task_queue.qsize() == 0:
                break

            task = task_queue.get()

            try:
                traj = model.load_task_json(task)
                r_idx = task['repeat_idx']
                print("Evaluating: %s" % (traj['root']))
                print("No. of trajectories left: %d" % (task_queue.qsize()))
                cls.evaluate(env, model, r_idx, resnet, traj, args, lock,
                             successes, failures, results)
            except Exception as e:
                import traceback
                traceback.print_exc()
                print("Error: " + repr(e))

        # stop THOR
        env.stop()
Example #3
def run():
    '''
    replay loop
    '''
    # start THOR env
    env = ThorEnv(player_screen_width=IMAGE_WIDTH,
                  player_screen_height=IMAGE_HEIGHT)

    skipped_files = []

    while len(traj_list) > 0:
        lock.acquire()
        json_file = traj_list.pop()
        lock.release()

        print("Augmenting: " + json_file)
        try:
            augment_traj(env, json_file)
        except Exception as e:
            import traceback
            traceback.print_exc()
            print("Error: " + repr(e))
            print("Skipping " + json_file)
            skipped_files.append(json_file)

    env.stop()
    print("Finished.")

    # skipped files
    if len(skipped_files) > 0:
        print("Skipped Files:")
        print(skipped_files)
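
run() here depends on module-level traj_list and lock globals; a sketch of the thread-parallel driver such replay scripts typically use (the glob pattern and thread count are assumptions):

import glob
import threading

traj_list = glob.glob('data/**/traj_data.json', recursive=True)
lock = threading.Lock()

threads = [threading.Thread(target=run) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()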
Example #4
def main(args):

    # start THOR
    env = ThorEnv()

    # load traj_data
    root = args.problem
    json_file = os.path.join(root, 'traj_data.json')
    with open(json_file, 'r') as f:
        traj_data = json.load(f)

    # setup scene
    setup_scene(env, traj_data, 0, args)

    # choose controller
    if args.controller == "oracle":
        AgentModule = OracleAgent
        agent = AgentModule(env, traj_data, traj_root=root, load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "oracle_astar":
        AgentModule = OracleAStarAgent
        agent = AgentModule(env, traj_data, traj_root=root, load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "mrcnn":
        AgentModule = MaskRCNNAgent
        mask_rcnn = load_pretrained_model('./agents/detector/models/mrcnn.pth')
        agent = AgentModule(env, traj_data, traj_root=root,
                            pretrained_model=mask_rcnn,
                            load_receps=args.load_receps, debug=args.debug)
    elif args.controller == "mrcnn_astar":
        AgentModule = MaskRCNNAStarAgent
        mask_rcnn = load_pretrained_model('./agents/detector/models/mrcnn.pth')
        agent = AgentModule(env, traj_data, traj_root=root,
                            pretrained_model=mask_rcnn,
                            load_receps=args.load_receps, debug=args.debug)
    else:
        raise NotImplementedError()

    print(agent.feedback)
    while True:
        cmd = input()
        agent.step(cmd)
        if not args.debug:
            print(agent.feedback)

        done = env.get_goal_satisfied()
        if done:
            print("You won!")
            break
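
The interactive loop reads its settings from args; a plausible argparse setup covering the attributes the snippet touches (flag names are inferred from the code, not confirmed):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('problem', help='directory containing traj_data.json')
parser.add_argument('--controller', default='oracle',
                    choices=['oracle', 'oracle_astar', 'mrcnn', 'mrcnn_astar'])
parser.add_argument('--load_receps', action='store_true')
parser.add_argument('--debug', action='store_true')

if __name__ == '__main__':
    main(parser.parse_args())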
Example #5
        def init_env(self, config):
            self.config = config

            screen_height = config['env']['thor']['screen_height']
            screen_width = config['env']['thor']['screen_width']
            smooth_nav = config['env']['thor']['smooth_nav']
            save_frames_to_disk = config['env']['thor']['save_frames_to_disk']

            if not self.env:
                self.env = ThorEnv(player_screen_height=screen_height,
                                   player_screen_width=screen_width,
                                   smooth_nav=smooth_nav,
                                   save_frames_to_disk=save_frames_to_disk)
            self.controller_type = self.config['controller']['type']
            self._done = False
            self._res = ()
            self._feedback = ""
            self.expert = HandCodedThorAgent(self.env, max_steps=200)
            self.prev_command = ""
            self.load_mask_rcnn()
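
init_env indexes into a nested config dict; the keys it reads imply a layout roughly like the following (values are illustrative):

config = {
    'env': {
        'thor': {
            'screen_height': 300,
            'screen_width': 300,
            'smooth_nav': False,
            'save_frames_to_disk': False,
        }
    },
    'controller': {'type': 'oracle'},
}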
Example #6
def run():
    '''
    replay loop
    '''
    # start THOR env
    env = ThorEnv(player_screen_width=IMAGE_WIDTH,
                  player_screen_height=IMAGE_HEIGHT)

    skipped_files = []
    finished = []
    cache_file = os.path.join(args.save_path, "cache.json")

    while len(traj_list) > 0:
        lock.acquire()
        json_file = traj_list.pop()
        lock.release()

        print("(%d Left) Augmenting: %s" % (len(traj_list), json_file))
        try:
            augment_traj(env, json_file)
            finished.append(json_file)
            with open(cache_file, 'w') as f:
                json.dump({'finished': finished}, f)

        except Exception as e:
            import traceback
            traceback.print_exc()
            print("Error: " + repr(e))
            print("Skipping " + json_file)
            skipped_files.append(json_file)

    env.stop()
    print("Finished.")

    # skipped files
    if len(skipped_files) > 0:
        print("Skipped Files:")
        print(skipped_files)
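
Because this variant checkpoints every finished trajectory to cache.json, a restarted run can skip completed work. A hedged sketch of that pre-filtering step, assuming the same args.save_path and traj_list globals:

import json
import os

cache_file = os.path.join(args.save_path, "cache.json")
if os.path.isfile(cache_file):
    with open(cache_file, 'r') as f:
        finished = set(json.load(f).get('finished', []))
    # Drop already-augmented trajectories before the workers start.
    traj_list = [j for j in traj_list if j not in finished]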
Example #7
def replay_check(args, thread_num=0):
    env = ThorEnv(x_display='0.%d' % (thread_num % args.total_gpu))

    # replay certificate filenames
    replay_certificate_filenames = [
        "replay.certificate.%d" % idx for idx in range(args.num_replays)
    ]

    # Clear existing failures in file recording.
    if args.failure_filename is not None:
        with open(args.failure_filename, 'w') as f:
            f.write('')

    continue_check = True
    total_checks, total_failures, crash_fails, unsat_fails, json_fails, nondet_fails = 0, 0, 0, 0, 0, 0
    errors = {}  # map from error strings to counts, to be shown after every failure.
    total_threads = args.total_gpu * args.num_threads
    current_threads = args.gpu_id * args.num_threads + thread_num

    while continue_check:

        # Crawl the directory of trajectories and vet ones with no certificate.
        failure_list = []
        valid_dirs = []
        count = 0
        for dir_name, subdir_list, file_list in os.walk(args.data_path):
            if "trial_" in dir_name and (not "raw_images" in dir_name) and (
                    not "pddl_states" in dir_name):
                json_file = os.path.join(dir_name, JSON_FILENAME)
                if not os.path.isfile(json_file):
                    continue

                # If we're just stripping certificates, do that and continue.
                if args.remove_certificates:
                    for cidx in range(args.num_replays):
                        certificate_file = os.path.join(
                            dir_name, replay_certificate_filenames[cidx])
                        if os.path.isfile(certificate_file):
                            os.remove(certificate_file)
                    continue

                if count % total_threads == current_threads:
                    valid_dirs.append(dir_name)
                count += 1

        print(len(valid_dirs))
        np.random.shuffle(valid_dirs)
        for ii, dir_name in enumerate(valid_dirs):

            if not os.path.exists(dir_name):
                continue

            json_file = os.path.join(dir_name, JSON_FILENAME)
            if not os.path.isfile(json_file):
                continue

            cidx = 0
            certificate_file = os.path.join(dir_name,
                                            replay_certificate_filenames[cidx])
            already_checked = False
            while os.path.isfile(certificate_file):
                cidx += 1
                if cidx == args.num_replays:
                    already_checked = True
                    break
                certificate_file = os.path.join(
                    dir_name, replay_certificate_filenames[cidx])
            if already_checked:
                continue

            print(ii)
            if not os.path.isfile(certificate_file):
                total_checks += 1. / args.num_replays
                failed = False

                with open(json_file) as f:
                    print("check %d/%d for file '%s'" %
                          (cidx + 1, args.num_replays, json_file))
                    try:
                        traj_data = json.load(f)
                        env.set_task(traj_data, args, reward_type='dense')
                    except json.decoder.JSONDecodeError:
                        failed = True
                        json_fails += 1

                if not failed:
                    steps_taken = None
                    try:
                        steps_taken = replay_json(env, json_file)
                    except Exception as e:
                        import traceback
                        traceback.print_exc()
                        failed = True
                        crash_fails += 1

                        if str(e) not in errors:
                            errors[str(e)] = 0
                        errors[str(e)] += 1
                        print("%%%%%%%%%%")
                        es = sum(errors.values())
                        print("\terrors (%d):" % es)
                        for er, v in sorted(errors.items(),
                                            key=lambda kv: kv[1],
                                            reverse=True):
                            # if v / es < 0.01:  # stop showing below 1% of errors.
                            #     break
                            print("\t(%.2f) (%d)\t%s" % (v / es, v, er))
                        print("%%%%%%%%%%")

                        if cidx > 1:
                            print(
                                "WARNING: replay that has succeeded before has failed at attempt %d"
                                % cidx)
                            nondet_fails += 1

                    if steps_taken is not None:  # executed without crashing, so now we need to verify completion.
                        goal_satisfied = env.get_goal_satisfied()

                        if goal_satisfied:
                            with open(certificate_file, 'w') as f:
                                f.write('%d' % steps_taken)
                        else:
                            failed = True
                            unsat_fails += 1
                            print("Goal was not satisfied after execution!")

                if failed:
                    # Mark one failure and count the remainder of checks for this instance into the total.
                    total_failures += 1
                    total_checks += (args.num_replays -
                                     (cidx + 1)) / float(args.num_replays)

                    failure_list.append(json_file)
                    if args.failure_filename is not None:
                        with open(args.failure_filename, 'a') as f:
                            f.write("%s\n" % json_file)
                    # If we're deleting bad trajectories, do that here.
                    if args.move_failed_trajectories is not None:
                        print("Relocating failed trajectory '%s' to '%s'" %
                              (dir_name,
                               os.path.join(args.move_failed_trajectories)))
                        try:
                            shutil.move(dir_name,
                                        args.move_failed_trajectories)
                        except shutil.Error as e:
                            print(
                                "WARNING: failed to perform move; error follows; deleting instead"
                            )
                            print(repr(e))
                            shutil.rmtree(dir_name)
                    if args.remove_failed_trajectories:
                        print("Removing failed trajectory '%s'" % dir_name)
                        shutil.rmtree(dir_name)

                print("-------------------------")
                print("Success Rate: %.2f/%.2f = %.3f" %
                      (total_checks - total_failures, total_checks,
                       float(total_checks - total_failures) /
                       float(total_checks)))
                if total_failures > 0:
                    print("Non-deterministic failure: %d/%d = %.3f" %
                          (nondet_fails, total_failures,
                           float(nondet_fails) / total_failures))
                    print("Failures by crash: %d/%d = %.3f" %
                          (crash_fails, total_failures,
                           float(crash_fails) / total_failures))
                    print("Failures by unsatisfied: %d/%d = %.3f" %
                          (unsat_fails, total_failures,
                           float(unsat_fails) / total_failures))
                    print("Failures by json decode error: %d/%d = %.3f" %
                          (json_fails, total_failures,
                           float(json_fails) / total_failures))
                print("-------------------------")

        if not args.in_parallel:
            continue_check = False
        else:
            time.sleep(60)
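
replay_check shards the trajectory directories by thread_num against args.total_gpu and args.num_threads; one way the per-thread launch might look (a sketch, assuming the same args namespace):

import threading

threads = [threading.Thread(target=replay_check, args=(args, n))
           for n in range(args.num_threads)]
for t in threads:
    t.start()
for t in threads:
    t.join()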
Example #8
def main(args, thread_num=0):

    print(thread_num)
    # settings
    alfred_dataset_path = '../data/json_2.1.0/train'

    constants.DATA_SAVE_PATH = args.save_path
    print("Force Unsave Data: %s" % str(args.force_unsave))

    # Set up data structure to track dataset balance and use for selecting next parameters.
    # In actively gathering data, we will try to maximize entropy for each (e.g., uniform spread of goals,
    # uniform spread over patient objects, uniform recipient objects, and uniform scenes).
    succ_traj = pd.DataFrame(
        columns=["goal", "pickup", "movable", "receptacle", "scene"])

    # objects-to-scene and scene-to-objects database
    for scene_type, ids in constants.SCENE_TYPE.items():
        for id in ids:
            obj_json_file = os.path.join('layouts',
                                         'FloorPlan%d-objects.json' % id)
            with open(obj_json_file, 'r') as of:
                scene_objs = json.load(of)

            id_str = str(id)
            scene_id_to_objs[id_str] = scene_objs
            for obj in scene_objs:
                if obj not in obj_to_scene_ids:
                    obj_to_scene_ids[obj] = set()
                obj_to_scene_ids[obj].add(id_str)

    # scene-goal database
    for g in constants.GOALS:
        for st in constants.GOALS_VALID[g]:
            scenes_for_goal[g].extend(
                [str(s) for s in constants.SCENE_TYPE[st]])
        scenes_for_goal[g] = set(scenes_for_goal[g])

    # scene-type database
    for st in constants.SCENE_TYPE:
        for s in constants.SCENE_TYPE[st]:
            scene_to_type[str(s)] = st

    # pre-populate counts in this structure using saved trajectories path.
    succ_traj, full_traj = load_successes_from_disk(args.save_path, succ_traj,
                                                    args.just_examine,
                                                    args.repeats_per_cond)
    if args.just_examine:
        print_successes(succ_traj)
        return

    print(succ_traj.groupby('goal').count())
    # pre-populate failed trajectories.
    fail_traj = load_fails_from_disk(args.save_path)
    print("Loaded %d known failed tuples" % len(fail_traj))

    # create env and agent
    env = ThorEnv(x_display='0.%d' % (thread_num % 2))

    game_state = TaskGameStateFullKnowledge(env)
    agent = DeterministicPlannerAgent(thread_id=0, game_state=game_state)

    errors = {}  # map from error strings to counts, to be shown after every failure.
    goal_candidates = constants.GOALS[:]
    pickup_candidates = list(set().union(*[
        constants.VAL_RECEPTACLE_OBJECTS[obj]  # Union objects that can be placed.
        for obj in constants.VAL_RECEPTACLE_OBJECTS
    ]))
    pickup_candidates = [
        p for p in pickup_candidates
        if constants.OBJ_PARENTS[p] in obj_to_scene_ids
    ]
    movable_candidates = list(
        set(constants.MOVABLE_RECEPTACLES).intersection(
            obj_to_scene_ids.keys()))
    receptacle_candidates = [obj for obj in constants.VAL_RECEPTACLE_OBJECTS
                             if obj not in constants.MOVABLE_RECEPTACLES and obj in obj_to_scene_ids] + \
                            [obj for obj in constants.VAL_ACTION_OBJECTS["Toggleable"]
                             if obj in obj_to_scene_ids]

    # toaster isn't interesting in terms of producing linguistic diversity
    receptacle_candidates.remove('Toaster')
    receptacle_candidates.sort()

    scene_candidates = list(scene_id_to_objs.keys())

    n_until_load_successes = args.async_load_every_n_samples
    print_successes(succ_traj)
    task_sampler = sample_task_params(succ_traj, full_traj, fail_traj,
                                      goal_candidates, pickup_candidates,
                                      movable_candidates,
                                      receptacle_candidates, scene_candidates)

    # main generation loop
    # keeps trying out new task tuples as trajectories either fail or succeed
    while True:
        # for _ in range(20):
        for ii, json_path in enumerate(
                glob.iglob(os.path.join(alfred_dataset_path, "**",
                                        "traj_data.json"),
                           recursive=True)):
            # if ii % args.num_threads == thread_num:
            # if ii == 5:
            sampled_task = json_path.split('/')[-3].split('-')
            # sampled_task = next(task_sampler)
            # print("===============")
            # print(ii, json_path)
            print(sampled_task)  # DEBUG
            # print("===============")

            if sampled_task is None:
                sys.exit(
                    "No valid tuples left to sample (all are known to fail or "
                    "already have %d trajectories)" % args.repeats_per_cond)
            gtype, pickup_obj, movable_obj, receptacle_obj, sampled_scene = sampled_task

            sampled_scene = int(sampled_scene)
            print("sampled tuple: " + str((gtype, pickup_obj, movable_obj,
                                           receptacle_obj, sampled_scene)))

            tries_remaining = args.trials_before_fail
            # only try to get the number of trajectories left to make this tuple full.
            target_remaining = args.repeats_per_cond - len(
                succ_traj.loc[(succ_traj['goal'] == gtype)
                              & (succ_traj['pickup'] == pickup_obj) &
                              (succ_traj['movable'] == movable_obj) &
                              (succ_traj['receptacle'] == receptacle_obj) &
                              (succ_traj['scene'] == str(sampled_scene))])
            num_place_fails = 0  # count of errors related to placement failure for no valid positions.

            # continue until we're (out of tries + have never succeeded) or (have gathered the target number of instances)
            while num_place_fails > args.trials_before_fail or target_remaining > 0:

                # environment setup
                constants.pddl_goal_type = gtype
                print("PDDLGoalType: " + constants.pddl_goal_type)
                task_id = create_dirs(gtype, pickup_obj, movable_obj,
                                      receptacle_obj, sampled_scene)

                # setup data dictionary
                setup_data_dict()
                constants.data_dict['task_id'] = task_id
                constants.data_dict['task_type'] = constants.pddl_goal_type
                constants.data_dict['dataset_params'][
                    'video_frame_rate'] = constants.VIDEO_FRAME_RATE

                # plan & execute
                try:
                    # if True:
                    # Agent reset to new scene.
                    constraint_objs = {
                        'repeat': [(
                            constants.OBJ_PARENTS[
                                pickup_obj],  # Generate multiple parent objs.
                            np.random.randint(
                                2 if gtype == "pick_two_obj_and_place" else 1,
                                constants.PICKUP_REPEAT_MAX + 1))],
                        'sparse':
                        [(receptacle_obj.replace('Basin', ''),
                          num_place_fails * constants.RECEPTACLE_SPARSE_POINTS)
                         ]
                    }
                    if movable_obj != "None":
                        constraint_objs['repeat'].append(
                            (movable_obj,
                             np.random.randint(1, constants.PICKUP_REPEAT_MAX +
                                               1)))
                    for obj_type in scene_id_to_objs[str(sampled_scene)]:
                        if (obj_type in pickup_candidates and
                                obj_type != constants.OBJ_PARENTS[pickup_obj]
                                and obj_type != movable_obj):
                            constraint_objs['repeat'].append(
                                (obj_type,
                                 np.random.randint(
                                     1,
                                     constants.MAX_NUM_OF_OBJ_INSTANCES + 1)))
                    if gtype in goal_to_invalid_receptacle:
                        constraint_objs['empty'] = [
                            (r.replace('Basin', ''), num_place_fails *
                             constants.RECEPTACLE_EMPTY_POINTS)
                            for r in goal_to_invalid_receptacle[gtype]
                        ]
                    constraint_objs['seton'] = []
                    if gtype == 'look_at_obj_in_light':
                        constraint_objs['seton'].append(
                            (receptacle_obj, False))
                    if num_place_fails > 0:
                        print(
                            "Failed %d placements in the past; increased free point constraints: "
                            % num_place_fails + str(constraint_objs))
                    scene_info = {
                        'scene_num': sampled_scene,
                        'random_seed': random.randint(0, 2**32)
                    }
                    info = agent.reset(scene=scene_info, objs=constraint_objs)

                    # Problem initialization with given constraints.
                    task_objs = {'pickup': pickup_obj}
                    if movable_obj != "None":
                        task_objs['mrecep'] = movable_obj
                    if gtype == "look_at_obj_in_light":
                        task_objs['toggle'] = receptacle_obj
                    else:
                        task_objs['receptacle'] = receptacle_obj
                    agent.setup_problem({'info': info},
                                        scene=scene_info,
                                        objs=task_objs)

                    # Now that objects are in their initial places, record them.
                    object_poses = [{
                        'objectName':
                        obj['name'].split('(Clone)')[0],
                        'position':
                        obj['position'],
                        'rotation':
                        obj['rotation']
                    } for obj in env.last_event.metadata['objects']
                                    if obj['pickupable']]
                    dirty_and_empty = gtype == 'pick_clean_then_place_in_recep'
                    object_toggles = [{
                        'objectType': o,
                        'stateChange': 'toggleable',
                        'isToggled': v
                    } for o, v in constraint_objs['seton']]
                    constants.data_dict['scene']['object_poses'] = object_poses
                    constants.data_dict['scene'][
                        'dirty_and_empty'] = dirty_and_empty
                    constants.data_dict['scene'][
                        'object_toggles'] = object_toggles

                    # Pre-restore the scene to cause objects to "jitter" like they will when the episode is replayed
                    # based on stored object and toggle info. This should put objects closer to the final positions they'll
                    # be in at inference time (e.g., mugs fallen and broken, knives fallen over, etc.).
                    print("Performing reset via thor_env API")
                    env.reset(sampled_scene)
                    print("Performing restore via thor_env API")
                    env.restore_scene(object_poses, object_toggles,
                                      dirty_and_empty)
                    event = env.step(
                        dict(constants.data_dict['scene']['init_action']))

                    terminal = False
                    while not terminal and agent.current_frame_count <= constants.MAX_EPISODE_LENGTH:
                        action_dict = agent.get_action(None)
                        agent.step(action_dict)
                        reward, terminal = agent.get_reward()

                    dump_data_dict()
                    save_video()
                # else:
                except Exception as e:
                    import traceback
                    traceback.print_exc()
                    print("Error: " + repr(e))
                    print("Invalid Task: skipping...")
                    if args.debug:
                        print(traceback.format_exc())

                    deleted = delete_save(args.in_parallel)
                    if not deleted:  # another thread is filling this task successfully, so leave it alone.
                        target_remaining = 0  # stop trying to do this task.
                    else:
                        if str(
                                e
                        ) == "API Action Failed: No valid positions to place object found":
                            # Try increasing the space available on sparse and empty flagged objects.
                            num_place_fails += 1
                            tries_remaining -= 1
                        else:  # generic error
                            tries_remaining -= 1

                    estr = str(e)
                    if len(estr) > 120:
                        estr = estr[:120]
                    if estr not in errors:
                        errors[estr] = 0
                    errors[estr] += 1
                    print("%%%%%%%%%%")
                    es = sum(errors.values())
                    print("\terrors (%d):" % es)
                    for er, v in sorted(errors.items(),
                                        key=lambda kv: kv[1],
                                        reverse=True):
                        if v / es < 0.01:  # stop showing below 1% of errors.
                            break
                        print("\t(%.2f) (%d)\t%s" % (v / es, v, er))
                    print("%%%%%%%%%%")

                    continue

                if args.force_unsave:
                    delete_save(args.in_parallel)

                # add to save structure. (DataFrame.append was removed in
                # pandas 2.0; pd.concat is the equivalent.)
                succ_traj = pd.concat(
                    [succ_traj,
                     pd.DataFrame([{
                         "goal": gtype,
                         "movable": movable_obj,
                         "pickup": pickup_obj,
                         "receptacle": receptacle_obj,
                         "scene": str(sampled_scene)
                     }])],
                    ignore_index=True)
                target_remaining -= 1
                tries_remaining += args.trials_before_fail  # on success, add more tries for future successes

            # if this combination resulted in a certain number of failures with no successes, flag it as not possible.
            if tries_remaining == 0 and target_remaining == args.repeats_per_cond:
                new_fails = [(gtype, pickup_obj, movable_obj, receptacle_obj,
                              str(sampled_scene))]
                fail_traj = load_fails_from_disk(args.save_path,
                                                 to_write=new_fails)
                print("%%%%%%%%%%")
                print("failures (%d)" % len(fail_traj))
                # print("\t" + "\n\t".join([str(ft) for ft in fail_traj]))
                print("%%%%%%%%%%")

            # if this combination gave us the repeats we wanted, note it as filled.
            if target_remaining == 0:
                full_traj.add((gtype, pickup_obj, movable_obj, receptacle_obj,
                               sampled_scene))

            # if we're sharing with other processes, reload successes from disk to update local copy with others' additions.
            if args.in_parallel:
                if n_until_load_successes > 0:
                    n_until_load_successes -= 1
                else:
                    print(
                        "Reloading trajectories from disk because of parallel processes..."
                    )
                    succ_traj = pd.DataFrame(
                        columns=succ_traj.columns)  # Drop all rows.
                    succ_traj, full_traj = load_successes_from_disk(
                        args.save_path, succ_traj, False,
                        args.repeats_per_cond)
                    print("... Loaded %d trajectories" % len(succ_traj.index))
                    n_until_load_successes = args.async_load_every_n_samples
                    print_successes(succ_traj)
                    task_sampler = sample_task_params(
                        succ_traj, full_traj, fail_traj, goal_candidates,
                        pickup_candidates, movable_candidates,
                        receptacle_candidates, scene_candidates)
                    print(
                        "... Created fresh instance of sample_task_params generator"
                    )
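
For reference, the scene constraints assembled in the try block above are plain dicts keyed by constraint kind; the shapes below mirror what the code builds, with illustrative values:

# Shape of the constraint_objs dict built above (values illustrative).
constraint_objs = {
    'repeat': [('Mug', 3)],           # (object type, instance count) to spawn
    'sparse': [('CounterTop', 4)],    # (receptacle, free-point weight)
    'empty': [('Fridge', 2)],         # (receptacle, empty-point weight)
    'seton': [('DeskLamp', False)],   # (object type, initial isToggled state)
}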
Example #9
def run():
    print(all_scene_numbers)
    # create env and agent
    env = ThorEnv()
    while len(all_scene_numbers) > 0:
        lock.acquire()
        scene_num = all_scene_numbers.pop()
        lock.release()
        fn = os.path.join('layouts', ('FloorPlan%d-layout.npy') % scene_num)
        if os.path.isfile(fn):
            print("file %s already exists; skipping this floorplan" % fn)
            continue

        openable_json_file = os.path.join(
            'layouts', ('FloorPlan%d-openable.json') % scene_num)
        scene_objs_json_file = os.path.join(
            'layouts', ('FloorPlan%d-objects.json') % scene_num)

        scene_name = ('FloorPlan%d') % scene_num
        print('Running ' + scene_name)
        event = env.reset(scene_name,
                          render_image=False,
                          render_depth_image=False,
                          render_class_image=False,
                          render_object_image=True)
        agent_height = event.metadata['agent']['position']['y']

        scene_objs = list(
            set([obj['objectType'] for obj in event.metadata['objects']]))
        with open(scene_objs_json_file, 'w') as sof:
            json.dump(scene_objs, sof, sort_keys=True, indent=4)

        # Get all the reachable points through Unity for this step size.
        event = env.step(
            dict(action='GetReachablePositions',
                 gridSize=constants.AGENT_STEP_SIZE /
                 constants.RECORD_SMOOTHING_FACTOR))
        if event.metadata['actionReturn'] is None:
            print("ERROR: scene %d 'GetReachablePositions' returns None" %
                  scene_num)
        else:
            reachable_points = set()
            for point in event.metadata['actionReturn']:
                reachable_points.add((point['x'], point['z']))
            print("scene %d got %d reachable points, now checking" %
                  (scene_num, len(reachable_points)))

            # Pick up a small object to use in testing whether points are good for openable objects.
            open_test_objs = {
                'ButterKnife', 'CD', 'CellPhone', 'Cloth', 'CreditCard',
                'DishSponge', 'Fork', 'KeyChain', 'Pen', 'Pencil', 'SoapBar',
                'Spoon', 'Watch'
            }
            good_obj_point = None
            good_obj_point = get_obj(env, open_test_objs, reachable_points,
                                     agent_height, scene_name, good_obj_point)

            # map from object names to the best point from which they can be successfully opened
            best_open_point = {}
            # number of pixels in the semantic map of the receptacle at the existing best open point
            best_sem_coverage = {}
            checked_points = set()
            scene_receptacles = set()
            for point in reachable_points:
                point_is_valid = True
                action = {
                    'action': 'TeleportFull',
                    'x': point[0],
                    'y': agent_height,
                    'z': point[1],
                }
                event = env.step(action)
                if event.metadata['lastActionSuccess']:
                    for horizon in [-30, 0, 30]:
                        action = {
                            'action': 'TeleportFull',
                            'x': point[0],
                            'y': agent_height,
                            'z': point[1],
                            'rotateOnTeleport': True,
                            'rotation': 0,
                            'horizon': horizon
                        }
                        event = env.step(action)
                        if not event.metadata['lastActionSuccess']:
                            point_is_valid = False
                            break
                        for rotation in range(3):
                            action = {'action': 'RotateLeft'}
                            event = env.step(action)
                            if not event.metadata['lastActionSuccess']:
                                point_is_valid = False
                                break
                        if not point_is_valid:
                            break
                    if point_is_valid:
                        checked_points.add(point)
                    else:
                        continue

                    # Check whether we can open objects from here in any direction with any tilt.
                    for rotation in range(4):
                        # First try up, then down, then return to the horizon before moving again.
                        for horizon in [-30, 0, 30]:

                            action = {
                                'action': 'TeleportFull',
                                'x': point[0],
                                'y': agent_height,
                                'z': point[1],
                                'rotateOnTeleport': True,
                                'rotation': rotation * 90,
                                'horizon': horizon
                            }
                            event = env.step(action)
                            for obj in event.metadata['objects']:
                                if (obj['visible'] and obj['objectId']
                                        and obj['receptacle']
                                        and not obj['pickupable']
                                        and obj['objectType']
                                        in constants.VAL_RECEPTACLE_OBJECTS):
                                    obj_name = obj['objectId']
                                    obj_point = (obj['position']['x'],
                                                 obj['position']['y'])
                                    scene_receptacles.add(obj_name)

                                    # Go ahead and attempt to close the object from this position if it's open.
                                    if obj['openable'] and obj['isOpen']:
                                        close_action = {
                                            'action': 'CloseObject',
                                            'objectId': obj['objectId']
                                        }
                                        event = env.step(close_action)

                                    point_to_recep = np.linalg.norm(
                                        np.array(point) - np.array(obj_point))
                                    inventory = env.last_event.metadata['inventoryObjects']
                                    inv_obj = inventory[0]['objectId'] if inventory else None

                                    # Heuristic implemented in task_game_state has agent 0.5 or farther in agent space.
                                    heuristic_far_enough_from_recep = 0.5 < point_to_recep
                                    # Ensure this point affords a larger view according to the semantic segmentation
                                    # of the receptacle than the existing.
                                    point_sem_coverage = get_mask_of_obj(
                                        env, obj['objectId'])
                                    if point_sem_coverage is None:
                                        use_sem_heuristic = False
                                        better_sem_coverage = False
                                    else:
                                        use_sem_heuristic = True
                                        better_sem_coverage = (
                                            obj_name not in best_sem_coverage
                                            or best_sem_coverage[obj_name] is None
                                            or point_sem_coverage > best_sem_coverage[obj_name])
                                    # Ensure that this point is farther away than our existing best candidate.
                                    # We'd like to open each receptacle from as far away as possible while retaining
                                    # the ability to pick/place from it.
                                    farther_than_existing_good_point = (
                                        obj_name not in best_open_point
                                        or point_to_recep > np.linalg.norm(
                                            np.array(point) - np.array(
                                                best_open_point[obj_name][:2]))
                                    )
                                    # If we don't have an inventory object, though, we'll fall back to the heuristic
                                    # of being able to open/close as _close_ as possible.
                                    closer_than_existing_good_point = (
                                        obj_name not in best_open_point
                                        or point_to_recep < np.linalg.norm(
                                            np.array(point) - np.array(
                                                best_open_point[obj_name][:2]))
                                    )
                                    # Semantic segmentation heuristic.
                                    if ((use_sem_heuristic
                                         and heuristic_far_enough_from_recep
                                         and better_sem_coverage) or
                                        (not use_sem_heuristic and
                                         # Distance heuristics.
                                         (heuristic_far_enough_from_recep and
                                          (inv_obj and
                                           farther_than_existing_good_point) or
                                          (not inv_obj and
                                           closer_than_existing_good_point)))):
                                        if obj['openable']:
                                            action = {
                                                'action': 'OpenObject',
                                                'objectId': obj['objectId']
                                            }
                                            event = env.step(action)
                                        if (not obj['openable']
                                                or event.metadata['lastActionSuccess']):
                                            # We can open the object, so try placing our small inventory obj inside.
                                            # If it can be placed inside and retrieved, then this is a safe point.
                                            action = {
                                                'action': 'PutObject',
                                                'objectId': inv_obj,
                                                'receptacleObjectId': obj['objectId'],
                                                'forceAction': True,
                                                'placeStationary': True
                                            }
                                            if inv_obj:
                                                event = env.step(action)
                                            if (inv_obj is None
                                                    or event.metadata['lastActionSuccess']):
                                                action = {
                                                    'action': 'PickupObject',
                                                    'objectId': inv_obj
                                                }
                                                if inv_obj:
                                                    event = env.step(action)
                                                if (inv_obj is None
                                                        or event.metadata['lastActionSuccess']):

                                                    # Finally, ensure we can also close the receptacle.
                                                    if obj['openable']:
                                                        action = {
                                                            'action': 'CloseObject',
                                                            'objectId': obj['objectId']
                                                        }
                                                        event = env.step(action)
                                                    if (not obj['openable']
                                                            or event.metadata['lastActionSuccess']):

                                                        # We can put/pick our inv object into the receptacle from here.
                                                        # We have already ensured this point is farther than any
                                                        # existing best, so this is the new best.
                                                        best_open_point[obj_name] = [
                                                            point[0], point[1],
                                                            rotation * 90, horizon
                                                        ]
                                                        best_sem_coverage[obj_name] = point_sem_coverage

                                                # We could not retrieve our inv object, so we need to go get another one
                                                else:
                                                    good_obj_point = get_obj(
                                                        env, open_test_objs,
                                                        reachable_points,
                                                        agent_height,
                                                        scene_name,
                                                        good_obj_point)
                                                    action = {
                                                        'action': 'TeleportFull',
                                                        'x': point[0],
                                                        'y': agent_height,
                                                        'z': point[1],
                                                        'rotateOnTeleport': True,
                                                        'rotation': rotation * 90,
                                                        'horizon': horizon
                                                    }
                                                    event = env.step(action)

                                    # Regardless of what happened up there, try to close the receptacle again if
                                    # it remained open.
                                    if obj['isOpen']:
                                        action = {
                                            'action': 'CloseObject',
                                            'objectId': obj['objectId']
                                        }
                                        event = env.step(action)

            essential_objs = []
            if scene_num in constants.SCENE_TYPE["Kitchen"]:
                essential_objs.extend(["Microwave", "Fridge"])
            for obj in essential_objs:
                if not any(obj in obj_key for obj_key in best_open_point):
                    print(
                        "WARNING: Essential object %s has no open points in scene %d"
                        % (obj, scene_num))

            print(
                "scene %d found open/pick/place/close positions for %d/%d receptacle objects"
                % (scene_num, len(best_open_point), len(scene_receptacles)))
            with open(openable_json_file, 'w') as f:
                json.dump(best_open_point, f, sort_keys=True, indent=4)

            print("scene %d reachable %d, checked %d; taking intersection" %
                  (scene_num, len(reachable_points), len(checked_points)))

            points = np.array(list(checked_points))[:, :2]
            points = points[np.lexsort((points[:, 0], points[:, 1])), :]
            np.save(fn, points)

    env.stop()
    print('Done')
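
The script persists two artifacts per scene: FloorPlanN-openable.json, mapping receptacle objectIds to the best [x, z, rotation, horizon] pose for open/pick/place/close, and FloorPlanN-layout.npy with the validated reachable points. A sketch of reading them back (the scene number is illustrative):

import json
import numpy as np

scene_num = 1
points = np.load('layouts/FloorPlan%d-layout.npy' % scene_num)  # (N, 2) x/z points
with open('layouts/FloorPlan%d-openable.json' % scene_num) as f:
    open_points = json.load(f)  # objectId -> [x, z, rotation, horizon]
print(points.shape, len(open_points))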
Example #10
    class Thor(threading.Thread):
        def __init__(self, queue, train_eval="train"):
            threading.Thread.__init__(self)
            self.action_queue = queue
            self.mask_rcnn = None
            self.env = None
            self.train_eval = train_eval
            self.controller_type = "oracle"

        def run(self):
            while True:
                action, reset, task_file = self.action_queue.get()
                try:
                    if reset:
                        self.reset(task_file)
                    else:
                        self.step(action)
                finally:
                    self.action_queue.task_done()

        def init_env(self, config):
            self.config = config

            screen_height = config['env']['thor']['screen_height']
            screen_width = config['env']['thor']['screen_width']
            smooth_nav = config['env']['thor']['smooth_nav']
            save_frames_to_disk = config['env']['thor']['save_frames_to_disk']

            if not self.env:
                self.env = ThorEnv(player_screen_height=screen_height,
                                   player_screen_width=screen_width,
                                   smooth_nav=smooth_nav,
                                   save_frames_to_disk=save_frames_to_disk)
            self.controller_type = self.config['controller']['type']
            self._done = False
            self._res = ()
            self._feedback = ""
            self.expert = HandCodedThorAgent(self.env, max_steps=200)
            self.prev_command = ""
            self.load_mask_rcnn()

        def load_mask_rcnn(self):
            # load pretrained MaskRCNN model if required
            if 'mrcnn' in self.config['controller']['type'] and not self.mask_rcnn:
                model_path = os.path.join(
                    os.environ['ALFRED_ROOT'],
                    self.config['mask_rcnn']['pretrained_model_path'])
                self.mask_rcnn = load_pretrained_model(model_path)

        def set_task(self, task_file):
            self.task_file = task_file
            self.traj_root = os.path.dirname(task_file)
            with open(task_file, 'r') as f:
                self.traj_data = json.load(f)

        def reset(self, task_file):
            assert self.env
            assert self.controller_type

            self.set_task(task_file)

            # scene setup
            scene_num = self.traj_data['scene']['scene_num']
            object_poses = self.traj_data['scene']['object_poses']
            dirty_and_empty = self.traj_data['scene']['dirty_and_empty']
            object_toggles = self.traj_data['scene']['object_toggles']
            scene_name = 'FloorPlan%d' % scene_num
            self.env.reset(scene_name)
            self.env.restore_scene(object_poses, object_toggles,
                                   dirty_and_empty)

            # recording
            save_frames_path = self.config['env']['thor']['save_frames_path']
            self.env.save_frames_path = os.path.join(
                save_frames_path, self.traj_root.replace('../', ''))

            # initialize to start position
            self.env.step(dict(self.traj_data['scene']['init_action']))

            # print goal instr
            task_desc = get_templated_task_desc(self.traj_data)
            print("Task: %s" % task_desc)

            # print("Task: %s" % (self.traj_data['turk_annotations']['anns'][0]['task_desc']))

            # setup task for reward
            class args:
                pass

            args.reward_config = os.path.join(os.environ['ALFRED_ROOT'],
                                              'agents/config/rewards.json')
            self.env.set_task(self.traj_data, args, reward_type='dense')

            # set controller
            self.controller_type = self.config['controller']['type']
            self.goal_desc_human_anns_prob = self.config['env']['goal_desc_human_anns_prob']
            load_receps = self.config['controller']['load_receps']
            debug = self.config['controller']['debug']

            if self.controller_type == 'oracle':
                self.controller = OracleAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'oracle_astar':
                self.controller = OracleAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob)
            elif self.controller_type == 'mrcnn':
                self.controller = MaskRCNNAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            elif self.controller_type == 'mrcnn_astar':
                self.controller = MaskRCNNAStarAgent(
                    self.env,
                    self.traj_data,
                    self.traj_root,
                    pretrained_model=self.mask_rcnn,
                    load_receps=load_receps,
                    debug=debug,
                    goal_desc_human_anns_prob=self.goal_desc_human_anns_prob,
                    save_detections_to_disk=self.env.save_frames_to_disk,
                    save_detections_path=self.env.save_frames_path)
            else:
                raise NotImplementedError()

            # zero steps
            self.steps = 0

            # reset expert state
            self.expert.reset(task_file)
            self.prev_command = ""

            # return intro text
            self._feedback = self.controller.feedback
            self._res = self.get_info()

            return self._feedback

        def step(self, action):
            if not self._done:
                # take action
                self.prev_command = str(action)
                self._feedback = self.controller.step(action)
                self._res = self.get_info()
                if self.env.save_frames_to_disk:
                    self.record_action(action)
            self.steps += 1

        def get_results(self):
            return self._res

        def record_action(self, action):
            txt_file = os.path.join(self.env.save_frames_path, 'action.txt')
            with open(txt_file, 'a+') as f:
                f.write("%s\r\n" % str(action))

        def get_info(self):
            won = self.env.get_goal_satisfied()
            pcs = self.env.get_goal_conditions_met()
            goal_condition_success_rate = pcs[0] / float(pcs[1])
            acs = self.controller.get_admissible_commands()

            # expert action
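            # during training, ask the hand-coded expert for a next action
            # (used e.g. for DAgger-style imitation); at eval time no expert
            # actions are exposed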
            if self.train_eval == "train":
                game_state = {
                    'admissible_commands': acs,
                    'feedback': self._feedback,
                    'won': won
                }
                expert_actions = ["look"]
                try:
                    if not self.prev_command:
                        self.expert.observe(game_state['feedback'])
                    else:
                        next_action = self.expert.act(game_state, 0, won,
                                                      self.prev_command)
                        if next_action in acs:
                            expert_actions = [next_action]
                except HandCodedAgentTimeout:
                    print("Expert Timeout")
                except Exception as e:
                    print(e)
                    traceback.print_exc()
            else:
                expert_actions = []

            training_method = self.config["general"]["training_method"]
            if training_method == "dqn":
                max_nb_steps_per_episode = self.config["rl"]["training"][
                    "max_nb_steps_per_episode"]
            elif training_method == "dagger":
                max_nb_steps_per_episode = self.config["dagger"]["training"][
                    "max_nb_steps_per_episode"]
            else:
                raise NotImplementedError
            self._done = won or self.steps > max_nb_steps_per_episode
            return (self._feedback, self._done, acs, won,
                    goal_condition_success_rate, expert_actions)

        def get_last_frame(self):
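            # reverse the channel axis (RGB -> BGR), e.g. for OpenCV consumers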
            return self.env.last_event.frame[:, :, ::-1]

        def get_exploration_frames(self):
            return self.controller.get_exploration_frames()
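
A minimal driver for the wrapper above might look like the following sketch. It is not part of the original source: alfred_env stands in for an already-constructed instance of the class shown, task_file for a task JSON path (the reset signature is assumed from the expert.reset(task_file) call above), and the result tuple layout follows get_info().

# minimal usage sketch (assumed names: alfred_env, task_file)
feedback = alfred_env.reset(task_file)
feedback, done, admissible, won, gc_rate, expert = alfred_env.get_results()
while not done:
    # trivial policy: follow the expert action if one is available,
    # otherwise take the first admissible command
    action = expert[0] if expert else admissible[0]
    alfred_env.step(action)
    feedback, done, admissible, won, gc_rate, expert = alfred_env.get_results()
print("won: %s, goal conditions met: %.2f" % (won, gc_rate))
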
Beispiel #11
0
    def run_rollouts(cls, model, task_queue, results, args, validation=False):
        env = ThorEnv()

        while True:
            if validation:
                task, seen = task_queue.get()
            else:
                task = task_queue.get()
            if task is None:
                break

            # reset model
            model.reset()

            # setup scene
            traj_data = model.load_task_json(task)
            r_idx = task['repeat_idx']
            cls.setup_scene(env, traj_data, r_idx, args)

            feat = model.featurize([traj_data],
                                   load_frames=False,
                                   load_mask=False)
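            # (frames and masks are skipped above; visual features are
            # extracted per time-step inside the rollout loop below)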

            curr_rollout = []
            done = False
            fails = 0
            total_reward = 0
            num_steps = 0
            while not done and num_steps < args.max_steps:

                # extract visual features
                curr_image = Image.fromarray(np.uint8(env.last_event.frame))
                feat['frames'] = model.resnet.featurize([curr_image],
                                                        batch=1).unsqueeze(0)

                # forward model
                out = model.step(feat)
                pred = model.sample_pred(out, greedy=validation)

                # monitor resource usage; the conditional is parenthesized so
                # the step count is appended to the note in both modes (without
                # the parens, "+" bound only to the "rollout" branch)
                monitor = start_monitor(
                    path=args.dout,
                    note=("validation" if validation else "rollout") +
                    f" step={num_steps}")

                # # check if <<stop>> was predicted
                # if pred['action_low'] == "<<stop>>":
                #     print("\tpredicted STOP")
                #     break

                # get action and mask
                action = pred['action_low']
                mask = (pred['action_low_mask']
                        if cls.has_interaction(action) else None)

                # use predicted action and mask (if available) to interact with the env
                t_success, _, _, err, _ = env.va_interact(
                    action,
                    interact_mask=mask,
                    smooth_nav=args.smooth_nav,
                    debug=args.debug)

                if not t_success:
                    fails += 1
                    if fails >= args.max_fails:
                        break

                # next time-step
                reward, done = env.get_transition_reward()
                total_reward += reward
                num_steps += 1

                if not validation:
                    # move every tensor to CPU numpy before queuing the step
                    lang = feat['lang_goal_instr']
                    curr_rollout.append({
                        'frames': feat['frames'].cpu().detach().numpy(),
                        'lang_goal_instr_data': lang.data.cpu().detach().numpy(),
                        'lang_goal_instr_batch': lang.batch_sizes.cpu().detach().numpy(),
                        'lang_goal_instr_sorted': (lang.sorted_indices.cpu().detach().numpy()
                                                   if lang.sorted_indices is not None else None),
                        'lang_goal_instr_unsorted': (lang.unsorted_indices.cpu().detach().numpy()
                                                     if lang.unsorted_indices is not None else None),
                        'action_dist': pred['action_low_dist'].cpu().detach().numpy(),
                        'action_mask_dist': pred['action_low_mask_dist'].cpu().detach().numpy(),
                        'action_idx': pred['action_low_idx'].cpu().detach().numpy(),
                        'action_mask_idx': pred['action_low_mask_idx'].cpu().detach().numpy(),
                        'reward': np.array([reward]),
                    })

                stop_monitor(monitor)

            if validation:
                # check if goal was satisfied
                goal_satisfied = env.get_goal_satisfied()

                # goal_conditions
                pcs = env.get_goal_conditions_met()
                goal_condition_success_rate = pcs[0] / float(pcs[1])

                # SPL
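                # SPL = success * min(1, expert_path_len / agent_path_len)
                # (Anderson et al., 2018); here the expert path length is the
                # number of ground-truth low-level actions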
                path_len_weight = len(traj_data['plan']['low_actions'])
                s_spl = (1 if goal_satisfied else 0) * min(
                    1., path_len_weight / float(num_steps))
                pc_spl = goal_condition_success_rate * min(
                    1., path_len_weight / float(num_steps))

                # path length weighted SPL
                plw_s_spl = s_spl * path_len_weight
                plw_pc_spl = pc_spl * path_len_weight

                # log success/fails
                log_entry = {
                    'trial': traj_data['task_id'],
                    'type': traj_data['task_type'],
                    'repeat_idx': int(r_idx),
                    'seen': seen,
                    'goal_instr': traj_data['turk_annotations']['anns'][r_idx]['task_desc'],
                    'goal_satisfied': goal_satisfied,
                    'completed_goal_conditions': int(pcs[0]),
                    'total_goal_conditions': int(pcs[1]),
                    'goal_condition_success': float(goal_condition_success_rate),
                    'success_spl': float(s_spl),
                    'path_len_weighted_success_spl': float(plw_s_spl),
                    'goal_condition_spl': float(pc_spl),
                    'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
                    'path_len_weight': int(path_len_weight),
                    'reward': float(total_reward),
                }
                results.put(log_entry)
            else:
                results.put(curr_rollout)
        env.stop()
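
A hedged sketch of a launcher for run_rollouts above: each worker exits when it pops a None sentinel, so the parent enqueues one sentinel per worker. EvalRollouts, make_model_and_args(), and load_tasks() are placeholder names, not identifiers from the original source.

# minimal launch sketch (placeholder names marked below)
import torch.multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)   # needed for CUDA tensors
    task_queue, results = mp.Queue(), mp.Queue()
    model, args = make_model_and_args()        # placeholder setup
    model.share_memory()                       # share weights across workers
    tasks = load_tasks(args)                   # placeholder task loader
    for task in tasks:
        task_queue.put(task)
    workers = []
    for _ in range(args.num_workers):          # placeholder worker count
        task_queue.put(None)                   # one stop sentinel per worker
        p = mp.Process(target=EvalRollouts.run_rollouts,   # placeholder class
                       args=(model, task_queue, results, args))
        p.start()
        workers.append(p)
    # drain results (one item per task) before joining, so workers never
    # block on a full results queue
    rollouts = [results.get() for _ in range(len(tasks))]
    for p in workers:
        p.join()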