Example #1
def run_planning_demo(env, planner_name, outdir='/tmp', fps=3, verbose=False, seed=None, check_reward=True):
    if outdir is None:
        outdir = "/tmp/{}".format(env_cls.__name__)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    if seed is not None:
        env.seed(seed)

    obs, debug_info = env.reset()
    plan = run_planner(debug_info['domain_file'], debug_info['problem_file'], planner_name)

    actions = []
    for s in plan:
        a = parse_plan_step(
                s, 
                env.domain.operators.values(), 
                env.action_predicates,
                obs.objects, 
                operators_as_actions=env.operators_as_actions
            )
        actions.append(a)
    
    tot_reward = 0.
    for action in actions:
        if verbose:
            print("Obs:", obs)
            print("Act:", action)

        obs, reward, done, _ = env.step(action)
        env.render()
        tot_reward += reward
        if verbose:
            print("Rew:", reward)

        if done:
            break

    if verbose:
        print("Final obs:", obs)
        print()

    env.close()
    if check_reward:
        assert tot_reward > 0
    if verbose:
        input("press enter to continue to next problem")
    return tot_reward
Example #2
def _plan_step_to_action(domain, state, act_predicates, plan_step):
    return parse_plan_step(
        plan_step,
        domain.operators.values(),
        act_predicates,
        state.objects,
        operators_as_actions=domain.operators_as_actions,
    )
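
A minimal usage sketch for this helper, assuming the same PDDLGym-style setup as the surrounding examples (env.reset() returning (obs, debug_info), run_planner yielding plan-step strings, and env.domain carrying operators_as_actions); the driver code below is illustrative, not from the source:

# Hypothetical driver for _plan_step_to_action; mirrors the plan-parsing
# loops in the other examples here.
obs, debug_info = env.reset()
plan = run_planner(debug_info['domain_file'], debug_info['problem_file'], "ff")
# Convert every textual plan step into a grounded environment action.
actions = [
    _plan_step_to_action(env.domain, obs, env.action_predicates, step)
    for step in plan
]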
Example #3
def run_planning_agent_demo(env, oaru, outdir="out", fps=3, verbose=False, seed=None,
        planner_name="ff"):
    if seed is not None:
        env.seed(seed)

    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    obs, debug_info = env.reset()
    type_predicates = generate_type_predicates(env.unwrapped)

    tqdm.tqdm.write("Planning...")
    plan = run_planner(debug_info['domain_file'], debug_info['problem_file'], planner_name)

    actions = []
    for s in plan:
        a = parse_plan_step(
                s,
                env.domain.operators.values(),
                env.action_predicates,
                obs.objects,
                operators_as_actions=env.operators_as_actions
            )
        actions.append(a)

    tqdm.tqdm.write("Generating state trajectory...")

    state_trajectory = [(env.render(), get_state(env.unwrapped, type_predicates))]

    tot_reward = 0
    for action in tqdm.tqdm(actions):
        if verbose:
            # tqdm.tqdm.write(f"Obs: {obs}")
            tqdm.tqdm.write(f"Act: {action}")

        obs, reward, done, _ = env.step(action)
        state_trajectory.append((env.render(), get_state(env.unwrapped, type_predicates)))
        record_transition(state_trajectory[-2], state_trajectory[-1], oaru)
        tot_reward += reward
        if verbose:
            tqdm.tqdm.write(f"Rew: {reward}")

        if done:
            break

    if verbose:
        # print("Final obs:", obs)
        tqdm.tqdm.write(f"Total reward: {tot_reward}")

    env.close()
Example #4
    def _plan_to_actions(self, plan, objects):
        operators = self._learned_operators
        action_predicates = self._action_space.predicates

        actions = []
        for plan_step in plan:
            if plan_step == "reach-goal":
                # synthetic goal-achievement step; there is no operator to ground
                continue
            action = parse_plan_step(plan_step, operators, action_predicates,
                                     objects)
            actions.append(action)
        return actions
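
A hedged sketch of how a method like this might be driven. The agent object, its environment handle, and the reset/step protocol below are assumed from the other examples here, not confirmed by the source:

# Illustrative caller for _plan_to_actions; `agent` stands in for whatever
# object owns _learned_operators and _action_space.
state, debug_info = env.reset()
plan = run_planner(debug_info['domain_file'], debug_info['problem_file'], "ff")
actions = agent._plan_to_actions(plan, state.objects)
for action in actions:
    state, reward, done, _ = env.step(action)
    if done:
        break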
Example #5
def get_plan_trajectory(env,
                        verbose=False,
                        seed=None,
                        partial_observability=None):
    if seed is not None:
        env.seed(seed)

    obs, debug_info = env.reset()
    type_predicates = generate_type_predicates(env)
    plan = run_planner(debug_info['domain_file'],
                       debug_info['problem_file'],
                       "ff",
                       timeout=300)

    a_lib_gmt = get_gmt_action_library(env)

    action_trajectory = [
        a_lib_gmt[a_name].instantiate(a_args)
        for a_name, *a_args in map(str.split, plan)
    ]

    actions = []
    for s in plan:
        a = parse_plan_step(s,
                            env.domain.operators.values(),
                            env.action_predicates,
                            obs.objects,
                            operators_as_actions=env.operators_as_actions)
        actions.append(a)

    state_trajectory = [get_state(env, type_predicates)]

    for action in actions:
        if verbose:
            print("Obs:", obs)
            print("Act:", action)

        obs, _, done, _ = env.step(action)
        # env.render()

        state_trajectory.append(get_state(env, type_predicates))

        if done:
            break

    if verbose:
        print("Final obs:", obs)
        print()

    env.close()
    if verbose:
        input("press enter to continue to next problem")

    if partial_observability:
        rng = random.Random(seed)
        lo, hi = partial_observability
        for state in state_trajectory:
            n_selected = rng.randint(min(lo, len(state.atoms)),
                                     min(hi, len(state.atoms)))
            # random.sample requires a sequence, not a set; sort for a
            # reproducible draw under the given seed
            selected_atoms = rng.sample(sorted(state.atoms, key=str), n_selected)
            for atom in selected_atoms:
                state.atoms.remove(atom)
                state.uncertain_atoms.add(atom)

    return state_trajectory, action_trajectory
Example #6
def run_probabilistic_planning_demo(env, planner_name, verbose=False, num_epi=20, outdir='/tmp', fps=3):
    """Probabilistic planning via simple determinization.
    """
    if outdir is None:
        outdir = "/tmp/{}".format(env_cls.__name__)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    avg_reward = 0
    for _ in range(num_epi):
        obs, debug_info = env.reset()
        domain = PDDLDomainParser(debug_info["domain_file"])
        domain.determinize()
        domain.write("/tmp/domain.pddl")

        plan = run_planner("/tmp/domain.pddl", debug_info['problem_file'], planner_name)

        actions = []
        for s in plan:
            a = parse_plan_step(
                    s, 
                    env.domain.operators.values(), 
                    env.action_predicates,
                    obs.objects, 
                    operators_as_actions=env.operators_as_actions
                )
            actions.append(a)

        tot_reward = 0.
        for action in actions:
            if verbose:
                print("Obs:", obs)
                print("Act:", action)

            obs, reward, done, _ = env.step(action)
            env.render()
            tot_reward += reward
            if verbose:
                print("Rew:", reward)

            if done:
                break

        if verbose:
            print("Final obs:", obs)
            print("Got total reward:", tot_reward)
            print()

        avg_reward += tot_reward/num_epi

    print("Average reward over {} episodes was {}".format(num_epi, avg_reward))
    env.close()
    if verbose:
        input("press enter to continue to next problem")
    return avg_reward