Example #1
File: main.py Project: tomsilver/ploi
def _test_planner(planner, domain_name, num_problems, timeout):
    """Run the planner on up to num_problems test problems and report each result."""
    print("Running testing...")
    env = pddlgym.make("PDDLEnv{}-v0".format(domain_name))
    num_problems = min(num_problems, len(env.problems))
    for problem_idx in range(num_problems):
        print("\tTesting problem {} of {}".format(problem_idx + 1,
                                                  num_problems),
              flush=True)
        env.fix_problem_index(problem_idx)
        state, _ = env.reset()
        start = time.time()
        try:
            plan = planner(env.domain, state, timeout=timeout)
        except (PlanningTimeout, PlanningFailure) as e:
            print("\t\tPlanning failed with error: {}".format(e), flush=True)
            continue
        # Validate plan on the full test problem.
        if not validate_strips_plan(
                domain_file=env.domain.domain_fname,
                problem_file=env.problems[problem_idx].problem_fname,
                plan=plan):
            print("\t\tPlanning returned an invalid plan")
            continue
        print("\t\tSuccess, got plan of length {} in {:.5f} seconds".format(
            len(plan),
            time.time() - start),
              flush=True)
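A hypothetical driver for the function above, not part of the original file. It assumes the FF planner class from the companion pddlgym_planners package, that FF instances accept the timeout keyword _test_planner passes through, and that "Blocks" resolves to the registered PDDLEnvBlocks-v0 environment:

from pddlgym_planners.ff import FF

# Hypothetical usage: evaluate FF on the first 5 Blocks test problems,
# with 30 seconds per problem. These names are assumptions, not main.py.
_test_planner(FF(), "Blocks", num_problems=5, timeout=30)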
Example #2
    def _collect_training_data(self, train_env_name):
        """Returns X, Y where X are States and Y are sets of objects
        """
        outfile = self._dataset_file_prefix + "_{}.pkl".format(train_env_name)
        if not self._load_dataset_from_file or not os.path.exists(outfile):
            inputs = []
            outputs = []
            env = pddlgym.make("PDDLEnv{}-v0".format(train_env_name))
            assert env.operators_as_actions
            for idx in range(min(self._num_train_problems, len(env.problems))):
                print("Collecting training data problem {}".format(idx),
                      flush=True)
                env.fix_problem_index(idx)
                state, _ = env.reset()
                try:
                    plan = self._planner(env.domain, state, timeout=60)
                except (PlanningTimeout, PlanningFailure):
                    print("Warning: planning failed, skipping: {}".format(
                        env.problems[idx].problem_fname))
                    continue
                inputs.append(state)
                objects_in_plan = {o for act in plan for o in act.variables}
                outputs.append(objects_in_plan)
            training_data = (inputs, outputs)

            with open(outfile, "wb") as f:
                pickle.dump(training_data, f)

        with open(outfile, "rb") as f:
            training_data = pickle.load(f)

        return training_data
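Note the design choice: the dataset is always reloaded from disk after (possibly) being written, so the cached and freshly computed paths return identical contents. The same write-then-reload idiom in isolation (cached_compute is a hypothetical helper, not from the repo):

import os
import pickle

def cached_compute(outfile, compute_fn, force=False):
    # Recompute and cache when forced or when no cached copy exists.
    if force or not os.path.exists(outfile):
        with open(outfile, "wb") as f:
            pickle.dump(compute_fn(), f)
    # Always return what is on disk, mirroring _collect_training_data.
    with open(outfile, "rb") as f:
        return pickle.load(f)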
Example #3
File: demo.py Project: tomsilver/pddlgym
def demo_random(env_name, render=True, problem_index=0, verbose=True):
    env = pddlgym.make("PDDLEnv{}-v0".format(env_name.capitalize()))
    env.fix_problem_index(problem_index)
    # PDDLGym action spaces are state-dependent, so sample() takes the state.
    policy = lambda s: env.action_space.sample(s)
    video_path = "/tmp/{}_random_demo.mp4".format(env_name)
    run_demo(env,
             policy,
             render=render,
             verbose=verbose,
             seed=0,
             video_path=video_path)
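A hypothetical invocation: env_name.capitalize() turns "blocks" into the registered PDDLEnvBlocks-v0 environment used elsewhere on this page.

demo_random("blocks", render=False, problem_index=0)  # hypothetical call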
Example #4
def test_searchandrescue_walls(num_actions_to_test=100):
    """Test that when we try to move into walls, we stay put.
    """
    rng = np.random.RandomState(0)
    for level in [1, 2]:
        env = pddlgym.make(f"SearchAndRescueLevel{level}-v0")
        for idx in range(len(env.problems)):
            env.fix_problem_index(idx)
            state, debug_info = env.reset()

            all_actions = env.get_possible_actions()
            dropoff, down, left, right, up, pickup = all_actions

            # Map each action to its (row, column) displacement on the grid.
            act_to_delta = {
                dropoff: (0, 0),
                down: (1, 0),
                left: (0, -1),
                right: (0, 1),
                up: (-1, 0),
                pickup: (0, 0),
            }

            actions = rng.choice(all_actions, size=num_actions_to_test)
            done = False
            robot_r, robot_c = dict(state)["robot0"]
            walls = {loc for name, loc in dict(state).items()
                     if name.startswith("wall")}
            for t, act in enumerate(actions):
                if done:
                    break

                # Candidate cell the robot would move into.
                dr, dc = act_to_delta[act]
                can_r, can_c = robot_r + dr, robot_c + dc

                state1, _, done, _ = env.step(act)
                # Determinism check: step() and get_successor_state() agree.
                state2 = env.get_successor_state(state, act)
                assert state2 == state1
                state = state1

                new_r, new_c = dict(state)["robot0"]

                if (can_r, can_c) in walls:
                    # Can't move into walls!
                    assert (new_r, new_c) == (robot_r, robot_c)

                robot_r, robot_c = new_r, new_c

    print("Test passed.")
Example #5
def test_readme_example():
    """Make sure that the README example runs
    """
    # Planning with FastForward
    ff_planner = FF()
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", ff_planner(env.domain, state))
    print("Statistics:", ff_planner.get_statistics())

    # Planning with FastDownward (--alias seq-opt-lmcut)
    fd_planner = FD()
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", fd_planner(env.domain, state))
    print("Statistics:", fd_planner.get_statistics())

    # Planning with FastDownward (--alias lama-first)
    lama_first_planner = FD(alias_flag="--alias lama-first")
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", lama_first_planner(env.domain, state))
    print("Statistics:", lama_first_planner.get_statistics())
Example #6
def test_planners():
    """Make sure that the plans found by the planners
    succeed in the environments
    """
    planners = [FF(), FD(), FD(alias_flag="--alias lama-first")]
    envs = ["PDDLEnvBlocks-v0", "PDDLEnvBlocks_operator_actions-v0"]

    for planner in planners:
        for env_name in envs:
            env = pddlgym.make(env_name)
            state, _ = env.reset()
            plan = planner(env.domain, state)
            for act in plan:
                _, reward, done, _ = env.step(act)
            assert reward == 1.
            assert done
Example #7
def demo_planning(env_name,
                  render=True,
                  probabilistic=False,
                  problem_index=0,
                  verbose=True):
    env = pddlgym.make("PDDLEnv{}-v0".format(env_name.capitalize()))
    env.fix_problem_index(problem_index)
    planner = FD(alias_flag="--alias lama-first")
    if probabilistic:
        policy = create_replanning_policy(env, planner)
    else:
        policy = create_single_plan_policy(env, planner)
    video_path = "/tmp/{}_random_demo.mp4".format(env_name)
    run_demo(env,
             policy,
             render=render,
             verbose=verbose,
             seed=0,
             video_path=video_path)
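create_single_plan_policy and create_replanning_policy are defined elsewhere in demo.py. A minimal sketch of what the single-plan variant could look like (hypothetical, assuming the planner is callable as planner(domain, state)):

def create_single_plan_policy_sketch(env, planner):
    # Plan once from the initial state, then replay actions in order.
    # A real implementation would also handle plan exhaustion.
    state, _ = env.reset()
    plan = list(planner(env.domain, state))
    return lambda s: plan.pop(0)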
Example #8
def test_searchandrescue(num_actions_to_test=100, verbose=False):
    """Test state encoding and decoding
    """
    for level in range(1, 7):
        env = pddlgym.make(f"SearchAndRescueLevel{level}-v0")
        if level == 1:
            assert len(env.problems) == 20
        else:
            assert len(env.problems) == 50
        env.fix_problem_index(0)
        state, debug_info = env.reset()
        rng = np.random.RandomState(0)

        all_actions = env.get_possible_actions()
        actions = rng.choice(all_actions, size=num_actions_to_test)
        done = False
        for t, act in enumerate(actions):
            if verbose:
                print(f"Taking action {t}/{num_actions_to_test}",
                      end='\r',
                      flush=True)

            # Round-trip checks: the public state and PDDLGym's internal
            # state must encode/decode to each other consistently.
            assert state == env._internal_to_state(env._state)
            assert env._state.literals == env._state_to_internal(
                state).literals
            assert env._state.objects == env._state_to_internal(state).objects
            assert set(env._state.goal.literals) == set(
                env._state_to_internal(state).goal.literals)
            assert env.check_goal(state) == done
            for a in all_actions:
                ns = env.get_successor_state(state, a)
                assert ns == env._internal_to_state(env._state_to_internal(ns))

            if done:
                break
            state, _, done, _ = env.step(act)
        if verbose:
            print()

    print("Test passed.")
Example #9
class LargePOSARRadius1Env(POSARRadius1Env):
    """A 9x9 POSARRadius1Env variant with custom rooms, walls, and starts."""
    height, width = 9, 9
    room_locs = [(0, i) for i in range(9)] + [(8, i) for i in range(9)]
    robot_starts = [(4, 3), (4, 4), (4, 5)]
    wall_locs = [(1, 1), (2, 4), (1, 4), (1, 5), (2, 1), (2, 7), (2, 8),
                 (3, 0), (3, 2), (4, 6), (4, 7), (5, 1), (5, 3), (5, 4),
                 (6, 2), (6, 5), (6, 6), (6, 8), (7, 3), (7, 4)]
    fire_locs = []


if __name__ == "__main__":
    import imageio
    np.random.seed(0)
    for env_name in ["PDDLSearchAndRescueLevel7"]: #, "MyopicPOSAR"]:
        imgs = []
        env = pddlgym.make(f"{env_name}-v0")
        env.fix_problem_index(1)
        obs, _ = env.reset()
        print(obs)
        imgs.append(env.render())
        plan = np.random.choice(env.get_possible_actions(), size=50)
        for act in plan:
            obs, reward, done, _ = env.step(act)
            print(obs, reward, done)
            imgs.append(env.render())
            if done:
                break
        imageio.mimsave(f"/tmp/{env_name}_random.mp4", imgs)