예제 #1
0
def _evaluate(env_config, num_episode):
    """Roll out the expert policy and report mean reward and success rate.

    NumPy's global seed is fixed to 0, a ``PGDriveEnv`` is built from
    ``env_config`` and stepped with ``expert(obs, deterministic=True)`` until
    ``num_episode`` episodes have terminated. A one-line summary is printed.

    Args:
        env_config: Configuration passed straight to ``PGDriveEnv``.
        num_episode: Number of terminated episodes to collect.

    Returns:
        Tuple ``(ep_reward_mean, success_rate)``: the mean of the per-episode
        reward sums, and the fraction of episodes for which
        ``get_terminal_state(info)`` equals ``"Success"``.

    Raises:
        ZeroDivisionError: If ``num_episode <= 0``, because no episode is
            collected and the averages are taken over empty lists.
    """
    start = time.time()
    np.random.seed(0)
    env = PGDriveEnv(env_config)
    success_list, reward_list = [], []
    ep_reward, ep_count = 0, 0
    try:
        obs = env.reset()
        while ep_count < num_episode:
            action = expert(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            ep_reward += reward
            if done:
                ep_count += 1
                success_list.append(1 if get_terminal_state(info) == "Success" else 0)
                reward_list.append(ep_reward)
                ep_reward = 0
                obs = env.reset()
    finally:
        # Release the environment even when reset/step/expert raises, so a
        # failing rollout does not leave the simulator open.
        env.close()
    # Measured after close(), so the reported time includes shutting down the env.
    t = time.time() - start
    ep_reward_mean = sum(reward_list) / len(reward_list)
    success_rate = sum(success_list) / len(success_list)
    print(
        f"Finish {ep_count} episodes in {t:.3f} s. Episode reward: {ep_reward_mean}, success rate: {success_rate}."
    )
    return ep_reward_mean, success_rate
def _evaluate(env_config, num_episode, has_traffic=True):
    """Roll out the expert policy, sanity-check the lidar, and report results.

    NumPy's global seed is fixed to 0, a ``PGDriveEnv`` is built from
    ``env_config`` and stepped with ``expert(obs, deterministic=True)`` until
    ``num_episode`` episodes have terminated. A one-line summary is printed.

    Args:
        env_config: Configuration passed straight to ``PGDriveEnv``.
        num_episode: Number of terminated episodes to collect.
        has_traffic: When true, every episode must contain at least one step
            in which some lidar cloud point differs from ``1.0``; otherwise
            an ``AssertionError`` is raised at the end of that episode.

    Returns:
        Tuple ``(ep_reward_mean, success_rate)``: the mean of the per-episode
        reward sums, and the fraction of episodes for which
        ``get_terminal_state(info)`` equals ``"Success"``.

    Raises:
        AssertionError: If ``has_traffic`` is true and an episode ended with
            every observed cloud point equal to ``1.0`` on every step.
        ZeroDivisionError: If ``num_episode <= 0``, because no episode is
            collected and the averages are taken over empty lists.
    """
    start = time.time()
    np.random.seed(0)
    env = PGDriveEnv(env_config)
    success_list, reward_list = [], []
    ep_reward, ep_count = 0, 0
    try:
        obs = env.reset()
        lidar_success = False
        while ep_count < num_episode:
            action = expert(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            # Double check lidar: remember whether any cloud point differed
            # from 1.0 during this episode.
            # NOTE(review): presumably 1.0 is the "nothing detected" reading;
            # confirm against the lidar implementation.
            cloud_points = env.observations[DEFAULT_AGENT].cloud_points
            if not all(p == 1.0 for p in cloud_points):
                lidar_success = True
            ep_reward += reward
            if done:
                ep_count += 1
                success_list.append(1 if get_terminal_state(info) == "Success" else 0)
                reward_list.append(ep_reward)
                ep_reward = 0
                obs = env.reset()
                if has_traffic:
                    assert lidar_success
                # Start the next episode with a fresh lidar flag.
                lidar_success = False
    finally:
        # Single close point: runs on success and on any exception, replacing
        # the previous duplicate close() inside the try body.
        env.close()
    # Measured after close(), so the reported time includes shutting down the env.
    t = time.time() - start
    ep_reward_mean = sum(reward_list) / len(reward_list)
    success_rate = sum(success_list) / len(success_list)
    print(
        f"Finish {ep_count} episodes in {t:.3f} s. Episode reward: {ep_reward_mean}, success rate: {success_rate}."
    )
    return ep_reward_mean, success_rate
예제 #3
0
import random

from pgdrive import PGDriveEnv
from pgdrive.examples import expert, get_terminal_state

if __name__ == '__main__':
    env = PGDriveEnv(
        dict(use_render=True,
             environment_num=100,
             start_seed=random.randint(0, 1000),
             map=7))
    obs = env.reset()
    success_list, reward_list, ep_reward, ep_len, ep_count = [], [], 0, 0, 0
    try:
        while True:
            action = expert(obs)
            obs, reward, done, info = env.step(action)
            ep_reward += reward
            ep_len += 1
            # env.render()
            if done:
                ep_count += 1
                success_list.append(1 if get_terminal_state(info) ==
                                    "Success" else 0)
                reward_list.append(ep_reward)
                print(
                    "{} episodes terminated! Length: {}, Reward: {:.4f}, Terminal state: {}."
                    .format(ep_count, ep_len, ep_reward,
                            get_terminal_state(info)))
                ep_reward = 0
                ep_len = 0