def run(args, parser):
    def create_environment(env_config):
        # This import must happen inside the method so that worker processes import this code
        import roboschool
        return gym.make(args.env)

    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    register_env(args.env, create_environment)

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    if args.algorithm == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    env = wrappers.Monitor(env,
                           OUTPUT_DIR,
                           force=True,
                           video_callable=lambda episode_id: True)
    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
Example #3
def run(args, parser):
    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    # If num_steps is 0, (num_steps or steps + 1) evaluates to steps + 1, so the rollout runs until interrupted
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            if not args.no_render:
                env.render()
            if args.out is not None:
                rollout.append([state, action, next_state, reward, done])
            steps += 1
            state = next_state
        if args.out is not None:
            rollouts.append(rollout)
        print("Episode reward", reward_total)
    if args.out is not None:
        pickle.dump(rollouts, open(args.out, "wb"))
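
When --out is given, the script above pickles a list of rollouts, each of which is a list of [state, action, next_state, reward, done] transitions. A short sketch of reading that file back and summarizing it (the file name here is illustrative):

import pickle

import numpy as np

# Load rollouts written by run() via --out (the file name is illustrative)
with open("rollouts.pkl", "rb") as f:
    rollouts = pickle.load(f)

# Each transition is [state, action, next_state, reward, done]; index 3 is the reward
returns = [sum(t[3] for t in rollout) for rollout in rollouts]
print("Episodes:", len(returns), "Mean return:", np.mean(returns))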
def run(args, parser):

    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init(webui_host="127.0.0.1")

    agent_env_config = {"env_name": args.env}

    register_env("unity_env", lambda config: UnityEnvWrapper(agent_env_config))

    # Note: this compares version strings lexicographically (see the sketch after this example)
    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 0
    config["num_gpus"] = 0
    agent = cls(env="unity_env", config=config)
    # Delete unnecessary logs
    env_name = args.env.split('.')[0]
    files = glob("/opt/ml/input/data/train/{}_Data/Logs/*.csv".format(env_name), recursive=True)
    for file in files:
        os.remove(file)

    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env_config = {"env_name": args.env}

    if ray.__version__ >= "0.6.5":
        env = UnityEnvWrapper(env_config)
    else:
        from ray.rllib.agents.dqn.common.wrappers import wrap_dqn
        if args.algorithm == "DQN":
            env = UnityEnvWrapper(env_config)
            env = wrap_dqn(env, args.config.get("model", {}))
        else:
            env = ModelCatalog.get_preprocessor_as_wrapper(UnityEnvWrapper(env_config))

    env = wrappers.Monitor(env, OUTPUT_DIR, force=True, video_callable=lambda episode_id: True)
    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))

    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
Example #5
def run(args, parser):
    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward