def run(args, parser):
    """Restore an RLlib agent from a checkpoint and evaluate it.

    Runs ``args.evaluate_episodes`` episodes of the (Roboschool) Gym
    environment ``args.env``, recording a video of every episode to
    ``OUTPUT_DIR`` and printing per-episode and aggregate rewards.

    Args:
        args: Parsed CLI namespace; reads ``checkpoint``, ``config``,
            ``env``, ``algorithm`` and ``evaluate_episodes``.
        parser: The argparse parser, used only to report a missing --env.
    """

    def make_env(env_config):
        # The import lives inside the factory so that Ray worker
        # processes also import roboschool when they construct the env.
        import roboschool
        return gym.make(args.env)

    if not args.config:
        # No explicit config given: load the params.json that Ray saves
        # in the model directory during training by default.
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()
    register_env(args.env, make_env)

    cls = get_agent_class(args.algorithm)
    config = args.config
    # Force a lightweight, local, CPU-only evaluation setup.
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)

    episodes_to_run = int(args.evaluate_episodes)

    if args.algorithm == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))

    # Record a video for every evaluation episode.
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True,
                           video_callable=lambda episode_id: True)

    all_rewards = []
    for _ in range(episodes_to_run):
        steps = 0
        obs = env.reset()
        done = False
        episode_reward = 0.0
        while not done:
            action = agent.compute_action(obs)
            obs, reward, done, _ = env.step(action)
            episode_reward += reward
            steps += 1
        all_rewards.append(episode_reward)
        print("Episode reward: %s. Episode steps: %s" % (episode_reward, steps))

    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
def run(args, parser):
    """Restore an RLlib agent from a checkpoint and roll it out.

    Steps the policy in ``args.env`` for up to ``args.steps`` environment
    steps (0 means "one episode, unbounded"), optionally rendering each
    frame and optionally pickling the collected transitions to ``args.out``.

    Args:
        args: Parsed CLI namespace; reads ``checkpoint``, ``config``,
            ``env``, ``run`` (algorithm name), ``steps``, ``no_render``
            and ``out``.
        parser: The argparse parser, used only to report a missing --env.
    """
    if not args.config:
        # Load configuration from the params.json that Ray saves next to
        # the checkpoint during training.
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))

    # ``saving`` is invariant over the whole rollout; hoist the check.
    saving = args.out is not None
    if saving:
        rollouts = []

    steps = 0
    # ``num_steps or steps + 1`` keeps looping forever when num_steps == 0
    # (the inner ``while not done`` still ends each episode normally).
    while steps < (num_steps or steps + 1):
        if saving:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            if not args.no_render:
                env.render()
            if saving:
                rollout.append([state, action, next_state, reward, done])
            steps += 1
            state = next_state
        if saving:
            rollouts.append(rollout)
        print("Episode reward", reward_total)

    if saving:
        # Fix: the original passed an anonymous open() handle to
        # pickle.dump and never closed it; use a context manager so the
        # file is flushed and closed deterministically.
        with open(args.out, "wb") as out_file:
            pickle.dump(rollouts, out_file)
def run(args, parser):
    """Restore an RLlib agent from a checkpoint and evaluate it in Unity.

    Runs ``args.evaluate_episodes`` episodes of a ``UnityEnvWrapper``
    environment, recording a video of every episode to ``OUTPUT_DIR`` and
    printing per-episode and aggregate rewards. Paths under
    ``/opt/ml/input`` indicate this runs inside a SageMaker training
    container.

    Args:
        args: Parsed CLI namespace; reads ``checkpoint``, ``config``,
            ``env``, ``algorithm`` and ``evaluate_episodes``.
        parser: The argparse parser, used only to report a missing --env.
    """
    if not args.config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training
        # by default.
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init(webui_host="127.0.0.1")
    agent_env_config = {"env_name": args.env}
    register_env("unity_env", lambda config: UnityEnvWrapper(agent_env_config))

    # NOTE(review): this is a lexicographic string comparison, which
    # misorders e.g. "0.10.0" < "0.6.5"; acceptable for the Ray versions
    # this script targets, but worth confirming.
    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    config = args.config
    # Force a lightweight, local, CPU-only evaluation setup.
    config["monitor"] = False
    config["num_workers"] = 0
    config["num_gpus"] = 0
    agent = cls(env="unity_env", config=config)

    # Delete unnecessary Unity log files before evaluation output is collected.
    env_name = args.env.split('.')[0]
    files = glob("/opt/ml/input/data/train/{}_Data/Logs/*.csv".format(env_name),
                 recursive=True)
    for file in files:
        os.remove(file)

    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env_config = {"env_name": args.env}
    if ray.__version__ >= "0.6.5":
        env = UnityEnvWrapper(env_config)
    else:
        # Older Ray ships a DQN-specific observation wrapper.
        from ray.rllib.agents.dqn.common.wrappers import wrap_dqn
        if args.algorithm == "DQN":
            env = UnityEnvWrapper(env_config)
            env = wrap_dqn(env, args.config.get("model", {}))
        else:
            env = ModelCatalog.get_preprocessor_as_wrapper(
                UnityEnvWrapper(env_config))

    # Record a video for every evaluation episode.
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True,
                           video_callable=lambda episode_id: True)

    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        # Fix: the original format string was broken across a raw line
        # break (a syntax error); restored to one line, matching the
        # sibling evaluation script.
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))

    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
if not args.env: if not args.config.get("env"): parser.error("the following arguments are required: --env") args.env = args.config.get("env") ray.init() cls = get_agent_class(args.run) agent = cls(env=args.env, config=args.config) agent.restore(args.checkpoint) num_steps = int(args.steps) if args.run == "DQN": env = gym.make(args.env) env = wrap_dqn(env, args.config.get("model", {})) else: env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env)) if args.out is not None: rollouts = [] steps = 0 while steps < (num_steps or steps + 1): if args.out is not None: rollout = [] state = env.reset() done = False reward_total = 0.0 while not done and steps < (num_steps or steps + 1): action = agent.compute_action(state) next_state, reward, done, _ = env.step(action) reward_total += reward