def eval_time(args):
    """Replay one evaluation episode with a checkpointed agent, recording video.

    Loads the game config from ``args.config``, restores the agent named by
    ``args.agent`` from its "_vs_time_pt_check.zip" checkpoint, then rolls out
    a single episode on the second evaluation state while retro records the
    run under ``./videos/<agent>``.
    """
    with open(args.config) as fp:
        cfg_json = json.load(fp)
    config = GameConfig.deserialize(cfg_json)
    video_path = os.path.join("./videos", args.agent)
    config.agents_config[args.agent]["save_path"] += "_vs_time_pt_check.zip"

    # A throwaway env is needed only so the loader can bind the model weights.
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[1])])
    agent = AgentLoader.get_agent(args.agent, config.agents_config, env,
                                  load=True)
    env.close()

    # Rebuild the same env with recording enabled and play one episode.
    env = DummyVecEnv([
        lambda: retro.make(
            config.game_name, state=config.eval_state[1], record=video_path)
    ])
    obs = env.reset()
    done = False
    while not done:
        actions, _ = agent.agent.predict(obs)
        obs, rew, done, info = env.step(actions)
    env.close()
def main(config: str, agent: str):
    """Build a monitored Super Mario Bros environment and construct the agent.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    agent : str
        Key into ``config.agents_config`` naming the agent to construct.
    """
    with open(config) as fp:
        json_data = json.load(fp)
    config = GameConfig.deserialize(json_data)
    log_dir = config.agents_config[agent]["save_path"]

    # NOTE(review): the commented-out Atari path (make_atari_env /
    # VecFrameStack) was dead code and has been removed; this entry point
    # targets gym_super_mario_bros only.
    env = gym_super_mario_bros.make(config.game_name)
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env = Monitor(env, log_dir)
    agent = AgentLoader.get_agent(agent, config.agents_config, env)
def eval_100_trials(args):
    """Evaluate a saved agent over 100 episodes and print its win percentage.

    An episode counts as a win when its accumulated reward is positive.
    """
    with open(args.config) as fp:
        raw_cfg = json.load(fp)
    config = GameConfig.deserialize(raw_cfg)
    config.agents_config[args.agent]["save_path"] += "_vs_time_pt.zip"
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[1])])
    agent = AgentLoader.get_agent(args.agent, config.agents_config, env,
                                  load=True)

    trials = 100
    episode_returns = []
    for _ in tqdm(range(trials)):
        obs = env.reset()
        done = False
        total = 0
        while not done:
            actions, _ = agent.agent.predict(obs)
            obs, rew, done, info = env.step(actions)
            total += rew
        episode_returns.append(total)
    env.close()

    wins = sum(r > 0 for r in episode_returns)
    print("win percentage = {}%".format(wins / trials * 100))
def main_vs_5(config: str):
    """Train an A2C agent sequentially over every configured training state.

    For each state in ``config.train_states`` the agent is given a fresh env,
    trained for 10000 timesteps, and checkpointed before moving on.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    """
    with open(config) as fp:
        json_data = json.load(fp)
    config = GameConfig.deserialize(json_data)
    config.agents_config["A2C"]["save_path"] += "_vs_5"
    config.agents_config["A2C"]["tensorboard"] += "_vs_5"

    # A throwaway env so the loader can construct the model.
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.train_states[0])])
    agent = AgentLoader.get_agent("A2C", config.agents_config, env)
    env.close()

    start_time = time.time()
    for st in tqdm(config.train_states, desc='Main Loop'):
        print(st)
        # Fix: bind the loop variable as a default argument. A plain closure
        # over ``st`` is late-bound and would build the wrong state if the
        # vec-env ever deferred calling its factory past this iteration.
        env = DummyVecEnv([
            lambda st=st: retro.make(
                config.game_name, state=st, scenario='scenario')
        ])
        agent.agent.set_env(env)
        agent.agent.learn(total_timesteps=10000)
        agent.save()
        env.close()
    end_time = time.time() - start_time
    print(f'\n The Training Took {end_time} seconds')
def main(config: str, agent: str):
    """Train an Atari agent selected by name, checkpointing the best model.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    agent : str
        Algorithm name ("DQN", "PPO", or other) keying into the agent config.
    """
    with open(config) as fp:
        parsed = json.load(fp)
    config = GameConfig.deserialize(parsed)
    log_dir = config.agents_config[agent]["save_path"]

    # Number of parallel envs depends on the algorithm.
    n_envs = 1 if agent == "DQN" else 8 if agent == "PPO" else 16
    env = make_atari_env(config.game_name, n_envs=n_envs,
                         seed=0, monitor_dir=log_dir)
    env = VecFrameStack(env, n_stack=4)
    agent = AgentLoader.get_agent(agent, config.agents_config, env)

    # Best-model checkpointing happens inside the callback, so no explicit
    # save is needed after training.
    reward_callback = SaveOnBestTrainingRewardCallback(
        check_freq=100, log_dir=log_dir)
    start_time = time.time()
    steps = 10_000_000
    with ProgressBarManager_new(steps) as progress_callback:
        agent.agent.learn(total_timesteps=steps,
                          callback=[reward_callback, progress_callback])
    env.close()
    elapsed = time.time() - start_time
    print(f'\n The Training Took {elapsed} seconds')
def main(config, agent):
    """Record a video of a trained Pong agent playing one episode.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    agent : str
        Agent name; keys into ``config.agents_config``.
    """
    with open(config) as fp:
        json_data = json.load(fp)
    video_path = os.path.join("./videos", agent, "pong")
    config = GameConfig.deserialize(json_data)
    # Fix: the original body referenced the undefined global ``args``
    # (``args.agent``) even though this function receives ``agent`` directly.
    # NOTE(review): this append has no path separator, producing
    # "<save_path>best_model.zip" — confirm that is intentional.
    config.agents_config[agent]["save_path"] += "best_model.zip"
    print(config.agents_config[agent]["save_path"])

    # A throwaway env so the loader can bind the model weights.
    env = gym.make("PongNoFrameskip-v4")
    agent = AgentLoader.get_agent(agent, config.agents_config, env, load=True)
    env.close()

    # Use a distinct name for the base env: the original wrote
    # ``env = DummyVecEnv([lambda: env])``, which only worked because
    # DummyVecEnv calls its factories eagerly before ``env`` is rebound.
    base_env = gym.make("PongNoFrameskip-v4")
    env = DummyVecEnv([lambda: base_env])
    env = VecVideoRecorder(
        env,
        video_path,
        record_video_trigger=lambda x: x == 0,
    )
    obs = env.reset()
    done = False
    while not done:
        actions, _ = agent.agent.predict(obs)
        obs, rew, done, info = env.step(actions)
    env.close()
def main_vs_time(config: str):
    """Train an A2C agent for 40000 timesteps on the first evaluation state.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    """
    with open(config) as fp:
        cfg_json = json.load(fp)
    config = GameConfig.deserialize(cfg_json)
    config.agents_config["A2C"]["save_path"] += "_vs_time_pt"
    config.agents_config["A2C"]["tensorboard"] += "_vs_time"

    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[0])])
    agent = AgentLoader.get_agent("A2C", config.agents_config, env)

    started = time.time()
    total_steps = 40000
    with ProgressBarManager_new(total_steps) as callback:
        agent.agent.learn(total_timesteps=total_steps, callback=callback)
    agent.save()
    env.close()
    print(f'\n The Training Took {time.time() - started} seconds')
def main_vs_time(config: str):
    """Run a short 1000-step sanity-check training pass for the PPO agent.

    Parameters
    ----------
    config : str
        Path to the JSON game-configuration file.
    """
    with open(config) as fp:
        cfg_json = json.load(fp)
    config = GameConfig.deserialize(cfg_json)
    config.agents_config["PPO"]["save_path"] += "_vs_time_pt_check"
    config.agents_config["PPO"]["tensorboard"] += "_vs_time_check"

    def build_env():
        # Both env constructions are identical, so share one factory.
        return DummyVecEnv(
            [lambda: retro.make(config.game_name,
                                state=config.eval_state[0])])

    # Construct the agent against a throwaway env, then hand it a fresh one.
    env = build_env()
    agent = AgentLoader.get_agent("PPO", config.agents_config, env)
    env.close()

    env = build_env()
    agent.agent.set_env(env)
    with ProgressBarManager_new(1000) as callback:
        agent.agent.learn(1000, callback=callback)
    agent.save()
    env.close()