def save_model(exp_path, model, model_type, env, basicdate):
    """Persist *model* (plus env settings) under *exp_path* and record a training video.

    Side effects only: delegates persistence to
    ``helper.save_model_with_env_settings`` and writes ``training.mp4``
    into *exp_path* via ``helper.make_video``. Returns ``None``.
    """
    logger.info("Saving model")
    helper.save_model_with_env_settings(exp_path, model, model_type, env, basicdate)
    helper.make_video(model, env, os.path.join(exp_path, 'training.mp4'))
def save_model(exp_path, model, model_type, env, basicdate):
    """Save *model* together with its environment settings, then record a training video.

    Writes ``training.mp4`` into *exp_path*; returns ``None``.

    NOTE(review): this redefines ``save_model`` declared earlier in this
    file with an identical body — the later definition silently wins at
    import time. Consider deleting one of the two copies.
    """
    logger.info("Saving model")
    helper.save_model_with_env_settings(exp_path, model, model_type, env, basicdate)
    video_path = os.path.join(exp_path, 'training.mp4')
    helper.make_video(model, env, video_path)
# NOTE(review): this chunk begins mid-expression — the head of the list
# comprehension that the next line closes (presumably building the agent
# population) lies outside the visible chunk. Line structure below is a
# best-effort reconstruction; indentation of the loop bodies is inferred,
# not original — TODO confirm against the source notebook.
for i in range(population_size) ]

# for iteration in tqdm(range(iterations)):
for iteration in range(iterations):
    print("Iteration: " + str(iteration))
    # Round-robin training: every panther/pelican pairing is wired into the
    # two envs, then each side takes a learning step.
    for panther in panthers:
        for pelican in pelicans:
            panther_env.set_pelican(pelican)
            pelican_env.set_panther(panther)
            pelican.learn(steps)
            panther.learn(steps)

# Make video
video_path = os.path.join('./', 'test_self_play.mp4')
# presumably pelicans[2] is an arbitrary sample agent for the demo video —
# verify; this hard-codes a population size of at least 3.
basewidth, hsize = helper.make_video(pelicans[2], pelican_env, video_path)
video = io.open(video_path, 'r+b').read()
encoded = base64.b64encode(video)
# Inline the recorded mp4 as a base64 data URI (notebook display).
HTML(data='''<video alt="test" width="''' + str(basewidth) + '''" height="''' + str(hsize) + '''" controls> <source src="data:video/mp4;base64,{0}" type="video/mp4" /> </video>'''.format(encoded.decode('ascii')))

start_time = time.time()
stats = []
# Evaluate every pelican against every panther opponent.
for panther in panthers:
    for pelican in pelicans:
        #panther_env.set_pelican(pelican)
        pelican_env.set_panther(panther)
        mean, std = evaluate_policy(pelican, pelican_env, n_eval_episodes=10)
        # NOTE(review): chunk ends mid-statement — the dict literal passed to
        # stats.append continues outside the visible chunk.
        stats.append({
def _make_env(env_cls, config_file_path, image_based, parallel, num_parallel_envs,
              **agent_kwargs):
    """Build one game environment, optionally wrapped in a SubprocVecEnv.

    env_cls          -- environment class (PlarkEnv or PlarkEnvSparse)
    config_file_path -- path to the JSON game configuration
    image_based      -- observation-type flag forwarded to the env
    parallel         -- if True, wrap num_parallel_envs copies in SubprocVecEnv
    agent_kwargs     -- driving_agent plus any opponent filepath, e.g.
                        pelican_agent_filepath=... / panther_agent_filepath=...
    """
    def _build():
        return env_cls(
            config_file_path=config_file_path,
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3,
            **agent_kwargs)

    if parallel:
        # Each worker constructs an identical env (same closure per slot,
        # matching the original per-iteration lambdas).
        return SubprocVecEnv([_build for _ in range(num_parallel_envs)])
    return _build()


def run_self_play(exp_name, exp_path, basicdate,
                  pelican_testing_interval=100, pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100, panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100, self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2', log_to_tb=False, image_based=True, num_parallel_envs=1):
    """Alternating self-play training of a pelican and a panther agent.

    Phase 1: train an initial pelican against the rule-based panther
    (pelican_easy config). Phase 2: train an initial panther against that
    pelican (balanced config). Phase 3: for self_play_iterations rounds,
    alternately retrain each agent against the latest opponent snapshot
    using the sparse-reward env.

    Returns (video_path, basewidth, hsize) for a video of the final
    pelican model. exp_name is currently unused — kept for interface
    compatibility with callers.
    """
    pelican_training_steps = 0
    panther_training_steps = 0
    pelican_model_type = model_type
    panther_model_type = model_type

    # Optional TensorBoard logging.
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    policy = 'CnnPolicy'
    if image_based is False:
        policy = 'MlpPolicy'

    # Only PPO2 supports the vectorized (SubprocVecEnv) path here.
    parallel = False
    if model_type.lower() == 'ppo2':
        parallel = True

    balanced_config = '/Components/plark-game/plark_game/game_config/10x10/balanced.json'

    # Train initial pelican vs rule based panther
    pelican_env = _make_env(
        PlarkEnv,
        '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
        image_based, parallel, num_parallel_envs,
        driving_agent='pelican')
    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    # Train initial panther agent vs initial pelican agent
    panther_env = _make_env(
        PlarkEnv, balanced_config, image_based, parallel, num_parallel_envs,
        driving_agent='panther',
        pelican_agent_filepath=pelican_agent_filepath)
    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    # Train agent vs agent
    logger.info('Self play')
    for i in range(self_play_iterations):
        logger.info('Self play iteration ' + str(i) + ' of ' + str(self_play_iterations))

        logger.info('Training pelican')
        pelican_env = _make_env(
            PlarkEnvSparse, balanced_config, image_based, parallel, num_parallel_envs,
            driving_agent='pelican',
            panther_agent_filepath=panther_agent_filepath)
        pelican_agent_filepath, steps = train_agent(
            exp_path, pelican_model, pelican_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, pelican_model_type, basicdate,
            writer, pelican_tb_log_name, previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        panther_env = _make_env(
            PlarkEnvSparse, balanced_config, image_based, parallel, num_parallel_envs,
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath)
        panther_agent_filepath, steps = train_agent(
            exp_path, panther_model, panther_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, panther_model_type, basicdate,
            writer, panther_tb_log_name, previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps:' + str(pelican_training_steps))
    logger.info('Training panther total steps:' + str(panther_training_steps))

    # Make video
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env, video_path)
    return video_path, basewidth, hsize
# In[11]: env.get_attr('driving_agent') # In[12]: modeltype = 'PPO2' modelplayer = env.get_attr('driving_agent')[0] #env.driving_agent render_height = env.get_attr('render_height')[0] #env.render_height render_width = env.get_attr('render_width')[0] #env.render_width image_based = False helper.save_model(exp_path, model, modeltype, modelplayer, render_height, render_width, image_based, basicdate) # # making the video # In[13]: video_path = '/test.mp4' basewidth, hsize = helper.make_video(model, env, video_path) video = io.open(video_path, 'r+b').read() encoded = base64.b64encode(video) HTML(data='''<video alt="test" width="''' + str(basewidth) + '''" height="''' + str(hsize) + '''" controls> <source src="data:video/mp4;base64,{0}" type="video/mp4" /> </video>'''.format(encoded.decode('ascii'))) # In[ ]: