def initialAgents(self):
    # If no initial pelican agent is given, we train one from fresh
    if len(self.initial_pelicans) == 0:
        # Train initial pelican vs default panther
        self.pelican_model = helper.make_new_model(self.model_type,
                                                   self.policy,
                                                   self.pelican_env,
                                                   n_steps=self.training_steps)
        logger.info('Training initial pelican')
        pelican_agent_filepath = self.train_agent(self.pelicans_tmp_exp_path,
                                                  self.pelican_model,
                                                  self.pelican_env)
    else:
        logger.info('Initial set of %d pelicans found' % len(self.initial_pelicans))
        pelican_agent_filepath = self.initial_pelicans

    # If no initial panther agent is given, we train one from fresh
    if len(self.initial_panthers) == 0:
        # Train initial panther agent vs default pelican
        self.panther_model = helper.make_new_model(self.model_type,
                                                   self.policy,
                                                   self.panther_env,
                                                   n_steps=self.training_steps)
        logger.info('Training initial panther')
        panther_agent_filepath = self.train_agent(self.panthers_tmp_exp_path,
                                                  self.panther_model,
                                                  self.panther_env)
    else:
        logger.info('Initial set of %d panthers found' % len(self.initial_panthers))
        panther_agent_filepath = self.initial_panthers

    return panther_agent_filepath, pelican_agent_filepath
def bootstrap(self, model_paths, env, mixture):
    # With probability retraining_prob, resume from a saved agent sampled
    # according to the mixture weights; otherwise start a fresh model.
    if np.random.rand(1) < self.retraining_prob:
        path = np.random.choice(model_paths, 1, p=mixture)[0]
        path = glob.glob(path + "/*.zip")[0]
        return helper.loadAgent(path, self.model_type)
    else:
        return helper.make_new_model(self.model_type,
                                     self.policy,
                                     env,
                                     n_steps=self.training_steps)
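# For illustration, a minimal standalone sketch of the mixture sampling used
# by bootstrap() above; the checkpoint paths and weights are hypothetical.
import numpy as np

model_paths = ["ckpts/pelican_0", "ckpts/pelican_1", "ckpts/pelican_2"]
mixture = np.array([0.5, 0.3, 0.2])  # probabilities must sum to 1

# Draw one checkpoint directory in proportion to its mixture weight.
sampled_path = np.random.choice(model_paths, 1, p=mixture)[0]
print(sampled_path)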
def evaluate_algorithms(exp_name, base_path, tb_enabled, algorithms,
                        victory_threshold, victory_trials, max_seconds,
                        testing_interval, use_non_image):
    basicdate = str(datetime.now().strftime("%Y%m%d_%H%M%S"))
    exp_name = "{}_{}".format(exp_name, basicdate)
    exp_path = os.path.join(base_path, exp_name)
    logger.info("Storing results in {}".format(exp_path))

    writer = None
    if tb_enabled:
        writer = SummaryWriter(exp_path)

    for algo in algorithms:
        tb_log_name = "{}_non_image".format(algo) if use_non_image else algo
        logger.info("Evaluating algorithm: {}; non-image: {}".format(algo, use_non_image))
        if use_non_image:
            image_based = False
            env = plark_env_non_image_state.PlarkEnvNonImageState(
                driving_agent='pelican',
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/panther_easy.json')
            # CnnPolicy doesn't work with a MultiDiscrete observation space
            policy = "MlpPolicy"
        else:
            image_based = True
            env = plark_env.PlarkEnv(
                driving_agent='pelican',
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/panther_easy.json')
            policy = "CnnPolicy"

        model = helper.make_new_model(algo, policy, env)
        helper.train_until(model, env, victory_threshold, victory_trials,
                           max_seconds, testing_interval,
                           tb_writer=writer, tb_log_name=tb_log_name)
        helper.save_model_with_env_settings(exp_path, model, algo, env,
                                            image_based, basicdate)

    # Only close the writer if TensorBoard logging was enabled.
    if writer is not None:
        writer.close()
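# A hypothetical invocation of evaluate_algorithms(); the algorithm list and
# threshold values below are illustrative, not values from the source.
evaluate_algorithms(
    exp_name="algo_comparison",
    base_path="/data/agents/models",
    tb_enabled=True,
    algorithms=["PPO2", "DQN", "A2C"],  # assumed supported by helper.make_new_model
    victory_threshold=0.8,   # fraction of trials the agent must win
    victory_trials=10,       # games played per evaluation round
    max_seconds=3600,        # wall-clock training budget per algorithm
    testing_interval=100,    # learning steps between evaluations
    use_non_image=False)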
def compare_envs(exp_name, base_path, tb_enabled, victory_threshold,
                 victory_trials, max_seconds, testing_interval,
                 num_parallel_envs, non_image):
    basicdate = str(datetime.now().strftime("%Y%m%d_%H%M%S"))
    exp_name = "{}_{}".format(exp_name, basicdate)
    exp_path = os.path.join(base_path, exp_name)
    logger.info("Storing results in {}".format(exp_path))

    writer = None
    if tb_enabled:
        writer = SummaryWriter(exp_path)

    for parallel in [False, True]:
        algo = "PPO2"
        policy = "MlpPolicy" if non_image else "CnnPolicy"
        tb_log_name = "{}_parallel".format(algo) if parallel else algo
        logger.info("Evaluating {}; parallel: {}".format(algo, parallel))
        if parallel:
            logger.info("Evaluating using {} parallel environments".format(num_parallel_envs))
            env_fn = createNonImageEnv if non_image else createImageEnv
            env = SubprocVecEnv([env_fn for _ in range(num_parallel_envs)])
        else:
            env = createNonImageEnv() if non_image else createImageEnv()

        model = helper.make_new_model(algo, policy, env)
        helper.train_until(model, env, victory_threshold, victory_trials,
                           max_seconds, testing_interval,
                           tb_writer=writer, tb_log_name=tb_log_name)
        helper.save_model_with_env_settings(exp_path, model, algo, env, basicdate)

    # Only close the writer if TensorBoard logging was enabled.
    if writer is not None:
        writer.close()
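# createImageEnv / createNonImageEnv are assumed to be zero-argument
# environment factories: SubprocVecEnv needs picklable callables that build a
# fresh environment in each worker process. A minimal sketch of the non-image
# one, under that assumption:
def createNonImageEnv():
    return plark_env_non_image_state.PlarkEnvNonImageState(
        driving_agent='pelican',
        config_file_path='/Components/plark-game/plark_game/game_config/10x10/panther_easy.json')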
# +
pelican_env = PlarkEnvSparse(driving_agent='pelican',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)

panther_env = PlarkEnvSparse(driving_agent='panther',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)
# -

panthers = [helper.make_new_model(model_type, policy_panther, panther_env)
            for _ in range(population_size)]
pelicans = [helper.make_new_model(model_type, policy_pelican, pelican_env)
            for _ in range(population_size)]

# Round-robin training: each iteration pairs every panther with every pelican.
# for iteration in tqdm(range(iterations)):
for iteration in range(iterations):
    print("Iteration: " + str(iteration))
    for panther in panthers:
        for pelican in pelicans:
            panther_env.set_pelican(pelican)
            pelican_env.set_panther(panther)
            pelican.learn(steps)
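# A possible follow-up cell to checkpoint the trained populations; the output
# directory is hypothetical, and .save() is the stable-baselines convention.
import os

ckpt_dir = "./population_ckpts"
os.makedirs(ckpt_dir, exist_ok=True)
for idx, agent in enumerate(pelicans):
    agent.save(os.path.join(ckpt_dir, "pelican_{}".format(idx)))
for idx, agent in enumerate(panthers):
    agent.save(os.path.join(ckpt_dir, "panther_{}".format(idx)))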
def run_self_play(exp_name, exp_path, basicdate,
                  pelican_testing_interval=100,
                  pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100,
                  panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100,
                  self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2',
                  log_to_tb=False,
                  image_based=True,
                  num_parallel_envs=1):
    pelican_training_steps = 0
    panther_training_steps = 0
    pelican_model_type = model_type
    panther_model_type = model_type

    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    policy = 'CnnPolicy' if image_based else 'MlpPolicy'

    # Only PPO2 supports the vectorised (parallel) environments used below.
    parallel = model_type.lower() == 'ppo2'

    # Train initial pelican vs rule-based panther
    if parallel:
        pelican_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='pelican',
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
            for _ in range(num_parallel_envs)])
    else:
        pelican_env = PlarkEnv(
            driving_agent='pelican',
            config_file_path='/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)
    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    # Train initial panther agent vs initial pelican agent
    if parallel:
        panther_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
            for _ in range(num_parallel_envs)])
    else:
        panther_env = PlarkEnv(
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath,
            config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)
    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    # Train agent vs agent
    logger.info('Self play')
    for i in range(self_play_iterations):
        logger.info('Self play iteration ' + str(i) + ' of ' + str(self_play_iterations))

        logger.info('Training pelican')
        if parallel:
            pelican_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='pelican',
                    panther_agent_filepath=panther_agent_filepath,
                    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)])
        else:
            pelican_env = PlarkEnvSparse(
                driving_agent='pelican',
                panther_agent_filepath=panther_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
        pelican_agent_filepath, steps = train_agent(
            exp_path, pelican_model, pelican_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, pelican_model_type,
            basicdate, writer, pelican_tb_log_name,
            previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        if parallel:
            panther_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='panther',
                    pelican_agent_filepath=pelican_agent_filepath,
                    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)])
        else:
            panther_env = PlarkEnvSparse(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
        panther_agent_filepath, steps = train_agent(
            exp_path, panther_model, panther_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, panther_model_type,
            basicdate, writer, panther_tb_log_name,
            previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps: ' + str(pelican_training_steps))
    logger.info('Training panther total steps: ' + str(panther_training_steps))

    # Make video
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env, video_path)
    return video_path, basewidth, hsize
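# A hypothetical call to run_self_play(); parameter values are illustrative
# only, and model_type='PPO2' enables the SubprocVecEnv parallel path above.
video_path, basewidth, hsize = run_self_play(
    exp_name='self_play_10x10',
    exp_path='/data/agents/models/self_play_10x10',
    basicdate=str(datetime.now().strftime("%Y%m%d_%H%M%S")),
    self_play_iterations=10,
    model_type='PPO2',
    log_to_tb=True,
    image_based=True,
    num_parallel_envs=4)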
# +
pelican_env = envops(
    PlarkEnvSparse(driving_agent='pelican',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1),
    log_dir_base + '/pelican/')

panther_env = envops(
    PlarkEnvSparse(driving_agent='panther',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1),
    log_dir_base + '/panther/')

pelican = helper.make_new_model(model_type, policy, pelican_env)
panther = helper.make_new_model(model_type, policy, panther_env)

panther_env.set_pelican(pelican)
pelican_env.set_panther(panther)
# -

for iteration in tqdm(range(iterations)):
    pelican.learn(steps)
    panther.learn(steps)

# Make video
video_path = os.path.join('./', 'test_self_play.mp4')
basewidth, hsize = helper.make_video(pelican, pelican_env, video_path)

video = io.open(video_path, 'r+b').read()
encoded = base64.b64encode(video)
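# In a notebook, the base64-encoded video is typically shown with an inline
# HTML5 player; a standard IPython pattern (the markup below is illustrative).
from IPython.display import HTML

HTML(data='''
    <video width="{0}" height="{1}" controls>
        <source src="data:video/mp4;base64,{2}" type="video/mp4" />
    </video>'''.format(basewidth, hsize, encoded.decode('ascii')))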