def get_env(driving_agent,
            config_file_path,
            opponent=None,
            image_based=False,
            random_panther_start_position=True,
            random_pelican_start_position=True,
            max_illegal_moves_per_turn=3,
            sparse=False,
            normalise=False,
            is_in_vec_env=False):

    params = dict(driving_agent=driving_agent,
                  config_file_path=config_file_path,
                  image_based=image_based,
                  random_panther_start_position=random_panther_start_position,
                  random_pelican_start_position=random_pelican_start_position,
                  max_illegal_moves_per_turn=max_illegal_moves_per_turn,
                  normalise=normalise,
                  is_in_vec_env=is_in_vec_env)

    #An opponent filepath is passed under the key the env expects for the
    #non-driving agent
    if opponent is not None and driving_agent == 'pelican':
        params.update(panther_agent_filepath=opponent)
    elif opponent is not None and driving_agent == 'panther':
        params.update(pelican_agent_filepath=opponent)

    if sparse:
        return PlarkEnvSparse(**params)
    else:
        return PlarkEnv(**params)
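# Usage sketch (illustrative, not from the original scripts): PlarkEnv and
# PlarkEnvSparse are assumed to be imported from gym_plark.envs as in the
# other files here, and the config path is just an example.
#
#   env = get_env(driving_agent='pelican',
#                 config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
#                 opponent=None,
#                 sparse=True)
#   obs = env.reset()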
def evaluate(genome, config_file_path, driving_agent, normalise_obs,
             domain_params_in_obs, num_trials):

    #Instantiate the env
    env = PlarkEnvSparse(config_file_path=config_file_path,
                         image_based=False,
                         driving_agent=driving_agent,
                         normalise=normalise_obs,
                         domain_params_in_obs=domain_params_in_obs)

    num_inputs = len(env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    #Build the agent for whichever side is being driven (note: this previously
    #read from the global trained_agent rather than the driving_agent argument)
    if driving_agent == 'panther':
        agent = PantherNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)
    else:
        agent = PelicanNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)

    agent.set_weights(genome)

    reward = 0
    for i in range(num_trials):
        env.reset()
        obs = env._observation()

        trial_reward = 0
        while True:
            action = agent.getAction(obs)
            obs, r, done, info = env.step(action)
            trial_reward += r
            if done:
                break
        reward += trial_reward

    #Average trial reward
    reward /= num_trials

    #agent.save_agent(obs_normalise=normalise_obs,
    #                 domain_params_in_obs=domain_params_in_obs)
    #print("Finished at step num:", step_num)
    #print("Reward:", reward)
    #print("Status:", info['status'])
    #save_video(genome, agent, env, max_num_steps, file_name='evo.mp4')
    #exit()

    return [reward]
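# Usage sketch (illustrative): a genome is a flat weight vector whose length
# must match agent.get_num_weights(), as computed in the __main__ blocks
# below; evaluate() returns a one-element list so it can double as a fitness
# tuple for evolutionary libraries.
#
#   import numpy as np
#   genome = np.random.randn(num_weights)
#   fitness = evaluate(genome, config_file_path, 'panther',
#                      normalise_obs=True, domain_params_in_obs=True,
#                      num_trials=5)[0]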
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
from plark_game.agents.basic.panther_nn import PantherNN
from plark_game.agents.basic.pelican_nn import PelicanNN


if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/nn/nn_coevolution_balanced.json'
    normalise_obs = True

    #Instantiate dummy env and dummy agent
    #I need to do this to ascertain the number of weights needed in the optimisation
    #procedure
    dummy_env = PlarkEnvSparse(config_file_path=config_file_path,
                               image_based=False,
                               driving_agent='panther',
                               normalise=normalise_obs)

    #Neural net variables
    num_inputs = len(dummy_env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    panther_dummy_agent = PantherNN(
        num_inputs=num_inputs,
        num_hidden_layers=num_hidden_layers,
        neurons_per_hidden_layer=neurons_per_hidden_layer)

    #I need to figure out how to get rid of the 139 magic number
    pelican_dummy_agent = PelicanNN(
        num_inputs=139,
        num_hidden_layers=num_hidden_layers,
        neurons_per_hidden_layer=neurons_per_hidden_layer)
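    # One way to remove the 139 magic number (a sketch using only the API
    # already used above): instantiate a second dummy env driven by the
    # pelican and measure its observation length directly.
    #
    #   pelican_dummy_env = PlarkEnvSparse(config_file_path=config_file_path,
    #                                      image_based=False,
    #                                      driving_agent='pelican',
    #                                      normalise=normalise_obs)
    #   pelican_num_inputs = len(pelican_dummy_env._observation())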
normalise_obs = True
domain_params_in_obs = True
stochastic_actions = False
random_panther_start_position = True
random_pelican_start_position = True
num_trials = 5

#Instantiate dummy env and dummy agent
#I need to do this to ascertain the number of weights needed in the optimisation
#procedure
dummy_env = PlarkEnvSparse(
    config_file_path=config_file_path,
    driving_agent=trained_agent,
    normalise=normalise_obs,
    domain_params_in_obs=domain_params_in_obs,
    random_panther_start_position=random_panther_start_position,
    random_pelican_start_position=random_pelican_start_position)

#Neural net variables
num_inputs = len(dummy_env._observation())
num_hidden_layers = 0
neurons_per_hidden_layer = 0

if trained_agent == 'panther':
    dummy_agent = PantherNN(
        num_inputs=num_inputs,
        num_hidden_layers=num_hidden_layers,
        neurons_per_hidden_layer=neurons_per_hidden_layer,
        stochastic_actions=stochastic_actions)
log_dir_base = './self_play/'
os.makedirs(log_dir_base, exist_ok=True)

config_file_path = '/Components/plark-game/plark_game/game_config/10x10/balanced.json'

basicdate = str(datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
basepath = '/data/agents/models'
exp_name = 'test_' + basicdate
policy_panther = 'MlpPolicy'
policy_pelican = 'MlpPolicy'
model_type = 'PPO2'
exp_path = os.path.join(basepath, exp_name)

# +
pelican_env = PlarkEnvSparse(driving_agent='pelican',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)

panther_env = PlarkEnvSparse(driving_agent='panther',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)
# -

panthers = [
    helper.make_new_model(model_type, policy_panther, panther_env)
    for i in range(population_size)
]
pelicans = [
    helper.make_new_model(model_type, policy_pelican, pelican_env)
    for i in range(population_size)
]
model = PPO2('MlpPolicy', env, seed=5000)

#Train
model.learn(training_steps)

#Evaluate on all testing configs
for test_config in testing_configs:

    #If test_config is the same as what was trained on, just skip it
    #(checked before printing so skipped configs are not announced)
    if test_config == train_config:
        continue

    print('Evaluating on:', test_config)

    sparse_env = PlarkEnvSparse(config_file_path=test_config,
                                driving_agent='panther',
                                image_based=False)
    sparse_env = Monitor(sparse_env, log_dir)

    if normalize:
        sparse_env = DummyVecEnv([lambda: sparse_env])
        sparse_env = VecNormalize(sparse_env, norm_obs=True, norm_reward=False,
                                  clip_obs=200., gamma=0.95)

    mean_reward, n_steps = evaluate_policy(model, sparse_env,
                                           n_eval_episodes=n_eval_episodes,
                                           deterministic=False, render=False,
                                           callback=None, reward_threshold=None,
                                           return_episode_rewards=False)

    print("Mean reward: ", mean_reward)
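# Caveat (assumption about stable-baselines behaviour): wrapping the eval env
# in a *fresh* VecNormalize estimates observation statistics from scratch,
# which will generally differ from the statistics the model saw in training.
# If the training-time statistics were saved, recent stable-baselines versions
# can reload them instead (sketch; 'vec_normalize.pkl' is a hypothetical path):
#
#   sparse_env = VecNormalize.load(os.path.join(log_dir, 'vec_normalize.pkl'),
#                                  DummyVecEnv([lambda: sparse_env]))
#   sparse_env.training = False
#   sparse_env.norm_reward = False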
def run_self_play(exp_name,
                  exp_path,
                  basicdate,
                  pelican_testing_interval=100,
                  pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100,
                  panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100,
                  self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2',
                  log_to_tb=False,
                  image_based=True,
                  num_parallel_envs=1):
    pelican_training_steps = 0
    panther_training_steps = 0

    pelican_model_type = model_type
    panther_model_type = model_type

    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    policy = 'CnnPolicy'
    if image_based is False:
        policy = 'MlpPolicy'

    #Only PPO2 supports parallel (vectorised) environments here
    parallel = False
    if model_type.lower() == 'ppo2':
        parallel = True

    #Train initial pelican vs rule based panther
    if parallel:
        pelican_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='pelican',
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
            for _ in range(num_parallel_envs)
        ])
    else:
        pelican_env = PlarkEnv(
            driving_agent='pelican',
            config_file_path='/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)

    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    #Train initial panther agent vs initial pelican agent
    if parallel:
        panther_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)
            for _ in range(num_parallel_envs)
        ])
    else:
        panther_env = PlarkEnv(
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath,
            config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)

    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    #Train agent vs agent
    logger.info('Self play')
    for i in range(self_play_iterations):
        logger.info('Self play iteration ' + str(i) + ' of ' + str(self_play_iterations))

        logger.info('Training pelican')
        if parallel:
            pelican_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='pelican',
                    panther_agent_filepath=panther_agent_filepath,
                    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            pelican_env = PlarkEnvSparse(
                driving_agent='pelican',
                panther_agent_filepath=panther_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        pelican_agent_filepath, steps = train_agent(
            exp_path, pelican_model, pelican_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, pelican_model_type,
            basicdate, writer, pelican_tb_log_name,
            previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        if parallel:
            panther_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='panther',
                    pelican_agent_filepath=pelican_agent_filepath,
                    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            panther_env = PlarkEnvSparse(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        panther_agent_filepath, steps = train_agent(
            exp_path, panther_model, panther_env, self_play_testing_interval,
            self_play_max_learning_steps_per_agent, panther_model_type,
            basicdate, writer, panther_tb_log_name,
            previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps:' + str(pelican_training_steps))
    logger.info('Training panther total steps:' + str(panther_training_steps))

    #Make video
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env, video_path)
    return video_path, basewidth, hsize
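# Usage sketch (argument values are illustrative; exp_path and basicdate
# follow the same pattern as the self-play setup earlier in this section):
#
#   video_path, basewidth, hsize = run_self_play(
#       exp_name, exp_path, basicdate,
#       model_type='PPO2',
#       log_to_tb=False,
#       image_based=False,
#       num_parallel_envs=4)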
    return [reward]


if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/nn/nn_single_agent_balanced.json'
    trained_agent = 'panther'
    #trained_agent = 'pelican'
    normalise_obs = True
    domain_params_in_obs = True

    #Instantiate dummy env and dummy agent
    #I need to do this to ascertain the number of weights needed in the optimisation
    #procedure
    dummy_env = PlarkEnvSparse(config_file_path=config_file_path,
                               image_based=False,
                               driving_agent=trained_agent,
                               normalise=normalise_obs,
                               domain_params_in_obs=domain_params_in_obs)

    #Neural net variables
    num_inputs = len(dummy_env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    if trained_agent == 'panther':
        dummy_agent = PantherNN(num_inputs=num_inputs,
                                num_hidden_layers=num_hidden_layers,
                                neurons_per_hidden_layer=neurons_per_hidden_layer)
    else:
        dummy_agent = PelicanNN(num_inputs=num_inputs,
                                num_hidden_layers=num_hidden_layers,
                                neurons_per_hidden_layer=neurons_per_hidden_layer)

    num_weights = dummy_agent.get_num_weights()
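    # Sketch of how num_weights feeds the optimisation: evaluate() returns a
    # one-element list, matching e.g. a DEAP-style fitness tuple. A minimal
    # random-search stand-in for the real optimiser (illustrative only):
    #
    #   import numpy as np
    #   best_genome, best_fitness = None, -float('inf')
    #   for _ in range(100):
    #       genome = np.random.randn(num_weights)
    #       fitness = evaluate(genome, config_file_path, trained_agent,
    #                          normalise_obs, domain_params_in_obs,
    #                          num_trials=5)[0]
    #       if fitness > best_fitness:
    #           best_genome, best_fitness = genome, fitness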
# -

def envops(env, logdir):
    os.makedirs(logdir, exist_ok=True)
    env = Monitor(env, logdir)
    #env = DummyVecEnv([lambda: env])
    #env = VecNormalize(env, norm_obs=True, norm_reward=False, clip_obs=200., gamma=0.95)
    return env

# +
pelican_env = envops(
    PlarkEnvSparse(driving_agent='pelican',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1),
    log_dir_base + '/pelican/')

panther_env = envops(
    PlarkEnvSparse(driving_agent='panther',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1),
    log_dir_base + '/panther/')

pelican = helper.make_new_model(model_type, policy, pelican_env)
panther = helper.make_new_model(model_type, policy, panther_env)

panther_env.set_pelican(pelican)
pelican_env.set_panther(panther)
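# Sketch of the alternating self-play loop this wiring enables (num_rounds and
# steps_per_round are hypothetical names): set_panther/set_pelican above point
# each env at the opposing live model rather than a saved agent file, so the
# opponent improves as training alternates.
#
#   for round_num in range(num_rounds):
#       pelican.learn(steps_per_round)
#       panther.learn(steps_per_round)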
model = PPO2('MlpPolicy', env, seed=5000)


# In[8]:


model.learn(training_steps)


# In[14]:


print("****** STARTING EVALUATION *******")

#sparse_env = env
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
sparse_env = PlarkEnvSparse(
    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
    driving_agent='panther',
    image_based=False)
sparse_env = Monitor(sparse_env, log_dir)

if normalize:
    sparse_env = DummyVecEnv([lambda: sparse_env])
    sparse_env = VecNormalize(sparse_env, norm_obs=True, norm_reward=False,
                              clip_obs=200., gamma=0.95)

#for nee in [1000]:
#for nee in [10,20,30,40,50,100,250,500]: # 0.892 for 1000
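# The commented sweep above suggests the evaluation call that follows mirrors
# the config-sweep script earlier in this section (the n_eval_episodes value
# is illustrative; the '0.892 for 1000' note hints 1000 episodes were used):
#
#   mean_reward, n_steps = evaluate_policy(model, sparse_env,
#                                          n_eval_episodes=1000,
#                                          deterministic=False, render=False)
#   print("Mean reward: ", mean_reward)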
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
from agent_training import helper
from plark_game.classes.rule_based_game import create_rule_based_game


if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/balanced.json'
    driving_agent = 'pelican'
    random_panther_start_position = True
    random_pelican_start_position = True

    env = PlarkEnvSparse(
        config_file_path=config_file_path,
        driving_agent=driving_agent,
        random_panther_start_position=random_panther_start_position,
        random_pelican_start_position=random_pelican_start_position)

    #This is the only difference to a normal environment - one has to set the game
    #to a RuleBasedGame
    env.env.activeGames[len(env.env.activeGames) - 1] = \
        create_rule_based_game(config_file_path)

    env.reset()

    reward = 0
    while True:
        _, r, done, info = env.step(None)
        reward += r
        if done:
            break
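    # Note (assumption): with the game object swapped for a rule-based one,
    # env.step(None) appears to defer action selection to the built-in
    # rule-based agents, so the loop simply plays the scripted game out and
    # accumulates the sparse reward. A natural follow-up once it terminates:
    #
    #   print('Rule-based game finished; total reward:', reward)
    #   print('Status:', info['status'])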