Example #1
def get_env(driving_agent,
            config_file_path,
            opponent=None,
            image_based=False,
            random_panther_start_position=True,
            random_pelican_start_position=True,
            max_illegal_moves_per_turn=3,
            sparse=False,
            normalise=False,
            is_in_vec_env=False):

    params = dict(driving_agent=driving_agent,
                  config_file_path=config_file_path,
                  image_based=image_based,
                  random_panther_start_position=random_panther_start_position,
                  random_pelican_start_position=random_pelican_start_position,
                  max_illegal_moves_per_turn=max_illegal_moves_per_turn,
                  normalise=normalise,
                  is_in_vec_env=is_in_vec_env)

    if opponent is not None and driving_agent == 'pelican':
        params.update(panther_agent_filepath=opponent)
    elif opponent is not None and driving_agent == 'panther':
        params.update(pelican_agent_filepath=opponent)
    if sparse:
        return PlarkEnvSparse(**params)
    else:
        return PlarkEnv(**params)
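
# Usage sketch (illustration only; the config path is one that appears in the other
# examples below). Builds a sparse pelican-driven environment with no opponent
# checkpoint supplied.
example_env = get_env(
    driving_agent='pelican',
    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
    sparse=True)
example_env.reset()
print('Pelican observation length:', len(example_env._observation()))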


def evaluate(genome, config_file_path, driving_agent, normalise_obs,
             domain_params_in_obs, num_trials):

    #Instantiate the env
    env = PlarkEnvSparse(config_file_path=config_file_path,
                         image_based=False,
                         driving_agent=driving_agent,
                         normalise=normalise_obs,
                         domain_params_in_obs=domain_params_in_obs)

    num_inputs = len(env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0
    if driving_agent == 'panther':
        agent = PantherNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)
    else:
        agent = PelicanNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)

    agent.set_weights(genome)

    reward = 0

    for i in range(num_trials):
        env.reset()

        obs = env._observation()
        trial_reward = 0
        while True:
            action = agent.getAction(obs)
            obs, r, done, info = env.step(action)
            trial_reward += r
            if done:
                break
        reward += trial_reward

    #Average trial reward
    reward /= num_trials

    #agent.save_agent(obs_normalise=normalise_obs, domain_params_in_obs=domain_params_in_obs)

    #print("Finished at step num:", step_num)
    #print("Reward:", reward)
    #print("Status:", info['status'])

    #save_video(genome, agent, env, max_num_steps, file_name='evo.mp4')
    #exit()

    return [reward]
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
from plark_game.agents.basic.panther_nn import PantherNN
from plark_game.agents.basic.pelican_nn import PelicanNN

if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/nn/nn_coevolution_balanced.json'
    normalise_obs = True

    #Instantiate dummy env and dummy agent
    #I need to do this to ascertain the number of weights needed in the optimisation
    #procedure
    dummy_env = PlarkEnvSparse(config_file_path=config_file_path,
                               image_based=False,
                               driving_agent='panther',
                               normalise=normalise_obs)

    #Neural net variables
    num_inputs = len(dummy_env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    panther_dummy_agent = PantherNN(
        num_inputs=num_inputs,
        num_hidden_layers=num_hidden_layers,
        neurons_per_hidden_layer=neurons_per_hidden_layer)
    #I need to figure out how to get rid of the 139 magic number
    pelican_dummy_agent = PelicanNN(
        num_inputs=139,
        num_hidden_layers=num_hidden_layers,
        neurons_per_hidden_layer=neurons_per_hidden_layer)

    trained_agent = 'panther'
    #trained_agent = 'pelican'
    normalise_obs = True
    domain_params_in_obs = True
    stochastic_actions = False

    random_panther_start_position = True
    random_pelican_start_position = True

    num_trials = 5

    #Instantiate dummy env and dummy agent
    #I need to do this to ascertain the number of weights needed in the optimisation
    #procedure
    dummy_env = PlarkEnvSparse(
        config_file_path=config_file_path,
        driving_agent=trained_agent,
        normalise=normalise_obs,
        domain_params_in_obs=domain_params_in_obs,
        random_panther_start_position=random_panther_start_position,
        random_pelican_start_position=random_pelican_start_position)

    #Neural net variables
    num_inputs = len(dummy_env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    if trained_agent == 'panther':
        dummy_agent = PantherNN(
            num_inputs=num_inputs,
            num_hidden_layers=num_hidden_layers,
            neurons_per_hidden_layer=neurons_per_hidden_layer,
            stochastic_actions=stochastic_actions)
    else:
        dummy_agent = PelicanNN(
            num_inputs=num_inputs,
            num_hidden_layers=num_hidden_layers,
            neurons_per_hidden_layer=neurons_per_hidden_layer)

log_dir_base = './self_play/'
os.makedirs(log_dir_base, exist_ok=True)
config_file_path = '/Components/plark-game/plark_game/game_config/10x10/balanced.json'

basicdate = str(datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
basepath = '/data/agents/models'
exp_name = 'test_' + basicdate
policy_panther = 'MlpPolicy'
policy_pelican = 'MlpPolicy'
model_type = 'PPO2'
exp_path = os.path.join(basepath, exp_name)

# +
pelican_env = PlarkEnvSparse(driving_agent='pelican',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)

panther_env = PlarkEnvSparse(driving_agent='panther',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)
# -

panthers = [
    helper.make_new_model(model_type, policy_panther, panther_env)
    for i in range(population_size)
]
pelicans = [
    helper.make_new_model(model_type, policy_pelican, pelican_env)
    for i in range(population_size)
]
Example #6
    model = PPO2('MlpPolicy', env, seed=5000)

    #Train
    model.learn(training_steps)

    #Evaluate on all testing configs
    for test_config in testing_configs:

        print('Evaluating on:', test_config)

        #If test_config is the same as what was trained on, just skip
        if test_config == train_config:
            continue

        sparse_env = PlarkEnvSparse(config_file_path=test_config, driving_agent='panther', \
                                    image_based=False)

        sparse_env = Monitor(sparse_env, log_dir)

        if normalize:
            sparse_env = DummyVecEnv([lambda: sparse_env])
            sparse_env = VecNormalize(sparse_env, norm_obs=True, norm_reward=False, \
                                      clip_obs=200., gamma=0.95)

        mean_reward, n_steps = evaluate_policy(model, sparse_env, \
                                               n_eval_episodes=n_eval_episodes, \
                                               deterministic=False, render=False, \
                                               callback=None, reward_threshold=None, \
                                               return_episode_rewards=False)
        print("Mean reward: ", mean_reward)
def run_self_play(exp_name,
                  exp_path,
                  basicdate,
                  pelican_testing_interval=100,
                  pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100,
                  panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100,
                  self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2',
                  log_to_tb=False,
                  image_based=True,
                  num_parallel_envs=1):
    pelican_training_steps = 0
    panther_training_steps = 0

    pelican_model_type = model_type
    panther_model_type = model_type

    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    policy = 'CnnPolicy'
    if image_based is False:
        policy = 'MlpPolicy'
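    # CnnPolicy expects image observations; MlpPolicy works on the flat feature vector
    # the environment returns when image_based is False.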

    parallel = False
    if model_type.lower() == 'ppo2':
        parallel = True
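    # PPO2 supports vectorised environments, so when it is the chosen model type the
    # training envs below are wrapped in SubprocVecEnv and stepped in parallel.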
    #Train initial pelican vs rule based panther

    if parallel:
        pelican_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='pelican',
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        pelican_env = PlarkEnv(
            driving_agent='pelican',
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)

    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    # Train initial panther agent vs initial pelican agent
    if parallel:
        panther_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        panther_env = PlarkEnv(
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath,
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)
    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    # Train agent vs agent
    logger.info('Self play')

    for i in range(self_play_iterations):
        logger.info('Self play iteration ' + str(i) + ' of ' +
                    str(self_play_iterations))
        logger.info('Training pelican')
        if parallel:
            pelican_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='pelican',
                    panther_agent_filepath=panther_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            pelican_env = PlarkEnvSparse(
                driving_agent='pelican',
                panther_agent_filepath=panther_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        pelican_agent_filepath, steps = train_agent(
            exp_path,
            pelican_model,
            pelican_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            pelican_model_type,
            basicdate,
            writer,
            pelican_tb_log_name,
            previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        if parallel:
            panther_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='panther',
                    pelican_agent_filepath=pelican_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            panther_env = PlarkEnvSparse(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        panther_agent_filepath, steps = train_agent(
            exp_path,
            panther_model,
            panther_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            panther_model_type,
            basicdate,
            writer,
            panther_tb_log_name,
            previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps:' + str(pelican_training_steps))
    logger.info('Training panther total steps:' + str(panther_training_steps))
    # Make video
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env,
                                         video_path)
    return video_path, basewidth, hsize
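
# Illustrative call (a sketch, assuming exp_name, exp_path and basicdate are built as
# in the snippet above that joins a base path with a timestamp):
# video_path, basewidth, hsize = run_self_play(exp_name, exp_path, basicdate,
#                                              model_type='PPO2',
#                                              image_based=False,
#                                              num_parallel_envs=2)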

if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/nn/nn_single_agent_balanced.json'
    trained_agent = 'panther'
    #trained_agent = 'pelican'
    normalise_obs = True
    domain_params_in_obs = True

    #Instantiate dummy env and dummy agent
    #I need to do this to ascertain the number of weights needed in the optimisation
    #procedure
    dummy_env = PlarkEnvSparse(config_file_path=config_file_path, image_based=False, 
                               driving_agent=trained_agent, normalise=normalise_obs,
                               domain_params_in_obs=domain_params_in_obs)

    #Neural net variables
    num_inputs = len(dummy_env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    if trained_agent == 'panther':
        dummy_agent = PantherNN(num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, 
                                neurons_per_hidden_layer=neurons_per_hidden_layer)  
    else:
        dummy_agent = PelicanNN(num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, 
                                neurons_per_hidden_layer=neurons_per_hidden_layer)  

    num_weights = dummy_agent.get_num_weights()
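
    # Sketch (illustration only, assuming the evaluate() function from the earlier
    # example is in scope): score a flat all-zeros genome of the matching length.
    zero_genome = [0.0] * num_weights
    fitness = evaluate(zero_genome, config_file_path, trained_agent,
                       normalise_obs, domain_params_in_obs, num_trials=5)
    print('Zero-genome fitness:', fitness)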
Example #9
# -


def envops(env, logdir):
    os.makedirs(logdir, exist_ok=True)
    env = Monitor(env, logdir)
    #env = DummyVecEnv([lambda: env])
    #env = VecNormalize(env, norm_obs=True, norm_reward=False, clip_obs=200., gamma=0.95)
    return env


# +
pelican_env = envops(
    PlarkEnvSparse(driving_agent='pelican',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1), log_dir_base + '/pelican/')

panther_env = envops(
    PlarkEnvSparse(driving_agent='panther',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1), log_dir_base + '/panther/')

pelican = helper.make_new_model(model_type, policy, pelican_env)
panther = helper.make_new_model(model_type, policy, panther_env)
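
# Cross-wire the opponents below: the panther-driven env will play against the current
# pelican model, and the pelican-driven env against the current panther model.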

panther_env.set_pelican(pelican)
pelican_env.set_panther(panther)
model = PPO2('MlpPolicy', env, seed=5000)

# In[8]:

model.learn(training_steps)

# In[14]:

print("****** STARTING EVALUATION *******")

#sparse_env = env
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
sparse_env = PlarkEnvSparse(
    config_file_path=
    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
    driving_agent='panther',
    image_based=False)

sparse_env = Monitor(sparse_env, log_dir)

if normalize:
    sparse_env = DummyVecEnv([lambda: sparse_env])
    sparse_env = VecNormalize(sparse_env,
                              norm_obs=True,
                              norm_reward=False,
                              clip_obs=200.,
                              gamma=0.95)

    #for nee in [1000]:
    #for nee in [10,20,30,40,50,100,250,500]: # 0.892 for 1000
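    # Sketch (illustration, mirroring the evaluate_policy call from the earlier example):
    # mean_reward, n_steps = evaluate_policy(model, sparse_env, n_eval_episodes=1000,
    #                                        deterministic=False, render=False,
    #                                        return_episode_rewards=False)
    # print("Mean reward:", mean_reward)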
Example #11
from gym_plark.envs.plark_env_sparse import PlarkEnvSparse
from agent_training import helper
from plark_game.classes.rule_based_game import create_rule_based_game

if __name__ == '__main__':

    #Env variables
    config_file_path = '/Components/plark-game/plark_game/game_config/10x10/balanced.json'
    driving_agent = 'pelican'

    random_panther_start_position = True
    random_pelican_start_position = True

    env = PlarkEnvSparse(
        config_file_path=config_file_path,
        driving_agent=driving_agent,
        random_panther_start_position=random_panther_start_position,
        random_pelican_start_position=random_pelican_start_position)

    #This is the only difference to a normal environment - one has to set the game
    #to a RuleBasedGame
    env.env.activeGames[len(env.env.activeGames) -
                        1] = create_rule_based_game(config_file_path)

    env.reset()

    reward = 0
    while True:
        _, r, done, info = env.step(None)
        reward += r
        if done: