Example no. 1
0
def save_model(exp_path, model, model_type, env, basicdate):
    """Persist *model* (with its environment settings) and record a training video.

    Args:
        exp_path: directory the model and video are written into.
        model: trained agent to save.
        model_type: algorithm name used in the saved-model metadata.
        env: environment the model was trained in; its settings are saved too.
        basicdate: timestamp string used in the saved filenames.
    """
    logger.info("Saving model")
    # Delegate serialisation of the model plus its env configuration.
    helper.save_model_with_env_settings(exp_path, model, model_type, env, basicdate)
    # Render a clip of the agent acting so progress can be inspected visually.
    helper.make_video(model, env, os.path.join(exp_path, 'training.mp4'))
Example no. 2
0
def save_model(exp_path, model, model_type, env, basicdate):
    """Save the trained model together with its env settings, then make a video.

    Args:
        exp_path: output directory for the model files and the mp4.
        model: agent to serialise.
        model_type: algorithm name recorded alongside the model.
        env: training environment whose settings are stored with the model.
        basicdate: timestamp string embedded in the output filenames.
    """
    logger.info("Saving model")

    helper.save_model_with_env_settings(exp_path, model, model_type, env, basicdate)

    # Record the agent playing one episode to 'training.mp4' in exp_path.
    out_path = os.path.join(exp_path, 'training.mp4')
    helper.make_video(model, env, out_path)
    for i in range(population_size)
]

# Round-robin self-play: every panther is paired against every pelican, and
# both sides take a learning step against the current opponent each pairing.
for iteration in range(iterations):
    print("Iteration: " + str(iteration))
    for panther_agent in panthers:
        for pelican_agent in pelicans:
            # Wire each agent into the opposing side's environment before learning.
            panther_env.set_pelican(pelican_agent)
            pelican_env.set_panther(panther_agent)
            # Train the pelican first, then the panther, for `steps` timesteps each.
            pelican_agent.learn(steps)
            panther_agent.learn(steps)

# Make video of a trained pelican playing, then embed it inline as base64 HTML.
video_path = os.path.join('./', 'test_self_play.mp4')
# NOTE(review): pelicans[2] hard-codes which population member is recorded —
# confirm index 2 is intentional.
basewidth, hsize = helper.make_video(pelicans[2], pelican_env, video_path)
# Read the rendered mp4 back. Fix: use a context manager so the file handle is
# closed (the original `io.open(...).read()` leaked it), and open read-only
# since nothing is written.
with io.open(video_path, 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" width="''' + str(basewidth) + '''" height="''' +
     str(hsize) + '''" controls>
             <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii')))

start_time = time.time()
stats = []
for panther in panthers:
    for pelican in pelicans:
        #panther_env.set_pelican(pelican)
        pelican_env.set_panther(panther)
        mean, std = evaluate_policy(pelican, pelican_env, n_eval_episodes=10)
        stats.append({
Example no. 4
0
def run_self_play(exp_name,
                  exp_path,
                  basicdate,
                  pelican_testing_interval=100,
                  pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100,
                  panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100,
                  self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2',
                  log_to_tb=False,
                  image_based=True,
                  num_parallel_envs=1):
    """Run the full pelican-vs-panther self-play training pipeline.

    Pipeline:
      1. Train an initial pelican against the rule-based panther (easy config).
      2. Train an initial panther against that pelican (balanced config).
      3. Alternate for ``self_play_iterations`` rounds: retrain the pelican vs
         the latest saved panther, then the panther vs the latest saved pelican,
         using sparse-reward environments.
      4. Render a video of the final pelican and return its path/dimensions.

    Args:
        exp_name: experiment name (not used inside this function body).
        exp_path: directory for saved models, TensorBoard logs and the video.
        basicdate: timestamp string passed through to train_agent for filenames.
        pelican_testing_interval: evaluation interval (steps) for phase 1.
        pelican_max_initial_learning_steps: step budget for phase 1.
        panther_testing_interval: evaluation interval (steps) for phase 2.
        panther_max_initial_learning_steps: step budget for phase 2.
        self_play_testing_interval: evaluation interval during self-play rounds.
        self_play_max_learning_steps_per_agent: per-round, per-agent step budget.
        self_play_iterations: number of pelican/panther alternation rounds.
        model_type: algorithm name; 'ppo2' (case-insensitive) enables parallel
            SubprocVecEnv environments.
        log_to_tb: when True, create a SummaryWriter on exp_path.
        image_based: True -> CnnPolicy on image observations; False -> MlpPolicy.
        num_parallel_envs: number of subprocess envs when running in parallel.

    Returns:
        Tuple ``(video_path, basewidth, hsize)`` for the final self-play video.
    """
    # Cumulative step counters across all phases, reported at the end.
    pelican_training_steps = 0
    panther_training_steps = 0

    # Both agents use the same algorithm type.
    pelican_model_type = model_type
    panther_model_type = model_type

    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    # Observation type determines the policy network architecture.
    policy = 'CnnPolicy'
    if image_based is False:
        policy = 'MlpPolicy'

    # Only PPO2 supports the SubprocVecEnv parallel wrapper here.
    parallel = False
    if model_type.lower() == 'ppo2':
        parallel = True
    #Train initial pelican vs rule based panther

    # Phase 1 env: 'easy' config, no panther agent file -> rule-based opponent.
    # NOTE(review): config paths are absolute container paths (/Components/...);
    # confirm they exist in the deployment image.
    if parallel:
        pelican_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='pelican',
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        pelican_env = PlarkEnv(
            driving_agent='pelican',
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)

    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    # train_agent returns the saved-model filepath plus the steps it consumed.
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    # Train initial panther agent vs initial pelican agent
    # Phase 2 env: 'balanced' config, opponent loaded from the saved pelican.
    if parallel:
        panther_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        panther_env = PlarkEnv(
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath,
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)
    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    # Train agent vs agent
    logger.info('Self play')

    # Phase 3: alternate training. Each round rebuilds a sparse-reward env
    # pointing at the opponent model saved in the previous step, so the two
    # agents leapfrog each other. The same model objects keep learning
    # (weights carry over between rounds); only the envs are recreated.
    for i in range(self_play_iterations):
        logger.info('Self play iteration ' + str(i) + ' of ' +
                    str(self_play_iterations))
        logger.info('Training pelican')
        if parallel:
            pelican_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='pelican',
                    panther_agent_filepath=panther_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            pelican_env = PlarkEnvSparse(
                driving_agent='pelican',
                panther_agent_filepath=panther_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        # previous_steps keeps the step counter monotonic across rounds
        # (presumably for TensorBoard x-axes — confirm in train_agent).
        pelican_agent_filepath, steps = train_agent(
            exp_path,
            pelican_model,
            pelican_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            pelican_model_type,
            basicdate,
            writer,
            pelican_tb_log_name,
            previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        if parallel:
            panther_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='panther',
                    pelican_agent_filepath=pelican_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            panther_env = PlarkEnvSparse(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        panther_agent_filepath, steps = train_agent(
            exp_path,
            panther_model,
            panther_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            panther_model_type,
            basicdate,
            writer,
            panther_tb_log_name,
            previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps:' + str(pelican_training_steps))
    logger.info('Training panther total steps:' + str(panther_training_steps))
    # Make video
    # Record the final pelican playing in the last self-play environment.
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env,
                                         video_path)
    return video_path, basewidth, hsize
# In[11]:

# Show which agent the vectorised environment is driving (notebook display).
env.get_attr('driving_agent')

# In[12]:

# Collect the settings helper.save_model needs from the first sub-environment
# of the vectorised env, then save the trained model with them.
modeltype = 'PPO2'
modelplayer = env.get_attr('driving_agent')[0]
render_height = env.get_attr('render_height')[0]
render_width = env.get_attr('render_width')[0]
image_based = False
helper.save_model(exp_path, model, modeltype, modelplayer,
                  render_height, render_width, image_based, basicdate)

# # making the video

# In[13]:

# NOTE(review): '/test.mp4' writes to the filesystem root — confirm this is the
# intended output location (e.g. inside a container).
video_path = '/test.mp4'
basewidth, hsize = helper.make_video(model, env, video_path)

# Read the rendered clip back and embed it inline as base64 HTML.
# Fix: use a context manager so the file handle is closed (the original
# `io.open(...).read()` leaked it), and open read-only since nothing is written.
with io.open(video_path, 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" width="''' + str(basewidth) + '''" height="''' +
     str(hsize) + '''" controls>
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii')))

# In[ ]: