Code example #1
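Builds the starting agents for adversarial training: if no pretrained pelicans or panthers are supplied, a fresh model is created with helper.make_new_model and trained against the default opponent; otherwise the supplied agents are used as-is.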
    def initialAgents(self):
        # If no initial pelican agent is given, we train one from scratch
        if len(self.initial_pelicans) == 0:
            # Train initial pelican vs default panther
            self.pelican_model = helper.make_new_model(self.model_type,
                                                       self.policy,
                                                       self.pelican_env,
                                                       n_steps=self.training_steps)
            logger.info('Training initial pelican')
            pelican_agent_filepath = self.train_agent(self.pelicans_tmp_exp_path,
                                                      self.pelican_model,
                                                      self.pelican_env)
        else:
            logger.info('Initial set of %d pelicans found', len(self.initial_pelicans))
            pelican_agent_filepath = self.initial_pelicans

        # If no initial panther agent is given, we train one from scratch
        if len(self.initial_panthers) == 0:
            # Train initial panther agent vs default pelican
            self.panther_model = helper.make_new_model(self.model_type,
                                                       self.policy,
                                                       self.panther_env,
                                                       n_steps=self.training_steps)
            logger.info('Training initial panther')
            panther_agent_filepath = self.train_agent(self.panthers_tmp_exp_path,
                                                      self.panther_model,
                                                      self.panther_env)
        else:
            logger.info('Initial set of %d panthers found', len(self.initial_panthers))
            panther_agent_filepath = self.initial_panthers

        return panther_agent_filepath, pelican_agent_filepath
Code example #2
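With probability retraining_prob, bootstrap() reloads a past checkpoint sampled from the opponent mixture and continues training it; otherwise it returns a freshly initialised model.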
    def bootstrap(self, model_paths, env, mixture):
        # With probability retraining_prob, resume from a past checkpoint
        # sampled according to the opponent mixture; otherwise start fresh.
        if np.random.rand() < self.retraining_prob:
            path = np.random.choice(model_paths, 1, p=mixture)[0]
            path = glob.glob(path + "/*.zip")[0]
            return helper.loadAgent(path, self.model_type)
        else:
            return helper.make_new_model(self.model_type,
                                         self.policy,
                                         env,
                                         n_steps=self.training_steps)
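The retraining branch reduces to weighted sampling over past checkpoints. A minimal self-contained sketch of that idea, with hypothetical paths and weights:

import numpy as np

model_paths = ["runs/gen0", "runs/gen1", "runs/gen2"]  # hypothetical checkpoint dirs
mixture = [0.2, 0.3, 0.5]  # sampling weights over past generations
path = np.random.choice(model_paths, 1, p=mixture)[0]  # later generations drawn more often
print(path)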
Code example #3
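Trains each algorithm in the list against the easy panther configuration, picking MlpPolicy for the non-image state space and CnnPolicy otherwise, with optional TensorBoard logging.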
def evaluate_algorithms(exp_name, base_path, tb_enabled, algorithms,
                        victory_threshold, victory_trials, max_seconds,
                        testing_interval, use_non_image):
    basicdate = datetime.now().strftime("%Y%m%d_%H%M%S")
    exp_name = "{}_{}".format(exp_name, basicdate)
    exp_path = os.path.join(base_path, exp_name)
    logger.info("Storing results in {}".format(exp_path))

    writer = None
    if tb_enabled:
        writer = SummaryWriter(exp_path)

    for algo in algorithms:
        tb_log_name = "{}_non_image".format(algo) if use_non_image else algo
        logger.info("Evaluating algorithm: {}; non-image: {}".format(
            algo, use_non_image))
        if use_non_image:
            image_based = False
            env = plark_env_non_image_state.PlarkEnvNonImageState(
                driving_agent='pelican',
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/panther_easy.json'
            )
            policy = "MlpPolicy"  # CnnPolicy doesn't work with MultiDiscrete observation space
        else:
            image_based = True
            env = plark_env.PlarkEnv(
                driving_agent='pelican',
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/panther_easy.json'
            )
            policy = "CnnPolicy"

        model = helper.make_new_model(algo, policy, env)
        helper.train_until(model,
                           env,
                           victory_threshold,
                           victory_trials,
                           max_seconds,
                           testing_interval,
                           tb_writer=writer,
                           tb_log_name=tb_log_name)
        helper.save_model_with_env_settings(exp_path, model, algo, env,
                                            image_based, basicdate)

    if writer is not None:
        writer.close()
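A hypothetical invocation of evaluate_algorithms; the paths and parameter values below are illustrative assumptions, and only PPO2 among the algorithm names appears elsewhere in these examples:

evaluate_algorithms(exp_name='algo_eval',
                    base_path='/tmp/experiments',
                    tb_enabled=True,
                    algorithms=['PPO2', 'A2C', 'DQN'],
                    victory_threshold=0.8,
                    victory_trials=10,
                    max_seconds=3600,
                    testing_interval=100,
                    use_non_image=True)

Code example #4
Compares a single-environment PPO2 run against the same setup trained on num_parallel_envs SubprocVecEnv workers, using identical stopping criteria for both.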
def compare_envs(exp_name, base_path, tb_enabled, victory_threshold,
                 victory_trials, max_seconds, testing_interval,
                 num_parallel_envs, non_image):
    basicdate = datetime.now().strftime("%Y%m%d_%H%M%S")
    exp_name = "{}_{}".format(exp_name, basicdate)
    exp_path = os.path.join(base_path, exp_name)
    logger.info("Storing results in {}".format(exp_path))

    writer = None
    if tb_enabled:
        writer = SummaryWriter(exp_path)

    for parallel in [False, True]:
        algo = "PPO2"
        policy = "MlpPolicy" if non_image else "CnnPolicy"
        tb_log_name = "{}_parallel".format(algo) if parallel else algo
        logger.info("Evaluating {}; parallel: {}".format(algo, parallel))
        if parallel:
            logger.info("Evaluating using {} parallel environments".format(
                num_parallel_envs))
            env_fn = createNonImageEnv if non_image else createImageEnv
            env = SubprocVecEnv([env_fn for _ in range(num_parallel_envs)])
        else:
            env = createNonImageEnv() if non_image else createImageEnv()

        model = helper.make_new_model(algo, policy, env)
        helper.train_until(model,
                           env,
                           victory_threshold,
                           victory_trials,
                           max_seconds,
                           testing_interval,
                           tb_writer=writer,
                           tb_log_name=tb_log_name)
        # Pass the image_based flag, matching the call in evaluate_algorithms
        helper.save_model_with_env_settings(exp_path, model, algo, env,
                                            not non_image, basicdate)

    if writer is not None:
        writer.close()
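Code example #5
A notebook cell that creates populations of panther and pelican models on sparse-reward environments and trains every pelican against every panther in a round-robin loop.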
# +
pelican_env = PlarkEnvSparse(driving_agent='pelican',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)

panther_env = PlarkEnvSparse(driving_agent='panther',
                             config_file_path=config_file_path,
                             image_based=False,
                             random_panther_start_position=True,
                             max_illegal_moves_per_turn=1)
# -

panthers = [
    helper.make_new_model(model_type, policy_panther, panther_env)
    for _ in range(population_size)
]
pelicans = [
    helper.make_new_model(model_type, policy_pelican, pelican_env)
    for _ in range(population_size)
]

for iteration in range(iterations):
    print("Iteration: " + str(iteration))
    # Round robin: every pelican trains against every panther in the population
    for panther in panthers:
        for pelican in pelicans:
            panther_env.set_pelican(pelican)
            pelican_env.set_panther(panther)
            pelican.learn(steps)
Code example #6
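End-to-end self-play: an initial pelican is trained against the rule-based panther, an initial panther against that pelican, and the two then alternate training against each other's latest checkpoint before a video of the final pelican is rendered.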
def run_self_play(exp_name,
                  exp_path,
                  basicdate,
                  pelican_testing_interval=100,
                  pelican_max_initial_learning_steps=10000,
                  panther_testing_interval=100,
                  panther_max_initial_learning_steps=10000,
                  self_play_testing_interval=100,
                  self_play_max_learning_steps_per_agent=10000,
                  self_play_iterations=10000,
                  model_type='PPO2',
                  log_to_tb=False,
                  image_based=True,
                  num_parallel_envs=1):
    pelican_training_steps = 0
    panther_training_steps = 0

    pelican_model_type = model_type
    panther_model_type = model_type

    if log_to_tb:
        writer = SummaryWriter(exp_path)
        pelican_tb_log_name = 'pelican'
        panther_tb_log_name = 'panther'
    else:
        writer = None
        pelican_tb_log_name = None
        panther_tb_log_name = None

    policy = 'CnnPolicy' if image_based else 'MlpPolicy'

    # Run vectorised (parallel) environments only for PPO2
    parallel = model_type.lower() == 'ppo2'

    # Train initial pelican vs rule-based panther
    if parallel:
        pelican_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='pelican',
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        pelican_env = PlarkEnv(
            driving_agent='pelican',
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/pelican_easy.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)

    pelican_model = helper.make_new_model(model_type, policy, pelican_env)
    logger.info('Training initial pelican')
    pelican_agent_filepath, steps = train_agent(
        exp_path, pelican_model, pelican_env, pelican_testing_interval,
        pelican_max_initial_learning_steps, pelican_model_type, basicdate,
        writer, pelican_tb_log_name)
    pelican_training_steps = pelican_training_steps + steps

    # Train initial panther agent vs initial pelican agent
    if parallel:
        panther_env = SubprocVecEnv([
            lambda: PlarkEnv(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3) for _ in range(num_parallel_envs)
        ])
    else:
        panther_env = PlarkEnv(
            driving_agent='panther',
            pelican_agent_filepath=pelican_agent_filepath,
            config_file_path=
            '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
            image_based=image_based,
            random_panther_start_position=True,
            max_illegal_moves_per_turn=3)
    panther_model = helper.make_new_model(model_type, policy, panther_env)
    logger.info('Training initial panther')
    panther_agent_filepath, steps = train_agent(
        exp_path, panther_model, panther_env, panther_testing_interval,
        panther_max_initial_learning_steps, panther_model_type, basicdate,
        writer, panther_tb_log_name)
    panther_training_steps = panther_training_steps + steps

    # Train agent vs agent
    logger.info('Self play')

    for i in range(self_play_iterations):
        logger.info('Self play iteration %d of %d', i, self_play_iterations)
        logger.info('Training pelican')
        if parallel:
            pelican_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='pelican',
                    panther_agent_filepath=panther_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            pelican_env = PlarkEnvSparse(
                driving_agent='pelican',
                panther_agent_filepath=panther_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        pelican_agent_filepath, steps = train_agent(
            exp_path,
            pelican_model,
            pelican_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            pelican_model_type,
            basicdate,
            writer,
            pelican_tb_log_name,
            previous_steps=pelican_training_steps)
        pelican_training_steps = pelican_training_steps + steps

        logger.info('Training panther')
        if parallel:
            panther_env = SubprocVecEnv([
                lambda: PlarkEnvSparse(
                    driving_agent='panther',
                    pelican_agent_filepath=pelican_agent_filepath,
                    config_file_path=
                    '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                    image_based=image_based,
                    random_panther_start_position=True,
                    max_illegal_moves_per_turn=3)
                for _ in range(num_parallel_envs)
            ])
        else:
            panther_env = PlarkEnvSparse(
                driving_agent='panther',
                pelican_agent_filepath=pelican_agent_filepath,
                config_file_path=
                '/Components/plark-game/plark_game/game_config/10x10/balanced.json',
                image_based=image_based,
                random_panther_start_position=True,
                max_illegal_moves_per_turn=3)

        panther_agent_filepath, steps = train_agent(
            exp_path,
            panther_model,
            panther_env,
            self_play_testing_interval,
            self_play_max_learning_steps_per_agent,
            panther_model_type,
            basicdate,
            writer,
            panther_tb_log_name,
            previous_steps=panther_training_steps)
        panther_training_steps = panther_training_steps + steps

    logger.info('Training pelican total steps: %d', pelican_training_steps)
    logger.info('Training panther total steps: %d', panther_training_steps)
    # Make video
    video_path = os.path.join(exp_path, 'test_self_play.mp4')
    basewidth, hsize = helper.make_video(pelican_model, pelican_env,
                                         video_path)
    return video_path, basewidth, hsize
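A hypothetical call; the experiment name, path and parameter values are assumptions:

video_path, basewidth, hsize = run_self_play('self_play_demo',
                                             '/tmp/self_play_demo',
                                             datetime.now().strftime('%Y%m%d_%H%M%S'),
                                             model_type='PPO2',
                                             log_to_tb=True,
                                             image_based=False,
                                             num_parallel_envs=4)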
Code example #7
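A compact notebook version of self-play: each environment is wired to the opposing model once, the two models alternate learning steps, and the resulting game is rendered to MP4 and base64-encoded for inline playback.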
# +
pelican_env = envops(
    PlarkEnvSparse(driving_agent='pelican',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1), log_dir_base + '/pelican/')

panther_env = envops(
    PlarkEnvSparse(driving_agent='panther',
                   config_file_path=config_file_path,
                   image_based=False,
                   random_panther_start_position=True,
                   max_illegal_moves_per_turn=1), log_dir_base + '/panther/')

pelican = helper.make_new_model(model_type, policy, pelican_env)
panther = helper.make_new_model(model_type, policy, panther_env)

panther_env.set_pelican(pelican)
pelican_env.set_panther(panther)
# -

for iteration in tqdm(range(iterations)):
    pelican.learn(steps)
    panther.learn(steps)

# Make video
video_path = os.path.join('./', 'test_self_play.mp4')
basewidth, hsize = helper.make_video(pelican, pelican_env, video_path)
with open(video_path, 'rb') as f:
    video = f.read()
encoded = base64.b64encode(video)
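The base64 string is presumably intended for inline playback in the notebook. A minimal sketch using IPython.display (an assumption; the original cell ends at the encoding step):

from IPython.display import HTML

HTML(data='''<video controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))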