コード例 #1
0
class GA_Agent:
    """Genetic-algorithm trainer for two populations ("blue" and "red").

    Every organism is represented by a vector of coefficients.  Each
    generation a ``GameEnv`` is built from the current coefficient matrices
    and stepped until its time step reports ``is_last()``; the next
    coefficient matrices are then obtained from
    ``GaUtil.get_coeffs_from_best`` using the organisms the environment
    exposes as ``dead_blue_organisms`` / ``dead_red_organisms``.

    Results (plots and the hyperparameter dump) are written through a
    ``Plotter`` created for ``results/GA-<random string>/``.
    """

    # Class-level defaults; each one is overwritten per instance in __init__.
    # The collaborators are intentionally not instantiated here: building a
    # RandomUtil/Plotter while the class body executes would create throwaway
    # objects at import time, and a dict literal would be shared by instances.
    hparams = None
    random_seed = 0
    eval_interval = -1
    display_moves = False
    should_log = False
    random_util = None
    plotter = None
    base_path = 'none'
    episode_count = 0

    def __init__(self,
                 hyperparameters,
                 should_log=False,
                 eval_interval=-1,
                 display_moves=False):
        """Create an agent.

        Args:
            hyperparameters: Mapping of training settings read by ``train``.
                The object is stored by reference (not copied).
            should_log: When true, ``train`` prints each evaluation result.
            eval_interval: Evaluate every ``eval_interval`` generations;
                values <= 0 disable evaluation.
            display_moves: When true, each evaluation also draws one episode
                frame by frame through the plotter.
        """
        self.hparams = hyperparameters
        # The seed is derived from the wall clock, so runs differ unless the
        # stored seed is reused by the caller.
        self.random_seed = round(time.time())
        self.should_log = should_log
        self.display_moves = display_moves
        self.eval_interval = eval_interval

        self.random_util = RandomUtil(self.random_seed)
        self.base_path = 'results/GA-' + self.random_util.generate_random_string(
            6) + '/'
        self.plotter = Plotter(self.base_path)
        self.episode_count = 0

    @staticmethod
    def _anneal(current, step, final):
        """Advance ``current`` by ``step`` without moving past ``final``.

        Works element-wise for NumPy arrays and for plain scalars (for which
        a 0-d array is returned).  A non-negative step is capped from above
        by ``final``; a negative step is capped from below.
        """
        advanced = current + step
        return np.where(step >= 0,
                        np.minimum(advanced, final),
                        np.maximum(advanced, final))

    @staticmethod
    def _evaluate_polynomial(coefs, x):
        """Return ``sum(coefs[i] * x**i)`` (lowest-order coefficient first)."""
        value = 0.0
        x_pow = 1
        for coef in coefs:
            value += coef * x_pow
            x_pow *= x
        return value

    def __draw_episode(self, eval_py_env):
        """Play one episode and plot every non-terminal observation.

        Frames are written as ``game-drawn/episode-<k>/<i>.jpeg`` (paths are
        handed to the plotter relative to ``base_path``); ``episode_count``
        is incremented afterwards so the next call uses a new directory.
        """
        episode_path = 'game-drawn/episode-' + str(self.episode_count)
        os.makedirs(self.base_path + episode_path, exist_ok=True)

        time_step = eval_py_env.reset()
        frame_index = 0
        while not time_step.is_last():
            self.plotter.plot_state(
                time_step.observation,
                episode_path + '/' + str(frame_index) + '.jpeg')
            frame_index += 1
            time_step = eval_py_env.step()

        self.episode_count += 1

    def __evaluate_ga(self,
                      blue_coeffs,
                      red_coeffs,
                      eval_py_env,
                      display_moves=False,
                      no_tests=10):
        """Return the mean episode return of ``eval_py_env``.

        Args:
            blue_coeffs: Unused; kept so the signature stays unchanged.
            red_coeffs: Unused; kept so the signature stays unchanged.
            eval_py_env: Environment exposing ``reset()`` and ``step()``,
                both returning a time step with ``is_last()`` and ``reward``.
            display_moves: When true, one extra episode is played first and
                drawn frame by frame (it does not count towards the mean).
            no_tests: Number of episodes to average over.

        Returns:
            Sum of the rewards of all episodes divided by ``no_tests``.

        Raises:
            ZeroDivisionError: If ``no_tests`` is 0.
        """
        if display_moves:
            self.__draw_episode(eval_py_env)

        total_return = 0.0
        for _ in range(no_tests):
            time_step = eval_py_env.reset()
            episode_return = 0.0
            while not time_step.is_last():
                time_step = eval_py_env.step()
                episode_return += time_step.reward
            total_return += episode_return

        return total_return / no_tests

    def __plot_coefs(self, single_organism_coefs, file_name):
        """Plot the polynomial defined by one organism's coefficients.

        The polynomial is sampled on [0, 10) with a spacing of 0.03 and the
        samples are passed to ``plotter.plot_simple_values`` with
        ``directory=file_name``.
        """
        step = 0.03
        x_values = []
        function_values = []

        # x is accumulated (not computed as i * step) to keep the sampled
        # abscissae identical to what earlier runs produced.
        x = 0.0
        while x < 10:
            function_values.append(
                self._evaluate_polynomial(single_organism_coefs, x))
            x_values.append(x)
            x += step

        self.plotter.plot_simple_values(x=x_values,
                                        y=function_values,
                                        directory=file_name)

    def train(self, eval_game_params):
        """Run the genetic algorithm for ``hparams['no_generations']``.

        Args:
            eval_game_params: Mapping with ``'no_blue_organisms'``,
                ``'no_red_organisms'``, ``'food_count'`` and ``'board_size'``
                used to build the separate evaluation environment.

        Side effects:
            Writes ``score.jpeg``, ``red-coeffs.jpeg``, ``blue-coeffs.jpeg``
            and ``hparams.json`` through the plotter, stores the ``GaUtil``
            instance on ``self.ga_util`` and adds ``'random_seed'`` to the
            hyperparameter mapping given to the constructor.
        """
        hparams = self.hparams
        max_degree = hparams['max_parameter_degree']
        no_blues = hparams['no_blue_organisms']
        no_reds = hparams['no_red_organisms']
        coef_count = np.power(max_degree + 1, hparams['no_parameters'])
        self.ga_util = GaUtil(self.random_util, coef_count)

        blue_coeffs = self.random_util.get_random_matrix(
            no_blues, coef_count, [-1000, 1000])
        red_coeffs = self.random_util.get_random_matrix(
            no_reds, coef_count, [-1000, 1000])

        # Linear annealing schedules.  The arrays are forced to float so that
        # integer hyperparameters cannot make the fractional step fail to
        # cast, and every update below is clamped so the values stop at
        # their *_final targets once the anneal time has elapsed.
        no_random = hparams['no_random_start'] * 1.0
        no_random_final = hparams['no_random_final']
        random_step = (no_random_final -
                       no_random) / hparams['no_random_anneal_time']
        mutation_factor_range = np.array(
            hparams['mutation_factor_range_start'], dtype=float)
        mutation_factor_range_final = np.array(
            hparams['mutation_factor_range_final'], dtype=float)
        mutation_factor_range_step = (
            mutation_factor_range_final - mutation_factor_range
        ) / hparams['mutation_factor_range_anneal_time']

        returns = []
        # Organisms of the previous generation; empty before the first game.
        prev_blue_organisms = []
        prev_red_organisms = []

        for generation_number in range(hparams['no_generations']):
            env = GameEnv(blue_coeffs, red_coeffs, max_degree,
                          hparams['food_count'], hparams['board_size'])

            # Evaluate the GA on the best organisms of the previous generation.
            # NOTE(review): with eval_interval == 1 the very first evaluation
            # receives empty organism lists - confirm that
            # GaUtil.get_coeffs_from_best copes with that.
            if self.eval_interval > 0 and (generation_number +
                                           1) % self.eval_interval == 0:
                eval_blue_coeffs = self.ga_util.get_coeffs_from_best(
                    prev_blue_organisms, eval_game_params['no_blue_organisms'],
                    eval_game_params['no_blue_organisms'], 0, [0, 0])
                eval_red_coeffs = self.ga_util.get_coeffs_from_best(
                    prev_red_organisms, eval_game_params['no_red_organisms'],
                    eval_game_params['no_red_organisms'], 0, [0, 0])
                eval_py_env = GameEnv(eval_blue_coeffs, eval_red_coeffs,
                                      max_degree,
                                      eval_game_params['food_count'],
                                      eval_game_params['board_size'])

                avg_return = self.__evaluate_ga(blue_coeffs, red_coeffs,
                                                eval_py_env,
                                                self.display_moves)
                returns.append(avg_return)
                if self.should_log:
                    print(avg_return)

            # Play the game to its end.
            time_step = env.reset()
            while not time_step.is_last():
                time_step = env.step()

            # Build the next generation from this game's organisms.
            blue_organisms = env.dead_blue_organisms
            blue_coeffs = self.ga_util.get_coeffs_from_best(
                blue_organisms, no_blues, hparams['no_best'], round(no_random),
                mutation_factor_range)
            red_organisms = env.dead_red_organisms
            red_coeffs = self.ga_util.get_coeffs_from_best(
                red_organisms, no_reds, hparams['no_best'], round(no_random),
                mutation_factor_range)
            prev_blue_organisms = blue_organisms
            prev_red_organisms = red_organisms

            # Move both schedules one (clamped) step towards their targets.
            # float() keeps no_random a plain Python number so round() above
            # keeps returning an int.
            no_random = float(
                self._anneal(no_random, random_step, no_random_final))
            mutation_factor_range = self._anneal(mutation_factor_range,
                                                 mutation_factor_range_step,
                                                 mutation_factor_range_final)

        self.plotter.plot_simple_values(y=list(returns),
                                        directory='score.jpeg')
        # NOTE(review): every organism is plotted to the same file name;
        # whether the curves overlay or overwrite depends on Plotter.
        for single_organism_coefs in red_coeffs:
            self.__plot_coefs(single_organism_coefs, 'red-coeffs.jpeg')
        for single_organism_coefs in blue_coeffs:
            self.__plot_coefs(single_organism_coefs, 'blue-coeffs.jpeg')
        hparams['random_seed'] = self.random_seed
        self.plotter.dump_to_json(hparams, 'hparams.json')