Ejemplo n.º 1
0
class PolicyEvaluator(object):
    """Scores a policy network by playing Snake games in a batch.

    One ``ExpGain`` engine is created per slot of the network's batch, so
    that a single ``select_action`` call picks the next move for every
    engine at once.
    """

    def __init__(self, architecture_file, model_file):
        """Build the network and one game engine per batch slot.

        Args:
            architecture_file: Forwarded to ``BaristaNet`` as its first
                argument.
            model_file: Forwarded to ``BaristaNet`` as its second argument.
        """
        # Initialize objects
        self.net = BaristaNet(architecture_file, model_file, None)
        self.batch_size = self.net.batch_size

        game = SnakeGame()
        # The preprocessor is built from the trailing dimensions of the
        # network's state blob (everything after the first two axes).
        preprocessor = generate_preprocessor(self.net.state.shape[2:],
                                             gray_scale)
        # Every engine receives the same network, the same action keys and
        # the ``cpu_play`` method of the single ``SnakeGame`` created above;
        # ``encode_state()`` is called once per engine.
        self.engines = [ExpGain(self.net, ['w', 'a', 's', 'd'],
                                preprocessor, game.cpu_play,
                                None, game.encode_state())
                        for _ in range(self.batch_size)]

    def evaluate(self, model, num_trials):
        """Run ``num_trials`` games and return the average score.

        Args:
            model: Parameters handed to ``set_net_params`` for each engine's
                network before play starts.
            num_trials: Number of finished games to average over. Must be
                positive.

        Returns:
            float: Sum of the scores of the first ``num_trials`` games to
            finish, divided by ``num_trials``.

        Raises:
            ValueError: If ``num_trials`` is zero or negative. Checked
                before any engine is modified, so a bad call leaves the
                evaluator untouched.
        """
        # Without this guard a zero count would only fail at the final
        # division (ZeroDivisionError) after all engines had been reset,
        # and a negative count would silently yield -0.0.
        if num_trials <= 0:
            raise ValueError(
                "num_trials must be positive, got %r" % (num_trials,))

        for eg in self.engines:
            set_net_params(eg.net.net, model)
            eg.reset_game()

        total_score = 0
        trials_completed = 0
        # Running score of the game currently in progress on each engine.
        scores = [0] * self.batch_size
        while trials_completed < num_trials:
            # One batched action selection per step, covering all engines.
            states = [eg.get_preprocessed_state() for eg in self.engines]
            actions = self.net.select_action(states,
                                             batch_size=self.batch_size)
            for i, (action, eg) in enumerate(zip(actions, self.engines)):
                # ``action`` indexes into the engine's list of action keys.
                scores[i] += eg.play_action(eg.actions[action])
                if eg.game_over:
                    total_score += scores[i]
                    trials_completed += 1
                    if trials_completed == num_trials:
                        # Enough games finished; the remaining engines are
                        # not stepped and their partial scores are dropped.
                        break
                    # Start a fresh game on this slot and keep going.
                    eg.reset_game()
                    scores[i] = 0

        return float(total_score)/num_trials