# 예제 #1 (Example #1)
# 0
def testSimpleAgent():
    """Smoke-test a SimpleAgent on one hand-crafted 6x6 board.

    Builds a fixed board and a single bomb, converts them into a game
    state/observation via the project's `my_utility` helpers and the
    forward model, then prints the action the SimpleAgent picks for
    agent index 1.  No return value; output goes to stdout.
    """
    # NOTE(review): GameType(4) presumably selects the 1-vs-1 variant —
    # confirm against pommerman.constants.
    game_type = constants.GameType(4)

    # Cell values follow the pommerman item encoding (0=passage, 1=rigid,
    # 2=wood, 3=bomb, 10/11=agents, ...) — assumed, verify with constants.Item.
    board = [[0, 0, 2, 1, 1, 1], [0, 0, 0, 0, 0, 0], [2, 8, 0, 1, 0, 1],
             [1, 0, 1, 0, 10, 1], [1, 0, 3, 0, 0, 1], [1, 11, 1, 1, 1, 0]]
    # One bomb: (row, col, blast_strength, moving_direction).
    bomb_info = [(0, 1, 2, None)]

    game_state = my_utility.get_gamestate(board, bomb_info)
    game_data = my_utility.get_gamedata(game_state, game_type)

    fm = forward_model.ForwardModel()

    obs = fm.get_observations(game_data.board, game_data.agents,
                              game_data.bombs, game_data.flames, False, None,
                              game_data.game_type, None)

    simple_agent = SimpleAgent()

    # Act for agent 1 with the standard 6-action discrete space.
    print(simple_agent.act(obs[1], spaces.Discrete(6)))
class EvaluatorAgent(BaseAgent):
    """Agent used to evaluate a trained model inside the Pommerman framework.

    Wraps an optional Keras-style evaluation model; when no model (or model
    path) is supplied, it falls back to the rule-based ``SimpleAgent``.
    It records per-episode action histories so runs can be analyzed and
    plotted afterwards via the module-level ``run_episode`` /
    ``plot_statistics`` / ``evaluate_agent`` helpers.
    """

    def __init__(
        self,
        n_actions,
        character,
        evaluation_model=None,
        evaluation_model_path=None,
        # Agent properties that control observation preprocessing
        use_history=True,  # Use previous observations for predictions
        use_2d=True,  # Use 2d convolutions (stack history on the channel axis)
        patient=True,  # Wait to make initial observations (only matters with history)
        center_view=True,  # Center the board view on the agent
        original_view=False,  # Use 11x11 board; if False, use 21x21
        verbose=False  # Print the chosen actions
    ):
        """Build the agent.

        Args:
            n_actions: Size of the discrete action space.
            character: Pommerman character, forwarded to ``BaseAgent`` and
                to the fallback ``SimpleAgent``.
            evaluation_model: Pre-built model with ``predict`` (and optionally
                ``load_weights``). Takes precedence over the path argument.
            evaluation_model_path: Path to load weights into
                ``evaluation_model``, or — if no model object is given — to
                load a full model from via ``load_model``.
            use_history / use_2d / patient / center_view / original_view /
            verbose: See the inline comments on the parameters above.
        """
        super(EvaluatorAgent, self).__init__(character=character)

        # Properties
        self.use_history = use_history
        self.use_2d = use_2d
        self.patient = patient
        self.center_view = center_view
        self.original_view = original_view
        self.verbose = verbose

        # Acting history for the evaluation:
        # actions_history[episode_index] is the list of actions of that episode.
        self.actions_history = []
        self.observations_history = []
        self.episode_count = 0
        self.steps = 0

        self.n_actions = n_actions

        self.simple_agent = SimpleAgent(character=character)
        # Load any custom model; on failure fall back to SimpleAgent.
        self.evaluation_model = None
        if evaluation_model:
            self.evaluation_model = evaluation_model
            if evaluation_model_path:
                try:
                    self.evaluation_model.load_weights(evaluation_model_path)
                except Exception:  # narrow from bare except: don't mask Ctrl-C
                    print('Weights load failed')
        elif evaluation_model_path:
            try:
                self.evaluation_model = load_model(evaluation_model_path)
            except Exception:  # narrow from bare except: don't mask Ctrl-C
                print('Model load failed')
        else:
            print('Use SimpleAgent')

    # Featurization
    def featurize(self, obs):
        """Convert a raw observation with the module-level ``featurize``.

        NOTE(review): ``crop`` is fed ``self.original_view`` while
        ``original_view=False`` means "use 21x21" — confirm the helper's
        argument semantics match this wiring.
        """
        return featurize(obs, center=self.center_view, crop=self.original_view)

    # Acting
    def act(self, obs, action_space=None):
        """Choose an action for the current observation.

        Uses the evaluation model (argmax over its predictions) when one is
        loaded, otherwise delegates to the fallback ``SimpleAgent``.  Every
        chosen action is appended to the current episode's action history.

        Returns:
            The chosen action (int index, or whatever ``SimpleAgent.act``
            returns when no model is loaded).
        """
        # Make sure an action list exists for the current episode.
        # Checking the list length instead of `self.steps == 0` avoids a bug:
        # during the patient warm-up below `self.steps` stays 0, so the old
        # check appended a spurious empty list on every warm-up step,
        # desynchronizing `self.episode_count` indexing.
        if len(self.actions_history) <= self.episode_count:
            self.actions_history.append([])

        # Create the featurized observation.
        obs_f = self.featurize(obs)

        # If our agent is patient, idle (action 0) until enough observations
        # are collected to fill the history window.
        if self.patient and len(
                self.observations_history) < history_length - 1:
            self.observations_history.append(obs_f)
            self.actions_history[self.episode_count].append(0)
            return 0

        if self.use_history:
            obs_history = self.make_observation(obs_f, self.steps, self.use_2d)
        else:
            obs_history = obs_f

        # Append the current observation after the merge.
        self.observations_history.append(obs_f)

        # Predict the action.
        if self.evaluation_model is not None:
            res = self.evaluation_model.predict(
                obs_history.reshape((1, ) + obs_history.shape))[0]
            res = np.argmax(res)
        else:
            res = self.simple_agent.act(obs, action_space)
        if self.verbose:
            print(res, end='; ')

        self.actions_history[self.episode_count].append(res)

        if self.verbose:
            print(ACTIONS[res])

        self.steps += 1
        return res

    def make_observation(self, obs, i, use_2d=True):
        """Build a history-stacked observation for step ``i``.

        Produces an array of the last ``history_length`` observations
        (shape ``(history_length, view_size, view_size, n_channels)``),
        padding with the earliest available observation when fewer than
        ``history_length`` steps have been seen.  With ``use_2d`` the
        history is concatenated along the channel axis instead, for 2-D
        convolutions.
        """
        if i == 0:  # First observation: repeat it to fill the window
            res = np.array([obs for _ in range(history_length)])
        elif i < history_length - 1:  # Partially filled history
            n_first = history_length - 1 - i
            res = np.concatenate(
                [
                    np.array([
                        self.observations_history[0] for _ in range(n_first)
                    ]),  # Repeat the first observation as padding
                    np.array(self.observations_history[:i]).reshape(
                        i, view_size, view_size,
                        n_channels),  # Observations seen so far
                    obs.reshape(1, view_size, view_size, n_channels)
                ],  # Current observation
                axis=0)
        else:  # Full window available
            res = np.concatenate(
                [
                    np.array(
                        self.observations_history[i - history_length + 1:i]).
                    reshape(history_length - 1, view_size, view_size,
                            n_channels),  # Most recent stored observations
                    obs.reshape(1, view_size, view_size, n_channels)
                ],  # Current observation
                axis=0)
        if use_2d:
            # Collapse the history axis into the channel axis for 2-D convs.
            res = np.concatenate(res, axis=-1)
        return res

    # Evaluation bookkeeping
    def end_episode(self):
        """Reset per-episode state and advance the episode counter."""
        self.steps = 0
        self.episode_count += 1
        self.observations_history = []

    def reset_run(self):
        """Clear all recorded history for a fresh evaluation run."""
        self.actions_history = []
        self.episode_count = 0
        self.steps = 0

    def close(self):
        """Nothing to release; present for the agent interface."""
        pass

    def run_episode(self, config, env):
        """Run one episode via the module-level ``run_episode`` helper."""
        return run_episode(self, config, env, self.agent_id)

    def plot_statistics(self, info, selected_labels):
        """Plot run statistics via the module-level ``plot_statistics``."""
        return plot_statistics(self, info, selected_labels)

    def evaluate_agent(self, selected_labels, iterations=100, plot=True):
        """Evaluate this agent via the module-level ``evaluate_agent``."""
        return evaluate_agent(self, selected_labels, self.agent_id, iterations,
                              plot)