Example #1
    def act(self, gs: GameState) -> int:
        root_hash = gs.get_unique_id()
        memory = self.memory if self.keep_memory else dict()

        if root_hash not in memory:
            MOMCTSAgent.create_node_in_memory(
                memory, root_hash,
                gs.get_available_actions(gs.get_active_player()),
                gs.get_active_player())

        for _ in range(self.max_iteration):
            gs_copy = gs.clone()
            s = gs_copy.get_unique_id()
            history = []

            # SELECTION
            while not gs_copy.is_game_over() and all(
                    edge['n'] > 0 for edge in memory[s]):
                chosen_edge = max(memory[s], key=MOMCTSAgent.ucb_1)
                history.append((s, chosen_edge))

                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    MOMCTSAgent.create_node_in_memory(memory, s, gs_copy.get_available_actions(
                        gs_copy.get_active_player()
                    ), gs_copy.get_active_player())

            # EXPANSION
            if not gs_copy.is_game_over():
                chosen_edge = choice(
                    [edge for edge in memory[s] if edge['n'] == 0])

                history.append((s, chosen_edge))
                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    MOMCTSAgent.create_node_in_memory(memory, s, gs_copy.get_available_actions(
                        gs_copy.get_active_player()
                    ), gs_copy.get_active_player())

            # SIMULATION
            while not gs_copy.is_game_over():
                gs_copy.step(gs_copy.get_active_player(),
                             choice(gs_copy.get_available_actions(gs_copy.get_active_player())))

            scores = gs_copy.get_scores()
            # BACKPROPAGATE THE SCORE
            for (s, edge) in history:
                edge['n'] += 1
                edge['r'] += scores[edge['p']]
                for neighbour_edge in memory[s]:
                    neighbour_edge['np'] += 1

        return max(memory[root_hash], key=lambda e: e['n'])['a']
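The snippet relies on two helpers that are not shown. A minimal sketch consistent with how they are used above, written here as module-level functions; the edge-dict layout ('a', 'p', 'n', 'r', 'np') is inferred from the reads in the loop, and the exploration constant is an assumption:

from math import log, sqrt

def create_node_in_memory(memory: dict, node_hash, available_actions, player) -> None:
    # one edge per legal action; all counters start at zero
    memory[node_hash] = [{'a': a, 'p': player, 'n': 0, 'r': 0.0, 'np': 0}
                         for a in available_actions]

def ucb_1(edge: dict, c: float = sqrt(2.0)) -> float:
    # classic UCB1: mean reward plus an exploration bonus driven by the
    # parent visit count ('np') and the edge visit count ('n')
    if edge['n'] == 0:
        return float('inf')  # unvisited edges are handled by EXPANSION above
    return edge['r'] / edge['n'] + c * sqrt(log(edge['np']) / edge['n'])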
Example #2
def run_for_n_games_and_return_stats(
        agents: List[Agent], gs: GameState,
        games_count: int) -> Tuple[np.ndarray, np.ndarray]:
    total_scores = np.zeros_like(gs.get_scores())

    for _ in range(games_count):
        gs_copy = gs.clone()
        run_to_the_end(agents, gs_copy)
        total_scores += gs_copy.get_scores()

    return total_scores, total_scores / games_count
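Both statistics helpers call run_to_the_end, which is not shown. A plausible sketch of it, assuming each Agent exposes act(gs) and that the agent list is indexed by player id:

def run_to_the_end(agents: List[Agent], gs: GameState) -> None:
    # play a single game to completion, mutating gs in place
    while not gs.is_game_over():
        p = gs.get_active_player()
        gs.step(p, agents[p].act(gs))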
Example #3
def run_for_n_games_and_return_max(agents: List[Agent], gs: GameState,
                                   games_count: int) -> np.ndarray:
    # two-row buffer: row 1 holds the latest scores, row 0 the running max;
    # -9999.9 is a sentinel assumed lower than any reachable score
    old_and_new_scores = np.ones((2, len(gs.get_scores()))) * -9999.9

    for _ in range(games_count):
        gs_copy = gs.clone()
        run_to_the_end(agents, gs_copy)
        new_scores = gs_copy.get_scores()
        old_and_new_scores[1, :] = new_scores
        old_and_new_scores[0, :] = np.max(old_and_new_scores, axis=0)

    return old_and_new_scores[0, :]
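The two-row buffer above emulates a running maximum. An equivalent and arguably clearer formulation with np.maximum (a sketch; identical results for games_count >= 1):

def run_for_n_games_and_return_max(agents: List[Agent], gs: GameState,
                                   games_count: int) -> np.ndarray:
    best_scores = np.full(len(gs.get_scores()), -np.inf)
    for _ in range(games_count):
        gs_copy = gs.clone()
        run_to_the_end(agents, gs_copy)
        # element-wise running maximum over all games played so far
        best_scores = np.maximum(best_scores, gs_copy.get_scores())
    return best_scores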
Example #4
    def act(self, gs: GameState) -> int:
        available_actions = gs.get_available_actions(gs.get_active_player())
        if self.agents is None:
            # one RandomAgent per seat; fresh instances avoid the classic
            # pitfall of sharing a single object across all players
            self.agents = [RandomAgent() for _ in range(gs.player_count())]
        accumulated_scores = np.zeros((len(available_actions),))

        for i, a in enumerate(available_actions):
            gs_clone = gs.clone()
            gs_clone.step(gs.get_active_player(), a)
            if self.determinist_environment:
                max_scores = run_for_n_games_and_return_max(self.agents, gs_clone, self.epochs_per_action)
                accumulated_scores[i] = max_scores[gs.get_active_player()]
            else:
                (total_scores, _) = run_for_n_games_and_return_stats(self.agents, gs_clone, self.epochs_per_action)
                accumulated_scores[i] = total_scores[gs.get_active_player()]

        # print((accumulated_scores, available_actions[np.argmax(accumulated_scores)]))
        return available_actions[np.argmax(accumulated_scores)]
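Hypothetical usage of this flat Monte Carlo act method; FlatMonteCarloAgent and its constructor parameters are names inferred from the attributes read above, not confirmed by the source:

# FlatMonteCarloAgent is a hypothetical class name for the method above;
# it evaluates each legal action with random playouts, then picks the best
agent = FlatMonteCarloAgent(epochs_per_action=100,
                            determinist_environment=False)
action = agent.act(gs)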
Example #5
def run_for_n_games_and_return_stats(
        agents: List[Agent],
        gs: GameState,
        games_count: int,
        shuffle_players: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    total_scores = np.zeros_like(gs.get_scores())
    agents_order = np.arange(len(agents))

    agents_copy = agents
    for _ in range(games_count):
        gs_copy = gs.clone()
        if shuffle_players:
            shuffle(agents_order)
            # seat i is played by agents[agents_order[i]]
            agents_copy = [agents[j] for j in agents_order]
        run_to_the_end(agents_copy, gs_copy)
        # map per-seat scores back to the original agent order; np.argsort
        # inverts the permutation (indexing with agents_order itself is only
        # correct when the permutation is its own inverse)
        total_scores += gs_copy.get_scores()[np.argsort(agents_order)]

    return total_scores, total_scores / games_count
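A quick standalone check of the inverse-permutation attribution used above:

import numpy as np

order = np.array([2, 0, 1])               # seat i is played by agent order[i]
seat_scores = np.array([10., 20., 30.])   # scores indexed by seat
agent_scores = seat_scores[np.argsort(order)]
# agent 0 sat at seat 1, agent 1 at seat 2, agent 2 at seat 0
assert (agent_scores == np.array([20., 30., 10.])).all()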
Example #6
    def act(self, gs: GameState) -> int:

        if self.apprentice_training_count > self.apprentice_training_before_takeover:
            # the trained apprentice plays directly: pick the legal action
            # with the highest predicted probability
            available_actions = gs.get_available_actions(gs.get_active_player())
            predictions = self.brain.predict(
                np.array([gs.get_vectorized_state()]))[0]
            return available_actions[np.argmax(predictions[available_actions])]

        root_hash = gs.get_unique_id()
        memory = self.memory if self.keep_memory else dict()

        if root_hash not in memory:
            ExpertApprenticeAgent.create_node_in_memory(
                memory, root_hash,
                gs.get_available_actions(gs.get_active_player()),
                gs.get_active_player())

        for _ in range(self.max_iteration):
            gs_copy = gs.clone()
            s = gs_copy.get_unique_id()
            history = []

            # SELECTION
            while not gs_copy.is_game_over() and all(
                    edge['n'] > 0 for edge in memory[s]):
                chosen_edge = max(memory[s],
                                  key=ExpertApprenticeAgent.ucb_1)
                history.append((s, chosen_edge))

                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    ExpertApprenticeAgent.create_node_in_memory(
                        memory, s,
                        gs_copy.get_available_actions(
                            gs_copy.get_active_player()),
                        gs_copy.get_active_player())

            # EXPANSION
            if not gs_copy.is_game_over():
                chosen_edge = choice(
                    [edge for edge in memory[s] if edge['n'] == 0])

                history.append((s, chosen_edge))
                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    ExpertApprenticeAgent.create_node_in_memory(
                        memory, s,
                        gs_copy.get_available_actions(
                            gs_copy.get_active_player()),
                        gs_copy.get_active_player())

            # SIMULATION
            while not gs_copy.is_game_over():
                gs_copy.step(
                    gs_copy.get_active_player(),
                    choice(
                        gs_copy.get_available_actions(
                            gs_copy.get_active_player())))

            scores = gs_copy.get_scores()
            # BACKPROPAGATE THE SCORE
            for (s, edge) in history:
                edge['n'] += 1
                edge['r'] += scores[edge['p']]
                for neighbour_edge in memory[s]:
                    neighbour_edge['np'] += 1

        # the expert's visit counts become the apprentice's training target
        target = np.zeros(gs.get_action_space_size())

        for edge in memory[root_hash]:
            target[edge['a']] = edge['n']

        target /= np.sum(target)

        self.states_buffer.append(gs.get_vectorized_state())
        self.actions_buffer.append(target)

        # retrain the apprentice once enough (state, target) pairs accumulate
        if len(self.states_buffer) > 200:
            self.apprentice_training_count += 1
            self.brain.fit(np.array(self.states_buffer),
                           np.array(self.actions_buffer))
            self.states_buffer.clear()
            self.actions_buffer.clear()

        if self.apprentice_training_count > self.apprentice_training_before_takeover:
            print('Apprentice is playing next round')

        return max(memory[root_hash], key=lambda e: e['n'])['a']
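The apprentice's brain only needs predict and fit over a fixed-size state vector and an action-space-sized distribution. A minimal sketch of a compatible model; Keras is an assumption (to_categorical in Example #7 suggests it, but any model with the same interface would do), and build_apprentice_brain is a hypothetical name:

import tensorflow as tf

def build_apprentice_brain(state_size: int, action_space_size: int):
    # hypothetical helper: maps a vectorized state to a probability
    # distribution over the action space
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu',
                              input_shape=(state_size,)),
        tf.keras.layers.Dense(action_space_size, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model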
Example #7
    def act(self, gs: GameState) -> int:
        root_hash = gs.get_unique_id()
        memory = self.memory if self.keep_memory else dict()

        if root_hash not in memory:
            q_values = self.brain.predict(gs.get_vectorized_state())
            HalfAlphaZeroAgent.create_node_in_memory(
                memory, root_hash,
                gs.get_available_actions(gs.get_active_player()),
                gs.get_active_player(), q_values)

        for _ in range(self.max_iteration):
            gs_copy = gs.clone()
            s = gs_copy.get_unique_id()
            history = []

            # SELECTION
            while not gs_copy.is_game_over() and all(
                    edge['n'] > 0 for edge in memory[s]):
                chosen_edge = max(memory[s],
                                  key=HalfAlphaZeroAgent.ucb_1)
                history.append((s, chosen_edge))

                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    q_values = self.brain.predict(
                        gs_copy.get_vectorized_state())
                    HalfAlphaZeroAgent.create_node_in_memory(
                        memory, s,
                        gs_copy.get_available_actions(
                            gs_copy.get_active_player()),
                        gs_copy.get_active_player(), q_values)

            # EXPANSION
            if not gs_copy.is_game_over():
                chosen_edge = choice(
                    [edge for edge in memory[s] if edge['n'] == 0])

                history.append((s, chosen_edge))
                gs_copy.step(gs_copy.get_active_player(), chosen_edge['a'])
                s = gs_copy.get_unique_id()
                if s not in memory:
                    q_values = self.brain.predict(
                        gs_copy.get_vectorized_state())
                    HalfAlphaZeroAgent.create_node_in_memory(
                        memory, s,
                        gs_copy.get_available_actions(
                            gs_copy.get_active_player()),
                        gs_copy.get_active_player(), q_values)

            # no random rollout here: the critic's value estimate ('q') of
            # the first edge met for each player along the path stands in
            # for a simulated final score
            scores = np.zeros(gs_copy.player_count())
            scores_set = np.zeros(gs_copy.player_count())
            # BACKPROPAGATE THE SCORE
            for (s, edge) in history:
                if scores_set[edge['p']] == 0:
                    scores_set[edge['p']] = 1.0
                    scores[edge['p']] = edge['q']

                edge['n'] += 1
                edge['r'] += scores[edge['p']]
                for neighbour_edge in memory[s]:
                    neighbour_edge['np'] += 1

        chosen_action = max(memory[root_hash], key=lambda e: e['n'])['a']

        if len(self.states_buffer) > 0:
            # the reward accumulated since the previous move closes out
            # that move's training sample
            self.rewards_buffer.append(self.intermediate_reward)

        self.states_buffer.append(gs.get_vectorized_state())
        self.actions_buffer.append(
            to_categorical(chosen_action, gs.get_action_space_size()))
        self.intermediate_reward = 0.0

        return chosen_action
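For this value-guided variant, create_node_in_memory must also store a per-action value estimate. A sketch inferred from the 'q' reads in the backpropagation above; the exact layout and the indexing of q_values by action are assumptions:

def create_node_in_memory(memory: dict, node_hash, available_actions,
                          player, q_values) -> None:
    # same edge layout as the plain MCTS agents, plus the critic's value
    # estimate 'q' for each action (assumed indexable by action id)
    memory[node_hash] = [{'a': a, 'p': player, 'n': 0, 'r': 0.0, 'np': 0,
                          'q': q_values[a]}
                         for a in available_actions]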