コード例 #1
0
def run_step(agents: List[Agent], gs: GameState):
    """Play a single move and report the resulting score change to every agent.

    The active player's agent picks an action, the action is applied to
    ``gs`` in place, and each agent is then told how its own score changed.

    Args:
        agents: Agents indexed by player number.
        gs: Game state to advance; it must not already be over.

    Raises:
        AssertionError: If ``gs`` reports that the game is already over.
    """
    assert not gs.is_game_over()

    current_player = gs.get_active_player()

    # Snapshot the scores: the copy keeps the "before" values intact even if
    # ``step`` updates the score container in place.
    scores_before = gs.get_scores().copy()

    acting_agent = agents[current_player]
    chosen_action = acting_agent.act(gs)
    gs.step(current_player, chosen_action)

    # Assumes the scores support element-wise subtraction (e.g. a NumPy array).
    score_deltas = gs.get_scores() - scores_before

    # Every agent is notified, not only the one that just moved.
    for player_index, agent in enumerate(agents):
        reward = score_deltas[player_index]
        finished = gs.is_game_over()
        agent.observe(reward, finished, player_index)
コード例 #2
0
def run_for_n_games_and_return_stats(
        agents: List[Agent], gs: GameState,
        games_count: int) -> (np.ndarray, np.ndarray):
    """Play ``games_count`` games and accumulate the final scores.

    Args:
        agents: Players handed to ``run_to_the_end`` for every game.
        gs: Starting state. Each game is played on ``gs.clone()``; this
            function itself only calls ``get_scores()`` and ``clone()`` on it.
        games_count: Number of games to play.

    Returns:
        A ``(totals, means)`` pair: the element-wise sum of the final scores
        over all games, and that sum divided by ``games_count``.
    """
    # Accumulator with the same shape and dtype as the score array.
    score_sum = np.zeros_like(gs.get_scores())

    for _game in range(games_count):
        playout = gs.clone()
        run_to_the_end(agents, playout)
        final_scores = playout.get_scores()
        score_sum += final_scores

    score_mean = score_sum / games_count
    return score_sum, score_mean
コード例 #3
0
def run_for_n_games_and_return_max(agents: List[Agent], gs: GameState,
                                   games_count: int) -> np.ndarray:
    """Play ``games_count`` games and return each player's best final score.

    Args:
        agents: Players handed to ``run_to_the_end`` for every game.
        gs: Starting state; each game is played on ``gs.clone()``.
        games_count: Number of games to play.

    Returns:
        A float array with one entry per score slot holding the highest final
        score seen across all games. Entries start at the sentinel
        ``-9999.9``, so that value is returned when no game is played, and
        scores below it are never reported.
    """
    player_count = len(gs.get_scores())
    best_scores = np.full(player_count, -9999.9)

    for _game in range(games_count):
        playout = gs.clone()
        run_to_the_end(agents, playout)
        final_scores = playout.get_scores()
        # Element-wise running maximum, written back into the float buffer.
        np.maximum(best_scores, final_scores, out=best_scores)

    return best_scores
コード例 #4
0
ファイル: __init__.py プロジェクト: AarganC/Quantum_TicTacToe
def run_for_n_games_and_return_stats(
        agents: List[Agent],
        gs: GameState,
        games_count: int,
        shuffle_players: bool = False) -> (np.ndarray, np.ndarray):
    """Play ``games_count`` games and accumulate each agent's final scores.

    Args:
        agents: Players handed to ``run_to_the_end``; results are reported in
            the order of this list.
        gs: Starting state; each game is played on ``gs.clone()``.
        games_count: Number of games to play.
        shuffle_players: When true, the seating order is reshuffled before
            every game. ``agents`` itself is never reordered.

    Returns:
        A ``(totals, means)`` pair indexed like ``agents``: the summed final
        scores over all games, and that sum divided by ``games_count``.
    """
    total_scores = np.zeros_like(gs.get_scores())
    # agents_order[seat] is the index (into ``agents``) of the agent playing
    # in that seat. It starts as the identity and is shuffled in place.
    agents_order = np.arange(len(agents))

    for _ in range(games_count):
        gs_copy = gs.clone()
        if shuffle_players:
            shuffle(agents_order)
            seated_agents = [agents[agent_index]
                             for agent_index in agents_order]
        else:
            seated_agents = agents
        run_to_the_end(seated_agents, gs_copy)
        # Scores come back per seat. Seat ``i`` was played by agent
        # ``agents_order[i]``, so credit that agent. Reading
        # ``scores[agents_order]`` instead would apply the permutation twice
        # and misattribute scores whenever the permutation is not its own
        # inverse (possible with three or more players).
        total_scores[agents_order] += gs_copy.get_scores()

    return total_scores, total_scores / games_count