def __init__(self, state, root, config):
     """Capture the search statistics of ``root`` together with ``state``.

     Args:
         state: Object exposing ``history``; it is also passed to
             ``game.utility`` when Q-values are disabled.
         root: Object whose ``stats`` yields indexable entries. Index 0 of
             each entry goes into ``q_values`` and index 1 into
             ``child_visits``.
         config: Object exposing ``use_q_value`` and ``size``.
     """
     self.use_q_value = config.use_q_value
     self.size = config.size
     self.history = state.history

     # NOTE(review): float16 represents integers exactly only up to 2048;
     # confirm that visit counts stay inside that range.
     visit_counts = [entry[1] for entry in root.stats]
     self.child_visits = np.array(visit_counts).astype(np.float16)

     if not self.use_q_value:
         # Without Q-values, keep the utility of ``state`` as seen by
         # player 1 and by player -1.
         self.utility = (game.utility(state, 1), game.utility(state, -1))
     else:
         self.q_values = [entry[0] for entry in root.stats]
    def play_games(self, fst, snd, num_games, games_played=0):
        """Play ``num_games`` games in lockstep and count first-player wins.

        Every loop iteration applies one move to each running game. Games
        for which ``game.terminal_test`` is true are then dropped from the
        batch, and the loop ends once the batch is empty.

        Args:
            fst: Mover on even turns. When falsy, the actions come from
                ``self.human_input()`` instead of ``fst.decide_parallel``.
            snd: Mover on odd turns, with the same falsy convention.
            num_games: Number of games started from
                ``game.initial_state(self.size)``.
            games_played: Starting value of the finished-games counter.

        Returns:
            How many post-move states had ``game.utility(state, 1) == 1``,
            summed over all turns.
        """
        states = [game.initial_state(self.size) for _ in range(num_games)]
        fst_wins = 0
        finished_games = games_played
        turn = 0

        while states:
            if self.visual:
                self.gui.update(states[0])

            # Even turns belong to the first player, odd turns to the second.
            mover = fst if turn % 2 == 0 else snd
            if mover:  # the mover is an AI
                actions = mover.decide_parallel(states)
            else:
                actions = self.human_input()

            states = [game.result(s, a) for s, a in zip(states, actions)]
            if self.visual:
                self.gui.update(states[0])

            # NOTE(review): the tally runs over every state in the batch, so
            # it presumably relies on utility being 0 for unfinished games;
            # finished games are removed right below, before the next turn.
            for board in states:
                if game.utility(board, 1) == 1:
                    fst_wins += 1

            still_running = []
            for board in states:
                if not game.terminal_test(board):
                    still_running.append(board)
                    continue
                finished_games += 1
                # Disabled hook, the only consumer of finished_games:
                # self.save_game_result(fst, snd, game.utility(board, 1), finished_games)

            # Carry only the unfinished games into the next turn.
            states = still_running
            turn += 1

        return fst_wins
def test_utility_player_one_won():
    """After moves 0, 1, 2 on a size-2 game, utility for player 1 is 1."""
    state = game.initial_state(2)
    for move in (0, 1, 2):
        state = game.result(state, move)
    assert game.utility(state, 1) == 1
def test_utility_player_one_lost():
    """After moves 3, 0, 2, 1 on a size-2 game, utility for player 1 is -1."""
    state = game.initial_state(2)
    for move in (3, 0, 2, 1):
        state = game.result(state, move)
    assert game.utility(state, 1) == -1
Esempio n. 5
0
def backpropagate(node, state):
    """Walk from ``node`` up its ``parent`` links, updating every node.

    Each visited node is updated with ``game.utility(state, p)``, where
    ``p`` is the negation of ``game.player`` applied to that node's own
    state. The walk stops at the first falsy node (for example a ``None``
    parent).

    Args:
        node: Starting node; must expose ``state``, ``parent`` and
            ``update``.
        state: State whose utility is propagated.
    """
    current = node
    while current:
        # NOTE(review): the negation presumably selects the player who moved
        # into this node rather than the one to move - confirm.
        perspective = -game.player(current.state)
        current.update(game.utility(state, perspective))
        current = current.parent
def test_utility_not_terminal_player_two():
    """The initial size-6 state has utility 0 for player -1."""
    opening = game.initial_state(6)
    assert game.utility(opening, -1) == 0