def __init__(self, state, root, config):
    """Record a snapshot of a searched position.

    Args:
        state: Game state. Its ``history`` attribute is stored, and the
            state is scored with ``game.utility`` when Q-values are off.
        root: Search root whose ``stats`` entries are indexed as
            ``entry[0]`` (stored as the Q-value) and ``entry[1]`` (stored
            as the visit count).
        config: Supplies ``use_q_value`` and ``size``.
    """
    self.use_q_value = config.use_q_value
    self.size = config.size
    self.history = state.history

    # Visit counts are kept as half-precision floats.
    visit_counts = np.array([entry[1] for entry in root.stats])
    self.child_visits = visit_counts.astype(np.float16)

    if not self.use_q_value:
        # Q-values disabled: store the utility of ``state`` for player 1
        # and player -1 instead.
        self.utility = (game.utility(state, 1), game.utility(state, -1))
        return

    # Q-values enabled: keep the first component of every stats entry.
    self.q_values = [entry[0] for entry in root.stats]
def play_games(self, fst, snd, num_games, games_played=0):
    """Play ``num_games`` games side by side and count first-player wins.

    All games advance one move per loop iteration. Games that reach a
    terminal state are dropped from the active list; the loop ends when no
    active games remain.

    Args:
        fst: First player. A truthy value is asked for moves through
            ``decide_parallel(states)``; a falsy value means the moves
            come from ``self.human_input()``.
        snd: Second player, handled the same way as ``fst``.
        num_games: Number of games to start.
        games_played: Starting value for the finished-games counter.

    Returns:
        The number of times a state with ``game.utility(state, 1) == 1``
        was produced by a move.
    """
    active = [game.initial_state(self.size) for _ in range(num_games)]
    fst_wins = 0
    # Only the (currently commented-out) result-saving hook used this counter.
    finished_games = games_played
    turn = 0

    while active:
        if self.visual:
            self.gui.update(active[0])

        # Even turns belong to the first player, odd turns to the second.
        mover = fst if turn % 2 == 0 else snd
        if mover:
            actions = mover.decide_parallel(active)
        else:
            actions = self.human_input()

        active = [game.result(s, a) for s, a in zip(active, actions)]
        if self.visual:
            self.gui.update(active[0])

        fst_wins += len([s for s in active if game.utility(s, 1) == 1])

        # Keep only the games that are still running for the next turn.
        still_running = [s for s in active if not game.terminal_test(s)]
        finished_games += len(active) - len(still_running)
        active = still_running
        turn += 1

    return fst_wins
def test_utility_player_one_won():
    """Utility for player 1 is 1 after actions 0, 1, 2 on a size-2 game."""
    state = game.initial_state(2)
    for action in (0, 1, 2):
        state = game.result(state, action)

    assert game.utility(state, 1) == 1
def test_utility_player_one_lost():
    """Utility for player 1 is -1 after actions 3, 0, 2, 1 on a size-2 game."""
    state = game.initial_state(2)
    for action in (3, 0, 2, 1):
        state = game.result(state, action)

    assert game.utility(state, 1) == -1
def backpropagate(node, state):
    """Push the utility of ``state`` up the tree from ``node`` to the root.

    Each visited node is updated with ``game.utility(state, p)``, where
    ``p`` is the negated result of ``game.player`` for that node's own
    state. The walk follows ``parent`` links and stops at the first falsy
    node.

    Args:
        node: Node to start from; may be falsy, in which case nothing happens.
        state: State whose utility is propagated.
    """
    current = node
    while current:
        # NOTE(review): negating the player id presumably selects the
        # player who moved into this node — confirm against game.player.
        perspective = -game.player(current.state)
        reward = game.utility(state, perspective)
        current.update(reward)
        current = current.parent
def test_utility_not_terminal_player_two():
    """Utility for player -1 is 0 on a freshly created size-6 game."""
    fresh_state = game.initial_state(6)

    assert game.utility(fresh_state, -1) == 0