def uct_selection(self, states):
    """Pick the next (state, move) pair to explore among ``states``.

    ``states`` is an iterable of ``(move, state)`` pairs.  Statistics are
    looked up in ``self.plays`` / ``self.wins`` under the key
    ``(board.current_player(state), hashable(state))``.

    * If at least one candidate has a play count of 0, one of those
      candidates is returned at random.
    * Otherwise every candidate is scored with
      ``wins / plays + 1.4 * sqrt(log(total_plays) / plays)`` plus the value
      returned by ``self.get_prediction_value(state[1:], rows, cols)``, and
      the highest-scoring candidate is returned.

    Returns:
        A ``(state, move)`` tuple (note the order is swapped with respect to
        the input pairs).

    Raises:
        KeyError: if a candidate has no entry in ``self.plays``/``self.wins``.
        ValueError: if ``states`` is empty.
    """
    board = self.board

    # Build the statistics key once per candidate instead of recomputing
    # the (player, hash) pair for every lookup.
    candidates = [
        ((board.current_player(state), hashable(state)), move, state)
        for move, state in states
    ]

    # TODO: skip this scan when all next states have already been visited.
    unplayed = [
        (state, move) for key, move, state in candidates
        if self.plays[key] == 0
    ]
    if unplayed:
        return choice(unplayed)

    log_total = math.log(sum(self.plays[key] for key, _, _ in candidates))

    def score(candidate):
        key, _, state = candidate
        plays = self.plays[key]
        return (
            (float(self.wins[key]) / plays)
            + 1.4 * math.sqrt(float(log_total) / plays)
            + self.get_prediction_value(state[1:], board.rows, board.cols)
        )

    # Rank by (score, move) only.  Ties on the score are still broken by the
    # move, but the state itself is never compared: comparing array-like
    # states inside a tuple is ambiguous and can raise.
    _, move, state = max(
        candidates,
        key=lambda candidate: (score(candidate), candidate[1]),
    )
    return state, move
def simulate_random_game(self, state, player1):
    """Play one uniformly random game from ``state`` and record the result.

    Starting from a copy of ``state``, random legal moves are applied until
    ``self.board.is_finished`` reports the game is over.  Every position
    reached (including the starting one) is identified by the key
    ``(board.current_player(position), hashable(position))``.  After the
    playout, ``self.plays`` is incremented for each of those keys, and
    ``self.wins`` is incremented as well when ``player1`` equals the winner
    reported by ``self.board.winner``.

    Args:
        state: position to start the playout from; it is not modified
            (the playout works on ``np.copy(state)``).
        player1: the player whose wins are being counted.

    Returns:
        None.  The results are stored in ``self.plays`` and ``self.wins``.
    """
    board = self.board
    state_copy = np.copy(state)

    start_key = (board.current_player(state), hashable(state))
    # Positions met during the playout are registered lazily below; do the
    # same for the starting position so the final update cannot raise
    # KeyError when the caller has not registered it beforehand.
    self.plays.setdefault(start_key, 0)
    self.wins.setdefault(start_key, 0)
    states_simulated = [start_key]

    while not board.is_finished(state_copy):
        random_move = choice(board.legal_plays(state_copy))
        state_copy, _ = board.next_state(state_copy, random_move)
        key = (board.current_player(state_copy), hashable(state_copy))
        states_simulated.append(key)
        # Direct dict membership: ``in d.keys()`` builds a list on Python 2.
        if key not in self.plays:
            self.plays[key] = 0
            self.wins[key] = 0

    # The outcome is the same for every recorded position, so decide once.
    player1_won = player1 == board.winner(state_copy)
    for key in states_simulated:
        self.plays[key] += 1
        if player1_won:
            self.wins[key] += 1
def get_best_move(self, player, next_states):
    """Return the candidate with the highest recorded win rate.

    Args:
        player: player component of the statistics key; every candidate is
            looked up under ``(player, hashable(state))``.
        next_states: iterable of ``(move, state)`` pairs.

    Returns:
        ``(state, best_move, r, c, o)`` where ``r, c, o`` are the three
        values returned by ``self.board.translate_to_coord(best_move)``.

    Raises:
        ValueError: if ``next_states`` is empty.
    """
    def win_rate_of(state):
        key = (player, hashable(state))
        plays = self.plays.get(key, 1)
        # A state can be registered with 0 plays before it has ever been
        # simulated; treat it as a 0.0 win rate instead of dividing by zero.
        if plays == 0:
            return 0.0
        # float() keeps this a true division on Python 2 as well.
        return float(self.wins.get(key, 0)) / plays

    rated = [(win_rate_of(state), move, state) for move, state in next_states]
    # Rank on (win rate, move) only so that states are never compared with
    # each other when two candidates tie.
    win_rate, best_move, state = max(rated, key=lambda entry: (entry[0], entry[1]))
    print("Picked move " + str(best_move) + " with win rate of " + str(win_rate))
    r, c, o = self.board.translate_to_coord(best_move)
    return state, best_move, r, c, o
def expand_state(self, current_state):
    """Generate all successors of ``current_state`` and register them.

    For every move returned by ``self.board.legal_plays(current_state)`` the
    successor is computed with ``self.board.next_state`` and its statistics
    entry ``(board.current_player(successor), hashable(successor))`` is set
    to 0 in both ``self.plays`` and ``self.wins``.

    NOTE(review): the entries are assigned unconditionally, so statistics
    already collected for a successor are overwritten with 0 -- confirm this
    is intended before relying on counts surviving a re-expansion.

    Returns:
        A list of ``(move_done, new_state)`` pairs, one per legal move, in
        the order the moves were returned by ``legal_plays``.
    """
    board = self.board
    next_states = []
    for move in board.legal_plays(current_state):
        new_state, move_done = board.next_state(current_state, move)
        next_states.append((move_done, new_state))
        # Build the statistics key once; it is shared by both tables.
        key = (board.current_player(new_state), hashable(new_state))
        self.plays[key] = 0
        self.wins[key] = 0
    return next_states
def simulate_random_game(self, player1, time_limit):
    """Run one time-boxed playout from ``self.next_states`` and record it.

    The first position is chosen with ``self.uct_selection`` from
    ``self.next_states``.  From there the playout repeatedly expands the
    current position (``self.expand_state``) and lets ``self.uct_selection``
    pick the successor, until the board reports the game as finished or
    ``time_limit`` seconds have elapsed since the call started.

    Statistics are only updated when the playout actually reached a finished
    position: ``self.plays`` is incremented for every visited
    ``(player, hash)`` key, and ``self.wins`` too when ``player1`` equals the
    winner.  A playout cut short by the time limit leaves both untouched.

    Returns:
        None.
    """
    started_at = time.time()

    current, _ = self.uct_selection(self.next_states)
    visited = [(self.board.current_player(current), hashable(current))]
    # Continue on a copy of the selected state.
    current = np.copy(current)

    while not self.board.is_finished(current) \
            and ((time.time() - started_at) < time_limit):
        children = self.expand_state(current)
        current, _ = self.uct_selection(children)
        position_hash = hashable(current)
        visited.append((self.board.current_player(current), position_hash))

    # Out of time before the game ended: nothing to record.
    if not self.board.is_finished(current):
        return

    winner = self.board.winner(current)
    for stats_key in visited:
        self.plays[stats_key] += 1
        if player1 == winner:
            self.wins[stats_key] += 1
def get_best_move(self, next_states):
    """Return the candidate whose recorded win rate is strictly the highest.

    ``next_states`` is a sequence of ``(move, state)`` pairs.  A random pair
    is drawn first and is kept as the answer unless some candidate has a win
    rate greater than 0; candidates whose play count is 0 are skipped.
    Statistics are read from ``self.plays`` / ``self.wins`` under the key
    ``(board.current_player(state), hashable(state))``; a missing play count
    defaults to 1 and a missing win count to 0.

    Returns:
        ``(best_state, best_move, r, c, o)`` where the last three values come
        from ``self.board.translate_to_coord(best_move)``.
    """
    # Random fallback, used when no candidate beats a win rate of 0.
    chosen_move, chosen_state = choice(next_states)
    top_rate = 0

    for candidate_move, candidate_state in next_states:
        visit_count = self.plays.get(
            (self.board.current_player(candidate_state),
             hashable(candidate_state)),
            1)
        if visit_count == 0:
            continue

        win_count = self.wins.get(
            (self.board.current_player(candidate_state),
             hashable(candidate_state)),
            0)
        rate = float(win_count) / visit_count
        if rate > top_rate:
            top_rate, chosen_move, chosen_state = \
                rate, candidate_move, candidate_state

    coord_r, coord_c, coord_o = self.board.translate_to_coord(chosen_move)
    return chosen_state, chosen_move, coord_r, coord_c, coord_o
def simulate_random_game(self, state, player1):
    """Play one random game from ``state`` and credit the result to it.

    A copy of ``state`` is advanced with randomly chosen legal moves until
    the board reports the game as finished.  Only the starting position is
    updated afterwards: its ``self.plays`` entry, keyed by
    ``(board.current_player(state), hashable(state))``, is incremented, and
    its ``self.wins`` entry is incremented too when ``player1`` equals the
    winner.

    Returns:
        None.
    """
    board = self.board
    position = np.copy(state)
    mover = board.current_player(state)

    while True:
        if board.is_finished(position):
            break
        options = board.legal_plays(position)
        position, _ = board.next_state(position, choice(options))

    winner = board.winner(position)
    stats_key = (mover, hashable(state))
    self.plays[stats_key] += 1
    if player1 == winner:
        self.wins[stats_key] += 1