class NSquare(StdOutPlayer):
    """Player that scores every legal move with a neural network.

    For each candidate move a feature vector is built from the current parsed
    board followed by the board with that move marked, and the network's
    prediction for each vector decides which move is returned.
    """

    def __init__(self):
        super().__init__()
        self.nn = NeuralNetwork()
        # Name of the saved model handed to NeuralNetwork.load.
        self.nn.load("2018-07-19 22:29:06")

    def get_my_move(self):
        """Return the chosen move as a ``(main_board_coords, sub_board_coords)`` tuple.

        When ``sub_board_next_player_must_play`` is ``None`` every playable
        sub-board is searched; otherwise only that sub-board is searched.
        """
        current_board = parse(self.main_board)
        forced_board = self.main_board.sub_board_next_player_must_play
        if forced_board is None:
            candidate_boards = self.main_board.get_playable_coords()
        else:
            candidate_boards = [forced_board]

        features, all_moves = self._enumerate_moves(current_board, candidate_boards)
        # NOTE(review): these feature vectors are passed flat, whereas C2MCTS
        # reshapes its features to (18, 9, 1) before calling predict on the
        # same saved model -- confirm that predict accepts both layouts.
        scores = self.nn.predict(np.array(features))
        return all_moves[self._pick_index(scores)]

    def _enumerate_moves(self, current_board, candidate_boards):
        """Build one feature vector per legal move in ``candidate_boards``.

        Returns two parallel lists: ``features[i]`` is the feature vector for
        ``moves[i]``, and each entry of ``moves`` is a
        ``(main_board_coords, sub_board_coords)`` tuple.
        """
        # The "before" half of every feature vector is the same, so flatten once.
        current_flat = current_board.flatten()
        features = []
        moves = []
        for board_coords in candidate_boards:
            board_index = reverse_moves_mapper[(board_coords.row, board_coords.col)]
            sub_board = self.main_board.get_sub_board(board_coords)
            for cell_coords in sub_board.get_playable_coords():
                cell_index = reverse_moves_mapper[(cell_coords.row, cell_coords.col)]
                next_board = deepcopy(current_board)
                next_board[board_index][cell_index] = 1
                features.append(
                    np.concatenate((current_flat, next_board.flatten()), axis=0)
                )
                moves.append((board_coords, cell_coords))
        return features, moves

    @staticmethod
    def _pick_index(scores):
        """Return the index of the move to play given per-move score pairs.

        The move with the highest ``score[0]`` is preferred.  If no
        ``score[0]`` is non-negative, the move with the highest ``score[1]``
        is used instead.  Ties keep the earliest move, and both searches start
        from a floor of -1, so index 0 is returned when nothing exceeds it.
        """
        # NOTE(review): the fallback was originally named "least_o", yet it
        # selects the *highest* score[1] -- confirm the intended direction.
        best_first, best_first_index = -1, 0
        best_second, best_second_index = -1, 0
        for index, score in enumerate(scores):
            if score[0] > best_first:
                best_first, best_first_index = score[0], index
            if score[1] > best_second:
                best_second, best_second_index = score[1], index
        return best_second_index if best_first < 0 else best_first_index
class C2MCTS:
    """Monte Carlo tree search that evaluates leaves with a neural network.

    Each iteration clones a fresh game from ``board``/``last_turn``, walks the
    tree (selection), adds one child (expansion), asks the network to judge a
    random follow-up move (roll-out) and propagates the verdict to the root.
    """

    def __init__(self, board, last_turn, turn=Turns.X.value, timeout=100, before=1):
        """Store the search parameters and load the evaluation network.

        Args:
            board: Board passed to ``UltimateTicTacToe`` for every iteration.
            last_turn: ``last_turn`` argument passed to ``UltimateTicTacToe``.
            turn: Turn value; the tree root is created with the switched turn.
            timeout: Time budget for ``run`` (presumably milliseconds, as it
                is compared against ``current_milli_time`` -- confirm).
            before: Safety margin subtracted from ``timeout``.
        """
        self.turn = turn
        self.board = board
        self._cloned_board = None
        self.mct = None
        self.timeout = timeout
        self.before = before
        self.last_turn = last_turn
        self.nn = NeuralNetwork()
        # Name of the saved model handed to NeuralNetwork.load.
        self.nn.load("2018-07-19 22:29:06")

    def run(self):
        """Search until the time budget is spent and return the best move."""
        reusable_root = self._find_child_for_previous_move()
        if reusable_root is None:
            # A new tree's root is created with the opposite player's turn.
            self.mct = Tree(switch_turns(self.turn))
        else:
            self.mct.set_root(reusable_root)

        start_time = current_milli_time()
        while current_milli_time() - start_time < self.timeout - self.before:
            self._cloned_board = clone(
                UltimateTicTacToe(board=self.board, last_turn=self.last_turn))
            self.roll_out(self.expansion(self.selection(self.mct.get_root())))
        return self.choose_best_next_move()

    def _find_child_for_previous_move(self):
        """Return the root child matching the cloned board's previous move.

        Returns ``None`` when there is no tree yet, the root has no children,
        or no child matches.  If several children match, the last one wins.
        """
        if self.mct is None:
            return None
        children = self.mct.get_root().get_children()
        if not children:
            return None
        previous_move = self._cloned_board.get_previous_move()
        match = None
        for child in children:
            if child.get_move() == previous_move:
                match = child
        return match

    def selection(self, node):
        """Descend by UCB while the node has a child for every free move.

        Each chosen child's move is played on the cloned board.  Returns the
        first node that has no children or fewer/more children than free moves.
        """
        children = node.get_children()
        while children and len(self._cloned_board.get_free_moves()) == len(children):
            node = self.select_ucb_child(children)
            self.play_cloned_board(node.get_move(), node.get_turn())
            children = node.get_children()
        return node

    def expansion(self, node):
        """Add a child for the first free move ``node`` has no child for.

        If the cloned game is already finished the node is flagged as game
        over and returned unchanged.  Otherwise the new child is returned
        after its move has been played on the cloned board.
        """
        if self._cloned_board.is_game_done():
            node.set_game_as_over()
            return node

        children = node.get_children()
        next_move = None
        for move in self._cloned_board.get_free_moves():
            if not any(child.get_move() == move for child in children):
                next_move = move
                break

        if next_move is None:
            # Every free move already has a child; adding a child for ``None``
            # and then unpacking it as a move would raise, so leave the node
            # unexpanded and let the caller evaluate it as it is.
            return node

        node = node.add_child(next_move)
        self.play_cloned_board(move=next_move, turn=node.get_turn())
        # A disabled heuristic used to follow here: it marked nodes that won a
        # small board and penalised positions with fewer than three free
        # moves, to give the bigger board more weight.
        return node

    def roll_out(self, node):
        """Evaluate the cloned board and back-propagate the outcome from ``node``.

        For an unfinished game a random free move is marked with 1 on a copy
        of the board; the old and new boards are concatenated, reshaped to
        (18, 9, 1) and scored by the network.  The larger of the two scores
        decides WIN or LOSE, and equal scores count as DRAW.
        """
        old_board = self._cloned_board.get_board_list()
        if not self._cloned_board.is_game_done():
            move = choice(self._cloned_board.get_free_moves())
            new_board = clone(old_board)
            new_board[move[0]][move[1]] = 1
            feature = np.concatenate(
                (old_board.flatten(), new_board.flatten()), axis=0).reshape(18, 9, 1)
            score = self.nn.predict(np.array([feature]))[0]
            if score[0] > score[1]:
                outcome = GameState.WIN
            elif score[1] > score[0]:
                outcome = GameState.LOSE
            else:
                outcome = GameState.DRAW
        elif self._cloned_board.get_winner() is None:
            outcome = GameState.DRAW
        else:
            # NOTE(review): the result is derived from the node's turn only;
            # which side get_winner() reports is never consulted -- confirm.
            outcome = (GameState.WIN if node.get_turn() == Turns.X.value
                       else GameState.LOSE)
        self.backpropogate(outcome, node)

    def backpropogate(self, game_state, node):
        """Update statistics from ``node`` up to the root.

        A DRAW is recorded unchanged on every node.  Otherwise ``node`` and
        every second ancestor record ``game_state`` while the nodes in between
        record the opposite result.  Does nothing when ``node`` is ``None``.
        """
        opposite = GameState.LOSE if game_state == GameState.WIN else GameState.WIN
        depth = 0
        while node is not None:
            if game_state == GameState.DRAW or depth % 2 == 0:
                node.update_stats(game_state)
            else:
                node.update_stats(opposite)
            node = node.get_parent()
            depth += 1

    def select_ucb_child(self, nodes):
        """Return the node with the largest UCB value (last one on ties)."""
        return sorted(nodes, key=lambda n: n.get_ucb_value())[-1]

    def play_cloned_board(self, move, turn):
        """Play ``move`` for ``turn`` on the cloned board."""
        self._cloned_board.move(turn, *move)

    def choose_best_next_move(self):
        """Return the move of the root child with the highest score.

        Raises:
            IndexError: If the root has no children, e.g. when no search
                iteration ran.
        """
        best_child = sorted(self.mct.get_root().get_children(),
                            key=lambda n: n.get_score())[-1]
        return best_child.get_move()