Beispiel #1
0
class NSquare(StdOutPlayer):
    """Player that ranks every legal move with a neural network.

    For each candidate move a feature vector is built by concatenating the
    flattened current board with the flattened board after the move, the
    whole batch is scored by ``self.nn.predict`` and one move is returned.
    """

    # Saved model loaded when no ``model_name`` is supplied.
    _DEFAULT_MODEL_NAME = "2018-07-19 22:29:06"

    def __init__(self, model_name=_DEFAULT_MODEL_NAME):
        """Create the player and load the network weights.

        Args:
            model_name: Identifier handed to ``NeuralNetwork.load``. Defaults
                to the model the class has always used.
        """
        super().__init__()
        self.nn = NeuralNetwork()
        self.nn.load(model_name)

    def get_my_move(self):  # -> Tuple[MainBoardCoords, SubBoardCoords]
        """Pick the next move from the network's scores.

        Returns:
            A ``(main_board_coords, sub_board_coords)`` tuple.

        Raises:
            IndexError: If the board offers no playable move at all.
        """
        current_board = parse(self.main_board)
        forced_board = self.main_board.sub_board_next_player_must_play

        # With no forced sub-board every playable sub-board is a candidate;
        # otherwise only the forced one is.
        if forced_board is None:
            candidate_boards = self.main_board.get_playable_coords()
        else:
            candidate_boards = [forced_board]

        current_flat = current_board.flatten()
        features = []   # features[i] is the network input for
        all_moves = []  # the move stored in all_moves[i]
        for board_coords in candidate_boards:
            board_index = reverse_moves_mapper[(board_coords.row,
                                                board_coords.col)]
            sub_board = self.main_board.get_sub_board(board_coords)
            for cell_coords in sub_board.get_playable_coords():
                cell_index = reverse_moves_mapper[(cell_coords.row,
                                                   cell_coords.col)]
                next_board = deepcopy(current_board)
                next_board[board_index][cell_index] = 1
                features.append(
                    np.concatenate((current_flat, next_board.flatten()),
                                   axis=0))
                all_moves.append((board_coords, cell_coords))

        if not all_moves:
            # Fail before running the network on an empty batch; this is the
            # same exception type the final lookup would have produced.
            raise IndexError("no playable moves available")

        scores = self.nn.predict(np.array(features))

        # Track the first index holding the largest value of each output.
        # Both maxima start at -1, so outputs that never exceed -1 leave the
        # corresponding index at 0.
        best_first_index, best_first_score = 0, -1
        best_second_index, best_second_score = 0, -1
        for index, score in enumerate(scores):
            if score[0] > best_first_score:
                best_first_score = score[0]
                best_first_index = index
            if score[1] > best_second_score:
                best_second_score = score[1]
                best_second_index = index

        # NOTE(review): the fallback was originally named "least_o", yet it
        # selects the move with the LARGEST score[1]. Behaviour is kept as-is;
        # confirm whether the smallest value was intended.
        if best_first_score < 0:
            return all_moves[best_second_index]
        return all_moves[best_first_index]
Beispiel #2
0
class C2MCTS:
    """Monte-Carlo tree search whose roll-outs are scored by a neural network.

    Each iteration of ``run`` rebuilds a scratch ``UltimateTicTacToe`` board,
    walks down the tree (``selection``), adds one child (``expansion``),
    evaluates the resulting position (``roll_out``) and pushes the result up
    to the root (``backpropogate``).
    """

    # Saved model loaded when no ``model_name`` is supplied.
    _DEFAULT_MODEL_NAME = "2018-07-19 22:29:06"

    def __init__(self,
                 board,
                 last_turn,
                 turn=Turns.X.value,
                 timeout=100,
                 before=1,
                 model_name=_DEFAULT_MODEL_NAME):
        """Store the search configuration and load the network.

        Args:
            board: Board state passed to ``UltimateTicTacToe(board=...)``.
            last_turn: Passed to ``UltimateTicTacToe(last_turn=...)``.
            turn: Side this searcher plays; a fresh tree is rooted at the
                opposite side.
            timeout: Time budget for ``run``, in the unit returned by
                ``current_milli_time()`` (presumably milliseconds).
            before: Safety margin subtracted from ``timeout``.
            model_name: Identifier handed to ``NeuralNetwork.load``.
        """
        self.turn = turn
        self.board = board
        self._cloned_board = None
        self.mct = None
        self.timeout = timeout
        self.before = before
        self.last_turn = last_turn
        self.nn = NeuralNetwork()
        self.nn.load(model_name)

    def run(self):
        """Search until the time budget is spent and return the chosen move.

        Raises:
            IndexError: If the root has no children once the budget is spent.
        """
        # Re-root an existing tree on the child whose move equals the scratch
        # board's previous move; otherwise start a tree rooted at the opponent.
        # NOTE(review): ``self._cloned_board`` is the scratch board left over
        # from the last simulation of the previous ``run`` -- confirm that its
        # ``get_previous_move()`` is really the move that should be matched.
        reused_root = None
        if self.mct is not None:
            children = self.mct.get_root().get_children()
            if len(children) != 0:
                previous_move = self._cloned_board.get_previous_move()
                for child in children:
                    if child.get_move() == previous_move:
                        reused_root = child
        if reused_root is not None:
            self.mct.set_root(reused_root)
        else:
            self.mct = Tree(switch_turns(self.turn))

        start_time = current_milli_time()
        while current_milli_time() - start_time < self.timeout - self.before:
            # Every simulation starts from a fresh copy of the real position.
            self._cloned_board = clone(
                UltimateTicTacToe(board=self.board, last_turn=self.last_turn))
            self.roll_out(self.expansion(self.selection(self.mct.get_root())))
        return self.choose_best_next_move()

    def selection(self, node):
        """Descend while ``node`` has one child per free move.

        Each step follows the child with the highest UCB value and replays
        its move on the scratch board. Returns the node where descent stops.
        """
        while True:
            children = node.get_children()
            fully_expanded = (len(children) != 0 and len(
                self._cloned_board.get_free_moves()) == len(children))
            if not fully_expanded:
                return node
            node = self.select_ucb_child(children)
            self.play_cloned_board(node.get_move(), node.get_turn())

    def expansion(self, node):
        """Add one untried move below ``node`` and play it on the scratch board.

        If the game is already over the node is flagged via
        ``set_game_as_over()`` and returned unchanged. If no untried move is
        left the node is returned unchanged as well.
        """
        if self._cloned_board.is_game_done():
            node.set_game_as_over()
            return node

        tried_moves = [child.get_move() for child in node.get_children()]
        next_move = next(
            (move for move in self._cloned_board.get_free_moves()
             if move not in tried_moves),
            None)
        if next_move is None:
            # Nothing to expand. Previously ``None`` was added as a child and
            # then unpacked as a move, which raised a TypeError.
            return node

        child = node.add_child(next_move)
        self.play_cloned_board(move=next_move, turn=child.get_turn())
        # A disabled experiment at this point used to favour sub-board wins
        # (node.small_board_won()) and penalise positions with fewer than
        # three free moves (node.set_as_desirable(-0.5)).
        return child

    def roll_out(self, node):
        """Evaluate the scratch board and back-propagate the outcome.

        A finished game is scored from ``get_winner()`` and the node's turn.
        Otherwise one random free move is applied to a copy of the board and
        the network's two outputs decide between WIN, LOSE and DRAW.
        """
        if self._cloned_board.is_game_done():
            # NOTE(review): the result depends only on whose turn the node
            # holds, never on ``self.turn`` or the winner's identity --
            # confirm this matches what ``update_stats`` expects.
            if self._cloned_board.get_winner() is None:
                outcome = GameState.DRAW
            elif node.get_turn() == Turns.X.value:
                outcome = GameState.WIN
            else:
                outcome = GameState.LOSE
            self.backpropogate(outcome, node)
            return

        free_moves = self._cloned_board.get_free_moves()
        if len(free_moves) == 0:
            # Defensive: an unfinished game without moves used to crash in
            # ``choice``. Treated as a draw -- assumption, confirm.
            self.backpropogate(GameState.DRAW, node)
            return

        move = choice(free_moves)
        old_board = self._cloned_board.get_board_list()
        new_board = clone(old_board)
        new_board[move[0]][move[1]] = 1
        # Network input: board before and after the move, stacked and
        # reshaped to (18, 9, 1).
        feature = np.concatenate(
            (old_board.flatten(), new_board.flatten()),
            axis=0).reshape(18, 9, 1)
        score = self.nn.predict(np.array([feature]))[0]
        if score[0] > score[1]:
            outcome = GameState.WIN
        elif score[1] > score[0]:
            outcome = GameState.LOSE
        else:
            outcome = GameState.DRAW
        self.backpropogate(outcome, node)

    def backpropogate(self, game_state, node):
        """Update statistics from ``node`` up to the root.

        A draw is recorded unchanged on every level. Any other result is
        recorded as given on ``node`` and every second ancestor, and swapped
        (WIN <-> LOSE) on the levels in between.
        """
        if game_state == GameState.DRAW:
            flipped_state = game_state
        elif game_state == GameState.WIN:
            flipped_state = GameState.LOSE
        else:
            flipped_state = GameState.WIN

        depth = 0  # distance from the starting node; parity picks the result
        while node is not None:
            node.update_stats(game_state if depth % 2 == 0 else flipped_state)
            node = node.get_parent()
            depth += 1

    def select_ucb_child(self, nodes):
        """Return the node with the highest ``get_ucb_value()``."""
        # Scanning in reverse makes ties resolve to the last node, exactly as
        # the previous ``sorted(...)[-1]`` did, without sorting the list.
        return max(reversed(nodes), key=lambda n: n.get_ucb_value())

    def play_cloned_board(self, move, turn):
        """Apply ``move`` for ``turn`` on the scratch board."""
        self._cloned_board.move(turn, *move)

    def choose_best_next_move(self):
        """Return the move of the root child with the highest ``get_score()``.

        The root is not advanced to the chosen child here; ``run`` re-roots
        or rebuilds the tree on its next call.

        Raises:
            IndexError: If the root has no children (no simulation ran).
        """
        children = self.mct.get_root().get_children()
        if len(children) == 0:
            raise IndexError(
                "search tree root has no children; no simulation was run")
        # Reverse scan keeps the tie-breaking of ``sorted(...)[-1]``.
        best_child = max(reversed(children), key=lambda n: n.get_score())
        return best_child.get_move()