Example 1
    def select_move(self, game):
        previous_move = game.last_move
        # First move of the game: play any open space at random.
        if previous_move is None:
            open_spaces = game.board.return_open_spaces()
            return Move.play(open_spaces[np.random.choice(len(open_spaces))])

        # Take an immediate win when one is available.
        winning_move = self.find_winning_move(game)
        if winning_move:
            return Move.play(winning_move)

        # Otherwise, prefer open spaces adjacent to the opponent's last move.
        open_spaces = []
        for r in [
                previous_move.point.row - 1, previous_move.point.row,
                previous_move.point.row + 1
        ]:
            for c in [
                    previous_move.point.col - 1, previous_move.point.col,
                    previous_move.point.col + 1
            ]:
                pt = Point(r, c)
                if game.board.is_on_grid(pt) and game.board.get(pt) is None:
                    open_spaces.append(pt)
        # Fall back to any open space if no neighbor is available.
        if len(open_spaces) == 0:
            open_spaces = game.board.return_open_spaces()

        # Discard moves that would hand the opponent an immediate win.
        open_spaces = self.remove_losing_moves(game, open_spaces)
        return Move.play(open_spaces[np.random.choice(len(open_spaces))])
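These agents all lean on a small set of game primitives: Point, Move.play, and a board exposing return_open_spaces, is_on_grid, and get. A minimal sketch of the Point and Move pieces, inferred from how the snippets use them (the real classes in the project may differ):

from collections import namedtuple

# Board coordinates are 1-based (row, col) pairs.
Point = namedtuple('Point', ['row', 'col'])


class Move:
    """A move that places a piece on a point."""

    def __init__(self, point=None):
        self.point = point
        self.is_play = point is not None

    @classmethod
    def play(cls, point):
        return cls(point=point)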
Example 2
    def select_move(self, game_state):
        num_moves = self.encoder.board_width * self.encoder.board_height
        board_tensor = self.encoder.encode(game_state)
        X = np.array([board_tensor])

        # With probability `temperature`, explore by sampling uniformly;
        # otherwise follow the policy network's output.
        if np.random.random() < self.temperature:
            move_probs = np.ones(num_moves) / num_moves
        else:
            move_probs = self.model.predict(X)[0]

        # Prevent move probs from getting stuck at 0 or 1.
        eps = 1e-5
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)
        candidates = np.arange(num_moves)
        # Sampling every index without replacement, weighted by probability,
        # yields a randomized ranking of the moves.
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)
        # Play the highest-ranked point that is a legal move.
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            if game_state.is_valid_move(Move.play(point)):
                if self.collector is not None:
                    self.collector.record_decision(state=board_tensor,
                                                   action=point_idx)
                return Move.play(point)
Example 3
    def select_move(self, game):
        previous_move = game.last_move
        # First move of the game: play any open space at random.
        if previous_move is None:
            open_spaces = game.board.return_open_spaces()
            return Move.play(open_spaces[np.random.choice(len(open_spaces))])

        # Prefer open spaces adjacent to the opponent's last move.
        open_spaces = []
        for r in [
                previous_move.point.row - 1, previous_move.point.row,
                previous_move.point.row + 1
        ]:
            for c in [
                    previous_move.point.col - 1, previous_move.point.col,
                    previous_move.point.col + 1
            ]:
                pt = Point(r, c)
                if game.board.is_on_grid(pt) and game.board.get(pt) is None:
                    open_spaces.append(pt)
        if len(open_spaces) > 0:
            return Move.play(open_spaces[np.random.choice(len(open_spaces))])
        else:  # no open neighbors, choose randomly
            open_spaces = game.board.return_open_spaces()
            return Move.play(open_spaces[np.random.choice(len(open_spaces))])
Example 4
    def select_move(self, game_state):
        board_tensor = self.encoder.encode(game_state)

        # Pair each legal play with a copy of the current board tensor.
        moves = []
        board_tensors = []
        for move in game_state.legal_moves():
            if not move.is_play:
                continue
            moves.append(self.encoder.encode_point(move.point))
            board_tensors.append(board_tensor)
        if not moves:
            raise NotImplementedError()  # should never happen

        num_moves = len(moves)
        board_tensors = np.array(board_tensors)
        # One-hot encode each candidate move.
        move_vectors = np.zeros((num_moves, self.encoder.num_points()))
        for i, move in enumerate(moves):
            move_vectors[i][move] = 1

        # The two-input model scores every (board, move) pair in a single batch.
        values = self.model.predict([board_tensors, move_vectors])
        values = values.reshape(len(moves))

        ranked_moves = self.rank_moves_eps_greedy(values)

        # Every candidate is already a legal play, so the top-ranked move is played.
        for move_idx in ranked_moves:
            point = self.encoder.decode_point_index(moves[move_idx])
            if self.collector is not None:
                self.collector.record_decision(state=board_tensor,
                                               action=moves[move_idx])
            return Move.play(point)
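Example 4 calls a rank_moves_eps_greedy helper that isn't shown above. A plausible sketch, assuming a self.temperature attribute sets the ε-greedy exploration rate (with probability temperature the ranking is random; otherwise moves are ordered from highest to lowest estimated value):

    def rank_moves_eps_greedy(self, values):
        # Exploration: occasionally rank the moves at random.
        if np.random.random() < self.temperature:
            values = np.random.random(values.shape)
        # np.argsort sorts ascending, so reverse to get best-value-first.
        ranked_moves = np.argsort(values)
        return ranked_moves[::-1]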
Example 5
    def select_move(self, game_state):
        num_moves = self.encoder.board_width * self.encoder.board_height
        move_probs = self.predict(game_state)
        # Cube the probabilities to sharpen the distribution toward the
        # strongest moves, then renormalize.
        move_probs = move_probs**3
        eps = 1e-5
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)
        candidates = np.arange(num_moves)
        # Sampling without replacement, weighted by probability, gives a
        # randomized ranking of the moves.
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            if game_state.is_valid_move(Move.play(point)):
                return Move.play(point)
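The predict helper used in Example 5 is not shown either. A minimal sketch, assuming it encodes the position and runs a single-sample batch through the underlying Keras model:

    def predict(self, game_state):
        encoded_state = self.encoder.encode(game_state)
        input_tensor = np.array([encoded_state])
        # predict returns a batch of one distribution over board points.
        return self.model.predict(input_tensor)[0]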
Example 6
    def find_winning_move(self, game):
        # Try every open space; return one that ends the game in our favor.
        for candidate in game.board.return_open_spaces():
            next_state = game.apply_move(Move.play(candidate))
            # After the move, current_player is the opponent, so
            # current_player.other is the player who just moved.
            if (next_state.is_over()
                    and next_state.winner == next_state.current_player.other):
                return candidate
        return None
Example 7
    def remove_losing_moves(self, game, open_spaces):
        # Keep only the moves that do not give the opponent an immediate win.
        okay_moves = []
        for candidate in open_spaces:
            next_state = game.apply_move(Move.play(candidate))
            opponent_winning_move = self.find_winning_move(next_state)
            if opponent_winning_move is None:
                okay_moves.append(candidate)
        if len(okay_moves) > 0:
            return okay_moves
        else:
            # Every move loses; return the original list rather than nothing.
            return open_spaces
Example 8
    def select_move(self, game):
        open_spaces = game.board.return_open_spaces()
        while True:
            human_input = input("Select an index: ")
            try:
                human_input = int(human_input)
                # Convert the flat index into 1-based board coordinates.
                r = human_input // game.board.num_cols
                c = human_input % game.board.num_cols
                pt = Point(row=r + 1, col=c + 1)
                if pt in open_spaces:
                    return Move.play(pt)
                else:
                    print("Error. Try again.")
            except ValueError:
                # Non-numeric input: show the encoded board as a hint.
                board_tensor = self.encoder_test.encode(game)
                print(board_tensor)
Example 9
    def select_move(self, game):
        # Pick any open space uniformly at random.
        open_spaces = game.board.return_open_spaces()
        return Move.play(open_spaces[np.random.choice(len(open_spaces))])
Example 10
    def select_move(self, game, verbose=False):
        num_moves = self.encoder.board_width * self.encoder.board_height
        board_tensor = self.encoder.encode(game)
        X = np.array([board_tensor])

        # Two-output model: a distribution over moves and a value estimate
        # for the current position.
        actions, values = self.model(X)
        move_probs = actions[0]
        estimated_value = values[0][0]
        self.last_move_value = float(estimated_value)

        eps = 1e-6
        # board_tensor[3] is used as a mask: it zeroes out the probabilities
        # of points that are not playable before renormalizing.
        move_probs = np.multiply(board_tensor[3].flatten(), move_probs)
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)
        candidates = np.arange(num_moves)
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)

        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            move = Move.play(point)
            if game.is_valid_move(move):

                # Plot heatmaps
                if verbose:
                    for idx in [
                            0, 3, 4, 5, 6, 7, 8, 9, 12, 52, 64, 65, 66, 67, 75,
                            76, 77
                    ]:
                        move_probs[idx] = np.nan
                    heatmap = move_probs.reshape((6, 13))
                    fig, ax = plt.subplots()
                    im = ax.imshow(heatmap)
                    ax.spines['top'].set_visible(False)
                    ax.spines['right'].set_visible(False)
                    ax.spines['bottom'].set_visible(False)
                    ax.spines['left'].set_visible(False)
                    ax.set_title(
                        f"{game.current_player} to move\nChosen moves: {ranked_moves[0:6]}\nActual move: {point_idx}\nEstimated value: {estimated_value}"
                    )
                    for i in range(6):
                        for j in range(13):
                            if board_tensor[3][i][j] > 0:
                                _text = ax.text(j,
                                                i,
                                                13 * i + j,
                                                ha="center",
                                                va="center",
                                                color="w")
                    fig.tight_layout()
                    plt.show()

                if self.collector is not None:
                    self.collector.record_decision(
                        state=board_tensor,
                        action=point_idx,
                        estimated_value=estimated_value)
                return move
        raise ValueError("No valid move found")
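None of the snippets show how an agent is actually driven. A hypothetical self-play loop, assuming the game-state interface the examples rely on (current_player with .other, apply_move returning a new state, is_over, and winner):

def simulate_game(agent_one, agent_two, game):
    # Map each player to its agent; .other is the opposing player.
    agents = {
        game.current_player: agent_one,
        game.current_player.other: agent_two,
    }
    while not game.is_over():
        move = agents[game.current_player].select_move(game)
        game = game.apply_move(move)
    return game.winner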