コード例 #1
0
# Scratch / notebook-style exploration. Most lines are bare expressions whose
# values are only visible when evaluated in an interactive session.
# NOTE(review): env, action, memory, current_player, best_player, config,
# Residual_CNN, np, random, chess and bitboard are not defined in this snippet;
# they are presumably set up by earlier cells — confirm before running.

# Advance the environment by one action; the fourth return value is discarded.
state, value, done, _ = env.step(action)
memory.stmemory

memory.commit_ltmemory()
# NOTE(review): minibatch is read here but only assigned on the next line, so
# this fails when run top to bottom; presumably the cells ran out of order.
minibatch[0]['AV']
# Draw up to two entries from memory.ltmemory (fewer if it holds fewer).
minibatch = random.sample(memory.ltmemory, min(2, len(memory.ltmemory)))
# Convert the sampled entry's stored 'state' into the model's input format.
current_player.model.convertToModelInput(minibatch[0]['state'])
minibatch[0]
# Inspect the environment's size/shape attributes.
env.state_size
env.grid_shape
env.action_size
# Input shape handed to the network below: a leading 2 plus the grid shape.
(2,)+env.grid_shape
# Build a fresh Residual_CNN from config hyper-parameters and the shapes above.
tstrnn=Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
inp = best_player.model.convertToModelInput(env.gameState)
inp
# Wrap the single input in an outer array (a batch of one) before predicting.
tstrnn.predict(np.array([inp]))
env.actionSpace.shape


bitboard.shape

# Flatten bitboard to one dimension of 12*8*8 = 768 elements.
binary = np.reshape(bitboard,(12*8*8))

# NOTE(review): `chess` is presumably the python-chess package — confirm.
board = chess.Board()
# NOTE(review): no call parentheses here, so this evaluates to the attribute
# itself rather than calling it; compare board.fen() below.
board.board_fen
# Play the move written as 'e4' in standard algebraic notation.
board.push_san('e4')
board.board_fen
board
board
board.fen()
# Overwrite the board position from the given FEN board string.
board.set_board_fen('rnbqkbnr/pppppppp/8/8/4P3/3P4/PPP2PPP/RNBQKBNR')
コード例 #2
0
class Agent_NN:
    """Encode game states as network inputs and route them to a network.

    Two ``Residual_CNN`` models are held: ``nn_start`` receives the inputs
    built for the initial settlement/road phases and ``nn`` receives the
    inputs built for every other phase.  Inputs are ``float32`` arrays of
    shape ``(1, planes, d1, d2)`` whose sizes come from
    ``config.INPUT_START_DIM`` and ``config.INPUT_DIM`` respectively.

    Planes 0-10 (resources and ports) can be cached per rotation when
    ``enable_cache`` is ``True``.  The cache is keyed by rotation only, so
    ``purge_cache`` must be called whenever ``state.numbers``,
    ``state.tiles`` or ``state.ports`` change (e.g. a new board).
    """

    # Value added per dice number before scaling by 1/15.  Each value equals
    # the number of two-dice combinations producing that total; 7 is absent.
    _NUMBERS_OUTPUT = {
        2: 1,
        3: 2,
        4: 3,
        5: 4,
        6: 5,
        8: 5,
        9: 4,
        10: 3,
        11: 2,
        12: 1,
    }

    # Number of leading planes (resources + ports) covered by the cache.
    _BOARD_PLANES = 11

    def __init__(self, enable_cache=False):
        """Create both networks from ``config`` and an empty plane cache.

        Args:
            enable_cache: when exactly ``True``, resource/port planes are
                memoised per rotation in ``self.cache``.
        """
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                     config.INPUT_START_DIM,
                                     config.OUTPUT_START_DIM,
                                     config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               config.INPUT_DIM, config.OUTPUT_DIM,
                               config.HIDDEN_CNN_LAYERS)

        self.enable_cache = enable_cache
        self.cache = {}

    def purge_cache(self):
        """Drop every cached resource/port plane set."""
        self.cache = {}

    def nn_read(self, name):
        """Load both networks under ``name``.

        ``nn_start`` is read with tag ``'s'`` and ``nn`` with tag ``'g'``.
        """
        self.nn_start.read(name, 's')
        self.nn.read(name, 'g')

    def nn_write(self, name):
        """Save both networks under ``name`` (tags ``'s'`` and ``'g'``)."""
        self.nn_start.write(name, 's')
        self.nn.write(name, 'g')

    def predict(self, state, perspective, mcts):
        """Encode ``state`` and return the matching network's prediction.

        The network is chosen from the encoded input itself: an input with
        ``config.INPUT_DIM[0]`` planes goes to ``nn``, anything else goes to
        ``nn_start``.  ``mcts`` is only forwarded to ``build_nn_input``.
        """
        network = self.build_nn_input(state, perspective, mcts=mcts)

        if network.shape[1] == config.INPUT_DIM[0]:
            return self.nn.predict(network)
        return self.nn_start.predict(network)

    @staticmethod
    def _relative_seat(p, perspective):
        """Return player ``p``'s seat index counted from ``perspective``."""
        return (4 + p - perspective) % 4

    @staticmethod
    def _in_initial_phase(state):
        """Return whether ``state`` is in an initial placement phase."""
        return (state.game_phase == config.PHASE_INITIAL_SETTLEMENT
                or state.game_phase == config.PHASE_INITIAL_ROAD)

    def _fill_board_planes(self, nn_input, state, rotation):
        """Write the resource and port planes (0-10) into ``nn_input``.

        On a cache hit the planes are copied from ``self.cache[rotation]``;
        otherwise they are computed and, when caching is enabled, stored.
        """
        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :self._BOARD_PLANES, :, :] = self.cache[rotation]
            return

        vertex_pos = config.vertex_to_nn_input[rotation]

        # Resources outputs: every vertex of a numbered tile accumulates the
        # tile's weight on that resource's plane.
        # NOTE(review): ``resource - 2`` presumably maps resource ids starting
        # at 2 onto planes 0-4 — confirm against config.
        for number, tile in state.numbers:
            plane = state.tiles[tile] - 2
            weight = self._NUMBERS_OUTPUT[number] / 15.0
            for vertex in config.tiles_vertex[tile]:
                nn_input[0, plane, vertex_pos[vertex][0],
                         vertex_pos[vertex][1]] += weight

        # Ports: one plane per port kind, starting at plane 5.
        port_kinds = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
            config.WOOD, config.GENERIC
        ]
        for offset, kind in enumerate(port_kinds):
            for port_index, port in enumerate(state.ports):
                if port != kind:
                    continue
                for vertex in config.ports_vertex[port_index]['vert']:
                    nn_input[0, offset + 5, vertex_pos[vertex][0],
                             vertex_pos[vertex][1]] = 1

        if self.enable_cache is True:
            # Store a copy, not a slice view: a view would alias the array
            # returned to the caller (so in-place edits there would corrupt
            # the cache) and would keep the whole input array alive.
            self.cache[rotation] = (
                nn_input[:, :self._BOARD_PLANES, :, :].copy())

    def build_start_nn_input(self, state, perspective):
        """Build the input for the initial settlement/road phases.

        Plane layout (axis 1), with players ordered relative to
        ``perspective``:
            0-10   resources and ports
            11-18  per player: settlements, roads
            19-38  per player: the five resource card counts / 10
            39-40  ``state.initial_phase_decrease`` equal to 0 / 1
            41-44  player whose turn it is

        A rotation in ``[0, 12)`` is drawn at random on every call and
        selects the vertex-to-position mapping.

        Returns:
            ``float32`` array of shape ``(1, *config.INPUT_START_DIM)``.
        """
        nn_input = np.zeros(
            (1, config.INPUT_START_DIM[0], config.INPUT_START_DIM[1],
             config.INPUT_START_DIM[2]),
            dtype=np.float32)

        rotation = np.random.randint(12)
        self._fill_board_planes(nn_input, state, rotation)

        vertex_pos = config.vertex_to_nn_input[rotation]
        resources = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD
        ]

        for p in range(4):
            p_order = self._relative_seat(p, perspective)
            player = state.players[p]

            # Settlements and roads (each road adds 1/3 at both endpoints).
            for s in player.settlements:
                nn_input[0, 11 + 2 * p_order, vertex_pos[s][0],
                         vertex_pos[s][1]] = 1
            for r in player.roads:
                for end in (r[0], r[1]):
                    nn_input[0, 12 + 2 * p_order, vertex_pos[end][0],
                             vertex_pos[end][1]] += 1 / 3.0

            # Cards
            for key, r in enumerate(resources):
                nn_input[0, 19 + key + 5 * p_order, :, :] = (
                    player.cards[r] / 10.0)

            # Player turn
            if p == state.player_turn:
                nn_input[0, 41 + p_order, :, :] = 1

        # State
        if self._in_initial_phase(state):
            if state.initial_phase_decrease == 0:
                nn_input[0, 39, :, :] = 1
            if state.initial_phase_decrease == 1:
                nn_input[0, 40, :, :] = 1

        return nn_input

    def build_nn_input(self, state, perspective, mcts=None):
        """Build the network input for ``state`` seen from ``perspective``.

        States in an initial placement phase are delegated to
        ``build_start_nn_input``.  Otherwise the plane layout (axis 1), with
        players ordered relative to ``perspective``, is:
            0-10   resources and ports
            11-22  per player: settlements, cities, roads
            23-42  per player: the five resource card counts / 10
            43     vertices of the robber tile
            44-47  knights used / 5
            48-51  largest army badge
            52-55  longest road badge
            56-75  per player: the five special card counts / 3
            76     discard phase
            77-80  player whose turn it is
            81-86  throw dice, move robber, steal card, road building,
                   year of plenty, trade respond phases
            87     vertices reported by ``state.available_settlement_spot``

        A rotation in ``[0, 12)`` is drawn at random on every call and
        selects the vertex-to-position mapping.

        Args:
            state: game state to encode.
            perspective: index of the player the encoding is relative to.
            mcts: accepted for interface compatibility; not used here.

        Returns:
            ``float32`` array of shape ``(1, *config.INPUT_DIM)``, or the
            start-phase array described in ``build_start_nn_input``.
        """
        if self._in_initial_phase(state):
            return self.build_start_nn_input(state, perspective)

        nn_input = np.zeros(
            (1, config.INPUT_DIM[0], config.INPUT_DIM[1], config.INPUT_DIM[2]),
            dtype=np.float32)

        rotation = np.random.randint(12)
        self._fill_board_planes(nn_input, state, rotation)

        vertex_pos = config.vertex_to_nn_input[rotation]
        resources = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD
        ]
        special_kinds = [
            config.VICTORY_POINT, config.KNIGHT, config.MONOPOLY,
            config.ROAD_BUILDING, config.YEAR_OF_PLENTY
        ]

        for p in range(4):
            p_order = self._relative_seat(p, perspective)
            player = state.players[p]

            # Settlements, cities, roads (roads add 1/3 at both endpoints).
            for s in player.settlements:
                nn_input[0, 11 + 3 * p_order, vertex_pos[s][0],
                         vertex_pos[s][1]] = 1
            for c in player.cities:
                nn_input[0, 12 + 3 * p_order, vertex_pos[c][0],
                         vertex_pos[c][1]] = 1
            for r in player.roads:
                for end in (r[0], r[1]):
                    nn_input[0, 13 + 3 * p_order, vertex_pos[end][0],
                             vertex_pos[end][1]] += 1 / 3.0

            # Cards
            for key, r in enumerate(resources):
                nn_input[0, 23 + key + 5 * p_order, :, :] = (
                    player.cards[r] / 10.0)

            # Army Cards Played
            nn_input[0, 44 + p_order, :, :] = player.used_knights / 5.0

            # Army Holder
            nn_input[0, 48 + p_order, :, :] = player.largest_army_badge

            # Longest Road Holder
            nn_input[0, 52 + p_order, :, :] = player.longest_road_badge

            # Special Cards
            for key, r in enumerate(special_kinds):
                nn_input[0, 56 + key + 5 * p_order, :, :] = (
                    player.special_cards.count(r) / 3.0)

            # Player turn
            if p == state.player_turn:
                nn_input[0, 77 + p_order, :, :] = 1

        # Robber
        for vertex in config.tiles_vertex[state.robber_tile]:
            nn_input[0, 43, vertex_pos[vertex][0], vertex_pos[vertex][1]] = 1

        # Game phases, one plane each.
        phase_planes = (
            (config.PHASE_DISCARD, 76),
            (config.PHASE_THROW_DICE, 81),
            (config.PHASE_MOVE_ROBBER, 82),
            (config.PHASE_STEAL_CARD, 83),
            (config.PHASE_ROAD_BUILDING, 84),
            (config.PHASE_YEAR_OF_PLENTY, 85),
            (config.PHASE_TRADE_RESPOND, 86),
        )
        for phase, plane in phase_planes:
            if state.game_phase == phase:
                nn_input[0, plane, :, :] = 1

        # Vertices where a settlement may currently be placed.
        for s in range(54):
            if state.available_settlement_spot(s):
                nn_input[0, 87, vertex_pos[s][0], vertex_pos[s][1]] = 1

        return nn_input