Beispiel #1
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state (or prev orders state) with a stack of FiLM-modulated graph conv blocks
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The encoding name - selects the '_<name>_film_gammas' / '_<name>_film_betas' entries of
                         self.outputs and the 'policy/<name>' variable scope
            :param reuse: Forwarded as the `reuse` argument of the variable scope (weight sharing between encodings)
            :return: The output of the last block, which is built with gcn_out_dim = hparams['attn_size'] // 2
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import film_gcn_res_block, preprocess_adjacency

        # FiLM modulation parameters registered for this encoding
        film_gammas = self.outputs['_%s_film_gammas' % name]
        film_betas = self.outputs['_%s_film_betas' % name]

        # Normalized adjacency, repeated once per item of the batch
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        nb_items = tf.shape(board_state)[0]
        adjacency = tf.tile(adjacency, [nb_items, 1, 1])

        def _film_block(inputs, gamma, beta, out_dim, residual):
            """ Builds one FiLM graph conv block on top of `inputs` """
            return film_gcn_res_block(inputs=inputs,
                                      gamma=gamma,
                                      beta=beta,
                                      gcn_out_dim=out_dim,
                                      norm_adjacency=adjacency,
                                      is_training=self.placeholders['is_training'],
                                      residual=residual)

        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

            # A small gaussian perturbation is added to the input to break symmetry
            noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
            noisy_state = board_state + noise

            # Dense layer applied before the graph conv blocks, with 'gcn_size' units
            encoded = tf.layers.Dense(units=self.hparams['gcn_size'], activation=tf.nn.relu)(noisy_state)

            # All blocks but the last one are residual and use 'gcn_size' as output size
            nb_residual_blocks = self.hparams['nb_graph_conv'] - 1
            for block_idx in range(nb_residual_blocks):
                encoded = _film_block(encoded,
                                      film_gammas[block_idx],
                                      film_betas[block_idx],
                                      out_dim=self.hparams['gcn_size'],
                                      residual=True)

            # The last block is not residual and uses the last gamma / beta
            encoded = _film_block(encoded,
                                  film_gammas[-1],
                                  film_betas[-1],
                                  out_dim=self.hparams['attn_size'] // 2,
                                  residual=False)

        return encoded
Beispiel #2
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state (or prev orders state) with a stack of graph convolutions
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The encoding name - the layers are built under the 'policy/<name>' variable scope
            :param reuse: Forwarded as the `reuse` argument of the variable scope (weight sharing between encodings)
            :return: The output of the final graph convolution, which is built with output_dim = hparams['attn_size']
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
        from diplomacy_research.utils.tensorflow import batch_norm

        # Normalized adjacency, repeated once per item of the batch
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        nb_items = tf.shape(board_state)[0]
        adjacency = tf.tile(adjacency, [nb_items, 1, 1])

        def _gcn(in_dim, out_dim, activation):
            """ Shorthand for a biased graph convolution layer over the shared adjacency """
            return GraphConvolution(input_dim=in_dim,
                                    output_dim=out_dim,
                                    norm_adjacency=adjacency,
                                    activation_fn=activation,
                                    bias=True)

        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

            # A small gaussian perturbation is added to the input to break symmetry
            noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
            encoded = board_state + noise

            # First layer - maps the input features to 'gcn_size'
            encoded = _gcn(encoded.shape[-1].value, self.hparams['gcn_size'], tf.nn.relu)(encoded)

            # Intermediate layers ('nb_graph_conv' - 2 of them), each followed by a batch norm
            for _ in range(self.hparams['nb_graph_conv'] - 2):
                encoded = _gcn(self.hparams['gcn_size'], self.hparams['gcn_size'], tf.nn.relu)(encoded)
                encoded = batch_norm(encoded, is_training=self.placeholders['is_training'], fused=True)

            # Final layer - no activation, 'attn_size' outputs
            encoded = _gcn(self.hparams['gcn_size'], self.hparams['attn_size'], None)(encoded)

        return encoded
Beispiel #3
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state (or prev orders state) with a stack of graph convolutions
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The encoding name - the layers are built under the 'policy/<name>' variable scope
            :param reuse: Forwarded as the `reuse` argument of the variable scope (weight sharing between encodings)
            :return: The output of the last graph convolution, built with output_dim = hparams['attn_size'] // 2
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

        # Normalized adjacency, repeated once per item of the batch
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

        def _gcn(out_dim, residual):
            """ Shorthand for a biased relu graph convolution taking 'gcn_size' inputs """
            return GraphConvolution(input_dim=self.hparams['gcn_size'],
                                    output_dim=out_dim,
                                    norm_adjacency=adjacency,
                                    activation_fn=tf.nn.relu,
                                    residual=residual,
                                    bias=True)

        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):
            nb_items = tf.shape(board_state)[0]

            # A small gaussian perturbation is added to the input to break symmetry
            noisy_state = board_state + tf.random_normal(tf.shape(board_state), stddev=0.01)

            # Inputs whose last dimension is NB_FEATURES are first projected to 'gcn_size'
            # Anything else is fed to the graph convolutions unchanged
            if noisy_state.shape[-1].value == NB_FEATURES:
                with tf.variable_scope('proj', reuse=tf.AUTO_REUSE):
                    proj_weights = tf.get_variable('W',
                                                   shape=[1, NB_FEATURES, self.hparams['gcn_size']],
                                                   dtype=tf.float32)
                tiled_weights = tf.tile(proj_weights, [nb_items, 1, 1])
                encoded = tf.nn.relu(tf.matmul(noisy_state, tiled_weights))
            else:
                encoded = noisy_state

            # All layers but the last one are residual and keep 'gcn_size' features
            nb_residual_layers = self.hparams['nb_graph_conv'] - 1
            for _ in range(nb_residual_layers):
                encoded = _gcn(self.hparams['gcn_size'], residual=True)(encoded)

            # The last layer is not residual and outputs 'attn_size' // 2 features
            encoded = _gcn(self.hparams['attn_size'] // 2, residual=False)(encoded)

        return encoded
Beispiel #4
0
    def _build_draw_initial(self):
        """ Builds the draw model (initial step)

            Reads 'board_state', 'current_power' and 'draw_target' from self.features, builds the draw
            probability of the current power and its mean squared error against the target, then registers
            'draw_prob' and 'draw_loss' (with a version tag) through self.add_meta_information.
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
        from diplomacy_research.utils.tensorflow import to_float

        # Merging this model's placeholders with the ones already registered (if any)
        if self.placeholders:
            self.placeholders.update(self.get_placeholders())
        else:
            self.placeholders = self.get_placeholders()

        def _dense(units, activation):
            """ Shorthand for a fully connected layer with a bias """
            return tf.layers.Dense(units=units, activation=activation, use_bias=True)

        # Building the graph under the 'draw' scope, on the worker device when a cluster config is set
        with tf.variable_scope('draw', reuse=tf.AUTO_REUSE), \
                tf.device(self.cluster_config.worker_device if self.cluster_config else None):

            # Features
            board_state = to_float(self.features['board_state'])    # tf.float32 - (b, NB_NODES, NB_FEATURES)
            current_power = self.features['current_power']          # tf.int32   - (b,)
            draw_target = self.features['draw_target']              # tf.float32 - (b,)

            # Placeholders
            stop_gradient_all = self.placeholders['stop_gradient_all']

            # Normalized adjacency, repeated once per item of the batch
            nb_items = tf.shape(board_state)[0]
            adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
            adjacency = tf.tile(adjacency, [nb_items, 1, 1])

            # Board embedding: one graph convolution, flattened over the nodes, then a dense layer
            with tf.variable_scope('graph_conv_scope'):
                gcn_out_size = self.hparams['draw_gcn_1_output_size']
                node_features = GraphConvolution(input_dim=NB_FEATURES,
                                                 output_dim=gcn_out_size,
                                                 norm_adjacency=adjacency,
                                                 activation_fn=tf.nn.relu,
                                                 bias=True)(board_state)
                flat_features = tf.reshape(node_features,                   # (b, NB_NODES * gcn_out_size)
                                           shape=[-1, NB_NODES * gcn_out_size])
                board_embedding = _dense(self.hparams['draw_embedding_size'],   # (b, draw_embedding_size)
                                         tf.nn.relu)(flat_features)

            # One draw probability per power, then only the current power's entry is kept
            with tf.variable_scope('draw_scope'):
                power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

                hidden = _dense(self.hparams['draw_h1_size'], tf.nn.relu)(board_embedding)  # (b, draw_h1_size)
                hidden = _dense(self.hparams['draw_h2_size'], tf.nn.relu)(hidden)           # (b, draw_h2_size)
                all_draw_probs = _dense(NB_POWERS, tf.nn.sigmoid)(hidden)                   # (b, NB_POWERS)
                draw_prob = tf.reduce_sum(all_draw_probs * power_mask, axis=1)              # (b,)

            # Mean squared error, with its gradient stopped when 'stop_gradient_all' is set
            with tf.variable_scope('draw_loss'):
                mean_sq_error = tf.reduce_mean(tf.square(draw_target - draw_prob))
                draw_loss = tf.cond(stop_gradient_all,
                                    lambda: tf.stop_gradient(mean_sq_error),
                                    lambda: mean_sq_error)

        # Output tags
        outputs = {
            'tag/draw/v001_draw_relu': True,
            'draw_prob': draw_prob,
            'draw_loss': draw_loss,
        }

        # Registering the outputs with the model
        self.add_meta_information(outputs)
Beispiel #5
0
def test_adjacency_matrix():
    """ Checks that the adjacency matrix is square, with NB_NODES rows and columns """
    matrix = state_space.get_adjacency_matrix()
    nb_nodes = state_space.NB_NODES
    expected_shape = (nb_nodes, nb_nodes)
    assert matrix.shape == expected_shape
Beispiel #6
0
    def _get_board_value(self, board_state, current_power, name='board_state_value', reuse=None):
        """ Computes the estimated value of a board state
            :param board_state: The board state - (batch, NB_NODES, NB_FEATURES)
            :param current_power: The power for which we want the board value - (batch,)
            :param name: The name to use for the operation - layers are built under the 'value/<name>' scope
            :param reuse: Forwarded as the `reuse` argument of the variable scope (weight sharing between operations)
            :return: The value of the board state for the specified power - (batch,)
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

        # Normalized adjacency, repeated once per item of the batch
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        nb_items = tf.shape(board_state)[0]
        adjacency = tf.tile(adjacency, [nb_items, 1, 1])

        # No need to use 'stop_gradient_value' - Because this model does not share parameters.
        with tf.variable_scope(tf.VariableScope(name='value/%s' % name, reuse=reuse)):

            # Board embedding: one graph convolution, flattened over the nodes, then a dense layer
            with tf.variable_scope('graph_conv_scope'):
                gcn_out_size = self.hparams['value_gcn_1_output_size']
                node_features = GraphConvolution(input_dim=board_state.shape[-1].value,
                                                 output_dim=gcn_out_size,
                                                 norm_adjacency=adjacency,
                                                 activation_fn=tf.nn.relu,
                                                 bias=True)(board_state)
                board_embedding = tf.reshape(node_features, shape=[-1, NB_NODES * gcn_out_size])
                board_embedding = tf.layers.Dense(units=self.hparams['value_embedding_size'],
                                                  activation=tf.nn.relu,
                                                  use_bias=True)(board_embedding)   # (b, value_emb_size)

            # One value per power, then only the entry of the requested power is kept
            with tf.variable_scope('value_scope'):
                power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

                # (units, activation) of the successive dense layers - the last one is linear
                layer_specs = ((self.hparams['value_h1_size'], tf.nn.relu),         # (b, value_h1_size)
                               (self.hparams['value_h2_size'], tf.nn.relu),         # (b, value_h2_size)
                               (NB_POWERS, None))                                   # (b, NB_POWERS)
                all_values = board_embedding
                for nb_units, activation in layer_specs:
                    all_values = tf.layers.Dense(units=nb_units,
                                                 activation=activation,
                                                 use_bias=True)(all_values)

                power_value = tf.reduce_sum(all_values * power_mask, axis=1)        # (b,)

        return power_value
Beispiel #7
0
import json
import pickle

# importing from research
from diplomacy_research.models import state_space
from diplomacy_research.players.random_player import RandomPlayer
from diplomacy_research.players.dummy_player import DummyPlayer
from diplomacy_research.players.rule_based_player import RuleBasedPlayer
from diplomacy_research.players.rulesets import easy_ruleset

from diplomacy_research.players.rule_based_player import RuleBasedPlayer
# from diplomacy_research.players.rule_based_player import ModelBasedPlayer
from diplomacy_research.utils.cluster import start_io_loop, stop_io_loop

# Adjacency matrix returned by state_space for the "standard" argument
# (presumably the name of the standard map - confirm against state_space.get_adjacency_matrix)
adj_matrix = state_space.get_adjacency_matrix("standard")
# print(adj_matrix)

# Ordering of the provinces, as exposed by state_space.STANDARD_TOPO_LOCS
# NOTE(review): presumably the same ordering as the rows / columns of adj_matrix - confirm in state_space
ordering = state_space.STANDARD_TOPO_LOCS
# print(ordering)

# Province types
# Coast-specific locations, one pair per province
coasts = [
    "BUL/EC", "BUL/SC",
    "SPA/NC", "SPA/SC",
    "STP/NC", "STP/SC",
]

# Water locations
water = [
    "ADR", "AEG", "BAL", "BAR", "BLA",
    "EAS", "ENG", "BOT", "GOL", "HEL",
    "ION", "IRI", "MID", "NAT", "NTH",
    "NRG", "SKA", "TYN", "WES",
]

# def test_game():
#     # creating multiple agents