Beispiel #1
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state or prev orders state with a stack of FiLM-conditioned graph conv blocks
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The name to use for the encoding. It selects the variable scope ('policy/<name>') and
                         the FiLM gammas / betas read from self.outputs.
            :param reuse: Whether to reuse or not the weights from another encoding operation
            :return: The encoded board state / prev_orders state
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import film_gcn_res_block, preprocess_adjacency

        # FiLM conditioning parameters - Indexed once per graph conv block
        gammas = self.outputs['_%s_film_gammas' % name]
        betas = self.outputs['_%s_film_betas' % name]

        # Hyper-parameters and placeholders used below
        gcn_size = self.hparams['gcn_size']
        nb_residual_blocks = self.hparams['nb_graph_conv'] - 1
        final_size = self.hparams['attn_size'] // 2
        is_training = self.placeholders['is_training']

        # Normalized adjacency matrix, repeated once per batch item
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        nb_items = tf.shape(board_state)[0]
        adjacency = tf.tile(adjacency, [nb_items, 1, 1])

        # Arguments shared by every graph conv block
        shared_kwargs = {'norm_adjacency': adjacency,
                         'is_training': is_training}

        # All variables are created under 'policy/<name>'
        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

            # Adding noise to break symmetry, then projecting to 'gcn_size'
            noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
            noisy_state = board_state + noise
            encoded = tf.layers.Dense(units=gcn_size, activation=tf.nn.relu)(noisy_state)

            # First and intermediate blocks - Residual, each using its own gamma / beta
            for block_ix in range(nb_residual_blocks):
                encoded = film_gcn_res_block(inputs=encoded,                    # (b, NB_NODES, gcn_size)
                                             gamma=gammas[block_ix],
                                             beta=betas[block_ix],
                                             gcn_out_dim=gcn_size,
                                             residual=True,
                                             **shared_kwargs)

            # Last block - Not residual, uses the last gamma / beta
            encoded = film_gcn_res_block(inputs=encoded,                        # (b, NB_NODES, final_size)
                                         gamma=gammas[-1],
                                         beta=betas[-1],
                                         gcn_out_dim=final_size,
                                         residual=False,
                                         **shared_kwargs)

        # Returning
        return encoded
Beispiel #2
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state or prev orders state with a stack of graph convolution layers
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The name to use for the encoding (variables are created under 'policy/<name>')
            :param reuse: Whether to reuse or not the weights from another encoding operation
            :return: The encoded board state / prev_orders state
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
        from diplomacy_research.utils.tensorflow import batch_norm

        # Normalized adjacency matrix, repeated once per batch item
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        nb_items = tf.shape(board_state)[0]
        adjacency = tf.tile(adjacency, [nb_items, 1, 1])

        def apply_graph_conv(inputs, input_dim, output_dim, activation_fn):
            """ Builds a graph convolution layer (with bias) and applies it to the inputs """
            layer = GraphConvolution(input_dim=input_dim,
                                     output_dim=output_dim,
                                     norm_adjacency=adjacency,
                                     activation_fn=activation_fn,
                                     bias=True)
            return layer(inputs)

        # All variables are created under 'policy/<name>'
        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

            # Adding noise to break symmetry
            noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
            noisy_state = board_state + noise

            # First Layer - Its input size is read from the static shape of the (noisy) board state
            encoded = apply_graph_conv(noisy_state,                                         # (b, NB_NODES, gcn_size)
                                       input_dim=noisy_state.shape[-1].value,
                                       output_dim=self.hparams['gcn_size'],
                                       activation_fn=tf.nn.relu)

            # Intermediate Layers - 'nb_graph_conv' minus the first and final layers, each followed by a batch norm
            for _ in range(self.hparams['nb_graph_conv'] - 2):
                encoded = apply_graph_conv(encoded,                                         # (b, NB_NODES, gcn_size)
                                           input_dim=self.hparams['gcn_size'],
                                           output_dim=self.hparams['gcn_size'],
                                           activation_fn=tf.nn.relu)
                encoded = batch_norm(encoded, is_training=self.placeholders['is_training'], fused=True)

            # Final Layer - No activation
            encoded = apply_graph_conv(encoded,                                             # (b, NB_NODES, attn_size)
                                       input_dim=self.hparams['gcn_size'],
                                       output_dim=self.hparams['attn_size'],
                                       activation_fn=None)

        # Returning
        return encoded
Beispiel #3
0
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state or prev orders state with a stack of (residual) graph convolution layers
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The name to use for the encoding (variables are created under 'policy/<name>')
            :param reuse: Whether to reuse or not the weights from another encoding operation
            :return: The encoded board state / prev_orders state
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

        # Hyper-parameters used below
        gcn_size = self.hparams['gcn_size']
        nb_residual_layers = self.hparams['nb_graph_conv'] - 1
        final_size = self.hparams['attn_size'] // 2

        # Normalized adjacency matrix, repeated once per batch item
        adjacency = tf.expand_dims(preprocess_adjacency(get_adjacency_matrix()), axis=0)
        adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

        # All variables are created under 'policy/<name>'
        with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):
            nb_items = tf.shape(board_state)[0]

            # Adding noise to break symmetry
            noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
            noisy_state = board_state + noise

            # Inputs whose last dimension is not NB_FEATURES are used as-is,
            # otherwise they are projected to 'gcn_size' with a weight matrix shared through AUTO_REUSE
            if noisy_state.shape[-1].value != NB_FEATURES:
                encoded = noisy_state
            else:
                with tf.variable_scope('proj', reuse=tf.AUTO_REUSE):
                    proj_weights = tf.get_variable('W', shape=[1, NB_FEATURES, gcn_size], dtype=tf.float32)
                tiled_weights = tf.tile(proj_weights, [nb_items, 1, 1])
                encoded = tf.nn.relu(tf.matmul(noisy_state, tiled_weights))

            # First and intermediate layers - Residual
            for _ in range(nb_residual_layers):
                residual_layer = GraphConvolution(input_dim=gcn_size,                       # (b, NB_NODES, gcn_size)
                                                  output_dim=gcn_size,
                                                  norm_adjacency=adjacency,
                                                  activation_fn=tf.nn.relu,
                                                  residual=True,
                                                  bias=True)
                encoded = residual_layer(encoded)

            # Last Layer - Not residual
            output_layer = GraphConvolution(input_dim=gcn_size,                             # (b, NB_NODES, final_size)
                                            output_dim=final_size,
                                            norm_adjacency=adjacency,
                                            activation_fn=tf.nn.relu,
                                            residual=False,
                                            bias=True)
            encoded = output_layer(encoded)

        # Returning
        return encoded