def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import film_gcn_res_block, preprocess_adjacency
    from diplomacy_research.models.state_space import get_adjacency_matrix

    # Quick functions to retrieve hparams and placeholders, and function shorthands
    hps = lambda hparam_name: self.hparams[hparam_name]
    pholder = lambda placeholder_name: self.placeholders[placeholder_name]
    relu = tf.nn.relu

    # Getting FiLM gammas and betas
    film_gammas = self.outputs['_%s_film_gammas' % name]
    film_betas = self.outputs['_%s_film_betas' % name]

    # Computing norm adjacency
    norm_adjacency = preprocess_adjacency(get_adjacency_matrix())
    norm_adjacency = tf.tile(tf.expand_dims(norm_adjacency, axis=0), [tf.shape(board_state)[0], 1, 1])

    # Building scope
    scope = tf.VariableScope(name='policy/%s' % name, reuse=reuse)
    with tf.variable_scope(scope):

        # Adding noise to break symmetry
        board_state = board_state + tf.random_normal(tf.shape(board_state), stddev=0.01)
        graph_conv = tf.layers.Dense(units=hps('gcn_size'), activation=relu)(board_state)

        # First and intermediate layers
        for layer_idx in range(hps('nb_graph_conv') - 1):
            graph_conv = film_gcn_res_block(inputs=graph_conv,                      # (b, NB_NODES, gcn_size)
                                            gamma=film_gammas[layer_idx],
                                            beta=film_betas[layer_idx],
                                            gcn_out_dim=hps('gcn_size'),
                                            norm_adjacency=norm_adjacency,
                                            is_training=pholder('is_training'),
                                            residual=True)

        # Last layer
        graph_conv = film_gcn_res_block(inputs=graph_conv,                          # (b, NB_NODES, final_size)
                                        gamma=film_gammas[-1],
                                        beta=film_betas[-1],
                                        gcn_out_dim=hps('attn_size') // 2,
                                        norm_adjacency=norm_adjacency,
                                        is_training=pholder('is_training'),
                                        residual=False)

    # Returning
    return graph_conv
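# --- Illustrative sketch (not the library implementation) ---
# The variant above conditions each graph-conv layer on externally computed
# gammas/betas via FiLM (feature-wise linear modulation). A minimal sketch of
# one such block, assuming the usual ordering GCN -> batch norm -> FiLM -> relu
# (+ skip); the function name and internals here are assumptions, not the
# actual 'film_gcn_res_block' code:
import tensorflow as tf

def film_gcn_res_block_sketch(inputs, gamma, beta, gcn_out_dim, norm_adjacency, is_training, residual=True):
    """ FiLM-GCN residual block sketch. inputs: (b, NB_NODES, in_dim). """
    gcn_out = tf.matmul(norm_adjacency, tf.layers.dense(inputs, gcn_out_dim))       # A_norm @ (X W) - graph conv
    gcn_out = tf.layers.batch_normalization(gcn_out, training=is_training,
                                            center=False, scale=False)              # FiLM supplies the affine params
    film_out = tf.nn.relu(gamma * gcn_out + beta)                                   # feature-wise linear modulation
    return film_out + inputs if residual else film_out                              # skip connection (dims must match)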
def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf, batch_norm
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
    from diplomacy_research.models.state_space import get_adjacency_matrix

    # Quick functions to retrieve hparams and placeholders, and function shorthands
    hps = lambda hparam_name: self.hparams[hparam_name]
    pholder = lambda placeholder_name: self.placeholders[placeholder_name]
    relu = tf.nn.relu

    # Computing norm adjacency
    norm_adjacency = preprocess_adjacency(get_adjacency_matrix())
    norm_adjacency = tf.tile(tf.expand_dims(norm_adjacency, axis=0), [tf.shape(board_state)[0], 1, 1])

    # Building scope
    scope = tf.VariableScope(name='policy/%s' % name, reuse=reuse)
    with tf.variable_scope(scope):

        # Adding noise to break symmetry
        board_state = board_state + tf.random_normal(tf.shape(board_state), stddev=0.01)
        graph_conv = board_state

        # First layer
        graph_conv = GraphConvolution(input_dim=graph_conv.shape[-1].value,         # (b, NB_NODES, gcn_size)
                                      output_dim=hps('gcn_size'),
                                      norm_adjacency=norm_adjacency,
                                      activation_fn=relu,
                                      bias=True)(graph_conv)

        # Intermediate layers
        for _ in range(1, hps('nb_graph_conv') - 1):
            graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                # (b, NB_NODES, gcn_size)
                                          output_dim=hps('gcn_size'),
                                          norm_adjacency=norm_adjacency,
                                          activation_fn=relu,
                                          bias=True)(graph_conv)
            graph_conv = batch_norm(graph_conv, is_training=pholder('is_training'), fused=True)

        # Final layer
        graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                    # (b, NB_NODES, attn_size)
                                      output_dim=hps('attn_size'),
                                      norm_adjacency=norm_adjacency,
                                      activation_fn=None,
                                      bias=True)(graph_conv)

    # Returning
    return graph_conv
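# --- Illustrative sketch (not the library implementation) ---
# This variant assumes 'GraphConvolution' implements the standard Kipf & Welling
# propagation rule H' = act(A_norm @ H @ W + b), applied with a batched normalized
# adjacency. A minimal sketch under that assumption (the name and signature here
# are hypothetical):
import tensorflow as tf

def graph_convolution_sketch(inputs, output_dim, norm_adjacency, activation_fn=None, bias=True):
    """ inputs: (batch, NB_NODES, input_dim); norm_adjacency: (batch, NB_NODES, NB_NODES). """
    support = tf.layers.dense(inputs, output_dim, use_bias=bias)                    # H @ W (+ b), applied per node
    output = tf.matmul(norm_adjacency, support)                                     # neighborhood aggregation
    return activation_fn(output) if activation_fn is not None else output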
def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
    from diplomacy_research.models.state_space import get_adjacency_matrix, NB_FEATURES

    # Quick function to retrieve hparams and function shorthands
    hps = lambda hparam_name: self.hparams[hparam_name]
    relu = tf.nn.relu

    # Computing norm adjacency
    norm_adjacency = preprocess_adjacency(get_adjacency_matrix())
    norm_adjacency = tf.tile(tf.expand_dims(norm_adjacency, axis=0), [tf.shape(board_state)[0], 1, 1])

    # Building scope
    scope = tf.VariableScope(name='policy/%s' % name, reuse=reuse)
    with tf.variable_scope(scope):
        batch_size = tf.shape(board_state)[0]

        # Adding noise to break symmetry
        board_state = board_state + tf.random_normal(tf.shape(board_state), stddev=0.01)

        # Projecting (if needed) to 'gcn_size'
        if board_state.shape[-1].value == NB_FEATURES:
            with tf.variable_scope('proj', reuse=tf.AUTO_REUSE):
                proj_w = tf.get_variable('W', shape=[1, NB_FEATURES, hps('gcn_size')], dtype=tf.float32)
                graph_conv = relu(tf.matmul(board_state, tf.tile(proj_w, [batch_size, 1, 1])))
        else:
            graph_conv = board_state

        # First and intermediate layers
        for _ in range(hps('nb_graph_conv') - 1):
            graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                # (b, NB_NODES, gcn_size)
                                          output_dim=hps('gcn_size'),
                                          norm_adjacency=norm_adjacency,
                                          activation_fn=relu,
                                          residual=True,
                                          bias=True)(graph_conv)

        # Last layer
        graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                    # (b, NB_NODES, final_size)
                                      output_dim=hps('attn_size') // 2,
                                      norm_adjacency=norm_adjacency,
                                      activation_fn=relu,
                                      residual=False,
                                      bias=True)(graph_conv)

    # Returning
    return graph_conv
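# --- Illustrative sketch (not the library implementation) ---
# All three encoders rely on 'preprocess_adjacency' to normalize the map's
# adjacency matrix before the tiled batched matmuls. A common choice, and the
# one assumed in this sketch, is the symmetric renormalization of Kipf & Welling:
# D^-1/2 (A + I) D^-1/2, with self-loops added so each node keeps its own features.
import numpy as np

def preprocess_adjacency_sketch(adjacency):
    """ adjacency: (NB_NODES, NB_NODES) 0/1 matrix -> normalized float32 matrix. """
    adj_tilde = adjacency + np.eye(adjacency.shape[0])                              # add self-loops: A + I
    d_inv_sqrt = np.power(adj_tilde.sum(axis=1), -0.5)                              # D^-1/2 from row degrees
    d_mat = np.diag(d_inv_sqrt)
    return (d_mat @ adj_tilde @ d_mat).astype(np.float32)                           # D^-1/2 (A+I) D^-1/2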