def parse_function(self, example_proto):
    """ Parses a stored protocol buffer

        :param example_proto: The serialized example, as accepted by tf.parse_single_example.
        :return: A dictionary (feature name -> tensor). Every feature listed in the parsed proto fields is
                 decoded, converted to dense, cast and reshaped as required by its proto field.
        :raises RuntimeError: If a feature is neither a FixedLenFeature nor a VarLenFeature.
    """
    from diplomacy_research.utils.tensorflow import tf, np_to_tf

    # Converting features to tf.io.FixedLenFeature and tf.io.VarLenFeature
    tf_features = {}
    for feature_name, feature in self.features.items():
        if isinstance(feature, FixedLenFeature):
            tf_features[feature_name] = tf.io.FixedLenFeature(**feature._asdict())
        elif isinstance(feature, VarLenFeature):
            tf_features[feature_name] = tf.io.VarLenFeature(**feature._asdict())
        else:
            raise RuntimeError('Unsupported feature type.')

    data = tf.parse_single_example(example_proto, tf_features)
    proto_fields = self.parse_sparse_fields(self.proto_fields)

    # Decoding from protocol buffer
    # NOTE: The builtins `object` and `bool` are used instead of `np.object` and `np.bool`. Those aliases
    #       were plain references to the builtins, were deprecated in NumPy 1.20 and removed in NumPy 1.24.
    for feature_name, proto_field in proto_fields.items():
        current_dtype = object
        target_dtype = proto_field.dtype

        # Decoding tf.string
        # Booleans are decoded as uint8, and are then cast to their final dtype below
        if self.features[feature_name].dtype == object and target_dtype is not None:
            encoded_dtype = np.uint8 if target_dtype == bool else target_dtype
            data[feature_name] = tf.io.decode_raw(data[feature_name], np_to_tf(encoded_dtype))
            current_dtype = encoded_dtype

        # Converting SparseTensor to Dense
        if isinstance(data[feature_name], tf.SparseTensor) and isinstance(proto_field, VarProtoField):
            data[feature_name] = tf.sparse.to_dense(data[feature_name])

        # Casting to final dtype
        if target_dtype is not None and target_dtype != current_dtype:
            data[feature_name] = tf.cast(data[feature_name], np_to_tf(target_dtype))

        # Converting to final shape (only fixed fields with a non-empty shape)
        if isinstance(proto_field, FixedProtoField) and proto_field.shape:
            data[feature_name] = tf.reshape(data[feature_name], proto_field.shape)

    # Returning parsed data
    return data
def _build_policy_initial(self):
    """ Builds the policy model (initial step)

        Prepares the features, computes the FiLM gammas and betas from the current power and season,
        encodes the board state, creates the word embedding, and registers the resulting outputs.
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.initializers import uniform
    from diplomacy_research.utils.tensorflow import build_sparse_batched_tensor, pad_axis, to_float, to_bool

    if not self.placeholders:
        self.placeholders = self.get_placeholders()

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    def pholder(placeholder_name):
        """ Returns the placeholder stored under `placeholder_name` """
        return self.placeholders[placeholder_name]

    # Building the graph
    with tf.variable_scope('policy', reuse=tf.AUTO_REUSE), \
            tf.device(self.cluster_config.worker_device if self.cluster_config else None):

        # Features
        features = self.features
        board_state = to_float(features['board_state'])             # tf.flt32 - (b, NB_NODES, NB_FEATURES)
        board_alignments = to_float(features['board_alignments'])   # (b, NB_NODES * len)
        decoder_inputs = features['decoder_inputs']                 # tf.int32 - (b, <= 1 + TOK/ORD * NB_SCS)
        decoder_lengths = features['decoder_lengths']               # tf.int32 - (b,)
        current_power = features['current_power']                   # tf.int32 - (b,)
        current_season = features['current_season']                 # tf.int32 - (b,)
        feature_dropout_rates = features['dropout_rate']            # tf.flt32 - (b,)

        # Batch size
        batch_size = tf.shape(board_state)[0]

        # Board alignments - Rows of NB_NODES values, each row divided by max(1, sum of the row)
        board_alignments = tf.reshape(board_alignments, [batch_size, -1, NB_NODES])
        alignment_sums = tf.reduce_sum(board_alignments, axis=-1, keepdims=True)
        board_alignments = board_alignments / tf.math.maximum(1., alignment_sums)

        # Decoder mask - Stored as flattened indices
        decoder_mask_indices = features['decoder_mask_indices']     # tf.int64 - (b, 3 * len)
        decoder_mask_shape = self.proto_fields['decoder_mask'].shape
        mask_rank = len(decoder_mask_shape)

        # A strictly positive 'dropout_rate' placeholder replaces the dropout rates of the features
        # (the result is not referenced again in this method)
        dropout_override = pholder('dropout_rate')
        dropout_rates = tf.cond(tf.greater(dropout_override, 0.),
                                true_fn=lambda: tf.zeros_like(feature_dropout_rates) + dropout_override,
                                false_fn=lambda: feature_dropout_rates)

        # Padding inputs
        max_decoder_length = tf.reduce_max(decoder_lengths)
        board_alignments = pad_axis(board_alignments, axis=1, min_size=max_decoder_length)
        decoder_inputs = pad_axis(decoder_inputs, axis=-1, min_size=2)
        decoder_mask_indices = pad_axis(decoder_mask_indices, axis=-1, min_size=mask_rank)

        # Reshaping the indices to (b, len, 3), then building the mask
        # decoder_mask is -- tf.bool (batch, TOK/ORD * NB_SC, VOCAB_SIZE, VOCAB_SIZE)
        decoder_mask_indices = tf.reshape(decoder_mask_indices, [batch_size, -1, mask_rank])
        decoder_mask = build_sparse_batched_tensor(decoder_mask_indices,
                                                   value=True,
                                                   dtype=tf.bool,
                                                   dense_shape=decoder_mask_shape)

        # The RNN lengths must be at least 1 - The unclipped lengths are also kept
        # No trimming is done here, because the fields are variable length
        raw_decoder_lengths = decoder_lengths
        decoder_lengths = tf.math.maximum(1, raw_decoder_lengths)

        # Placeholders
        decoder_type = tf.reduce_max(pholder('decoder_type'))
        is_training = pholder('is_training')

        # FiLM gammas and betas - Conditioned on the current power and the current season
        with tf.variable_scope('film_scope'):
            power_embedding = uniform(name='power_embedding',
                                      shape=[NB_POWERS, hps('power_emb_size')],
                                      scale=1.)
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)
            masked_power_embedding = power_embedding[None] * current_power_mask[:, :, None]
            current_power_embedding = tf.reduce_sum(masked_power_embedding, axis=1)         # (b, power_emb)

            season_embedding = uniform(name='season_embedding',
                                       shape=[NB_SEASONS, hps('season_emb_size')],
                                       scale=1.)
            current_season_mask = tf.one_hot(current_season, NB_SEASONS, dtype=tf.float32)
            masked_season_embedding = season_embedding[None] * current_season_mask[:, :, None]
            current_season_embedding = tf.reduce_sum(masked_season_embedding, axis=1)       # (b, season_emb)

            film_embedding_input = tf.concat([current_power_embedding, current_season_embedding], axis=1)

            # One (gamma, beta) pair per graph conv layer - The last one has 'attn_size' units
            film_output_dims = [hps('gcn_size')] * (hps('nb_graph_conv') - 1) + [hps('attn_size')]
            film_layer = tf.layers.Dense(units=2 * sum(film_output_dims), use_bias=True, activation=None)
            film_weights = film_layer(film_embedding_input)[:, None, :]
            film_gammas, film_betas = tf.split(film_weights, 2, axis=2)
            film_gammas = tf.split(film_gammas, film_output_dims, axis=2)
            film_betas = tf.split(film_betas, film_output_dims, axis=2)

            # Storing as temporary output
            self.add_output('_board_state_conv_film_gammas', film_gammas)
            self.add_output('_board_state_conv_film_betas', film_betas)

        # Graph convolution - Encoding the board state
        with tf.variable_scope('graph_conv_scope'):
            assert hps('nb_graph_conv') >= 2
            board_state_0yr_conv = self.encode_board(board_state, name='board_state_conv')
            board_state_conv = self.get_board_state_conv(board_state_0yr_conv, is_training)

        # Word embedding (to embed word_ix)
        # Embeddings needs to be cached locally on the worker, otherwise TF can't compute their gradients
        with tf.variable_scope('word_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            word_embedding = uniform(name='word_embedding',
                                     shape=[VOCABULARY_SIZE, hps('word_emb_size')],
                                     scale=1.,
                                     caching_device=caching_device)

        # Retreat phase: 1) board not empty, 2) disl. units present
        board_not_empty = tf.reduce_sum(board_state[:], axis=[1, 2]) > 0
        dislodged_present = tf.math.logical_not(to_bool(tf.reduce_min(board_state[:, :, 23], -1)))
        in_retreat_phase = tf.math.logical_and(board_not_empty, dislodged_present)

        # Building output tags
        outputs = {
            'batch_size': batch_size,
            'board_alignments': board_alignments,
            'decoder_inputs': decoder_inputs,
            'decoder_mask': decoder_mask,
            'decoder_type': decoder_type,
            'raw_decoder_lengths': raw_decoder_lengths,
            'decoder_lengths': decoder_lengths,
            'board_state_conv': board_state_conv,
            'board_state_0yr_conv': board_state_0yr_conv,
            'word_embedding': word_embedding,
            'in_retreat_phase': in_retreat_phase,
        }

    # Adding to graph
    self.add_meta_information(outputs)
def _build_draw_initial(self):
    """ Builds the draw model (initial step)

        Encodes the board state with one graph convolution and a dense layer, predicts a value in [0, 1]
        (sigmoid) for each power, selects the value of the current power, and computes the mean squared
        error against the draw target.
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
    from diplomacy_research.utils.tensorflow import to_float

    # Merging the placeholders with the existing ones (if any)
    if self.placeholders:
        self.placeholders.update(self.get_placeholders())
    else:
        self.placeholders = self.get_placeholders()

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    def pholder(placeholder_name):
        """ Returns the placeholder stored under `placeholder_name` """
        return self.placeholders[placeholder_name]

    # Building the graph
    with tf.variable_scope('draw', reuse=tf.AUTO_REUSE), \
            tf.device(self.cluster_config.worker_device if self.cluster_config else None):

        # Features
        features = self.features
        board_state = to_float(features['board_state'])     # tf.float32 - (b, NB_NODES, NB_FEATURES)
        current_power = features['current_power']           # tf.int32 - (b,)
        draw_target = features['draw_target']               # tf.float32 - (b,)

        # Placeholders
        stop_gradient_all = pholder('stop_gradient_all')

        # Normalized adjacency - Repeated once per item in the batch
        batch_size = tf.shape(board_state)[0]
        adjacency = preprocess_adjacency(get_adjacency_matrix())
        norm_adjacency = tf.tile(tf.expand_dims(adjacency, axis=0), [batch_size, 1, 1])

        # Graph embeddings
        with tf.variable_scope('graph_conv_scope'):
            gcn_output_size = hps('draw_gcn_1_output_size')
            graph_conv_layer = GraphConvolution(input_dim=NB_FEATURES,
                                                output_dim=gcn_output_size,
                                                norm_adjacency=norm_adjacency,
                                                activation_fn=tf.nn.relu,
                                                bias=True)
            node_embeddings = graph_conv_layer(board_state)

            # Flattening to (b, NB_NODES * draw_gcn_1_output_size), then projecting to draw_embedding_size
            flat_embeddings = tf.reshape(node_embeddings, shape=[-1, NB_NODES * gcn_output_size])
            embedding_layer = tf.layers.Dense(units=hps('draw_embedding_size'),
                                              activation=tf.nn.relu,
                                              use_bias=True)
            board_state_graph_conv = embedding_layer(flat_embeddings)

        # Calculating draw for all powers
        with tf.variable_scope('draw_scope'):
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

            hidden_1_layer = tf.layers.Dense(units=hps('draw_h1_size'), activation=tf.nn.relu, use_bias=True)
            hidden_1 = hidden_1_layer(board_state_graph_conv)

            hidden_2_layer = tf.layers.Dense(units=hps('draw_h2_size'), activation=tf.nn.relu, use_bias=True)
            hidden_2 = hidden_2_layer(hidden_1)

            probs_layer = tf.layers.Dense(units=NB_POWERS, activation=tf.nn.sigmoid, use_bias=True)
            draw_probs = probs_layer(hidden_2)                                  # (b, NB_POWERS)

            # Keeping only the entry of the current power
            draw_prob = tf.reduce_sum(draw_probs * current_power_mask, axis=1)  # (b,)

        # Computing draw loss - The gradient is stopped when 'stop_gradient_all' is set
        with tf.variable_scope('draw_loss'):
            mean_squared_error = tf.reduce_mean(tf.square(draw_target - draw_prob))
            draw_loss = tf.cond(stop_gradient_all,
                                lambda: tf.stop_gradient(mean_squared_error),
                                lambda: mean_squared_error)

        # Building output tags
        outputs = {
            'tag/draw/v001_draw_relu': True,
            'draw_prob': draw_prob,
            'draw_loss': draw_loss,
        }

    # Adding features, placeholders and outputs to graph
    self.add_meta_information(outputs)
def _build_policy_initial(self):
    """ Builds the policy model (initial step)

        This variant does not encode the board state: 'board_state_conv' and 'board_state_0yr_conv' are
        zero tensors whose last dimension is 0. It prepares the decoder inputs and mask, creates the word
        embedding, and registers the resulting outputs.
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.initializers import uniform
    from diplomacy_research.utils.tensorflow import build_sparse_batched_tensor, pad_axis, to_float, to_bool

    if not self.placeholders:
        self.placeholders = self.get_placeholders()

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    def pholder(placeholder_name):
        """ Returns the placeholder stored under `placeholder_name` """
        return self.placeholders[placeholder_name]

    # Building the graph
    with tf.variable_scope('policy', reuse=tf.AUTO_REUSE), \
            tf.device(self.cluster_config.worker_device if self.cluster_config else None):

        # Features
        features = self.features
        board_state = to_float(features['board_state'])     # tf.flt32 - (b, NB_NODES, NB_FEATURES)
        decoder_inputs = features['decoder_inputs']         # tf.int32 - (b, <= 1 + TOK/ORD * NB_SCS)
        decoder_lengths = features['decoder_lengths']       # tf.int32 - (b,)
        feature_dropout_rates = features['dropout_rate']    # tf.flt32 - (b,)

        # Batch size
        batch_size = tf.shape(board_state)[0]

        # Decoder mask - Stored as flattened indices
        decoder_mask_indices = features['decoder_mask_indices']     # tf.int64 - (b, 3 * len)
        decoder_mask_shape = self.proto_fields['decoder_mask'].shape
        mask_rank = len(decoder_mask_shape)

        # A strictly positive 'dropout_rate' placeholder replaces the dropout rates of the features
        # (the result is not referenced again in this method)
        dropout_override = pholder('dropout_rate')
        dropout_rates = tf.cond(tf.greater(dropout_override, 0.),
                                true_fn=lambda: tf.zeros_like(feature_dropout_rates) + dropout_override,
                                false_fn=lambda: feature_dropout_rates)

        # Padding inputs
        decoder_inputs = pad_axis(decoder_inputs, axis=-1, min_size=2)
        decoder_mask_indices = pad_axis(decoder_mask_indices, axis=-1, min_size=mask_rank)

        # Reshaping the indices to (b, len, 3), then building the mask
        # decoder_mask is -- tf.bool (batch, TOK/ORD * NB_SC, VOCAB_SIZE, VOCAB_SIZE)
        decoder_mask_indices = tf.reshape(decoder_mask_indices, [batch_size, -1, mask_rank])
        decoder_mask = build_sparse_batched_tensor(decoder_mask_indices,
                                                   value=True,
                                                   dtype=tf.bool,
                                                   dense_shape=decoder_mask_shape)

        # The RNN lengths must be at least 1 - The unclipped lengths are also kept
        # No trimming is done here, because the fields are variable length
        raw_decoder_lengths = decoder_lengths
        decoder_lengths = tf.math.maximum(1, raw_decoder_lengths)

        # Placeholders
        decoder_type = tf.reduce_max(pholder('decoder_type'))

        # Word embedding (to embed word_ix)
        # Embeddings needs to be cached locally on the worker, otherwise TF can't compute their gradients
        with tf.variable_scope('word_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            word_embedding = uniform(name='word_embedding',
                                     shape=[VOCABULARY_SIZE, hps('word_emb_size')],
                                     scale=1.,
                                     caching_device=caching_device)

        # No board encoder in this model - Zero tensors with an empty last dimension are returned instead
        board_state_conv = tf.zeros([batch_size, NB_NODES, 0], dtype=tf.float32)
        board_state_0yr_conv = tf.zeros([batch_size, NB_NODES, 0], dtype=tf.float32)

        # Retreat phase: 1) board not empty, 2) disl. units present
        board_not_empty = tf.reduce_sum(board_state[:], axis=[1, 2]) > 0
        dislodged_present = tf.math.logical_not(to_bool(tf.reduce_min(board_state[:, :, 23], -1)))
        in_retreat_phase = tf.math.logical_and(board_not_empty, dislodged_present)

        # Building output tags
        outputs = {
            'batch_size': batch_size,
            'decoder_inputs': decoder_inputs,
            'decoder_mask': decoder_mask,
            'decoder_type': decoder_type,
            'raw_decoder_lengths': raw_decoder_lengths,
            'decoder_lengths': decoder_lengths,
            'board_state_conv': board_state_conv,
            'board_state_0yr_conv': board_state_0yr_conv,
            'word_embedding': word_embedding,
            'in_retreat_phase': in_retreat_phase,
        }

    # Adding to graph
    self.add_meta_information(outputs)
def _build_policy_initial(self):
    """ Builds the policy model (initial step)

        Prepares the decoder inputs and the candidates, encodes the board state, creates the order and
        candidate embeddings, and registers the resulting outputs.
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.initializers import uniform
    from diplomacy_research.utils.tensorflow import pad_axis, to_int32, to_float, to_bool

    if not self.placeholders:
        self.placeholders = self.get_placeholders()

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    def pholder(placeholder_name):
        """ Returns the placeholder stored under `placeholder_name` """
        return self.placeholders[placeholder_name]

    # Building the graph
    with tf.variable_scope('policy', reuse=tf.AUTO_REUSE), \
            tf.device(self.cluster_config.worker_device if self.cluster_config else None):

        # Features
        features = self.features
        board_state = to_float(features['board_state'])     # tf.flt32 - (b, NB_NODES, NB_FEATURES)
        decoder_inputs = features['decoder_inputs']         # tf.int32 - (b, <= 1 + NB_SCS)
        decoder_lengths = features['decoder_lengths']       # tf.int32 - (b,)
        candidates = features['candidates']                 # tf.int32 - (b, nb_locs * MAX_CANDIDATES)
        feature_dropout_rates = features['dropout_rate']    # tf.flt32 - (b,)

        # Batch size
        batch_size = tf.shape(board_state)[0]

        # A strictly positive 'dropout_rate' placeholder replaces the dropout rates of the features
        # (the result is not referenced again in this method)
        dropout_override = pholder('dropout_rate')
        dropout_rates = tf.cond(tf.greater(dropout_override, 0.),
                                true_fn=lambda: tf.zeros_like(feature_dropout_rates) + dropout_override,
                                false_fn=lambda: feature_dropout_rates)

        # Padding decoder_inputs and candidates
        decoder_inputs = pad_axis(decoder_inputs, axis=-1, min_size=2)
        candidates = pad_axis(candidates, axis=-1, min_size=MAX_CANDIDATES)

        # The RNN lengths must be at least 1 - The unclipped lengths are also kept
        # No trimming is done here, because the fields are variable length
        raw_decoder_lengths = decoder_lengths
        decoder_lengths = tf.math.maximum(1, raw_decoder_lengths)

        # Placeholders
        decoder_type = tf.reduce_max(pholder('decoder_type'))
        is_training = pholder('is_training')

        # Candidates - One row of MAX_CANDIDATES per location, limited to the longest decoder length
        candidates = tf.reshape(candidates, [batch_size, -1, MAX_CANDIDATES])
        max_decoder_length = tf.reduce_max(decoder_lengths)
        candidates = candidates[:, :max_decoder_length, :]  # tf.int32 - (b, nb_locs, MAX_CAN)

        # Graph convolution - Encoding the board state
        with tf.variable_scope('graph_conv_scope'):
            assert hps('nb_graph_conv') >= 2
            board_state_0yr_conv = self.encode_board(board_state, name='board_state_conv')
            board_state_conv = self.get_board_state_conv(board_state_0yr_conv, is_training)

        # Order embedding (to embed order_ix)
        # Embeddings needs to be cached locally on the worker, otherwise TF can't compute their gradients
        with tf.variable_scope('order_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            partitioner = tf.fixed_size_partitioner(NB_PARTITIONS) if hps('use_partitioner') else None
            order_embedding = uniform(name='order_embedding',
                                      shape=[ORDER_VOCABULARY_SIZE, hps('order_emb_size')],
                                      scale=1.,
                                      partitioner=partitioner,
                                      caching_device=caching_device)

        # Candidate embedding - (ORDER_VOCABULARY_SIZE, lstm_size + 1)
        with tf.variable_scope('candidate_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            partitioner = tf.fixed_size_partitioner(NB_PARTITIONS) if hps('use_partitioner') else None
            candidate_embedding = uniform(name='candidate_embedding',
                                          shape=[ORDER_VOCABULARY_SIZE, hps('lstm_size') + 1],
                                          scale=1.,
                                          partitioner=partitioner,
                                          caching_device=caching_device)

        # Trimming to the maximum number of candidates (entries greater than PAD_ID), with a minimum of 1
        is_candidate = tf.math.greater(candidates, PAD_ID)
        candidate_lengths = tf.reduce_sum(to_int32(is_candidate), -1)
        max_candidate_length = tf.math.maximum(1, tf.reduce_max(candidate_lengths))
        candidates = candidates[:, :, :max_candidate_length]

        # Retreat phase: 1) board not empty, 2) disl. units present
        board_not_empty = tf.reduce_sum(board_state[:], axis=[1, 2]) > 0
        dislodged_present = tf.math.logical_not(to_bool(tf.reduce_min(board_state[:, :, 23], -1)))
        in_retreat_phase = tf.math.logical_and(board_not_empty, dislodged_present)

        # Building output tags
        outputs = {
            'batch_size': batch_size,
            'decoder_inputs': decoder_inputs,
            'decoder_type': decoder_type,
            'raw_decoder_lengths': raw_decoder_lengths,
            'decoder_lengths': decoder_lengths,
            'board_state_conv': board_state_conv,
            'board_state_0yr_conv': board_state_0yr_conv,
            'order_embedding': order_embedding,
            'candidate_embedding': candidate_embedding,
            'candidates': candidates,
            'max_candidate_length': max_candidate_length,
            'in_retreat_phase': in_retreat_phase,
        }

    # Adding to graph
    self.add_meta_information(outputs)
def _build_policy_initial(self):
    """ Builds the policy model (initial step)

        Prepares the features, computes two sets of FiLM gammas and betas (board state and previous orders)
        from the current power and season, encodes the board state and the previous orders, creates the
        order and candidate embeddings, and registers the resulting outputs.
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.initializers import uniform
    from diplomacy_research.utils.tensorflow import pad_axis, to_int32, to_float, to_bool

    if not self.placeholders:
        self.placeholders = self.get_placeholders()

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    def pholder(placeholder_name):
        """ Returns the placeholder stored under `placeholder_name` """
        return self.placeholders[placeholder_name]

    # Building the graph
    with tf.variable_scope('policy', reuse=tf.AUTO_REUSE), \
            tf.device(self.cluster_config.worker_device if self.cluster_config else None):

        # Features
        features = self.features
        board_state = to_float(features['board_state'])                 # tf.flt32 - (b, NB_NODES, NB_FEATURES)
        board_alignments = to_float(features['board_alignments'])       # (b, NB_NODES * len)
        prev_orders_state = to_float(features['prev_orders_state'])     # (b, NB_PRV_OD, NB_ND, NB_OD_FT)
        decoder_inputs = features['decoder_inputs']                     # tf.int32 - (b, <= 1 + NB_SCS)
        decoder_lengths = features['decoder_lengths']                   # tf.int32 - (b,)
        candidates = features['candidates']                             # tf.int32 - (b, nb_locs * MAX_CANDIDATES)
        current_power = features['current_power']                       # tf.int32 - (b,)
        current_season = features['current_season']                     # tf.int32 - (b,)
        feature_dropout_rates = features['dropout_rate']                # tf.flt32 - (b,)

        # Batch size
        batch_size = tf.shape(board_state)[0]

        # Board alignments - Rows of NB_NODES values, each row divided by max(1, sum of the row)
        board_alignments = tf.reshape(board_alignments, [batch_size, -1, NB_NODES])
        alignment_sums = tf.reduce_sum(board_alignments, axis=-1, keepdims=True)
        board_alignments = board_alignments / tf.math.maximum(1., alignment_sums)

        # A strictly positive 'dropout_rate' placeholder replaces the dropout rates of the features
        # (the result is not referenced again in this method)
        dropout_override = pholder('dropout_rate')
        dropout_rates = tf.cond(tf.greater(dropout_override, 0.),
                                true_fn=lambda: tf.zeros_like(feature_dropout_rates) + dropout_override,
                                false_fn=lambda: feature_dropout_rates)

        # Padding board_alignments, decoder_inputs and candidates
        max_raw_decoder_length = tf.reduce_max(decoder_lengths)
        board_alignments = pad_axis(board_alignments, axis=1, min_size=max_raw_decoder_length)
        decoder_inputs = pad_axis(decoder_inputs, axis=-1, min_size=2)
        candidates = pad_axis(candidates, axis=-1, min_size=MAX_CANDIDATES)

        # The RNN lengths must be at least 1 - The unclipped lengths are also kept
        # No trimming is done here, because the fields are variable length
        raw_decoder_lengths = decoder_lengths
        decoder_lengths = tf.math.maximum(1, raw_decoder_lengths)

        # Placeholders
        decoder_type = tf.reduce_max(pholder('decoder_type'))
        is_training = pholder('is_training')

        # Candidates - One row of MAX_CANDIDATES per location, limited to the longest decoder length
        candidates = tf.reshape(candidates, [batch_size, -1, MAX_CANDIDATES])
        max_decoder_length = tf.reduce_max(decoder_lengths)
        candidates = candidates[:, :max_decoder_length, :]              # tf.int32 - (b, nb_locs, MAX_CAN)

        # FiLM gammas and betas - Conditioned on the current power and the current season
        with tf.variable_scope('film_scope'):
            power_embedding = uniform(name='power_embedding',
                                      shape=[NB_POWERS, hps('power_emb_size')],
                                      scale=1.)
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)
            masked_power_embedding = power_embedding[None] * current_power_mask[:, :, None]
            current_power_embedding = tf.reduce_sum(masked_power_embedding, axis=1)         # (b, power_emb)

            season_embedding = uniform(name='season_embedding',
                                       shape=[NB_SEASONS, hps('season_emb_size')],
                                       scale=1.)
            current_season_mask = tf.one_hot(current_season, NB_SEASONS, dtype=tf.float32)
            masked_season_embedding = season_embedding[None] * current_season_mask[:, :, None]
            current_season_embedding = tf.reduce_sum(masked_season_embedding, axis=1)       # (b, season_emb)

            film_embedding_input = tf.concat([current_power_embedding, current_season_embedding], axis=1)

            # One (gamma, beta) pair per graph conv layer - The last one has 'attn_size // 2' units
            film_output_dims = [hps('gcn_size')] * (hps('nb_graph_conv') - 1) + [hps('attn_size') // 2]
            nb_film_units = 2 * sum(film_output_dims)

            # For board_state
            board_film_layer = tf.layers.Dense(units=nb_film_units, use_bias=True, activation=None)
            board_film_weights = board_film_layer(film_embedding_input)[:, None, :]
            board_film_gammas, board_film_betas = tf.split(board_film_weights, 2, axis=2)
            board_film_gammas = tf.split(board_film_gammas, film_output_dims, axis=2)
            board_film_betas = tf.split(board_film_betas, film_output_dims, axis=2)

            # For prev_orders - The weights are tiled NB_PREV_ORDERS times along the first axis
            prev_ord_film_layer = tf.layers.Dense(units=nb_film_units, use_bias=True, activation=None)
            prev_ord_film_weights = prev_ord_film_layer(film_embedding_input)[:, None, :]
            prev_ord_film_weights = tf.tile(prev_ord_film_weights, [NB_PREV_ORDERS, 1, 1])
            prev_ord_film_gammas, prev_ord_film_betas = tf.split(prev_ord_film_weights, 2, axis=2)
            prev_ord_film_gammas = tf.split(prev_ord_film_gammas, film_output_dims, axis=2)
            prev_ord_film_betas = tf.split(prev_ord_film_betas, film_output_dims, axis=2)

            # Storing as temporary output
            self.add_output('_board_state_conv_film_gammas', board_film_gammas)
            self.add_output('_board_state_conv_film_betas', board_film_betas)
            self.add_output('_prev_orders_conv_film_gammas', prev_ord_film_gammas)
            self.add_output('_prev_orders_conv_film_betas', prev_ord_film_betas)

        # Graph convolution - Encoding the board state and the previous orders
        with tf.variable_scope('graph_conv_scope'):
            assert hps('nb_graph_conv') >= 2
            assert hps('attn_size') % 2 == 0

            # Encoding board state
            board_state_0yr_conv = self.encode_board(board_state, name='board_state_conv')

            # Encoding prev_orders - The batch and previous orders dimensions are merged for the encoder
            flat_prev_orders_shape = [batch_size * NB_PREV_ORDERS, NB_NODES, NB_ORDERS_FEATURES]
            prev_orders_state = tf.reshape(prev_orders_state, flat_prev_orders_shape)
            prev_ord_conv = self.encode_board(prev_orders_state, name='prev_orders_conv')

            # Splitting back into (b, nb_prev, NB_NODES, attn_size // 2), then averaging over nb_prev
            split_prev_ord_shape = [batch_size, NB_PREV_ORDERS, NB_NODES, hps('attn_size') // 2]
            prev_ord_conv = tf.reshape(prev_ord_conv, split_prev_ord_shape)
            prev_ord_conv = tf.reduce_mean(prev_ord_conv, axis=1)

            # Concatenating the current board conv with the prev ord conv
            # The final board_state_conv should be of dimension (b, NB_NODE, attn_size)
            board_state_conv = self.get_board_state_conv(board_state_0yr_conv, is_training, prev_ord_conv)

        # Order embedding (to embed order_ix)
        # Embeddings needs to be cached locally on the worker, otherwise TF can't compute their gradients
        with tf.variable_scope('order_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            partitioner = tf.fixed_size_partitioner(NB_PARTITIONS) if hps('use_partitioner') else None
            order_embedding = uniform(name='order_embedding',
                                      shape=[ORDER_VOCABULARY_SIZE, hps('order_emb_size')],
                                      scale=1.,
                                      partitioner=partitioner,
                                      caching_device=caching_device)

        # Candidate embedding - (ORDER_VOCABULARY_SIZE, lstm_size + 1)
        with tf.variable_scope('candidate_embedding_scope'):
            caching_device = self.cluster_config.caching_device if self.cluster_config else None
            partitioner = tf.fixed_size_partitioner(NB_PARTITIONS) if hps('use_partitioner') else None
            candidate_embedding = uniform(name='candidate_embedding',
                                          shape=[ORDER_VOCABULARY_SIZE, hps('lstm_size') + 1],
                                          scale=1.,
                                          partitioner=partitioner,
                                          caching_device=caching_device)

        # Trimming to the maximum number of candidates (entries greater than PAD_ID), with a minimum of 1
        is_candidate = tf.math.greater(candidates, PAD_ID)
        candidate_lengths = tf.reduce_sum(to_int32(is_candidate), -1)
        max_candidate_length = tf.math.maximum(1, tf.reduce_max(candidate_lengths))
        candidates = candidates[:, :, :max_candidate_length]

        # Retreat phase: 1) board not empty, 2) disl. units present
        board_not_empty = tf.reduce_sum(board_state[:], axis=[1, 2]) > 0
        dislodged_present = tf.math.logical_not(to_bool(tf.reduce_min(board_state[:, :, 23], -1)))
        in_retreat_phase = tf.math.logical_and(board_not_empty, dislodged_present)

        # Building output tags
        outputs = {
            'batch_size': batch_size,
            'board_alignments': board_alignments,
            'decoder_inputs': decoder_inputs,
            'decoder_type': decoder_type,
            'raw_decoder_lengths': raw_decoder_lengths,
            'decoder_lengths': decoder_lengths,
            'board_state_conv': board_state_conv,
            'board_state_0yr_conv': board_state_0yr_conv,
            'prev_ord_conv': prev_ord_conv,
            'order_embedding': order_embedding,
            'candidate_embedding': candidate_embedding,
            'candidates': candidates,
            'max_candidate_length': max_candidate_length,
            'in_retreat_phase': in_retreat_phase,
        }

    # Adding to graph
    self.add_meta_information(outputs)
def _get_board_value(self, board_state, current_power, name='board_state_value', reuse=None):
    """ Computes the estimated value of a board state
        :param board_state: The board state - (batch, NB_NODES, NB_FEATURES)
        :param current_power: The power for which we want the board value - (batch,)
        :param name: The name to use for the operation (the variable scope is 'value/<name>')
        :param reuse: Whether to reuse or not the weights from another operation
        :return: The value of the board state for the specified power - (batch,)
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

    def hps(hparam_name):
        """ Returns the hyper-parameter stored under `hparam_name` """
        return self.hparams[hparam_name]

    # Normalized adjacency - Repeated once per item in the batch
    adjacency = preprocess_adjacency(get_adjacency_matrix())
    batched_adjacency = tf.expand_dims(adjacency, axis=0)
    batch_size = tf.shape(board_state)[0]
    norm_adjacency = tf.tile(batched_adjacency, [batch_size, 1, 1])

    # Building scope
    # No need to use 'stop_gradient_value' - Because this model does not share parameters.
    value_scope = tf.VariableScope(name='value/%s' % name, reuse=reuse)
    with tf.variable_scope(value_scope):

        # Graph convolution over the board, then a dense projection of the flattened nodes
        with tf.variable_scope('graph_conv_scope'):
            gcn_output_size = hps('value_gcn_1_output_size')
            graph_conv_layer = GraphConvolution(input_dim=board_state.shape[-1].value,
                                                output_dim=gcn_output_size,
                                                norm_adjacency=norm_adjacency,
                                                activation_fn=tf.nn.relu,
                                                bias=True)
            node_embeddings = graph_conv_layer(board_state)             # (b, NB_NODES, gcn_1)
            flat_embeddings = tf.reshape(node_embeddings, shape=[-1, NB_NODES * gcn_output_size])
            embedding_layer = tf.layers.Dense(units=hps('value_embedding_size'),
                                              activation=tf.nn.relu,
                                              use_bias=True)
            board_embedding = embedding_layer(flat_embeddings)          # (b, value_emb_size)

        # Two hidden layers, then one linear output per power
        with tf.variable_scope('value_scope'):
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

            hidden_1_layer = tf.layers.Dense(units=hps('value_h1_size'), activation=tf.nn.relu, use_bias=True)
            hidden_1 = hidden_1_layer(board_embedding)                  # (b, value_h1_size)

            hidden_2_layer = tf.layers.Dense(units=hps('value_h2_size'), activation=tf.nn.relu, use_bias=True)
            hidden_2 = hidden_2_layer(hidden_1)                         # (b, value_h2_size)

            output_layer = tf.layers.Dense(units=NB_POWERS, activation=None, use_bias=True)
            value_per_power = output_layer(hidden_2)                    # (b, NB_POWERS)

            # Keeping only the entry of the current power
            state_value = tf.reduce_sum(value_per_power * current_power_mask, axis=1)   # (b,)

    # Returning
    return state_value