def __init__(self, interface):
    """Build the TF1 graph for this recurrent policy/value network.

    Constructs input placeholders, an LSTM core with self-attention over
    per-unit embeddings, and both the step-time (acting) and training-time
    output heads.  All attributes below are graph tensors/ops, not values.
    """
    super().__init__(interface)
    self.num_select_actions = self.interface.num_unit_selection_actions
    self.rnn_size = 256
    # Attention over unit embeddings; dropout disabled even though train=True.
    self.self_attention = SelfAttention(hidden_size=128, num_heads=2, attention_dropout=0, train=True)
    self.lstm = tf.contrib.rnn.LSTMCell(self.rnn_size)
    # Recurrent state carried between steps; leading dim 2 presumably holds
    # the LSTM (c, h) pair -- TODO confirm against _lstm_step.
    self.memory_input = tf.placeholder(tf.float32, [2, None, self.rnn_size], name="memory_input")
    # Per-unit embeddings: [batch, num_units, embedding_size] -- assumed layout, verify against interface.
    self.unit_embeddings_input = tf.placeholder(tf.float32, [None, None, self.interface.unit_embedding_size], name="unit_embeddings_input")
    # Index of the unit actually selected at each step (used for training targets).
    self.unit_selection_input = tf.placeholder(tf.int32, [None], name="unit_selection_input")
    # TODO: Add in previous action index as an input
    # NOTE(review): the placeholder below looks like it already implements the
    # TODO above, but nothing visible here consumes it -- confirm and remove the TODO.
    self.prev_action_input = tf.placeholder(tf.int32, [None], name='prev_action_input')
    # NOTE(review): this rebinds the attribute over the features() method,
    # shadowing it after the first call. Shape [batch_size, num_features].
    self.features = self.features()
    # Single-step path (acting): one LSTM step plus the next recurrent state.
    lstm_output, self.next_lstm_state = self._lstm_step()
    # Full-sequence path (training).
    self.train_output = self._lstm_step_train()
    self.all_values = parts.value_head(self.train_output)
    # Action-probability heads for the acting path.
    self.nonspacial_probs, self.spacial_probs_x, self.spacial_probs_y = self._probs_from_features(lstm_output)
    # Training heads drop the last element via [:-1] -- presumably the final
    # timestep has no target (bootstrap position); TODO confirm.
    self.nonspacial_train, self.spacial_train_x, self.spacial_train_y = \
        self._probs_from_features(self.train_output[:-1])
    self.unit_selection_probs = self._selection_probs_from_features(lstm_output, self.unit_embeddings_input)
    self._f1 = lstm_output
    self.unit_selection_probs_train = self._selection_probs_from_features(self.train_output[:-1], self.unit_embeddings_input[:-1])
def train_values(self):
    """Return the value-head tensor computed from the precomputed features.

    Uses ``self.features`` (built in ``__init__``), shape
    [batch_size, num_features].
    """
    feature_tensor = self.features
    return parts.value_head(feature_tensor)
def bootstrap_value(self):
    """Return a scalar value estimate for the single bootstrap state.

    Runs ``self.bootstrap_state_input`` through the conv body and value
    head as a batch of one, then strips the batch dimension again.
    """
    # Add a batch dimension of 1 so the conv body sees a batched input.
    batched_state = tf.expand_dims(self.bootstrap_state_input, axis=0)
    body_features = parts.conv_body(batched_state)
    batched_value = parts.value_head(body_features)
    # Remove the batch dimension to yield a scalar value tensor.
    return tf.squeeze(batched_value, axis=0)