def build_perception(self, obs):
    """Build the shared perception trunk for this agent.

    Runs the minimap and screen observations through separate CNNs,
    concatenates them with the non-spatial features, and projects the
    result through a fully-connected layer.

    Args:
        obs: dict of observation tensors with keys "minimap", "screen"
            and "non_spatial".

    Side effects:
        self.s: concatenated conv + non-spatial feature tensor.
        self.s_fc: fully-connected embedding of self.s with width
            self.s_dim.
    """
    # Fix: pass the scope as a lazy %-style logging argument instead of
    # eagerly formatting the message string (logging best practice; the
    # emitted text is identical).
    logger.info("Building %s perception...", self.scope)
    with tf.variable_scope(self.scope):
        m_conv = blocks.cnn(obs["minimap"], "minimap_feat")
        s_conv = blocks.cnn(obs["screen"], "screen_feat")
        self.s = blocks.concat(m_conv, s_conv, obs["non_spatial"],
                               "percept_concat")
        # Batch norm is applied both before and after the FC projection
        # when enabled; each call uses tf.contrib defaults.
        if self.use_batch_norm:
            self.s = tf.contrib.layers.batch_norm(self.s)
        self.s_fc = blocks.fully_connected(self.s, self.s_dim,
                                           "%s_perception" % self.scope)
        if self.use_batch_norm:
            self.s_fc = tf.contrib.layers.batch_norm(self.s_fc)
def _build_network(self):
    """Construct the actor-critic graph heads.

    Extracts CNN features from the minimap and screen observations,
    merges them with the non-spatial input, and derives the spatial
    policy (from the merged representation), the non-spatial policy and
    the value estimate (both from a 256-unit fully-connected layer).

    Side effects:
        self.pi: dict with "spatial" and "non_spatial" policy tensors.
        self.vf: scalar value-function tensor.
    """
    minimap_feat = blocks.cnn(self.obs["minimap"], "minimap_feat")
    screen_feat = blocks.cnn(self.obs["screen"], "screen_feat")
    merged = blocks.concat(minimap_feat, screen_feat,
                           self.obs["non_spatial"], "state_rep")
    hidden = blocks.fully_connected(merged, 256, "fc")

    # Spatial head reads the full merged representation; the
    # non-spatial head and the critic share the FC layer.
    spatial_pi = blocks.spatial_action(merged, "spatial_act_pi")
    non_spatial_pi = blocks.non_spatial_action(hidden, "non_spatial_act_pi")
    self.pi = {"spatial": spatial_pi, "non_spatial": non_spatial_pi}
    self.vf = blocks.build_value(hidden, "value")
def _build_network(self):
    """Construct the Atari-style actor-critic graph heads.

    Uses large-kernel/strided CNNs on minimap and screen, flattens and
    concatenates them with an Atari-style non-spatial feature vector,
    then builds a factorized spatial policy as the outer product of
    independent per-axis action distributions, plus a non-spatial
    policy and a value head from a shared 256-unit FC layer.

    Side effects:
        self.pi: dict with "spatial" and "non_spatial" policy tensors.
        self.vf: scalar value-function tensor.
    """
    mm_feat = blocks.cnn(self.obs["minimap"], "minimap_feat",
                         kernel_size=[8, 4], stride=[4, 2])
    scr_feat = blocks.cnn(self.obs["screen"], "screen_feat",
                          kernel_size=[8, 4], stride=[4, 2])
    ns_feat = blocks.non_spatial_feat_atari(self.obs["non_spatial"],
                                            "ns_feat")
    flat_parts = [layers.flatten(mm_feat), layers.flatten(scr_feat), ns_feat]
    merged = tf.concat(flat_parts, axis=1, name="state_rep")
    hidden = blocks.fully_connected(merged, 256, "fc")

    # Factorized 2-D spatial distribution: outer product of an x-axis
    # distribution (transposed) and a y-axis distribution, flattened to
    # one vector over all s_size * s_size positions.
    act_x = blocks.spatial_action_atari(hidden, self.s_size,
                                        "spatial_act_x", transpose=True)
    act_y = blocks.spatial_action_atari(hidden, self.s_size,
                                        "spatial_act_y")
    spatial_pi = layers.flatten(tf.multiply(act_x, act_y),
                                scope="spatial_act_pi")
    self.pi = {
        "spatial": spatial_pi,
        "non_spatial": blocks.non_spatial_action(hidden,
                                                 "non_spatial_act_pi")
    }
    self.vf = blocks.build_value(hidden, "value")
def build_model(self, w_func, obs, state_in, state_out):
    """Build the worker model: ConvLSTM core, action embeddings U, and
    softmax policy heads combined with the goal embedding w.

    NOTE(review): the structure (U matrices combined with a goal vector
    w via matmul) resembles a FeUdal-Networks-style worker — confirm
    against the enclosing project before relying on that reading.

    Args:
        w_func: callable taking self.k and returning the goal-embedding
            tensor w (shape inferred here as [-1, k] before expand_dims
            — TODO confirm at the caller).
        obs: dict of observation tensors; only obs["minimap"] is read
            here, to derive the LSTM step size from the batch dimension.
        state_in: caller-owned list; extended in place with the LSTM's
            input (c, h) placeholders.
        state_out: caller-owned list; extended in place with the LSTM's
            output (c, h) tensors.

    Side effects:
        self.lstm, self.U_s, self.U_ns, self.pi, self.log_pi set;
        self.build_value(fc) builds the value head.
    """
    logger.info("Building %s model..." % self.scope)
    with tf.variable_scope(self.scope):
        # Calculate U
        # step_size is the leading (batch/time) dim of the minimap obs.
        self.lstm = blocks.SingleStepConvLSTM(self.s,
                                              size=self.s_size,
                                              step_size=tf.shape(
                                                  obs["minimap"])[:1],
                                              filters=1,
                                              scope="worker_lstm")
        if self.use_batch_norm:
            # Normalize the LSTM output in place before the heads read it.
            self.lstm.output = tf.contrib.layers.batch_norm(
                self.lstm.output,
                scope='%s_lstm_batch_norm' % self.scope)
        lstm_output = self.lstm.output
        # Shared FC feature (feeds the value head) and the flat
        # non-spatial embedding of width num_actions * k.
        fc = blocks.fully_connected(lstm_output, self.s_dim, "fc")
        U_fc = blocks.fully_connected(lstm_output,
                                      self.num_actions * self.k,
                                      "U_fc")
        # 1x1 conv produces k embedding channels per spatial position.
        self.U_s = tf.layers.conv2d(inputs=lstm_output,
                                    filters=self.k,
                                    kernel_size=1,
                                    padding='SAME',
                                    name="spatial_flat_logits_hidden")
        if self.use_batch_norm:
            # Each head gets its own batch-norm variable scope.
            self.U_s = tf.contrib.layers.batch_norm(
                self.U_s,
                scope='%s_spatial_batch_norm' % self.scope)
            U_fc = tf.contrib.layers.batch_norm(
                U_fc,
                scope='%s_non_spatial_batch_norm' % self.scope)
            fc = tf.contrib.layers.batch_norm(
                fc,
                scope='%s_fc_batch_norm' % self.scope)
        self.build_value(fc)
        # Reshape embeddings to (batch, actions, k): spatial actions are
        # the s_size*s_size grid cells, non-spatial are num_actions ids.
        self.U_s = tf.reshape(self.U_s,
                              [-1, self.s_size**2, self.k],
                              name='U_s')
        self.U_ns = tf.reshape(U_fc,
                               shape=[-1, self.num_actions, self.k],
                               name='U_ns')
        w = w_func(self.k)
        # Expand to (batch, k, 1) so matmul contracts the k dimension.
        w = tf.expand_dims(w, 2)
        # calculate policy and sample
        s_logits = tf.reshape(tf.matmul(self.U_s, w),
                              [-1, self.s_size**2])
        ns_logits = tf.reshape(tf.matmul(self.U_ns, w),
                               [-1, self.num_actions])
        # Calculate policy
        # log_softmax is kept alongside softmax (presumably for a
        # numerically stable loss — confirm at the loss construction).
        self.pi = {
            "spatial": tf.nn.softmax(s_logits),
            "non_spatial": tf.nn.softmax(ns_logits)
        }
        self.log_pi = {
            "spatial": tf.nn.log_softmax(s_logits),
            "non_spatial": tf.nn.log_softmax(ns_logits)
        }
        # add worker c, h to state in and out
        state_in.extend([
            self.lstm.state_in[0],
            self.lstm.state_in[1],
        ])
        state_out.extend([
            self.lstm.state_out[0],
            self.lstm.state_out[1],
        ])