def build_perception(self, obs):
    """Build the perception sub-graph and store its outputs on the instance.

    Two feature stacks are built with ``blocks.cnn`` from ``obs["minimap"]``
    and ``obs["screen"]``; they are joined with ``obs["non_spatial"]`` via
    ``blocks.concat`` and then passed through ``blocks.fully_connected``.
    When ``self.use_batch_norm`` is truthy, batch normalisation is applied
    after the concatenation and again after the fully connected layer.

    Args:
        obs: Mapping indexed with the keys ``"minimap"``, ``"screen"`` and
            ``"non_spatial"``.

    Sets:
        self.s: The concatenated (optionally batch-normalised) features.
        self.s_fc: The fully connected (optionally batch-normalised) output.
    """
    logger.info("Building %s perception..." % self.scope)

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to live inside the variable scope -- confirm against the repo.
    with tf.variable_scope(self.scope):
        minimap_features = blocks.cnn(obs["minimap"], "minimap_feat")
        screen_features = blocks.cnn(obs["screen"], "screen_feat")

        percept = blocks.concat(
            minimap_features,
            screen_features,
            obs["non_spatial"],
            "percept_concat",
        )
        self.s = percept
        if self.use_batch_norm:
            self.s = tf.contrib.layers.batch_norm(percept)

        # self.s_dim is handed to the layer as its second argument
        # (presumably the output width -- verify in blocks.fully_connected).
        hidden = blocks.fully_connected(
            self.s, self.s_dim, "%s_perception" % self.scope
        )
        self.s_fc = hidden
        if self.use_batch_norm:
            self.s_fc = tf.contrib.layers.batch_norm(hidden)
def _build_network(self):
    """Assemble the policy and value heads from ``self.obs``.

    Features from ``self.obs["minimap"]`` and ``self.obs["screen"]`` are
    built with ``blocks.cnn`` and combined with ``self.obs["non_spatial"]``
    by ``blocks.concat``. The spatial policy head is built directly from
    that combined representation; the non-spatial policy head and the value
    head are built from a fully connected layer on top of it.

    Sets:
        self.pi: Dict with the keys ``"spatial"`` and ``"non_spatial"``.
        self.vf: Output of ``blocks.build_value``.
    """
    minimap_features = blocks.cnn(self.obs["minimap"], "minimap_feat")
    screen_features = blocks.cnn(self.obs["screen"], "screen_feat")

    state = blocks.concat(
        minimap_features,
        screen_features,
        self.obs["non_spatial"],
        "state_rep",
    )
    # 256 is passed as the layer's second argument (presumably its width --
    # verify in blocks.fully_connected).
    hidden = blocks.fully_connected(state, 256, "fc")

    # Heads are created in the same order as before: spatial policy,
    # non-spatial policy, then value.
    spatial_policy = blocks.spatial_action(state, "spatial_act_pi")
    non_spatial_policy = blocks.non_spatial_action(hidden, "non_spatial_act_pi")
    self.pi = {
        "spatial": spatial_policy,
        "non_spatial": non_spatial_policy,
    }

    self.vf = blocks.build_value(hidden, "value")
def _build_network(self):
    """Assemble the policy and value heads using the "atari" style blocks.

    Features from ``self.obs["minimap"]`` and ``self.obs["screen"]`` are
    built with ``blocks.cnn`` (``kernel_size=[8, 4]``, ``stride=[4, 2]``),
    flattened, and concatenated along axis 1 with the output of
    ``blocks.non_spatial_feat_atari``. A fully connected layer on that
    vector feeds every head. The spatial policy is the flattened
    element-wise product of two ``blocks.spatial_action_atari`` outputs,
    the first of which is requested with ``transpose=True``.

    Sets:
        self.pi: Dict with the keys ``"spatial"`` and ``"non_spatial"``.
        self.vf: Output of ``blocks.build_value``.
    """

    def conv_features(obs_key, scope_name):
        # Both spatial inputs share the same conv hyper-parameters; fresh
        # list objects are created on every call, as in the inline form.
        return blocks.cnn(
            self.obs[obs_key], scope_name, kernel_size=[8, 4], stride=[4, 2]
        )

    minimap_features = conv_features("minimap", "minimap_feat")
    screen_features = conv_features("screen", "screen_feat")
    non_spatial_features = blocks.non_spatial_feat_atari(
        self.obs["non_spatial"], "ns_feat"
    )

    # Flatten minimap first, then screen -- the same order in which the
    # original list literal evaluated them.
    flat_minimap = layers.flatten(minimap_features)
    flat_screen = layers.flatten(screen_features)
    state = tf.concat(
        [flat_minimap, flat_screen, non_spatial_features],
        axis=1,
        name="state_rep",
    )
    hidden = blocks.fully_connected(state, 256, "fc")

    action_x = blocks.spatial_action_atari(
        hidden, self.s_size, "spatial_act_x", transpose=True
    )
    action_y = blocks.spatial_action_atari(hidden, self.s_size, "spatial_act_y")
    # Presumably the product broadcasts the two per-axis outputs into a
    # joint grid before flattening -- confirm against spatial_action_atari.
    joint_action = tf.multiply(action_x, action_y)
    spatial_policy = layers.flatten(joint_action, scope="spatial_act_pi")

    non_spatial_policy = blocks.non_spatial_action(hidden, "non_spatial_act_pi")
    self.pi = {
        "spatial": spatial_policy,
        "non_spatial": non_spatial_policy,
    }

    self.vf = blocks.build_value(hidden, "value")