Example #1
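Note: these snippets target TensorFlow 1.x and assume `import tensorflow as tf`, `tf.contrib.layers` imported as `layers`, plus the project-local `blocks` helper module and a configured `logger`.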
    def build_perception(self, obs):
        logger.info("Building %s perception...", self.scope)
        with tf.variable_scope(self.scope):
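            # Extract spatial features from the minimap and screen observations.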
            m_conv = blocks.cnn(obs["minimap"], "minimap_feat")
            s_conv = blocks.cnn(obs["screen"], "screen_feat")

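            # Fuse both spatial streams with the non-spatial features into one state tensor.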
            self.s = blocks.concat(m_conv, s_conv, obs["non_spatial"], "percept_concat")
            if self.use_batch_norm:
                self.s = tf.contrib.layers.batch_norm(self.s)
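            # Project the fused perception into an s_dim-dimensional state embedding.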
            self.s_fc = blocks.fully_connected(self.s, self.s_dim, "%s_perception" % self.scope)
            if self.use_batch_norm:
                self.s_fc = tf.contrib.layers.batch_norm(self.s_fc)
Example #2
    def _build_network(self):
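        # Convolutional feature extractors for the minimap and screen inputs.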
        m_conv = blocks.cnn(self.obs["minimap"], "minimap_feat")
        s_conv = blocks.cnn(self.obs["screen"], "screen_feat")

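        # Concatenate the spatial streams with the non-spatial features.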
        state_representation = blocks.concat(m_conv, s_conv,
                                             self.obs["non_spatial"],
                                             "state_rep")
        fc = blocks.fully_connected(state_representation, 256, "fc")

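        # Policy heads: a spatial distribution over screen coordinates and a
        # categorical distribution over action ids.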
        self.pi = {
            "spatial": blocks.spatial_action(state_representation, "spatial_act_pi"),
            "non_spatial": blocks.non_spatial_action(fc, "non_spatial_act_pi"),
        }

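        # Scalar state-value head for the critic.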
        self.vf = blocks.build_value(fc, "value")
Example #3
    def _build_network(self):
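        # DQN-style conv parameters (kernels 8 and 4, strides 4 and 2) for both spatial inputs.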
        m_conv = blocks.cnn(self.obs["minimap"],
                            "minimap_feat",
                            kernel_size=[8, 4],
                            stride=[4, 2])
        s_conv = blocks.cnn(self.obs["screen"],
                            "screen_feat",
                            kernel_size=[8, 4],
                            stride=[4, 2])
        non_spatial = blocks.non_spatial_feat_atari(self.obs["non_spatial"],
                                                    "ns_feat")

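        # Flatten the spatial features and concatenate them with the non-spatial features.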
        state_representation = tf.concat(
            [layers.flatten(m_conv),
             layers.flatten(s_conv), non_spatial],
            axis=1,
            name="state_rep")

        fc = blocks.fully_connected(state_representation, 256, "fc")

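        # Factor the spatial policy into separate x and y distributions; their
        # broadcast product below forms the joint map over screen coordinates.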
        spatial_action_x = blocks.spatial_action_atari(fc,
                                                       self.s_size,
                                                       "spatial_act_x",
                                                       transpose=True)
        spatial_action_y = blocks.spatial_action_atari(fc, self.s_size,
                                                       "spatial_act_y")

        spatial_action = layers.flatten(tf.multiply(spatial_action_x,
                                                    spatial_action_y),
                                        scope="spatial_act_pi")

        self.pi = {
            "spatial": spatial_action,
            "non_spatial": blocks.non_spatial_action(fc, "non_spatial_act_pi")
        }

        self.vf = blocks.build_value(fc, "value")
Example #4
    def build_model(self, w_func, obs, state_in, state_out):
        logger.info("Building %s model...", self.scope)
        with tf.variable_scope(self.scope):
            # Calculate U: action embeddings (k channels) combined with the embedding w below.
            self.lstm = blocks.SingleStepConvLSTM(self.s,
                                                  size=self.s_size,
                                                  step_size=tf.shape(
                                                      obs["minimap"])[:1],
                                                  filters=1,
                                                  scope="worker_lstm")

            if self.use_batch_norm:
                self.lstm.output = tf.contrib.layers.batch_norm(
                    self.lstm.output, scope='%s_lstm_batch_norm' % self.scope)

            lstm_output = self.lstm.output

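            # Fully connected heads: fc feeds the value function, U_fc the non-spatial embeddings.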
            fc = blocks.fully_connected(lstm_output, self.s_dim, "fc")
            U_fc = blocks.fully_connected(lstm_output,
                                          self.num_actions * self.k, "U_fc")

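            # 1x1 convolution: k embedding channels per spatial location for the spatial head.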
            self.U_s = tf.layers.conv2d(inputs=lstm_output,
                                        filters=self.k,
                                        kernel_size=1,
                                        padding='SAME',
                                        name="spatial_flat_logits_hidden")

            if self.use_batch_norm:
                self.U_s = tf.contrib.layers.batch_norm(
                    self.U_s, scope='%s_spatial_batch_norm' % self.scope)
                U_fc = tf.contrib.layers.batch_norm(
                    U_fc, scope='%s_non_spatial_batch_norm' % self.scope)
                fc = tf.contrib.layers.batch_norm(fc,
                                                  scope='%s_fc_batch_norm' %
                                                  self.scope)

            self.build_value(fc)

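            # Reshape into embedding matrices: one k-vector per spatial position / per action id.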
            self.U_s = tf.reshape(self.U_s, [-1, self.s_size**2, self.k],
                                  name='U_s')
            self.U_ns = tf.reshape(U_fc,
                                   shape=[-1, self.num_actions, self.k],
                                   name='U_ns')

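            # w: a k-dimensional embedding from w_func, expanded to [batch, k, 1] for the matmuls.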
            w = w_func(self.k)
            w = tf.expand_dims(w, 2)

            # Combine the embeddings with w to obtain spatial and non-spatial logits.
            s_logits = tf.reshape(tf.matmul(self.U_s, w), [-1, self.s_size**2])
            ns_logits = tf.reshape(tf.matmul(self.U_ns, w),
                                   [-1, self.num_actions])

            # Softmax for the policy; log-softmax kept for numerically stable log-probabilities.
            self.pi = {
                "spatial": tf.nn.softmax(s_logits),
                "non_spatial": tf.nn.softmax(ns_logits)
            }

            self.log_pi = {
                "spatial": tf.nn.log_softmax(s_logits),
                "non_spatial": tf.nn.log_softmax(ns_logits)
            }

            # Append the worker LSTM's (c, h) tensors to the shared recurrent state lists.
            state_in.extend([
                self.lstm.state_in[0],
                self.lstm.state_in[1],
            ])

            state_out.extend([
                self.lstm.state_out[0],
                self.lstm.state_out[1],
            ])