Exemple #1
0
  def build_q_factor_discrete(self, cfg, goal_embedding):
    """Build the q value network.

    Each entry of `cfg.conv_layer_config` is read as
    (channels, kernel_size, strides). A negative channel count builds a plain
    conv + ReLU layer with `-channels` filters; any other entry builds a conv
    layer followed by batch normalization, a goal-conditioned scale/shift
    (gamma/beta obtained from `film_params`) and a ReLU.

    Args:
      cfg: configuration object; only `cfg.conv_layer_config` is read here
      goal_embedding: tensor that contains the goal embedding

    Returns:
      the q value tensor
    """
    conv_layer_config = cfg.conv_layer_config
    # Only the modulated layers (positive channel count) get FiLM parameters.
    n_layer_channel = [
        layer_config[0] for layer_config in conv_layer_config
        if layer_config[0] > 0
    ]
    # NOTE(review): assumes film_params returns one entry per element of
    # n_layer_channel, in order -- confirm against its definition.
    # The parameters are consumed lazily, one per modulated layer. Zipping
    # them against the full layer config would pair them with the wrong
    # layers and silently drop trailing layers as soon as the config holds a
    # plain (negative-channel) layer.
    film_param_iter = iter(film_params(goal_embedding, n_layer_channel))
    out = self.inputs
    for layer_config in conv_layer_config:
      n_filters = layer_config[0]
      kernel_size = layer_config[1]
      strides = layer_config[2]
      if n_filters < 0:
        out = tf.layers.conv2d(
            out, -n_filters, kernel_size, strides, padding='SAME')
        out = tf.nn.relu(out)
      else:
        out = tf.layers.conv2d(
            out, n_filters, kernel_size, strides, padding='SAME')
        # Batch norm without its own affine terms; the scale and shift come
        # from the goal-conditioned gamma/beta below.
        out = tf.layers.batch_normalization(
            out, center=False, scale=False, training=self.is_training)
        gamma, beta = tf.split(next(film_param_iter), 2, axis=1)
        # Insert two singleton axes so gamma/beta broadcast over axes 1, 2.
        out *= tf.expand_dims(tf.expand_dims(gamma, 1), 1)
        out += tf.expand_dims(tf.expand_dims(beta, 1), 1)
        out = tf.nn.relu(out)

    out_shape = out.get_shape()
    factors = [8, 10, 10]
    n_positions = np.prod(out_shape[1:-1])

    # [B, s1*s2, s3]
    out = tf.reshape(out, (-1, n_positions, out_shape[-1]))
    projection_mat = tf.get_variable(
        name='projection_matrix',
        shape=(sum(factors), n_positions),
        dtype=tf.float32, trainable=True)
    # Replicate the shared projection matrix for every batch element so it
    # can be used in a batched matmul.
    projection_mat = tf.expand_dims(projection_mat, axis=0)
    projection_mat = tf.tile(projection_mat, [tf.shape(out)[0], 1, 1])
    out = tf.matmul(projection_mat, out)  # [B, sum(fac), s3]
    # [B, factor[0], s3] [B, factor[1], s3] [B, factor[2], s3]
    fac1, fac2, fac3 = tf.split(out, factors, axis=1)
    out = tensor_concat(fac1, fac2, fac3)  # [B, f1, f2, f3, s3]
    # [B, 800, s3*3]
    out = tf.reshape(out, [-1, np.prod(factors), out_shape[-1]*3])
    print('tensor concat: {}'.format(out))
    goal_tile = tf.expand_dims(
        tf.layers.dense(goal_embedding, out_shape[-1]), 1)  # [B, 1, s3]
    print('goal: {}'.format(goal_tile))
    goal_tile = tf.tile(
        goal_tile, multiples=[1, np.prod(factors), 1])
    # TODO(ydjiang): include context vector too?
    out = tf.concat([out, goal_tile], axis=-1)
    # Add a singleton axis so the 1x1 convolutions below can be applied.
    out = tf.expand_dims(out, axis=1)
    # TODO(ydjiang): wider network here?
    out = tf.nn.relu(tf.layers.conv2d(out, 100, 1, 1))
    out = tf.nn.relu(tf.layers.conv2d(out, 32, 1, 1))
    out = tf.layers.conv2d(out, 1, 1, 1)
    return tf.squeeze(out, axis=[1, 3])
 def call(self, inputs):
     """Run the factorized head on a state tensor and a goal embedding.

     Args:
       inputs: mapping read at the keys 'goal_embedding' and 'state_input'.

     Returns:
       The output of the last conv layer with axes 1 and 3 squeezed away.
     """
     goal = inputs['goal_embedding']
     state = inputs['state_input']

     # One copy of the projection matrix per batch element.
     batch_size = tf.shape(state)[0]
     tiled_projection = tf.tile(self._projection_mat, [batch_size, 1, 1])

     # Collapse the middle axes of the state, then project it.
     flat_shape = (
         -1, np.prod(self.out_shape[1:-1]), self.out_shape[-1])
     projected = tf.matmul(tiled_projection, tf.reshape(state, flat_shape))

     # [B, factor[0], s3] [B, factor[1], s3] [B, factor[2], s3]
     part_a, part_b, part_c = tf.split(projected, self.factors, axis=1)
     combined = tensor_concat(part_a, part_b, part_c)  # [B, f1, f2, f3, s3]

     # [B, prod(factors), s3*3]
     n_combinations = np.prod(self.factors)
     features = tf.reshape(
         combined, [-1, n_combinations, self.out_shape[-1] * 3])

     # Repeat the transformed goal once per factor combination.
     goal_features = tf.expand_dims(self._dense_layer(goal), 1)
     goal_features = tf.tile(
         goal_features, multiples=[1, np.prod(self.factors), 1])

     features = tf.concat([features, goal_features], axis=-1)
     features = tf.expand_dims(features, axis=1)

     hidden = tf.nn.relu(self._conv_layer_1(features))
     hidden = tf.nn.relu(self._conv_layer_2(hidden))
     q_values = self._conv_layer_3(hidden)
     return tf.squeeze(q_values, axis=[1, 3])