import numpy as np
import tensorflow as tf  # TF 1.x API (tf.layers, tf.get_variable).


def build_q_factor_discrete(self, cfg, goal_embedding):
  """Build the Q-value network.

  Args:
    cfg: configuration object
    goal_embedding: tensor that contains the goal embedding

  Returns:
    the Q-value tensor
  """
  n_layer_channel = []
  for layer_config in cfg.conv_layer_config:
    if layer_config[0] > 0:
      n_layer_channel.append(layer_config[0])
  layer_film_params = film_params(goal_embedding, n_layer_channel)
  out = self.inputs
  # FiLM params exist only for layers with a positive channel count, so
  # consume them with an iterator rather than zipping against every layer
  # (a plain zip would misalign or drop params once a negative entry appears).
  film_param_iter = iter(layer_film_params)
  for layer_cfg in cfg.conv_layer_config:
    if layer_cfg[0] < 0:
      # A negative channel count marks a plain conv + ReLU layer (no FiLM).
      out = tf.layers.conv2d(
          out, -layer_cfg[0], layer_cfg[1], layer_cfg[2], padding='SAME')
      out = tf.nn.relu(out)
    else:
      out = tf.layers.conv2d(
          out, layer_cfg[0], layer_cfg[1], layer_cfg[2], padding='SAME')
      out = tf.layers.batch_normalization(
          out, center=False, scale=False, training=self.is_training)
      # FiLM: goal-conditioned per-channel scale (gamma) and shift (beta).
      gamma, beta = tf.split(next(film_param_iter), 2, axis=1)
      out *= tf.expand_dims(tf.expand_dims(gamma, 1), 1)
      out += tf.expand_dims(tf.expand_dims(beta, 1), 1)
      out = tf.nn.relu(out)
  out_shape = out.get_shape().as_list()
  factors = [8, 10, 10]
  # Flatten the spatial dims: [B, s1*s2, s3].
  out = tf.reshape(out, (-1, np.prod(out_shape[1:-1]), out_shape[-1]))
  projection_mat = tf.get_variable(
      name='projection_matrix',
      shape=(sum(factors), np.prod(out_shape[1:-1])),
      dtype=tf.float32,
      trainable=True)
  projection_mat = tf.expand_dims(projection_mat, axis=0)
  projection_mat = tf.tile(projection_mat, [tf.shape(out)[0], 1, 1])
  out = tf.matmul(projection_mat, out)  # [B, sum(factors), s3]
  # [B, factors[0], s3], [B, factors[1], s3], [B, factors[2], s3]
  fac1, fac2, fac3 = tf.split(out, factors, axis=1)
  out = tensor_concat(fac1, fac2, fac3)  # [B, f1, f2, f3, s3*3]
  # [B, prod(factors), s3*3]; prod(factors) = 800 for [8, 10, 10].
  out = tf.reshape(out, [-1, np.prod(factors), out_shape[-1] * 3])
  tf.logging.info('tensor concat: %s', out)
  goal_tile = tf.expand_dims(
      tf.layers.dense(goal_embedding, out_shape[-1]), 1)  # [B, 1, s3]
  tf.logging.info('goal: %s', goal_tile)
  goal_tile = tf.tile(goal_tile, multiples=[1, np.prod(factors), 1])
  # TODO(ydjiang): include context vector too?
  out = tf.concat([out, goal_tile], axis=-1)
  out = tf.expand_dims(out, axis=1)
  # TODO(ydjiang): wider network here?
  out = tf.nn.relu(tf.layers.conv2d(out, 100, 1, 1))
  out = tf.nn.relu(tf.layers.conv2d(out, 32, 1, 1))
  out = tf.layers.conv2d(out, 1, 1, 1)
  return tf.squeeze(out, axis=[1, 3])
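# `film_params` and `tensor_concat` above come from elsewhere in the codebase.
# The sketches below are hypothetical reconstructions inferred only from how
# the two functions are called here; they are not the actual implementations.


def film_params(goal_embedding, n_layer_channel):
  """Hypothetical sketch: per-layer FiLM parameters from the goal embedding.

  For each conv layer with c channels, emits a [B, 2*c] tensor that the
  caller splits into a per-channel scale (gamma) and shift (beta).
  """
  return [tf.layers.dense(goal_embedding, 2 * c) for c in n_layer_channel]


def tensor_concat(fac1, fac2, fac3):
  """Hypothetical sketch: outer concatenation of three factor tensors.

  Inputs are [B, f1, s3], [B, f2, s3], and [B, f3, s3]. Each factor is
  broadcast over the other two factor axes and the three results are
  concatenated on the feature axis, which yields the [B, f1, f2, f3, s3*3]
  shape the callers expect.
  """
  f1, f2, f3 = tf.shape(fac1)[1], tf.shape(fac2)[1], tf.shape(fac3)[1]
  t1 = tf.tile(fac1[:, :, None, None, :], [1, 1, f2, f3, 1])
  t2 = tf.tile(fac2[:, None, :, None, :], [1, f1, 1, f3, 1])
  t3 = tf.tile(fac3[:, None, None, :, :], [1, f1, f2, 1, 1])
  return tf.concat([t1, t2, t3], axis=-1)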
def call(self, inputs):
  """Compute factored Q-values from a state encoding and a goal embedding."""
  goal_embedding = inputs['goal_embedding']
  state_inputs = inputs['state_input']
  projection_mat = tf.tile(
      self._projection_mat, [tf.shape(state_inputs)[0], 1, 1])
  # Flatten the spatial dims: [B, s1*s2, s3].
  out = tf.reshape(
      state_inputs,
      (-1, np.prod(self.out_shape[1:-1]), self.out_shape[-1]))
  out = tf.matmul(projection_mat, out)  # [B, sum(factors), s3]
  # [B, factors[0], s3], [B, factors[1], s3], [B, factors[2], s3]
  fac1, fac2, fac3 = tf.split(out, self.factors, axis=1)
  out = tensor_concat(fac1, fac2, fac3)  # [B, f1, f2, f3, s3*3]
  # [B, prod(factors), s3*3]; prod(factors) = 800 for [8, 10, 10].
  out = tf.reshape(
      out, [-1, np.prod(self.factors), self.out_shape[-1] * 3])
  goal_tile = tf.expand_dims(self._dense_layer(goal_embedding), 1)  # [B, 1, s3]
  goal_tile = tf.tile(goal_tile, multiples=[1, np.prod(self.factors), 1])
  out = tf.concat([out, goal_tile], axis=-1)
  out = tf.expand_dims(out, axis=1)
  out = tf.nn.relu(self._conv_layer_1(out))
  out = tf.nn.relu(self._conv_layer_2(out))
  out = self._conv_layer_3(out)
  return tf.squeeze(out, axis=[1, 3])
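# `call` above relies on attributes created by the enclosing Keras layer:
# `self._projection_mat`, `self.out_shape`, `self.factors`, `self._dense_layer`,
# and three 1x1 conv layers. The sketch below is a hypothetical reconstruction
# of that setup (the class name and constructor signature are assumptions
# inferred only from what `call` uses), not the actual class definition.


class QFactorDiscreteHead(tf.keras.layers.Layer):  # hypothetical name
  """Factored Q-value head; Keras counterpart of build_q_factor_discrete."""

  def __init__(self, out_shape, factors=(8, 10, 10), **kwargs):
    super(QFactorDiscreteHead, self).__init__(**kwargs)
    self.out_shape = list(out_shape)  # encoder output shape [B, s1, s2, s3]
    self.factors = list(factors)
    self._dense_layer = tf.keras.layers.Dense(self.out_shape[-1])
    # 1x1 convs; call() applies the ReLUs externally.
    self._conv_layer_1 = tf.keras.layers.Conv2D(100, 1, 1)
    self._conv_layer_2 = tf.keras.layers.Conv2D(32, 1, 1)
    self._conv_layer_3 = tf.keras.layers.Conv2D(1, 1, 1)
    # Leading singleton dim so call() can tile the matrix across the batch.
    self._projection_mat = self.add_weight(
        name='projection_matrix',
        shape=(1, sum(self.factors), int(np.prod(self.out_shape[1:-1]))),
        dtype=tf.float32,
        trainable=True)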