def apply(self, is_train, x, mask=None):
    n_out = self.n_out
    if n_out is None:
        n_out = x.shape.as_list()[-1]
    if self.activation == "glu":
        # Gated linear unit: project to 2*n_out, split into gate and linear halves
        gate, lin = tf.split(ops.affine(x, n_out * 2, "w"), 2, -1)
        gate += tf.get_variable("b", n_out, initializer=tf.zeros_initializer())
        return tf.nn.sigmoid(gate) * lin
    else:
        return activation_fn(ops.affine(x, n_out, "w", bias_name="b"), self.activation)
def apply(self, is_train, features, labels):
    hypoth, premise = self.get_text_embeddings(is_train, features)
    h_embed, h_mask = hypoth.embeddings, hypoth.mask
    p_embed, p_mask = premise.embeddings, premise.mask

    if self.map_embed is not None:
        # Map the hypothesis and premise embeddings with shared weights
        with tf.variable_scope("map-embed"):
            h_embed = self.map_embed.apply(is_train, h_embed, h_mask)
        with tf.variable_scope("map-embed", reuse=True):
            p_embed = self.map_embed.apply(is_train, p_embed, p_mask)

    with tf.variable_scope("fuse"):
        p_fused, h_fused = self.bifuse_layer.apply(
            is_train, p_embed, h_embed, p_mask, h_mask)

    with tf.variable_scope("post-process-fused"):
        p_fused = self.post_process_layer.apply(is_train, p_fused, p_mask)
    with tf.variable_scope("post-process-fused", reuse=True):
        h_fused = self.post_process_layer.apply(is_train, h_fused, h_mask)

    # Pool each sequence to a fixed-size vector, again with shared weights
    with tf.variable_scope("pool"):
        p_pooled = self.pool_layer.apply(is_train, p_fused, p_mask)
    with tf.variable_scope("pool", reuse=True):
        h_pooled = self.pool_layer.apply(is_train, h_fused, h_mask)

    joint = tf.concat([p_pooled, h_pooled], 1)
    with tf.variable_scope("post-process-pooled"):
        joint = self.processs_joint.apply(is_train, joint)

    logits = ops.affine(joint, self.n_classes, "w", "b")

    if labels is not None and "bias" in features:
        # Training: combine the classifier logits with the bias predictions
        # from the features via the debiasing loss
        loss = self.debias_loss_fn.compute_clf_loss(
            joint, logits, features["bias"], labels)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return logits
def apply(self, is_train, features, labels):
    hypoth, premise = self.get_text_embeddings(is_train, features)
    q_embed, q_mask = hypoth.embeddings, hypoth.mask
    p_embed, p_mask = premise.embeddings, premise.mask

    if self.map_embed is not None:
        # Map the question and passage embeddings with shared weights
        with tf.variable_scope("map-embed"):
            q_embed = self.map_embed.apply(is_train, q_embed, q_mask)
        with tf.variable_scope("map-embed", reuse=True):
            p_embed = self.map_embed.apply(is_train, p_embed, p_mask)

    with tf.variable_scope("fuse"):
        fused = self.fuse_layer.apply(is_train, p_embed, q_embed, p_mask, q_mask)

    with tf.variable_scope("post-process-fused"):
        fused = self.post_process_layer.apply(is_train, fused, p_mask)

    # Per-token start/end logits over the passage
    logits = ops.affine(fused, 2, "predict-w")

    if labels is not None and "bias" in features:
        loss = self.debias_loss_fn.compute_qa_loss(
            q_embed, fused, logits, features["bias"],
            labels["answer_tokens"], p_mask)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return ops.mask_logits(logits, p_mask)
def apply(self, is_train, x, mask=None):
    with tf.variable_scope("layer"):
        out = self.layer.apply(is_train, x, mask)
    dim = out.shape.as_list()[-1]

    if isinstance(self.transform, (Mapper, SequenceMapper)):
        with tf.variable_scope("transform"):
            transform = self.transform.apply(is_train, x, mask)
        gate = ops.affine(x, dim, "w", "b")
    else:
        # `self.transform` names an activation: gate and transform come from a
        # single projection of the input
        proj = ops.affine(x, dim * 2, "w", bias_name="b")
        gate, transform = tf.split(proj, 2, 2)
        transform = activation_fn(transform, self.transform)

    # Highway-style mixing of the transformed input and the layer output
    gate = tf.sigmoid(gate)
    return transform * (1 - gate) + gate * out
def compute_qa_loss(self, question_hidden, passage_hidden, logits, bias, labels, mask):
    logits = tf.nn.log_softmax(logits, 1)

    # Learn a per-example positive scale factor from pooled question/passage states
    p1 = ops.max_pool(ops.affine(question_hidden, self.dim, "q-w", "q-b"), mask)
    p2 = ops.max_pool(ops.affine(passage_hidden, self.dim, "p-w", "p-b"), mask)
    hidden = tf.concat([p1, p2], 1)  # [batch, dim*2]
    factor = ops.affine(hidden, 1, "scale-w", "scale-b")  # [batch, 1]
    factor = tf.nn.softplus(factor)  # keep the scale positive

    # Ensemble the model's log-probabilities with the scaled bias log-probabilities
    bias = bias * tf.expand_dims(factor, 2)
    loss = tf.reduce_mean(compute_nll(bias + logits, labels, mask))
    if self.w == 0:
        return loss

    # Entropy penalty on the scaled bias distribution
    bias_lp = tf.nn.log_softmax(ops.mask_logits(bias, mask), 1)
    entropy = -tf.reduce_mean(tf.reduce_sum(tf.exp(bias_lp) * bias_lp, 1))
    return loss + self.w * entropy
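# Illustrative sketch, not part of the original code: the loss above follows a
# learned-mixin style ensemble, adding bias log-probabilities (scaled by a learned
# positive factor) to the model's log-probabilities, with an optional entropy
# penalty on the scaled bias. The names below (log_softmax, g, bias_log_probs,
# model_logits) are assumptions made only for this toy numpy example.
import numpy as np

def log_softmax(x, axis):
    x = x - x.max(axis=axis, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=axis, keepdims=True))

batch, passage_len = 2, 4
model_logits = np.random.randn(batch, passage_len, 2)            # start/end logits
bias_log_probs = log_softmax(np.random.randn(batch, passage_len, 2), axis=1)
g = np.array([[0.7], [1.3]])[:, :, None]                         # learned scale, [batch, 1, 1]

# Ensembled log-scores, analogous to `bias + logits` above
ensemble = log_softmax(model_logits, axis=1) + g * bias_log_probs

# Entropy of the scaled-bias distribution, analogous to the `self.w * entropy` term
scaled = log_softmax(g * bias_log_probs, axis=1)
entropy = -(np.exp(scaled) * scaled).sum(axis=1).mean()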