Example 1: fully connected layer with an optional GLU activation
def apply(self, is_train, x, mask=None):
    n_out = self.n_out
    if n_out is None:
        # Default to preserving the input's last dimension.
        n_out = x.shape.as_list()[-1]
    if self.activation == "glu":
        # Gated linear unit: one affine projection to 2*n_out, split into
        # a gate half and a linear half; the bias is added to the gate only.
        gate, lin = tf.split(ops.affine(x, n_out * 2, "w"), 2, -1)
        gate += tf.get_variable("b",
                                n_out,
                                initializer=tf.zeros_initializer())
        return tf.nn.sigmoid(gate) * lin
    else:
        return activation_fn(ops.affine(x, n_out, "w", bias_name="b"),
                             self.activation)
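
The GLU branch above is self-contained enough to reproduce outside the class. A minimal standalone sketch, assuming TensorFlow 1.x; tf.layers.dense and the variable names "glu-proj"/"glu-b" are stand-ins for the repo-specific ops.affine and its naming:

import tensorflow as tf

def glu(x, n_out):
    # One projection to 2 * n_out, split into a gate half and a linear half;
    # as above, the bias is added to the gate half only.
    proj = tf.layers.dense(x, n_out * 2, use_bias=False, name="glu-proj")
    gate, lin = tf.split(proj, 2, axis=-1)
    gate += tf.get_variable("glu-b", [n_out],
                            initializer=tf.zeros_initializer())
    return tf.nn.sigmoid(gate) * lin

The sigmoid gate modulates the linear half element-wise, letting the layer learn how much of each feature to pass through.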
Example 2: sentence-pair classifier with shared (siamese) weights and a debiasing loss
  def apply(self, is_train, features, labels):
    hypoth, premise = self.get_text_embeddings(is_train, features)
    h_embed, h_mask = hypoth.embeddings, hypoth.mask
    p_embed, p_mask = premise.embeddings, premise.mask

    if self.map_embed is not None:
      with tf.variable_scope("map-embed"):
        h_embed = self.map_embed.apply(is_train, h_embed, h_mask)
      # reuse=True applies the same mapper weights to the premise.
      with tf.variable_scope("map-embed", reuse=True):
        p_embed = self.map_embed.apply(is_train, p_embed, p_mask)

    with tf.variable_scope("fuse"):
      p_fused, h_fused = self.bifuse_layer.apply(is_train, p_embed, h_embed, p_mask, h_mask)

    with tf.variable_scope("post-process-fused"):
      p_fused = self.post_process_layer.apply(is_train, p_fused, p_mask)

    with tf.variable_scope("post-process-fused", reuse=True):
      h_fused = self.post_process_layer.apply(is_train, h_fused, h_mask)

    with tf.variable_scope("pool"):
      p_pooled = self.pool_layer.apply(is_train, p_fused, p_mask)

    with tf.variable_scope("pool", reuse=True):
      h_pooled = self.pool_layer.apply(is_train, h_fused, h_mask)

    joint = tf.concat([p_pooled, h_pooled], 1)
    with tf.variable_scope("post-process-pooled"):
      joint = self.process_joint.apply(is_train, joint)

    logits = ops.affine(joint, self.n_classes, "w", "b")
    if labels is not None and "bias" in features:
      # Compute the debiasing loss from the logits and the precomputed
      # bias predictions, and register it for the training objective.
      loss = self.debias_loss_fn.compute_clf_loss(joint, logits, features["bias"], labels)
      tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return logits
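
The repeated reuse=True scopes are what make this a siamese setup: premise and hypothesis are processed by identical weights at every stage. A minimal sketch of the pattern, assuming TensorFlow 1.x; encode and the shapes are illustrative only:

import tensorflow as tf

def encode(x):
    return tf.layers.dense(x, 64, tf.nn.relu, name="enc")

p = tf.placeholder(tf.float32, [None, 128])
h = tf.placeholder(tf.float32, [None, 128])

with tf.variable_scope("shared"):
    p_enc = encode(p)
with tf.variable_scope("shared", reuse=True):
    h_enc = encode(h)  # reuses the exact kernel/bias created for p_enc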
Example 3: extractive QA model with a debiasing loss
    def apply(self, is_train, features, labels):
        # For QA, the "hypothesis" slot holds the question and the
        # "premise" slot holds the passage.
        hypoth, premise = self.get_text_embeddings(is_train, features)
        q_embed, q_mask = hypoth.embeddings, hypoth.mask
        p_embed, p_mask = premise.embeddings, premise.mask

        if self.map_embed is not None:
            with tf.variable_scope("map-embed"):
                q_embed = self.map_embed.apply(is_train, q_embed, q_mask)
            with tf.variable_scope("map-embed", reuse=True):
                p_embed = self.map_embed.apply(is_train, p_embed, p_mask)

        with tf.variable_scope("fuse"):
            fused = self.fuse_layer.apply(is_train, p_embed, q_embed, p_mask,
                                          q_mask)

        with tf.variable_scope("post-process-fused"):
            fused = self.post_process_layer.apply(is_train, fused, p_mask)

        # Two logits per passage token; masked before being returned below.
        logits = ops.affine(fused, 2, "predict-w")

        if labels is not None and "bias" in features:
            loss = self.debias_loss_fn.compute_qa_loss(q_embed, fused, logits,
                                                       features["bias"],
                                                       labels["answer_tokens"],
                                                       p_mask)
            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        return ops.mask_logits(logits, p_mask)
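
ops.mask_logits is repo-specific, but the common implementation adds a large negative constant at padded positions so a downstream softmax assigns them negligible probability. A sketch under that assumption, with mask holding 1s for real tokens and 0s for padding, broadcastable to logits:

import tensorflow as tf

def mask_logits(logits, mask):
    # Padded positions get -1e30, i.e. effectively zero probability
    # after a softmax over the sequence axis.
    return logits + (1.0 - tf.cast(mask, tf.float32)) * -1e30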
Example 4: gated (highway-style) connection around a wrapped layer
    def apply(self, is_train, x, mask=None):
        with tf.variable_scope("layer"):
            out = self.layer.apply(is_train, x, mask)
        dim = out.shape.as_list()[-1]

        if isinstance(self.transform, (Mapper, SequenceMapper)):
            # Learned transform branch: a separate affine projection
            # produces the gate.
            with tf.variable_scope("transform"):
                transform = self.transform.apply(is_train, x, mask)
            gate = ops.affine(x, dim, "w", "b")
        else:
            # Here self.transform names an activation: a single affine
            # projection is split into the gate and the transformed input.
            proj = ops.affine(x, dim * 2, "w", bias_name="b")
            gate, transform = tf.split(proj, 2, 2)
            transform = activation_fn(transform, self.transform)

        # Highway-style mix: gate * layer(x) + (1 - gate) * transform(x).
        gate = tf.sigmoid(gate)
        return transform * (1 - gate) + gate * out
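
With g = sigmoid(gate), the output is g * out + (1 - g) * transform, so the network learns per-feature how much of the wrapped layer's output to keep versus the transformed input. A standalone sketch of the else-branch, assuming TensorFlow 1.x, with tf.layers.dense in place of ops.affine and ReLU in place of activation_fn:

import tensorflow as tf

def gated_mix(x, out):
    # x, out: [batch, time, dim]; a single projection of x yields
    # both the gate and the candidate transform.
    dim = out.shape.as_list()[-1]
    proj = tf.layers.dense(x, dim * 2, name="gate-proj")
    gate, transform = tf.split(proj, 2, axis=2)
    transform = tf.nn.relu(transform)
    gate = tf.sigmoid(gate)
    return transform * (1 - gate) + gate * out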
Example 5: QA debiasing loss with a learned bias scale and an entropy penalty
    def compute_qa_loss(self, question_hidden, passage_hidden, logits, bias,
                        labels, mask):
        logits = tf.nn.log_softmax(logits, 1)

        # Pool the question and passage states, then predict a per-example
        # scale for the bias; softplus keeps the scale positive.
        p1 = ops.max_pool(ops.affine(question_hidden, self.dim, "q-w", "q-b"),
                          mask)
        p2 = ops.max_pool(ops.affine(passage_hidden, self.dim, "p-w", "p-b"),
                          mask)
        hidden = tf.concat([p1, p2], 1)  # [batch, dim*2]
        factor = ops.affine(hidden, 1, "scale-w", "scale-b")  # [batch, 1]
        factor = tf.nn.softplus(factor)
        bias = bias * tf.expand_dims(factor, 2)

        # Train on the sum of scaled bias log-probs and model log-probs.
        loss = tf.reduce_mean(compute_nll(bias + logits, labels, mask))

        if self.w == 0:
            return loss

        # Entropy H = -sum(p * log p) of the scaled, masked bias distribution;
        # the weighted penalty keeps it from collapsing toward uniform.
        bias_lp = tf.nn.log_softmax(ops.mask_logits(bias, mask), 1)
        entropy = -tf.reduce_mean(tf.reduce_sum(tf.exp(bias_lp) * bias_lp, 1))

        return loss + self.w * entropy
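
For intuition on the penalty: the entropy of a distribution p is H(p) = -sum_i p_i * log p_i, so minimizing loss + self.w * entropy keeps the scaled bias distribution sharp rather than letting the learned factor wash it out. A minimal numeric sketch, assuming NumPy:

import numpy as np

log_p = np.log([0.7, 0.2, 0.1])           # bias log-probabilities
entropy = -np.sum(np.exp(log_p) * log_p)  # H = -sum(p * log p), ~0.80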