Example #1
    def call(self, x, y, mask, training=False):
        self.step += 1
        x_ = x

        x = dropout(x, keep_prob=self.keep_prob, training=training)
        y = dropout(y, keep_prob=self.keep_prob, training=training)

        # build the projection lazily on the first call (self.step is assumed to start at -1)
        if self.step == 0:
            if not self.identity:
                self.linear = layers.Dense(melt.get_shape(x, -1),
                                           activation=tf.nn.relu)
            else:
                self.linear = None

        # NOTICE: the same linear projection is shared between x and y
        if self.linear is not None:
            x = self.linear(x)
            y = self.linear(y)

        scores = tf.matmul(x, tf.transpose(y, [0, 2, 1]))

        if mask is not None:
            JX = melt.get_shape(x, 1)
            mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
            scores = softmax_mask(scores, mask)

        alpha = tf.nn.softmax(scores)
        self.alpha = alpha

        y = tf.matmul(alpha, y)

        if self.combine is None:
            return y
        else:
            return self.combine(x_, y, training=training)
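These examples rely on two helpers from the surrounding codebase, dropout and softmax_mask, whose definitions are not shown. The sketch below is only a plausible reconstruction of what such helpers typically look like in R-Net/BiDAF-style TensorFlow code; the actual melt implementations may differ:

import tensorflow as tf

def softmax_mask(val, mask, very_negative=-1e30):
    # Add a very large negative value at masked-out positions so that a
    # following tf.nn.softmax assigns them (numerically) zero probability.
    return val + very_negative * (1.0 - tf.cast(mask, tf.float32))

def dropout(x, keep_prob=1.0, training=False):
    # Apply dropout only at training time; at inference return x unchanged.
    # (keep_prob follows the TF1 convention; tf.nn.dropout takes rate = 1 - keep_prob.)
    if training and keep_prob < 1.0:
        return tf.nn.dropout(x, rate=1.0 - keep_prob)
    return x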
Example #2
    def call(self, inputs, memory, inputs_mask, memory_mask, training=False):
        combiner = self.combiner
        # DotAttention already converted to dot_attention
        #with tf.variable_scope(self.scope):
        d_inputs = dropout(inputs, keep_prob=self.keep_prob, training=training)
        d_memory = dropout(memory, keep_prob=self.keep_prob, training=training)
        JX = tf.shape(inputs)[1]

        with tf.variable_scope("attention"):
            inputs_ = self.inputs_dense(d_inputs)
            memory_ = self.memory_dense(d_memory)

            # shared matrix for c2q and q2c attention
            scores = tf.matmul(inputs_, tf.transpose(
                memory_, [0, 2, 1])) / (self.hidden**0.5)

            # c2q attention
            mask = memory_mask
            if mask is not None:
                mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
                scores = softmax_mask(scores, mask)

            alpha = tf.nn.softmax(scores)
            self.alpha = alpha
            c2q = tf.matmul(alpha, memory)

            # TODO: check this against the allennlp implementation, since results here are not good...
            # q2c attention
            # (batch_size, clen)
            logits = tf.reduce_max(scores, -1)
            mask = inputs_mask
            if mask is not None:
                logits = softmax_mask(logits, mask)
            alpha2 = tf.nn.softmax(logits)
            # alpha2: (batch_size, clen), inputs: (batch_size, clen, dim)
            q2c = tf.matmul(tf.expand_dims(alpha2, 1), inputs)  # (batch_size, 1, dim)
            # tile back over the context length: (batch_size, clen, dim)
            q2c = tf.tile(q2c, [1, JX, 1])

            outputs = tf.concat([c2q, q2c], -1)

        if self.combine is not None:
            return self.combine(inputs, outputs, training=training)
        else:
            return outputs
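Shape-wise, Example #2 is BiDAF-style bidirectional attention: with inputs of shape (batch, clen, dim) and memory of shape (batch, qlen, dim), scores is (batch, clen, qlen), c2q is (batch, clen, dim), and q2c is first reduced to (batch, 1, dim) and then tiled back to (batch, clen, dim) before concatenation. The following is a hedged, self-contained sketch of the same computation; it inlines the masking, omits the dense projections and the combine step, and is not the actual melt API:

import tensorflow as tf

def bidaf_attention(inputs, memory, inputs_mask, memory_mask, hidden=100):
    # inputs: (batch, clen, dim), memory: (batch, qlen, dim)
    # inputs_mask: (batch, clen), memory_mask: (batch, qlen); 1 = valid position.
    very_negative = -1e30
    clen = tf.shape(inputs)[1]

    # shared score matrix, scaled as in the example above
    scores = tf.matmul(inputs, memory, transpose_b=True) / (hidden ** 0.5)   # (batch, clen, qlen)

    # context-to-query: softmax over query positions, masked by memory_mask
    m_mask = tf.cast(tf.expand_dims(memory_mask, 1), tf.float32)             # (batch, 1, qlen)
    c2q = tf.matmul(tf.nn.softmax(scores + very_negative * (1.0 - m_mask)), memory)

    # query-to-context: max over query positions, softmax over context positions
    logits = tf.reduce_max(scores, axis=-1)                                  # (batch, clen)
    logits += very_negative * (1.0 - tf.cast(inputs_mask, tf.float32))
    alpha2 = tf.nn.softmax(logits)                                           # (batch, clen)
    q2c = tf.matmul(tf.expand_dims(alpha2, 1), inputs)                       # (batch, 1, dim)
    q2c = tf.tile(q2c, [1, clen, 1])                                         # (batch, clen, dim)

    return tf.concat([c2q, q2c], axis=-1)                                    # (batch, clen, 2*dim)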
Example #3
    def call(self, inputs, memory, mask, self_match=False, training=False):
        combiner = self.combiner
        # DotAttention already converted to dot_attention
        #with tf.variable_scope(self.scope):
        # TODO: there may be a problem here for self-match dot attention, since the same inputs get different dropout masks. Try self_match == True and verify.
        # NOTICE: self_match == False follows the HKUST R-Net implementation
        d_inputs = dropout(inputs, keep_prob=self.keep_prob, training=training)
        if not self_match:
            d_memory = dropout(memory,
                               keep_prob=self.keep_prob,
                               training=training)
        else:
            d_memory = d_inputs
        JX = tf.shape(inputs)[1]

        # TODO: remove this scope?
        with tf.variable_scope("attention"):
            inputs_ = self.inputs_dense(d_inputs)
            if not self_match:
                memory_ = self.memory_dense(d_memory)
            else:
                memory_ = inputs_

            scores = tf.matmul(inputs_, tf.transpose(
                memory_, [0, 2, 1])) / (self.hidden**0.5)

            if mask is not None:
                mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
                #print(inputs_.shape, memory_.shape, scores.shape, mask.shape)
                # e.g. (32, 318, 100) (32, 26, 100) (32, 318, 26) (32, 318, 26)
                scores = softmax_mask(scores, mask)

            alpha = tf.nn.softmax(scores)
            self.alpha = alpha
            # alpha: (32, 326, 326), memory: (32, 326, 200)
            outputs = tf.matmul(alpha, memory)

        if self.combine is not None:
            return self.combine(inputs, outputs, training=training)
        else:
            return outputs
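The self_match branch in Example #3 reuses the same dropped-out tensor and the same dense projection for both sides of the score matrix, so a token's query and key come from one projection of one dropout sample. A minimal sketch of that path, assuming TF2 eager execution (function and variable names here are illustrative, not the actual melt API):

import tensorflow as tf

def self_match_scores(inputs, dense, keep_prob=1.0, hidden=100, training=False):
    # Self-match: inputs and memory are the same tensor, so a single dropout
    # sample and a single projection are shared by both sides of the scores.
    d_inputs = tf.nn.dropout(inputs, rate=1.0 - keep_prob) if training else inputs
    projected = dense(d_inputs)                                    # (batch, len, hidden)
    return tf.matmul(projected, projected, transpose_b=True) / (hidden ** 0.5)

# illustrative usage
dense = tf.keras.layers.Dense(100, use_bias=False)
scores = self_match_scores(tf.random.uniform([2, 5, 64]), dense,
                           keep_prob=0.7, hidden=100, training=True)  # (2, 5, 5)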
Example #4
    def call(self, x, mask, training=False):
        self.step += 1
        x_ = x
        x = dropout(x, keep_prob=self.keep_prob, training=training)

        if self.step == 0:
            if not self.identity:
                self.linear = layers.Dense(melt.get_shape(x, -1),
                                           activation=tf.nn.relu)
            else:
                self.linear = None

        # NOTICE shared linear!
        if self.linear is not None:
            x = self.linear(x)

        scores = tf.matmul(x, tf.transpose(x, [0, 2, 1]))

        #  x = tf.constant([[[1,2,3], [4,5,6],[7,8,9]],[[1,2,3],[4,5,6],[7,8,9]]], dtype=tf.float32) # shape=(2, 3, 3)
        #  z = tf.matrix_set_diag(x, tf.zeros([2, 3]))
        if not self.diag:
            # TODO: find a better way to get these dims
            dim0 = melt.get_shape(scores, 0)
            dim1 = melt.get_shape(scores, 1)
            scores = tf.matrix_set_diag(scores, tf.zeros([dim0, dim1]))

        if mask is not None:
            JX = melt.get_shape(x, 1)
            mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
            scores = softmax_mask(scores, mask)

        alpha = tf.nn.softmax(scores)
        self.alpha = alpha

        x = tf.matmul(alpha, x)

        if self.combine is None:
            return x
        else:
            return self.combine(x_, x, training=training)
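The commented-out snippet in Example #4 shows the intent of the diag branch: zero out each position's score against itself before the softmax. A hedged, self-contained version of the same trick (tf.matrix_set_diag is the TF1 name for tf.linalg.set_diag):

import tensorflow as tf

# Zero the score each position assigns to itself so that, in self-attention,
# a token does not trivially favour its own representation.
scores = tf.constant([[[1., 2., 3.],
                       [4., 5., 6.],
                       [7., 8., 9.]],
                      [[1., 2., 3.],
                       [4., 5., 6.],
                       [7., 8., 9.]]])                  # shape (2, 3, 3)
scores = tf.linalg.set_diag(scores, tf.zeros([2, 3]))   # diagonal entries -> 0
# Note: the later softmax still gives the diagonal a weight proportional to
# exp(0); the code above zeroes the raw score rather than fully masking it.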