Example #1
        def self_attention(output,
                           seq_len,
                           out_size,
                           activation_fn=tf.tanh,
                           dropout=None,
                           is_train=False):
            '''
            :param output: shape(b_sz, tstp, dim) sequence of encoder states
            :param seq_len: shape(b_sz,) true length of each sequence
            :param out_size: scalar, size of the attention projection
            :return: raw scores, attention weights, attended context vector
            '''
            with tf.variable_scope('self-attention'):
                context_vector = tf.get_variable(name='context_vector',
                                                 shape=[out_size],
                                                 dtype=tf.float32)
                mlp = tf.layers.dense(output,
                                      out_size,
                                      activation=activation_fn,
                                      name='mlp')  # shape(b_sz, tstp, out_size)
                attn = tf.tensordot(mlp, context_vector, axes=[[2], [0]])  # shape(b_sz, tstp)
                attn_normed1 = masked_softmax(attn, seq_len)  # shape(b_sz, tstp)

                attn_normed = tf.expand_dims(attn_normed1, axis=-1)  # shape(b_sz, tstp, 1)
                attn_ctx = tf.matmul(mlp, attn_normed, transpose_a=True)  # shape(b_sz, out_size, 1)
                attn_ctx = tf.squeeze(attn_ctx, axis=[2])  # shape(b_sz, out_size)
                if dropout is not None:
                    attn_ctx = tf.layers.dropout(attn_ctx,
                                                 rate=dropout,
                                                 training=is_train)
            return attn, attn_normed1, attn_ctx
    def task_specific_attention(in_x,
                                xLen,
                                out_sz,
                                activation_fn=tf.tanh,
                                dropout=None,
                                is_train=False,
                                scope=None):
        '''
        :param in_x: shape(b_sz, tstp, dim) input sequence
        :param xLen: shape(b_sz,) true length of each sequence
        :param out_sz: scalar, size of the attention projection
        :param activation_fn: activation for the projection layer
        :param dropout: dropout rate applied to the context vector, or None
        :param is_train: bool, whether dropout is active
        :param scope: variable scope name (defaults to 'attention')
        :return: attended context vector, shape(b_sz, out_sz)
        '''

        assert (len(in_x.get_shape()) == 3
                and in_x.get_shape()[-1].value is not None)

        with tf.variable_scope(scope or 'attention') as scope:
            context_vector = tf.get_variable(name='context_vector',
                                             shape=[out_sz],
                                             dtype=tf.float32)  ## q
            in_x_mlp = tf.layers.dense(
                in_x, out_sz, activation=activation_fn, name='mlp'
            )  # shape(b_sz, tstp, out_sz) ## h, https://www.tensorflow.org/api_docs/python/tf/layers/dense

            attn = tf.tensordot(in_x_mlp, context_vector,
                                axes=[[2], [0]])  # shape(b_sz, tstp) ## u = q*h
            attn_normed = masked_softmax(attn, xLen)  ## ai = exp(ui) / sum(exp(ui))

            attn_normed = tf.expand_dims(attn_normed, axis=-1)  # shape(b_sz, tstp, 1)
            attn_ctx = tf.matmul(in_x_mlp, attn_normed,
                                 transpose_a=True)  # shape(b_sz, out_sz, 1) ## e = ai * hi
            attn_ctx = tf.squeeze(attn_ctx, axis=[2])  # shape(b_sz, out_sz)
            if dropout is not None:
                attn_ctx = tf.layers.dropout(attn_ctx,
                                             rate=dropout,
                                             training=is_train)
        return attn_ctx
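
Both functions in this example assume a masked_softmax helper that is not part of TensorFlow itself. A minimal sketch of such a helper, assuming 2-D logits of shape (b_sz, tstp) and integer lengths of shape (b_sz,), might look like this; the penalty constant and exact masking strategy are assumptions, not taken from the original source:

    import tensorflow as tf

    def masked_softmax(logits, seq_len):
        # Hedged stand-in for the helper the examples rely on.
        # logits: shape(b_sz, tstp); seq_len: shape(b_sz,) true lengths.
        mask = tf.sequence_mask(seq_len,
                                maxlen=tf.shape(logits)[1],
                                dtype=tf.float32)  # 1.0 for real steps, 0.0 for padding
        # Push padded positions toward -inf so they receive ~0 attention weight.
        masked_logits = logits + (1.0 - mask) * (-1e30)
        return tf.nn.softmax(masked_logits)  # normalized over the time axis

Any implementation that zeroes out (or heavily penalizes) the padded positions before normalizing should be interchangeable here.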
Example #3
    def __call__(self, inputs, state, scope=None):
        """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
        '''
            inputs: shape(b_sz, emb)
            state: shape(b_sz, h_sz)
        '''
        with vs.variable_scope(scope or "attention_cell"):

            tmp = _linear([inputs, state], self._num_units, bias=False, scope='attn_linear')     # shape(b_sz, h_sz)
            tmp = tf.tile(tf.expand_dims(tmp, axis=1), [1, self.tstp_pre, 1])   # shape(b_sz, tstp_pre, h_sz)
            M_t = tanh(self.Premise_Linear + tmp)    # shape(b_sz, tstp_pre, h_sz)

            Mt_linear = tf.squeeze(last_dim_linear(M_t, 1, bias=False, scope='M_t_linear'), [2]) # shape(b_sz, tstp_pre)
            Alpha_t = masked_softmax(Mt_linear, self.Premise_seqLen)    # shape(b_sz, tstp_pre)

            tmp1 = tf.reduce_sum(tf.expand_dims(Alpha_t, 2) * self.Premise_out, axis=1)    # shape(b_sz, h_sz)
            tmp2 = tanh(_linear(state, self._num_units, bias=False, scope='final_linear'))     # shape(b_sz, h_sz)
            next_state = tmp1 + tmp2

        return next_state, next_state
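
Example #3 depends on several project-specific helpers that are not shown (_linear, last_dim_linear, masked_softmax, and the precomputed self.Premise_Linear / self.Premise_out tensors). As one illustration, a minimal last_dim_linear, assuming it simply applies a shared linear map over the last axis, could be sketched as:

    import tensorflow as tf

    def last_dim_linear(inputs, output_size, bias=True, scope=None):
        # Hedged sketch: tf.layers.dense already applies one shared linear map
        # over the last axis of an N-D tensor, e.g. shape(b_sz, tstp_pre, h_sz)
        # -> shape(b_sz, tstp_pre, output_size), so the helper may be a thin wrapper.
        with tf.variable_scope(scope or 'last_dim_linear'):
            return tf.layers.dense(inputs, output_size, use_bias=bias, name='linear')

The real helper in the source repository may handle initialization or variable reuse differently; this only mirrors the shape contract implied by the M_t_linear call above.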