Example #1
def traditional_attention(rep_tensor,
                          rep_mask,
                          scope=None,
                          keep_prob=1.,
                          is_train=None,
                          wd=0.,
                          activation='elu',
                          tensor_dict=None,
                          name=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'traditional_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map],
                                       None,
                                       False,
                                       scope='self_attn_logits',
                                       mask=rep_mask,
                                       input_keep_prob=keep_prob,
                                       is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        return attn_res
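The helpers bn_dense_layer, get_logits and softsel are defined elsewhere in the repository and are not shown here. As a rough, self-contained sketch of the masked softmax selection that softsel presumably performs (the names masked_softsel and VERY_NEGATIVE below are ours, not the project's API):

import tensorflow as tf

VERY_NEGATIVE = -1e30  # acts as -inf for a float32 softmax

def masked_softsel(rep_tensor, logits, mask):
    # rep_tensor: [bs, sl, vec]; logits: [bs, sl]; mask: [bs, sl] boolean
    masked_logits = tf.where(mask, logits, VERY_NEGATIVE * tf.ones_like(logits))
    weights = tf.nn.softmax(masked_logits)  # bs, sl
    return tf.reduce_sum(tf.expand_dims(weights, -1) * rep_tensor, 1)  # bs, vec
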
Example #2
def normal_attention(rep_tensor,
                     rep_mask,
                     scope=None,
                     keep_prob=1.,
                     is_train=None,
                     wd=0.,
                     activation='elu',
                     tensor_dict=None,
                     name=None):
    batch_size, code_len, vec_size = tf.shape(rep_tensor)[0], tf.shape(
        rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'normal_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map],
                                       None,
                                       False,
                                       scope='self_attn_logits',
                                       mask=rep_mask,
                                       input_keep_prob=keep_prob,
                                       is_train=is_train)  # bs,sl
        attn_result = softsel(rep_tensor, rep_tensor_logits,
                              rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                linear(rep_tensor_map, ivec, True, 0., 'linear_fusion_i',
                       False, wd, keep_prob, is_train) +
                linear(attn_result, ivec, True, 0., 'linear_fusion_a', False,
                       wd, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_tensor_map + (1 -
                                                     fusion_gate) * attn_result
            output = mask_for_high_rank(output, rep_mask)  # bs,sl,vec
        return output
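The 'output' scope blends the mapped input with the attention summary through a learned sigmoid gate. A minimal stand-alone sketch of that gated fusion, using tf.layers.dense in place of the repository's linear helper and assuming both inputs share the same shape (the function name is ours):

import tensorflow as tf

def gated_fusion(rep_map, attn_result, ivec):
    # rep_map, attn_result: [..., ivec]; returns a gated mixture of the two
    gate_logits = (tf.layers.dense(rep_map, ivec, use_bias=False, name='linear_fusion_i')
                   + tf.layers.dense(attn_result, ivec, use_bias=True, name='linear_fusion_a'))
    fusion_gate = tf.nn.sigmoid(gate_logits)
    return fusion_gate * rep_map + (1. - fusion_gate) * attn_result
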
Example #3
def self_choose_attention(rep_tensor, rep_mask, hn,  # correct
                          keep_prob=1., is_train=None, scope=None, simplify=False):
    """
    Self soft-choose attention.
    :param rep_tensor: rank must be 3 [bs,sl,hn]
    :param rep_mask: [bs,sl]
    :param hn: hidden size of the linear map
    :param keep_prob: dropout keep probability
    :param is_train: training flag
    :param scope: variable scope name
    :param simplify: if True, skip the linear map and attend over rep_tensor directly
    :return: attention result [bs,hn]
    """
    with tf.variable_scope(scope or 'self_choose_attention'):
        if not simplify:
            rep_tensor_map = tf.nn.relu(linear([rep_tensor], hn, True, scope='linear_map',
                                        input_keep_prob=keep_prob, is_train=is_train))
        else:
            rep_tensor_map = tf.identity(rep_tensor)
        rep_tensor_logits = get_logits([rep_tensor_map], None, False, scope='self_attn_logits',
                                       mask=rep_mask, input_keep_prob=keep_prob, is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec
        return attn_res
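rep_mask is a boolean [bs,sl] tensor marking real tokens. A common way to build it from per-example sequence lengths, shown here only as a usage sketch (the placeholder names are ours, not part of the repository):

import tensorflow as tf

rep_tensor = tf.placeholder(tf.float32, [None, None, 300], name='rep_tensor')  # bs, sl, hn
rep_lengths = tf.placeholder(tf.int32, [None], name='rep_lengths')             # bs
rep_mask = tf.sequence_mask(rep_lengths, maxlen=tf.shape(rep_tensor)[1])       # bs, sl, boolean
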
Example #4
def gene_similarity_mat_and_mask(tensor_row, tensor_col,
                                 mask_for_tensor_row,
                                 mask_for_tensor_col,
                                 similarity_method='inner', hn=100, scope=None):
    with tf.variable_scope(scope or 'gene_similarity_mat_and_mask'):
        # --------parameters--------
        t_main = tensor_row  # [bs,sl,vec]
        t_sec = tensor_col  # [bs,ql,vec]
        mask_main = mask_for_tensor_row  # [bs,sl]
        mask_sec = mask_for_tensor_col  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------
        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql
        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd*t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)

        return similarity_mat, mask_similarity_mat
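For the 'inner' method, the expand_dims/reduce_sum pattern is just a batched dot product, so it can equivalently be written with tf.matmul and transpose_b. A quick equivalence sketch with toy shapes (variable names are ours):

import tensorflow as tf

t_main = tf.random_normal([2, 5, 8])  # bs, sl, vec
t_sec = tf.random_normal([2, 7, 8])   # bs, ql, vec
sim_a = tf.reduce_sum(tf.expand_dims(t_main, 2) * tf.expand_dims(t_sec, 1), -1)  # bs, sl, ql
sim_b = tf.matmul(t_main, t_sec, transpose_b=True)                               # bs, sl, ql, same values
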
Example #5
def normal_attention(tensor_base, tensor_to_attend,
                     mask_for_tensor_base,
                     mask_for_tensor_to_attend,
                     similarity_method='inner', hn=100,
                     use_pooling=False, pooling_method='max',
                     reverse=False, scope=None):
    """
    normal_attention for attention strategy 2 
    :param tensor_base: rank 3 [bs,sl,vec]
    :param tensor_to_attend: rank 3 [bs,ql,vec]
    :param mask_for_tensor_base: [bs,sl]
    :param mask_for_tensor_to_attend: [bs,ql]
    :param similarity_method: 'inner' 'tri_linear' 'map_linear'
    :param hn: hidden size, needed by the 'map_linear' similarity method
    :param use_pooling: True or False
    :param pooling_method: 'max' or 'mean'
    :param reverse: if True, use strategy 3 (reverse_softsel instead of normal_softsel)
    :param scope: 
    :return: [bs,vec] if use_pooling else [bs,sl,vec]
    """
    with tf.variable_scope(scope or 'normal_attention'):
        # --------parameters--------
        t_main = tensor_base  # [bs,sl,vec]
        t_sec = tensor_to_attend  # [bs,ql,vec]
        mask_main = mask_for_tensor_base  # [bs,sl]
        mask_sec = mask_for_tensor_to_attend  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------
        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql
        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd*t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)
        # -------------------------------
        if use_pooling:
            # pool mat along -2
            if pooling_method == 'max':
                pooling_out = tf.reduce_max(exp_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
            elif pooling_method == 'mean':
                sum_out = tf.reduce_sum(normal_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
                num = tf.reduce_sum(tf.cast(mask_similarity_mat, tf.int32), -2)  # bs,ql
                num = tf.where(tf.equal(num, tf.zeros_like(num, tf.int32)),
                               tf.ones_like(num, tf.int32), num)
                pooling_out = sum_out / tf.cast(num, tf.float32)  # bs,ql
            else:
                raise AttributeError('No pooling method \'%s\'' % pooling_method)
            return softsel(t_sec, pooling_out, mask_sec)  # bs,ql,vec -> bs,vec
        else:
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            # target: q_tiled:[bs,sl,ql,hn]; logits: [bs,sl,ql]
            if not reverse:
                out = normal_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            else:
                out = reverse_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            return out  # bs,sl,vec
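The 'mean' pooling branch zeroes masked cells, sums along the row axis, and guards against all-masked columns before dividing. A stand-alone sketch of that masked mean, assuming normal_mask simply zeroes masked entries (the name masked_mean is ours):

import tensorflow as tf

def masked_mean(similarity_mat, mask):
    # similarity_mat: [bs, sl, ql] float; mask: [bs, sl, ql] boolean
    masked = similarity_mat * tf.cast(mask, tf.float32)                # zero out masked cells
    total = tf.reduce_sum(masked, -2)                                  # bs, ql
    count = tf.reduce_sum(tf.cast(mask, tf.int32), -2)                 # bs, ql
    count = tf.where(tf.equal(count, 0), tf.ones_like(count), count)   # avoid division by zero
    return total / tf.cast(count, tf.float32)                          # bs, ql
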
Example #6
    def build_network(self):
        tds, tel, hn = self.tds, self.tel, self.hn
        bs, sn, sl, ql = self.bs, self.sn, self.sl, self.ql

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                scope='gene_token_emb_mat')
            c_emb = tf.nn.embedding_lookup(token_emb_mat,
                                           self.context_token)  # bs,sn,sl,tel
            q_emb = tf.nn.embedding_lookup(token_emb_mat,
                                           self.question_token)  # bs,ql,tel

        with tf.variable_scope('prepro'):
            q_rep = multi_dimensional_attention(q_emb,
                                                self.question_token_mask,
                                                'q2coding', cfg.dropout,
                                                self.is_train, cfg.wd,
                                                'relu')  # bs, hn
            q_rep_map = bn_dense_layer(q_rep, hn, True, 0., 'q_rep_map',
                                       'relu', False, cfg.wd, cfg.dropout,
                                       self.is_train)  # bs, hn

        with tf.variable_scope('sent_emb'):
            c_emb_rshp = tf.reshape(c_emb, [bs * sn, sl, tel],
                                    'c_emb_rshp')  # bs*sn,sl,tel
            c_mask_rshp = tf.reshape(self.context_token_mask, [bs * sn, sl],
                                     'c_mask_rshp')  # bs*sn,sl
            sent_enc_rshp = sentence_encoding_models(
                c_emb_rshp,
                c_mask_rshp,
                cfg.context_fusion_method,
                'relu',
                'sent2enc',
                cfg.wd,
                self.is_train,
                cfg.dropout,
                hn,
                block_len=cfg.block_len)  # bs*sn, 2*hn
            sent_enc = tf.reshape(sent_enc_rshp,
                                  [bs, sn, 2 * hn])  # bs,sn, 2*hn
            sent_enc_map = bn_dense_layer(sent_enc, hn, True, 0.,
                                          'sent_enc_map', 'relu', False,
                                          cfg.wd, cfg.dropout, self.is_train)

        with tf.variable_scope('fusion'):
            q_rep_map_ex = tf.tile(tf.expand_dims(q_rep_map, 1),
                                   [1, sn, 1])  # bs, sn, hn
            fusion_rep = tf.concat([
                sent_enc_map, q_rep_map_ex, sent_enc_map - q_rep_map_ex,
                sent_enc_map * q_rep_map_ex
            ], -1)  # bs,sn,4hn

        with tf.variable_scope('output'):
            out_cf = context_fusion_layers(fusion_rep,
                                           self.context_sent_mask,
                                           cfg.context_fusion_method,
                                           'relu',
                                           'out_cf',
                                           cfg.wd,
                                           self.is_train,
                                           cfg.dropout,
                                           hn,
                                           block_len=4)
            pre_output = bn_dense_layer(out_cf, hn, True, 0., 'pre_output',
                                        'relu', False, cfg.wd, cfg.dropout,
                                        self.is_train)

        logits = get_logits(  # exp masked
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
        return logits
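The 'fusion' scope builds standard matching features by concatenating the sentence and question representations with their difference and element-wise product. A minimal sketch of that feature construction (the function name is ours):

import tensorflow as tf

def matching_features(sent_enc_map, q_rep_map_ex):
    # both inputs: [bs, sn, hn]; output: [bs, sn, 4*hn]
    return tf.concat([sent_enc_map, q_rep_map_ex,
                      sent_enc_map - q_rep_map_ex,
                      sent_enc_map * q_rep_map_ex], -1)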