def do_attention(self, x):
    '''
    Add none or one of the following attention layers, depending on self.att_type.
    :param x: a 3-D tensor of shape (batch, time_steps, features)
    :return: the (possibly pooled) output tensor and the attention layer (or None)
    '''
    att_layer = None
    if self.att_type == 'scaled_dot':
        att_layer = scaled_dot_attention.ScaledDotProductAttention(name='Attention')
        x = att_layer(x)
        # x = GlobalAveragePooling1D()(x)
        x = GlobalMaxPooling1D()(x)
    elif self.att_type == 'seq_self_attention':
        att_layer = seq_self_attention.SeqSelfAttention(attention_activation='sigmoid')
        x = att_layer(x)
        # x = GlobalAveragePooling1D()(x)
        x = GlobalMaxPooling1D()(x)
    elif self.att_type == 'seq_weighted_attention':
        att_layer = seq_weighted_attention.SeqWeightedAttention()
        x = att_layer(x)
        # x = seq_weighted_attention.SeqWeightedAttention()(x)
    elif self.att_type == 'attention_with_context':
        att_layer = many_to_one_attention_with_context.AttentionWithContext()
        x = att_layer(x)
    return x, att_layer
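
# Hedged usage sketch for do_attention (functional API). The BiLSTM encoder,
# its width, and the instance name `clf` are illustrative assumptions, not
# part of this repository:
#
#     inputs = Input((clf.maxlen,))
#     h = Embedding(clf.max_features, clf.embedding_dims)(inputs)
#     h = Bidirectional(LSTM(64, return_sequences=True))(h)  # (batch, maxlen, 128)
#     h, att_layer = clf.do_attention(h)                      # 2-D (batch, features) output
#     outputs = Dense(clf.class_num, activation=clf.last_activation)(h)
#     model = Model(inputs, outputs)
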
def get_model(self, pre_embeddings, dp_rate=-1.0, filter_sizes=[2, 3, 4], attention_dim=100):
    """
    :param pre_embeddings: pre-trained embedding matrix of shape (max_features, embedding_dims)
    :param dp_rate: dropout rate (disabled if <= 0)
    :param filter_sizes: sizes of the convolution kernels
    :param attention_dim: hidden dimension of the attention-before-convolution layer
    :return: the model
    """
    # Embedding part; could try multichannel embeddings as in the original paper
    embedding_layer = Embedding(
        self.max_features,         # vocabulary size
        self.embedding_dims,       # word-vector dimension
        weights=[pre_embeddings],  # pre-trained word vectors
        input_length=self.maxlen,  # maximum sentence length
        trainable=False            # whether to update the word vectors during training
    )

    input = Input((self.maxlen, ))
    embedding = embedding_layer(input)

    # add none or one of the following attention layers
    cxt_vec = None
    if self.att_type == 'scaled_dot':
        cxt_vec = scaled_dot_attention.ScaledDotProductAttention(name='Attention')(embedding)
    elif self.att_type == 'seq_self_attention':
        cxt_vec = seq_self_attention.SeqSelfAttention(attention_activation='sigmoid')(embedding)
    elif self.att_type == 'attention_before_convolution':
        cxt_vec = attention_before_convolution.AttentionBeforeConvolution(
            self.maxlen, attention_hidden_dim=attention_dim)(embedding)
        cxt_vec = Reshape((self.maxlen, self.embedding_dims, ))(cxt_vec)

    # concatenate context and input embedding (skipped when no attention layer was added)
    if cxt_vec is not None:
        embedding = Concatenate()([embedding, cxt_vec])

    # perform the convolution operations
    convs = []
    for kernel_size in filter_sizes:
        c = Conv1D(NUM_FILTERS, kernel_size, activation='relu')(embedding)
        c = GlobalMaxPooling1D()(c)
        convs.append(c)
    x = Concatenate()(convs)

    if dp_rate > 0:
        # add a dropout layer
        x = Dropout(dp_rate)(x)

    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model
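
# Hedged usage sketch for get_model. `pre_embeddings` is assumed to be a
# (max_features, embedding_dims) numpy matrix and `clf` an instance of the
# enclosing class; the optimizer, loss, and training data names are
# illustrative only:
#
#     pre_embeddings = np.random.rand(clf.max_features, clf.embedding_dims)
#     model = clf.get_model(pre_embeddings, dp_rate=0.5, filter_sizes=[2, 3, 4])
#     model.compile(optimizer='adam', loss='categorical_crossentropy',
#                   metrics=['accuracy'])
#     model.fit(x_train, y_train, batch_size=32, epochs=5,
#               validation_data=(x_val, y_val))
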
def add_attention_layer(self, model):
    '''
    Add the attention layer for the Keras Sequential modelling style.
    :param model: a keras.models.Sequential instance
    :return:
    '''
    if self.att_type == 'scaled_dot':
        model.add(scaled_dot_attention.ScaledDotProductAttention(name='Attention'))
        model.add(GlobalAveragePooling1D())
    elif self.att_type == 'seq_self_attention':
        model.add(seq_self_attention.SeqSelfAttention(attention_activation='sigmoid'))
        model.add(GlobalAveragePooling1D())
    elif self.att_type == 'seq_weighted_attention':
        model.add(seq_weighted_attention.SeqWeightedAttention())
    elif self.att_type == 'attention_with_context':
        model.add(many_to_one_attention_with_context.AttentionWithContext())
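
# Hedged usage sketch for add_attention_layer with the Sequential API. The
# surrounding layers, kernel size, and the instance name `clf` are
# illustrative assumptions:
#
#     model = Sequential()
#     model.add(Embedding(clf.max_features, clf.embedding_dims,
#                         input_length=clf.maxlen))
#     model.add(Conv1D(NUM_FILTERS, 3, activation='relu'))
#     clf.add_attention_layer(model)  # appends one attention (+ pooling) block
#     model.add(Dense(clf.class_num, activation=clf.last_activation))
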