def call(self, inputs):
    # Sinusoidal positional encoding:
    #   PE_2i(p)   = sin(p / 10000^(2i / d_pos))
    #   PE_2i+1(p) = cos(p / 10000^(2i / d_pos))
    batch_size, seq_len, word_emb_dim = (K.shape(inputs)[0],
                                         K.shape(inputs)[1],
                                         K.shape(inputs)[2])
    if not self.embedding_dim or self.method == 'add':
        self.embedding_dim = word_emb_dim
    t = 2 * K.arange(self.embedding_dim / 2, dtype='float32') / K.cast(
        self.embedding_dim, dtype='float32')
    embedding_wise_pos = 1. / K.pow(10000., t)  # 1 / 10000^(2i/d_pos), shape = (p_dim/2,)
    embedding_wise_pos = K.expand_dims(embedding_wise_pos, 0)  # (1, p_dim/2)
    word_wise_pos = K.cumsum(K.ones_like(inputs[:, :, 0]),
                             axis=1)  # positions 1..seq_len, shape = (batch_size, seq_len)
    word_wise_pos = K.expand_dims(word_wise_pos, 2)  # (batch_size, seq_len, 1)
    position_embedding = K.dot(
        word_wise_pos, embedding_wise_pos)  # (batch_size, seq_len, p_dim/2)
    position_embedding = K.expand_dims(position_embedding, 3)
    # interleave sin/cos along the last axis, then flatten back to p_dim
    position_embedding = K.reshape(
        K.concatenate([K.sin(position_embedding), K.cos(position_embedding)], axis=-1),
        shape=(batch_size, seq_len, -1))
    if self.method == 'add':
        return inputs + position_embedding
    return K.concatenate([inputs, position_embedding], axis=-1)
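# A minimal NumPy sanity check (not part of the layer above; all names here are
# illustrative assumptions): it rebuilds the same interleaved [sin, cos] table
# for positions starting at 1, matching the cumsum-of-ones positions in call().
import numpy as np

def sinusoidal_pe(seq_len, d_pos):
    pos = np.arange(1, seq_len + 1, dtype='float32')[:, None]             # (seq_len, 1)
    inv_freq = 1. / np.power(10000., 2. * np.arange(d_pos // 2) / d_pos)  # (d_pos/2,)
    angles = pos * inv_freq[None, :]                                      # (seq_len, d_pos/2)
    # stack sin/cos on a new last axis, then flatten -> interleaved layout
    return np.stack([np.sin(angles), np.cos(angles)], axis=-1).reshape(seq_len, d_pos)

pe = sinusoidal_pe(seq_len=128, d_pos=64)  # pe[p, 2i] = sin(...), pe[p, 2i+1] = cos(...)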
def compute_loss_of_similarity(self, inputs):
    y_true = self.get_labels_of_similarity(inputs)  # build the label matrix
    y_pred = K.l2_normalize(inputs, axis=1)  # L2-normalize the sentence vectors
    similarities = K.dot(y_pred, K.transpose(y_pred))  # pairwise similarity matrix
    similarities = similarities - K.eye(K.shape(y_pred)[0]) * 1e12  # mask out the diagonal
    similarities = similarities * self.scale  # scale (inverse temperature)
    loss = K.categorical_crossentropy(y_true, similarities, from_logits=True)
    return loss
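# get_labels_of_similarity() is not shown here. A plausible NumPy sketch,
# ASSUMING the common SimCSE batching convention in which rows 2k and 2k+1
# are positive pairs, so each row's one-hot label points at its partner:
import numpy as np

def simcse_labels(batch_size):
    idxs = np.arange(batch_size)
    partner = idxs + 1 - idxs % 2 * 2  # 0<->1, 2<->3, ...
    return (idxs[None, :] == partner[:, None]).astype('float32')

print(simcse_labels(4))
# [[0. 1. 0. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]
#  [0. 0. 1. 0.]]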
def compute_loss_of_similarity(self, inputs, mask=None):
    # _, _, _, y_pred, _ = inputs
    _, _, _, _, y_pred = inputs  # use the last layer's logits
    y_true = self.get_labels_of_similarity(y_pred)  # build the label matrix
    y_pred = K.l2_normalize(y_pred, axis=1)  # L2-normalize the sentence vectors
    similarities = K.dot(y_pred, K.transpose(y_pred))  # pairwise similarity matrix
    similarities = similarities - K.eye(K.shape(y_pred)[0]) * 1e12  # mask out the diagonal
    similarities = similarities * 20  # scale (inverse temperature)
    loss = K.categorical_crossentropy(y_true, similarities, from_logits=True)
    self.add_metric(loss, 'sim_loss')
    return loss
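# Written out, the hard-coded factor 20 is an inverse temperature: per row i,
# with positive partner p(i) and cosine similarities s_ij, the cross-entropy
# over scaled logits above reduces to the InfoNCE objective
#   L_i = -log( exp(s_{i,p(i)} / tau) / sum_{k != i} exp(s_{i,k} / tau) ),
# with tau = 1/20 = 0.05; the -1e12 shift drops k == i from the denominator.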
def call(self, inputs, mode='embedding'):
    """
    embedding mode: ordinary embedding lookup;
    dense mode: bias-free Dense layer, i.e. inputs dot embeddings.T
    :param inputs:
    :param mode:
    :return:
    """
    self._mode = mode
    if mode == 'embedding':
        return super(Embedding, self).call(inputs)
    kernel = K.transpose(self.embeddings)
    return K.dot(inputs, kernel)
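# A hypothetical usage sketch of the two modes (vocab_size, hidden_size and the
# wiring are assumptions, not from the original code): one embedding matrix is
# reused for the input lookup and the output projection, i.e. weight tying.
from tensorflow import keras

vocab_size, hidden_size = 21128, 768  # e.g. BERT-base Chinese sizes (assumed)
token_ids = keras.Input(shape=(None,), dtype='int32')
emb_layer = Embedding(input_dim=vocab_size, output_dim=hidden_size)  # the subclass above
h = emb_layer(token_ids)             # mode='embedding': (batch, seq, hidden)
logits = emb_layer(h, mode='dense')  # (batch, seq, vocab) = h dot embeddings.T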
def compute_loss_of_scl(self, inputs, mask=None):
    y_pred, y_true = inputs
    label_mask = self.get_label_mask(y_true)
    y_pred = K.l2_normalize(y_pred, axis=1)  # L2-normalize the feature vectors
    similarities = K.dot(y_pred, K.transpose(y_pred))  # pairwise similarity matrix
    similarities = similarities - K.eye(K.shape(y_pred)[0]) * 1e12  # mask out the diagonal, i.e. i == j
    similarities = similarities / self.T  # temperature scaling
    similarities = K.exp(similarities)  # exp
    sum_similarities = K.sum(similarities, axis=-1, keepdims=True)  # denominator: sum over k != i
    scl = similarities / sum_similarities
    scl = K.log(scl + K.epsilon())  # log-softmax terms
    scl = -K.sum(scl * label_mask, axis=1, keepdims=True) / (
        K.sum(label_mask, axis=1, keepdims=True) + K.epsilon())  # average over positives
    return K.mean(scl)
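# For reference, the code above matches the supervised contrastive loss
# (Khosla et al., 2020) over L2-normalized features z with temperature T:
#   L = mean_i [ -1/|P(i)| * sum_{p in P(i)} log( exp(z_i . z_p / T)
#                                  / sum_{k != i} exp(z_i . z_k / T) ) ]
# where P(i) = {p != i : y_p == y_i} is encoded by label_mask, and the -1e12
# diagonal shift makes exp(...) vanish for k == i in the denominator sum.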