def call(self, inputs):
    """Build sinusoidal position embeddings and merge them with `inputs`.

    Implements the Transformer-style encoding:
        PE_2i(p)   = sin(p / 10000^(2i/d_pos))
        PE_2i+1(p) = cos(p / 10000^(2i/d_pos))

    `inputs` is expected to be a (batch_size, seq_len, word_emb_dim)
    tensor.  When `self.method == 'add'` the position embedding is added
    to `inputs` (so its dim is forced to word_emb_dim); otherwise it is
    concatenated along the last axis.

    NOTE(review): `self.embedding_dim` is overwritten here with a
    symbolic shape when unset or when method == 'add' — a side effect
    inside `call`; confirm this is intended for repeated calls.
    """
    # Symbolic shapes: batch, sequence length, word embedding dim.
    batch_size, seq_len, word_emb_dim = K.shape(inputs)[0], K.shape(
        inputs)[1], K.shape(inputs)[2]
    # 'add' mode must match the word embedding dim exactly.
    if not self.embedding_dim or self.method == 'add':
        self.embedding_dim = word_emb_dim
    # t = 2i / d_pos for i in [0, d_pos/2)
    t = 2 * K.arange(self.embedding_dim / 2, dtype='float32') / K.cast(
        self.embedding_dim, dtype='float32')
    embedding_wise_pos = 1. / K.pow(
        10000., t)  # 1/10000^(2i/d_pos), shape = (p_dim/2,)
    embedding_wise_pos = K.expand_dims(embedding_wise_pos, 0)  # (1, p_dim/2)
    # Positions start at 1 (cumsum of ones), shape = (batch_size, seq_len).
    word_wise_pos = K.cumsum(K.ones_like(inputs[:, :, 0]), axis=1)
    word_wise_pos = K.expand_dims(word_wise_pos, 2)  # (batch_size, seq_len, 1)
    # Outer product p * (1/10000^(2i/d_pos)) -> (batch_size, seq_len, p_dim/2)
    position_embedding = K.dot(word_wise_pos, embedding_wise_pos)
    position_embedding = K.expand_dims(position_embedding, 3)
    # Interleave sin/cos on the last axis, then flatten back to
    # (batch_size, seq_len, p_dim).
    position_embedding = K.reshape(K.concatenate(
        [K.sin(position_embedding), K.cos(position_embedding)], axis=-1),
        shape=(batch_size, seq_len, -1))
    if self.method == 'add':
        return inputs + position_embedding
    return K.concatenate([inputs, position_embedding], axis=-1)
def compute_mask(self, inputs, mask=None):
    """Compute the layer's output mask.

    In conditional mode `mask` is a list of per-input masks; every mask
    that is present is stacked along a new leading axis and reduced with
    a logical AND.  Returns None when no mask is available.  In
    non-conditional mode the incoming mask is passed through unchanged.
    """
    if not self.conditional:
        return mask
    incoming = [] if mask is None else mask
    expanded = []
    for m in incoming:
        if m is not None:
            expanded.append(K.expand_dims(m, 0))
    if not expanded:
        return None
    return K.all(K.concatenate(expanded, axis=0), axis=0)
def compute_mask(self, inputs, mask=None):
    """Compute the output mask, keeping the first token unmasked.

    For T5 compatibility the leading token must never be masked: in
    'embedding' mode the parent class's mask is taken and its first
    position is forced to True.  In any other mode the incoming mask is
    returned as-is.
    """
    if self._mode != 'embedding':
        return mask
    parent_mask = super(Embedding, self).compute_mask(inputs, mask)
    if parent_mask is None:
        return None
    head = K.ones_like(parent_mask[:, :1], dtype='bool')
    tail = parent_mask[:, 1:]
    return K.concatenate([head, tail], 1)
def call(self, inputs):
    """Merge learned position embeddings with `inputs`.

    The first `seq_length` rows of `self.embeddings` are broadcast over
    the batch.  With merge_mode 'add' they are summed into `inputs`;
    otherwise they are tiled per sample and concatenated on the last
    axis.
    """
    shape = K.shape(inputs)
    n_batch, n_steps = shape[0], shape[1]
    pos = K.expand_dims(self.embeddings[:n_steps], 0)
    if self.merge_mode == 'add':
        # Broadcasting over the batch axis handles the addition.
        return inputs + pos
    tiled = K.tile(pos, [n_batch, 1, 1])
    return K.concatenate([inputs, tiled], axis=-1)
def dense_loss(self, y_true, y_pred): """y_true需要是one hot形式 """ # 导出mask并转换数据类型 mask = K.all(K.greater(y_pred, -1e6), axis=2, keepdims=True) mask = K.cast(mask, K.floatx()) # 计算目标分数 y_true, y_pred = y_true * mask, y_pred * mask target_score = self.path_score(y_pred, y_true) # 递归计算log Z init_states = [y_pred[:, 0]] y_pred = K.concatenate([y_pred, mask], axis=2) input_length = K.int_shape(y_pred[:, 1:])[1] log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states, input_length=input_length) # 最后一步的log Z向量 log_norm = K.logsumexp(log_norm, 1) # logsumexp得标量 # 计算损失 -log p return log_norm - target_score
def call(self, x):
    """Append a per-sample vector to every timestep of a sequence.

    `x` is a pair (seq, vec): seq has shape (batch, steps, dim_a) and
    vec has shape (batch, dim_b).  The vector is repeated along the time
    axis and concatenated on the feature axis, giving
    (batch, steps, dim_a + dim_b).
    """
    sequence, vector = x
    n_steps = K.shape(sequence)[1]
    repeated = K.tile(K.expand_dims(vector, 1), [1, n_steps, 1])
    return K.concatenate([sequence, repeated], 2)