def call(self, inputs):
    """Sinusoidal position embedding.

    If ``custom_position_ids`` is set, ``inputs`` is a list whose second
    element carries the custom position ids; otherwise the default
    positions [0, 1, ..., seq_len - 1] are generated.
    """
    if self.custom_position_ids:
        # BUGFIX: unpack BEFORE calling K.shape — `inputs` is a list here,
        # and K.shape on a list of differently-ranked tensors fails.
        inputs, position_ids = inputs
        seq_len = K.shape(inputs)[1]
        if 'float' not in K.dtype(position_ids):
            # tf.einsum below requires float position ids.
            position_ids = K.cast(position_ids, K.floatx())
    else:
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]
        position_ids = K.arange(0, seq_len, dtype=K.floatx())[None]
    indices = K.arange(0, self.output_dim // 2, dtype=K.floatx())
    indices = K.pow(10000.0, -2 * indices / self.output_dim)
    pos_embeddings = tf.einsum('bn,d->bnd', position_ids, indices)
    # Interleave sin/cos on a trailing axis, then flatten to output_dim.
    pos_embeddings = K.concatenate([
        K.sin(pos_embeddings)[..., None],
        K.cos(pos_embeddings)[..., None]
    ])
    pos_embeddings = K.reshape(pos_embeddings, (-1, seq_len, self.output_dim))
    if self.merge_mode == 'add':
        return inputs + pos_embeddings
    elif self.merge_mode == 'mul':
        return inputs * pos_embeddings
    else:
        if not self.custom_position_ids:
            pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
        return K.concatenate([inputs, pos_embeddings])
def basic_loss(self, y_true, y_pred, go_backwards=False):
    """Frame-wise loss; ``y_true`` must be integer labels (not one-hot)."""
    mask = self.output_mask
    # Pin down the dtype and shape of y_true.
    y_true = K.cast(y_true, 'int32')
    y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])
    # Optional sequence reversal plus transition-history lookup.
    if self.hidden_dim is None:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            trans = K.transpose(self.trans)
        else:
            trans = self.trans
        history = K.gather(trans, y_true)
    else:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            l_trans, r_trans = self.r_trans, self.l_trans
        else:
            l_trans, r_trans = self.l_trans, self.r_trans
        history = K.gather(l_trans, y_true)
        history = tf.einsum('bnd,kd->bnk', history, r_trans)
    # Blend transition history into the emissions, then cross-entropy.
    history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    y_pred = (y_pred + history) / 2
    loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    if mask is None:
        return K.mean(loss)
    return K.sum(loss * mask) / K.sum(mask)
def call(self, inputs):
    """Sinusoidal position embedding.

    If ``custom_position_ids`` is set, the second input holds the custom
    position-id tensor.
    """
    if self.custom_position_ids:
        # BUGFIX: unpack before calling K.shape — `inputs` is still a list
        # at this point, and K.shape cannot stack tensors of different rank.
        inputs, position_ids = inputs
        seq_len = K.shape(inputs)[1]
        if 'float' not in K.dtype(position_ids):
            # tf.einsum below requires float position ids.
            position_ids = K.cast(position_ids, K.floatx())
    else:
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]
        position_ids = K.arange(0, seq_len, dtype=K.floatx())[None]
    indices = K.arange(0, self.output_dim // 2, dtype=K.floatx())
    indices = K.pow(10000.0, -2 * indices / self.output_dim)
    embeddings = tf.einsum('bn,d->bnd', position_ids, indices)
    # Interleave sin/cos on the last axis, then flatten to output_dim.
    embeddings = K.stack([K.sin(embeddings), K.cos(embeddings)], axis=-1)
    embeddings = K.reshape(embeddings, (-1, seq_len, self.output_dim))
    if self.merge_mode == 'add':
        return inputs + embeddings
    elif self.merge_mode == 'mul':
        return inputs * (embeddings + 1.0)
    else:
        if not self.custom_position_ids:
            embeddings = K.tile(embeddings, [batch_size, 1, 1])
        return K.concatenate([inputs, embeddings])
def basic_accuracy(self, y_true, y_pred, go_backwards=False):
    """Per-frame accuracy shown during training, excluding masked frames.

    ``y_true`` must be integer labels (not one-hot).
    """
    # Derive the mask from the padded logits and cast it to float.
    mask = K.cast(K.all(K.greater(y_pred, -1e6), axis=2), K.floatx())
    # Make the shape and dtype of y_true explicit.
    y_true = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')
    # Optional sequence reversal plus transition-history lookup.
    if self.hidden_dim is None:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            trans = K.transpose(self.trans)
        else:
            trans = self.trans
        history = K.gather(trans, y_true)
    else:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            l_trans, r_trans = self.r_trans, self.l_trans
        else:
            l_trans, r_trans = self.l_trans, self.r_trans
        history = K.gather(l_trans, y_true)
        history = tf.einsum('bnd,kd->bnk', history, r_trans)
    # Per-label accuracy over unmasked frames.
    history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    y_pred = (y_pred + history) / 2
    predictions = K.cast(K.argmax(y_pred, 2), 'int32')
    hits = K.cast(K.equal(y_true, predictions), K.floatx())
    return K.sum(hits * mask) / K.sum(mask)
def call(self, inputs):
    """Learned (optionally hierarchical) position embedding.

    If ``custom_position_ids`` is set, ``inputs`` is a list whose second
    element carries the position ids.
    """
    if self.custom_position_ids:
        # BUGFIX: unpack BEFORE any K.shape call — `inputs` is a list here,
        # and K.shape on a list of differently-ranked tensors fails.
        inputs, position_ids = inputs
        if K.dtype(position_ids) != 'int32':
            position_ids = K.cast(position_ids, 'int32')
    else:
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]
        position_ids = K.arange(0, seq_len, dtype='int32')[None]
    if self.hierarchical:
        # Hierarchical decomposition lets positions exceed input_dim by
        # combining a "coarse" and a "fine" embedding lookup.
        alpha = 0.4 if self.hierarchical is True else self.hierarchical
        embeddings = self.embeddings - alpha * self.embeddings[:1]
        embeddings = embeddings / (1 - alpha)
        embeddings_x = K.gather(embeddings, position_ids // self.input_dim)
        embeddings_y = K.gather(embeddings, position_ids % self.input_dim)
        pos_embeddings = alpha * embeddings_x + (1 - alpha) * embeddings_y
    else:
        if self.custom_position_ids:
            pos_embeddings = K.gather(self.embeddings, position_ids)
        else:
            pos_embeddings = self.embeddings[None, :seq_len]
    if self.merge_mode == 'add':
        return inputs + pos_embeddings
    elif self.merge_mode == 'mul':
        return inputs * pos_embeddings
    else:
        if not self.custom_position_ids:
            pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
        return K.concatenate([inputs, pos_embeddings])
def parse_function(serialized):
    """Parse one TFRecord example into UniLM-style model inputs.

    A random split point turns the token sequence into (part1, [SEP],
    part2); segment ids are 0 up to and including the inserted [SEP] and
    1 afterwards. The last original token is dropped to keep the length.
    """
    feature_spec = {
        'token_ids': tf.io.FixedLenFeature([sequence_length], tf.int64),
    }
    example = tf.io.parse_single_example(serialized, feature_spec)
    token_ids = example['token_ids']
    # Random split position in [1, sequence_length - 1).
    segment = K.random_uniform(
        shape=[1], minval=1, maxval=sequence_length - 1, dtype='int64'
    )[0]
    # cumsum of a one-hot at segment+1 -> 0s through the [SEP], 1s after.
    segment_ids = K.one_hot(segment + 1, sequence_length)
    segment_ids = K.cast(K.cumsum(segment_ids), 'int64')
    # Insert [SEP] at the split; drop the final token to preserve length.
    sep = K.zeros([1], dtype='int64') + token_sep_id
    token_ids = K.concatenate([token_ids[:segment], sep, token_ids[segment:-1]])
    x = {
        'Input-Token': token_ids,
        'Input-Segment': segment_ids,
    }
    y = {
        'unilm_loss': K.zeros([1]),
        'unilm_acc': K.zeros([1]),
    }
    return x, y
def basic_loss(self, y_true, y_pred, go_backwards=False):
    """Frame-wise loss; ``y_true`` must be integer labels (not one-hot)."""
    # Derive the mask from the padded logits and cast it to float.
    mask = K.cast(K.all(K.greater(y_pred, -1e6), axis=2), K.floatx())
    # Make the shape and dtype of y_true explicit.
    y_true = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')
    # Optional sequence reversal plus transition-history lookup.
    if self.hidden_dim is None:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            trans = K.transpose(self.trans)
        else:
            trans = self.trans
        history = K.gather(trans, y_true)
    else:
        if go_backwards:  # reverse the sequence?
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            l_trans, r_trans = self.r_trans, self.l_trans
        else:
            l_trans, r_trans = self.l_trans, self.r_trans
        history = K.gather(l_trans, y_true)
        history = tf.einsum('bnd,kd->bnk', history, r_trans)
    # Blend transition history into the emissions, then masked cross-entropy.
    history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    y_pred = (y_pred + history) / 2
    loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return K.sum(loss * mask) / K.sum(mask)
def extrac_trigger(inputs):
    """Pick the trigger span representation out of `output` via `trigger_ids`."""
    output, trigger_ids = inputs
    trigger_ids = K.cast(trigger_ids, 'int32')
    # Gather the start and end token vectors, one per sample.
    head = batch_gather(output, trigger_ids[:, :1])
    tail = batch_gather(output, trigger_ids[:, 1:])
    return K.concatenate([head, tail], 2)[:, 0]
def compute_mask(self, inputs, mask=None):
    """Ensure the first token is never masked."""
    mask = super(Embedding, self).compute_mask(inputs, mask)
    if mask is None:
        return None
    first = K.ones_like(mask[:, :1], dtype='bool')
    return K.concatenate([first, mask[:, 1:]], 1)
def compute_mask(self, inputs, mask=None):
    """Concatenate the per-input masks, filling in all-True where absent."""
    if mask is None:
        return None
    masks = [
        m if m is not None else K.ones_like(inputs[i][..., 0], dtype='bool')
        for i, m in enumerate(mask)
    ]
    return K.concatenate(masks, axis=1)
def extract_subject(inputs):
    """Pick the subject span representation out of `output` via `subject_ids`."""
    output, subject_ids = inputs
    # BUGFIX/consistency: cast ids to int32 before gathering, matching the
    # other subject-extraction helpers in this file; ids may arrive as floats.
    subject_ids = K.cast(subject_ids, 'int32')
    start = batch_gather(output, subject_ids[:, :1])
    end = batch_gather(output, subject_ids[:, 1:])
    subject = K.concatenate([start, end], 2)
    return subject[:, 0]
def extrac_subject(inputs):
    """Pick the subject span representation out of `output` via `subject_ids`."""
    output, subject_ids = inputs
    subject_ids = K.cast(subject_ids, 'int32')
    # Each gather: (batch, seq, dim) -> (batch, 1, dim).
    head = batch_gather(output, subject_ids[:, :1])
    tail = batch_gather(output, subject_ids[:, 1:])
    # (batch, 1, 2*dim) -> (batch, 2*dim).
    return K.concatenate([head, tail], 2)[:, 0]
def extrac_subject(inputs):
    """Pick the subject span representation out of `output` via `subject_ids`."""
    output, subject_ids = inputs
    subject_ids = K.cast(subject_ids, 'int32')
    # batch_dims=-1 makes tf.gather index per sample along the batch axis.
    endpoints = [
        tf.gather(output, subject_ids[:, :1], batch_dims=-1),
        tf.gather(output, subject_ids[:, 1:], batch_dims=-1),
    ]
    return K.concatenate(endpoints, 2)[:, 0]
def compute_mask(self, inputs, mask=None):
    """AND-combine the available input masks (conditional mode only)."""
    if not self.conditional:
        return mask
    # BUGFIX: `mask` may be None rather than a list; guard before iterating
    # (consistent with the sibling conditional compute_mask in this file).
    masks = mask if mask is not None else []
    masks = [K.expand_dims(m, 0) for m in masks if m is not None]
    if len(masks) == 0:
        return None
    return K.all(K.concatenate(masks, axis=0), axis=0)
def extrac_subject(inputs):
    """Pick the subject span representation out of `output` via `subject_ids`."""
    output, subject_ids = inputs
    subject_ids = K.cast(subject_ids, 'int32')
    # batch_gather selects one token vector per sample for each endpoint.
    span = K.concatenate(
        [
            batch_gather(output, subject_ids[:, :1]),
            batch_gather(output, subject_ids[:, 1:]),
        ],
        2,
    )
    return span[:, 0]
def call(self, inputs):
    """Merge learned position embeddings into the inputs (add or concat)."""
    shape = K.shape(inputs)
    batch_size, seq_len = shape[0], shape[1]
    # Tile to an explicit batch dimension before merging.
    pos = K.tile(K.expand_dims(self.embeddings[:seq_len], 0), [batch_size, 1, 1])
    if self.merge_mode == 'add':
        return inputs + pos
    return K.concatenate([inputs, pos])
def compute_mask(self, inputs, mask=None):
    """AND-combine the available input masks (conditional mode only)."""
    if not self.conditional:
        return mask
    masks = mask if mask is not None else []
    stacked = [m[None] for m in masks if m is not None]
    if not stacked:
        return None
    return K.all(K.concatenate(stacked, axis=0), axis=0)
def compute_mask(self, inputs, mask=None):
    """For T5 compatibility: ensure the first token is never masked."""
    if self._current_mode == 'embedding':
        mask = super(Embedding, self).compute_mask(inputs, mask)
        if mask is not None:
            head = K.ones_like(mask[:, :1], dtype='bool')
            mask = K.concatenate([head, mask[:, 1:]], 1)
    return mask
def call(self, inputs):
    """Merge learned position embeddings into `inputs` (add or concat)."""
    shape = K.shape(inputs)  # inputs: (batch, seq_len, dim)
    batch_size, seq_len = shape[0], shape[1]
    # (seq_len, dim) -> (1, seq_len, dim); addition broadcasts over batch.
    pos = K.expand_dims(self.embeddings[:seq_len], 0)
    if self.merge_mode == 'add':
        return inputs + pos
    # Concatenation needs an explicit batch dimension.
    return K.concatenate([inputs, K.tile(pos, [batch_size, 1, 1])])
def call(self, inputs):
    """Learned position embeddings with an optional padding-index offset
    (fairseq-style: real positions start at ``padding_idx + 1``)."""
    input_shape = K.shape(inputs)
    batch_size, seq_len = input_shape[0], input_shape[1]
    if self.offset_positions_by_padding and self.padding_idx:
        # Skip the rows reserved for padding positions.
        pos_embeddings = self.embeddings[
            self.padding_idx + 1:seq_len + self.padding_idx + 1
        ]
    else:
        # BUGFIX: previously this indexed a single row
        # (self.embeddings[self.padding_idx]), yielding a rank-1 tensor
        # that broke the rank-3 K.tile below; slice the first seq_len rows.
        pos_embeddings = self.embeddings[:seq_len]
    pos_embeddings = K.expand_dims(pos_embeddings, 0)
    pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
    if self.merge_mode == 'add':
        return inputs + pos_embeddings
    else:
        return K.concatenate([inputs, pos_embeddings])
def extrac_subject(inputs):
    """Pick the subject span representation out of `output` via `subject_ids`."""
    output, subject_ids = inputs
    subject_ids = K.cast(subject_ids, 'int32')  # (batch, 2) span endpoints
    # One gathered token vector per sample for each endpoint: (batch, 1, dim).
    start = batch_gather(output, subject_ids[:, :1])
    end = batch_gather(output, subject_ids[:, 1:])
    # (batch, 1, 2*dim) -> (batch, 2*dim).
    return K.concatenate([start, end], 2)[:, 0]
def dense_loss(self, y_true, y_pred):
    """CRF loss with dense targets; ``y_true`` must be one-hot encoded.

    Returns -log p(y_true | y_pred) = log Z - target_score per sample.
    """
    mask = self.output_mask
    # Score of the target label path (emissions + transitions).
    target_score = self.target_score(y_true, y_pred, mask)
    # Recursively compute log Z with K.rnn over the sequence.
    init_states = [y_pred[:, 0]]
    if mask is None:
        mask = K.ones_like(y_pred[:, :, :1])
    else:
        mask = K.expand_dims(mask, 2)
    # Append the mask as an extra channel so log_norm_step can read it.
    y_pred = K.concatenate([y_pred, mask])
    log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states)  # log Z vector at the final step
    log_norm = tf.reduce_logsumexp(log_norm, 1)  # logsumexp -> scalar per sample
    # Loss: -log p = log Z - target score.
    return log_norm - target_score
def call(self, inputs):
    """Position embedding.

    If ``inputs`` is a list, its second element supplies explicit position
    ids; otherwise the default order [0, 1, 2, 3, ...] is used.
    """
    if isinstance(inputs, list):
        inputs, pos_ids = inputs
        pos_embeddings = K.gather(self.embeddings, pos_ids)
    else:
        shape = K.shape(inputs)
        batch_size, seq_len = shape[0], shape[1]
        pos_embeddings = K.tile(
            K.expand_dims(self.embeddings[:seq_len], 0), [batch_size, 1, 1]
        )
    if self.merge_mode == 'add':
        return inputs + pos_embeddings
    return K.concatenate([inputs, pos_embeddings])
def dense_loss(self, y_true, y_pred): """y_true需要是one hot形式 """ # 导出mask并转换数据类型 mask = K.all(K.greater(y_pred, -1e6), axis=2, keepdims=True) mask = K.cast(mask, K.floatx()) # 计算目标分数 y_true, y_pred = y_true * mask, y_pred * mask target_score = self.target_score(y_true, y_pred) # 递归计算log Z init_states = [y_pred[:, 0]] y_pred = K.concatenate([y_pred, mask], axis=2) input_length = K.int_shape(y_pred[:, 1:])[1] log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states, input_length=input_length) # 最后一步的log Z向量 log_norm = tf.reduce_logsumexp(log_norm, 1) # logsumexp得标量 # 计算损失 -log p return log_norm - target_score
def call(self, inputs):
    """If ``custom_position_ids``, the second input supplies the position ids."""
    if self.custom_position_ids:
        inputs, position_ids = inputs
        if K.dtype(position_ids) != 'int32':
            position_ids = K.cast(position_ids, 'int32')
        pos_embeddings = K.gather(self.embeddings, position_ids)
    else:
        shape = K.shape(inputs)
        batch_size, seq_len = shape[0], shape[1]
        pos_embeddings = K.expand_dims(self.embeddings[:seq_len], 0)
        if self.merge_mode != 'add':
            # Concatenation needs an explicit batch dimension; add broadcasts.
            pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
    if self.merge_mode == 'add':
        return inputs + pos_embeddings
    return K.concatenate([inputs, pos_embeddings])
def seq_gather(x: list):
    """Select one vector per sequence.

    Given ``x = [seq, idxs]`` where ``seq`` has shape
    (batch_size, seq_len, vector_size) and ``idxs`` has shape
    (batch_size, 1), pick the ``idxs[i]``-th vector from the i-th
    sequence, producing a (batch_size, vector_size) tensor.

    :param x: [seq, idxs] — the sequence tensor and per-sample indices
    :return: the gathered vectors
    """
    # BUGFIX: removed a dead, hard-coded `idx = [[4],[9],...]` literal that
    # was never used and only misled readers.
    seq, idxs = x
    # Indices must be integers for tf.gather_nd.
    idxs = K.cast(idxs, 'int32')
    # Batch indices [0, 1, ..., batch_size-1], expanded to column form
    # [[0], [1], ...] so each can be paired with its sample index.
    batch_idxs = K.expand_dims(K.arange(0, K.shape(seq)[0]), 1)
    # Pairs like [[0, 4], [1, 9], ...] address one position per sequence.
    idxs = K.concatenate([batch_idxs, idxs], 1)
    return tf.gather_nd(seq, idxs)
def basic_accuracy(self, y_true, y_pred, go_backwards=False):
    """Per-frame accuracy shown during training, excluding masked frames.

    ``y_true`` must be integer labels (not one-hot).
    """
    mask = self.output_mask
    # Pin down the dtype and shape of y_true.
    y_true = K.cast(y_true, 'int32')
    y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])
    # Optionally reverse the sequence and pick the transition matrix.
    if go_backwards:
        y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
        trans = K.transpose(self.trans)
    else:
        trans = self.trans
    # Blend transition history into the emissions, then per-label accuracy.
    history = K.gather(trans, y_true)
    history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    y_pred = (y_pred + history) / 2
    predictions = K.cast(K.argmax(y_pred, 2), 'int32')
    hits = K.cast(K.equal(y_true, predictions), K.floatx())
    if mask is None:
        return K.mean(hits)
    return K.sum(hits * mask) / K.sum(mask)
def call(self, inputs):
    """Concatenate all inputs along the sequence axis (axis 1)."""
    merged = K.concatenate(inputs, axis=1)
    return merged
def concat(xll):
    """Concatenate the first tensor with the first time step of the second,
    along the last axis."""
    full, first_step = xll[0], xll[1][:, 0, :]
    return K.concatenate([full, first_step], -1)
def compute_mask(self, inputs, mask=None):
    """Concatenate the masks along axis 0 when every input carries one;
    otherwise propagate no mask."""
    if isinstance(mask, list) and all(m is not None for m in mask):
        return K.concatenate(mask, 0)
    return None