Beispiel #1
0
 def basic_loss(self, y_true, y_pred, go_backwards=False):
     """y_true需要是整数形式(非one hot)
     """
     # 导出mask并转换数据类型
     mask = K.all(K.greater(y_pred, -1e6), axis=2)
     mask = K.cast(mask, K.floatx())
     # y_true需要重新明确一下shape和dtype
     y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
     y_true = K.cast(y_true, 'int32')
     # 反转相关
     if self.hidden_dim is None:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             trans = K.transpose(self.trans)
         else:
             trans = self.trans
         histoty = K.gather(trans, y_true)
     else:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             r_trans, l_trans = self.l_trans, self.r_trans
         else:
             l_trans, r_trans = self.l_trans, self.r_trans
         histoty = K.gather(l_trans, y_true)
         histoty = tf.einsum('bnd,kd->bnk', histoty, r_trans)
     # 计算loss
     histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1)
     y_pred = (y_pred + histoty) / 2
     loss = K.sparse_categorical_crossentropy(
         y_true, y_pred, from_logits=True
     )
     return K.sum(loss * mask) / K.sum(mask)
Beispiel #2
0
    def call(self, inputs):
        """如果custom_position_ids,那么第二个输入为自定义的位置id
        """
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]

        if self.custom_position_ids:
            inputs, position_ids = inputs
            if K.dtype(position_ids) != 'int32':
                position_ids = K.cast(position_ids, 'int32')
        else:
            position_ids = K.arange(0, seq_len, dtype='int32')[None]

        if self.hierarchical:
            alpha = 0.4 if self.hierarchical is True else self.hierarchical
            embeddings = self.embeddings - alpha * self.embeddings[:1]
            embeddings = embeddings / (1 - alpha)
            embeddings_x = K.gather(embeddings, position_ids // self.input_dim)
            embeddings_y = K.gather(embeddings, position_ids % self.input_dim)
            pos_embeddings = alpha * embeddings_x + (1 - alpha) * embeddings_y
        else:
            if self.custom_position_ids:
                pos_embeddings = K.gather(self.embeddings, position_ids)
            else:
                pos_embeddings = self.embeddings[None, :seq_len]

        if self.merge_mode == 'add':
            return inputs + pos_embeddings
        elif self.merge_mode == 'mul':
            return inputs * pos_embeddings
        else:
            if not self.custom_position_ids:
                pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
            return K.concatenate([inputs, pos_embeddings])
Beispiel #3
0
 def basic_loss(self, y_true, y_pred, go_backwards=False):
     """y_true需要是整数形式(非one hot)
     """
     mask = self.output_mask
     # y_true需要重新明确一下dtype和shape
     y_true = K.cast(y_true, 'int32')
     y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])
     # 反转相关
     if self.hidden_dim is None:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             trans = K.transpose(self.trans)
         else:
             trans = self.trans
         histoty = K.gather(trans, y_true)
     else:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             r_trans, l_trans = self.l_trans, self.r_trans
         else:
             l_trans, r_trans = self.l_trans, self.r_trans
         histoty = K.gather(l_trans, y_true)
         histoty = tf.einsum('bnd,kd->bnk', histoty, r_trans)
     # 计算loss
     histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1)
     y_pred = (y_pred + histoty) / 2
     loss = K.sparse_categorical_crossentropy(y_true,
                                              y_pred,
                                              from_logits=True)
     if mask is None:
         return K.mean(loss)
     else:
         return K.sum(loss * mask) / K.sum(mask)
Beispiel #4
0
 def basic_accuracy(self, y_true, y_pred, go_backwards=False):
     """训练过程中显示逐帧准确率的函数,排除了mask的影响
     此处y_true需要是整数形式(非one hot)
     """
     # 导出mask并转换数据类型
     mask = K.all(K.greater(y_pred, -1e6), axis=2)
     mask = K.cast(mask, K.floatx())
     # y_true需要重新明确一下shape和dtype
     y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
     y_true = K.cast(y_true, 'int32')
     # 反转相关
     if self.hidden_dim is None:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             trans = K.transpose(self.trans)
         else:
             trans = self.trans
         histoty = K.gather(trans, y_true)
     else:
         if go_backwards:  # 是否反转序列
             y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
             r_trans, l_trans = self.l_trans, self.r_trans
         else:
             l_trans, r_trans = self.l_trans, self.r_trans
         histoty = K.gather(l_trans, y_true)
         histoty = tf.einsum('bnd,kd->bnk', histoty, r_trans)
     # 计算逐标签accuracy
     histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1)
     y_pred = (y_pred + histoty) / 2
     y_pred = K.cast(K.argmax(y_pred, 2), 'int32')
     isequal = K.cast(K.equal(y_true, y_pred), K.floatx())
     return K.sum(isequal * mask) / K.sum(mask)
Beispiel #5
0
 def new_update(x, new_x):
     if x is var and self._do_lazy_optimization(x):
         if indices is None:
             r = K.any(K.not_equal(grad, 0.),
                       axis=-1,
                       keepdims=True)
             new_x = x + (new_x - x) * K.cast(r, K.floatx())
             return old_update(x, new_x)
         else:
             return self._resource_scatter_add(
                 x, indices, K.gather(new_x - x, indices))
     return old_update(x, new_x)
Beispiel #6
0
    def call(self, inputs):
        """如果inputs是一个list,则默认第二个输入是传入的位置id,否则
        是默认顺序id,即[0, 1, 2, 3, ...]
        """
        if isinstance(inputs, list):
            inputs, pos_ids = inputs
            pos_embeddings = K.gather(self.embeddings, pos_ids)
        else:
            input_shape = K.shape(inputs)
            batch_size, seq_len = input_shape[0], input_shape[1]
            pos_embeddings = self.embeddings[:seq_len]
            pos_embeddings = K.expand_dims(pos_embeddings, 0)
            pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])

        if self.merge_mode == 'add':
            return inputs + pos_embeddings
        else:
            return K.concatenate([inputs, pos_embeddings])
Beispiel #7
0
    def call(self, inputs):
        """如果custom_position_ids,那么第二个输入为自定义的位置id
        """
        if self.custom_position_ids:
            inputs, position_ids = inputs
            if K.dtype(position_ids) != 'int32':
                position_ids = K.cast(position_ids, 'int32')
            pos_embeddings = K.gather(self.embeddings, position_ids)
        else:
            input_shape = K.shape(inputs)
            batch_size, seq_len = input_shape[0], input_shape[1]
            pos_embeddings = self.embeddings[:seq_len]
            pos_embeddings = K.expand_dims(pos_embeddings, 0)
            if self.merge_mode != 'add':
                pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])

        if self.merge_mode == 'add':
            return inputs + pos_embeddings
        else:
            return K.concatenate([inputs, pos_embeddings])
Beispiel #8
0
 def basic_accuracy(self, y_true, y_pred, go_backwards=False):
     """训练过程中显示逐帧准确率的函数,排除了mask的影响
     此处y_true需要是整数形式(非one hot)
     """
     mask = self.output_mask
     # y_true需要重新明确一下dtype和shape
     y_true = K.cast(y_true, 'int32')
     y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])
     # 是否反转序列
     if go_backwards:
         y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
         trans = K.transpose(self.trans)
     else:
         trans = self.trans
     # 计算逐标签accuracy
     histoty = K.gather(trans, y_true)
     histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1)
     y_pred = (y_pred + histoty) / 2
     y_pred = K.cast(K.argmax(y_pred, 2), 'int32')
     isequal = K.cast(K.equal(y_true, y_pred), K.floatx())
     if mask is None:
         return K.mean(isequal)
     else:
         return K.sum(isequal * mask) / K.sum(mask)
Beispiel #9
0
 def call(self, inputs):
     pos_ids = self.compute_position_ids(inputs)
     return K.gather(self.embeddings, pos_ids)
Beispiel #10
0
 def call(self, inputs, q_mask=False, v_mask=False, a_mask=False):
     """实现多头注意力
     q_mask: 对输入的query序列的mask。
             主要是将输出结果的padding部分置0。
     v_mask: 对输入的value序列的mask。
             主要是防止attention读取到padding信息。
     a_mask: 对attention矩阵的mask。
             不同的attention mask对应不同的应用。
     """
     # 处理mask
     inputs = inputs[:]
     for i, mask in enumerate([q_mask, v_mask, a_mask]):
         if not mask:
             inputs.insert(3 + i, None)
     q, k, v, q_mask, v_mask = inputs[:5]
     if len(inputs) == 5:
         a_mask = 'history_only'
     elif len(inputs) == 6:
         a_mask = inputs[-1]
     else:
         raise ValueError('wrong inputs for MultiHeadAttention.')
     # 线性变换
     qw = self.q_dense(q)
     kw = self.k_dense(k)
     vw = self.v_dense(v)
     # 形状变换
     qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
     kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
     vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
     # Attention
     a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
     # 相对位置编码
     if self.max_relative_position is not None:
         q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
         q_idxs = K.expand_dims(q_idxs, 1)
         v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
         v_idxs = K.expand_dims(v_idxs, 0)
         pos_ids = v_idxs - q_idxs
         pos_ids = K.clip(pos_ids, -self.max_relative_position,
                          self.max_relative_position)
         pos_ids = pos_ids + self.max_relative_position
         pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
         a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
     # Attention(续)
     a = a / self.key_size**0.5
     a = sequence_masking(a, v_mask, 1, -1)
     if a_mask is not None:
         if is_string(a_mask):
             ones = K.ones_like(a[:1, :1])
             a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
             a = a - a_mask
         else:
             a = a - (1 - a_mask) * 1e12
     a = K.softmax(a)
     # 完成输出
     o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
     if self.max_relative_position is not None:
         o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
     o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
     o = self.o_dense(o)
     o = sequence_masking(o, q_mask, 0)
     return o
Beispiel #11
0
 def call(self, inputs, q_mask=None, v_mask=None, a_mask=None):
     """实现多头注意力
     q_mask: 对输入的query序列的mask。
             主要是将输出结果的padding部分置0。
     v_mask: 对输入的value序列的mask。
             主要是防止attention读取到padding信息。
     a_mask: 对attention矩阵的mask。
             不同的attention mask对应不同的应用。
     """
     q, k, v = inputs[:3]
     if a_mask:
         if len(inputs) == 3:
             a_mask = 'history_only'
         else:
             a_mask = inputs[3]
     if q_mask is not None:
         if not hasattr(self, 'q_mask_layer'):
             self.q_mask_layer = search_layer(q, q_mask)
         q_mask = self.q_mask_layer.output_mask
     if v_mask is not None:
         if not hasattr(self, 'v_mask_layer'):
             self.v_mask_layer = search_layer(v, v_mask)
         v_mask = self.v_mask_layer.output_mask
     # Pooling
     if self.pool_size > 1:
         is_self_attention = (q is k is v)
         q_in_len = K.shape(q)[1]
         q = sequence_masking(q, q_mask, 0)
         q = divisible_temporal_padding(q, self.pool_size)
         q = pool1d(q, self.pool_size, self.pool_size, pool_mode='avg')
         if is_self_attention:
             k = v = q
         else:
             k = sequence_masking(k, v_mask, 0)
             k = divisible_temporal_padding(k, self.pool_size)
             k = pool1d(k, self.pool_size, self.pool_size, pool_mode='avg')
             v = sequence_masking(v, v_mask, 0)
             v = divisible_temporal_padding(v, self.pool_size)
             v = pool1d(v, self.pool_size, self.pool_size, pool_mode='avg')
         if v_mask is not None:
             v_mask = v_mask[:, ::self.pool_size]
         if a_mask is not None and not is_string(a_mask):
             a_mask = a_mask[..., ::self.pool_size, ::self.pool_size]
     # 线性变换
     qw = self.q_dense(q)
     kw = self.k_dense(k)
     vw = self.v_dense(v)
     # 形状变换
     qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
     kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
     vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
     # Attention
     a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
     # 相对位置编码
     if self.max_relative_position is not None:
         q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
         q_idxs = K.expand_dims(q_idxs, 1)
         v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
         v_idxs = K.expand_dims(v_idxs, 0)
         pos_ids = v_idxs - q_idxs
         pos_ids = K.clip(pos_ids, -self.max_relative_position,
                          self.max_relative_position)
         pos_ids = pos_ids + self.max_relative_position
         pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
         a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
     # Attention(续)
     a = a / self.key_size**0.5
     a = sequence_masking(a, v_mask, 1, -1)
     if a_mask is not None:
         if is_string(a_mask):
             ones = K.ones_like(a[:1, :1])
             a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
             a = a - a_mask
         else:
             a = a - (1 - a_mask) * 1e12
     a = K.softmax(a)
     # 完成输出
     o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
     if self.max_relative_position is not None:
         o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
     o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
     o = self.o_dense(o)
     # 恢复长度
     if self.pool_size > 1:
         o = K.repeat_elements(o, self.pool_size, 1)[:, :q_in_len]
     # 返回结果
     o = sequence_masking(o, q_mask, 0)
     return o
Beispiel #12
0
 def call(self, inputs):
     if K.dtype(inputs) != 'int32':
         inputs = K.cast(inputs, 'int32')
     outputs = K.gather(self._embeddings, inputs)
     outputs = K.dot(outputs, self._project_kernel)
     return outputs