Esempio n. 1
0
def evaluate(data):
    total, right, true_positives, possible_positives, predicted_positives = 0., 0., 0., 0., 0.
    for x_true, y_true in data:
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        right += (y_true == y_pred).sum()
        true_positives += K.sum(K.round(K.clip(y_true * y_pred, 0, 1))).numpy()
        possible_positives += K.sum(K.round(K.clip(y_true, 0, 1))).numpy()
        predicted_positives += K.sum(K.round(K.clip(y_pred, 0, 1))).numpy()
        total += len(y_true)
    accuracy = right / total
    recall = true_positives / (possible_positives + K.epsilon())
    precision = true_positives / (predicted_positives + K.epsilon())
    f1_score = 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    return accuracy, recall, precision, f1_score
Esempio n. 2
0
 def new_update(x, new_x):
     if is_one_of(x, params) and self._do_layer_adaptation(x):
         dx = new_x - x
         lr_t = K.clip(self.learning_rate, K.epsilon(), 1e10)
         x_norm = tf.norm(x)
         g_norm = tf.norm(dx / lr_t)
         ratio = K.switch(
             x_norm > 0.,
             K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
             1.)
         new_x = x + dx * ratio
     return old_update(x, new_x)
Esempio n. 3
0
 def new_update(x, new_x):
     if x is var and self._do_layer_adaptation(x):
         dx = new_x - x
         lr_t = self._decayed_lr(x.dtype.base_dtype)
         lr_t = K.clip(lr_t, K.epsilon(), K.infinity())
         x_norm = tf.norm(x)
         g_norm = tf.norm(dx / lr_t)
         ratio = K.switch(
             x_norm > 0.0,
             K.switch(g_norm > 0.0, x_norm / g_norm, 1.0), 1.0)
         new_x = x + dx * ratio
     return old_update(x, new_x)
Esempio n. 4
0
 def compute_position_ids(self, inputs):
     q, v = inputs
     # 计算位置差
     q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
     q_idxs = K.expand_dims(q_idxs, 1)
     v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
     v_idxs = K.expand_dims(v_idxs, 0)
     pos_ids = v_idxs - q_idxs
     # 后处理操作
     max_position = (self.input_dim - 1) // 2
     pos_ids = K.clip(pos_ids, -max_position, max_position)
     pos_ids = pos_ids + max_position
     return pos_ids
Esempio n. 5
0
 def call(self, inputs, q_mask=False, v_mask=False, a_mask=False):
     """实现多头注意力
     q_mask: 对输入的query序列的mask。
             主要是将输出结果的padding部分置0。
     v_mask: 对输入的value序列的mask。
             主要是防止attention读取到padding信息。
     a_mask: 对attention矩阵的mask。
             不同的attention mask对应不同的应用。
     """
     # 处理mask
     inputs = inputs[:]
     for i, mask in enumerate([q_mask, v_mask, a_mask]):
         if not mask:
             inputs.insert(3 + i, None)
     q, k, v, q_mask, v_mask = inputs[:5]
     if len(inputs) == 5:
         a_mask = 'history_only'
     elif len(inputs) == 6:
         a_mask = inputs[-1]
     else:
         raise ValueError('wrong inputs for MultiHeadAttention.')
     # 线性变换
     qw = self.q_dense(q)
     kw = self.k_dense(k)
     vw = self.v_dense(v)
     # 形状变换
     qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
     kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
     vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
     # Attention
     a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
     # 相对位置编码
     if self.max_relative_position is not None:
         q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
         q_idxs = K.expand_dims(q_idxs, 1)
         v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
         v_idxs = K.expand_dims(v_idxs, 0)
         pos_ids = v_idxs - q_idxs
         pos_ids = K.clip(pos_ids, -self.max_relative_position,
                          self.max_relative_position)
         pos_ids = pos_ids + self.max_relative_position
         pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
         a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
     # Attention(续)
     a = a / self.key_size**0.5
     a = sequence_masking(a, v_mask, 1, -1)
     if a_mask is not None:
         if is_string(a_mask):
             ones = K.ones_like(a[:1, :1])
             a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
             a = a - a_mask
         else:
             a = a - (1 - a_mask) * 1e12
     a = K.softmax(a)
     # 完成输出
     o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
     if self.max_relative_position is not None:
         o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
     o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
     o = self.o_dense(o)
     o = sequence_masking(o, q_mask, 0)
     return o
Esempio n. 6
0
 def call(self, inputs, q_mask=None, v_mask=None, a_mask=None):
     """实现多头注意力
     q_mask: 对输入的query序列的mask。
             主要是将输出结果的padding部分置0。
     v_mask: 对输入的value序列的mask。
             主要是防止attention读取到padding信息。
     a_mask: 对attention矩阵的mask。
             不同的attention mask对应不同的应用。
     """
     q, k, v = inputs[:3]
     if a_mask:
         if len(inputs) == 3:
             a_mask = 'history_only'
         else:
             a_mask = inputs[3]
     if q_mask is not None:
         if not hasattr(self, 'q_mask_layer'):
             self.q_mask_layer = search_layer(q, q_mask)
         q_mask = self.q_mask_layer.output_mask
     if v_mask is not None:
         if not hasattr(self, 'v_mask_layer'):
             self.v_mask_layer = search_layer(v, v_mask)
         v_mask = self.v_mask_layer.output_mask
     # Pooling
     if self.pool_size > 1:
         is_self_attention = (q is k is v)
         q_in_len = K.shape(q)[1]
         q = sequence_masking(q, q_mask, 0)
         q = divisible_temporal_padding(q, self.pool_size)
         q = pool1d(q, self.pool_size, self.pool_size, pool_mode='avg')
         if is_self_attention:
             k = v = q
         else:
             k = sequence_masking(k, v_mask, 0)
             k = divisible_temporal_padding(k, self.pool_size)
             k = pool1d(k, self.pool_size, self.pool_size, pool_mode='avg')
             v = sequence_masking(v, v_mask, 0)
             v = divisible_temporal_padding(v, self.pool_size)
             v = pool1d(v, self.pool_size, self.pool_size, pool_mode='avg')
         if v_mask is not None:
             v_mask = v_mask[:, ::self.pool_size]
         if a_mask is not None and not is_string(a_mask):
             a_mask = a_mask[..., ::self.pool_size, ::self.pool_size]
     # 线性变换
     qw = self.q_dense(q)
     kw = self.k_dense(k)
     vw = self.v_dense(v)
     # 形状变换
     qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
     kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
     vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
     # Attention
     a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
     # 相对位置编码
     if self.max_relative_position is not None:
         q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
         q_idxs = K.expand_dims(q_idxs, 1)
         v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
         v_idxs = K.expand_dims(v_idxs, 0)
         pos_ids = v_idxs - q_idxs
         pos_ids = K.clip(pos_ids, -self.max_relative_position,
                          self.max_relative_position)
         pos_ids = pos_ids + self.max_relative_position
         pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
         a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
     # Attention(续)
     a = a / self.key_size**0.5
     a = sequence_masking(a, v_mask, 1, -1)
     if a_mask is not None:
         if is_string(a_mask):
             ones = K.ones_like(a[:1, :1])
             a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
             a = a - a_mask
         else:
             a = a - (1 - a_mask) * 1e12
     a = K.softmax(a)
     # 完成输出
     o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
     if self.max_relative_position is not None:
         o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
     o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
     o = self.o_dense(o)
     # 恢复长度
     if self.pool_size > 1:
         o = K.repeat_elements(o, self.pool_size, 1)[:, :q_in_len]
     # 返回结果
     o = sequence_masking(o, q_mask, 0)
     return o