def loss_with_gradient_penalty(y_true, y_pred, epsilon=1):
    """Cross-entropy loss with a gradient penalty on the token embeddings.

    The base loss is the mean of ``sparse_categorical_crossentropy`` over
    ``y_true`` / ``y_pred``. The penalty is the sum of the squared gradient
    of that loss with respect to the embedding matrix of the layer named
    'Embedding-Token', which is located by searching from ``y_pred``.

    Args:
        y_true: target labels.
        y_pred: model predictions.
        epsilon: weight of the penalty term (it is multiplied by 0.5).

    Returns:
        ``loss + 0.5 * epsilon * penalty`` as a backend tensor.
    """
    per_sample = sparse_categorical_crossentropy(y_true, y_pred)
    base_loss = K.mean(per_sample)

    token_layer = search_layer(y_pred, 'Embedding-Token')
    token_matrix = token_layer.embeddings

    # The gradient is read through ``.values``, i.e. it is expected to be a
    # sparse (IndexedSlices-like) gradient rather than a dense tensor.
    sparse_grad = K.gradients(base_loss, [token_matrix])[0]
    penalty = K.sum(sparse_grad.values**2)

    return base_loss + 0.5 * epsilon * penalty
# Example #2
# 0
def virtual_adversarial_training(model,
                                 embedding_name,
                                 epsilon=1,
                                 xi=10,
                                 iters=1):
    """Add virtual adversarial training to a compiled Keras model.

    Replaces ``model.train_function`` with a wrapper that, on every training
    step, temporarily perturbs the embedding matrix of the layer named
    ``embedding_name``, runs the original training function on the perturbed
    weights and then removes the perturbation again. Must be called after
    the model has been compiled.

    Args:
        model: Keras model to patch in place.
        embedding_name: name of the Embedding layer inside ``model``.
        epsilon: scale of the final (L2-normalised) perturbation that is
            active during the actual training step.
        xi: scale of the (L2-normalised) probing perturbation applied while
            the perturbation direction is being refined.
        iters: number of refinement iterations (one gradient evaluation each).

    Raises:
        Exception: if no layer named ``embedding_name`` can be found from any
            of the model outputs.
    """
    if model.train_function is None:  # no train function built yet
        model._make_train_function()  # build it manually
    old_train_function = model.train_function  # keep the original

    # Locate the Embedding layer by searching back from each model output.
    # Initialised to None so that a model without outputs raises the intended
    # exception instead of an UnboundLocalError.
    embedding_layer = None
    for output in model.outputs:
        embedding_layer = search_layer(output, embedding_name)
        if embedding_layer is not None:
            break
    if embedding_layer is None:
        raise Exception('Embedding layer not found')

    # Gradient of the total loss with respect to the embedding matrix.
    embeddings = embedding_layer.embeddings
    gradients = K.gradients(model.total_loss, [embeddings])
    # Adding to a zero tensor turns the embedding gradient into a dense tensor.
    gradients = K.zeros_like(embeddings) + gradients[0]

    # Wrap the symbolic tensors as callable backend functions.
    inputs = (model._feed_inputs + model._feed_targets +
              model._feed_sample_weights)  # every fed placeholder
    model_outputs = K.function(
        inputs=inputs,
        outputs=model.outputs,
        name='model_outputs',
    )  # returns the model predictions
    embedding_gradients = K.function(
        inputs=inputs,
        outputs=[gradients],
        name='embedding_gradients',
    )  # returns the dense embedding gradient

    def l2_normalize(x):
        """Scale ``x`` to (approximately) unit L2 norm."""
        return x / (np.sqrt((x**2).sum()) + 1e-8)

    def train_function(inputs):
        """Run one training step on adversarially perturbed embeddings."""
        outputs = model_outputs(inputs)
        # Replace the element at index 2 of the fed values with the model's
        # own predictions.
        # NOTE(review): this hard-codes two model inputs followed by a single
        # target - confirm before using with a different input layout.
        inputs = inputs[:2] + outputs + inputs[3:]

        applied = 0.0  # perturbation currently added to the embeddings
        direction = np.random.randn(*K.int_shape(embeddings))
        try:
            for _ in range(iters):  # iteratively refine the direction
                probe = xi * l2_normalize(direction)
                K.set_value(embeddings, K.eval(embeddings) - applied + probe)
                applied = probe
                direction = embedding_gradients(inputs)[0]
            final = epsilon * l2_normalize(direction)
            K.set_value(embeddings, K.eval(embeddings) - applied + final)
            applied = final
            return old_train_function(inputs)  # gradient descent step
        finally:
            # Always remove whatever perturbation is still applied, even when
            # the gradient evaluation or the training step raised.
            K.set_value(embeddings, K.eval(embeddings) - applied)

    model.train_function = train_function  # override the train function
def adversarial_training(model, embedding_name, epsilon=1):
    """Add adversarial training to a compiled Keras model.

    Replaces ``model.train_function`` with a wrapper that, on every training
    step, adds a perturbation along the loss gradient to the embedding matrix
    of the layer named ``embedding_name``, runs the original training
    function and then removes the perturbation again. Must be called after
    the model has been compiled.

    Args:
        model: Keras model to patch in place.
        embedding_name: name of the Embedding layer inside ``model``.
        epsilon: scale of the (L2-normalised) perturbation.

    Raises:
        Exception: if no layer named ``embedding_name`` can be found from any
            of the model outputs.
    """
    if model.train_function is None:  # no train function built yet
        model._make_train_function()  # build it manually
    old_train_function = model.train_function  # keep the original

    # Locate the Embedding layer by searching back from each model output.
    # Initialised to None so that a model without outputs raises the intended
    # exception instead of an UnboundLocalError.
    embedding_layer = None
    for output in model.outputs:
        embedding_layer = search_layer(output, embedding_name)
        if embedding_layer is not None:
            break
    if embedding_layer is None:
        raise Exception('Embedding layer not found')

    # Gradient of the total loss with respect to the embedding matrix.
    embeddings = embedding_layer.embeddings
    gradients = K.gradients(model.total_loss, [embeddings])
    # The embedding gradient is not an ordinary tensor but an IndexedSlices;
    # adding it to a zero tensor converts it into a dense tensor that can be
    # used in further computation.
    gradients = K.zeros_like(embeddings) + gradients[0]

    # Wrap the symbolic gradient as a callable backend function.
    inputs = (model._feed_inputs + model._feed_targets +
              model._feed_sample_weights)  # every fed placeholder
    embedding_gradients = K.function(
        inputs=inputs,
        outputs=[gradients],
        name='embedding_gradients',
    )

    def train_function(inputs):
        """Run one training step on adversarially perturbed embeddings."""
        grads = embedding_gradients(inputs)[0]  # dense embedding gradient
        delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)
        K.set_value(embeddings, K.eval(embeddings) + delta)  # inject
        try:
            return old_train_function(inputs)  # gradient descent step
        finally:
            # Always remove the perturbation, even if the training step
            # raised, so the weights are never left perturbed.
            K.set_value(embeddings, K.eval(embeddings) - delta)

    model.train_function = train_function  # override the train function
    这主要是因为keras自带的sparse_categorical_crossentropy不支持求二阶梯度。
    """
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    y_true = K.one_hot(y_true, K.shape(y_pred)[-1])
    return K.categorical_crossentropy(y_true, y_pred)


# Use cross-entropy as the loss and mask out the predictions that belong to
# the input part; a gradient penalty on the token embeddings is added on top.
y_true = model.input[0][:, 1:]  # target tokens (first position dropped)
# Mask taken from the second model input, shifted like the targets.
# NOTE(review): presumably the segment ids, so only the output part
# contributes to the loss - confirm against the model definition.
y_mask = model.input[1][:, 1:]
y_pred = model.output[:, :-1]  # predicted tokens, offset by one from targets
cross_entropy = sparse_categorical_crossentropy(y_true, y_pred)
# Masked mean: sum of the masked losses divided by the sum of the mask.
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
embeddings = search_layer(model.output, 'Embedding-Token').embeddings
# Gradient penalty: sum of the squared loss gradient w.r.t. the embedding
# matrix. The gradient is read through ``.values``, i.e. it is expected to be
# a sparse (IndexedSlices-like) gradient.
gp = K.sum(K.gradients(cross_entropy, [embeddings])[0].values**2)

# The loss is attached with add_loss, so compile() receives only an optimizer.
model.add_loss(cross_entropy + 0.5 * gp)
model.compile(optimizer=Adam(1e-5))


class AutoTitle(AutoRegressiveDecoder):
    """Seq2seq decoder built on ``AutoRegressiveDecoder``."""
    @AutoRegressiveDecoder.set_rtype('probas')
    def predict(self, inputs, output_ids, step):
        """Return the model prediction at the last position.

        Args:
            inputs: pair ``(token_ids, segment_ids)`` for the source part.
            output_ids: ids generated so far; appended to the token ids along
                axis 1, with segment ids of one for every generated position.
            step: unused here.

        Returns:
            ``model.predict`` output sliced with ``[:, -1]``.
        """
        source_tokens, source_segments = inputs
        generated_segments = np.ones_like(output_ids)
        token_ids = np.concatenate([source_tokens, output_ids], axis=1)
        segment_ids = np.concatenate(
            [source_segments, generated_segments], axis=1)
        predictions = model.predict([token_ids, segment_ids])
        return predictions[:, -1]
 def call(self, input):
     """Return the gradient of the selected output times the input.

     Args:
         input: triple ``(input, output, label)`` of tensors.

     Returns:
         The gradient of the gathered output with respect to the input
         tensor, multiplied element-wise by that input tensor.
     """
     input, output, label = input
     # Cast the label to an integer dtype before it is used as a gather
     # index, as the other `call` variant in this file does.
     label = K.cast(label, 'int32')
     # NOTE(review): batch_gather presumably picks, per sample, the output
     # entry addressed by `label` - confirm against its definition.
     output = batch_gather(output, label)
     return K.gradients(output, [input])[0] * input
 def call(self, input):
     """Return the gradient of the selected output times the input.

     Args:
         input: triple ``(input, output, label)`` of tensors.

     Returns:
         The gradient of the gathered output with respect to the input
         tensor, multiplied element-wise by that input tensor.
     """
     features, scores, label = input
     # The label is cast to int32 before being used as a gather index.
     index = K.cast(label, 'int32')
     selected = batch_gather(scores, index)
     grads = K.gradients(selected, [features])
     return grads[0] * features
# Example #7
# 0
def adversarial_training(model, embedding_names, epsilon=1):
    """Add adversarial training over several Embedding layers.

    Replaces ``model.train_function`` with a wrapper that, on every training
    step, adds a perturbation along the loss gradient to the embedding matrix
    of every layer listed in ``embedding_names``, runs the original training
    function and then removes the perturbations again. Must be called after
    the model has been compiled.

    Args:
        model: Keras model to patch in place.
        embedding_names: names of the Embedding layers inside ``model``. A
            single name given as a string is treated as a one-element list.
        epsilon: scale of each (L2-normalised) perturbation; every embedding
            matrix is normalised independently.

    Raises:
        Exception: if any of the named layers cannot be found from any of the
            model outputs.
    """
    if model.train_function is None:  # no train function built yet
        model._make_train_function()  # build it manually
    old_train_function = model.train_function  # keep the original

    # A bare string would otherwise be iterated character by character.
    if isinstance(embedding_names, str):
        embedding_names = [embedding_names]

    # Locate every Embedding layer by searching back from each model output.
    # A name that cannot be resolved is an error rather than being silently
    # skipped.
    embedding_layers = []
    for embedding_name in embedding_names:
        for output in model.outputs:
            embedding_layer = search_layer(output, embedding_name)
            if embedding_layer is not None:
                embedding_layers.append(embedding_layer)
                break
        else:
            raise Exception('Embedding layer not found')

    # Gradient of the total loss with respect to each embedding matrix.
    embeddings = [
        embedding_layer.embeddings for embedding_layer in embedding_layers
    ]
    gradients = K.gradients(model.total_loss, embeddings)
    # Adding to a zero tensor turns each embedding gradient into a dense
    # tensor.
    gradients = [
        K.zeros_like(embedding) + gradient
        for embedding, gradient in zip(embeddings, gradients)
    ]

    # Wrap the symbolic gradients as a callable backend function.
    inputs = (model._feed_inputs + model._feed_targets +
              model._feed_sample_weights)  # every fed placeholder
    embedding_gradients = K.function(
        inputs=inputs,
        outputs=gradients,
        name='embedding_gradients',
    )

    def train_function(inputs):
        """Run one training step on adversarially perturbed embeddings."""
        grads = embedding_gradients(inputs)  # one dense gradient per matrix
        deltas = [
            epsilon * grad / (np.sqrt((grad**2).sum()) + 1e-8)
            for grad in grads
        ]
        applied = []  # (embedding, delta) pairs that were actually injected
        try:
            for embedding, delta in zip(embeddings, deltas):
                K.set_value(embedding, K.eval(embedding) + delta)
                applied.append((embedding, delta))
            return old_train_function(inputs)  # gradient descent step
        finally:
            # Always remove the injected perturbations, even if injecting or
            # the training step raised part-way through.
            for embedding, delta in applied:
                K.set_value(embedding, K.eval(embedding) - delta)

    model.train_function = train_function  # override the train function