def test_train_pgd(self, model_fn):
        w = np.array([[4.0], [-3.0]])
        x0 = np.array([[2.0, 3.0]])
        y0 = np.array([[0.0]])
        adv_multiplier = 0.2
        adv_step_size = 0.01
        learning_rate = 0.01
        pgd_iterations = 3
        pgd_epsilon = 2.5 * adv_step_size
        adv_config = configs.make_adv_reg_config(multiplier=adv_multiplier,
                                                 adv_step_size=adv_step_size,
                                                 adv_grad_norm='infinity',
                                                 pgd_iterations=pgd_iterations,
                                                 pgd_epsilon=pgd_epsilon)
        y_hat = np.dot(x0, w)
        # The gradient sign is the same at every PGD iteration, so the
        # accumulated perturbation (pgd_iterations * adv_step_size per
        # dimension) gets clipped to pgd_epsilon.
        x_adv = x0 + pgd_epsilon * np.sign((y_hat - y0) * w.T)
        y_hat_adv = np.dot(x_adv, w)
        grad_w_labeled_loss = 2. * (y_hat - y0) * x0.T
        grad_w_adv_loss = adv_multiplier * 2. * (y_hat_adv - y0) * x_adv.T
        w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)

        inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
        model = model_fn(input_shape=(2, ), weights=w)
        adv_model = adversarial_regularization.AdversarialRegularization(
            model, label_keys=['label'], adv_config=adv_config)
        adv_model.compile(tf.keras.optimizers.SGD(learning_rate), loss='MSE')
        adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

        self.assertAllClose(w_new,
                            tf.keras.backend.get_value(model.weights[0]))
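
# A minimal reference sketch (not part of the test above) showing how the
# closed-form x_adv and w_new can be reproduced with autodiff instead of the
# hand-derived gradients. It assumes TensorFlow 2.x eager mode; the helper
# name `pgd_reference_step` is illustrative only, not an NSL API.
import numpy as np
import tensorflow as tf

def pgd_reference_step(w, x0, y0, step_size, iterations, epsilon):
  """Runs infinity-norm PGD on the squared loss of the linear model x . w."""
  w = tf.constant(w)
  x_orig = tf.constant(x0)
  x_adv = tf.constant(x0)
  for _ in range(iterations):
    with tf.GradientTape() as tape:
      tape.watch(x_adv)
      loss = tf.reduce_sum((tf.matmul(x_adv, w) - y0) ** 2)
    grad = tape.gradient(loss, x_adv)
    # FGM step in the infinity norm, then projection onto the epsilon ball.
    x_adv = x_adv + step_size * tf.sign(grad)
    x_adv = x_orig + tf.clip_by_value(x_adv - x_orig, -epsilon, epsilon)
  return x_adv.numpy()

# With step_size=0.01, iterations=3 and epsilon=0.025 this returns the same
# x_adv as x0 + pgd_epsilon * np.sign((y_hat - y0) * w.T) in the test.
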
    def test_train_with_feature_column_input(self):
        x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
        w = np.array([[2.], [3.], [6.]])
        y = np.array([0.])
        inputs = {'x1': x1, 'x2': x2, 'label': y}
        lr, adv_step_size = 0.001, 0.1

        feature_columns = [
            tf.feature_column.numeric_column('x1', shape=[1]),
            tf.feature_column.numeric_column('x2', shape=[2]),
        ]
        model = tf.keras.Sequential([
            tf.keras.layers.DenseFeatures(feature_columns),
            tf.keras.layers.Dense(
                1,
                use_bias=False,
                kernel_initializer=tf.keras.initializers.Constant(w)),
        ])

        adv_config = configs.make_adv_reg_config(multiplier=1.0,
                                                 adv_step_size=adv_step_size,
                                                 adv_grad_norm='l2')
        adv_model = adversarial_regularization.AdversarialRegularization(
            model, label_keys=['label'], adv_config=adv_config)
        adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
        adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

        x = np.concatenate([x1, x2], axis=-1)
        # loss = |x * w|; x * w > 0 here, so gradient(loss, x) = w
        x_adv = x + adv_step_size * w.T / np.linalg.norm(w, ord=2)
        # gradient(loss, w) = x
        w_new = w - lr * (x + x_adv).T
        self.assertAllClose(
            w_new, tf.keras.backend.get_value(model.layers[1].weights[0]))
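
# Sketch (separate from the test) of the L2-normalized FGM perturbation the
# expected values above rely on: the gradient of the absolute-error loss
# w.r.t. the input is rescaled to length adv_step_size. The helper name
# `l2_fgm_perturbation` is illustrative only.
import numpy as np

def l2_fgm_perturbation(w, x, y, adv_step_size):
  """Perturbs x along the normalized gradient of |x . w - y|."""
  grad_x = np.sign(np.dot(x, w) - y) * w.T  # d|x.w - y| / dx
  return x + adv_step_size * grad_x / np.linalg.norm(grad_x)

# Here x . w = 1*2 + 4*3 + 5*6 = 44 > 0, so grad_x is just w.T and the
# perturbation reduces to adv_step_size * w.T / ||w||, as used above.
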
  def test_train_with_2_inputs(self, name1, name2):
    x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
    w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
    y = np.array([0.])
    inputs = {name1: x1, name2: x2, 'label': y}
    lr, adv_step_size = 0.001, 0.1

    input1 = tf.keras.Input(shape=(1,), name=name1)
    input2 = tf.keras.Input(shape=(2,), name=name2)
    dense1 = tf.keras.layers.Dense(
        w1.shape[-1],
        use_bias=False,
        kernel_initializer=tf.keras.initializers.Constant(w1))
    dense2 = tf.keras.layers.Dense(
        w2.shape[-1],
        use_bias=False,
        kernel_initializer=tf.keras.initializers.Constant(w2))
    output = tf.keras.layers.Add()([dense1(input1), dense2(input2)])
    model = tf.keras.Model(inputs=[input1, input2], outputs=output)

    adv_config = configs.make_adv_reg_config(
        multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
    adv_model = adversarial_regularization.AdversarialRegularization(
        model, label_keys=['label'], adv_config=adv_config)
    adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
    adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

    # loss = |x1 * w1 + x2 * w2|; the prediction is positive here, so
    # gradient(loss, [x1, x2]) = [w1, w2]
    w_norm = np.sqrt(np.sum(w1 * w1) + np.sum(w2 * w2))
    x1_adv = x1 + adv_step_size * w1.T / w_norm
    x2_adv = x2 + adv_step_size * w2.T / w_norm
    # gradient(loss, [w1, w2]) = [x1, x2]
    w1_new, w2_new = w1 - lr * (x1 + x1_adv).T, w2 - lr * (x2 + x2_adv).T
    self.assertAllClose(w1_new, tf.keras.backend.get_value(dense1.weights[0]))
    self.assertAllClose(w2_new, tf.keras.backend.get_value(dense2.weights[0]))
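
# Sketch (illustrative only, not part of the test): with two input tensors
# the expected values above assume a single L2-normalized perturbation over
# the concatenated gradient, which is why one w_norm combines w1 and w2.
import numpy as np

w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
adv_step_size = 0.1

# gradient of |x1.w1 + x2.w2| w.r.t. (x1, x2) is (w1.T, w2.T), because the
# prediction 1*2 + 4*3 + 5*6 = 44 is positive.
joint_norm = np.sqrt(np.sum(w1 * w1) + np.sum(w2 * w2))  # sqrt(4 + 45) = 7
x1_adv = x1 + adv_step_size * w1.T / joint_norm
x2_adv = x2 + adv_step_size * w2.T / joint_norm
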
  def test_adversarial_wrapper_adds_regularization(self):
    # base model: y = w*x+b = 4*x1 + 3*x2 + 2
    weight = np.array([[4.0], [3.0]], dtype=np.float32)
    bias = np.array([2.0], dtype=np.float32)
    x0, y0 = np.array([[1.0, 1.0]]), np.array([8.0])
    adv_step_size = 0.1
    learning_rate = 0.01

    base_est = self.build_linear_regressor(weight=weight, bias=bias)
    adv_config = nsl_configs.make_adv_reg_config(
        multiplier=1.0,  # equal weight on original and adv examples
        adv_step_size=adv_step_size)
    adv_est = nsl_estimator.add_adversarial_regularization(
        base_est,
        optimizer_fn=lambda: tf.train.GradientDescentOptimizer(learning_rate),
        adv_config=adv_config)
    input_fn = single_batch_input_fn({FEATURE_NAME: x0}, y0)
    adv_est.train(input_fn=input_fn, steps=1)

    # Computes the gradients on original and adversarial examples.
    orig_pred = np.dot(x0, weight) + bias  # [9.0]
    orig_grad_w = 2 * (orig_pred - y0) * x0.T  # [[2.0], [2.0]]
    orig_grad_b = 2 * (orig_pred - y0).reshape((1,))  # [2.0]
    grad_x = 2 * (orig_pred - y0) * weight.T  # [[8.0, 6.0]]
    perturbation = adv_step_size * grad_x / np.linalg.norm(grad_x)
    x_adv = x0 + perturbation  # [[1.08, 1.06]]
    adv_pred = np.dot(x_adv, weight) + bias  # [9.5]
    adv_grad_w = 2 * (adv_pred - y0) * x_adv.T  # [[3.24], [3.18]]
    adv_grad_b = 2 * (adv_pred - y0).reshape((1,))  # [3.0]

    new_bias = bias - learning_rate * (orig_grad_b + adv_grad_b)
    new_weight = weight - learning_rate * (orig_grad_w + adv_grad_w)
    self.assertAllClose(new_bias, adv_est.get_variable_value(BIAS_VARIABLE))
    self.assertAllClose(new_weight, adv_est.get_variable_value(WEIGHT_VARIABLE))
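
# Sketch checking the worked numbers in the comments above with autodiff
# instead of the closed-form gradients (assumes TF 2.x; not part of the test).
import tensorflow as tf

weight = tf.Variable([[4.0], [3.0]])
bias = tf.Variable([2.0])
x0 = tf.constant([[1.0, 1.0]])
y0 = tf.constant([8.0])

with tf.GradientTape() as tape:
  tape.watch(x0)
  loss = tf.reduce_sum((tf.matmul(x0, weight) + bias - y0) ** 2)
grad_x, grad_w, grad_b = tape.gradient(loss, [x0, weight, bias])
# grad_x == [[8., 6.]], grad_w == [[2.], [2.]], grad_b == [2.], matching the
# orig_grad_* values above.
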
    def test_adversarial_wrapper_adds_regularization(self, adv_step_size,
                                                     pgd_iterations,
                                                     pgd_epsilon):
        # base model: y = w*x+b = 4*x1 + 3*x2 + 2
        weight = np.array([[4.0], [3.0]], dtype=np.float32)
        bias = np.array([2.0], dtype=np.float32)
        x0, y0 = np.array([[1.0, 1.0]]), np.array([8.0])
        learning_rate = 0.01

        base_est = self.build_linear_regressor(weight=weight, bias=bias)
        adv_config = nsl_configs.make_adv_reg_config(
            multiplier=1.0,  # equal weight on original and adv examples
            adv_step_size=adv_step_size,
            pgd_iterations=pgd_iterations,
            pgd_epsilon=pgd_epsilon)
        adv_est = nsl_estimator.add_adversarial_regularization(
            base_est,
            optimizer_fn=lambda: tf.train.GradientDescentOptimizer(
                learning_rate),
            adv_config=adv_config)
        input_fn = single_batch_input_fn({FEATURE_NAME: x0}, y0)
        adv_est.train(input_fn=input_fn, steps=1)

        # Computes the gradients on original and adversarial examples.
        orig_pred = np.dot(x0, weight) + bias  # [9.0]
        orig_grad_w = 2 * (orig_pred - y0) * x0.T  # [[2.0], [2.0]]
        orig_grad_b = 2 * (orig_pred - y0).reshape((1, ))  # [2.0]
        grad_x = 2 * (orig_pred - y0) * weight.T  # [[8.0, 6.0]]
        # Gradient direction is independent of x, so perturbing for multiple
        # iterations is the same as scaling the perturbation.
        perturbation_magnitude = pgd_iterations * adv_step_size
        if pgd_epsilon is not None:
            perturbation_magnitude = np.minimum(perturbation_magnitude,
                                                pgd_epsilon)
        perturbation = perturbation_magnitude * grad_x / np.linalg.norm(grad_x)
        x_adv = x0 + perturbation  # fgm: [[1.08, 1.06]]; pgd: [[1.20, 1.15]]
        adv_pred = np.dot(x_adv, weight) + bias  # fgm: [9.5]; pgd: [10.25]
        # fgm: [[3.24], [3.18]]; pgd: [[5.4], [5.175]]
        adv_grad_w = 2 * (adv_pred - y0) * x_adv.T
        adv_grad_b = 2 * (adv_pred - y0).reshape(
            (1, ))  # fgm: [3.0]; pgd: [4.5]

        new_bias = bias - learning_rate * (orig_grad_b + adv_grad_b)
        new_weight = weight - learning_rate * (orig_grad_w + adv_grad_w)
        self.assertAllClose(new_bias,
                            adv_est.get_variable_value(BIAS_VARIABLE))
        self.assertAllClose(new_weight,
                            adv_est.get_variable_value(WEIGHT_VARIABLE))
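
# Sketch (illustrative, not part of the test) of why several PGD steps on a
# linear model collapse to one scaled step: the gradient direction w.r.t. x
# never changes, so only the accumulated magnitude, capped at pgd_epsilon,
# matters. The parameter values below (0.1, 3, 0.25) merely reproduce the
# "pgd" numbers quoted in the comments above.
import numpy as np

weight = np.array([[4.0], [3.0]])
bias = np.array([2.0])
x0, y0 = np.array([[1.0, 1.0]]), np.array([8.0])
step, iterations, epsilon = 0.1, 3, 0.25

x_adv = x0.copy()
for _ in range(iterations):
  grad_x = 2 * (np.dot(x_adv, weight) + bias - y0) * weight.T
  x_adv = x_adv + step * grad_x / np.linalg.norm(grad_x)
  # Project back onto the L2 ball of radius epsilon around x0.
  delta = x_adv - x0
  if np.linalg.norm(delta) > epsilon:
    x_adv = x0 + delta * epsilon / np.linalg.norm(delta)

# x_adv == [[1.20, 1.15]], i.e. the same as a single step of size
# min(iterations * step, epsilon) along the normalized gradient.
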
  def _set_up_linear_regression(self, sample_weight=1.0):
    w = np.array([[4.0], [-3.0]])
    x0 = np.array([[2.0, 3.0]])
    y0 = np.array([[0.0]])
    adv_multiplier = 0.2
    adv_step_size = 0.01
    learning_rate = 0.01
    adv_config = configs.make_adv_reg_config(multiplier=adv_multiplier,
                                             adv_step_size=adv_step_size,
                                             adv_grad_norm='infinity')
    y_hat = np.dot(x0, w)
    x_adv = x0 + adv_step_size * np.sign((y_hat - y0) * w.T)
    y_hat_adv = np.dot(x_adv, w)
    grad_w_labeled_loss = sample_weight * 2. * (y_hat - y0) * x0.T
    grad_w_adv_loss = (adv_multiplier * sample_weight * 2. *
                       (y_hat_adv - y0) * x_adv.T)
    w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)
    return w, x0, y0, learning_rate, adv_config, w_new

  def setUp(self):
    super(AdversarialLossTest, self).setUp()
    self.adv_step_size = 0.01
    self.adv_config = configs.make_adv_reg_config(
        adv_step_size=self.adv_step_size, adv_grad_norm='infinity')