def testEndToEnd(self,
                     predictor_cls,
                     attack_cls,
                     optimizer_cls,
                     epsilon,
                     restarted=False):
        """Runs an attack in both directions on a tiny linear model.

        A one-output linear layer (weights and bias all equal to 1) is
        attacked twice: once with a specification coefficient of +1 and once
        with -1, i.e. to maximize and then to minimize the output. The
        adversarial inputs found are compared against fixed expected values.

        Args:
            predictor_cls: Callable building the predictor; invoked as
                `predictor_cls(module, self)`.
            attack_cls: Callable building the attack from the predictor, the
                specification and `epsilon`.
            optimizer_cls: Forwarded to the attack as `optimizer_builder`.
            epsilon: Perturbation radius (l-infinity ball). A Python list is
                treated as per-dimension radii and wrapped into a float32
                tensor with a leading axis of size one.
            restarted: When true, each attack is wrapped in
                `ibp.RestartedAttack` with 10 restarts.
        """
        if isinstance(epsilon, list):
            # Exercise the case where every input dimension has its own radius.
            epsilon = tf.constant([epsilon], dtype=tf.float32)
        input_bounds = (-.5, 2.5)

        # Minimal network: a single linear unit with constant parameters.
        linear = snt.Linear(
            1,
            initializers={
                'w': tf.constant_initializer(1.),
                'b': tf.constant_initializer(1.),
            })
        inputs = tf.constant([[1, 2]], dtype=tf.float32)
        predictor = predictor_cls(linear, self)
        # The labels are irrelevant to this test but the attack requires them.
        labels = tf.constant([1], dtype=tf.int64)

        def run_attack(coefficient):
            """Builds an attack on `coefficient * output` and applies it."""
            spec = ibp.LinearSpecification(tf.constant([[[coefficient]]]))
            attack = attack_cls(predictor,
                                spec,
                                epsilon,
                                input_bounds=input_bounds,
                                optimizer_builder=optimizer_cls)
            if restarted:
                attack = ibp.RestartedAttack(attack, num_restarts=10)
            return attack(inputs, labels)

        # First push the output up (+1), then down (-1).
        z_max = run_attack(1.)
        z_min = run_attack(-1.)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            max_batch, min_batch = sess.run([z_max, z_min])
            # Only one example in the batch.
            max_row = max_batch[0]
            min_row = min_batch[0]
            # NOTE(review): the expected values match a radius of 1 around
            # [1, 2] clipped to `input_bounds` -- confirm against the
            # parameterization that supplies `epsilon`.
            expectations = (
                (2., max_row, 0),
                (2.5, max_row, 1),
                (0., min_row, 0),
                (1., min_row, 1),
            )
            for expected, row, index in expectations:
                self.assertAlmostEqual(expected, row[index])
 def testLinearSpecification(self):
     """Checks a linear specification with and without `collapse`.

     The same coefficients and offsets are used to build two
     `ibp.LinearSpecification` objects (collapse off, then on); both are
     applied to the modules from `_build_spec_input()` and must evaluate
     to 17.
     """
     # Coefficients have shape [batch_size, num_specifications, num_outputs].
     # Offsets have shape [batch_size, num_specifications].
     coefficients = tf.constant([[[1, 2]]], dtype=tf.float32)
     offsets = tf.constant([[3]], dtype=tf.float32)
     # Together they encode z_{K,1} + 2 * z_{K,2} + 3 <= 0.
     specs = [
         ibp.LinearSpecification(coefficients, offsets, collapse=flag)
         for flag in (False, True)
     ]
     modules = _build_spec_input()
     outputs = [spec(modules) for spec in specs]
     with self.test_session() as sess:
         for output in outputs:
             self.assertAlmostEqual(17., sess.run(output).item())
# Example no. 3
# 0
    def testEndToEnd(self):
        """Builds the full loss pipeline on `FixedNN` and checks its outputs.

        The network is wrapped for verification, interval bounds of radius 1
        are propagated through it, a one-step untargeted PGD attack is
        created, and `ibp.Losses` combines everything. The resulting scalar
        metrics and losses are compared against fixed expected values.
        """
        wrapped = ibp.VerifiableModelWrapper(FixedNN())
        # Labels.
        labels = tf.constant([1], dtype=tf.int64)
        # Connect the network to its input.
        inputs = tf.constant([[1, 2, 3]], dtype=tf.float32)
        wrapped(inputs, is_training=True)
        # Propagate an interval of radius `radius` around the input.
        radius = 1.
        wrapped.propagate_bounds(
            ibp.IntervalBounds(inputs - radius, inputs + radius))
        # Output specification (forces the first logit to be greater).
        coefficients = tf.constant([[[1, -1]]], dtype=tf.float32)
        offsets = tf.constant([[0]], dtype=tf.float32)
        # Elision is switched off for more interesting results.
        spec = ibp.LinearSpecification(coefficients, offsets, collapse=False)
        # Single-step attack with deliberately loose input bounds.
        attack = ibp.UntargetedPGDAttack(
            wrapped,
            spec,
            radius,
            num_steps=1,
            input_bounds=(-100., 100))
        # Hinge loss on the interval bounds, with zero margin.
        losses = ibp.Losses(
            wrapped,
            spec,
            attack,
            interval_bounds_loss_type='hinge',
            interval_bounds_hinge_margin=0.)
        losses(labels)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            metrics, scalar_losses = sess.run(
                [losses.scalar_metrics, losses.scalar_losses])
            # The nominal prediction is correct, and the attack fails because
            # the logits are always [l, l + 1]. Verification fails: the
            # worst-case logits from IBP are expected to be [9, 4].
            self.assertAlmostEqual(1., metrics.nominal_accuracy)
            self.assertAlmostEqual(0., metrics.verified_accuracy)
            self.assertAlmostEqual(1., metrics.attack_accuracy)
            # Cross-entropy of logits [l, l + 1] with true class 1, i.e.
            # log(1 + exp(-1)).
            expected_xent = 0.31326168751822947
            self.assertAlmostEqual(
                expected_xent, scalar_losses.nominal_cross_entropy, places=5)
            self.assertAlmostEqual(
                expected_xent, scalar_losses.attack_cross_entropy, places=5)
            # Hinge on the worst-case logit gap: 9 - 4 = 5.
            expected_hinge = 5.
            self.assertAlmostEqual(expected_hinge, scalar_losses.verified_loss)
def _build_classification_specification(label, num_classes):
    """Returns a LinearSpecification for adversarial classification.

    For every class `i`, a `[num_classes - 1, num_classes]` matrix is
    precomputed: column `i` is filled with -1 and the remaining columns hold
    an identity matrix, so each row has +1 on one other class and -1 on
    class `i`. The matrices for the requested labels are then selected with
    `tf.gather`; the offsets are all zero.

    Args:
        label: Tensor of class indices used to gather the per-class
            matrices; its first dimension sizes the offsets (assumed to be
            the batch dimension -- TODO confirm against callers).
        num_classes: Total number of classes.

    Returns:
        An `ibp.LinearSpecification` built with `prune_irrelevant=False`.
    """
    # Shared pieces of every per-class matrix.
    identity = np.eye(num_classes - 1)
    minus_one_column = -np.ones((num_classes - 1, 1))
    # Insert the -1 column at position `k` of the identity for class `k`.
    per_class = [
        np.concatenate(
            [identity[:, :k], minus_one_column, identity[:, k:]], axis=1)
        for k in range(num_classes)
    ]
    table = tf.constant(np.array(per_class, dtype=np.float32))
    # Look up the matrix matching each label.
    c = tf.gather(table, label)
    # By construction all specifications are relevant, hence no pruning.
    d = tf.zeros(shape=(tf.shape(label)[0], num_classes - 1))
    return ibp.LinearSpecification(c, d, prune_irrelevant=False)
def _generate_identity_spec(modules, shape, dimension=1):
    """Creates initial CROWN backward bounds from an identity specification.

    Args:
        modules: Forwarded unchanged to
            `ibp.crown.create_initial_backward_bounds`.
        shape: Target shape the `dimension x dimension` identity matrix is
            reshaped to before being used as specification coefficients.
        dimension: Size of the identity matrix (defaults to 1).

    Returns:
        The result of `ibp.crown.create_initial_backward_bounds`.
    """
    identity_coefficients = tf.reshape(tf.eye(dimension), shape)
    identity_spec = ibp.LinearSpecification(identity_coefficients,
                                            prune_irrelevant=False)
    return ibp.crown.create_initial_backward_bounds(identity_spec, modules)