def body(i, metrics):
    """Add the scalar metrics of one more test batch to the running totals.

    Args:
        i: Iteration counter; it is returned incremented by one.
        metrics: Running metric values, paired positionally with the
            `scalar_metrics` produced for the current batch.

    Returns:
        A pair `(i + 1, updated)` where `updated` is a list holding each
        incoming metric plus the matching metric of the current batch.
    """
    # NOTE(review): the (counter, accumulators) signature suggests this is
    # the body of a tf.while_loop -- confirm against the caller.
    batch = ibp.build_dataset(
        data_test, batch_size=batch_size, sequential=True)
    predictor(batch.image, override=True, is_training=False)

    # Epsilon-box around the images, clipped to `input_bounds`.
    lower = tf.maximum(batch.image - FLAGS.epsilon, input_bounds[0])
    upper = tf.minimum(batch.image + FLAGS.epsilon, input_bounds[1])
    predictor.propagate_bounds(ibp.IntervalBounds(lower, upper))

    spec = ibp.ClassificationSpecification(batch.label, num_classes)
    attack = attack_builder(
        predictor,
        spec,
        FLAGS.epsilon,
        input_bounds=input_bounds,
        optimizer_builder=ibp.UnrolledAdam,
    )

    # Pick the loss module matching the requested bound method:
    # plain IBP unless CROWN-IBP was asked for explicitly.
    if FLAGS.bound_method != 'crown-ibp':
        losses = ibp.Losses(predictor, spec, attack)
    else:
        losses = ibp.crown.Losses(
            predictor,
            spec,
            attack,
            use_crown_ibp=True,
            crown_bound_schedule=tf.constant(1.),
        )

    losses(batch.label)
    updated = [
        running + current
        for running, current in zip(metrics, losses.scalar_metrics)
    ]
    return i + 1, updated
Example #2
0
 def body(i, metrics):
     """Add the scalar metrics of one more test batch to the running totals.

     Args:
         i: Iteration counter; it is returned incremented by one.
         metrics: Running metric values, paired positionally with the
             `scalar_metrics` produced for the current batch.

     Returns:
         A pair `(i + 1, updated)` where `updated` is a list holding each
         incoming metric plus the matching metric of the current batch.
     """
     # NOTE(review): the (counter, accumulators) signature suggests this is
     # the body of a tf.while_loop -- confirm against the caller.
     batch = ibp.build_dataset(
         (x_test, y_test), batch_size=batch_size, sequential=True)
     predictor(batch.image, override=True, is_training=False)

     # Epsilon-box around the images, clipped to `input_bounds`.
     lower = tf.maximum(batch.image - FLAGS.epsilon, input_bounds[0])
     upper = tf.minimum(batch.image + FLAGS.epsilon, input_bounds[1])
     predictor.propagate_bounds(ibp.IntervalBounds(lower, upper))

     spec = ibp.ClassificationSpecification(batch.label, num_classes)
     attack = attack_builder(
         predictor,
         spec,
         FLAGS.epsilon,
         input_bounds=input_bounds,
         optimizer_builder=ibp.UnrolledAdam,
     )

     losses = ibp.Losses(predictor, spec, attack)
     losses(batch.label)
     updated = [
         running + current
         for running, current in zip(metrics, losses.scalar_metrics)
     ]
     return i + 1, updated
Example #3
0
    def testEndToEnd(self):
        """Wire model, bounds, specification, attack and losses together.

        Runs the resulting metrics and losses in a session and compares them
        against the hard-coded expected values below.
        """
        model = ibp.VerifiableModelWrapper(FixedNN())
        # Labels.
        labels = tf.constant([1], dtype=tf.int64)
        # Connect to input.
        inputs = tf.constant([[1, 2, 3]], dtype=tf.float32)
        model(inputs, is_training=True)
        # Input bounds: a box of radius `radius` around the input.
        radius = 1.
        model.propagate_bounds(
            ibp.IntervalBounds(inputs - radius, inputs + radius))
        # Create output specification (that forces the first logits to be
        # greater).
        spec_weights = tf.constant([[[1, -1]]], dtype=tf.float32)
        spec_offsets = tf.constant([[0]], dtype=tf.float32)
        # Turn elision off for more interesting results.
        spec = ibp.LinearSpecification(
            spec_weights, spec_offsets, collapse=False)
        # Create an attack.
        attack = ibp.UntargetedPGDAttack(
            model, spec, radius, num_steps=1, input_bounds=(-100., 100))
        # Build loss.
        losses = ibp.Losses(
            model,
            spec,
            attack,
            interval_bounds_loss_type='hinge',
            interval_bounds_hinge_margin=0.,
        )
        losses(labels)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            # We expect the worst-case logits from IBP to be [9, 4].
            # The adversarial attack should fail since logits are always
            # [l, l + 1]. Similarly, the nominal predictions are correct.
            fetches = [losses.scalar_metrics, losses.scalar_losses]
            metric_values, loss_values = sess.run(fetches)

            self.assertAlmostEqual(1., metric_values.nominal_accuracy)
            self.assertAlmostEqual(0., metric_values.verified_accuracy)
            self.assertAlmostEqual(1., metric_values.attack_accuracy)

            # Nominal and attack cross-entropy are expected to coincide.
            expected_xent = 0.31326168751822947
            self.assertAlmostEqual(
                expected_xent, loss_values.nominal_cross_entropy, places=5)
            self.assertAlmostEqual(
                expected_xent, loss_values.attack_cross_entropy, places=5)

            expected_hinge = 5.
            self.assertAlmostEqual(expected_hinge, loss_values.verified_loss)