def body(i, metrics):
    """Add one test batch's scalar metrics onto the running totals.

    Builds a sequential test batch from ``data_test``, connects the predictor
    to it with ``is_training=False``, propagates interval bounds of radius
    ``FLAGS.epsilon`` (clipped to ``input_bounds``) and evaluates the losses
    against an attack. ``FLAGS.bound_method`` selects between the CROWN-IBP
    losses and the plain IBP losses.

    Args:
      i: Step counter; returned incremented by one.
      metrics: Running totals, paired positionally with the scalar metrics
        produced for this batch.

    Returns:
      A tuple ``(i + 1, new_metrics)`` where ``new_metrics`` is a list holding
      each running total plus the matching metric of this batch.
    """
    batch = ibp.build_dataset(data_test, batch_size=batch_size,
                              sequential=True)
    predictor(batch.image, override=True, is_training=False)

    # Interval of radius epsilon around each image, clipped to input_bounds.
    lower = tf.maximum(batch.image - FLAGS.epsilon, input_bounds[0])
    upper = tf.minimum(batch.image + FLAGS.epsilon, input_bounds[1])
    predictor.propagate_bounds(ibp.IntervalBounds(lower, upper))

    spec = ibp.ClassificationSpecification(batch.label, num_classes)
    attack = attack_builder(
        predictor,
        spec,
        FLAGS.epsilon,
        input_bounds=input_bounds,
        optimizer_builder=ibp.UnrolledAdam,
    )

    # Plain IBP losses unless the CROWN-IBP bound was requested.
    if FLAGS.bound_method != 'crown-ibp':
        batch_losses = ibp.Losses(predictor, spec, attack)
    else:
        batch_losses = ibp.crown.Losses(
            predictor,
            spec,
            attack,
            use_crown_ibp=True,
            crown_bound_schedule=tf.constant(1.),
        )
    batch_losses(batch.label)

    updated = [
        total + current
        for total, current in zip(metrics, batch_losses.scalar_metrics)
    ]
    return i + 1, updated
def body(i, metrics):
    """Fold the scalar metrics of one test batch into the running totals.

    A sequential batch is built from ``(x_test, y_test)``; the predictor is
    connected to it with ``is_training=False``; interval bounds of radius
    ``FLAGS.epsilon`` (clipped to ``input_bounds``) are propagated; and
    ``ibp.Losses`` is evaluated against an attack on that batch.

    Args:
      i: Step counter; returned incremented by one.
      metrics: Running totals, paired positionally with the scalar metrics
        produced for this batch.

    Returns:
      A tuple ``(i + 1, new_metrics)`` where ``new_metrics`` is a list with
      this batch's metrics added element-wise to ``metrics``.
    """
    dataset = ibp.build_dataset(
        (x_test, y_test), batch_size=batch_size, sequential=True)
    predictor(dataset.image, override=True, is_training=False)

    # Perturbation interval around each image, clipped to input_bounds.
    bounds_low = tf.maximum(dataset.image - FLAGS.epsilon, input_bounds[0])
    bounds_high = tf.minimum(dataset.image + FLAGS.epsilon, input_bounds[1])
    interval = ibp.IntervalBounds(bounds_low, bounds_high)
    predictor.propagate_bounds(interval)

    specification = ibp.ClassificationSpecification(
        dataset.label, num_classes)
    pgd_attack = attack_builder(
        predictor,
        specification,
        FLAGS.epsilon,
        input_bounds=input_bounds,
        optimizer_builder=ibp.UnrolledAdam,
    )

    losses = ibp.Losses(predictor, specification, pgd_attack)
    losses(dataset.label)

    summed = [
        running + batch_value
        for running, batch_value in zip(metrics, losses.scalar_metrics)
    ]
    return i + 1, summed
def testEndToEnd(self):
    """Runs bounds, specification, attack and losses end to end on one input.

    Checks the accuracy metrics and loss values produced by ``ibp.Losses``
    for a single fixed input against hard-coded expectations.
    """
    model = ibp.VerifiableModelWrapper(FixedNN())

    # Ground-truth label for the single example.
    labels = tf.constant([1], dtype=tf.int64)

    # Connect the wrapped network to its input.
    inputs = tf.constant([[1, 2, 3]], dtype=tf.float32)
    model(inputs, is_training=True)

    # Propagate an interval of radius `eps` around the input.
    eps = 1.
    model.propagate_bounds(ibp.IntervalBounds(inputs - eps, inputs + eps))

    # Output specification (that forces the first logits to be greater).
    spec_c = tf.constant([[[1, -1]]], dtype=tf.float32)
    spec_d = tf.constant([[0]], dtype=tf.float32)
    # collapse=False turns elision off for more interesting results.
    specification = ibp.LinearSpecification(spec_c, spec_d, collapse=False)

    # Single-step untargeted PGD attack.
    pgd = ibp.UntargetedPGDAttack(
        model,
        specification,
        eps,
        num_steps=1,
        input_bounds=(-100., 100),
    )

    # Hinge-type verified loss with zero margin.
    losses = ibp.Losses(
        model,
        specification,
        pgd,
        interval_bounds_loss_type='hinge',
        interval_bounds_hinge_margin=0.,
    )
    losses(labels)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        metric_values, loss_values = sess.run(
            [losses.scalar_metrics, losses.scalar_losses])

        # The worst-case logits from IBP are expected to be [9, 4], so
        # verification fails. The attack should fail since the logits are
        # always [l, l + 1]; likewise the nominal prediction is correct.
        self.assertAlmostEqual(1., metric_values.nominal_accuracy)
        self.assertAlmostEqual(0., metric_values.verified_accuracy)
        self.assertAlmostEqual(1., metric_values.attack_accuracy)

        # Nominal and attack cross-entropy are expected to coincide.
        expected_xent = 0.31326168751822947
        self.assertAlmostEqual(
            expected_xent, loss_values.nominal_cross_entropy, places=5)
        self.assertAlmostEqual(
            expected_xent, loss_values.attack_cross_entropy, places=5)

        expected_hinge = 5.
        self.assertAlmostEqual(expected_hinge, loss_values.verified_loss)