def evaluate(sess, test_inputs, test_labels, model, report):
    """Evaluates model against normal test examples and adversarial training
    examples.

    :param sess: tensorflow session
    :param test_inputs: placeholder for test inputs
    :param test_labels: placeholder for test labels
    :param model: cleverhans model
    :param report: cleverhans report
    :returns: tuple containing accuracy on clean testing examples and
        accuracy on adversarial examples
    """
    with tf.variable_scope("clean_evaluation"):
        x_train, y_train, x_test, y_test = datasets.get_clever_mnist()
        # test on the test dataset
        test_logits = model.get_logits(test_inputs)
        clean_accuracy = adv_util.evaluate(sess, report, test_inputs,
                                           test_labels, test_logits, x_test,
                                           y_test, 'clean_train_clean_eval',
                                           MODEL_CONSTANTS.EVAL_PARAMS, False)

    # adversarial testing on the train dataset
    attack_type, params = ADV_CONSTANTS.get_attack_details(FLAGS.test_attack)
    attack = attack_type(model, sess=sess)
    with tf.variable_scope("adversarial_evaluation"):
        adv_x = attack.generate(test_inputs, **params)
        adv_logits = model.get_logits(adv_x)
        adv_accuracy = adv_util.evaluate(sess, report, test_inputs,
                                         test_labels, adv_logits, x_train,
                                         y_train, 'clean_train_adv_eval',
                                         MODEL_CONSTANTS.EVAL_PARAMS, True)
    return clean_accuracy, adv_accuracy

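# ADV_CONSTANTS.get_attack_details is assumed to map an attack name from the
# flags to an (attack class, generate-kwargs) pair, as unpacked above. A
# minimal hypothetical sketch for FGSM (the eps/clip values are illustrative,
# not this project's settings):
from cleverhans.attacks import FastGradientMethod


def example_get_attack_details(attack_name):
    if attack_name == 'fgsm':
        return FastGradientMethod, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    raise ValueError('Unsupported attack: %s' % attack_name)
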
def adv_training(sess, inputs, labels):
    """Trains and evaluates a model on adversarial examples.
    """
    adversarial_model = get_model(FLAGS.model_type, 'train_adv')
    fgsm2 = FastGradientMethod(adversarial_model, sess=sess)

    def attack(inputs):
        return fgsm2.generate(
            inputs, **ADV_CONSTANTS.get_attack_details(FLAGS.train_attack)[1])

    # Passing `attack` to CrossEntropy makes the loss a mix of cross-entropy
    # on clean and adversarial examples, i.e. adversarial training.
    loss2 = CrossEntropy(
        adversarial_model, smoothing=FLAGS.label_smoothing, attack=attack)
    adv_x2 = attack(inputs)
    preds_2 = adversarial_model.get_logits(inputs)
    preds2_adv = adversarial_model.get_logits(adv_x2)

    def evaluate2():
        """Interface to cleverhans train
        """
        # Accuracy of adversarially trained model on legitimate test inputs
        utl.evaluate(sess, report, inputs, labels, preds_2, x_test, y_test,
                     'adv_train_clean_eval', MODEL_CONSTANTS.EVAL_PARAMS,
                     False)
        # Accuracy of the adversarially trained model on adversarial examples
        utl.evaluate(sess, report, inputs, labels, preds2_adv, x_test, y_test,
                     'adv_train_adv_eval', MODEL_CONSTANTS.EVAL_PARAMS, True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x_train, y_train, evaluate=evaluate2,
          args=MODEL_CONSTANTS.TRAIN_PARAMS, rng=rng,
          var_list=adversarial_model.get_params())

    # Calculate training errors: clean logits for the clean eval, adversarial
    # logits for the adversarial eval
    utl.evaluate(sess, report, inputs, labels, preds_2, x_train, y_train,
                 'train_adv_train_clean_eval', MODEL_CONSTANTS.EVAL_PARAMS,
                 False)
    utl.evaluate(sess, report, inputs, labels, preds2_adv, x_train, y_train,
                 'train_adv_train_adv_eval', MODEL_CONSTANTS.EVAL_PARAMS, True)

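# get_model is defined elsewhere in this module; a hypothetical sketch of what
# it is assumed to return -- a fresh CleverHans model built under the given
# variable scope. The basic CNN from the model zoo is one plausible choice;
# the real helper presumably dispatches on FLAGS.model_type.
from cleverhans.model_zoo.basic_cnn import ModelBasicCNN


def example_get_model(model_type, scope):
    # Only one model_type is sketched here.
    return ModelBasicCNN(scope, nb_classes=10, nb_filters=64)
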
def train_clean(sess, inputs, labels):
    """Trains without adversarial examples.
    """

    def _evaluate():
        """Interface to evaluation method for cleverhans train
        """
        utl.evaluate(sess, report, inputs, labels, preds, x_test, y_test,
                     'clean_train_clean_eval', MODEL_CONSTANTS.EVAL_PARAMS,
                     False)

    if FLAGS.clean_train:
        model = get_model(FLAGS.model_type, 'train_1')
        preds = model.get_logits(inputs)
        loss = CrossEntropy(model, smoothing=FLAGS.label_smoothing)
        train(sess, loss, x_train, y_train, evaluate=_evaluate,
              args=MODEL_CONSTANTS.TRAIN_PARAMS, rng=rng,
              var_list=model.get_params())

        # evaluate model on adversarial examples
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(
            inputs, **ADV_CONSTANTS.get_attack_details(FLAGS.train_attack)[1])
        preds_adv = model.get_logits(adv_x)
        utl.evaluate(sess, report, inputs, labels, preds_adv, x_test, y_test,
                     'clean_train_adv_eval', MODEL_CONSTANTS.EVAL_PARAMS, True)

        # calculate training error
        utl.evaluate(sess, report, inputs, labels, preds, x_train, y_train,
                     'train_clean_train_clean_eval',
                     MODEL_CONSTANTS.EVAL_PARAMS, False)

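# A minimal driver sketch (not from the original module) showing how the
# functions above are assumed to be wired together. The module-level names
# they read (x_train/y_train/x_test/y_test, report, rng) are set up here the
# way the CleverHans MNIST tutorial does; the placeholder shapes assume MNIST.
import numpy as np  # assumed already imported at the top of the real module
from cleverhans.utils import AccuracyReport


def example_main(_):
    global x_train, y_train, x_test, y_test, report, rng
    report = AccuracyReport()
    tf.set_random_seed(1234)
    sess = tf.Session()
    x_train, y_train, x_test, y_test = datasets.get_clever_mnist()
    inputs = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    labels = tf.placeholder(tf.float32, shape=(None, 10))
    rng = np.random.RandomState([2017, 8, 30])
    train_clean(sess, inputs, labels)
    adv_training(sess, inputs, labels)


if __name__ == '__main__':
    tf.app.run(main=example_main)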