inputs = np.array(inputs) targets = np.array(targets) return inputs, targets if __name__ == "__main__": with tf.Session() as sess: use_log = True # data, model = MNIST(), MNISTModel("models/mnist", sess, use_log) # data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log) data, model = ImageNet(), InceptionModel(sess, use_log) attack = BlackBoxL2(sess, model, batch_size=128, max_iterations=15000, confidence=0, use_log=use_log) inputs, targets = generate_data(data, samples=1, targeted=True, start=6, inception=False) inputs = inputs[1:2] targets = targets[1:2] timestart = time.time() adv = attack.attack(inputs, targets) timeend = time.time() print("Took", timeend - timestart, "seconds to run", len(inputs),
def main(args):
    """Attack each correctly classified test image and report statistics.

    The function loads the dataset/model selected by ``args['dataset']``,
    builds either the white-box (``CarliniL2``) or black-box (``BlackBoxL2``)
    attack, and then, for every sample returned by ``generate_data``:

    1. classifies the already-encoded input and skips the sample when the
       prediction does not match the true label;
    2. runs ``attack.attack_batch`` on the un-encoded input;
    3. re-encodes the adversarial image channel by channel with
       ``encoder.tempencoding`` and classifies the result;
    4. saves the original, adversarial and difference images via ``show`` and
       prints a ``[STATS]`` line.

    After the loop the original and adversarial accuracies (relative to the
    total number of generated samples) are printed.

    Args:
        args: dict of run options.  Keys read here: ``level``, ``use_zvalue``,
            ``dataset``, ``numimg``, ``firstimg``, ``attack``, ``maxiter``,
            ``print_every``, ``early_stop_iters``, ``lr``, ``init_const``,
            ``binary_steps``, ``untargeted``, ``adam_beta1``, ``adam_beta2``,
            ``use_tanh``, ``use_resize``, ``reset_adam``, ``solver``,
            ``save_ckpts``, ``load_ckpt``, ``start_iter``, ``init_size``,
            ``uniform``, ``seed`` and ``save``.  ``args['numimg']`` is
            overwritten in place when it is 0.

    Raises:
        ValueError: if ``args['dataset']`` is not ``"mnist"``, ``"cifar10"``
            or ``"imagenet"``.
    """
    stats_format = (
        "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, "
        "success = {}, const = {:.6f}, prev_class = {}, new_class = {}, "
        "distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
    )

    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"

        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        else:
            # Fail here with a clear message instead of a NameError on
            # `data`/`model` further down.
            raise ValueError(
                "unknown dataset {!r}; expected 'mnist', 'cifar10' or "
                "'imagenet'".format(args['dataset']))
        print('Done...')

        # numimg == 0 means "use every test image from firstimg onwards".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than
            # optimizing images jointly
            attack = CarliniL2(
                sess, model,
                batch_size=1,
                max_iterations=args['maxiter'],
                print_every=args['print_every'],
                early_stop_iters=args['early_stop_iters'],
                confidence=0,
                learning_rate=args['lr'],
                initial_const=args['init_const'],
                binary_search_steps=args['binary_steps'],
                targeted=not args['untargeted'],
                use_log=use_log,
                adam_beta1=args['adam_beta1'],
                adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(
                sess, model,
                batch_size=128,
                max_iterations=args['maxiter'],
                print_every=args['print_every'],
                early_stop_iters=args['early_stop_iters'],
                confidence=0,
                learning_rate=args['lr'],
                initial_const=args['init_const'],
                binary_search_steps=args['binary_steps'],
                targeted=not args['untargeted'],
                use_log=use_log,
                use_tanh=args['use_tanh'],
                use_resize=args['use_resize'],
                adam_beta1=args['adam_beta1'],
                adam_beta2=args['adam_beta2'],
                reset_adam_after_found=args['reset_adam'],
                solver=args['solver'],
                save_ckpts=args['save_ckpts'],
                load_checkpoint=args['load_ckpt'],
                start_iter=args['start_iter'],
                init_size=args['init_size'],
                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])

        print('Generate data')
        (all_inputs, all_targets, all_labels, all_true_ids,
         encoding_all) = generate_data(
             data,
             samples=args['numimg'],
             targeted=not args['untargeted'],
             start=args['firstimg'],
             inception=is_inception)
        print('Done...')

        # Create the output directory without going through a shell, so
        # paths containing spaces or shell metacharacters are handled safely.
        os.makedirs("{}/{}".format(args['save'], args['dataset']),
                    exist_ok=True)

        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            true_class = np.argmax(labels)
            print("true labels:", true_class, labels)
            print("target:", np.argmax(targets), targets)

            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):",
                  original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != true_class:
                print("skip wrongly classified image no. {}, original class "
                      "{}, classified as {}".format(i, true_class,
                                                    original_class[-1]))
                continue
            # Only reached when the prediction matched the label, so the
            # counter is bumped by a plain int (keeps it a scalar instead of
            # turning it into a 1-element boolean-sum array).
            origin_correct += 1

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if isinstance(const, list):
                const = const[0]
            if len(adv.shape) == 3:
                # Restore the leading batch axis dropped by the attack.
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            # Re-encode the adversarial image: move channels to axis 1,
            # encode each of the three channels separately, stack the encoded
            # channels back along axis 1 and return to channels-last layout.
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            encoded_channels = [
                temp_encoder.tempencoding(encode_adv[:, channel, :, :])
                for channel in range(3)
            ]
            encode_adv = np.concatenate(encoded_channels, axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:",
                  adversarial_class[-1:-6:-1])
            adv_correct += int(true_class == adversarial_class[-1])

            if args['untargeted']:
                success = adversarial_class[-1] != original_class[-1]
            else:
                success = adversarial_class[-1] == np.argmax(targets)
            # Overly distorted examples never count as successes.
            if l2_distortion > 20.0:
                success = False
            success = bool(success)
            if success:
                total_success += 1
                l2_total += l2_distortion

            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1],
                adversarial_class[-1], success, l2_distortion)
            print("Saving to", suffix)
            show(inputs,
                 "{}/{}/{}_original_{}.png".format(args['save'],
                                                   args['dataset'], img_no,
                                                   suffix))
            show(adv,
                 "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                      args['dataset'],
                                                      img_no, suffix))
            show(adv - inputs,
                 "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                               img_no, suffix))
            print(stats_format.format(
                img_no, i, all_true_ids[i], timeend - timestart, success,
                const, original_class[-1], adversarial_class[-1],
                l2_distortion, total_success / float(img_no),
                0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def main(args):
    """Attack each correctly classified test image and log statistics.

    The function opens a TensorFlow session with GPU memory growth enabled,
    loads the dataset/model selected by ``args['dataset']``, builds either the
    white-box (``CarliniL2``) or black-box (``BlackBoxL2``) attack, and then,
    for every sample returned by ``generate_data``:

    1. classifies the input and skips the sample when the prediction does not
       match the true label;
    2. runs ``attack.attack_batch`` and classifies the adversarial image;
    3. saves the original, adversarial and difference images via ``show``;
    4. prints a ``[STATS]`` line and appends the same line to
       ``<save>/report.txt``.

    Args:
        args: dict of run options.  Keys read here: ``use_zvalue``,
            ``dataset``, ``numimg``, ``firstimg``, ``attack``, ``maxiter``,
            ``print_every``, ``early_stop_iters``, ``lr``, ``init_const``,
            ``binary_steps``, ``untargeted``, ``adam_beta1``, ``adam_beta2``,
            ``use_tanh``, ``use_resize``, ``reset_adam``, ``solver``,
            ``save_ckpts``, ``load_ckpt``, ``start_iter``, ``init_size``,
            ``uniform``, ``seed`` and ``save``.  ``args['numimg']`` is
            overwritten in place when it is 0.

    Raises:
        ValueError: if ``args['dataset']`` is not ``"mnist"``, ``"cifar10"``
            or ``"imagenet"``.
    """
    # One format string shared by the console output and the report file so
    # the two can never drift apart.
    stats_format = (
        "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, "
        "success = {}, const = {:.6f}, prev_class = {}, new_class = {}, "
        "distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"

        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        else:
            # Fail here with a clear message instead of a NameError on
            # `data`/`model` further down.
            raise ValueError(
                "unknown dataset {!r}; expected 'mnist', 'cifar10' or "
                "'imagenet'".format(args['dataset']))
        print('Done...')

        # numimg == 0 means "use every test image from firstimg onwards".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than
            # optimizing images jointly
            attack = CarliniL2(
                sess, model,
                batch_size=1,
                max_iterations=args['maxiter'],
                print_every=args['print_every'],
                early_stop_iters=args['early_stop_iters'],
                confidence=0,
                learning_rate=args['lr'],
                initial_const=args['init_const'],
                binary_search_steps=args['binary_steps'],
                targeted=not args['untargeted'],
                use_log=use_log,
                adam_beta1=args['adam_beta1'],
                adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(
                sess, model,
                batch_size=128,
                max_iterations=args['maxiter'],
                print_every=args['print_every'],
                early_stop_iters=args['early_stop_iters'],
                confidence=0,
                learning_rate=args['lr'],
                initial_const=args['init_const'],
                binary_search_steps=args['binary_steps'],
                targeted=not args['untargeted'],
                use_log=use_log,
                use_tanh=args['use_tanh'],
                use_resize=args['use_resize'],
                adam_beta1=args['adam_beta1'],
                adam_beta2=args['adam_beta2'],
                reset_adam_after_found=args['reset_adam'],
                solver=args['solver'],
                save_ckpts=args['save_ckpts'],
                load_checkpoint=args['load_ckpt'],
                start_iter=args['start_iter'],
                init_size=args['init_size'],
                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])

        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')

        # Create the output directory without going through a shell, so
        # paths containing spaces or shell metacharacters are handled safely.
        os.makedirs("{}/{}".format(args['save'], args['dataset']),
                    exist_ok=True)

        img_no = 0
        total_success = 0
        l2_total = 0.0
        for i in range(all_true_ids.size):
            inputs = all_inputs[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            true_class = np.argmax(labels)
            print("true labels:", true_class, labels)
            print("target:", np.argmax(targets), targets)

            # test if the image is correctly classified
            original_predict = model.model.predict(inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):",
                  original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != true_class:
                print("skip wrongly classified image no. {}, original class "
                      "{}, classified as {}".format(i, true_class,
                                                    original_class[-1]))
                continue

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if isinstance(const, list):
                const = const[0]
            if len(adv.shape) == 3:
                # Restore the leading batch axis dropped by the attack.
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            adversarial_predict = model.model.predict(adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:",
                  adversarial_class[-1:-6:-1])

            if args['untargeted']:
                success = adversarial_class[-1] != original_class[-1]
            else:
                success = adversarial_class[-1] == np.argmax(targets)
            # Overly distorted examples never count as successes.
            if l2_distortion > 20.0:
                success = False
            success = bool(success)
            if success:
                total_success += 1
                l2_total += l2_distortion

            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1],
                adversarial_class[-1], success, l2_distortion)
            print("Saving to", suffix)
            show(inputs,
                 "{}/{}/{}_original_{}.png".format(args['save'],
                                                   args['dataset'], img_no,
                                                   suffix))
            show(adv,
                 "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                      args['dataset'],
                                                      img_no, suffix))
            show(adv - inputs,
                 "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                               img_no, suffix))

            stats_line = stats_format.format(
                img_no, i, all_true_ids[i], timeend - timestart, success,
                const, original_class[-1], adversarial_class[-1],
                l2_distortion, total_success / float(img_no),
                0 if total_success == 0 else l2_total / total_success)
            print(stats_line)
            # Append the same line, framed by asterisks, to the run report.
            with open(args['save'] + "/report.txt", 'a') as f:
                f.write("*" * 20 + stats_line + "*" * 20 + "\n\n")
            sys.stdout.flush()