def consistent_backdoor_dataset(eps, basic_model, dataset, orig_labels, target_label, pattern, mask, noise_mean, noise_sd):
    """Build a backdoor dataset from adversarial examples of one class.

    Samples whose label equals ``target_label`` are attacked with Linf PGD
    (absolute step size 4/255, 20 steps) at budget ``eps``. Only the
    adversarial examples the attack reports as successful are kept; they are
    handed to ``generate_backdoor_dataset`` together with the one-hot encoded
    target label (10 classes).

    Args:
        eps: Perturbation budget; also the key used to look up the result in
            the dict returned by ``generate_adversarial``. The original
            author's note says a good default is 4.0.
        basic_model: Model wrapped in ``fb.TensorFlowModel`` with bounds (0, 1).
        dataset: Inputs, indexable by a boolean mask.
        orig_labels: Labels aligned with ``dataset``; compared elementwise
            against ``target_label``.
        target_label: Class whose samples are attacked and relabelled.
        pattern, mask, noise_mean, noise_sd: Forwarded unchanged to
            ``generate_backdoor_dataset``.

    Returns:
        The ``(bd_data, bd_labels)`` pair produced by
        ``generate_backdoor_dataset``.
    """
    pgd = fb.attacks.LinfPGD(abs_stepsize=4.0 / 255.0, steps=20)

    # Restrict the attack to samples that already carry the target label.
    is_target = orig_labels == target_label
    target_data = dataset[is_target]
    target_labels = orig_labels[is_target]

    eps_as_array = np.array([eps])
    wrapped_model = fb.TensorFlowModel(basic_model, bounds=(0, 1), preprocessing=dict())

    # The second return value (robust accuracy) is not needed here.
    adv_by_eps, _ = generate_adversarial(
        wrapped_model, pgd, target_data, target_labels, eps_as_array)
    candidates, succeeded = adv_by_eps[eps]
    successful = candidates[succeeded]
    print("Num successful adv_examples: {}".format(successful.shape[0]))

    # NOTE(review): the meaning of the literal 1.0 is defined by
    # generate_backdoor_dataset, which is not visible here.
    one_hot_target = to_categorical([target_label], num_classes=10)
    bd_data, bd_labels = generate_backdoor_dataset(
        successful, one_hot_target, 1.0, pattern, mask, noise_mean, noise_sd)
    return (bd_data, bd_labels)
def __init__(self, tfmodel, epsilons):
    """Freeze ``tfmodel``, wrap it for foolbox and reset the attack state.

    Args:
        tfmodel: Model to attack; its ``trainable`` flag is set to False
            before it is wrapped in ``fb.TensorFlowModel`` with input
            bounds [0.0, 1.0] and empty preprocessing.
        epsilons: Stored as-is on ``self.epsilons``.
    """
    # Freeze first so the wrapper is built around the non-trainable model.
    tfmodel.trainable = False
    wrapped = fb.TensorFlowModel(tfmodel, bounds=[0.0, 1.0], preprocessing={})
    self.fbmodel = wrapped
    self.epsilons = epsilons

    # Start with nothing configured: no attack, no name, setup not yet called.
    self.attack = None
    self.is_setup_called = False
    self.name = None
def tensorflow_mobilenetv2(request: Any) -> ModelAndData:
    """Return an ImageNet MobileNetV2 wrapped for foolbox plus 16 samples.

    The test is skipped when ``request.config.option.skipslow`` is set.
    Inputs live in [0, 255] and are normalised with mean 127.5 / std 127.5.

    Returns:
        ``(fmodel, x, y)`` where ``x`` and ``y`` are eagerpy tensors.
    """
    if request.config.option.skipslow:
        pytest.skip()

    import tensorflow as tf

    keras_model = tf.keras.applications.MobileNetV2(weights="imagenet")
    normalisation = dict(mean=127.5, std=127.5)
    fmodel = fbn.TensorFlowModel(
        keras_model, bounds=(0, 255), preprocessing=normalisation
    )

    images, labels = fbn.samples(fmodel, dataset="imagenet", batchsize=16)
    return fmodel, ep.astensor(images), ep.astensor(labels)
def tensorflow_resnet50(request: Any) -> ModelAndData:
    """Return an ImageNet ResNet50 wrapped for foolbox plus 16 samples.

    The test is skipped when ``request.config.option.skipslow`` is set or
    when TensorFlow sees no GPU. Inputs live in [0, 255]; preprocessing
    flips the last axis (RGB to BGR) and subtracts the per-channel mean.

    Returns:
        ``(fmodel, x, y)`` where ``x`` and ``y`` are eagerpy tensors.
    """
    if request.config.option.skipslow:
        pytest.skip()

    import tensorflow as tf

    # tf.test.is_gpu_available() is deprecated; TensorFlow recommends
    # querying the visible physical devices instead.
    if not tf.config.list_physical_devices("GPU"):
        pytest.skip("ResNet50 test too slow without GPU")

    model = tf.keras.applications.ResNet50(weights="imagenet")
    preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
    fmodel = fbn.TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)

    x, y = fbn.samples(fmodel, dataset="imagenet", batchsize=16)
    x = ep.astensor(x)
    y = ep.astensor(y)
    return fmodel, x, y
def tensorflow_simple_functional(request: Any) -> ModelAndData:
    """Return a functional-API pooling model wrapped for foolbox plus data.

    The model is a single ``GlobalAveragePooling2D`` layer over a 224x224,
    3-channel input whose axis order follows the Keras image data format.
    Labels are the model's own argmax predictions on 16 ImageNet samples.
    ``request`` is accepted but not used.

    Returns:
        ``(fmodel, x, y)`` with ``x`` an eagerpy tensor and ``y`` the
        predicted class indices.
    """
    import tensorflow as tf

    height = width = 224
    n_channels = 3
    if tf.keras.backend.image_data_format() == "channels_first":
        input_shape = (n_channels, height, width)
    else:
        input_shape = (height, width, n_channels)

    inputs = tf.keras.Input(shape=input_shape)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(inputs)
    keras_model = tf.keras.Model(inputs=inputs, outputs=pooled)

    fmodel = fbn.TensorFlowModel(keras_model, bounds=(0, 1))

    images, _ = fbn.samples(fmodel, dataset="imagenet", batchsize=16)
    images = ep.astensor(images)
    predicted = fmodel(images).argmax(axis=-1)
    return fmodel, images, predicted
def tensorflow_simple_sequential(
    device: Optional[str] = None, preprocessing: fbn.types.Preprocessing = None
) -> ModelAndData:
    """Return a Sequential pooling model wrapped for foolbox plus data.

    The model (a single ``GlobalAveragePooling2D`` layer) is created under
    ``tf.device(device)``; the same ``device`` and the given
    ``preprocessing`` are forwarded to ``fbn.TensorFlowModel``. Labels are
    the model's own argmax predictions on 16 CIFAR-10 samples.

    Returns:
        ``(fmodel, x, y)`` with ``x`` an eagerpy tensor and ``y`` the
        predicted class indices.
    """
    import tensorflow as tf

    with tf.device(device):
        keras_model = tf.keras.Sequential()
        pooling = tf.keras.layers.GlobalAveragePooling2D()
        keras_model.add(pooling)

    fmodel = fbn.TensorFlowModel(
        keras_model, bounds=(0, 1), device=device, preprocessing=preprocessing
    )

    images, _ = fbn.samples(fmodel, dataset="cifar10", batchsize=16)
    images = ep.astensor(images)
    predicted = fmodel(images).argmax(axis=-1)
    return fmodel, images, predicted
def model_attack(model, model_type, attack_type, config):
    """Run a noise attack on MNIST samples and return the robust accuracy.

    Args:
        model: Native model to attack.
        model_type: ``"pt"`` wraps the model in ``fb.PyTorchModel``, ``"tf"``
            in ``fb.TensorFlowModel``; any other value falls back to
            ``fb.models.MXNetModel``. All wrappers use bounds (0, 1).
        attack_type: One of ``"uniform"``, ``"gaussian"`` or
            ``"saltandpepper"``.
        config: Mapping; only ``config['batch_size']`` is read (number of
            MNIST samples to attack).

    Returns:
        One minus the mean of the attack's success flags, averaged over
        every epsilon and every sample at once (a single scalar).

    Raises:
        ValueError: If ``attack_type`` is not one of the supported names.
            (Previously this surfaced later as an ``UnboundLocalError``.)
    """
    # Pick the attack first so an unsupported name fails before any model
    # wrapping or sample loading is done.
    if attack_type == "uniform":
        attack = fb.attacks.L2AdditiveUniformNoiseAttack()
    elif attack_type == "gaussian":
        attack = fb.attacks.L2AdditiveGaussianNoiseAttack()
    elif attack_type == "saltandpepper":
        attack = fb.attacks.SaltAndPepperNoiseAttack()
    else:
        raise ValueError(
            f"Unknown attack_type: {attack_type!r} "
            "(expected 'uniform', 'gaussian' or 'saltandpepper')"
        )

    if model_type == "pt":
        fmodel = fb.PyTorchModel(model, bounds=(0, 1))
    elif model_type == "tf":
        fmodel = fb.TensorFlowModel(model, bounds=(0, 1))
    else:
        fmodel = fb.models.MXNetModel(model, bounds=(0, 1))

    images, labels = fb.utils.samples(
        fmodel, dataset='mnist', batchsize=config['batch_size'])

    epsilons = [
        0.0,
        0.0002,
        0.0005,
        0.0008,
        0.001,
        0.0015,
        0.002,
        0.003,
        0.01,
        0.1,
        0.3,
        0.5,
        1.0,
    ]
    raw_advs, clipped_advs, success = attack(
        fmodel, images, labels, epsilons=epsilons)

    # Only the PyTorch result needs an explicit move to the CPU; the other
    # backends were already converted with a plain .numpy().
    if model_type == "pt":
        success = success.cpu()
    robust_accuracy = 1 - success.numpy().astype(float).flatten().mean(axis=-1)
    return robust_accuracy
def tensorflow_mobilenetv3(request: Any) -> ModelAndData:
    """Return a minimalistic MobileNetV3Small wrapped for foolbox plus data.

    The test is skipped when ``request.config.option.skipslow`` is set.
    The wrapper uses bounds (0, 255) and no foolbox-side preprocessing;
    8 ImageNet samples are returned with it.

    Returns:
        ``(fmodel, x, y)`` where ``x`` and ``y`` are eagerpy tensors.
    """
    if request.config.option.skipslow:
        pytest.skip()

    import tensorflow as tf

    keras_model = tf.keras.applications.MobileNetV3Small(
        weights="imagenet", minimalistic=True
    )
    fmodel = fbn.TensorFlowModel(keras_model, bounds=(0, 255), preprocessing=None)

    images, labels = fbn.samples(fmodel, dataset="imagenet", batchsize=8)
    return fmodel, ep.astensor(images), ep.astensor(labels)
def tensorflow_simple_subclassing(request: Any) -> ModelAndData:
    """Return a subclassed Keras pooling model wrapped for foolbox plus data.

    The model is a ``tf.keras.Model`` subclass whose ``call`` applies a
    single ``GlobalAveragePooling2D`` layer. Labels are the model's own
    argmax predictions on 16 CIFAR-10 samples. ``request`` is accepted but
    not used.

    Returns:
        ``(fmodel, x, y)`` with ``x`` an eagerpy tensor and ``y`` the
        predicted class indices.
    """
    import tensorflow as tf

    class Model(tf.keras.Model):  # type: ignore
        """Keras model that only performs global average pooling."""

        def __init__(self) -> None:
            super().__init__()
            self.pool = tf.keras.layers.GlobalAveragePooling2D()

        def call(self, x: tf.Tensor) -> tf.Tensor:  # type: ignore
            return self.pool(x)

    fmodel = fbn.TensorFlowModel(Model(), bounds=(0, 1))

    images, _ = fbn.samples(fmodel, dataset="cifar10", batchsize=16)
    images = ep.astensor(images)
    predicted = fmodel(images).argmax(axis=-1)
    return fmodel, images, predicted
optimizer=tf.keras.optimizers.Adam(0.001), metrics=['accuracy'], ) baseline_model.fit(ds_train, epochs=1, validation_data=ds_test, steps_per_epoch=7500 // batch_size, validation_steps=2500 // batch_size, callbacks=[tensorboard_callback]) for images, labels in ds_train.take(1): # only take first element of dataset images_ex = ep.astensors(images) labels_ex = ep.astensors(labels) fmodel = fb.TensorFlowModel(baseline_model, bounds=(0, 1)) attacks = [ fa.FGSM(), fa.LinfPGD(), fa.LinfBasicIterativeAttack(), fa.LinfAdditiveUniformNoiseAttack(), fa.LinfDeepFoolAttack(), ] attacks_names = [ "FGSM", "LinfPGD", "LinfBasicIterativeAttack", "LinfAdditiveUniformNoiseAttack", "LinfDeepFoolAttack" ] epsilons = [
metrics=['accuracy'], ) model.fit( ds_train, epochs=1, validation_data=ds_test, callbacks=[tensorboard_callback] ) for images, labels in ds_train.take(1): images_ex = ep.astensors(images) labels_ex = ep.astensors(labels) fmodel = fb.TensorFlowModel(model, bounds=(0, 1)) attacks = [ fa.FGSM(), fa.LinfPGD(), fa.LinfBasicIterativeAttack(), fa.LinfAdditiveUniformNoiseAttack(), fa.LinfDeepFoolAttack(), ] epsilons = [ 0.0, 0.0005, 0.001, 0.0015, 0.002,
def create(name: str):
    """Build the foolbox TensorFlow wrapper for the model registered as ``name``.

    ``Models[name]`` supplies a model constructor and a preprocessing mode.
    The constructor is called with ImageNet weights, and the mode is turned
    into bounds and preprocessing by ``get_bounds_and_preprocessing``.

    Args:
        name: Key into the ``Models`` registry.

    Returns:
        The ``fbn.TensorFlowModel`` wrapping the instantiated model.
    """
    model_factory, mode = Models[name]
    keras_model = model_factory(weights="imagenet")
    bounds, preprocessing = get_bounds_and_preprocessing(mode)
    return fbn.TensorFlowModel(
        keras_model, bounds=bounds, preprocessing=preprocessing
    )
def _build_pgd_attack(search_range, eps):
    """Return the PGD attack for ``search_range``'s norm at budget ``eps``."""
    steps = search_range[4]
    rel_stepsize = search_range[3]
    if rel_stepsize is None:
        rel_stepsize = 2 * eps / steps
    if search_range[1] == 'l2':
        return fb.attacks.L2PGD(rel_stepsize=rel_stepsize, steps=steps)
    return fb.attacks.LinfPGD(rel_stepsize=rel_stepsize, steps=steps)


def _build_cw_attack(search_range):
    """Return the L2 Carlini-Wagner attack configured from ``search_range``."""
    steps = search_range[4]
    stepsize = search_range[3]
    if stepsize is None:
        stepsize = 2 * search_range[2] / steps
    return fb.attacks.L2CarliniWagnerAttack(stepsize=stepsize, steps=steps)


def _keep_first_success(best_points, best_success, new_points, new_success):
    """Fill in, in place, samples that only the newer attack managed to flip."""
    for j in range(best_points.shape[0]):
        if best_success[j] == 0 and new_success[j] == 1:
            best_points[j] = new_points[j]
            best_success[j] = new_success[j]


def get_boundary_points(model,
                        x,
                        y_onehot,
                        batch_size=64,
                        pipeline=('pgd',),
                        search_range=('local', 'l2', 0.3, None, 100),
                        clamp=(0, 1),
                        backend='pytorch',
                        device='cuda:0',
                        **kwargs):
    """Find nearby boundary points by running adversarial attacks.

    Each attack named in ``pipeline`` is run in batches over ``x``; after
    each attack ``take_closer_bd`` merges its result into the running
    ``(bd, dis2cls_bd)`` pair.

    Args:
        model (tf.models.Model or torch.nn.Module): tf.keras model or
            pytorch model.
        x (np.ndarray): Benign inputs.
        y_onehot (np.ndarray): One-hot labels for the benign inputs.
        batch_size (int, optional): Batch size. Defaults to 64.
        pipeline (sequence, optional): Names of the adversarial attacks to
            run; ``'pgd'`` and ``'cw'`` are recognised. Defaults to
            ``('pgd',)``.
        search_range (sequence, optional): Parameters shared by all
            attacks. Index 1 is the norm (``'l2'`` selects L2 PGD, anything
            else Linf PGD), index 2 the epsilon (a float, or for PGD a
            list/ndarray of epsilons tried in order), index 3 the step size
            (``None`` means ``2 * eps / steps``) and index 4 the number of
            steps. Index 0 is not read by this function. Defaults to
            ``('local', 'l2', 0.3, None, 100)``.
        clamp (sequence, optional): Data range ``(low, high)``. Defaults to
            ``(0, 1)``.
        backend (str, optional): Deep learning framework. It is either
            'tf.keras' or 'pytorch'. Defaults to 'pytorch'.
        device (str, optional): GPU device to run the attack. This only
            matters if the backend is 'pytorch'. Defaults to 'cuda:0'.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        (np.ndarray, np.ndarray): Points on the closest boundary and
        distances.

    Raises:
        TypeError: If the PGD epsilon is neither a float nor a list/ndarray.
        ValueError: If ``backend`` is not 'tf.keras' or 'pytorch' while an
            attack is being run.
    """
    bd = None
    dis2cls_bd = np.zeros(x.shape[0]) + 1e16

    if 'pgd' in pipeline:
        print(">>> Start PGD Attack <<<", end='\n', flush=True)
        if backend == 'tf.keras':
            fmodel = fb.TensorFlowModel(model, bounds=(clamp[0], clamp[1]))
            x = tf.constant(x, dtype=tf.float32)
            y_onehot = tf.constant(y_onehot, dtype=tf.int32)
            if isinstance(search_range[2], float):
                attack = _build_pgd_attack(search_range, search_range[2])

                boundary_points = []
                success = 0
                for start in trange(0, x.shape[0], batch_size):
                    batch_x = x[start:start + batch_size]
                    batch_y = y_onehot[start:start + batch_size]

                    _, batch_boundary_points, batch_success = attack(
                        fmodel,
                        batch_x,
                        tf.argmax(batch_y, -1),
                        epsilons=[search_range[2]])

                    boundary_points.append(batch_boundary_points[0])
                    success += np.sum(batch_success)

                boundary_points = tf.concat(boundary_points, axis=0)
                success /= x.shape[0]

                print(
                    f">>> Attacking with EPS={search_range[2]} (norm={search_range[1]}), Success Rate={success} <<<"
                )

            elif isinstance(search_range[2], (list, np.ndarray)):
                boundary_points = []
                success = 0.
                for start in trange(0, x.shape[0], batch_size):
                    batch_x = x[start:start + batch_size]
                    batch_y = y_onehot[start:start + batch_size]

                    batch_boundary_points = None
                    batch_success = None

                    # Try each epsilon in order; a sample keeps the point
                    # from the first epsilon that succeeded on it.
                    for eps in search_range[2]:
                        attack = _build_pgd_attack(search_range, eps)

                        _, c_boundary_points, c_success = attack(
                            fmodel,
                            batch_x,
                            tf.argmax(batch_y, -1),
                            epsilons=[eps])
                        c_boundary_points = c_boundary_points[0].numpy()
                        c_success = tf.cast(c_success[0], tf.int32).numpy()

                        print(
                            f">>> Attacking with EPS={eps} (norm={search_range[1]}), Success Rate={tf.reduce_mean(tf.cast(c_success, tf.float32))} <<<"
                        )

                        if batch_boundary_points is None:
                            batch_boundary_points = c_boundary_points
                            batch_success = c_success
                        else:
                            _keep_first_success(batch_boundary_points,
                                                batch_success,
                                                c_boundary_points, c_success)

                    boundary_points.append(batch_boundary_points)
                    success += np.sum(batch_success)

                boundary_points = tf.concat(boundary_points, axis=0)
                success /= x.shape[0]

            else:
                # The epsilon lives at index 2; report its type.
                raise TypeError(
                    f"Expecting eps as float or list, but got {type(search_range[2])}"
                )

            # Keep the raw class scores here: the take_closer_bd call below
            # already reduces y_pred with argmax, and reducing twice would
            # collapse the per-sample labels into a single scalar.
            y_pred = model.predict(boundary_points, batch_size=batch_size)

            x = x.numpy()
            y_onehot = y_onehot.numpy()
            boundary_points = boundary_points.numpy()

        elif backend == 'pytorch':
            model.eval()
            x, y_onehot, model = to_device(x, y_onehot, model, device)

            fmodel = fb.PyTorchModel(model, bounds=(clamp[0], clamp[1]))
            # Use a separate name for the prediction wrapper so `model`
            # stays the raw module if another pipeline stage runs afterwards.
            predictor = PytorchModel(model)
            if isinstance(search_range[2], float):
                attack = _build_pgd_attack(search_range, search_range[2])

                boundary_points = []
                success = 0.
                for start in trange(0, x.shape[0], batch_size):
                    batch_x = x[start:start + batch_size]
                    batch_y = y_onehot[start:start + batch_size]

                    _, batch_boundary_points, batch_success = attack(
                        fmodel,
                        batch_x,
                        torch.argmax(batch_y, -1),
                        epsilons=[search_range[2]])

                    # One tensor per batch, concatenated along the sample
                    # axis below (same layout as the other branches).
                    boundary_points.append(batch_boundary_points[0])
                    # Accumulate as float so the division below is valid.
                    success += torch.sum(batch_success.detach()).float()

                boundary_points = torch.cat(boundary_points, dim=0)
                success /= x.shape[0]

                print(
                    f">>> Attacking with EPS={search_range[2]} (norm={search_range[1]}), Success Rate={success.cpu().numpy()} <<<"
                )

            elif isinstance(search_range[2], (list, np.ndarray)):
                boundary_points = []
                success = 0.
                for start in trange(0, x.shape[0], batch_size):
                    batch_x = x[start:start + batch_size]
                    batch_y = y_onehot[start:start + batch_size]

                    batch_boundary_points = None
                    batch_success = None

                    # Try each epsilon in order; a sample keeps the point
                    # from the first epsilon that succeeded on it.
                    for eps in search_range[2]:
                        attack = _build_pgd_attack(search_range, eps)

                        _, c_boundary_points, c_success = attack(
                            fmodel,
                            batch_x,
                            torch.argmax(batch_y, -1),
                            epsilons=[eps])
                        c_boundary_points = c_boundary_points[0]
                        c_success = c_success.squeeze(0)

                        print(
                            f">>> Attacking with EPS={eps} (norm={search_range[1]}), Success Rate={c_success.detach().cpu().numpy().mean()} <<<"
                        )

                        if batch_boundary_points is None:
                            batch_boundary_points = c_boundary_points.detach(
                            ).cpu()
                            batch_success = c_success.detach().cpu()
                        else:
                            _keep_first_success(batch_boundary_points,
                                                batch_success,
                                                c_boundary_points, c_success)

                    boundary_points.append(batch_boundary_points)
                    success += torch.sum(batch_success.detach()).float()

                boundary_points = torch.cat(boundary_points, dim=0)
                success /= x.shape[0]

            else:
                # The epsilon lives at index 2; report its type.
                raise TypeError(
                    f"Expecting eps as float or list, but got {type(search_range[2])}"
                )

            torch.cuda.empty_cache()
            y_pred = predictor(boundary_points,
                               batch_size=batch_size,
                               training=False,
                               device=device)

            x = x.detach().cpu().numpy()
            y_onehot = y_onehot.detach().cpu().numpy()
            y_pred = y_pred.detach().cpu().numpy()
            boundary_points = boundary_points.detach().cpu().numpy()

        else:
            raise ValueError(f"Unknow backend: {backend}")

        bd, dis2cls_bd = take_closer_bd(x, np.argmax(y_onehot, -1), bd,
                                        dis2cls_bd, boundary_points,
                                        np.argmax(y_pred, -1))

    if 'cw' in pipeline:
        print(">>> Start CW Attack <<<", end='\n', flush=True)
        if backend == 'tf.keras':
            fmodel = fb.TensorFlowModel(model, bounds=(clamp[0], clamp[1]))
            x = tf.constant(x, dtype=tf.float32)
            y_onehot = tf.constant(y_onehot, dtype=tf.int32)

            attack = _build_cw_attack(search_range)

            boundary_points = []
            success = 0.
            for start in trange(0, x.shape[0], batch_size):
                batch_x = x[start:start + batch_size]
                batch_y = y_onehot[start:start + batch_size]

                _, batch_boundary_points, batch_success = attack(
                    fmodel,
                    batch_x,
                    tf.argmax(batch_y, -1),
                    epsilons=[search_range[2]])
                boundary_points.append(batch_boundary_points[0])
                # Count with NumPy (as the PGD branch does) so a Python
                # float is never added to an int32 tensor.
                success += np.sum(batch_success)

            boundary_points = tf.concat(boundary_points, axis=0)
            success /= x.shape[0]

            print(
                f">>> Attacking with EPS={search_range[2]} (norm={search_range[1]}), Success Rate={success} <<<"
            )

            # Raw class scores; the argmax is applied once, in the
            # take_closer_bd call below.
            y_pred = model.predict(boundary_points, batch_size=batch_size)

            x = x.numpy()
            y_onehot = y_onehot.numpy()
            boundary_points = boundary_points.numpy()

        elif backend == 'pytorch':
            model.eval()
            x, y_onehot, model = to_device(x, y_onehot, model, device)

            fmodel = fb.PyTorchModel(model, bounds=(clamp[0], clamp[1]))
            predictor = PytorchModel(model)

            attack = _build_cw_attack(search_range)

            boundary_points = []
            success = 0.
            for start in trange(0, x.shape[0], batch_size):
                batch_x = x[start:start + batch_size]
                batch_y = y_onehot[start:start + batch_size]

                _, batch_boundary_points, batch_success = attack(
                    fmodel,
                    batch_x,
                    torch.argmax(batch_y, -1),
                    epsilons=[search_range[2]])
                boundary_points.append(batch_boundary_points[0])
                success += torch.sum(batch_success.detach())

            boundary_points = torch.cat(boundary_points, dim=0)
            success /= x.shape[0]

            print(
                f">>> Attacking with EPS={search_range[2]} (norm={search_range[1]}), Success Rate={success.cpu().numpy()} <<<"
            )

            y_pred = predictor(boundary_points,
                               batch_size=batch_size,
                               training=False,
                               device=device)

            x = x.detach().cpu().numpy()
            y_onehot = y_onehot.detach().cpu().numpy()
            y_pred = y_pred.detach().cpu().numpy()
            boundary_points = boundary_points.detach().cpu().numpy()

        else:
            raise ValueError(f"Unknow backend: {backend}")

        bd, dis2cls_bd = take_closer_bd(x, np.argmax(y_onehot, -1), bd,
                                        dis2cls_bd, boundary_points,
                                        np.argmax(y_pred, -1))

    return convert_to_numpy(bd), dis2cls_bd