def _to_art_classifier(
    classifier: Union[tf.keras.Model, torch.nn.Module],
    nb_classes: int,
    input_shape: Tuple[int, ...],
) -> Union[TensorFlowV2Classifier, PyTorchClassifier]:
    """Converts a classifier to an ART classifier.

    :param classifier: Classifier to be converted. Either a PyTorch or TensorFlow classifier.
    :param nb_classes: Number of classes that were used to train the classifier.
    :param input_shape: Input shape of a data point of the classifier.
    :return: Given classifier converted to an ART classifier.
    :raises TypeError: If the given classifier is of an invalid type.
    """
    if isinstance(classifier, torch.nn.Module):
        return PyTorchClassifier(
            model=classifier,
            loss=None,
            nb_classes=nb_classes,
            input_shape=input_shape,
        )
    if isinstance(classifier, tf.keras.Model):
        return TensorFlowV2Classifier(
            model=classifier,
            nb_classes=nb_classes,
            input_shape=input_shape,
        )
    else:
        raise TypeError(
            f"Expected classifier to be an instance of {str(torch.nn.Module)} or {str(tf.keras.Model)}, "
            f"received {str(type(classifier))} instead."
        )
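# A quick usage sketch of the converter above. The toy torch module and shapes are
# illustrative assumptions, not part of the original code.
import torch

toy_model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 10),
)
art_clf = _to_art_classifier(toy_model, nb_classes=10, input_shape=(1, 28, 28))
print(type(art_clf).__name__)  # PyTorchClassifier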
def test_generate(art_warning):
    try:
        x_train = np.ones((2, 12, 299, 299, 3)).astype(np.float32)
        y_train = np.zeros((2, 101))
        y_train[:, 1] = 1

        model = Model()
        classifier = PyTorchClassifier(
            model=model,
            loss=None,
            input_shape=x_train.shape[1:],
            nb_classes=y_train.shape[1],
            clip_values=(0, 1),
        )
        attack = OverTheAirFlickeringPyTorch(classifier=classifier, max_iter=1, verbose=False)

        x_train_adv = attack.generate(x=x_train, y=y_train)

        assert x_train.shape == x_train_adv.shape
        assert np.min(x_train_adv) >= 0.0
        assert np.max(x_train_adv) <= 1.0
    except ARTTestException as e:
        art_warning(e)
def main(args):
    print('==> Loading data..')
    if args['dataset'] == 'mnist':
        (_, _), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()
        input_shape = (1, 28, 28)
    else:
        (_, _), (x_test, y_test), min_pixel_value, max_pixel_value = load_cifar10()
        input_shape = (3, 32, 32)
    x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

    print('==> Loading model..')
    model = loadmodel(args)
    model = model.cuda()
    model = model.eval()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=criterion,
        optimizer=optimizer,
        input_shape=input_shape,
        nb_classes=10,
    )

    predictions = classifier.predict(x_test[:args['n_samples']])
    clean_accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test[:args['n_samples']], axis=1)
    ) / len(y_test[:args['n_samples']])
    print("Accuracy on benign test examples: {}%".format(clean_accuracy * 100))

    print("==> Evaluate the classifier on adversarial test examples")
    queries = [100, 200, 500]
    acc = attackmodel(args, classifier, x_test[:args['n_samples']], y_test[:args['n_samples']], queries)
    np.save("./pgd_results/" + args['dataset'] + args['save'], np.array(acc))
    print("The adjusted accuracies are:")
    print(acc)
def getClassifier(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(0, 1),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(3, 224, 224),
        nb_classes=1000,
    )
    return classifier
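# Illustrative use of getClassifier. The torchvision model and the random input
# are assumptions, not part of the original code; any ImageNet-scale (1000-class)
# PyTorch model with 3x224x224 inputs would fit the wrapper above.
import numpy as np
import torchvision

resnet = torchvision.models.resnet18(pretrained=True).eval()
art_classifier = getClassifier(resnet)

x = np.random.rand(1, 3, 224, 224).astype(np.float32)
print(art_classifier.predict(x).shape)  # (1, 1000)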
def attack_FGSM_nontargeted(dataloader, model, model_info, args, checkpoint_dir):
    """FGSM attack"""
    device = args.device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    img_size = model_info["model_img_size"]
    n_classes = model_info["num_classes"]

    classifier = PyTorchClassifier(
        model=model,
        loss=criterion,
        clip_values=(0.0, 1.0),
        optimizer=optimizer,
        input_shape=(img_size, img_size),
        nb_classes=n_classes,
        device_type=device,
    )

    attack = FastGradientMethod(estimator=classifier, batch_size=args.batch_size)

    # Launching a non-targeted attack
    # t = args.target_class
    print("Launching FGSM nontargeted attack")

    dest_images = os.path.join(checkpoint_dir, args.model_name)
    os.makedirs(dest_images, exist_ok=True)

    # Run over the entire dataset, batch by batch
    for data in tqdm(dataloader):
        sample, label, img_path = data
        sample = sample.to(device)

        # Launch attack
        sample_adv = attack.generate(x=sample.cpu())

        # Save the adversarial images
        img_path = [it.split("/")[-1] for it in img_path]
        for i in range(len(sample_adv)):
            _img = sample_adv[i].transpose(1, 2, 0)
            skimage.io.imsave(os.path.join(dest_images, img_path[i]), img_as_ubyte(_img))

    with open(os.path.join(dest_images, "stats.txt"), "w") as f:
        f.write("Fooling-rate was nan\n")

    return dest_images
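# The function above records the fooling rate as "nan". A hypothetical helper
# (not in the original code) that could measure it per batch by comparing the
# classifier's predictions on clean vs. adversarial samples:
def fooling_rate(classifier, x_clean, x_adv):
    clean_pred = np.argmax(classifier.predict(x_clean), axis=1)
    adv_pred = np.argmax(classifier.predict(x_adv), axis=1)
    return float(np.mean(clean_pred != adv_pred))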
def test_general_iris_nn(iris_dataset):
    """
    Check whether the produced adversaries are correct, given a neural network
    classifier and the iris flower dataset.
    """
    (x_train, y_train, x_valid, y_valid), _, clip_values = iris_dataset

    x = Variable(torch.FloatTensor(np.array(x_train)))
    y = Variable(torch.FloatTensor(np.eye(3)[y_train]))

    neural_network = NeuralNetwork()
    nn_model_irises = neural_network.get_nn_model(4, 3, 10)
    neural_network.train_nn(nn_model_irises, x, y, 1e-4, 1000)

    est_nn_iris = PyTorchClassifier(
        model=nn_model_irises,
        loss=neural_network.loss_fn,
        input_shape=(4,),
        nb_classes=3,
        clip_values=clip_values,
    )

    lpf_nn = LowProFool(classifier=est_nn_iris, eta=5, lambd=0.2, eta_decay=0.9)
    lpf_nn.fit_importances(x_valid, y_valid)

    target = np.eye(3)[np.array(
        y_valid.apply(lambda x: np.random.choice([i for i in range(3) if i != x]))
    )]

    # Use of LowProFool
    adversaries = lpf_nn.generate(x=x_valid, y=target)
    expected = np.argmax(target, axis=1)

    x = Variable(torch.from_numpy(adversaries.astype(np.float32)))
    predicted = np.argmax(nn_model_irises.forward(x).detach().numpy(), axis=1)

    # Test
    correct = expected == predicted
    success_rate = np.sum(correct) / correct.shape[0]
    expected = 0.75
    logger.info(
        "[Irises, PyTorch neural network] success rate of adversarial attack (expected >{:.2f}): "
        "{:.2f}%".format(expected * 100, success_rate * 100)
    )
    assert success_rate > expected
def create_classifier_art():
    in_chans = 1
    extras = dict(in_chans=in_chans)
    model = timm.create_model("resnet50", pretrained=True, num_classes=10, **extras)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(0, 1),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(in_chans, 32, 32),
        nb_classes=10,
    )
    return classifier
def test_get_loss_gradients(art_warning):
    try:
        x_train = np.ones((2, 12, 299, 299, 3)).astype(np.float32)
        y_train = np.zeros((2, 101))
        y_train[:, 1] = 1

        model = Model()
        classifier = PyTorchClassifier(
            model=model, loss=None, input_shape=x_train.shape[1:], nb_classes=y_train.shape[1]
        )
        attack = OverTheAirFlickeringPyTorch(classifier=classifier, verbose=False)

        gradients = attack._get_loss_gradients(
            x=torch.from_numpy(x_train),
            y=torch.from_numpy(y_train),
            perturbation=torch.zeros(x_train.shape),
        )

        assert gradients.shape == (2, 12, 1, 1, 3)
    except ARTTestException as e:
        art_warning(e)
def train(dataloader, model,criterion, optimizer, scheduler, epoch): model.train() print('epoch ' + str(epoch)) train_loss = 0.0 train_acc = 0.0 total = len(dataloader) start = time.time() toPilImage = transforms.ToPILImage() # transform tensor into PIL image to save for batch_num, (x, y) in enumerate(dataloader): x = x.to(device) y = y.to(device) # gauss noise training gauss_noise = torch.randn_like(x, device=device) * args.noise_sd # x_noise = x + torch.randn_like(x, device=device) * args.noise_sd # targeted noise training tmp_criterion = nn.CrossEntropyLoss() tmp_optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) classifier = PyTorchClassifier( model=model, clip_values=(min_pixel_value, max_pixel_value), loss=tmp_criterion, optimizer=tmp_optimizer, input_shape=(3, 32, 32), nb_classes=10, ) # all other classes targets = [] y_np = y.cpu().numpy() for i in range(y.shape[0]) : targets.append( np.expand_dims( np.random.permutation( np.delete(np.arange(get_num_classes()), y_np[i]) ), axis=0 ) ) # print(targets[0].shape) targets = np.concatenate(targets) # print(targets.shape) # exit(0) mix_noise = torch.zeros_like(x) for t in range(targets.shape[1]): # generate random targets # targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes()) # calculate loss gradient # print(np.squeeze(targets[:,t]).shape) # exit() y_slice = np.squeeze(targets[:,t]) y_oh = np.zeros((y_slice.size, get_num_classes())) y_oh[np.arange(y_slice.size), y_slice] = 1 grad = classifier.loss_gradient(x=x.cpu().numpy(), y=y_oh) * (-1.0) scaled_grad = torch.Tensor(grad * args.eps_step).to(device) mix_noise += scaled_grad model.zero_grad() tmp_optimizer.zero_grad() # print((scaled_grad.shape, gauss_noise.shape, targets.shape)) # combine noise and targeted noise x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (mix_noise * args.k_value) model.zero_grad() output = model(x_combine) loss = criterion(output, y) acc = accuracy(output, y) optimizer.zero_grad() loss.backward() optimizer.step() train_loss += loss.item() train_acc += acc scheduler.step() end = time.time() print('trainning time:',end - start,'sec, loss: ', train_loss/total, 'acc: ', train_acc/total) return train_loss/total, train_acc/total
# Step 2: Create the model

model = Net()

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

# Step 4: Train the ART classifier

classifier.fit(x_train, y_train, batch_size=64, nb_epochs=3)

# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
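# A minimal sketch of what Step 6 and the follow-up evaluation typically look like
# with ART's FastGradientMethod. The attack budget (eps=0.2) is an assumed value,
# not taken from the original script.
from art.attacks.evasion import FastGradientMethod

attack = FastGradientMethod(estimator=classifier, eps=0.2)
x_test_adv = attack.generate(x=x_test)

# Step 7: Evaluate the ART classifier on adversarial test examples
predictions_adv = classifier.predict(x_test_adv)
accuracy_adv = np.sum(np.argmax(predictions_adv, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy_adv * 100))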
weight_decay = 1e-2
params = model.parameters()
optimizer = torch.optim.SGD(params, lr=lr_max, momentum=0.9, weight_decay=weight_decay)

min_pixel_value = 0
max_pixel_value = 1

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
    preprocessing=(cifar_mu, cifar_std),
)

# Step 5: Evaluate the ART classifier on benign test examples

# normalized_x_test = normalize(x_test)
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
# accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("=== Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
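# A possible continuation (assumed, not from the original script): generate PGD
# adversarial examples for the CIFAR-10 classifier above and measure robust
# accuracy. The budget (eps=8/255, eps_step=2/255, max_iter=10) is a common
# choice for CIFAR-10, not taken from the original code.
from art.attacks.evasion import ProjectedGradientDescent

attack = ProjectedGradientDescent(
    estimator=classifier,
    norm=np.inf,
    eps=8 / 255,
    eps_step=2 / 255,
    max_iter=10,
)
x_test_adv = attack.generate(x=x_test)

predictions_adv = classifier.predict(x_test_adv)
robust_accuracy = np.sum(np.argmax(predictions_adv, axis=1) == y_test) / len(y_test)
print("=== Accuracy on adversarial test examples: {}%".format(robust_accuracy * 100))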
def __init__( self, estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE", norm: Union[int, float, str] = np.inf, eps: float = 0.3, eps_step: float = 0.1, max_iter: int = 100, targeted: bool = False, nb_random_init: int = 5, batch_size: int = 32, loss_type: Optional[str] = None, verbose: bool = True, ): """ Create a :class:`.AutoProjectedGradientDescent` instance. :param estimator: An trained estimator. :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2. :param eps: Maximum perturbation that the attacker can introduce. :param eps_step: Attack step size (input variation) at each iteration. :param max_iter: The maximum number of iterations. :param targeted: Indicates whether the attack is targeted (True) or untargeted (False). :param nb_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0 starting at the original input. :param batch_size: Size of the batch on which adversarial samples are generated. :param loss_type: Defines the loss to attack. Available options: None (Use loss defined by estimator), "cross_entropy", or "difference_logits_ratio" :param verbose: Show progress bars. """ from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier if loss_type not in self._predefined_losses: raise ValueError( "The argument loss_type has an invalid value. The following options for `loss_type` are currently " "supported: {}".format(self._predefined_losses) ) if loss_type is None: if hasattr(estimator, "predict") and is_probability( estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32)) ): raise ValueError( "AutoProjectedGradientDescent is expecting logits as estimator output, the provided " "estimator seems to predict probabilities." ) estimator_apgd = estimator else: if isinstance(estimator, TensorFlowClassifier): import tensorflow as tf if loss_type == "cross_entropy": if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): raise NotImplementedError("Cross-entropy loss is not implemented for probability output.") self._loss_object = tf.reduce_mean( tf.keras.losses.categorical_crossentropy( y_pred=estimator._output, y_true=estimator._labels_ph, from_logits=True ) ) elif loss_type == "difference_logits_ratio": if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): raise ValueError( "The provided estimator seems to predict probabilities. " "If loss_type='difference_logits_ratio' the estimator has to to predict logits." ) raise ValueError( "The loss `difference_logits_ratio` has not been validate completely. It seems that the " "commented implemented below is failing to selected the second largest logit for cases " "where the largest logit is the true logit. For future work `difference_logits_ratio` and " "loss_fn should return the same loss value." ) # def difference_logits_ratio(y_true, y_pred): # i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32) # i_y_pred_arg = tf.argsort(y_pred, axis=1) # # Not completely sure if the following line is correct. 
# # `i_y_pred_arg[:, -2], i_y_pred_arg[:, -1]` seems closer to the output of `loss_fn` than # # `i_y_pred_arg[:, -1], i_y_pred_arg[:, -2]` # i_z_i = tf.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -2], # i_y_pred_arg[:, -1]) # # z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0) # z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0) # z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0) # z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0) # # z_1 = tf.linalg.diag_part(z_1) # z_3 = tf.linalg.diag_part(z_3) # z_i = tf.linalg.diag_part(z_i) # z_y = tf.linalg.diag_part(z_y) # # dlr = -(z_y - z_i) / (z_1 - z_3) # # return tf.reduce_mean(dlr) # # def loss_fn(y_true, y_pred): # i_y_true = np.argmax(y_true, axis=1) # i_y_pred_arg = np.argsort(y_pred, axis=1) # i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -1], # i_y_pred_arg[:, -2]) # # z_1 = y_pred[:, i_y_pred_arg[:, -1]] # z_3 = y_pred[:, i_y_pred_arg[:, -3]] # z_i = y_pred[:, i_z_i] # z_y = y_pred[:, i_y_true] # # z_1 = np.diag(z_1) # z_3 = np.diag(z_3) # z_i = np.diag(z_i) # z_y = np.diag(z_y) # # dlr = -(z_y - z_i) / (z_1 - z_3) # # return np.mean(dlr) # # self._loss_fn = loss_fn # self._loss_object = difference_logits_ratio(y_true=estimator._labels_ph, # y_pred=estimator._output) estimator_apgd = TensorFlowClassifier( input_ph=estimator._input_ph, output=estimator._output, labels_ph=estimator._labels_ph, train=estimator._train, loss=self._loss_object, learning=estimator._learning, sess=estimator._sess, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, feed_dict=estimator._feed_dict, ) elif isinstance(estimator, TensorFlowV2Classifier): import tensorflow as tf if loss_type == "cross_entropy": if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False) else: self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True) elif loss_type == "difference_logits_ratio": if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): raise ValueError( "The provided estimator seems to predict probabilities. " "If loss_type='difference_logits_ratio' the estimator has to to predict logits." 
) class difference_logits_ratio: def __init__(self): self.reduction = "mean" def __call__(self, y_true, y_pred): i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32) i_y_pred_arg = tf.argsort(y_pred, axis=1) i_z_i_list = list() for i in range(y_true.shape[0]): if i_y_pred_arg[i, -1] != i_y_true[i]: i_z_i_list.append(i_y_pred_arg[i, -1]) else: i_z_i_list.append(i_y_pred_arg[i, -2]) i_z_i = tf.stack(i_z_i_list) z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0) z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0) z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0) z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0) z_1 = tf.linalg.diag_part(z_1) z_3 = tf.linalg.diag_part(z_3) z_i = tf.linalg.diag_part(z_i) z_y = tf.linalg.diag_part(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return tf.reduce_mean(dlr) self._loss_fn = difference_logits_ratio() self._loss_object = difference_logits_ratio() estimator_apgd = TensorFlowV2Classifier( model=estimator.model, nb_classes=estimator.nb_classes, input_shape=estimator.input_shape, loss_object=self._loss_object, train_step=estimator._train_step, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, ) elif isinstance(estimator, PyTorchClassifier): import torch if loss_type == "cross_entropy": if is_probability( estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32)) ): raise ValueError( "The provided estimator seems to predict probabilities. If loss_type='cross_entropy' " "the estimator has to to predict logits." ) self._loss_object = torch.nn.CrossEntropyLoss(reduction="mean") elif loss_type == "difference_logits_ratio": if is_probability( estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=ART_NUMPY_DTYPE)) ): raise ValueError( "The provided estimator seems to predict probabilities. " "If loss_type='difference_logits_ratio' the estimator has to to predict logits." 
) class difference_logits_ratio: def __init__(self): self.reduction = "mean" def __call__(self, y_pred, y_true): # type: ignore if isinstance(y_true, np.ndarray): y_true = torch.from_numpy(y_true) if isinstance(y_pred, np.ndarray): y_pred = torch.from_numpy(y_pred) y_true = y_true.float() i_y_true = torch.argmax(y_true, axis=1) i_y_pred_arg = torch.argsort(y_pred, axis=1) i_z_i_list = list() for i in range(y_true.shape[0]): if i_y_pred_arg[i, -1] != i_y_true[i]: i_z_i_list.append(i_y_pred_arg[i, -1]) else: i_z_i_list.append(i_y_pred_arg[i, -2]) i_z_i = torch.stack(i_z_i_list) z_1 = y_pred[:, i_y_pred_arg[:, -1]] z_3 = y_pred[:, i_y_pred_arg[:, -3]] z_i = y_pred[:, i_z_i] z_y = y_pred[:, i_y_true] z_1 = torch.diagonal(z_1) z_3 = torch.diagonal(z_3) z_i = torch.diagonal(z_i) z_y = torch.diagonal(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return torch.mean(dlr.float()) self._loss_object = difference_logits_ratio() estimator_apgd = PyTorchClassifier( model=estimator.model, loss=self._loss_object, input_shape=estimator.input_shape, nb_classes=estimator.nb_classes, optimizer=None, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, device_type=estimator._device, ) else: raise ValueError("The loss type {} is not supported for the provided estimator.".format(loss_type)) super().__init__(estimator=estimator_apgd) self.norm = norm self.eps = eps self.eps_step = eps_step self.max_iter = max_iter self.targeted = targeted self.nb_random_init = nb_random_init self.batch_size = batch_size self.loss_type = loss_type self.verbose = verbose self._check_params()
model = nn.Sequential(
    nn.Conv2d(1, 4, 5),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(4, 10, 5),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(4 * 4 * 10, 100),
    nn.Linear(100, 10),
)

# Step 2a: Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier
classifier = PyTorchClassifier(
    model=model,
    clip_values=(0, 1),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

classifier.fit(x_train, y_train, batch_size=128, nb_epochs=5)

predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))


def calculate_l0(batch_original, batch_adversarial, dim):
    # image_original==x_test_adv
    matrix_bool = batch_original == batch_adversarial
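# The calculate_l0 helper above is cut off. A hypothetical sketch (not the
# original code) of one way to finish it: count, per sample, how many components
# differ between the clean and the adversarial batch, normalised by `dim`, which
# is assumed to be the number of features per sample.
def calculate_l0_sketch(batch_original, batch_adversarial, dim):
    matrix_bool = batch_original == batch_adversarial
    changed = np.sum(~matrix_bool.reshape(len(batch_original), -1), axis=1)
    return changed / dim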
class Feature(nn.Module):
    def __init__(self, features):
        super().__init__()
        self.features = features

    def forward(self, x):
        return self.features(x)[0]


fm_model = nn.Sequential(
    model._model.normalize,
    Feature(model._model.features),
    model._model.pool,
    model._model.flatten,
)
classifier = PyTorchClassifier(
    model=fm_model,
    loss=model.criterion,
    input_shape=(3, 32, 32),
    nb_classes=model._model.classifier[0].in_features,
)
attack_model = MembershipInferenceAttackModel(
    num_classes=model.num_classes,
    num_features=model._model.classifier[0].in_features,
)
attack = MembershipInferenceBlackBox(classifier, attack_model=attack_model)

x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)
x_train, y_train = x_train[:1000], y_train[:1000]
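# A possible next step (assumed, not from the original script): fit the black-box
# membership inference attack on member (train) and non-member (valid) data, then
# score how often membership is predicted correctly.
attack.fit(x_train, y_train, x_valid, y_valid)

inferred_member = attack.infer(x_train, y_train)
inferred_nonmember = attack.infer(x_valid, y_valid)
member_acc = np.mean(inferred_member == 1)
nonmember_acc = np.mean(inferred_nonmember == 0)
print("member acc: {:.3f}, non-member acc: {:.3f}".format(member_acc, nonmember_acc))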
trojanvision.trainer.add_argument(parser) args = parser.parse_args() env = trojanvision.environ.create(**args.__dict__) dataset = trojanvision.datasets.create(**args.__dict__) model = trojanvision.models.create(dataset=dataset, **args.__dict__) if env['verbose']: summary(env=env, dataset=dataset, model=model) model._validate() print('\n\n') from art.estimators.classification import PyTorchClassifier # type: ignore classifier = PyTorchClassifier( model=model._model, loss=model.criterion, input_shape=dataset.data_shape, nb_classes=model.num_classes, ) x_train, y_train = dataset_to_list(dataset.get_dataset('train')) x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train) # valid_train, valid_valid = dataset.split_set(dataset.get_dataset('valid'), length=5000) # x_train, y_train = dataset_to_list(valid_train) # x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train) # valid_loader = dataset.get_dataloader('valid', dataset=valid_valid) # thieved_model._validate(print_prefix='Before Stealing', loader=valid_loader) # thieved_model._validate(print_prefix='After Stealing', loader=valid_loader) import art.attacks.extraction # type:ignore for name in ['CopycatCNN', 'KnockoffNets']:
def adv_train_loop(model, params, ds, min_y, base_data, model_id, attack_type, device, batch_size, max_epochs=5): print('training adversarial:', attack_type) ds_train, ds_valid = ds min_y_train, min_y_val = min_y original_model = copy.deepcopy( model) # used to generate adv images for the trained model original_model.eval() model = copy.deepcopy( model) # making a copy so that original model is not changed model = model.to(device) model_id = f'{model_id}_{attack_type}' with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer: lr = params['lr'] mom = params['momentum'] wd = params['l2_wd'] optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd) sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5) funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)} loss = funcs['loss']._loss_fn acc_metric = Accuracy(device=device) loss_metric = Loss(F.cross_entropy, device=device) acc_val_metric = Accuracy(device=device) loss_val_metric = Loss(F.cross_entropy, device=device) classifier = PyTorchClassifier( model=original_model, clip_values=(0, 1), loss=nn.CrossEntropyLoss(), optimizer=optimizer, input_shape=(3, 64, 64), nb_classes=200, ) attack = None # if attack_type == "fgsm": # attack = FastGradientMethod(estimator=classifier, eps=0.2) # elif attack_type == "bim": # attack = BasicIterativeMethod(estimator=classifier, eps=0.2) # elif attack_type == "carlini": # attack = CarliniLInfMethod(classifier=classifier) # elif attack_type == "deepfool": # attack = DeepFool(classifier=classifier) if attack_type == "fgsm": attack = GradientSignAttack(model, loss_fn=loss, eps=0.2) elif attack_type == "ffa": attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3) elif attack_type == "carlini": attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000) elif attack_type == "lbfgs": attack = DeepFool(classifier=classifier) def train_step(engine, batch): model.train() x, y = batch x = x.to(device) y = y.to(device) - min_y_train with ctx_noparamgrad_and_eval(model): x_adv = attack.perturb(x, y) optimizer.zero_grad() x = torch.cat((x, x_adv)) y = torch.cat((y, y)) ans = model.forward(x) l = loss(ans, y) optimizer.zero_grad() l.backward() optimizer.step() # return ans, y return l.item() trainer = Engine(train_step) # acc_metric.attach(trainer, "accuracy") # loss_metric.attach(trainer, 'loss') def train_eval_step(engine, batch): model.eval() x, y = batch x = x.to(device) y = y.to(device) - min_y_train x_adv = attack.perturb(x, y) x = torch.cat((x, x_adv)) y = torch.cat((y, y)) with torch.no_grad(): ans = model.forward(x) return ans, y train_evaluator = Engine(train_eval_step) acc_metric.attach(train_evaluator, "accuracy") loss_metric.attach(train_evaluator, 'loss') def validation_step(engine, batch): model.eval() x, y = batch x = x.to(device) y = y.to(device) - min_y_val x_adv = attack.perturb(x, y) x = torch.cat((x, x_adv)) y = torch.cat((y, y)) with torch.no_grad(): ans = model.forward(x) return ans, y valid_evaluator = Engine(validation_step) acc_val_metric.attach(valid_evaluator, "accuracy") loss_val_metric.attach(valid_evaluator, 'loss') @trainer.on( Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10)) def log_validation_results(engine): valid_evaluator.run(ds_valid) metrics = valid_evaluator.state.metrics valid_avg_accuracy = metrics['accuracy'] avg_nll = metrics['loss'] print( "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}" .format(engine.state.epoch, valid_avg_accuracy, avg_nll)) 
writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch) writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch) writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch) @trainer.on(Events.EPOCH_COMPLETED) def lr_scheduler(engine): metrics = valid_evaluator.state.metrics avg_nll = metrics['accuracy'] sched.step(avg_nll) @trainer.on(Events.ITERATION_COMPLETED(every=50)) def log_training_loss(engine): batch = engine.state.batch ds = DataLoader(TensorDataset(*batch), batch_size=batch_size) train_evaluator.run(ds) metrics = train_evaluator.state.metrics # metrics = engine.state.metrics accuracy = metrics['accuracy'] nll = metrics['loss'] iter = (engine.state.iteration - 1) % len(ds_train) + 1 if (iter % 50) == 0: print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}". format(engine.state.epoch, iter, len(ds_train), accuracy, nll)) writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch) writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration) writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration) writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration) @trainer.on(Events.EPOCH_COMPLETED) def log_lr(engine): writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch) # @trainer.on(Events.EPOCH_COMPLETED) # def log_training_results(engine): # train_evaluator.run(ds_train) # metrics = train_evaluator.state.metrics # # metrics = engine.state.metrics # avg_accuracy = metrics['accuracy'] # avg_nll = metrics['loss'] # print("Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}" # .format(engine.state.epoch, avg_accuracy, avg_nll)) # writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch) # writer.add_scalar("training/avg_accuracy", # avg_accuracy, engine.state.epoch) # writer.add_scalar("training/avg_error", 1. - # avg_accuracy, engine.state.epoch) @trainer.on( Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10)) def validation_value(engine): metrics = valid_evaluator.state.metrics valid_avg_accuracy = metrics['accuracy'] return valid_avg_accuracy to_save = {'model': model} handler = Checkpoint( to_save, DiskSaver(os.path.join(base_data, model_id), create_dir=True), score_function=validation_value, score_name="val_acc", global_step_transform=global_step_from_engine(trainer), n_saved=None) # kick everything off trainer.add_event_handler( Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10), handler) trainer.run(ds_train, max_epochs=max_epochs)
def robustness_evaluation( object_storage_url, object_storage_username, object_storage_password, data_bucket_name, result_bucket_name, model_id, feature_testset_path="processed_data/X_test.npy", label_testset_path="processed_data/y_test.npy", clip_values=(0, 1), nb_classes=2, input_shape=(1, 3, 64, 64), model_class_file="model.py", model_class_name="model", LossFn="", Optimizer="", epsilon=0.2, ): url = re.compile(r"https?://") cos = Minio( url.sub("", object_storage_url), access_key=object_storage_username, secret_key=object_storage_password, secure=False, ) dataset_filenamex = "X_test.npy" dataset_filenamey = "y_test.npy" weights_filename = "model.pt" model_files = model_id + "/_submitted_code/model.zip" cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex) cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey) cos.fget_object(result_bucket_name, model_id + "/" + weights_filename, weights_filename) cos.fget_object(result_bucket_name, model_files, "model.zip") # Load PyTorch model definition from the source code. zip_ref = zipfile.ZipFile("model.zip", "r") zip_ref.extractall("model_files") zip_ref.close() modulename = "model_files." + model_class_file.split(".")[0].replace( "-", "_") """ We required users to define where the model class is located or follow some naming convention we have provided. """ model_class = getattr(importlib.import_module(modulename), model_class_name) # load & compile model device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model = model_class().to(device) model.load_state_dict(torch.load(weights_filename, map_location=device)) # Define Loss and optimizer function for the PyTorch model if LossFn: loss_fn = eval(LossFn) else: loss_fn = torch.nn.CrossEntropyLoss() if Optimizer: optimizer = eval(Optimizer) else: optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # create pytorch classifier classifier = PyTorchClassifier( model=model, loss=loss_fn, optimizer=optimizer, input_shape=input_shape, nb_classes=nb_classes, clip_values=clip_values, ) # load test dataset x = np.load(dataset_filenamex) y = np.load(dataset_filenamey) # craft adversarial samples using FGSM crafter = FastGradientMethod(classifier, eps=epsilon) x_samples = crafter.generate(x) # obtain all metrics (robustness score, perturbation metric, reduction in confidence) metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y) print("metrics:", metrics) return metrics
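# A simplified stand-in (hypothetical, not the project's get_metrics): compare
# clean and adversarial accuracy of the wrapped classifier to get a rough
# robustness signal for the FGSM samples crafted above.
def simple_robustness_metrics(classifier, x, x_adv, y):
    y_pred = np.argmax(classifier.predict(x), axis=1)
    y_pred_adv = np.argmax(classifier.predict(x_adv), axis=1)
    y_true = y if y.ndim == 1 else np.argmax(y, axis=1)
    clean_acc = float(np.mean(y_pred == y_true))
    adv_acc = float(np.mean(y_pred_adv == y_true))
    return {
        "clean_accuracy": clean_acc,
        "adversarial_accuracy": adv_acc,
        "accuracy_drop": clean_acc - adv_acc,
    }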
def test_fgsm(adv_model, dataset, loss_fn, optimizer, batch_size=32, num_workers=20, device='cuda:0', attack='fgsm', **kwargs): """ Train the model with the given training data :param x: :param y: :param epochs: """ epsilons =[0.00001, 0.0001, 0.004, 0.01, 0.1, 1, 10, 100] label_dict = pkl.load(open('external/speaker2int_7323.pkl','rb')) extractor = mfcc_extractor(collate=False) adv_classifier = PyTorchClassifier(model=AdvModel(adv_model.cpu(), extractor.cpu()), loss=loss_fn, optimizer=optimizer, input_shape=[1, 32000], nb_classes=250) # Create Dataloader dataloader = DataLoader(dataset=dataset['eval'], batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=PadBatch()) n_iterations = len(dataloader) f_log_all, f_name_all = createLogFiles('all') with open(f_name_all, 'a+') as f_log_all: f_log_all.write("\n\n #################################### Begin #####################################") f_log_all.write("\n New Log: {}".format(datetime.now())) # Loop over all the training data for generator n_files = 0 accuracy = 0 adv_acc_eps = {e: 0.0 for e in epsilons} success_eps = {e: 0.0 for e in epsilons} for i, (X, y, f) in enumerate(dataloader): if label_dict: y = torch.LongTensor([label_dict[y_] for y_ in y]) # send data to the GPU y = y.to(device) x_mfccs, labels = extractor((X.to(device).transpose(1,2))), y clean_logits = adv_model.forward(x_mfccs) clean_class = clean_logits.argmax(dim=-1) n_files += len(X) tmp_accuracy = torch.sum(clean_class == y).detach().cpu() accuracy += tmp_accuracy # Epsilon loop for e in epsilons: # FGSM if attack == 'fgsm': attack = FastGradientMethod(estimator=adv_classifier, eps=e) elif attack == 'bim': attack = ProjectedGradientDescent(estimator=adv_classifier, eps=e, eps_step=e/5, max_iter=100) X_fgsm = torch.Tensor(attack.generate(x=X)).to(device) assert(len(X_fgsm) == len(X)) pred_mfccs, labels_preds = extractor(X_fgsm.transpose(1,2)), y adv_logits = adv_model.forward(pred_mfccs) adv_class = adv_logits.argmax(dim=-1) tmp_success = torch.sum(clean_class != adv_class).detach().cpu() tmp_adv_acc = torch.sum(y == adv_class).detach().cpu() success_eps[e] += tmp_success adv_acc_eps[e] += tmp_adv_acc # Update total loss and acc with open(f_name_all, 'a+') as f_log_all: f_log_all.write('File {}\tBatch {}\tEps {}\tTarg {}\tClean {}\tAdv {}\n'.format( f[0][-1], i+1, e, y.cpu().detach().numpy(), clean_class.cpu().detach().numpy(), adv_class.cpu().detach().numpy())) for wav, fi in zip(X_fgsm, f): adv_path="samples/fgsm/{}".format(fi[-2]) if not os.path.exists(adv_path): os.makedirs(adv_path) torchaudio.save("{}/{}_{}.wav".format(adv_path,fi[-1], e), wav.squeeze().detach().cpu(), 8000) print("Epsilon: {}".format(e), "Tmp Acc: {:.3f}".format((tmp_accuracy + 0.0) / len(X)), "Tmp Adv: {:.3f}".format((tmp_adv_acc + 0.0) / len(X)), "Tmp Suc: {:.3f}".format((tmp_success + 0.0) / len(X))) accuracy = (accuracy + 0.0) / n_files adv_acc_eps = {k : v / n_files for k, v in adv_acc_eps.items()} success_eps = {k : v / n_files for k, v in success_eps.items()} with open(f_name_all, 'a+') as f_log_all: f_log_all.write('Epsilons: {} - Accuracy: {}%\tAdv Accuracy: {}%\tSuccess rate: {}%\n'.format(e, accuracy, adv_acc_eps, success_eps)) return
def __init__( self, estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE", norm: Union[int, float, str] = np.inf, eps: float = 0.3, eps_step: float = 0.1, max_iter: int = 100, targeted: bool = False, nb_random_init: int = 5, batch_size: int = 32, loss_type: Optional[str] = None, ): """ Create a :class:`.AutoProjectedGradientDescent` instance. :param estimator: An trained estimator. :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2. :param eps: Maximum perturbation that the attacker can introduce. :param eps_step: Attack step size (input variation) at each iteration. :param max_iter: The maximum number of iterations. :param targeted: Indicates whether the attack is targeted (True) or untargeted (False). :param nb_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0 starting at the original input. :param batch_size: Size of the batch on which adversarial samples are generated. """ from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier if isinstance(estimator, TensorFlowClassifier): import tensorflow as tf if loss_type == "cross_entropy": if is_probability( estimator.predict(x=np.ones( shape=(1, *estimator.input_shape)))): raise NotImplementedError( "Cross-entropy loss is not implemented for probability output." ) else: self._loss_object = tf.reduce_mean( tf.keras.losses.categorical_crossentropy( y_pred=estimator._output, y_true=estimator._labels_ph, from_logits=True)) def loss_fn(y_true, y_pred): y_pred_norm = y_pred - np.amax( y_pred, axis=1, keepdims=True) loss_value = -(y_true * y_pred_norm - np.log( np.sum(np.exp(y_pred_norm), axis=1, keepdims=True))) return np.mean(loss_value) self._loss_fn = loss_fn elif loss_type == "difference_logits_ratio": if is_probability( estimator.predict(x=np.ones( shape=(1, *estimator.input_shape)))): raise ValueError( "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' " "the estimator has to to predict logits.") else: def difference_logits_ratio(y_true, y_pred): i_y_true = tf.cast( tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32) i_y_pred_arg = tf.argsort(y_pred, axis=1) i_z_i = tf.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -2], i_y_pred_arg[:, -1]) z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0) z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0) z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0) z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0) z_1 = tf.linalg.diag_part(z_1) z_3 = tf.linalg.diag_part(z_3) z_i = tf.linalg.diag_part(z_i) z_y = tf.linalg.diag_part(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return tf.reduce_mean(dlr) def loss_fn(y_true, y_pred): i_y_true = np.argmax(y_true, axis=1) i_y_pred_arg = np.argsort(y_pred, axis=1) i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -1], i_y_pred_arg[:, -2]) z_1 = y_pred[:, i_y_pred_arg[:, -1]] z_3 = y_pred[:, i_y_pred_arg[:, -3]] z_i = y_pred[:, i_z_i] z_y = y_pred[:, i_y_true] z_1 = np.diag(z_1) z_3 = np.diag(z_3) z_i = np.diag(z_i) z_y = np.diag(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return np.mean(dlr) self._loss_fn = loss_fn self._loss_object = difference_logits_ratio( y_true=estimator._labels_ph, y_pred=estimator._output) elif loss_type is None: self._loss_object = estimator._loss_object else: raise ValueError( "The argument loss_type has an invalid value. 
The following options for loss_type are " "supported: {}".format( [None, "cross_entropy", "difference_logits_ratio"])) estimator_apgd = TensorFlowClassifier( input_ph=estimator._input_ph, output=estimator._output, labels_ph=estimator._labels_ph, train=estimator._train, loss=self._loss_object, learning=estimator._learning, sess=estimator._sess, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, feed_dict=estimator._feed_dict, ) elif isinstance(estimator, TensorFlowV2Classifier): import tensorflow as tf if loss_type == "cross_entropy": if is_probability( estimator.predict(x=np.ones( shape=(1, *estimator.input_shape)))): self._loss_object = tf.keras.losses.CategoricalCrossentropy( from_logits=False) self._loss_fn = self._loss_object else: self._loss_object = tf.keras.losses.CategoricalCrossentropy( from_logits=True) self._loss_fn = self._loss_object elif loss_type == "difference_logits_ratio": if is_probability( estimator.predict(x=np.ones( shape=(1, *estimator.input_shape)))): raise ValueError( "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' " "the estimator has to to predict logits.") else: def difference_logits_ratio(y_true, y_pred): i_y_true = tf.cast( tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32) i_y_pred_arg = tf.argsort(y_pred, axis=1) i_z_i_list = list() for i in range(y_true.shape[0]): if i_y_pred_arg[i, -1] != i_y_true[i]: i_z_i_list.append(i_y_pred_arg[i, -1]) else: i_z_i_list.append(i_y_pred_arg[i, -2]) i_z_i = tf.stack(i_z_i_list) z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0) z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0) z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0) z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0) z_1 = tf.linalg.diag_part(z_1) z_3 = tf.linalg.diag_part(z_3) z_i = tf.linalg.diag_part(z_i) z_y = tf.linalg.diag_part(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return tf.reduce_mean(dlr) self._loss_fn = difference_logits_ratio self._loss_object = difference_logits_ratio elif loss_type is None: self._loss_object = estimator._loss_object else: raise ValueError( "The argument loss_type has an invalid value. The following options for loss_type are " "supported: {}".format( [None, "cross_entropy", "difference_logits_ratio"])) estimator_apgd = TensorFlowV2Classifier( model=estimator.model, nb_classes=estimator.nb_classes, input_shape=estimator.input_shape, loss_object=self._loss_object, train_step=estimator._train_step, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, ) elif isinstance(estimator, PyTorchClassifier): import torch if loss_type == "cross_entropy": if is_probability( estimator.predict( x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32))): raise ValueError( "The provided estimator seems to predict probabilities. 
If loss_type='cross_entropy' " "the estimator has to to predict logits.") else: def loss_fn(y_true, y_pred): return torch.nn.CrossEntropyLoss()( torch.from_numpy(y_pred), torch.from_numpy(np.argmax(y_true, axis=1))) self._loss_fn = loss_fn self._loss_object = torch.nn.CrossEntropyLoss() elif loss_type == "difference_logits_ratio": if is_probability( estimator.predict( x=np.ones(shape=(1, *estimator.input_shape), dtype=ART_NUMPY_DTYPE))): raise ValueError( "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' " "the estimator has to to predict logits.") else: # def difference_logits_ratio(y_true, y_pred): def difference_logits_ratio(y_pred, y_true): # type: ignore if isinstance(y_true, np.ndarray): y_true = torch.from_numpy(y_true) if isinstance(y_pred, np.ndarray): y_pred = torch.from_numpy(y_pred) y_true = y_true.float() # dlr = torch.mean((y_pred - y_true) ** 2) # return loss i_y_true = torch.argmax(y_true, axis=1) i_y_pred_arg = torch.argsort(y_pred, axis=1) i_z_i_list = list() for i in range(y_true.shape[0]): if i_y_pred_arg[i, -1] != i_y_true[i]: i_z_i_list.append(i_y_pred_arg[i, -1]) else: i_z_i_list.append(i_y_pred_arg[i, -2]) i_z_i = torch.stack(i_z_i_list) z_1 = y_pred[:, i_y_pred_arg[:, -1]] z_3 = y_pred[:, i_y_pred_arg[:, -3]] z_i = y_pred[:, i_z_i] z_y = y_pred[:, i_y_true] z_1 = torch.diagonal(z_1) z_3 = torch.diagonal(z_3) z_i = torch.diagonal(z_i) z_y = torch.diagonal(z_y) dlr = -(z_y - z_i) / (z_1 - z_3) return torch.mean(dlr.float()) self._loss_fn = difference_logits_ratio self._loss_object = difference_logits_ratio elif loss_type is None: self._loss_object = estimator._loss_object else: raise ValueError( "The argument loss_type has an invalid value. The following options for loss_type are " "supported: {}".format( [None, "cross_entropy", "difference_logits_ratio"])) estimator_apgd = PyTorchClassifier( model=estimator.model, loss=self._loss_object, input_shape=estimator.input_shape, nb_classes=estimator.nb_classes, optimizer=None, channels_first=estimator.channels_first, clip_values=estimator.clip_values, preprocessing_defences=estimator.preprocessing_defences, postprocessing_defences=estimator.postprocessing_defences, preprocessing=estimator.preprocessing, device_type=estimator._device, ) else: estimator_apgd = None super().__init__(estimator=estimator_apgd) self.norm = norm self.eps = eps self.eps_step = eps_step self.max_iter = max_iter self.targeted = targeted self.nb_random_init = nb_random_init self.batch_size = batch_size self.loss_type = loss_type self._check_params()
def test_2_pt(self): """ Test with a PyTorch Classifier. :return: """ # Get MNIST (x_train, y_train), (x_test, y_test) = self.mnist x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32) x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32) # Create a model from scratch class PyTorchModel(nn.Module): def __init__(self): super(PyTorchModel, self).__init__() self.conv_1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=1) self.conv_2 = nn.Conv2d(in_channels=4, out_channels=10, kernel_size=5, stride=1) self.fc_1 = nn.Linear(in_features=4 * 4 * 10, out_features=100) self.fc_2 = nn.Linear(in_features=100, out_features=10) def forward(self, x): x = F.relu(self.conv_1(x)) x = F.max_pool2d(x, 2, 2) x = F.relu(self.conv_2(x)) x = F.max_pool2d(x, 2, 2) x = x.view(-1, 4 * 4 * 10) x = F.relu(self.fc_1(x)) x = self.fc_2(x) return x # Step 2a: Define the loss function and the optimizer model = PyTorchModel() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.01) # Step 3: Create the ART classifier classifier = PyTorchClassifier( model=model, clip_values=(0, 1), loss=criterion, optimizer=optimizer, input_shape=(1, 28, 28), nb_classes=10, ) # Initialize DPA Classifier dpa = DeepPartitionEnsemble( classifiers=classifier, ensemble_size=ENSEMBLE_SIZE, channels_first=classifier.channels_first, clip_values=classifier.clip_values, preprocessing_defences=classifier.preprocessing_defences, postprocessing_defences=classifier.postprocessing_defences, preprocessing=classifier.preprocessing, ) # Check basic functionality of DPA Classifier # check predict y_test_dpa = dpa.predict(x=x_test) self.assertEqual(y_test_dpa.shape, y_test.shape) self.assertTrue((np.sum(y_test_dpa, axis=1) <= ENSEMBLE_SIZE * np.ones( (NB_TEST, ))).all()) # loss gradient grad = dpa.loss_gradient(x=x_test, y=y_test, sampling=True) assert grad.shape == (10, 1, 28, 28) # fit dpa.fit(x=x_train, y=y_train)
# model = mobilenet_v2(num_classes=10)

# Step 2a: Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier
classifier = PyTorchClassifier(
    model=model,
    clip_values=(0.0, 1.0),
    preprocessing=(cifar_mu, cifar_std),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)

# Step 4: Train the ART classifier
classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)
exp_time = time.strftime('%H_%M_%S')
# torch.save(classifier.model.state_dict(), 'pth/{}.pth.tar'.format(exp_time))

# Step 5: Evaluate the ART classifier on benign test examples
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))
def __init__(self, estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE", detector: "CLASSIFIER_LOSS_GRADIENTS_TYPE", detector_th: int = 0.5, beta: int = 0.5, detector_clip_fun=None, norm: Union[int, float, str] = np.inf, eps: float = 0.3, eps_step: float = 0.1, max_iter: int = 100, targeted: bool = False, nb_random_init: int = 5, batch_size: int = 32, loss_type: Optional[str] = None, verbose: bool = True): """ Create a :class:`.AutoProjectedGradientDescentDetectors` instance. :param estimator: A trained estimator. :param detector: A trained detector. Its prediction should be equal to 1 for the sample predicted as malicious and 0 for the ones predicted as benign. :param detector_th: Threshold to have a chosen number of false positives. :param beta: Constant which regulates the trade-off between the optimization of the classifier and the detector losses. In particular is the weight given to the detector's loss. :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2. :param eps: Maximum perturbation that the attacker can introduce. :param eps_step: Attack step size (input variation) at each iteration. :param max_iter: The maximum number of iterations. :param targeted: Indicates whether the attack is targeted (True) or untargeted (False). :param nb_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0 starting at the original input. :param batch_size: Size of the batch on which adversarial samples are generated. :param verbose: Show progress bars. """ from art.estimators.classification import PyTorchClassifier self.beta = beta self.detector_th = detector_th self.detector_clip_fun = detector_clip_fun if targeted is True: raise NotImplementedError("This attack so far do not works as a " "targeted attack. (the objective " "function and its gradient function " "need a little fix to make it work).") if isinstance(detector, PyTorchClassifier): import torch if detector.clip_values is not None: raise ValueError("The clip value of the detector cannot " "be different from None.") class detector_loss: """ The detector loss is the detector score for the class 1 - the detector threshold """ def __init__(self): self.reduction = "mean" def __call__(self, y_pred, y_true): # type: ignore """ y_pred are actually the logits. y_true is actually unused. 
""" if isinstance(y_pred, np.ndarray): scores = torch.from_numpy(y_pred) else: scores = y_pred # apply the softmax to have scores in 0 1 softmax_obj = Softmax(dim=1) scores = softmax_obj(scores) # consider the score assigned to the malicious class scores = scores[:, 1] scores = scores - detector_th # create a vector of zeros zero_vector = torch.zeros_like(scores) # get the maximum values between scores - threshold and 0 scores = torch.max(scores, zero_vector) if self.reduction == 'mean': return torch.mean(scores) else: return scores self._det_loss_object = detector_loss() detector_apgd = PyTorchClassifier( model=detector.model, loss=self._det_loss_object, input_shape=detector.input_shape, nb_classes=detector.nb_classes, optimizer=None, channels_first=detector.channels_first, preprocessing_defences=detector.preprocessing_defences, postprocessing_defences=detector.postprocessing_defences, preprocessing=detector.preprocessing, device_type=detector._device, ) self._det_loss_object = detector_loss() else: raise ValueError("The type of the detector classifier is not " "supported.") self.detector = detector_apgd super().__init__(estimator=estimator, norm=norm, eps=eps, eps_step=eps_step, max_iter=max_iter, targeted=targeted, nb_random_init=nb_random_init, batch_size=batch_size, loss_type=loss_type, verbose=verbose)
def run_attack_untargeted(file_model, X, y, att_name, eps, device): path = file_model.split('/')[0] file_str = file_model.split('/')[-1] name_arr = file_str.split('_') data = name_arr[0] model_name = name_arr[1] file_data = os.path.join( path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps * 1000))) if os.path.exists(file_data): print('Found existing file:', file_data) obj = torch.load(file_data) return obj['adv'], obj['X'], obj['y'] if data == 'mnist': n_features = (1, 28, 28) n_classes = 10 model = BaseModel(use_prob=False).to(device) elif data == 'cifar10': n_features = (3, 32, 32) n_classes = 10 if model_name == 'resnet': model = Resnet(use_prob=False).to(device) elif model_name == 'vgg': model = Vgg(use_prob=False).to(device) else: raise NotImplementedError else: raise NotImplementedError model.load_state_dict(torch.load(file_model, map_location=device)) loss = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) classifier = PyTorchClassifier(model=model, loss=loss, input_shape=n_features, optimizer=optimizer, nb_classes=n_classes, clip_values=(0.0, 1.0), device_type='gpu') if att_name == 'apgd': eps_step = eps / 4. if eps <= 0.2 else 0.1 attack = AutoProjectedGradientDescent(estimator=classifier, eps=eps, eps_step=eps_step, max_iter=1000, batch_size=BATCH_SIZE, targeted=False) elif att_name == 'apgd2': attack = AutoProjectedGradientDescent(estimator=classifier, norm=2, eps=eps, eps_step=0.1, max_iter=1000, batch_size=BATCH_SIZE, targeted=False) elif att_name == 'cw2': # Do not increase the batch_size attack = CarliniWagnerAttackL2(model=model, n_classes=n_classes, confidence=eps, verbose=True, check_prob=False, batch_size=32, targeted=False) elif att_name == 'deepfool': # Do not adjust Epsilon attack = DeepFool(classifier=classifier, batch_size=BATCH_SIZE) elif att_name == 'fgsm': attack = FastGradientMethod(estimator=classifier, eps=eps, batch_size=BATCH_SIZE) elif att_name == 'line': attack = LineAttack(color=1, thickness=2) else: raise NotImplementedError time_start = time.time() adv = attack.generate(x=X) time_elapsed = time.time() - time_start print('Total run time:', str(datetime.timedelta(seconds=time_elapsed))) obj = {'X': X, 'y': y, 'adv': adv} torch.save(obj, file_data) print('Save data to:', file_data) return adv, X, y
def attack_universal_perturbations_nontargeted(dataloader, model, model_info, args, checkpoint_dir, norm, eps): """ UAP nontargeted attack """ device = args.device criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.01) img_size = model_info["model_img_size"] n_classes = model_info["num_classes"] classifier = PyTorchClassifier( model=model, loss=criterion, clip_values=(0.0, 1.0), optimizer=optimizer, input_shape=(img_size, img_size), nb_classes=n_classes, device_type=device, ) attack = UniversalPerturbation( classifier=classifier, attacker="fgsm", attacker_params={ "eps": eps, "batch_size": 32, "norm": norm }, delta=0.25, max_iter=20, #max_iter=3, eps=eps, norm=norm, ) # Launching a non-targeted attack # t = args.target_class print(f"Launching univ-pert nontargeted attack") #dest_images = os.path.join(checkpoint_dir, args.model_name) dest_images = checkpoint_dir os.makedirs(dest_images, exist_ok=True) # Running over the entire-batch to compute a universal perturbation for data in tqdm(dataloader): sample, label, img_path = data sample = sample.float() # Launch attack sample_adv = attack.generate(x=sample.cpu()) # Code to save these images img_path = [it.split("/")[-1] for it in img_path] for i in range(len(sample_adv)): _img = sample_adv[i].transpose(1, 2, 0) skimage.io.imsave(os.path.join(dest_images, img_path[i]), img_as_ubyte(_img)) # Also save noise image for universal attack _img = attack.noise.squeeze(0).transpose(1, 2, 0) #import ipdb; ipdb.set_trace() skimage.io.imsave(os.path.join(dest_images, "noise.png"), img_as_ubyte(_img)) with open(os.path.join(dest_images, "stats.txt"), "w") as f: f.write(f"Fooling-rate was {attack.fooling_rate}\n") return dest_images
data_dir = config['data_dir']

# Set up GPU
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpu_id'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Set up model
model = get_resnet18().to(device)
model.eval()

test_loader = get_test_loader(data_dir=data_dir, batch_size=50)

classifier = PyTorchClassifier(
    model=model,
    loss=nn.CrossEntropyLoss(),
    input_shape=(3, 32, 32),
    nb_classes=10,
    optimizer=None,
    clip_values=(0, 1),
)
attack = ProjectedGradientDescentPyTorch(
    estimator=classifier,
    norm=np.inf,
    eps=config['epsilon'],
    eps_step=config['step_size'],
    max_iter=config['num_steps'],
    num_random_init=config['num_random_init'],
    batch_size=50,
)
# attack = AutoProjectedGradientDescent(
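# A possible evaluation loop (assumed, not from the original script): run the PGD
# attack batch by batch over test_loader and track robust accuracy.
correct, total = 0, 0
for x_batch, y_batch in test_loader:
    x_np = x_batch.numpy()
    y_np = y_batch.numpy()
    x_adv = attack.generate(x=x_np)
    preds = np.argmax(classifier.predict(x_adv), axis=1)
    correct += int(np.sum(preds == y_np))
    total += len(y_np)
print('Robust accuracy under PGD: {:.2f}%'.format(100.0 * correct / total))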
test_set_x = dataset[2][0]
test_set_y = dataset[2][1]
data, target = torch.from_numpy(test_set_x), torch.from_numpy(test_set_y)
data = torch.reshape(data, [-1, 1, 57, 47])
print("The size of the input is:")
print(data.shape)
data, target = data.to(device), target.to(device)

"""
White-Box Classifier
"""
classifier = PyTorchClassifier(
    model=model,
    input_shape=(data.shape),
    nb_classes=40,
    loss=nn.CrossEntropyLoss(),
    device_type="cpu",
)

original_predictions = classifier.predict(data)
accuracy = np.sum(
    np.argmax(original_predictions, axis=1) == np.argmax(test_set_y, axis=1)
) / test_set_y.shape[0]
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Generate adversarial test examples
"""
White-Box Attacks
"""
# FGSM
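# A minimal FGSM sketch for the white-box setting above. The budget (eps=0.1) is
# an assumed value, not taken from the original script.
from art.attacks.evasion import FastGradientMethod

fgsm = FastGradientMethod(estimator=classifier, eps=0.1)
x_adv = fgsm.generate(x=data.cpu().numpy())

adv_predictions = classifier.predict(x_adv)
adv_accuracy = np.sum(
    np.argmax(adv_predictions, axis=1) == np.argmax(test_set_y, axis=1)
) / test_set_y.shape[0]
print("Accuracy on FGSM adversarial examples: {}%".format(adv_accuracy * 100))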
trojanvision.datasets.add_argument(parser)
trojanvision.models.add_argument(parser)
args = parser.parse_args()

env = trojanvision.environ.create(**args.__dict__)
dataset = trojanvision.datasets.create(**args.__dict__)
model = trojanvision.models.create(dataset=dataset, **args.__dict__)

if env['verbose']:
    summary(env=env, dataset=dataset, model=model)

import torch
import numpy as np
from sklearn import metrics
from trojanzoo.utils.data import dataset_to_list
from art.estimators.classification import PyTorchClassifier  # type: ignore

classifier = PyTorchClassifier(
    model=model._model,
    loss=model.criterion,
    input_shape=dataset.data_shape,
    nb_classes=model.num_classes,
)
model._validate()

from art.attacks.inference.membership_inference import LabelOnlyDecisionBoundary as Attack  # type: ignore
attack = Attack(classifier)

x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

sample_size = 64
tau_path = os.path.normpath(os.path.join(model.folder_path,
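# A possible continuation (assumed, not from the original script): calibrate the
# label-only attack's distance threshold on small member/non-member subsets of
# size sample_size, then infer membership on held-out samples.
attack.calibrate_distance_threshold(
    x_train[:sample_size], y_train[:sample_size],
    x_valid[:sample_size], y_valid[:sample_size],
)
inferred_member = attack.infer(x_train[:sample_size], y_train[:sample_size])
inferred_nonmember = attack.infer(x_valid[:sample_size], y_valid[:sample_size])
print('member rate: {:.3f}, non-member rate: {:.3f}'.format(
    inferred_member.mean(), inferred_nonmember.mean()))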
# Step 2: Create an ART interface around the trained classifier, to be used by the attack model trainer

# Define the loss function and the optimizer for the attack model trainer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(base_classifier.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

# Create the ART classifier
classifier = PyTorchClassifier(
    model=base_classifier,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)

# Step 3: Train the ART classifier
# TODO: add option to train on demand

# Step 4: Evaluate the ART classifier on benign test examples (see the sketch below)
y_test = y_test[:args.max]  # limit the length of the test set
trans = transforms.ToTensor()  # convert each image to a tensor and scale pixel values to [0, 1]
x_data = []  # list for test data
predictions = []  # list for predictions
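# The evaluation that Step 4 sets up is truncated here. A minimal sketch of how it would
# proceed, assuming x_test holds the raw test images aligned with y_test and that labels are
# integer class indices; every name not defined above is illustrative.
for img in x_test[:args.max]:
    x_data.append(trans(img).numpy())  # HWC image -> CHW float tensor scaled to [0, 1]
x_data = np.stack(x_data)

predictions = classifier.predict(x_data)
benign_acc = np.sum(np.argmax(predictions, axis=1) == np.array(y_test)) / len(y_test)
print("Accuracy on benign test examples: {:.2f}%".format(benign_acc * 100))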
def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    parser.add_argument('--eps', type=float, default=0.3)  # NOTE: In CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32),
                                      torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32),
                                     torch.from_numpy(y_test).type(torch.long))

    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features,
            n_hidden=n_features * 4,
            n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains correctly classified samples.
    tensor_test_X, tensor_test_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'line':
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0,
            x_max=1.0,
            targeted=False)

        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    if len(dataset_perfect) > args.n_samples:
        n = args.n_samples
    else:
        n = len(dataset_perfect)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    # Shadow attack only takes a single sample at a time!
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spent: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)
    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
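    # The three .npy files saved above are the attack's only artefacts. A minimal sanity-check
    # sketch for reloading them, assuming the same classifier is still in scope and using the
    # path_x/path_y/path_adv paths written above; the L-inf statistic is illustrative.
    x_benign_loaded = np.load(path_x)
    y_loaded = np.load(path_y)
    adv_loaded = np.load(path_adv)

    pred_loaded = np.argmax(classifier.predict(adv_loaded), axis=1)
    print('Reloaded adversarial accuracy: {:.4f}%'.format(100.0 * np.mean(pred_loaded == y_loaded)))
    per_sample_linf = np.abs(adv_loaded - x_benign_loaded).reshape(len(adv_loaded), -1).max(axis=1)
    print('Mean L-inf perturbation: {:.4f}'.format(per_sample_linf.mean()))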
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))
    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()

    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)

        # Gaussian-noise component of the training perturbation
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd

        # Targeted-noise component: wrap the model in an ART classifier to obtain loss gradients.
        # (The wrapper is rebuilt every batch here; see the refactor sketch after this function.)
        tmp_criterion = nn.CrossEntropyLoss()
        tmp_optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
        classifier = PyTorchClassifier(
            model=model,
            clip_values=(min_pixel_value, max_pixel_value),
            loss=tmp_criterion,
            optimizer=tmp_optimizer,
            input_shape=(3, 32, 32),
            nb_classes=10,
        )

        # Generate random targets and step along the negative loss gradient towards them
        targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes())
        grad = classifier.loss_gradient(x=x.cpu().numpy(), y=targets) * (-1.0)
        scaled_grad = torch.Tensor(grad * args.eps_step).to(device)

        # Combine the Gaussian noise and the targeted noise
        x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (scaled_grad * args.k_value)

        model.zero_grad()
        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_acc += acc
        scheduler.step()

    end = time.time()
    print('training time:', end - start, 'sec, loss: ', train_loss / total, 'acc: ', train_acc / total)
    return train_loss / total, train_acc / total
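# Rebuilding the ART wrapper on every batch works but is unnecessary, since PyTorchClassifier
# only holds references to the model and loss. A minimal refactor sketch under that assumption;
# make_art_classifier and targeted_gradient_step are hypothetical helpers, not part of the
# original script or of ART.
def make_art_classifier(model):
    """Wrap the model once (e.g. per epoch) and reuse the wrapper across batches."""
    return PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=nn.CrossEntropyLoss(),
        optimizer=optim.SGD(model.parameters(), lr=args.lr,
                            momentum=args.momentum, weight_decay=args.weight_decay),
        input_shape=(3, 32, 32),
        nb_classes=10,
    )


def targeted_gradient_step(classifier, x, y, eps_step):
    """Scaled negative loss gradient towards random targets, as computed inside train() above."""
    targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes())
    grad = classifier.loss_gradient(x=x.cpu().numpy(), y=targets) * (-1.0)
    return torch.Tensor(grad * eps_step).to(device)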