import numpy as np
from pytest import approx

from foolbox.utils import softmax, crossentropy


def test_crossentropy():
    predictions = np.array([0.1, 0.5, 0.7, 0.4])
    probabilities = softmax(predictions)
    for i in range(len(predictions)):
        ce = crossentropy(logits=predictions, label=i)
        assert ce == approx(-np.log(probabilities[i]))
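# For context, a minimal sketch (an assumption, mirroring what this test
# imports from foolbox.utils) of numerically stable softmax and
# crossentropy helpers:
def softmax(logits):
    # subtract the max logit before exponentiating for numerical stability
    e = np.exp(logits - np.max(logits))
    return e / np.sum(e)


def crossentropy(logits, label):
    # -log softmax(logits)[label], computed via the log-sum-exp trick
    shifted = logits - np.max(logits)
    return float(np.log(np.sum(np.exp(shifted))) - shifted[label])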
def _run_one(self, a, epsilon, stepsize, iterations,
             random_start, targeted, class_, return_early, scale,
             bb_step=15, RO=False, m=2, RC=False, TAP=False,
             uniform_or_not=False, moment_or_not=False):
    min_, max_ = a.bounds()
    s = max_ - min_

    original = a.original_image.copy()

    if random_start:
        # start from a random point inside the epsilon ball
        noise = np.random.uniform(
            -epsilon * s, epsilon * s, original.shape).astype(
                original.dtype)
        x = original + self._clip_perturbation(a, noise, epsilon)
        strict = False
    else:
        x = original
        strict = True

    if RC:
        # use curl iterations to update the adversarial example:
        # one trajectory (x_up) first descends the loss and then ascends,
        # while the other (x) ascends directly
        success = False
        momentum_up = 0
        momentum_down = 0
        go_up_flag = True  # gradient descent flag for the curl trajectory
        x_up = x.copy()
        logits_init, is_adversarial_init = a.predictions(np.round(x))
        ce_init = crossentropy(class_, logits_init)
        up_better_start = x.copy()

        for _ in range(iterations):
            avg_gradient_down = 0
            avg_gradient_up = 0
            for m_counter in range(m):
                # gradient ascent trajectory
                if RO:
                    if uniform_or_not:
                        # add uniform noise when estimating the gradient
                        temp_x_up = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x_up + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                    else:
                        # add gaussian noise when estimating the gradient
                        temp_x_up = np.clip(
                            np.random.normal(loc=x_up, scale=scale)
                            + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                else:
                    if uniform_or_not:
                        temp_x_up = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x_up, min_, max_).astype(np.float32)
                    else:
                        temp_x_up = np.clip(
                            np.random.normal(loc=x_up, scale=scale),
                            min_, max_).astype(np.float32)
                # calculate the gradient on the substitute model
                gradient_up = self._gradient(
                    a, temp_x_up, class_, strict=strict)
                avg_gradient_up += gradient_up

                # gradient descent trajectory
                if RO:
                    if uniform_or_not:
                        temp_x_down = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                    else:
                        temp_x_down = np.clip(
                            np.random.normal(loc=x, scale=scale)
                            + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                else:
                    if uniform_or_not:
                        temp_x_down = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x, min_, max_).astype(np.float32)
                    else:
                        temp_x_down = np.clip(
                            np.random.normal(loc=x, scale=scale),
                            min_, max_).astype(np.float32)
                gradient_down = self._gradient(
                    a, temp_x_down, class_, strict=strict)
                avg_gradient_down += gradient_down

            avg_gradient_up = avg_gradient_up / m
            avg_gradient_down = avg_gradient_down / m
            strict = True

            if targeted:
                avg_gradient_down = -avg_gradient_down
                avg_gradient_up = -avg_gradient_up

            if moment_or_not:
                # accumulate momentum as in MI-FGSM
                momentum_up += avg_gradient_up
                momentum_up_norm = np.sqrt(np.mean(np.square(momentum_up)))
                momentum_up_norm = max(1e-12, momentum_up_norm)  # avoid division by zero
                momentum_down += avg_gradient_down
                momentum_down_norm = np.sqrt(np.mean(np.square(momentum_down)))
                momentum_down_norm = max(1e-12, momentum_down_norm)  # avoid division by zero
                if go_up_flag:
                    x_up = x_up - stepsize * (momentum_up / momentum_up_norm)
                else:
                    x_up = x_up + stepsize * (momentum_up / momentum_up_norm)
                x = x + stepsize * (momentum_down / momentum_down_norm)
            else:
                if go_up_flag:
                    # descend first: step against the gradient
                    avg_gradient_up = -avg_gradient_up
                x_up = x_up + stepsize * avg_gradient_up
                x = x + stepsize * avg_gradient_down

            # project back into the epsilon ball and the input bounds
            x = original + self._clip_perturbation(a, x - original, epsilon)
            x_up = original + self._clip_perturbation(a, x_up - original, epsilon)
            x = np.clip(x, min_, max_)
            x_up = np.clip(x_up, min_, max_)

            logits_down, is_adversarial_down = a.predictions(np.round(x))
            logits_up, is_adversarial_up = a.predictions(np.round(x_up))

            if logging.getLogger().isEnabledFor(logging.DEBUG):
                if targeted:
                    ce = crossentropy(a.original_class, logits_down)
                    logging.debug('crossentropy to {} is {}'.format(
                        a.original_class, ce))
                ce = crossentropy(class_, logits_down)
                logging.debug('crossentropy to {} is {}'.format(class_, ce))

            if is_adversarial_up:
                if RO:
                    self.update_success_dir(x_up)
                # binary search between the original input and the
                # adversarial for a smaller perturbation
                left = original
                right = x_up
                for binary_counter in range(bb_step):
                    middle = np.clip((left + right) / 2, min_, max_)
                    temp_logits, temp_is_adversarial = a.predictions(
                        np.round(middle))
                    if temp_is_adversarial:
                        # found a better adversarial example
                        if RO:
                            self.update_success_dir(middle)
                        right = middle
                    else:
                        left = middle
                if return_early:
                    return True
                else:
                    success = True

            if is_adversarial_down:
                if RO:
                    self.update_success_dir(x)
                left = original
                right = x
                for binary_counter in range(bb_step):
                    middle = np.clip((left + right) / 2, min_, max_)
                    temp_logits, temp_is_adversarial = a.predictions(
                        np.round(middle))
                    if temp_is_adversarial:
                        if RO:
                            self.update_success_dir(middle)
                        right = middle
                    else:
                        left = middle
                if return_early:
                    return True
                else:
                    success = True

            if go_up_flag:
                ce_now = crossentropy(class_, logits_up)
                if ce_now < ce_init:
                    ce_init = ce_now
                    up_better_start = x_up
                else:
                    # stop gradient descent, start gradient ascent
                    go_up_flag = False
                    momentum_up = 0
                    x_up = up_better_start
    else:
        # normal iterative process
        success = False
        momentum_down = 0
        for _ in range(iterations):
            avg_gradient_down = 0
            for m_counter in range(m):
                if RO:
                    if uniform_or_not:
                        temp_x_down = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                    else:
                        temp_x_down = np.clip(
                            np.random.normal(loc=x, scale=scale)
                            + stepsize * self.success_dir,
                            min_, max_).astype(np.float32)
                else:
                    if uniform_or_not:
                        temp_x_down = np.clip(
                            np.random.uniform(-scale, scale, original.shape)
                            + x, min_, max_).astype(np.float32)
                    else:
                        temp_x_down = np.clip(
                            np.random.normal(loc=x, scale=scale),
                            min_, max_).astype(np.float32)
                gradient_down = self._gradient(
                    a, temp_x_down, class_, strict=strict)
                avg_gradient_down += gradient_down

            avg_gradient_down = avg_gradient_down / m
            strict = True

            if targeted:
                avg_gradient_down = -avg_gradient_down

            if moment_or_not:
                momentum_down += avg_gradient_down
                momentum_down_norm = np.sqrt(np.mean(np.square(momentum_down)))
                momentum_down_norm = max(1e-12, momentum_down_norm)  # avoid division by zero
                x = x + stepsize * (momentum_down / momentum_down_norm)
            else:
                x = x + stepsize * avg_gradient_down

            x = original + self._clip_perturbation(a, x - original, epsilon)
            x = np.clip(x, min_, max_)

            logits_down, is_adversarial_down = a.predictions(np.round(x))

            if logging.getLogger().isEnabledFor(logging.DEBUG):
                if targeted:
                    ce = crossentropy(a.original_class, logits_down)
                    logging.debug('crossentropy to {} is {}'.format(
                        a.original_class, ce))
                ce = crossentropy(class_, logits_down)
                logging.debug('crossentropy to {} is {}'.format(class_, ce))

            if is_adversarial_down:
                if RO:
                    self.update_success_dir(x)
                left = original
                right = x
                for binary_counter in range(bb_step):
                    middle = np.clip((left + right) / 2, min_, max_)
                    temp_logits, temp_is_adversarial = a.predictions(
                        np.round(middle))
                    if temp_is_adversarial:
                        if RO:
                            self.update_success_dir(middle)
                        right = middle
                    else:
                        left = middle
                if return_early:
                    return True
                else:
                    success = True
    return success
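# Hypothetical reconstructions (assumptions, not the repository's actual
# code) of the two helpers _run_one relies on: _clip_perturbation projects
# a perturbation onto the L-infinity ball of radius epsilon (scaled by the
# input range), and update_success_dir remembers the direction of the most
# recent successful adversarial so that the RO option can bias later
# gradient estimates toward it.
def _clip_perturbation(self, a, perturbation, epsilon):
    min_, max_ = a.bounds()
    s = max_ - min_  # epsilon is relative to the input range
    return np.clip(perturbation, -epsilon * s, epsilon * s)


def update_success_dir(self, x_adv):
    # direction from the original input to the successful adversarial,
    # normalized by its RMS magnitude (assumes self._original is stored)
    direction = x_adv - self._original
    norm = max(1e-12, float(np.sqrt(np.mean(np.square(direction)))))
    self.success_dir = direction / norm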
def crossentropy(x):
    # L-BFGS-B with approximated gradients does not seem to respect
    # the bounds, so set strict to False
    logits, _ = a.predictions(x.reshape(shape), strict=False)
    ce = utils.crossentropy(logits=logits, label=target_class)
    return ce
def crossentropy(x):
    logits, gradient, _ = a.predictions_and_gradient(
        x.reshape(shape), target_class, strict=False)
    gradient = gradient.reshape(-1)
    ce = utils.crossentropy(logits=logits, label=target_class)
    return ce, gradient
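# Both crossentropy closures above are presumably handed to SciPy's
# L-BFGS-B optimizer: the value-only variant with approx_grad=True, the
# (loss, gradient) variant without it. A minimal sketch under that
# assumption (x0, bounds, and maxiter are hypothetical names):
from scipy.optimize import fmin_l_bfgs_b

x, loss, info = fmin_l_bfgs_b(
    crossentropy,        # returns ce, or (ce, gradient)
    x0,                  # flattened starting point, float64
    approx_grad=True,    # omit when crossentropy also returns the gradient
    bounds=bounds,       # per-element (min_, max_) box constraints
    maxiter=150)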
def __call__(self, input_or_adv, label=None, unpack=True,
             steps=100, subsample=10, p=None):
    """Simple and close to optimal gradient-based
    adversarial attack.

    Parameters
    ----------
    input_or_adv : `numpy.ndarray` or :class:`Adversarial`
        The original, unperturbed input as a `numpy.ndarray` or
        an :class:`Adversarial` instance.
    label : int
        The reference label of the original input. Must be passed
        if `a` is a `numpy.ndarray`, must not be passed if `a` is
        an :class:`Adversarial` instance.
    unpack : bool
        If true, returns the adversarial input, otherwise returns
        the Adversarial object.
    steps : int
        Maximum number of steps to perform.
    subsample : int
        Limit on the number of the most likely classes that should
        be considered. A small value is usually sufficient and much
        faster.
    p : int or float
        Lp-norm that should be minimized, must be 2 or np.inf.

    """
    a = input_or_adv
    del input_or_adv
    del label
    del unpack

    if not a.has_gradient():
        return

    if a.target_class() is not None:
        logging.fatal('DeepFool is an untargeted adversarial attack.')
        return

    if p is None:
        # set the norm to optimize based on the distance measure
        if a._distance == MeanSquaredDistance:
            p = 2
        elif a._distance == Linfinity:
            p = np.inf
        else:
            raise NotImplementedError('Please choose a distance measure'
                                      ' for which DeepFool is implemented'
                                      ' or specify manually which norm'
                                      ' to optimize.')

    if not (1 <= p <= np.inf):
        raise ValueError
    if p not in [2, np.inf]:
        raise NotImplementedError

    _label = a.original_class

    # define labels
    logits, _ = a.predictions(a.original_image)
    labels = np.argsort(logits)[::-1]
    if subsample:
        # choose the top-k classes
        logging.info('Only testing the top-{} classes'.format(subsample))
        assert isinstance(subsample, int)
        labels = labels[:subsample]

    def get_residual_labels(logits):
        """Get all labels with p < p[original_class]"""
        return [
            k for k in labels
            if logits[k] < logits[_label]]

    perturbed = a.original_image
    min_, max_ = a.bounds()

    for step in range(steps):
        logits, grad, is_adv = a.predictions_and_gradient(perturbed)
        if is_adv:
            return

        # correspondence to algorithm 2 in [1]_:
        #
        # loss corresponds to f (in the paper: negative cross-entropy)
        # grad corresponds to -df/dx (gradient of cross-entropy)

        loss = -crossentropy(logits=logits, label=_label)

        residual_labels = get_residual_labels(logits)

        # instead of using the logits and the gradient of the logits,
        # we use a numerically stable implementation of the cross-entropy
        # and expect that the deep learning frameworks also use such a
        # stable implementation to calculate the gradient
        losses = [
            -crossentropy(logits=logits, label=k)
            for k in residual_labels]
        grads = [a.gradient(perturbed, label=k) for k in residual_labels]

        # compute optimal direction (and loss difference)
        # pairwise between each label and the target
        diffs = [(l - loss, g - grad) for l, g in zip(losses, grads)]

        # calculate distances
        if p == 2:
            distances = [abs(dl) / (np.linalg.norm(dg) + 1e-8)
                         for dl, dg in diffs]
        elif p == np.inf:
            distances = [abs(dl) / (np.sum(np.abs(dg)) + 1e-8)
                         for dl, dg in diffs]
        else:  # pragma: no cover
            assert False

        # choose optimal one
        optimal = np.argmin(distances)
        df, dg = diffs[optimal]

        # apply perturbation
        # the (-dg) corrects the sign, gradient here is -gradient of paper
        if p == 2:
            perturbation = abs(df) / (np.linalg.norm(dg) + 1e-8)**2 * (-dg)
        elif p == np.inf:
            perturbation = abs(df) / (np.sum(np.abs(dg)) + 1e-8) \
                * np.sign(-dg)
        else:  # pragma: no cover
            assert False

        # the original implementation accumulates the perturbations
        # and only adds the overshoot when adding the accumulated
        # perturbation to the original image; we apply the overshoot
        # to each perturbation (step)
        perturbed = perturbed + 1.05 * perturbation
        perturbed = np.clip(perturbed, min_, max_)

    a.predictions(perturbed)  # to find an adversarial in the last step
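# A hedged usage sketch of this attack in the Foolbox v1-style API (the
# model construction and the images/logits/image/label names are
# assumptions):
import numpy as np
import foolbox

fmodel = foolbox.models.TensorFlowModel(images, logits, bounds=(0, 255))
attack = foolbox.attacks.DeepFoolAttack(fmodel)
adversarial = attack(image, label=label, steps=100, subsample=10, p=np.inf)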