def _binary_search(
    self,
    originals: ep.Tensor,
    perturbed: ep.Tensor,
    boost: Optional[bool] = False,
) -> ep.Tensor:
    # Choose the upper thresholds of the binary search based on the constraint.
    highs = ep.ones(perturbed, len(perturbed))
    d = np.prod(perturbed.shape[1:])
    thresholds = self._BS_gamma / (d * math.sqrt(d))
    lows = ep.zeros_like(highs)

    # Boosted binary search: blend originals and perturbed to tighten the
    # initial search interval.
    if boost:
        boost_vec = 0.1 * originals + 0.9 * perturbed
        is_advs = self._is_adversarial(boost_vec)
        is_advs = atleast_kd(is_advs, originals.ndim)
        originals = ep.where(is_advs.logical_not(), boost_vec, originals)
        perturbed = ep.where(is_advs, boost_vec, perturbed)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs
    iteration = 0
    while ep.any(highs - lows > thresholds) and iteration < self._BS_max_iteration:
        iteration += 1
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = self._is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids

        if reached_numerical_precision:
            break

    results = self._project(originals, perturbed, highs)
    return results
def __call__(self, model: Model, inputs: T, criterion: Union[Misclassification, T]) -> T: x, restore_type = ep.astensor_(inputs) criterion_ = get_criterion(criterion) del inputs, criterion N = len(x) if isinstance(criterion_, Misclassification): classes = criterion_.labels else: raise ValueError("unsupported criterion") if classes.shape != (N, ): raise ValueError( f"expected labels to have shape ({N},), got {classes.shape}") bounds = model.bounds def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor: assert x.shape[0] == logits.shape[0] assert delta.shape == x.shape x_hat = x + delta logits_hat = model(x_hat) loss = ep.kl_div_with_logits(logits, logits_hat).sum() return loss value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False) clean_logits = model(x) # start with random vector as search vector d = ep.normal(x, shape=x.shape, mean=0, stddev=1) for it in range(self.iterations): # normalize proposal to be unit vector d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1), x.ndim) # use gradient of KL divergence as new search vector _, grad = value_and_grad(d, clean_logits) d = grad # rescale search vector d = (bounds[1] - bounds[0]) * d if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64): raise RuntimeError( "Gradient vanished; this can happen if xi is too small.") final_delta = (self.epsilon / ep.sqrt( (d**2).sum(keepdims=True, axis=(1, 2, 3))) * d) x_adv = ep.clip(x + final_delta, *bounds) return restore_type(x_adv)
def _binary_search(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    originals: ep.Tensor,
    perturbed: ep.Tensor,
) -> ep.Tensor:
    # Choose the upper thresholds of the binary search based on the constraint.
    d = np.prod(perturbed.shape[1:])
    if self.constraint == "linf":
        highs = linf(originals, perturbed)

        # TODO: Check if the threshold is correct
        #  empirically this seems to be too low
        thresholds = highs * self.gamma / (d * d)
    else:
        highs = ep.ones(perturbed, len(perturbed))
        thresholds = self.gamma / (d * math.sqrt(d))

    lows = ep.zeros_like(highs)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs

    while ep.any(highs - lows > thresholds):
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids
        if reached_numerical_precision:
            # TODO: warn the user that the binary search stopped early
            break

    res = self._project(originals, perturbed, highs)
    return res
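# Quick arithmetic sketch (illustrative only) of the convergence thresholds
# chosen above: for flattened input dimensionality d and a given gamma
# (hypothetical value below; self.gamma in the code), the l2 branch stops once
# the search interval is smaller than gamma / (d * sqrt(d)), while the linf
# branch additionally scales the per-sample threshold with the initial
# distances `highs`.
import math

d = 28 * 28 * 1        # e.g. a single-channel 28x28 image
gamma = 1.0            # hypothetical value for illustration
l2_threshold = gamma / (d * math.sqrt(d))    # ~4.56e-05
linf_threshold_factor = gamma / (d * d)      # multiplied by `highs` per sample
print(l2_threshold, linf_threshold_factor)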
def test_any_none_keepdims(t: Tensor) -> Tensor:
    return ep.any(t > 3, axis=None, keepdims=True)


def test_any_axes(dummy: Tensor) -> Tensor:
    t = ep.arange(dummy, 30).float32().reshape((3, 5, 2))
    return ep.any(t > 3, axis=(0, 1))


def test_any_axis(t: Tensor) -> Tensor:
    return ep.any(t > 3, axis=0)


def test_any(t: Tensor) -> Tensor:
    return ep.any(t > 3)
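# A minimal sketch (using eagerpy's NumPy backend) of the reductions exercised
# by the tests above: with axis=None all dimensions are reduced (keepdims=True
# keeps them as size-1 axes), a tuple of axes reduces several dimensions at
# once, and a single axis reduces only that dimension.
import eagerpy as ep
import numpy as np

t = ep.astensor(np.arange(30, dtype=np.float32).reshape((3, 5, 2)))
assert ep.any(t > 3).shape == ()                                  # full reduction
assert ep.any(t > 3, axis=None, keepdims=True).shape == (1, 1, 1)
assert ep.any(t > 3, axis=(0, 1)).shape == (2,)                   # reduce axes 0 and 1
assert ep.any(t > 3, axis=0).shape == (5, 2)                      # reduce axis 0 only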
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[T] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    if starting_points is None:
        raise ValueError("BinarizationRefinementAttack requires starting_points")

    (o, x), restore_type = ep.astensors_(inputs, starting_points)
    del inputs, starting_points, kwargs

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    if self.threshold is None:
        min_, max_ = model.bounds
        threshold = (min_ + max_) / 2.0
    else:
        threshold = self.threshold

    assert o.dtype == x.dtype

    nptype = o.reshape(-1)[0].numpy().dtype.type
    if nptype not in [np.float16, np.float32, np.float64]:
        raise ValueError(  # pragma: no cover
            f"expected dtype to be float16, float32 or float64, found '{nptype}'"
        )

    threshold = nptype(threshold)
    offset = nptype(1.0)

    if self.included_in == "lower":
        lower_ = threshold
        upper_ = np.nextafter(threshold, threshold + offset)
    elif self.included_in == "upper":
        lower_ = np.nextafter(threshold, threshold - offset)
        upper_ = threshold
    else:
        raise ValueError(
            f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
        )

    assert lower_ < upper_

    p = ep.full_like(o, ep.nan)

    lower = ep.ones_like(o) * lower_
    upper = ep.ones_like(o) * upper_

    indices = ep.logical_and(o <= lower, x <= lower)
    p = ep.where(indices, o, p)

    indices = ep.logical_and(o <= lower, x >= upper)
    p = ep.where(indices, upper, p)

    indices = ep.logical_and(o >= upper, x <= lower)
    p = ep.where(indices, lower, p)

    indices = ep.logical_and(o >= upper, x >= upper)
    p = ep.where(indices, o, p)

    assert not ep.any(ep.isnan(p))

    is_adv1 = is_adversarial(x)
    is_adv2 = is_adversarial(p)
    if (is_adv1 != is_adv2).any():
        raise ValueError(
            "The specified threshold does not match what is done by the model."
        )
    return restore_type(p)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[T] = None,
    **kwargs: Any,
) -> T:
    """For models that preprocess their inputs by binarizing them, this
    attack can improve adversarials found by other attacks. It does this
    by utilizing information about the binarization and mapping values
    either to the corresponding value in the clean input or to the right
    side of the threshold.

    Parameters
    ----------
    threshold : float
        The threshold used by the model's binarization. If None, defaults
        to (model.bounds[0] + model.bounds[1]) / 2.
    included_in : str
        Whether the threshold value itself belongs to the lower or upper
        interval.

    """
    raise_if_kwargs(kwargs)
    if starting_points is None:
        raise ValueError("BinarizationRefinementAttack requires starting_points")

    (o, x), restore_type = ep.astensors_(inputs, starting_points)
    del inputs, starting_points, kwargs

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    if self.threshold is None:
        min_, max_ = model.bounds
        threshold = (min_ + max_) / 2.0
    else:
        threshold = self.threshold

    assert o.dtype == x.dtype

    nptype = o.reshape(-1)[0].numpy().dtype.type
    if nptype not in [np.float16, np.float32, np.float64]:
        raise ValueError(  # pragma: no cover
            f"expected dtype to be float16, float32 or float64, found '{nptype}'"
        )

    threshold = nptype(threshold)
    offset = nptype(1.0)

    if self.included_in == "lower":
        lower_ = threshold
        upper_ = np.nextafter(threshold, threshold + offset)
    elif self.included_in == "upper":
        lower_ = np.nextafter(threshold, threshold - offset)
        upper_ = threshold
    else:
        raise ValueError(
            f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
        )

    assert lower_ < upper_

    p = ep.full_like(o, ep.nan)

    lower = ep.ones_like(o) * lower_
    upper = ep.ones_like(o) * upper_

    indices = ep.logical_and(o <= lower, x <= lower)
    p = ep.where(indices, o, p)

    indices = ep.logical_and(o <= lower, x >= upper)
    p = ep.where(indices, upper, p)

    indices = ep.logical_and(o >= upper, x <= lower)
    p = ep.where(indices, lower, p)

    indices = ep.logical_and(o >= upper, x >= upper)
    p = ep.where(indices, o, p)

    assert not ep.any(ep.isnan(p))

    is_adv1 = is_adversarial(x)
    is_adv2 = is_adversarial(p)
    if (is_adv1 != is_adv2).any():
        raise ValueError(
            "The specified threshold does not match what is done by the model."
        )
    return restore_type(p)
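# Hypothetical worked example of the mapping implemented above, assuming
# bounds (0, 1), threshold 0.5 and included_in="upper" (all values are
# illustrative; the variable names mirror the logic in run).
import numpy as np

threshold = np.float32(0.5)
offset = np.float32(1.0)
lower_ = np.nextafter(threshold, threshold - offset)   # largest float below 0.5
upper_ = threshold                                     # 0.5 itself counts as "upper"

o = np.array([0.0, 0.0, 1.0, 1.0], dtype=np.float32)  # clean input (binarized by the model)
x = np.array([0.2, 0.7, 0.3, 0.9], dtype=np.float32)  # adversarial found by another attack

p = np.full_like(o, np.nan)
p = np.where((o <= lower_) & (x <= lower_), o, p)       # same side: reset to clean value
p = np.where((o <= lower_) & (x >= upper_), upper_, p)  # crossed upwards: snap to threshold
p = np.where((o >= upper_) & (x <= lower_), lower_, p)  # crossed downwards: snap just below it
p = np.where((o >= upper_) & (x >= upper_), o, p)       # same side: reset to clean value
# p == [0.0, 0.5, ~0.49999997, 1.0]; the model binarizes p exactly like x,
# but p stays as close to the clean input o as possible.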
def __call__(
    self,
    inputs,
    labels,
    *,
    adversarials,
    criterion,
    threshold=None,
    included_in="upper",
):
    """For models that preprocess their inputs by binarizing them, this
    attack can improve adversarials found by other attacks. It does this
    by utilizing information about the binarization and mapping values
    either to the corresponding value in the clean input or to the right
    side of the threshold.

    Parameters
    ----------
    threshold : float
        The threshold used by the model's binarization. If None, defaults
        to (model.bounds()[0] + model.bounds()[1]) / 2.
    included_in : str
        Whether the threshold value itself belongs to the lower or upper
        interval.

    """
    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in p, returns True if it is adversarial for the
        given model and criterion."""
        logits = ep.astensor(self.model.forward(p.tensor))
        return criterion(originals, labels, p, logits)

    o = ep.astensor(inputs)
    x = ep.astensor(adversarials)

    min_, max_ = self.model.bounds()

    if threshold is None:
        threshold = (min_ + max_) / 2.0

    assert o.dtype == x.dtype

    dtype = o.dtype
    if dtype == o.backend.float16:
        nptype = np.float16
    elif dtype == o.backend.float32:
        nptype = np.float32
    elif dtype == o.backend.float64:
        nptype = np.float64
    else:
        raise ValueError(
            f"expected dtype to be float16, float32 or float64, found '{dtype}'"
        )

    threshold = nptype(threshold)
    offset = nptype(1.0)

    if included_in == "lower":
        lower = threshold
        upper = np.nextafter(threshold, threshold + offset)
    elif included_in == "upper":
        lower = np.nextafter(threshold, threshold - offset)
        upper = threshold
    else:
        raise ValueError(
            f"expected included_in to be 'lower' or 'upper', found '{included_in}'"
        )

    assert lower < upper

    p = ep.full_like(o, ep.nan)

    lower = ep.ones_like(o) * lower
    upper = ep.ones_like(o) * upper

    indices = ep.logical_and(o <= lower, x <= lower)
    p = ep.where(indices, o, p)

    indices = ep.logical_and(o <= lower, x >= upper)
    p = ep.where(indices, upper, p)

    indices = ep.logical_and(o >= upper, x <= lower)
    p = ep.where(indices, lower, p)

    indices = ep.logical_and(o >= upper, x >= upper)
    p = ep.where(indices, o, p)

    assert not ep.any(ep.isnan(p))

    is_adv1 = is_adversarial(x)
    is_adv2 = is_adversarial(p)
    assert (
        is_adv1 == is_adv2
    ).all(), "The specified threshold does not match what is done by the model."

    return p.tensor
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, Any] = None,
    *,
    starting_points: Optional[ep.Tensor] = None,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    del kwargs

    x, restore_type = ep.astensor_(inputs)
    del inputs

    verify_input_bounds(x, model)

    criterion_ = get_criterion(criterion)
    del criterion
    is_adversarial = get_is_adversarial(criterion_, model)

    if starting_points is None:
        init_attack: MinimizationAttack
        if self.init_attack is None:
            init_attack = SaltAndPepperNoiseAttack()
            logging.info(
                f"Neither starting_points nor init_attack given. Falling"
                f" back to {init_attack!r} for initialization."
            )
        else:
            init_attack = self.init_attack
        # TODO: use call and support all types of attacks (once early_stop is
        # possible in __call__)
        starting_points = init_attack.run(model, x, criterion_)

    x_adv = ep.astensor(starting_points)
    assert is_adversarial(x_adv).all()

    original_shape = x.shape
    N = len(x)

    x_flat = flatten(x)
    x_adv_flat = flatten(x_adv)

    # was there a pixel left in the samples to manipulate,
    # i.e. reset to the clean version?
    found_index_to_manipulate = ep.from_numpy(x, np.ones(N, dtype=bool))

    while ep.any(found_index_to_manipulate):
        diff_mask = (ep.abs(x_flat - x_adv_flat) > 1e-8).numpy()
        diff_idxs = [z.nonzero()[0] for z in diff_mask]
        untouched_indices = [z.tolist() for z in diff_idxs]
        untouched_indices = [
            np.random.permutation(it).tolist() for it in untouched_indices
        ]

        found_index_to_manipulate = ep.from_numpy(x, np.zeros(N, dtype=bool))

        # since the number of pixels still left to manipulate might differ
        # across samples, we track each of them separately and manipulate
        # the images until there is no pixel left for any of the samples.
        # to not update already finished samples, we mask the updates such
        # that only samples that still have pixels left to manipulate will
        # be updated
        i = 0
        while i < max([len(it) for it in untouched_indices]):
            # mask all samples that still have pixels to manipulate left
            relevant_mask = [len(it) > i for it in untouched_indices]
            relevant_mask = np.array(relevant_mask, dtype=bool)
            relevant_mask_index = np.flatnonzero(relevant_mask)

            # for each image get the index of the next pixel we try out
            relevant_indices = [it[i] for it in untouched_indices if len(it) > i]

            old_values = x_adv_flat[relevant_mask_index, relevant_indices]
            new_values = x_flat[relevant_mask_index, relevant_indices]
            x_adv_flat = ep.index_update(
                x_adv_flat, (relevant_mask_index, relevant_indices), new_values
            )

            # check if still adversarial
            is_adv = is_adversarial(x_adv_flat.reshape(original_shape))
            found_index_to_manipulate = ep.index_update(
                found_index_to_manipulate,
                relevant_mask_index,
                ep.logical_or(found_index_to_manipulate, is_adv)[relevant_mask],
            )

            # if not, undo change
            new_or_old_values = ep.where(
                is_adv[relevant_mask], new_values, old_values
            )
            x_adv_flat = ep.index_update(
                x_adv_flat,
                (relevant_mask_index, relevant_indices),
                new_or_old_values,
            )

            i += 1

        if not ep.any(found_index_to_manipulate):
            break

    if self.l2_binary_search:
        while True:
            diff_mask = (ep.abs(x_flat - x_adv_flat) > 1e-12).numpy()
            diff_idxs = [z.nonzero()[0] for z in diff_mask]
            untouched_indices = [z.tolist() for z in diff_idxs]

            # draw random shuffling of all indices for all samples
            untouched_indices = [
                np.random.permutation(it).tolist() for it in untouched_indices
            ]

            # whether that run through all values made any improvement
            improved = ep.from_numpy(x, np.zeros(N, dtype=bool)).astype(bool)

            logging.info("Starting new loop through all values")

            # use the same logic as above
            i = 0
            while i < max([len(it) for it in untouched_indices]):
                # mask all samples that still have pixels to manipulate left
                relevant_mask = [len(it) > i for it in untouched_indices]
                relevant_mask = np.array(relevant_mask, dtype=bool)
                relevant_mask_index = np.flatnonzero(relevant_mask)

                # for each image get the index of the next pixel we try out
                relevant_indices = [it[i] for it in untouched_indices if len(it) > i]

                old_values = x_adv_flat[relevant_mask_index, relevant_indices]
                new_values = x_flat[relevant_mask_index, relevant_indices]

                x_adv_flat = ep.index_update(
                    x_adv_flat, (relevant_mask_index, relevant_indices), new_values
                )

                # check if still adversarial
                is_adv = is_adversarial(x_adv_flat.reshape(original_shape))

                improved = ep.index_update(
                    improved,
                    relevant_mask_index,
                    ep.logical_or(improved, is_adv)[relevant_mask],
                )

                if not ep.all(is_adv):
                    # run binary search for examples that became non-adversarial
                    updated_new_values = self._binary_search(
                        x_adv_flat,
                        relevant_mask,
                        relevant_mask_index,
                        relevant_indices,
                        old_values,
                        new_values,
                        (-1, *original_shape[1:]),
                        is_adversarial,
                    )
                    x_adv_flat = ep.index_update(
                        x_adv_flat,
                        (relevant_mask_index, relevant_indices),
                        ep.where(
                            is_adv[relevant_mask], new_values, updated_new_values
                        ),
                    )

                    improved = ep.index_update(
                        improved,
                        relevant_mask_index,
                        ep.logical_or(
                            old_values != updated_new_values,
                            improved[relevant_mask],
                        ),
                    )

                i += 1

            if not ep.any(improved):
                # no improvement for any of the indices
                break

    x_adv = x_adv_flat.reshape(original_shape)

    return restore_type(x_adv)
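# Minimal usage sketch: the run method above appears to match Foolbox's
# PointwiseAttack, so a typical invocation might look like this (model and
# data names are illustrative assumptions).
def _example_pointwise_usage(net, images, labels):
    import foolbox as fb

    fmodel = fb.PyTorchModel(net.eval(), bounds=(0, 1))          # wrapped PyTorch model
    attack = fb.attacks.PointwiseAttack(l2_binary_search=True)   # L0-minimizing attack
    criterion = fb.criteria.Misclassification(labels)
    # run() returns minimally perturbed adversarials; no epsilon is needed
    # because the attack minimizes the number of changed input values.
    return attack.run(fmodel, images, criterion)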