def _run_basic(self, dataset, logger):
    ''' The ``run`` method for bim, pgd, mim.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A dictionary whose keys are the 1-based iteration numbers and whose values are
        ``(labels, dists)`` tuples, each concatenated over all batches.
    '''
    # the attack is already configured in `config()`
    rs = dict()
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        # Offset of this batch's first example. Every batch before this one holds `self.batch_size` examples,
        # so the nominal batch size is used instead of `len(xs)` (which is smaller for a short final batch).
        begin = i_batch * self.batch_size
        end = begin + len(xs) - 1
        # `batch_attack` is consumed as an iterator yielding one `(labels, dists)` pair per iteration.
        for step, (labels, dists) in enumerate(self.attack.batch_attack(xs, ys, ys_target), start=1):
            step_labels, step_dists = rs.setdefault(step, ([], []))
            step_labels.append(labels)
            step_dists.append(dists)
            if logger:
                logger.info('n={}..{}: iteration={}'.format(begin, end, step))
    # Merge the per-batch arrays of every iteration into one array each.
    for key in rs:
        rs[key] = (np.concatenate(rs[key][0]), np.concatenate(rs[key][1]))
    return rs
def _run_deepfool(self, dataset, logger):
    ''' The ``run`` method for deepfool.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A dictionary with one key per step in ``1..self.attack.iteration``. Each value is a
        ``(labels, dists)`` tuple concatenated over all batches.
    '''
    # the attack is already configured in `config()`
    n_steps = self.attack.iteration
    rs = {step: ([], []) for step in range(1, n_steps + 1)}
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        # Offset of this batch's first example. Every batch before this one holds `self.batch_size` examples,
        # so the nominal batch size is used instead of `len(xs)` (which is smaller for a short final batch).
        begin = i_batch * self.batch_size
        end = begin + len(xs) - 1
        step = 0  # number of steps the attack produced for this batch
        for step, (labels, dists) in enumerate(self.attack.batch_attack(xs, ys, ys_target), start=1):
            rs[step][0].append(labels)
            rs[step][1].append(dists)
            if logger:
                logger.info('n={}..{}: iteration={}'.format(begin, end, step))
        # DeepFool would early stop. Padding the remaining steps with the last step's data.
        # NOTE: if the attack produced no step at all, `rs[0]` does not exist and this raises `KeyError`.
        labels, dists = rs[step][0][-1], rs[step][1][-1]
        for remain_step in range(step + 1, n_steps + 1):
            rs[remain_step][0].append(labels)
            rs[remain_step][1].append(dists)
    # Merge the per-batch arrays of every step into one array each.
    for key in rs:
        rs[key] = (np.concatenate(rs[key][0]), np.concatenate(rs[key][1]))
    return rs
def _run_optimized(self, dataset, logger):
    ''' The ``run`` method for cw and deepfool.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. It is forwarded to the attack's ``config()``, and no batch message is emitted
        when it is falsy.
    :return: A numpy array with one entry per example: ``np.nan`` if the attack did not succeed on it,
        otherwise the distance between the adversarial example and the original one (maximum absolute
        difference if ``self.distance_metric`` is ``'l_inf'``, else the square root of the summed squared
        differences).
    '''
    # the attack is already configured in `config()`
    self.attack.config(logger=logger)
    rs = []
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        if logger:
            # Every batch before this one holds `self.batch_size` examples, so the nominal batch size gives
            # the right offset even when this (final) batch is shorter.
            begin = i_batch * self.batch_size
            logger.info('n={}..{}'.format(begin, begin + len(xs) - 1))
        xs_adv = self.attack.batch_attack(xs, ys, ys_target)
        # `details['success']` is read after the attack ran and is iterated in lockstep with the examples.
        for x, x_adv, success in zip(xs, xs_adv, self.attack.details['success']):
            if not success:
                rs.append(np.nan)
            elif self.distance_metric == 'l_inf':
                rs.append(np.max(np.abs(x_adv - x)))
            else:
                rs.append(np.sqrt(np.sum((x_adv - x)**2)))
    return np.array(rs)
def gen_starting_points(model, ys, ys_target, goal, dataset_name, session, pred_fn, cache=None):
    ''' Build one starting point per example that already satisfies the adversarial goal.

    For the ``'ut'`` and ``'tm'`` goals a starting point is uniform random noise in
    ``[model.x_min, model.x_max]`` which the model does not classify as the true label. For any other goal it
    is the first example of the dataset (loaded with the target label) which the model classifies as the
    target label.

    :param model: The model.
    :param ys: True labels.
    :param ys_target: Targeted labels.
    :param goal: Adversarial goal.
    :param dataset_name: The dataset's name. All valid values are ``'cifar10'`` and ``'imagenet'``.
    :param session: ``tf.Session`` for loading dataset.
    :param pred_fn: A function which accepts a batch of model inputs as a numpy array and returns the model's
        predictions.
    :param cache: A cache for reusing generated starting points. A dictionary keyed by label. Same cache shall
        not be shared between different model and adversarial goal.
    :return: Starting points as a numpy array.
    '''
    cache = dict() if cache is None else cache
    np_dtype = model.x_dtype.as_numpy_dtype
    points = np.zeros((len(ys), *model.x_shape), dtype=np_dtype)

    if goal not in ('ut', 'tm'):
        # Targeted goal: look for a real example that is predicted as the target label.
        for pos, target in enumerate(ys_target):
            if target not in cache:
                loader = cifar10 if dataset_name == 'cifar10' else imagenet
                candidates = loader.load_dataset_for_classifier(model, target_label=target).batch(1)
                for _, x, _ in dataset_to_iterator(candidates, session):
                    if pred_fn(x)[0] == target:
                        cache[target] = x[0]
                        break
            # If no candidate matched, the cache lookup below raises `KeyError`.
            points[pos] = cache[target]
        return points

    # Untargeted-style goals: draw noise until the prediction differs from the true label.
    for pos, label in enumerate(ys):
        label = int(label)
        if label not in cache:
            found = None
            while found is None:
                noise = np.random.uniform(low=model.x_min, high=model.x_max, size=(1, *model.x_shape))
                noise = noise.astype(np_dtype)
                if pred_fn(noise)[0] != label:
                    found = noise[0]
            cache[label] = found
        points[pos] = cache[label]
    return points
def _run_score_based(self, dataset, logger):
    ''' The ``run`` method for nes, spsa, nattack.

    The dataset is consumed one example at a time (it is not batched here).

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.attack``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A ``(labels, dists, queries)`` tuple of numpy arrays with one entry per example.
    '''
    # the attack is already configured in `config()`
    records = []
    for index, (_, x, y, y_target) in enumerate(dataset_to_iterator(dataset, self._session)):
        x_adv = self.attack.attack(x, y, y_target)
        feed = {self._x_ph: x, self._x_adv_ph: x_adv}
        batch_labels, batch_dists = self._session.run(self._score_based_data, feed_dict=feed)
        # Only the first entry of each fetched array is kept, next to the attack's reported query count.
        records.append((batch_labels[0], batch_dists[0], self.attack.details['queries']))
        if logger:
            logger.info('n={}, {}'.format(index, self.attack.details))
    labels = np.array([record[0] for record in records])
    dists = np.array([record[1] for record in records])
    queries = np.array([record[2] for record in records])
    return labels, dists, queries
def _run_cw(self, dataset, logger):
    ''' The ``run`` method for cw.

    The attack is re-configured and re-run on every batch for each of ``self.cw_n_points`` iteration
    checkpoints, which are spread evenly up to ``self.iteration``.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A dictionary whose keys are the iteration checkpoints and whose values are ``(labels, dists)``
        tuples, each concatenated over all batches.
    '''
    # the attack is already configured in `config()`
    # Integer truncation can map several points to the same checkpoint when `self.iteration` is smaller than
    # `self.cw_n_points`. Duplicates are dropped so that each checkpoint is attacked (and recorded) once per
    # batch; the checkpoints are non-decreasing, so sorting keeps their order.
    iterations = sorted({int(self.iteration * i / self.cw_n_points) for i in range(1, self.cw_n_points + 1)})
    rs = {step: ([], []) for step in iterations}
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        # Offset of this batch's first example. Every batch before this one holds `self.batch_size` examples,
        # so the nominal batch size is used instead of `len(xs)` (which is smaller for a short final batch).
        begin = i_batch * self.batch_size
        end = begin + len(xs) - 1
        for iteration in iterations:
            self.attack.config(iteration=iteration)
            xs_adv = self.attack.batch_attack(xs, ys, ys_target)
            labels, dists = self._session.run(self._cw_data,
                                              feed_dict={self._xs_ph: xs, self._xs_adv_ph: xs_adv})
            rs[iteration][0].append(labels)
            rs[iteration][1].append(dists)
            if logger:
                logger.info('n={}..{}: iteration={}'.format(begin, end, iteration))
    # Merge the per-batch arrays of every checkpoint into one array each.
    for key in rs:
        rs[key] = (np.concatenate(rs[key][0]), np.concatenate(rs[key][1]))
    return rs
def _run_decision_based(self, dataset, logger):
    ''' The ``run`` method for boundary, evolutionary.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A dictionary whose keys are the 1-based iteration numbers and whose values are
        ``(labels, dists)`` tuples, each concatenated over all batches.
    '''
    # the attack is already configured in `config()`
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)

    def pred_fn(xs):
        return self._session.run(self._xs_label, feed_dict={self._xs_ph: xs})

    # One starting-point cache is shared by all batches of this run.
    cache = dict()
    rs = dict()
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        # Offset of this batch's first example. Every batch before this one holds `self.batch_size` examples,
        # so the nominal batch size is used instead of `len(xs)` (which is smaller for a short final batch).
        begin = i_batch * self.batch_size
        end = begin + len(xs) - 1
        starting_points = gen_starting_points(
            self.model, ys, ys_target, self.goal, self.dataset_name, self._session, pred_fn, cache)
        self.attack.config(starting_points=starting_points)
        # `batch_attack` is consumed as an iterator yielding one `(labels, dists)` pair per iteration.
        for step, (labels, dists) in enumerate(self.attack.batch_attack(xs, ys, ys_target), start=1):
            step_labels, step_dists = rs.setdefault(step, ([], []))
            step_labels.append(labels)
            step_dists.append(dists)
            if logger:
                logger.info('n={}..{}: iteration={}'.format(begin, end, step))
    # Merge the per-batch arrays of every iteration into one array each.
    for key in rs:
        rs[key] = (np.concatenate(rs[key][0]), np.concatenate(rs[key][1]))
    return rs
# Script: check that the two ensemble wrappers predict the same labels as the plain model on one batch.
from ares.model.ensemble import EnsembleModel, EnsembleRandomnessModel

batch_size = 100

# Create the session with `allow_growth` enabled on the GPU options.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# Load the ResNet-56 CIFAR-10 example model; the path is resolved relative to this file.
model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../example/cifar10/resnet56.py')
model = load_model_from_path(model_path).load(session)

# Both wrappers are built on the same underlying model.
# NOTE(review): `[0.5, 0.5]` is presumably the per-model weights and `10` presumably a repetition count --
# confirm against `ares.model.ensemble`.
e_model = EnsembleModel([model, model], [0.5, 0.5])
er_model = EnsembleRandomnessModel(model, 10, session)

# Fetch a single batch of `batch_size` examples.
ds = cifar10.load_dataset_for_classifier(model).batch(batch_size).take(1)
_, xs, ys = next(dataset_to_iterator(ds, session))

# Build the label ops of the three models on one shared placeholder.
xs_ph = tf.placeholder(model.x_dtype, shape=(batch_size, *model.x_shape))
labels = model.labels(xs_ph)
e_labels = e_model.labels(xs_ph)
er_labels = er_model.labels(xs_ph)

labels_np = session.run(labels, feed_dict={xs_ph: xs})
e_labels_np = session.run(e_labels, feed_dict={xs_ph: xs})
er_labels_np = session.run(er_labels, feed_dict={xs_ph: xs})

# The wrapped models must agree with the plain model on every example.
assert (np.array_equal(labels_np, e_labels_np))
assert (np.array_equal(labels_np, er_labels_np))
print(labels_np)
'../example/cifar10/adp.py',
]

# NOTE(review): the two lines above close a list literal whose opening is outside this view; presumably it
# is the `MODELS` list iterated below -- confirm.

# Mean accuracy per model, keyed by the model's relative path.
rs = dict()
for model_path_short in MODELS:
    print('Loading {}...'.format(model_path_short))
    # Model paths are resolved relative to this file.
    model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), model_path_short)
    model = load_model_from_path(model_path).load(session)
    dataset = cifar10.load_dataset_for_classifier(model, offset=0, load_target=True)
    xs_ph = tf.placeholder(model.x_dtype, shape=(None, *model.x_shape))
    labels = model.labels(xs_ph)
    accs = []
    # The whole dataset is evaluated 10 times and every per-batch accuracy is collected.
    for _ in range(10):
        for i_batch, (_, xs, ys, ys_target) in enumerate(
                dataset_to_iterator(dataset.batch(batch_size), session)):
            predictions = session.run(labels, feed_dict={xs_ph: xs})
            acc = np.equal(predictions, ys).astype(np.float32).mean()
            accs.append(acc)
            # NOTE: `{:3f}` sets a minimum field width of 3 with the default precision, not 3 decimals.
            print('n={}..{} acc={:3f}'.format(
                i_batch * batch_size, i_batch * batch_size + batch_size - 1, acc))
    # The unweighted mean of the per-batch accuracies over all passes.
    rs[model_path_short] = np.mean(accs)
    print('{} acc={:f}'.format(model_path, rs[model_path_short]))

# Summary of all models.
for k, v in rs.items():
    print('{} acc={:f}'.format(k, v))
# Script: run BIM on 10 ImageNet batches and print clean vs. adversarial accuracy.
# NOTE(review): `model`, `session` and `batch_size` are not defined in this span; they are presumably set up
# earlier in the script -- confirm.
xs_ph = tf.placeholder(model.x_dtype, shape=(batch_size, *model.x_shape))
lgs, lbs = model.logits_and_labels(xs_ph)

dataset = imagenet.load_dataset_for_classifier(model, load_target=True)
dataset = dataset.batch(batch_size).take(10)

loss = CrossEntropyLoss(model)
attack = BIM(model=model, batch_size=batch_size, loss=loss, goal='ut', distance_metric='l_inf', session=session)
attack.config(
    iteration=50,
    magnitude=8.0 / 255.0,
    alpha=0.5 / 255.0,
)

accs, adv_accs = [], []
for filenames, xs, ys, ys_target in dataset_to_iterator(dataset, session):
    xs_adv = attack.batch_attack(xs, ys=ys)
    lbs_pred = session.run(lbs, feed_dict={xs_ph: xs})
    lbs_adv = session.run(lbs, feed_dict={xs_ph: xs_adv})
    # Builtin `float` is used because the `np.float` alias was removed in NumPy 1.24.
    accs.append(np.equal(ys, lbs_pred).astype(float).mean())
    adv_accs.append(np.equal(ys, lbs_adv).astype(float).mean())
    print(accs[-1], adv_accs[-1])

# Unweighted mean of the per-batch accuracies.
print(np.mean(accs), np.mean(adv_accs))
def _run_binsearch(self, dataset, logger):
    ''' The ``run`` method for fgsm.

    For every batch, a linear search over attack magnitudes is followed by ``self.binsearch_steps`` steps of
    per-example binary search. The most recent successful adversarial example of each input is kept.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.batch_attack``. It is batched here with ``self.batch_size``.
    :param logger: A logger. No message is emitted when it is falsy.
    :return: A numpy array with one entry per example: ``np.nan`` if no attack succeeded on it, otherwise the
        distance between the kept adversarial example and the original one (maximum absolute difference if
        ``self.distance_metric`` is ``'l_inf'``, else the square root of the summed squared differences).
    '''
    # the attack is already configured in `config()`

    def is_adversarial(logits, ys_flatten, ys_target_flatten):
        ''' Check per example if the attack succeeded, considering the confidence value. '''
        if self.goal == 'ut' or self.goal == 'tm':
            # for ut and tm goal, if the original label's logit is not the largest one, the example is
            # adversarial.
            return logits.max(axis=1) - logits.take(ys_flatten) > self.confidence
        # for t goal, if the target label's logit is the largest one, the example is adversarial.
        logits_this = logits.take(ys_target_flatten)
        # `flatten()` returns a copy, so masking the target logits does not touch the fetched array.
        logits = logits.flatten()
        logits[ys_target_flatten] = np.nan
        logits_that = np.nanmax(logits.reshape((self.batch_size, -1)), axis=1)
        return logits_this - logits_that > self.confidence

    rs = []
    iterator = dataset_to_iterator(dataset.batch(self.batch_size), self._session)
    for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
        # Offset of this batch's first example. Every batch before this one holds `self.batch_size` examples,
        # so the nominal batch size is used instead of `len(xs)` (which is smaller for a short final batch).
        begin = i_batch * self.batch_size
        end = begin + len(xs) - 1
        # create numpy index for fetching the original and target label's logit value
        ys_range = np.arange(0, self.batch_size * self.model.n_class, self.model.n_class)
        ys_flatten = ys_range.astype(self.model.y_dtype.as_numpy_dtype) + ys
        ys_target_flatten = ys_range.astype(self.model.y_dtype.as_numpy_dtype) + ys_target
        del ys_range
        lo = np.zeros(self.batch_size, dtype=np.float32)
        hi = lo + self.init_distortion
        # set xs_result to zeros initially, so that if the attack fails all the way down we could know it.
        xs_result = np.zeros_like(xs)

        # use linear search to find an adversarial magnitude since fgsm do not play well with exponential
        # search. The magnitudes are multiples of init_distortion, tried from
        # init_distortion * (2**search_steps) downwards to init_distortion * 1.
        # The 2**search_steps here intends to achieve similar semantics as exponential search.
        for i in range(2**self.search_steps):
            magnitude = self.init_distortion * (2**self.search_steps - i)
            # config the attack
            self.attack.config(magnitude=magnitude)
            # run the attack
            xs_adv = self.attack.batch_attack(xs, ys, ys_target)
            logits = self._session.run(self._logits, feed_dict={self._xs_ph: xs_adv})
            succ = is_adversarial(logits, ys_flatten, ys_target_flatten)
            # update the adversarial examples
            xs_result[succ] = xs_adv[succ]
            # update the smallest adversarial magnitude
            hi[succ] = magnitude
            if logger:
                # builtin `float` is used because the `np.float` alias was removed in NumPy 1.24.
                logger.info('linsearch n={}..{}: i={}, success_rate={:.3f}'.format(
                    begin, end, i, succ.astype(float).mean()))
            # NOTE(review): the search stops as soon as the whole batch succeeds at one magnitude, although
            # the magnitudes are decreasing -- confirm that stopping on success is intended here.
            if np.all(succ):
                break

        lo = hi - self.init_distortion

        # run binsearch to find the minimal adversarial magnitude
        for i in range(self.binsearch_steps):
            # config the attack
            mi = (lo + hi) / 2
            self.attack.config(magnitude=mi)
            # run the attack
            xs_adv = self.attack.batch_attack(xs, ys, ys_target)
            logits = self._session.run(self._logits, feed_dict={self._xs_ph: xs_adv})
            succ = is_adversarial(logits, ys_flatten, ys_target_flatten)
            # update the adversarial examples
            xs_result[succ] = xs_adv[succ]
            # update hi (if succeed) or lo (if not)
            not_succ = np.logical_not(succ)
            hi[succ] = mi[succ]
            lo[not_succ] = mi[not_succ]
            if logger:
                logger.info('binsearch n={}..{}: i={}, success_rate={:.3f}'.format(
                    begin, end, i, succ.astype(float).mean()))

        for x, x_result in zip(xs, xs_result):
            if np.all(x_result == 0):  # all attacks failed
                rs.append(np.nan)
            elif self.distance_metric == 'l_inf':
                rs.append(np.max(np.abs(x_result - x)))
            else:
                rs.append(np.sqrt(np.sum((x_result - x)**2)))
    return np.array(rs)
def _run_binsearch_nes_family(self, dataset, logger):
    ''' The ``run`` method for nes, spsa & nattack.

    For every example, the magnitude is doubled up to ``self.search_steps`` times until the attack succeeds,
    then ``self.binsearch_steps`` steps of binary search are run between the last two bounds.

    :param dataset: A dataset whose elements unpack into four fields; the first one is ignored and the other
        three are passed to ``self.attack.attack``. It is consumed one example at a time.
    :param logger: A logger. It is forwarded to the attack's ``config()``, and no search message is emitted
        when it is falsy.
    :return: A numpy array with one entry per example: ``np.nan`` if no attack succeeded on it, otherwise the
        distance between the kept adversarial example and the original one (maximum absolute difference if
        ``self.distance_metric`` is ``'l_inf'``, else the square root of the summed squared differences).
    '''
    # the attack is already configured in `config()`
    self.attack.config(logger=logger)

    def configure(magnitude):
        ''' Set the magnitude on the attack; nes and spsa also get learning rates scaled by it. '''
        if self.attack_name == 'nes':
            self.attack.config(magnitude=magnitude,
                               lr=magnitude * self.nes_lr_factor,
                               min_lr=magnitude * self.nes_min_lr_factor)
        elif self.attack_name == 'spsa':
            self.attack.config(magnitude=magnitude, lr=magnitude * self.spsa_lr_factor)
        else:  # self.attack_name == 'nattack':
            self.attack.config(magnitude=magnitude)

    distances = []
    for n, (_, x, y, y_target) in enumerate(dataset_to_iterator(dataset, self._session)):
        lower, upper = 0.0, self.init_distortion
        best = np.zeros_like(x)
        found = False

        # Search phase: double the upper bound until the attack succeeds.
        for i in range(self.search_steps):
            configure(upper)
            candidate = self.attack.attack(x, y, y_target)
            succ = self.attack.details['success']
            if logger:
                logger.info('search n={}: i={}, success={}'.format(n, i, succ))
            if succ:
                found, best = True, candidate
                break
            lower = upper
            upper *= 2.0

        # Binary search phase: shrink the interval around the smallest successful magnitude.
        for i in range(self.binsearch_steps):
            middle = (lower + upper) / 2
            configure(middle)
            candidate = self.attack.attack(x, y, y_target)
            succ = self.attack.details['success']
            if succ:
                upper = middle
                found, best = True, candidate
            else:
                lower = middle
            if logger:
                logger.info('binsearch n={}: i={}, success={}'.format(n, i, succ))

        if not found:  # all attacks failed
            distances.append(np.nan)
        elif self.distance_metric == 'l_inf':
            distances.append(np.max(np.abs(best - x)))
        else:
            distances.append(np.sqrt(np.sum((best - x)**2)))
    return np.array(distances)
def run(self, dataset, logger):
    ''' Run the attack on the dataset.

    :param dataset: A ``tf.data.Dataset`` instance, whose first element is the unique identifier for the data
        point, second element is the image, third element is the ground truth label. If the goal is 'tm' or
        't', a fourth element should be provided as the target label for the attack.
    :param logger: A standard logger. No message is emitted when it is ``None``.
    :return: A tuple of five numpy array. The first element represents whether the model predicting correctly
        on each dataset point. The second element represents whether the model predicting correctly on the
        adversarial example for each dataset point. The third element represents whether the dataset point is
        non-adversarial according to the goal. The fourth element represents whether the attack succeed. The
        fifth element is the generated adversarial example's distance to the dataset's original example.
    '''
    acc, acc_adv, total, succ, dist = [], [], [], [], []

    def update(accs, accs_adv, totals, succs, dists):
        ''' Record one batch of results and log its summary. '''
        acc.append(accs)
        acc_adv.append(accs_adv)
        total.append(totals)
        succ.append(succs)
        dist.append(dists)
        if logger is not None:
            # Builtin `float` is used because the `np.float` alias was removed in NumPy 1.24. The values are
            # printed with 3 decimals (`{:.3f}`); the previous `{:3f}` only set a minimum field width.
            logger.info('acc={:.3f}, adv_acc={:.3f}, succ={:.3f}, dist_mean={:.3f}'.format(
                np.mean(accs.astype(float)),
                np.mean(accs_adv.astype(float)),
                np.sum(succs.astype(float)) / np.sum(totals.astype(float)),
                np.mean(dists)
            ))

    if self.attack_name in ('fgsm', 'bim', 'pgd', 'mim', 'cw', 'deepfool'):
        iterator = dataset_to_iterator(dataset.batch(self.batch_size), self.session)
        for _, xs, ys, ts in iterator:
            xs_adv = self.attack.batch_attack(xs, ys, ts)
            xs_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: xs})
            xs_adv_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: xs_adv})
            update(*self._batch_info(xs, xs_adv, ys, ts, xs_pred, xs_adv_pred))
    elif self.attack_name in ('boundary', 'evolutionary'):
        # One starting-point cache is shared by all batches of this run.
        cache = dict()
        iterator = dataset_to_iterator(dataset.batch(self.batch_size), self.session)

        def pred_fn(xs):
            return self.session.run(self.xs_label, feed_dict={self.xs_ph: xs})

        for _, xs, ys, ts in iterator:
            starting_points = gen_starting_points(
                self.model, ys, ts, self.goal, self.dataset_name, self.session, pred_fn, cache)
            self.config(starting_points=starting_points)
            xs_adv = self.attack.batch_attack(xs, ys, ts)
            xs_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: xs})
            xs_adv_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: xs_adv})
            update(*self._batch_info(xs, xs_adv, ys, ts, xs_pred, xs_adv_pred))
    elif self.attack_name in ('nes', 'spsa', 'nattack'):
        # These attacks run on one example at a time, so the dataset is not batched.
        iterator = dataset_to_iterator(dataset, self.session)
        for _, x, y, t in iterator:
            x_adv = self.attack.attack(x, y, t)
            x_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: [x]})[0]
            x_adv_pred = self.session.run(self.xs_label, feed_dict={self.xs_ph: [x_adv]})[0]
            # Wrap the single example into batches of one, as `_batch_info` is called with arrays.
            xs, xs_adv, ys, ts = np.array([x]), np.array([x_adv]), np.array([y]), np.array([t])
            xs_pred, xs_adv_pred = np.array([x_pred]), np.array([x_adv_pred])
            update(*self._batch_info(xs, xs_adv, ys, ts, xs_pred, xs_adv_pred))

    # NOTE: `np.concatenate` raises `ValueError` if nothing was recorded (empty dataset or an attack name
    # that matches none of the branches above).
    return tuple(map(np.concatenate, (acc, acc_adv, total, succ, dist)))