def evaluation_transferability_single(self, i, attack_params, path):
    """Evaluate how adversarial examples crafted on each model transfer to the other models."""
    cur_x = np.expand_dims(self.test_x[i], axis=0)

    transferability_record = {}
    dissimilarity_record = {}

    for model_name in self.model_names:
        # craft an adversarial example on the source model
        adv_x = self.attack(model_name, cur_x, attack_params)
        adv_x = np.clip(adv_x, 0, 1)

        for cand_name in self.model_names:
            if cand_name == model_name:
                # skip the source model itself
                continue

            pred = self.inference(cand_name, adv_x)
            if pred.argmax(axis=-1) != self.test_y.argmax(axis=-1)[i]:
                key = 'number of models fooled by {}'.format(model_name)
                transferability_record[key] = transferability_record.get(key, 0) + 1

        dissimilarity_record[model_name] = measure.frobenius_norm(adv_x, cur_x)

    self.save_to_json(transferability_record, path,
                      'transferability record for {}th example'.format(i))
    self.save_to_json(dissimilarity_record, path,
                      'dissimilarity record for {}th example'.format(i))
    print('Finished evaluating {}th example!'.format(i))
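
# --- Illustration (not part of the original module) --------------------------
# A minimal, hypothetical sketch of what a Frobenius-norm dissimilarity such as
# measure.frobenius_norm(X1, X2) might compute: the average Frobenius norm of
# the per-example difference between two batches. The project's actual
# `measure` helper may normalize or aggregate differently.
def _frobenius_norm_sketch(X1, X2):
    # flatten each example so its perturbation becomes a single vector
    diff = (np.asarray(X1) - np.asarray(X2)).reshape(len(X1), -1)
    # Frobenius norm of each example's perturbation, averaged over the batch
    return float(np.mean(np.linalg.norm(diff, axis=1)))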
def get_perturb_upperbound(attacker=ATTACK.FGSM):
    orig_file = 'test_BS-{}-clean.npy'.format(DATA.CUR_DATASET_NAME)
    orig_file = os.path.join(PATH.ADVERSARIAL_FILE, orig_file)

    adv_file = 'test_AE-{}-cnn-clean'.format(DATA.CUR_DATASET_NAME)
    adv_file = os.path.join(PATH.ADVERSARIAL_FILE, adv_file)

    if attacker == ATTACK.FGSM:
        # use the strongest variant as the upper bound
        adv_file = '{}-{}.npy'.format(adv_file, ATTACK.get_fgsm_AETypes()[-1])
    elif attacker == ATTACK.JSMA:
        adv_file = '{}-{}.npy'.format(adv_file, ATTACK.get_jsma_AETypes()[-1])

    print('Loading original file [{}]...'.format(orig_file))
    X = np.load(orig_file)
    print('Loading perturbed file [{}]...'.format(adv_file))
    X_adv = np.load(adv_file)

    upperbound = np.round(measure.frobenius_norm(X1=X_adv, X2=X), 2)
    print('Upperbound of {} perturbation: {}'.format(attacker, upperbound))

    return upperbound
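
# --- Illustration (not part of the original module) --------------------------
# Hypothetical usage: the value returned above serves as a perturbation budget;
# a candidate adversarial example whose Frobenius distance from the clean input
# reaches the budget is rejected, mirroring the check inside generate_single()
# below. `x_candidate` and `x_clean` are placeholder names.
#
#     max_perturb = get_perturb_upperbound(ATTACK.FGSM)
#     if measure.frobenius_norm(x_candidate, x_clean) >= max_perturb:
#         ...  # keep the previous adversarial example and stop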
def generate_single(sess, x, y, attacker=ATTACK.FGSM, candidates=None, attack_count=None,
                    max_perturb=get_perturb_upperbound(),  # note: default budget is computed once, at import time
                    strategy=ATTACK_STRATEGY.RANDOM.value):
    """Greedily craft one adversarial example that fools up to attack_count weak defenses."""
    candidate_names = copy.deepcopy(list(candidates.keys()))
    fooled = []

    attack_params = get_attack_params(attacker)
    x_adv = x
    perturbed_norm = measure.frobenius_norm(x_adv, x)

    max_iteration = len(candidate_names)
    iteration = 0
    while (len(fooled) < attack_count) and (iteration < max_iteration):
        # generate an adversarial example for the picked target model
        print('ITERATION {}: candidates/fooled ::: {}/{}'.format(
            iteration, len(candidate_names), len(fooled)))
        iteration += 1

        target_name = pick_target_model(candidate_names, strategy)
        transformation = target_name.split('.')[0].split('-')[-1]
        x_trans = transform(x_adv, transformation)
        if len(x_trans.shape) < 4:
            print('x_trans shape:', x_trans.shape)
            x_trans = np.expand_dims(x_trans, axis=0)

        x_tmp = attack_single(sess, candidates[target_name], attacker, x_trans, y, **attack_params)

        perturbed_norm = measure.frobenius_norm(x_tmp, transform(x, transformation))
        if perturbed_norm >= max_perturb:
            # keep the last x_adv if the current one exceeds the perturbation budget
            print('out of perturbed boundary, stop.')
            break

        x_adv = reset(x_tmp, transformation)
        if MODE.DEBUG:
            plot_image(x_adv[0], transformation)
        del x_trans

        # filter out candidates that are fooled by x_adv
        true_label = np.argmax(y)
        for cand_name in list(candidate_names):  # iterate over a copy so removal is safe
            transformation = cand_name.split('.')[0].split('-')[-1]
            # apply the candidate's associated transformation
            x_trans = transform(x_adv, transformation)
            pred_label = np.argmax(candidates[cand_name].predict(x_trans))
            if MODE.DEBUG:
                print('prediction: [{}/{}/{}]'.format(transformation, true_label, pred_label))

            if true_label != pred_label:
                # remove the candidate being fooled by x_adv;
                # record only the name of the weak defense
                candidate_names.remove(cand_name)
                print('+++ fooled [{}]'.format(cand_name))
                fooled.append(cand_name)

            # release
            del x_trans

        # use the current adversarial example as the input of the next iteration
        print('')
        del target_name

    return x_adv[0]
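
# --- Illustration (not part of the original module) --------------------------
# A minimal sketch of what pick_target_model(candidate_names, strategy) might
# do under ATTACK_STRATEGY.RANDOM: pick one not-yet-fooled weak defense
# uniformly at random. The project's real helper may support additional
# strategies (e.g. round-robin or confidence-guided selection), and the
# 'random' literal below is only a placeholder for the actual strategy value.
import random


def _pick_target_model_sketch(candidate_names, strategy='random'):
    if strategy == 'random':
        return random.choice(candidate_names)
    raise NotImplementedError('unsupported strategy: {}'.format(strategy))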