def calculate_lp_distance(original_image, compared_image):
    """
    Calculate l0, l2 and linf distance for two images with the same shape.

    Args:
        original_image (np.ndarray): Original image.
        compared_image (np.ndarray): Another image for comparison.

    Returns:
        - float, l0 distance between two images.

        - float, l2 distance between two images.

        - float, linf distance between two images.

    Raises:
        TypeError: If type of original_image or type of compared_image is not numpy.ndarray.
        ValueError: If the shape of original_image and compared_image are not the same.
    """
    check_numpy_param('original_image', original_image)
    check_numpy_param('compared_image', compared_image)
    check_equal_shape('original_image', original_image, 'compared_image', compared_image)
    avoid_zero_div = 1e-14
    diff = (original_image - compared_image).flatten()
    data = original_image.flatten()
    l0_dist = np.linalg.norm(diff, ord=0) \
        / (np.linalg.norm(data, ord=0) + avoid_zero_div)
    l2_dist = np.linalg.norm(diff, ord=2) \
        / (np.linalg.norm(data, ord=2) + avoid_zero_div)
    linf_dist = np.linalg.norm(diff, ord=np.inf) \
        / (np.linalg.norm(data, ord=np.inf) + avoid_zero_div)
    return l0_dist, l2_dist, linf_dist
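
# Hedged usage sketch (not part of the original module): a tiny self-check of
# the normalized Lp distances computed above. The helper name, the random
# inputs and the 1e-3 perturbation scale are assumptions for illustration.
def _demo_lp_distance():
    rng = np.random.default_rng(0)
    ori = rng.random((1, 32, 32)).astype(np.float32)
    adv = np.clip(ori + 1e-3*rng.standard_normal(ori.shape).astype(np.float32), 0, 1)
    # Each returned value is a ratio: norm(ori - adv) / norm(ori), so all
    # three should be small for such a light perturbation.
    l0_dist, l2_dist, linf_dist = calculate_lp_distance(ori, adv)
    return l0_dist, l2_dist, linf_dist
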
def _dist(self, before, after):
    """
    Calculate the distance between the model outputs of a raw sample and
    its smoothed counterpart.

    Args:
        before (numpy.ndarray): Model output of raw samples.
        after (numpy.ndarray): Model output of smoothed counterparts.

    Returns:
        list[float], distances based on the specified norm, one per sample.
    """
    before, after = check_pair_numpy_param('before', before, 'after', after)
    before, after = check_equal_shape('before', before, 'after', after)
    res = []
    diff = after - before
    for elem in diff:
        if self._metric == 'l1':
            res.append(np.linalg.norm(elem, ord=1))
        elif self._metric == 'l2':
            res.append(np.linalg.norm(elem, ord=2))
        else:
            # fall back to the l1 norm for unrecognized metric names
            res.append(np.linalg.norm(elem, ord=1))
    return res
def fit(self, inputs, labels=None):
    """
    Train detector to decide the best radius.

    Args:
        inputs (numpy.ndarray): Benign samples.
        labels (numpy.ndarray): Ground truth labels of the input samples.
            Default: None.

    Returns:
        float, the best radius.
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels', labels)
    LOGGER.debug(TAG, 'enter fit() function.')
    time_start = time.time()
    search_iters = (self._max_radius - self._initial_radius) / self._search_step
    search_iters = np.round(search_iters).astype(int)
    radius = self._initial_radius
    pred = self._model.predict(Tensor(inputs))
    raw_preds = np.argmax(pred.asnumpy(), axis=1)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    raw_preds, labels = check_equal_shape('raw_preds', raw_preds, 'labels', labels)
    raw_acc = np.sum(raw_preds == labels) / inputs.shape[0]

    for _ in range(search_iters):
        rc_preds = self._rc_forward(inputs, radius)
        rc_preds, labels = check_equal_shape('rc_preds', rc_preds, 'labels', labels)
        def_acc = np.sum(rc_preds == labels) / inputs.shape[0]
        if def_acc >= raw_acc - self._degrade_limit:
            radius += self._search_step
            continue
        break

    self._radius = radius - self._search_step
    LOGGER.debug(TAG, 'best radius is: %s', self._radius)
    LOGGER.debug(TAG,
                 'time used to train detector of %d samples is: %s seconds',
                 inputs.shape[0],
                 time.time() - time_start)
    return self._radius
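
# Hedged sketch (not part of the original class): the same linear radius
# search pattern used in fit(), shown against a stand-in accuracy function so
# it can run on its own. The helper name, the toy acc_fn and all constants
# below are assumptions for illustration.
def _demo_radius_search(initial_radius=0.0, max_radius=1.0, search_step=0.05,
                        degrade_limit=0.1):
    raw_acc = 0.95
    # stand-in for the defended accuracy measured via _rc_forward + comparison
    acc_fn = lambda r: raw_acc - 0.5*r
    radius = initial_radius
    iters = int(np.round((max_radius - initial_radius) / search_step))
    for _ in range(iters):
        if acc_fn(radius) >= raw_acc - degrade_limit:
            radius += search_step
            continue
        break
    # mirror of fit(): the last accepted radius is one step back
    return radius - search_step
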
def _fast_reduction(self, x_ori, best_position, q_times, auxiliary_inputs,
                    gt_boxes, gt_labels, model):
    """
    Decrease the differences between the original samples and adversarial
    samples in a fast way.

    Args:
        x_ori (numpy.ndarray): Original samples.
        best_position (numpy.ndarray): Adversarial examples.
        q_times (int): Query times.
        auxiliary_inputs (tuple): Auxiliary inputs matched with x_ori.
        gt_boxes (numpy.ndarray): Ground-truth boxes of x_ori.
        gt_labels (numpy.ndarray): Ground-truth labels of x_ori.
        model (BlackModel): Target model.

    Returns:
        - numpy.ndarray, adversarial examples after reduction.

        - int, total query times after reduction.
    """
    LOGGER.info(TAG, 'Reduction begins...')
    model = check_model('model', model, BlackModel)
    x_ori = check_numpy_param('x_ori', x_ori)
    _, gt_num = self._detection_scores((x_ori,) + auxiliary_inputs,
                                       gt_boxes, gt_labels, model)
    best_position = check_numpy_param('best_position', best_position)
    x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                             'best_position', best_position)
    _, original_num = self._detection_scores((best_position,) + auxiliary_inputs,
                                             gt_boxes, gt_labels, model)
    # pylint: disable=invalid-name
    REDUCTION_ITERS = 6  # recover 10% of the difference each time, 60% in total.
    for _ in range(REDUCTION_ITERS):
        BLOCK_NUM = 30  # divide the image into 30 segments
        block_width = best_position.shape[0] // BLOCK_NUM
        if block_width > 0:
            for i in range(BLOCK_NUM):
                diff = x_ori[i*block_width: (i+1)*block_width, :, :] \
                       - best_position[i*block_width: (i+1)*block_width, :, :]
                if np.max(np.abs(diff)) >= 0.1*(self._bounds[1] - self._bounds[0]):
                    res = diff*0.1
                    best_position[i*block_width: (i+1)*block_width, :, :] += res
                    _, correct_num = self._detection_scores(
                        (best_position,) + auxiliary_inputs, gt_boxes, gt_labels, model)
                    q_times += 1
                    if correct_num[0] > max(original_num[0],
                                            gt_num[0]*self._reserve_ratio):
                        best_position[i*block_width: (i+1)*block_width, :, :] -= res
    return best_position, q_times
def _reduction(x_ori, q_times, label, best_position, model, targeted_attack):
    """
    Decrease the differences between the original samples and adversarial samples.

    Args:
        x_ori (numpy.ndarray): Original samples.
        q_times (int): Query times.
        label (int): Target label or ground-truth label.
        best_position (numpy.ndarray): Adversarial examples.
        model (BlackModel): Target model.
        targeted_attack (bool): If True, it means this is a targeted attack. If
            False, it means this is an untargeted attack.

    Returns:
        - numpy.ndarray, adversarial examples after reduction.

        - int, total query times after reduction.

    Examples:
        >>> adv_reduction, query_times = self._reduction([0.1, 0.2, 0.3], 20, 1,
        >>>                                              [0.12, 0.15, 0.25],
        >>>                                              model, True)
    """
    LOGGER.info(TAG, 'Reduction begins...')
    model = check_model('model', model, BlackModel)
    x_ori = check_numpy_param('x_ori', x_ori)
    best_position = check_numpy_param('best_position', best_position)
    x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                             'best_position', best_position)
    x_ori_fla = x_ori.flatten()
    best_position_fla = best_position.flatten()
    pixel_deep = np.max(x_ori) - np.min(x_ori)
    nums_pixel = len(x_ori_fla)
    for i in range(nums_pixel):
        diff = x_ori_fla[i] - best_position_fla[i]
        if abs(diff) > pixel_deep*0.1:
            best_position_fla[i] += diff*0.5
            cur_label = np.argmax(
                model.predict(best_position_fla.reshape(x_ori.shape)))
            q_times += 1
            if targeted_attack:
                if cur_label != label:
                    best_position_fla[i] -= diff*0.5
            else:
                if cur_label == label:
                    # revert only the current pixel if the sample no longer fools the model
                    best_position_fla[i] -= diff*0.5
    return best_position_fla.reshape(x_ori.shape), q_times
def _reduction(self, x_ori, q_times, label, best_position):
    """
    Decrease the differences between the original samples and adversarial samples.

    Args:
        x_ori (numpy.ndarray): Original samples.
        q_times (int): Query times.
        label (int): Target label or ground-truth label.
        best_position (numpy.ndarray): Adversarial examples.

    Returns:
        - numpy.ndarray, adversarial examples after reduction.

        - int, total query times after reduction.

    Examples:
        >>> adv_reduction, query_times = self._reduction([0.1, 0.2, 0.3], 20, 1,
        >>>                                              [0.12, 0.15, 0.25])
    """
    x_ori = check_numpy_param('x_ori', x_ori)
    best_position = check_numpy_param('best_position', best_position)
    x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                             'best_position', best_position)
    x_ori_fla = x_ori.flatten()
    best_position_fla = best_position.flatten()
    pixel_deep = self._bounds[1] - self._bounds[0]
    nums_pixel = len(x_ori_fla)
    for i in range(nums_pixel):
        diff = x_ori_fla[i] - best_position_fla[i]
        if abs(diff) > pixel_deep*0.1:
            old_poi_fla = np.copy(best_position_fla)
            best_position_fla[i] = np.clip(best_position_fla[i] + diff*0.5,
                                           self._bounds[0], self._bounds[1])
            cur_label = np.argmax(
                self._model.predict(np.expand_dims(
                    best_position_fla.reshape(x_ori.shape), axis=0))[0])
            q_times += 1
            if self._targeted:
                if cur_label != label:
                    best_position_fla = old_poi_fla
            else:
                if cur_label == label:
                    best_position_fla = old_poi_fla
    return best_position_fla.reshape(x_ori.shape), q_times
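
# Hedged standalone sketch (the helper name, toy predict_fn and bounds are
# assumptions): the per-pixel reduction idea shared by both _reduction
# variants above, written against a plain callable so it can run without a
# BlackModel instance. Pixels whose perturbation exceeds 10% of the dynamic
# range are moved halfway back toward the original and kept only if the
# sample still fools the model (untargeted case shown).
def _demo_pixel_reduction(x_ori, x_adv, label, predict_fn, bounds=(0.0, 1.0)):
    x_ori_fla = x_ori.flatten()
    adv_fla = x_adv.flatten()
    pixel_deep = bounds[1] - bounds[0]
    for i in range(adv_fla.size):
        diff = x_ori_fla[i] - adv_fla[i]
        if abs(diff) > pixel_deep*0.1:
            old = adv_fla[i]
            adv_fla[i] = np.clip(adv_fla[i] + diff*0.5, bounds[0], bounds[1])
            # revert if the reduced sample is classified correctly again
            if np.argmax(predict_fn(adv_fla.reshape(x_ori.shape))) == label:
                adv_fla[i] = old
    return adv_fla.reshape(x_ori.shape)
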
def evaluate(self, original_images, inversion_images, labels=None, new_network=None):
    """
    Evaluate the quality of inverted images with three metrics: the average
    L2 distance and the average SSIM value between original images and
    inverted images, and the average confidence of the inverted images on
    their true labels, as given by a newly trained network.

    Args:
        original_images (numpy.ndarray): Original images, whose shape should be
            (img_num, channels, img_width, img_height).
        inversion_images (numpy.ndarray): Inversion images, whose shape should be
            (img_num, channels, img_width, img_height).
        labels (numpy.ndarray): Ground truth labels of original images. Default: None.
        new_network (Cell): A network whose structure contains all parts of
            self._network, but loaded with a different checkpoint file. Default: None.

    Returns:
        - float, average l2 distance.

        - float, average ssim value.

        - Union[float, None], average confidence. It would be None if labels or
          new_network is None.

    Examples:
        >>> net = LeNet5()
        >>> inversion_attack = ImageInversionAttack(net, input_shape=(1, 32, 32), input_bound=(0, 1),
        >>>                                         loss_weights=[1, 0.2, 5])
        >>> features = np.random.random((2, 10)).astype(np.float32)
        >>> inver_images = inversion_attack.generate(features, iters=10)
        >>> ori_images = np.random.random((2, 1, 32, 32))
        >>> result = inversion_attack.evaluate(ori_images, inver_images)
        >>> print(len(result))
    """
    check_numpy_param('original_images', original_images)
    check_numpy_param('inversion_images', inversion_images)
    if labels is not None:
        check_numpy_param('labels', labels)
        true_labels = np.squeeze(labels)
        if len(true_labels.shape) > 1:
            msg = 'Shape of true_labels should be (1, n) or (n,), but got {}'.format(
                true_labels.shape)
            raise ValueError(msg)
        if true_labels.size != original_images.shape[0]:
            msg = 'The size of true_labels should equal the number of images, ' \
                  'but got {} and {}'.format(true_labels.size, original_images.shape[0])
            raise ValueError(msg)
    if new_network is not None:
        check_param_type('new_network', new_network, Cell)
        LOGGER.info(
            TAG,
            'Please make sure that the network you pass is loaded with different checkpoint files '
            'compared with that of self._network.')
    img_1, img_2 = check_equal_shape('original_images', original_images,
                                     'inversion_images', inversion_images)
    if (len(img_1.shape) != 4) or (img_1.shape[1] != 1 and img_1.shape[1] != 3):
        msg = 'The shape format of img_1 and img_2 should be (img_num, channels, img_width, img_height),' \
              ' but got {} and {}'.format(img_1.shape, img_2.shape)
        raise ValueError(msg)
    total_l2_distance = 0
    total_ssim = 0
    img_1 = img_1.transpose(0, 2, 3, 1)
    img_2 = img_2.transpose(0, 2, 3, 1)
    for i in range(img_1.shape[0]):
        _, l2_dis, _ = calculate_lp_distance(img_1[i], img_2[i])
        total_l2_distance += l2_dis
        total_ssim += compute_ssim(img_1[i], img_2[i])
    avg_l2_dis = total_l2_distance / img_1.shape[0]
    avg_ssim = total_ssim / img_1.shape[0]
    avg_confi = None
    if (new_network is not None) and (labels is not None):
        pred_logits = new_network(Tensor(inversion_images.astype(np.float32))).asnumpy()
        logits_softmax = softmax(pred_logits, axis=1)
        avg_confi = np.mean(logits_softmax[np.arange(img_1.shape[0]), true_labels])
    return avg_l2_dis, avg_ssim, avg_confi
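
# Hedged illustration (the helper name and the made-up logits are assumptions):
# how the average-confidence term above is formed, i.e. softmax over logits
# followed by indexing each row with its true label, shown in plain numpy.
def _demo_avg_confidence():
    logits = np.array([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]])
    true_labels = np.array([0, 1])
    probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)  # softmax
    return float(np.mean(probs[np.arange(logits.shape[0]), true_labels]))
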
def _compute_ssim(img_1, img_2, kernel_sigma=1.5, kernel_width=11):
    """
    Compute structural similarity.

    Args:
        img_1 (numpy.ndarray): The first image to be compared.
        img_2 (numpy.ndarray): The second image to be compared.
        kernel_sigma (float): Gaussian kernel param. Default: 1.5.
        kernel_width (int): Another Gaussian kernel param. Default: 11.

    Returns:
        float, structural similarity.
    """
    img_1, img_2 = check_equal_shape('images_1', img_1, 'images_2', img_2)

    if len(img_1.shape) > 2:
        total_ssim = 0
        for i in range(img_1.shape[2]):
            total_ssim += _compute_ssim(img_1[:, :, i], img_2[:, :, i])
        # average over the actual number of channels
        return total_ssim / img_1.shape[2]

    # Create gaussian kernel centered on the middle of the window
    gaussian_kernel = np.zeros((kernel_width, kernel_width))
    center = kernel_width // 2
    for i in range(kernel_width):
        for j in range(kernel_width):
            gaussian_kernel[i, j] = (1 / (2*np.pi*(kernel_sigma**2)))*np.exp(
                -(((i - center)**2) + ((j - center)**2)) / (2*(kernel_sigma**2)))

    img_1 = img_1.astype(np.float32)
    img_2 = img_2.astype(np.float32)

    img_sq_1 = img_1**2
    img_sq_2 = img_2**2
    img_12 = img_1*img_2

    # Mean
    img_mu_1 = convolve(img_1, gaussian_kernel)
    img_mu_2 = convolve(img_2, gaussian_kernel)

    # Mean square
    img_mu_sq_1 = img_mu_1**2
    img_mu_sq_2 = img_mu_2**2
    img_mu_12 = img_mu_1*img_mu_2

    # Variances
    img_sigma_sq_1 = convolve(img_sq_1, gaussian_kernel)
    img_sigma_sq_2 = convolve(img_sq_2, gaussian_kernel)

    # Covariance
    img_sigma_12 = convolve(img_12, gaussian_kernel)

    # Centered squares of variances
    img_sigma_sq_1 = img_sigma_sq_1 - img_mu_sq_1
    img_sigma_sq_2 = img_sigma_sq_2 - img_mu_sq_2
    img_sigma_12 = img_sigma_12 - img_mu_12

    k_1 = 0.01
    k_2 = 0.03
    c_1 = (k_1*255)**2
    c_2 = (k_2*255)**2

    # Calculate ssim
    num_ssim = (2*img_mu_12 + c_1)*(2*img_sigma_12 + c_2)
    den_ssim = (img_mu_sq_1 + img_mu_sq_2 + c_1)*(img_sigma_sq_1
                                                  + img_sigma_sq_2 + c_2)
    res = np.average(num_ssim / den_ssim)
    return res
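
# Hedged self-check (the helper name, seed and noise scale are assumptions):
# SSIM of an image with itself should be close to 1 and should drop once
# noise is added. Uses a 0-255 range to match the c_1/c_2 constants above.
def _demo_ssim_sanity():
    rng = np.random.default_rng(0)
    img = (rng.random((32, 32))*255).astype(np.float32)
    noisy = img + 10.0*rng.standard_normal((32, 32)).astype(np.float32)
    return _compute_ssim(img, img), _compute_ssim(img, noisy)
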
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and targeted labels.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): The ground truth label of input samples
            or target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> advs = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [1, 2])
    """
    LOGGER.debug(TAG, "enter the func generate.")
    inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    self._dtype = inputs.dtype
    att_original = self._to_attack_space(inputs)
    reconstructed_original, _ = self._to_model_space(att_original)

    # find an adversarial sample
    const = np.ones_like(labels, dtype=self._dtype)*self._initial_const
    lower_bound = np.zeros_like(labels, dtype=self._dtype)
    upper_bound = np.ones_like(labels, dtype=self._dtype)*np.inf
    adversarial_res = inputs.copy()
    adversarial_loss = np.ones_like(labels, dtype=self._dtype)*np.inf
    samples_num = labels.shape[0]
    adv_flag = np.zeros_like(labels)
    for binary_search_step in range(self._bin_search_steps):
        if (binary_search_step == self._bin_search_steps - 1) and \
                (self._bin_search_steps >= 10):
            # element-wise minimum so that multi-sample batches work
            const = np.minimum(upper_bound, 1e10)
        LOGGER.debug(TAG, 'starting optimization with const = %s', str(const))
        att_perturbation = np.zeros_like(att_original, dtype=self._dtype)
        loss_at_previous_check = np.ones_like(labels, dtype=self._dtype)*np.inf

        # create a new optimizer to minimize the perturbation
        optimizer = _AdamOptimizer(att_perturbation.shape)

        for iteration in range(self._max_iterations):
            x_input, dxdp = self._to_model_space(att_original + att_perturbation)
            logits = self._network(Tensor(x_input)).asnumpy()

            current_l2_loss, current_loss, dldx = self._loss_function(
                logits, x_input, reconstructed_original, labels, const,
                self._confidence)

            # check whether the attack succeeded for every example
            if self._targeted:
                is_adv = (np.argmax(logits, axis=1) == labels)
            else:
                is_adv = (np.argmax(logits, axis=1) != labels)

            for i in range(samples_num):
                if is_adv[i]:
                    adv_flag[i] = True
                    if current_l2_loss[i] < adversarial_loss[i]:
                        adversarial_res[i] = x_input[i]
                        adversarial_loss[i] = current_l2_loss[i]

            if np.all(adv_flag):
                if self._fast:
                    LOGGER.debug(TAG, "succeeded in finding adversarial examples.")
                    msg = 'iteration: {}, logits_att: {}, ' \
                          'loss: {}, l2_dist: {}' \
                        .format(iteration, np.argmax(logits, axis=1),
                                current_loss, current_l2_loss)
                    LOGGER.debug(TAG, msg)
                    return adversarial_res

            dldx, inputs = check_equal_shape('dldx', dldx, 'inputs', inputs)

            gradient = dldx*dxdp
            att_perturbation += optimizer(gradient, self._learning_rate)

            # check whether iteration should be stopped early
            flag = True
            iter_check = iteration % (np.ceil(
                self._max_iterations*self._abort_early_check_ratio))
            if self._abort_early and iter_check == 0:
                # check progress
                for i in range(inputs.shape[0]):
                    if current_loss[i] <= .9999*loss_at_previous_check[i]:
                        flag = False
                # stop Adam if no sample has made progress
                if flag:
                    LOGGER.debug(TAG,
                                 'step:%d, no progress yet, stop iteration',
                                 binary_search_step)
                    break
                loss_at_previous_check = current_loss

        for i in range(samples_num):
            # update bound based on search result
            if adv_flag[i]:
                LOGGER.debug(TAG,
                             'example %d, found adversarial with const=%f',
                             i, const[i])
                upper_bound[i] = const[i]
            else:
                LOGGER.debug(TAG,
                             'example %d, failed to find adversarial'
                             ' with const=%f',
                             i, const[i])
                lower_bound[i] = const[i]
            if upper_bound[i] == np.inf:
                const[i] *= 10
            else:
                const[i] = (lower_bound[i] + upper_bound[i]) / 2
    return adversarial_res
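
# Hedged standalone sketch (the helper name is an assumption): the per-sample
# update of the trade-off constant performed at the end of each binary search
# step in generate(). All four arguments are 1-D numpy arrays of equal length.
def _demo_const_update(const, lower_bound, upper_bound, adv_flag):
    const = const.copy()
    for i in range(const.shape[0]):
        if adv_flag[i]:
            # attack succeeded: try a smaller constant next step
            upper_bound[i] = const[i]
        else:
            # attack failed: a larger constant is needed
            lower_bound[i] = const[i]
        if upper_bound[i] == np.inf:
            const[i] *= 10
        else:
            const[i] = (lower_bound[i] + upper_bound[i]) / 2
    return const, lower_bound, upper_bound
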
def _loss_function(self, logits, new_x, org_x, org_or_target_class,
                   constant, confidence):
    """
    Calculate the value of loss function and gradients of loss w.r.t inputs.

    Args:
        logits (numpy.ndarray): The output of network before softmax.
        new_x (numpy.ndarray): Adversarial examples.
        org_x (numpy.ndarray): Original benign input samples.
        org_or_target_class (numpy.ndarray): Original/target labels.
        constant (numpy.ndarray): Trade-off constants used to balance loss
            and perturbation norm, one per sample.
        confidence (float): Confidence level of the output of adversarial examples.

    Returns:
        numpy.ndarray, norm of perturbation, sum of the loss and the norm, and
        gradients of the sum w.r.t inputs.

    Raises:
        ValueError: If loss is less than 0.

    Examples:
        >>> l2_loss, total_loss, dldx = self._loss_function([0.2, 0.3, 0.5],
        >>>                                                 [0.1, 0.2, 0.2, 0.4],
        >>>                                                 [0.12, 0.2, 0.25, 0.4],
        >>>                                                 [1], 2, 0)
    """
    LOGGER.debug(TAG, "enter the func _loss_function.")

    logits = check_numpy_param('logits', logits)
    org_x = check_numpy_param('org_x', org_x)
    new_x, org_or_target_class = check_pair_numpy_param(
        'new_x', new_x, 'org_or_target_class', org_or_target_class)
    new_x, org_x = check_equal_shape('new_x', new_x, 'org_x', org_x)

    other_class_index = _best_logits_of_other_class(
        logits, org_or_target_class, value=np.inf)
    loss1 = np.sum((new_x - org_x)**2,
                   axis=tuple(range(len(new_x.shape))[1:]))
    loss2 = np.zeros_like(loss1, dtype=self._dtype)
    loss2_grade = np.zeros_like(new_x, dtype=self._dtype)
    jaco_grad = jacobian_matrix(self._net_grad, new_x, self._num_classes)
    if self._targeted:
        for i in range(org_or_target_class.shape[0]):
            loss2[i] = max(0, logits[i][other_class_index[i]]
                           - logits[i][org_or_target_class[i]] + confidence)
            loss2_grade[i] = constant[i]*(jaco_grad[other_class_index[i]][i]
                                          - jaco_grad[org_or_target_class[i]][i])
    else:
        for i in range(org_or_target_class.shape[0]):
            loss2[i] = max(0, logits[i][org_or_target_class[i]]
                           - logits[i][other_class_index[i]] + confidence)
            loss2_grade[i] = constant[i]*(jaco_grad[org_or_target_class[i]][i]
                                          - jaco_grad[other_class_index[i]][i])
    total_loss = loss1 + constant*loss2
    loss1_grade = 2*(new_x - org_x)
    for i in range(org_or_target_class.shape[0]):
        if loss2[i] < 0:
            msg = 'loss value should be greater than or equal to 0, ' \
                  'but got loss2 {}'.format(loss2[i])
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        if loss2[i] == 0:
            loss2_grade[i, ...] = 0
    total_loss_grade = loss1_grade + loss2_grade
    return loss1, total_loss, total_loss_grade
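
# Hedged numeric illustration (the helper name and the made-up logits are
# assumptions, and the best-other-class logit is computed directly with
# np.delete rather than via _best_logits_of_other_class): the margin term
# loss2 above for a single targeted example with confidence 0.
def _demo_margin_loss():
    logits = np.array([1.2, 3.4, 0.7])
    target = 1
    other_best = np.max(np.delete(logits, target))  # best logit among other classes
    # targeted case: penalize until the target logit beats every other logit
    return max(0.0, other_best - logits[target] + 0.0)
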