def detect(self, **kwargs):
    """Recover per-class triggers, print outlier statistics and save the results.

    The attack watermark is first pinned to a fixed position with zero
    height/width offset. ``self.get_potential_triggers()`` is then called and
    the L1 norm of every recovered mask and the losses are printed together
    with their ``normalize_mad`` values. When ``self.random_pos`` is false,
    the Jaccard index between the mask recovered for the attack's target
    class and the real watermark mask is printed as well. Finally the marks,
    masks and losses are written to an ``.npz`` file under
    ``self.folder_path``.

    Args:
        **kwargs: Forwarded unchanged to the parent class ``detect``.
    """
    super().detect(**kwargs)
    # Captured up front; used to build the output filename.
    target_class = self.attack.target_class

    watermark = self.attack.mark
    watermark.random_pos = False
    watermark.height_offset = 0
    # The original assigned to a misspelled ``width_offest`` attribute, so the
    # width offset was never reset alongside the height offset.
    watermark.width_offset = 0

    if not self.random_pos:
        self.real_mask = self.attack.mark.mask

    mark_list, mask_list, loss_list = self.get_potential_triggers()

    # One L1 norm per recovered mask (everything after dim 0 is flattened).
    mask_norms = mask_list.flatten(start_dim=1).norm(p=1, dim=1)
    print('mask norms: ', mask_norms)
    print('mask MAD: ', normalize_mad(mask_norms))
    print('loss: ', loss_list)
    print('loss MAD: ', normalize_mad(loss_list))

    if not self.random_pos:
        overlap = jaccard_idx(
            mask_list[self.attack.target_class],
            self.real_mask,
            select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
        print(f'Jaccard index: {overlap:.3f}')

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(self.folder_path, exist_ok=True)
    mark_list = [to_numpy(i) for i in mark_list]
    mask_list = [to_numpy(i) for i in mask_list]
    loss_list = [to_numpy(i) for i in loss_list]

    file_path = self.folder_path + self.get_filename(target_class=target_class) + '.npz'
    np.savez(file_path, mark_list=mark_list, mask_list=mask_list, loss_list=loss_list)
    print('Defense results saved at: ' + file_path)
def detect(self, **kwargs):
    """Recover triggers, save them to disk and print the loss statistics.

    When the attack watermark is not randomly positioned, its mask is kept in
    ``self.real_mask``. The losses and marks returned by
    ``self.get_potential_triggers()`` are written to an ``.npz`` file under
    ``self.folder_path``, then the losses and their ``normalize_mad`` values
    are printed.

    Args:
        **kwargs: Forwarded unchanged to the parent class ``detect``.
    """
    # Function-scope import so this block does not rely on a module-level one.
    import os

    super().detect(**kwargs)
    if not self.attack.mark.random_pos:
        self.real_mask = self.attack.mark.mask
    loss_list, mark_list = self.get_potential_triggers()

    file_path = self.folder_path + self.get_filename(target_class=self.target_class) + '.npz'
    # np.savez does not create missing directories; make sure the target
    # folder exists so a fresh output location does not abort the run.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    np.savez(file_path, mark_list=mark_list, loss_list=loss_list)

    print('loss: ', loss_list)
    print('loss MAD: ', normalize_mad(loss_list))
def get_potential_triggers(self, neuron_dict: dict[int, list[dict]], _input: torch.Tensor, _label: torch.Tensor, use_mask=True) -> dict[int, list[dict]]:
    """Recover and score a trigger for every candidate neuron of every label.

    For each candidate (visited in reverse list order) a mark/mask pair is
    obtained from ``self.remask``, installed on ``self.attack.mark`` and
    evaluated with ``self.model._validate``. The candidate dict is updated in
    place with the keys ``'loss'``, ``'attack_acc'``, ``'attack_loss'``,
    ``'mask'``, ``'mark'``, ``'norm'`` and ``'jaccard'``. The candidate score
    is ``attack_loss + 7e-2 * L1(mask)``; the lowest-scoring candidate of each
    label (below an initial cap of 100.0) is remembered as that label's best.

    After every candidate, ``neuron_dict`` and the best-per-label dict are
    saved as ``.npy`` files under ``self.folder_path``.

    Args:
        neuron_dict: Maps label -> list of candidate dicts, each holding at
            least ``'layer'``, ``'neuron'`` and ``'value'``. Labels are used
            as indices into a score list of length ``len(neuron_dict)``.
        _input: Input batch passed to ``self.remask``.
        _label: Not used by this method; kept for interface compatibility.
        use_mask: Forwarded to ``self.remask``.

    Returns:
        The same ``neuron_dict`` object, with every candidate enriched.

    Note:
        ``self.attack.mark.mark``, ``.alpha_mask``, ``.mask`` and
        ``self.attack.target_class`` are overwritten and not restored.
    """
    score_cap = 100.0  # candidates must score below this to be kept as best
    score_list = [0.0] * len(neuron_dict)
    result_dict = {}

    for label, label_list in neuron_dict.items():
        print('label: ', label)
        best_score = score_cap
        for candidate in reversed(label_list):
            layer = candidate['layer']
            neuron = candidate['neuron']
            value = candidate['value']

            mark, mask, loss = self.remask(_input, layer=layer, neuron=neuron,
                                           label=label, use_mask=use_mask)

            # Install the recovered trigger on the attack so that validation
            # measures how well it flips inputs to `label`.
            self.attack.mark.mark = mark
            self.attack.mark.alpha_mask = mask
            self.attack.mark.mask = torch.ones_like(mark, dtype=torch.bool)
            self.attack.target_class = label
            attack_loss, attack_acc = self.model._validate(
                verbose=False, get_data_fn=self.attack.get_data, keep_org=False)

            mask_norm = float(mask.norm(p=1))  # computed once, reused below
            candidate['loss'] = loss
            candidate['attack_acc'] = attack_acc
            candidate['attack_loss'] = attack_loss
            candidate['mask'] = to_numpy(mask)
            candidate['mark'] = to_numpy(mark)
            candidate['norm'] = mask_norm

            score = attack_loss + 7e-2 * mask_norm
            if score < best_score:
                best_score = score
                result_dict[label] = candidate

            _str = f' layer: {layer:20s} neuron: {neuron:5d} value: {value:.3f}'
            _str += (f' loss: {loss:10.3f}'
                     f' ATK Acc: {attack_acc:.3f}'
                     f' ATK Loss: {attack_loss:10.3f}'
                     f' Norm: {mask_norm:.3f}'
                     f' Score: {score:.3f}')
            if not self.attack.mark.random_pos:
                overlap = jaccard_idx(mask, self.real_mask)
                candidate['jaccard'] = overlap
                _str += f' Jaccard: {overlap:.3f}'
            else:
                candidate['jaccard'] = 0.0
            print(_str)

            # Checkpoint after every candidate so partial progress survives
            # an interrupted run.
            os.makedirs(self.folder_path, exist_ok=True)
            base_path = self.folder_path + self.get_filename(target_class=self.target_class)
            np.save(base_path + '.npy', neuron_dict)
            np.save(base_path + '_best.npy', result_dict)

        best = result_dict.get(label)
        if best is None:
            # No candidate beat the cap (or the list was empty); the original
            # code raised KeyError here when formatting the summary.
            print(f'Label: {label:3d} no candidate scored below {score_cap:.3f}')
        else:
            print(
                f'Label: {label:3d} loss: {best["loss"]:10.3f} ATK loss: {best["attack_loss"]:10.3f} Norm: {best["norm"]:10.3f} Jaccard: {best["jaccard"]:10.3f} Score: {best_score:.3f}'
            )
        score_list[label] = best_score

    print('Score: ', score_list)
    print('Score MAD: ', normalize_mad(score_list))
    return neuron_dict
def detect(self, **kwargs):
    """Print the explanation features and their ``normalize_mad`` values.

    Runs the parent class ``detect`` first, then converts the result of
    ``self.get_explation_feature()`` to a tensor for reporting.

    Args:
        **kwargs: Forwarded unchanged to the parent class ``detect``.
    """
    super().detect(**kwargs)
    feature_tensor = torch.tensor(self.get_explation_feature())
    print('exp features: ', feature_tensor)
    mad_values = normalize_mad(feature_tensor)
    print('exp mad: ', mad_values)