Exemple #1
0
    def detect(self, **kwargs):
        """Recover candidate triggers, print outlier statistics and save them.

        Steps performed:

        1. Run the parent ``detect`` and pin the attack's watermark to a
           fixed position (``random_pos`` off, both offsets set to 0).
        2. Call ``self.get_potential_triggers()`` to obtain the recovered
           marks, masks and losses.
        3. Print the per-entry L1 norm of the masks and the losses, each
           together with its ``normalize_mad`` value.
        4. When ``self.random_pos`` is false, print the Jaccard index between
           the mask recovered for the attack's target class and the real
           watermark mask.
        5. Save everything to ``<folder_path><filename>.npz``.

        Args:
            **kwargs: Forwarded unchanged to the parent ``detect``.
        """
        super().detect(**kwargs)
        # Captured before trigger recovery so the output file name does not
        # depend on anything the recovery step may change on the attack.
        target_class = self.attack.target_class

        self.attack.mark.random_pos = False
        self.attack.mark.height_offset = 0
        # The original assigned to ``width_offest``; that spelling does not
        # mirror ``height_offset`` and would only create an unused attribute.
        # NOTE(review): assumes the mark's attribute is ``width_offset`` -
        # confirm against the watermark class.
        self.attack.mark.width_offset = 0

        if not self.random_pos:
            # Keep the genuine mask so the recovered one can be scored below.
            self.real_mask = self.attack.mark.mask

        mark_list, mask_list, loss_list = self.get_potential_triggers()

        # One L1 norm per entry along the first dimension of ``mask_list``.
        mask_norms = mask_list.flatten(start_dim=1).norm(p=1, dim=1)
        print('mask norms: ', mask_norms)
        print('mask MAD: ', normalize_mad(mask_norms))
        print('loss: ', loss_list)
        print('loss MAD: ', normalize_mad(loss_list))

        if not self.random_pos:
            overlap = jaccard_idx(mask_list[self.attack.target_class], self.real_mask,
                                  select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
            print(f'Jaccard index: {overlap:.3f}')

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.folder_path, exist_ok=True)
        save_path = self.folder_path + self.get_filename(target_class=target_class) + '.npz'
        np.savez(save_path,
                 mark_list=[to_numpy(i) for i in mark_list],
                 mask_list=[to_numpy(i) for i in mask_list],
                 loss_list=[to_numpy(i) for i in loss_list])
        print('Defense results saved at: ' + save_path)
 def detect(self, **kwargs):
     """Recover candidate triggers, save them, then print the loss statistics.

     Runs the parent ``detect``, remembers the attack's real mask when the
     watermark position is not random, stores the marks and losses returned
     by ``self.get_potential_triggers()`` in ``<folder_path><filename>.npz``
     and finally prints the losses with their ``normalize_mad`` value.

     Args:
         **kwargs: Forwarded unchanged to the parent ``detect``.
     """
     super().detect(**kwargs)

     watermark = self.attack.mark
     if not watermark.random_pos:
         self.real_mask = watermark.mask

     loss_list, mark_list = self.get_potential_triggers()

     # Results are written to disk before anything is printed.
     save_path = self.folder_path + self.get_filename(target_class=self.target_class) + '.npz'
     np.savez(save_path, mark_list=mark_list, loss_list=loss_list)

     print('loss: ', loss_list)
     print('loss MAD: ', normalize_mad(loss_list))
Exemple #3
0
 def get_potential_triggers(self,
                            neuron_dict: dict[int, list[dict]],
                            _input: torch.Tensor,
                            _label: torch.Tensor,
                            use_mask=True) -> dict[int, list[dict]]:
     """Recover and score a trigger for every candidate neuron of every label.

     For each label, the candidates are visited in reverse list order. Each
     candidate is passed to ``self.remask`` to obtain a mark, a mask and a
     loss; the mark and mask are installed on ``self.attack.mark``, the
     attack's target class is set to the label, and the model is validated
     on data produced by ``self.attack.get_data``. The resulting metrics are
     written into the candidate dict in place.

     A candidate's score is ``attack_loss + 0.07 * L1(mask)``; the lowest
     scoring candidate below the initial threshold of 100.0 is kept as the
     best one for its label. After every candidate, both ``neuron_dict`` and
     the best-per-label dict are saved under ``self.folder_path``.

     Side effects: ``self.attack.mark`` (``mark``, ``alpha_mask``, ``mask``)
     and ``self.attack.target_class`` are overwritten and not restored.

     Args:
         neuron_dict: Maps a label to its candidate dicts. Each candidate
             must provide ``'layer'``, ``'neuron'`` and ``'value'``. Labels
             are used as indices into a list of ``len(neuron_dict)`` scores.
         _input: Input batch forwarded to ``self.remask``.
         _label: Not used by this method.
         use_mask: Forwarded to ``self.remask``.

     Returns:
         The same ``neuron_dict`` object, with ``'loss'``, ``'attack_acc'``,
         ``'attack_loss'``, ``'mask'``, ``'mark'``, ``'norm'`` and
         ``'jaccard'`` added to every candidate.
     """
     # These meters are fed for candidates with attack accuracy above 90 but
     # are not read anywhere in this method.
     losses = AverageMeter('Loss', ':.4e')
     norms = AverageMeter('Norm', ':6.2f')
     jaccard = AverageMeter('Jaccard Idx', ':6.2f')

     norm_weight = 7e-2  # weight of the mask L1 norm in the score
     score_list = [0.0] * len(neuron_dict)
     result_dict = {}
     for label, label_list in neuron_dict.items():
         print('label: ', label)
         best_score = 100.0
         for _dict in reversed(label_list):
             layer = _dict['layer']
             neuron = _dict['neuron']
             value = _dict['value']
             mark, mask, loss = self.remask(_input,
                                            layer=layer,
                                            neuron=neuron,
                                            label=label,
                                            use_mask=use_mask)
             # Install the recovered trigger so validation measures it.
             self.attack.mark.mark = mark
             self.attack.mark.alpha_mask = mask
             self.attack.mark.mask = torch.ones_like(mark, dtype=torch.bool)
             self.attack.target_class = label
             attack_loss, attack_acc = self.model._validate(
                 verbose=False,
                 get_data_fn=self.attack.get_data,
                 keep_org=False)

             mask_norm = float(mask.norm(p=1))
             _dict['loss'] = loss
             _dict['attack_acc'] = attack_acc
             _dict['attack_loss'] = attack_loss
             _dict['mask'] = to_numpy(mask)
             _dict['mark'] = to_numpy(mark)
             _dict['norm'] = mask_norm

             score = attack_loss + norm_weight * mask_norm
             if score < best_score:
                 best_score = score
                 result_dict[label] = _dict
             if attack_acc > 90:
                 losses.update(loss)
                 norms.update(mask.norm(p=1))

             # The original left the last four pieces as bare expression
             # statements, so they were never appended to the printed line.
             _str = (f'    layer: {layer:20s}    neuron: {neuron:5d}    value: {value:.3f}'
                     f'    loss: {loss:10.3f}'
                     f'    ATK Acc: {attack_acc:.3f}'
                     f'    ATK Loss: {attack_loss:10.3f}'
                     f'    Norm: {mask_norm:.3f}'
                     f'    Score: {score:.3f}')
             if not self.attack.mark.random_pos:
                 overlap = jaccard_idx(mask, self.real_mask)
                 _dict['jaccard'] = overlap
                 _str += f'    Jaccard: {overlap:.3f}'
                 if attack_acc > 90:
                     jaccard.update(overlap)
             else:
                 _dict['jaccard'] = 0.0
             print(_str)

             # Save after every candidate so partial results survive an
             # interrupted run.
             os.makedirs(self.folder_path, exist_ok=True)
             save_prefix = self.folder_path + self.get_filename(target_class=self.target_class)
             np.save(save_prefix + '.npy', neuron_dict)
             np.save(save_prefix + '_best.npy', result_dict)

         best = result_dict.get(label)
         if best is None:
             # No candidate (or none scoring under the threshold): report it
             # instead of failing with a KeyError on the summary line.
             print(f'Label: {label:3d}  no candidate scored below {best_score:.3f}')
         else:
             print(
                 f'Label: {label:3d}'
                 f'  loss: {best["loss"]:10.3f}'
                 f'  ATK loss: {best["attack_loss"]:10.3f}'
                 f'  Norm: {best["norm"]:10.3f}'
                 f'  Jaccard: {best["jaccard"]:10.3f}'
                 f'  Score: {best_score:.3f}')
         score_list[label] = best_score
     print('Score: ', score_list)
     print('Score MAD: ', normalize_mad(score_list))
     return neuron_dict
Exemple #4
0
 def detect(self, **kwargs):
     """Print the explanation features and their ``normalize_mad`` value.

     Runs the parent ``detect``, converts the result of
     ``self.get_explation_feature()`` to a tensor and prints it together
     with the value ``normalize_mad`` returns for it.

     Args:
         **kwargs: Forwarded unchanged to the parent ``detect``.
     """
     super().detect(**kwargs)
     feature_tensor = torch.tensor(self.get_explation_feature())
     print('exp features: ', feature_tensor)
     print('exp mad: ', normalize_mad(feature_tensor))