Exemple #1
0
    def detect(self, **kwargs):
        """Recover candidate triggers for every class and save them to disk.

        Disables random positioning on the attack mark and resets both of its
        offsets to 0, recovers one candidate per class through
        ``get_potential_triggers``, prints the L1 norms of the masks and the
        losses together with their ``normalize_mad`` values, and finally
        writes the marks, masks and losses to an ``.npz`` archive.

        Args:
            **kwargs: Forwarded unchanged to the parent class ``detect``.
        """
        super().detect(**kwargs)
        target_class = self.attack.target_class
        self.attack.mark.random_pos = False
        self.attack.mark.height_offset = 0
        # Fixed: this used to assign the misspelled ``width_offest``, which
        # only created an unused attribute and never reset the real offset.
        self.attack.mark.width_offset = 0
        if not self.random_pos:
            # Keep the ground-truth mask so recovered masks can be compared
            # against it below.
            self.real_mask = self.attack.mark.mask
        mark_list, mask_list, loss_list = self.get_potential_triggers()
        mask_norms = mask_list.flatten(start_dim=1).norm(p=1, dim=1)
        print('mask norms: ', mask_norms)
        print('mask MAD: ', normalize_mad(mask_norms))
        print('loss: ', loss_list)
        print('loss MAD: ', normalize_mad(loss_list))

        if not self.random_pos:
            overlap = jaccard_idx(mask_list[self.attack.target_class],
                                  self.real_mask,
                                  select_num=self.attack.mark.mark_height *
                                  self.attack.mark.mark_width)
            print(f'Jaccard index: {overlap:.3f}')

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.folder_path, exist_ok=True)
        # Plain concatenation is kept on purpose so the archive location is
        # exactly the one produced before this change.
        file_path = (self.folder_path +
                     self.get_filename(target_class=target_class) + '.npz')
        np.savez(file_path,
                 mark_list=[to_numpy(i) for i in mark_list],
                 mask_list=[to_numpy(i) for i in mask_list],
                 loss_list=[to_numpy(i) for i in loss_list])
        print('Defense results saved at: ' + file_path)
 def get_potential_triggers(
         self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     """Recover one candidate trigger per class, checkpointing after each.

     ``self.remask`` is called for every class index below
     ``self.model.num_classes``. After each class the results gathered so
     far are written again to the same ``.npz`` archive.

     Returns:
         The stacked marks, the stacked masks and the losses converted to a
         tensor, each ordered by class index.
     """
     marks, masks, losses = [], [], []
     # todo: parallel to avoid for loop
     archive_path = os.path.normpath(
         os.path.join(
             self.folder_path,
             self.get_filename(target_class=self.target_class) + '.npz'))
     for class_idx in range(self.model.num_classes):
         print('Class: ', output_iter(class_idx, self.model.num_classes))
         new_mark, new_mask, new_loss = self.remask(class_idx)
         marks.append(new_mark)
         masks.append(new_mask)
         losses.append(new_loss)
         if not self.random_pos:
             # Compare the recovered mask with the stored reference mask.
             overlap = jaccard_idx(
                 new_mask,
                 self.real_mask,
                 select_num=(self.attack.mark.mark_height *
                             self.attack.mark.mark_width))
             print(f'Jaccard index: {overlap:.3f}')
         # Rewrite the archive with everything recovered so far.
         np.savez(archive_path,
                  mark_list=marks,
                  mask_list=masks,
                  loss_list=losses)
         print('Defense results saved at: ' + archive_path)
     return torch.stack(marks), torch.stack(masks), torch.as_tensor(losses)
    def get_potential_triggers(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Recover one candidate trigger for every class of the model.

        Calls ``self.remask`` once per class index below
        ``self.model.num_classes`` and, unless ``self.random_pos`` is set,
        prints the Jaccard index between each recovered mask and
        ``self.real_mask``.

        Returns:
            The stacked marks, the stacked masks and the losses converted to
            a tensor, each ordered by class index.
        """
        recovered_marks = []
        recovered_masks = []
        recovered_losses = []
        # todo: parallel to avoid for loop
        for class_idx in range(self.model.num_classes):
            print('Class: ', output_iter(class_idx, self.model.num_classes))
            cur_mark, cur_mask, cur_loss = self.remask(class_idx)
            recovered_marks.append(cur_mark)
            recovered_masks.append(cur_mask)
            recovered_losses.append(cur_loss)

            if self.random_pos:
                # No reference mask to compare against.
                continue
            overlap = jaccard_idx(
                cur_mask,
                self.real_mask,
                select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
            print(f'Jaccard index: {overlap:.3f}')
        return (torch.stack(recovered_marks),
                torch.stack(recovered_masks),
                torch.as_tensor(recovered_losses))
Exemple #4
0
    def trigger_detect(self, _input: torch.Tensor):
        """Look for a trigger-like region in every input image.

        For each image, ``self.sample_num`` random blocks of size
        ``self.size`` are overwritten with the image's dominant colour.
        Blocks whose removal changes the model's predicted class are treated
        as candidates: the original pixels of each such block are installed
        as the attack mark and ``self.confirm_backdoor()`` is evaluated. An
        image is flagged when that value exceeds ``self.threshold_t``.

        Note: this overwrites attributes of ``self.attack.mark`` and does
        not restore them.

        Args:
            _input (torch.Tensor): (N, C, H, W)

        Returns:
            tuple[torch.Tensor, torch.Tensor]: ``result_list``, a bool tensor
            of shape (N) marking flagged images, and ``mask_list``, a float
            tensor of shape (N, H, W) holding the mask of the last confirmed
            block for each flagged image (zeros otherwise).
        """
        num_input = len(_input)
        block_h, block_w = self.size[0], self.size[1]

        # get dominant color
        dom_c = torch.stack([self.get_dominant_colour(img)
                             for img in _input])  # (N, C)
        dom_c = dom_c.unsqueeze(-1).unsqueeze(-1)  # (N, C, 1, 1)

        # generate random numbers
        height, width = _input.shape[-2:]
        # ``high`` is exclusive in torch.randint, so ``+ 1`` is required for
        # the last valid top-left corner (H - block_h, W - block_w) to be
        # reachable; it also keeps ``high > low`` when the block is as large
        # as the image.
        pos_height: torch.Tensor = torch.randint(
            low=0,
            high=height - block_h + 1,
            size=[num_input, self.sample_num])  # (N, sample_num)
        pos_width: torch.Tensor = torch.randint(
            low=0,
            high=width - block_w + 1,
            size=[num_input, self.sample_num])  # (N, sample_num)
        # Fixed: ``stack(...).transpose(0, -1)`` produced
        # (sample_num, N, 2), which broke the [i][j] indexing below whenever
        # N != sample_num. Stacking on the last dim gives the intended layout.
        pos_list: torch.Tensor = torch.stack(
            [pos_height, pos_width], dim=-1)  # (N, sample_num, 2)

        # block potential triggers on _input
        block_input = _input.unsqueeze(1).repeat(
            1, self.sample_num, 1, 1, 1)  # (N, sample_num, C, H, W)
        for i in range(num_input):
            for j in range(self.sample_num):
                x, y = pos_list[i, j].tolist()
                block_input[i, j, :, x:x + block_h,
                            y:y + block_w] = dom_c[i]

        # get potential triggers
        _input = to_tensor(_input)
        block_input = to_tensor(block_input)
        org_class = self.model.get_class(_input).unsqueeze(1).repeat(
            1, self.sample_num)  # (N, sample_num)
        block_class = torch.stack(
            [self.model.get_class(block_input[:, j])  # (N)
             for j in range(self.sample_num)],
            dim=1)  # (N, sample_num)
        # True where blocking did NOT change the prediction; the candidates
        # are therefore the entries selected by ``~potential_idx``.
        potential_idx: torch.Tensor = org_class.eq(
            block_class).detach().cpu()  # (N, sample_num)

        # confirm triggers
        result_list = torch.zeros(num_input, dtype=torch.bool)
        mask_shape = [_input.shape[0], _input.shape[-2], _input.shape[-1]]
        mask_list = torch.zeros(mask_shape,
                                dtype=torch.float)  # (N, height, width)
        mark_class = self.attack.mark
        for i in range(num_input):
            print(f'input {i:3d}')
            pos_pairs = pos_list[i][~potential_idx[i]]  # (*, 2)
            if len(pos_pairs) == 0:
                continue
            for j, pos in enumerate(pos_pairs):
                mark_class.height_offset = pos[0]
                mark_class.width_offset = pos[1]
                # Use the original pixels under the block as the mark.
                mark_class.org_mark = _input[i, :,
                                             pos[0]:pos[0] + block_h,
                                             pos[1]:pos[1] + block_w]
                mark_class.org_mask = torch.ones(self.size, dtype=torch.bool)
                mark_class.org_alpha_mask = torch.ones(self.size,
                                                       dtype=torch.float)
                mark_class.mark, mark_class.mask, mark_class.alpha_mask = mark_class.mask_mark(
                    height_offset=pos[0], width_offset=pos[1])
                target_acc = self.confirm_backdoor()
                output_str = f'    {j:3d}  Acc: {target_acc:5.2f}'
                if not mark_class.random_pos:
                    overlap = jaccard_idx(mark_class.mask.detach().cpu(),
                                          self.real_mask.detach().cpu(),
                                          select_num=block_h * block_w)
                    output_str += f'  Jaccard Idx: {overlap:5.3f}'
                print(output_str)
                if target_acc > self.threshold_t:
                    result_list[i] = True
                    mask_list[i] = mark_class.mask
        return result_list, mask_list
Exemple #5
0
 def get_potential_triggers(self,
                            neuron_dict: dict[int, list[dict]],
                            _input: torch.Tensor,
                            _label: torch.Tensor,
                            use_mask=True) -> dict[int, list[dict]]:
     """Recover and score a trigger for every candidate neuron.

     For each label, the candidate entries are visited in reverse order. A
     mark and mask are recovered with ``self.remask``, installed on
     ``self.attack.mark`` and evaluated with ``self.model._validate``. The
     results are written back into the candidate's dict under the keys
     ``loss``, ``attack_acc``, ``attack_loss``, ``mask``, ``mark``, ``norm``
     and ``jaccard``. The entry with the lowest
     ``attack_loss + 7e-2 * norm`` per label is kept in a separate
     best-result dict. Both dicts are saved with ``np.save`` after every
     candidate.

     Note: ``self.attack.target_class`` and the attributes of
     ``self.attack.mark`` are overwritten and not restored.

     Args:
         neuron_dict: Maps each label to its list of candidate dicts; every
             dict must provide ``layer``, ``neuron`` and ``value``. Labels
             are used as indices into a score list of ``len(neuron_dict)``
             entries, so they are expected to be ``0 .. len - 1``.
         _input: Passed through to ``self.remask``.
         _label: Unused here; kept for interface compatibility.
         use_mask: Passed through to ``self.remask``.

     Returns:
         ``neuron_dict`` itself, with every candidate dict updated in place.
     """
     losses = AverageMeter('Loss', ':.4e')
     norms = AverageMeter('Norm', ':6.2f')
     jaccard = AverageMeter('Jaccard Idx', ':6.2f')
     score_list = [0.0] * len(neuron_dict)
     result_dict = {}
     for label, label_list in neuron_dict.items():
         print('label: ', label)
         # Start from +inf so the first candidate is always recorded. The
         # previous start value of 100.0 left result_dict[label] unset (and
         # the summary print below raising KeyError) whenever every score
         # was >= 100.
         best_score = float('inf')
         for _dict in reversed(label_list):
             layer = _dict['layer']
             neuron = _dict['neuron']
             value = _dict['value']
             mark, mask, loss = self.remask(_input,
                                            layer=layer,
                                            neuron=neuron,
                                            label=label,
                                            use_mask=use_mask)
             self.attack.mark.mark = mark
             self.attack.mark.alpha_mask = mask
             self.attack.mark.mask = torch.ones_like(mark, dtype=torch.bool)
             self.attack.target_class = label
             attack_loss, attack_acc = self.model._validate(
                 verbose=False,
                 get_data_fn=self.attack.get_data,
                 keep_org=False)
             # Compute the L1 norm once and reuse it everywhere below.
             mask_norm = float(mask.norm(p=1))
             _dict['loss'] = loss
             _dict['attack_acc'] = attack_acc
             _dict['attack_loss'] = attack_loss
             _dict['mask'] = to_numpy(mask)
             _dict['mark'] = to_numpy(mark)
             _dict['norm'] = mask_norm
             score = attack_loss + 7e-2 * mask_norm
             if score < best_score:
                 best_score = score
                 result_dict[label] = _dict
             if attack_acc > 90:
                 losses.update(loss)
                 norms.update(mask_norm)
             # Fixed: the last four pieces used to be stand-alone f-string
             # statements and therefore never reached the printed line.
             _str = (
                 f'    layer: {layer:20s}    neuron: {neuron:5d}    value: {value:.3f}'
                 f'    loss: {loss:10.3f}'
                 f'    ATK Acc: {attack_acc:.3f}'
                 f'    ATK Loss: {attack_loss:10.3f}'
                 f'    Norm: {mask_norm:.3f}'
                 f'    Score: {score:.3f}')
             if not self.attack.mark.random_pos:
                 overlap = jaccard_idx(mask, self.real_mask)
                 _dict['jaccard'] = overlap
                 _str += f'    Jaccard: {overlap:.3f}'
                 if attack_acc > 90:
                     jaccard.update(overlap)
             else:
                 _dict['jaccard'] = 0.0
             print(_str)
             # Save after every candidate so partial results are on disk.
             os.makedirs(self.folder_path, exist_ok=True)
             # Built inside the loop on purpose: the file name is obtained
             # from self at the same point as before.
             base_path = (self.folder_path +
                          self.get_filename(target_class=self.target_class))
             np.save(base_path + '.npy', neuron_dict)
             np.save(base_path + '_best.npy', result_dict)
         best = result_dict[label]
         print(
             f'Label: {label:3d}  loss: {best["loss"]:10.3f}  ATK loss: {best["attack_loss"]:10.3f}  Norm: {best["norm"]:10.3f}  Jaccard: {best["jaccard"]:10.3f}  Score: {best_score:.3f}'
         )
         score_list[label] = best_score
     print('Score: ', score_list)
     print('Score MAD: ', normalize_mad(score_list))
     return neuron_dict
Exemple #6
0
    def remask(self, label: int) -> tuple[float, torch.Tensor]:
        """Train a fresh generator to produce a mark that drives inputs to ``label``.

        A new ``Generator`` is optimised with Adam for ``self.remask_epoch``
        passes over ``self.loader``. In every step the generator output is
        added to the batch, clamped to [0, 1] and fed to the model; the loss
        is ``self.model.criterion`` towards ``label`` plus ``self.gamma_2``
        times the mean L1 norm of the mark. Afterwards the last mark is
        validated through ``self.model._validate`` and, unless the attack
        mark uses random positions, its Jaccard index against
        ``self.real_mask`` is printed.

        Args:
            label: Class index the recovered mark should push inputs to.

        Returns:
            ``(losses.avg, mark)``: the loss meter's average (the meter is
            reset at the start of every epoch, so this presumably covers the
            last epoch only) and the mark from the last training step. If no
            training step runs, ``mark`` is the initial all-zero tensor.
        """
        generator = Generator(self.noise_dim, self.dataset.num_classes,
                              self.dataset.data_shape)
        for param in generator.parameters():
            param.requires_grad_()
        optimizer = optim.Adam(generator.parameters(), lr=self.remask_lr)
        optimizer.zero_grad()
        # mask = self.attack.mark.mask

        # Per-epoch running statistics; reset at the top of every epoch.
        losses = AverageMeter('Loss', ':.4e')
        entropy = AverageMeter('Entropy', ':.4e')
        norm = AverageMeter('Norm', ':.4e')
        acc = AverageMeter('Acc', ':6.2f')
        # Seed before sampling so the noise vector is the same on every call.
        torch.manual_seed(env['seed'])
        # A single noise vector is sampled once and reused for every batch.
        noise = torch.rand(1, self.noise_dim, device=env['device'])
        # Fallback value returned when the training loop never executes.
        mark = torch.zeros(self.dataset.data_shape, device=env['device'])
        for _epoch in range(self.remask_epoch):
            losses.reset()
            entropy.reset()
            norm.reset()
            acc.reset()
            epoch_start = time.perf_counter()
            loader = self.loader
            if env['tqdm']:
                loader = tqdm(loader)
            for data in loader:
                _input, _label = self.model.get_data(data)
                batch_size = _label.size(0)
                # Every sample in the batch is relabelled to the target class.
                poison_label = label * torch.ones_like(_label)
                mark = generator(
                    noise,
                    torch.tensor([label],
                                 device=poison_label.device,
                                 dtype=poison_label.dtype))
                # The mark is applied additively; values are kept in [0, 1].
                poison_input = (_input + mark).clamp(0, 1)
                _output = self.model(poison_input)

                batch_acc = poison_label.eq(_output.argmax(1)).float().mean()
                batch_entropy = self.model.criterion(_output, poison_label)
                batch_norm = mark.flatten(start_dim=1).norm(p=1, dim=1).mean()
                # Objective: classification loss plus weighted L1 size of the mark.
                batch_loss = batch_entropy + self.gamma_2 * batch_norm

                acc.update(batch_acc.item(), batch_size)
                entropy.update(batch_entropy.item(), batch_size)
                norm.update(batch_norm.item(), batch_size)
                losses.update(batch_loss.item(), batch_size)

                batch_loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            # Per-epoch progress line.
            epoch_time = str(
                datetime.timedelta(seconds=int(time.perf_counter() -
                                               epoch_start)))
            pre_str = '{blue_light}Epoch: {0}{reset}'.format(
                output_iter(_epoch + 1, self.remask_epoch),
                **ansi).ljust(64 if env['color'] else 35)
            _str = ' '.join([
                f'Loss: {losses.avg:.4f},'.ljust(20),
                f'Acc: {acc.avg:.2f}, '.ljust(20),
                f'Norm: {norm.avg:.4f},'.ljust(20),
                f'Entropy: {entropy.avg:.4f},'.ljust(20),
                f'Time: {epoch_time},'.ljust(20),
            ])
            prints(pre_str,
                   _str,
                   prefix='{upline}{clear_line}'.format(
                       **ansi) if env['tqdm'] else '',
                   indent=4)

        def get_data_fn(data, **kwargs):
            # Poisons a batch with the final ``mark`` (captured from the
            # enclosing scope) and relabels it to ``label``.
            _input, _label = self.model.get_data(data)
            poison_label = torch.ones_like(_label) * label
            poison_input = (_input + mark).clamp(0, 1)
            return poison_input, poison_label

        self.model._validate(print_prefix='Validate Trigger Tgt',
                             get_data_fn=get_data_fn,
                             indent=4)

        if not self.attack.mark.random_pos:
            # The mark is averaged over its first dimension before the
            # comparison with the reference mask.
            overlap = jaccard_idx(mark.mean(dim=0),
                                  self.real_mask,
                                  select_num=self.attack.mark.mark_height *
                                  self.attack.mark.mark_width)
            print(f'    Jaccard index: {overlap:.3f}')

        # Freeze the generator's parameters before returning.
        for param in generator.parameters():
            param.requires_grad = False
        return losses.avg, mark