def _sp_double_backward_update(pos_out: Tensor,
                               neg_out: Tensor,
                               param: Parameter,
                               gamma: float,
                               l1_reg: float,
                               l2_reg: float,
                               pos: Tensor = None):
    param.grad = None
    # first backward
    neg_out.backward()
    neg = param.grad.relu_().add_(eps)

    if pos is None:
        param.grad = None
        pos_out.backward()
        pos = param.grad.relu_().add_(eps)

    if l1_reg > 0:
        pos.add_(l1_reg)
    if l2_reg > 0:
        pos = pos.add(param.data, alpha=l2_reg)
    multiplier = neg.div_(pos)
    if gamma != 1:
        multiplier.pow_(gamma)
    param.data.mul_(multiplier)
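# A minimal usage sketch for _sp_double_backward_update (not from the original
# source): one Euclidean multiplicative update of H in V ~= W @ H. The loss is
# split into a scalar whose gradient is the negative term (W.T @ V) and one
# whose gradient is the positive term (W.T @ W @ H); the two scalars are built
# from separate forward passes so each backward() can free its own graph.
# `eps` is assumed to be a small module-level constant such as 1e-8.
import torch
from torch.nn import Parameter

eps = 1e-8
V = torch.rand(8, 6)
W = torch.rand(8, 3)
H = Parameter(torch.rand(3, 6))

neg_out = (V * (W @ H)).sum()           # d(neg_out)/dH = W.T @ V
pos_out = 0.5 * ((W @ H) ** 2).sum()    # d(pos_out)/dH = W.T @ W @ H
_sp_double_backward_update(pos_out, neg_out, H, gamma=1.0, l1_reg=0.0, l2_reg=0.0)
# H.data was scaled elementwise by (W.T @ V) / (W.T @ W @ H): the classic
# Lee-Seung update, so it stays non-negative.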
def proto2object(proto: Parameter_PB) -> Parameter:
    data = protobuf_tensor_deserializer(proto.tensor)
    param = Parameter(data, requires_grad=proto.requires_grad)
    if proto.HasField("grad"):
        param.grad = protobuf_tensor_deserializer(proto.grad)
    # opacus monkey patches this onto the Parameter class
    if proto.HasField("grad_sample"):
        param.grad_sample = protobuf_tensor_deserializer(proto.grad_sample)
    return param
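# For reference, a plain-torch sketch (placeholder values, not a real protobuf
# payload) of the object proto2object reconstructs: a Parameter carrying an
# optional .grad and, when opacus has been used, a monkey-patched .grad_sample
# holding per-sample gradients.
import torch
from torch.nn import Parameter

param = Parameter(torch.zeros(3, 2), requires_grad=True)
param.grad = torch.ones(3, 2)             # same shape and dtype as the data
param.grad_sample = torch.ones(4, 3, 2)   # opacus: leading dim = batch size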
def _double_backward_update(V: Tensor,
                            WH: Tensor,
                            param: Parameter,
                            beta: float,
                            gamma: float,
                            l1_reg: float,
                            l2_reg: float,
                            pos: Tensor = None):
    param.grad = None
    if beta == 2:
        output_neg = V
        output_pos = WH
    elif beta == 1:
        output_neg = V / WH.add(eps)
        output_pos = None
    elif beta == 0:
        WH_eps = WH.add(eps)
        output_pos = WH_eps.reciprocal_()
        output_neg = output_pos.square().mul_(V)
    else:
        WH_eps = WH.add(eps)
        output_neg = WH_eps.pow(beta - 2).mul_(V)
        output_pos = WH_eps.pow_(beta - 1)

    # first backward
    WH.backward(output_neg, retain_graph=pos is None)
    neg = param.grad.relu_().add_(eps)

    if pos is None:
        param.grad = None
        WH.backward(output_pos)
        pos = param.grad.relu_().add_(eps)

    if l1_reg > 0:
        pos.add_(l1_reg)
    if l2_reg > 0:
        pos = pos.add(param.data, alpha=l2_reg)
    multiplier = neg.div_(pos)
    if gamma != 1:
        multiplier.pow_(gamma)
    param.data.mul_(multiplier)
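# A minimal usage sketch (assumed, not from the original source) of the update
# above for a single Euclidean (beta=2) step on H in V ~= W @ H; W is held
# fixed for this step and `eps` is the same small constant assumed earlier.
import torch
from torch.nn import Parameter

V = torch.rand(8, 6)
W = torch.rand(8, 3)
H = Parameter(torch.rand(3, 6))

WH = W @ H                                   # reconstruction; graph reaches H
_double_backward_update(V, WH, H, beta=2, gamma=1.0, l1_reg=0.0, l2_reg=0.0)
# H.data was multiplied elementwise by (W.T @ V) / (W.T @ W @ H).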
def pytorch_parameters(self):
    from torch import from_numpy
    from torch.nn import Parameter

    kernel_parameter = None
    bias_parameter = None

    if self._K is not None:
        kernel_tensor = from_numpy(self._K.data)
        kernel_parameter = Parameter(kernel_tensor, requires_grad=False)
        if self._KG is not None:
            kernel_parameter.grad = from_numpy(self._KG.data)

    if self._bias is not None:
        bias_tensor = from_numpy(self._bias.data)
        bias_parameter = Parameter(bias_tensor, requires_grad=False)
        if self._biasG is not None:
            bias_parameter.grad = from_numpy(self._biasG.data)

    return (kernel_parameter, bias_parameter)
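# Hypothetical stand-ins (class and field values invented for this sketch)
# showing the state pytorch_parameters expects: _K / _bias wrap numpy arrays
# in a .data field, with optional gradients in _KG / _biasG.
import numpy as np

class _Array:
    def __init__(self, arr):
        self.data = arr

class _FakeConv:
    _K = _Array(np.ones((4, 3), dtype=np.float32))
    _KG = _Array(np.zeros((4, 3), dtype=np.float32))
    _bias = _Array(np.zeros(4, dtype=np.float32))
    _biasG = None
    pytorch_parameters = pytorch_parameters   # reuse the function above as a method

kernel, bias = _FakeConv().pytorch_parameters()
assert not kernel.requires_grad and kernel.grad is not None and bias.grad is None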
def _double_backward_update(V: Tensor,
                            WH: Tensor,
                            param: Parameter,
                            beta: float,
                            gamma: float,
                            l1_reg: float,
                            l2_reg: float,
                            pos: Tensor = None):
    param.grad = None
    if beta == 2:
        output_neg = V
        output_pos = WH
    elif beta == 1:
        output_neg = V / WH.add(eps)
        output_pos = None
    elif beta == 0:
        output_neg = V / (WH * WH).add(eps)
        output_pos = 1 / WH.add(eps)
    else:
        output_neg = WH.pow(beta - 2) * V
        output_pos = WH.pow(beta - 1)

    # first backward
    WH.backward(output_neg, retain_graph=pos is None)
    neg = torch.clone(param.grad).relu_().add_(eps)

    if pos is None:
        param.grad.zero_()
        WH.backward(output_pos)
        pos = torch.clone(param.grad).relu_().add_(eps)

    if l1_reg > 0:
        pos.add_(l1_reg)
    if l2_reg > 0:
        pos = pos.add(param.data, alpha=l2_reg)
    multiplier = neg / pos
    if gamma != 1:
        multiplier.pow_(gamma)
    param.data.mul_(multiplier)
def _compress_module_param_dim(
    param: Parameter,
    target_dim: int,
    idxs_to_keep: Tensor,
    module: Optional[Module] = None,
    optimizer: Optional[Optimizer] = None,
):
    if param.dim() == 1:
        target_dim = 0

    if param.size(target_dim) == 1 and idxs_to_keep.numel() > 1:
        # DW Conv
        return

    if param.size(target_dim) % idxs_to_keep.size(0) != 0:
        _LOGGER.debug(
            "skipping compression of parameter due to shape incompatibility")
        return

    stride = param.data.size(target_dim) // idxs_to_keep.size(0)
    if stride > 1:
        idxs_to_keep = idxs_to_keep.reshape(-1, 1).expand(-1, stride).reshape(-1)

    param.data = (param.data[idxs_to_keep, ...]
                  if target_dim == 0
                  else param.data[:, idxs_to_keep, ...])

    if param.grad is not None:
        param.grad = (param.grad[idxs_to_keep, ...]
                      if target_dim == 0
                      else param.grad[:, idxs_to_keep, ...])

    if (optimizer is not None
            and param in optimizer.state
            and ("momentum_buffer" in optimizer.state[param])):
        optimizer.state[param]["momentum_buffer"] = (
            optimizer.state[param]["momentum_buffer"][idxs_to_keep, ...]
            if target_dim == 0
            else optimizer.state[param]["momentum_buffer"][:, idxs_to_keep, ...])

    # update module attrs
    if module is not None:
        # Batch Norm
        if param.dim() == 1:
            if hasattr(module, "num_features"):
                module.num_features = param.size(0)
            # BN running mean and var are not stored as Parameters so we must
            # update them here
            if hasattr(module, "running_mean") and (
                    module.running_mean.size(0) == idxs_to_keep.size(0)):
                module.running_mean = module.running_mean[idxs_to_keep]
            if hasattr(module, "running_var") and (
                    module.running_var.size(0) == idxs_to_keep.size(0)):
                module.running_var = module.running_var[idxs_to_keep]
        # Linear
        elif target_dim == 0 and hasattr(module, "out_features"):
            module.out_features = param.size(0)
        elif target_dim == 1 and hasattr(module, "in_features"):
            module.in_features = param.size(1)
        # Conv
        elif target_dim == 0 and hasattr(module, "out_channels"):
            module.out_channels = param.size(0)
        elif target_dim == 1 and hasattr(module, "in_channels"):
            module.in_channels = param.size(1)

        if (hasattr(module, "groups") and module.groups > 1
                and (hasattr(module, "out_channels")
                     and hasattr(module, "in_channels"))):
            module.groups = param.size(0) // param.size(1)
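# A hedged usage sketch (module and keep-mask values invented here): prune two
# of four output channels from a small Conv2d with a boolean keep-mask over
# dim 0, letting the helper keep weight, bias, and out_channels consistent.
import torch
from torch.nn import Conv2d

conv = Conv2d(in_channels=3, out_channels=4, kernel_size=3)
keep = torch.tensor([True, False, True, False])

_compress_module_param_dim(conv.weight, target_dim=0, idxs_to_keep=keep, module=conv)
_compress_module_param_dim(conv.bias, target_dim=0, idxs_to_keep=keep, module=conv)

assert conv.weight.shape == (2, 3, 3, 3) and conv.out_channels == 2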
def attack(model, criterion, img, label, eps, attack_type, iters,
           clean_clean_img=None):
    assert not model.training

    adv = img.clone().detach()
    adv = Parameter(adv, requires_grad=True)

    if attack_type == 'fgsm':
        iterations = 1
    else:
        iterations = iters
    if attack_type == 'pgd':
        step = 2 / 255
    else:
        step = eps / iterations

    noise = 0
    for j in range(iterations):
        outputs = None
        if aug_test is None:
            out_adv = model(normalize(adv.clone()))
            loss = criterion(out_adv, label)
            loss.backward()
        else:
            adv_aux = adv * (1.0 - aug_test_lambda)
            # TODO: check why this loop uses so much memory
            for i in range(aug_test):
                adv_aux = adv_aux + aug_test_lambda * clean_clean_img[
                    torch.randperm(label.size(0))]
                out = model(normalize(adv_aux))
                if outputs is None:
                    outputs = out
                else:
                    outputs += out
            out_adv = outputs / aug_test
            loss = criterion(out_adv, label)
            loss.backward()

        if attack_type == 'mim':
            adv_mean = torch.mean(torch.abs(adv.grad), dim=1, keepdim=True)
            adv_mean = torch.mean(torch.abs(adv_mean), dim=2, keepdim=True)
            adv_mean = torch.mean(torch.abs(adv_mean), dim=3, keepdim=True)
            adv.grad = adv.grad / adv_mean
            noise = noise + adv.grad
        else:
            assert adv.grad is not None
            noise = adv.grad

        # Optimization step
        adv.data = adv.data + step * noise.sign()
        # adv.data = adv.data + step * adv.grad.sign()

        if attack_type == 'pgd':
            adv.data = torch.where(adv.data > img.data + eps,
                                   img.data + eps, adv.data)
            adv.data = torch.where(adv.data < img.data - eps,
                                   img.data - eps, adv.data)
        adv.data.clamp_(0.0, 1.0)
        adv.grad.data.zero_()

    return adv.detach()
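# Hypothetical usage sketch for attack(): a single-batch FGSM run. attack()
# reads aug_test, aug_test_lambda and normalize from module scope (they are not
# defined in the snippet above), so stand-in values are set here; the model and
# data are throwaway placeholders.
import torch
import torch.nn as nn

aug_test = None                 # take the plain (non-augmented) branch
aug_test_lambda = 0.0

def normalize(x):               # identity; swap in the dataset normalization
    return x

model = nn.Sequential(
    nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(8, 10),
).eval()
criterion = nn.CrossEntropyLoss()
img = torch.rand(4, 3, 32, 32)
label = torch.randint(0, 10, (4,))

adv = attack(model, criterion, img, label, eps=8 / 255,
             attack_type='fgsm', iters=1)   # adv stays within [0, 1]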
def _data_proto2object(proto: Parameter_PB) -> Parameter:
    data = protobuf_tensor_deserializer(proto.tensor)
    param = Parameter(data, requires_grad=proto.requires_grad)
    if proto.HasField("grad"):
        param.grad = protobuf_tensor_deserializer(proto.grad)
    return param