def attach(self, optimizer: torch.optim.Optimizer):
    r"""
    Attaches the privacy engine to the optimizer.

    Attaches to the ``PrivacyEngine`` an optimizer object, and injects
    itself into the optimizer's step. To do that it,

    1. Validates that the model does not have unsupported layers.
    2. Adds a pointer to this object (the ``PrivacyEngine``) inside the optimizer.
    3. Moves optimizer's original ``step()`` function to ``original_step()``.
    4. Monkeypatches the optimizer's ``step()`` function to call ``step()`` on
       the query engine automatically whenever it would call ``step()`` for itself.

    Args:
        optimizer: The optimizer to which the privacy engine will attach

    Raises:
        ValueError: If the optimizer is already attached to a different
            ``PrivacyEngine``, or if DDP is used with an unsupported clipper.
    """
    # Guard against double attachment: same engine is a no-op (warn),
    # a different engine is a hard error.
    if hasattr(optimizer, "privacy_engine"):
        if optimizer.privacy_engine != self:
            raise ValueError(
                f"Trying to attach to optimizer: {optimizer}, but that optimizer is "
                f"already attached to a different Privacy Engine: {optimizer.privacy_engine}."
            )
        else:
            warnings.warn(
                "Trying to attach twice to the same optimizer. Nothing to do."
            )
            return

    self.validator.validate(self.module)

    # Single max_grad_norm -> flat clipping; a list -> one bound per layer.
    norm_clipper = (
        clipping.ConstantFlatClipper(self.max_grad_norm)
        if not isinstance(self.max_grad_norm, list)
        else clipping.ConstantPerLayerClipper(self.max_grad_norm)
    )
    if self.misc_settings.get("experimental", False):
        norm_clipper = clipping._Dynamic_Clipper_(
            [self.max_grad_norm],
            self.misc_settings.get("clip_per_layer", False),
            self.misc_settings.get(
                "clipping_method", clipping.ClippingMethod.STATIC
            ),
            self.misc_settings.get("clipping_ratio", 0.0),
            self.misc_settings.get("clipping_momentum", 0.0),
        )

    self.clipper = PerSampleGradientClipper(
        self.module,
        norm_clipper,
        self.batch_first,
        self.loss_reduction,
    )

    if isinstance(self.module._module, torch.nn.parallel.DistributedDataParallel):
        if isinstance(norm_clipper, clipping.ConstantPerLayerClipper):
            # The DDP hooks are stored in `self.privacy_engine.module.ddp_hooks`
            self._register_ddp_hooks()
        else:
            raise ValueError(
                """The Opacus DDP hook only supports constant per-layer clipping. 
If you need a different clipper for simple (not optimized) distributed training, you can use `opacus.layers.dp_ddp.DifferentiallyPrivateDistributedDataParallel`"""
            )

    def dp_zero_grad(self):
        # Clear the engine's accumulated per-sample gradients, then the
        # optimizer's own gradients.
        self.privacy_engine.zero_grad()
        self.original_zero_grad()

    def dp_step(self, closure=None, is_empty=False):
        # When the DDP hooks are activated, there is no need for
        # ``PrivacyEngine.step()`` because the clipping and noising are
        # performed by the hooks at the end of the backward pass.
        if hasattr(self.privacy_engine.module, "ddp_hooks"):
            # We just update the accountant
            self.privacy_engine.steps += 1
        else:
            self.privacy_engine.step(is_empty)
        if isinstance(
            self.privacy_engine.module._module,
            DifferentiallyPrivateDistributedDataParallel,
        ):
            average_gradients(self.privacy_engine.module)
        self.original_step(closure)

    def poisson_dp_step(self, closure=None):
        # Perform one step as usual
        self.dp_step(closure)
        # Taking empty steps to simulate empty batches
        num_empty_batches = self.privacy_engine._sample_poisson_empty_batches()
        for _ in range(num_empty_batches):
            self.zero_grad()
            self.dp_step(closure, is_empty=True)

    optimizer.privacy_engine = self
    optimizer.dp_step = types.MethodType(dp_step, optimizer)
    optimizer.original_step = optimizer.step
    optimizer.step = types.MethodType(
        poisson_dp_step if self.poisson else dp_step, optimizer
    )
    optimizer.original_zero_grad = optimizer.zero_grad
    optimizer.zero_grad = types.MethodType(dp_zero_grad, optimizer)

    def virtual_step(self):
        # FIX: ``self`` here is the optimizer (bound via MethodType), which
        # has no ``module`` attribute — the original ``hasattr(self.module,
        # "ddp_hooks")`` check was always False, so the DDP-hook guard could
        # never trigger. Route the check through the attached privacy engine,
        # mirroring the check in ``dp_step`` above.
        if hasattr(self.privacy_engine.module, "ddp_hooks"):
            raise NotImplementedError("DDP hook does not support virtual steps.")
        self.privacy_engine.virtual_step()

    optimizer.virtual_step = types.MethodType(virtual_step, optimizer)

    # create a cross reference for detaching
    self.optimizer = optimizer

    if self.poisson:
        # Optional initial step on empty batch: Poisson sampling may draw
        # zero-sized batches before any real data is seen.
        num_empty_batches = self._sample_poisson_empty_batches()
        for _ in range(num_empty_batches):
            self.optimizer.zero_grad()
            for p in self.module.parameters():
                if p.requires_grad:
                    # Empty batch: gradients are identically zero.
                    p.grad = torch.zeros_like(p)
            self.optimizer.dp_step(closure=None, is_empty=True)
def attach(self, optimizer: torch.optim.Optimizer):
    r"""
    Attach this privacy engine to ``optimizer``.

    Wires the ``PrivacyEngine`` into the optimizer's update cycle:

    1. Validates that the model contains no unsupported layers.
    2. Stores a back-reference to this engine on the optimizer.
    3. Saves the optimizer's original ``step()`` as ``original_step()``.
    4. Monkeypatches the optimizer's ``step()`` (and ``zero_grad()``) so the
       engine's own ``step()`` runs automatically on every optimizer step.

    Args:
        optimizer: The optimizer to which the privacy engine will attach
    """
    self.validator.validate(self.module)

    # Choose the clipper: a list of bounds means per-layer clipping,
    # a single scalar means one flat norm bound over all parameters.
    if isinstance(self.max_grad_norm, list):
        norm_clipper = clipping.ConstantPerLayerClipper(self.max_grad_norm)
    else:
        norm_clipper = clipping.ConstantFlatClipper(self.max_grad_norm)

    if self.misc_settings.get("experimental", False):
        norm_clipper = clipping._Dynamic_Clipper_(
            [self.max_grad_norm],
            self.misc_settings.get("clip_per_layer", False),
            self.misc_settings.get(
                "clipping_method", clipping.ClippingMethod.STATIC
            ),
            self.misc_settings.get("clipping_ratio", 0.0),
            self.misc_settings.get("clipping_momentum", 0.0),
        )

    self.clipper = PerSampleGradientClipper(
        self.module,
        norm_clipper,
        self.batch_first,
        self.loss_reduction,
    )

    def dp_zero_grad(self):
        # Reset the engine's accumulated state before clearing gradients.
        self.privacy_engine.zero_grad()
        self.original_zero_grad()

    def dp_step(self, closure=None, is_empty=False):
        # Clip + noise via the engine, average across workers when the
        # model is DP-distributed, then run the optimizer's real step.
        self.privacy_engine.step(is_empty)
        if isinstance(
            self.privacy_engine.module, DifferentiallyPrivateDistributedDataParallel
        ):
            average_gradients(self.privacy_engine.module)
        self.original_step(closure)

    def poisson_dp_step(self, closure=None):
        # One real step, then simulate however many empty Poisson batches
        # were sampled for this round.
        self.dp_step(closure)
        for _ in range(self.privacy_engine._sample_poisson_empty_batches()):
            self.zero_grad()
            self.dp_step(closure, is_empty=True)

    def virtual_step(self):
        self.privacy_engine.virtual_step()

    # Monkeypatch the optimizer, keeping handles to the original methods
    # (originals must be saved before they are overwritten).
    optimizer.privacy_engine = self
    optimizer.original_step = optimizer.step
    optimizer.original_zero_grad = optimizer.zero_grad
    optimizer.dp_step = types.MethodType(dp_step, optimizer)
    optimizer.step = types.MethodType(
        poisson_dp_step if self.poisson else dp_step, optimizer
    )
    optimizer.zero_grad = types.MethodType(dp_zero_grad, optimizer)
    optimizer.virtual_step = types.MethodType(virtual_step, optimizer)

    # create a cross reference for detaching
    self.optimizer = optimizer

    if self.poisson:
        # Optional initial step on empty batch: Poisson sampling may draw
        # zero-sized batches before any real data arrives.
        for _ in range(self._sample_poisson_empty_batches()):
            self.optimizer.zero_grad()
            for p in self.module.parameters():
                if p.requires_grad:
                    p.grad = torch.zeros_like(p)
            self.optimizer.dp_step(closure=None, is_empty=True)