def _test_master_maintain_logic_experiment(self):
    for _t in range(100):
        self.ffp16_optim.zero_grad()
        for pair in self.data_set:
            yy_pred = (pair[0].mm(self.pparameters[0]).clamp(min=0).mm(
                self.pparameters[1]))
            lloss = (yy_pred - pair[1]).pow(2).sum()
            lloss = lloss / len(self.data_set)
            with fp16_optimizer.scale_loss(
                    lloss, self.ffp16_optim, delay_unscale=False) as scaled_loss:
                scaled_loss.backward()  # Run backprop
        # Check for overflow
        self.ffp16_optim.step()
def _test_memory_efficient_logic_exp(self):
    for _t in range(100):
        self.MEoptim.zero_grad()
        for pair in self.data_set:
            y_pred = (pair[0].mm(self.parameters[0]).clamp(min=0).mm(
                self.parameters[1]))
            loss = (y_pred - pair[1]).pow(2).sum()
            loss = loss / len(self.data_set)
            with fp16_optimizer.scale_loss(
                    loss, self.MEoptim, delay_unscale=False) as scaled_loss:
                scaled_loss.backward()  # Run backprop
        # Check for overflow
        self.MEoptim.loss_scaler.check_overflow(self.MEoptim.param_groups)
        # If no overflow, unscale grads and update as usual
        if not self.MEoptim.loss_scaler.is_overflow:
            self.MEoptim.loss_scaler.unscale_grads(
                self.MEoptim.param_groups)
            self.MEoptim.is_scaled = False
            self.MEoptim.inner_optimizer.step()
    # NOTE: if the following code is used, the weights can show slight
    # deviations.
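
# Both tests above exercise the same dynamic loss-scaling recipe: scale the
# loss before backprop, check the resulting gradients for overflow, and only
# unscale and step when they are finite. The helper below is a minimal
# standalone sketch of that recipe written against plain PyTorch; the function
# name, the `scale` argument, and the halve/double adjustment factors are
# illustrative assumptions, not part of fp16_optimizer's API.
import torch


def _scaled_update_sketch(params, loss, optimizer, scale):
    """Run one loss-scaled update and return the (possibly adjusted) scale."""
    optimizer.zero_grad()
    # Backprop on the scaled loss so small fp16 gradients do not underflow.
    (loss * scale).backward()
    grads = [p.grad for p in params if p.grad is not None]
    # Treat any inf/nan gradient as an overflow of the scaled backward pass.
    overflow = any(not torch.isfinite(g).all() for g in grads)
    if overflow:
        # Skip this update and back off the scale, as a dynamic scaler would.
        return scale / 2.0
    # Unscale the gradients in place before the real optimizer update.
    for g in grads:
        g.div_(scale)
    optimizer.step()
    # Grow the scale again after successful steps (simplified to every step).
    return scale * 2.0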