import torch
# `_check_param_device` is PyTorch's private helper that verifies all
# parameters live on the same device
from torch.nn.utils.convert_parameters import _check_param_device


def vector_to_gradients(vec, parameters):
    r"""Convert one vector to the gradients of the parameters.

    Arguments:
        vec (Tensor): a single vector representing the gradients of the
            parameters of a model.
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.
    """
    # Ensure vec is of type Tensor
    if not isinstance(vec, torch.Tensor):
        raise TypeError('expected torch.Tensor, but got: {}'.format(
            torch.typename(vec)))
    # Flag for the device where the parameters are located
    param_device = None

    # Pointer for slicing the vector for each parameter
    pointer = 0
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)

        # The length of the parameter
        num_param = param.numel()
        # Create a gradient buffer if the parameter has none yet
        if param.grad is None:
            param.grad = torch.zeros_like(param)
        # Slice the vector, reshape it, and replace the old gradient data
        # of the parameter
        param.grad.data = vec[pointer:pointer + num_param].view_as(param).data

        # Increment the pointer
        pointer += num_param
def vector_to_grads(vec, parameters):
    r"""Convert one vector to the gradients of the parameters.

    :param vec: a single vector representing the gradients of the
        parameters of a model.
    :type vec: torch.Tensor
    :param parameters: an iterator of Tensors that are the parameters of
        a model.
    :type parameters: list[torch.Tensor]
    """
    # Ensure vec is of type Tensor
    if not isinstance(vec, torch.Tensor):
        raise TypeError(
            "expected torch.Tensor, but got: {}".format(torch.typename(vec))
        )
    # Flag for the device where the parameters are located
    param_device = None

    # Pointer for slicing the vector for each parameter gradient
    pointer = 0
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)

        # The length of the parameter
        num_param = param.numel()
        # Slice the vector, reshape it, and replace the gradient of the
        # parameter with a detached view of the slice
        param.grad = vec[pointer:pointer + num_param].view(param.size()).data

        # Increment the pointer
        pointer += num_param
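# A minimal usage sketch for the two converters above, assuming a small
# nn.Linear model; the model shape and the flat vector are illustrative,
# not part of the original code.
def _demo_vector_to_gradients():
    import torch.nn as nn

    model = nn.Linear(3, 2)  # 6 weight entries + 2 bias entries = 8
    total = sum(p.numel() for p in model.parameters())
    flat = torch.arange(total, dtype=torch.float32)
    vector_to_gradients(flat, model.parameters())
    # Each parameter now holds a slice of `flat` reshaped to its own shape
    for p in model.parameters():
        assert p.grad.shape == p.shape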
def parameters_to_grad_vector(parameters):
    r"""Flatten the gradients of the parameters into a single vector."""
    # Flag for the device where the parameters are located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)
        vec.append(param.grad.view(-1))
    return torch.cat(vec)
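# A short sketch of parameters_to_grad_vector after a backward pass; the
# model, random input, and squared-error loss are illustrative assumptions.
def _demo_parameters_to_grad_vector():
    import torch.nn as nn

    model = nn.Linear(3, 2)
    model(torch.randn(4, 3)).pow(2).sum().backward()
    flat = parameters_to_grad_vector(model.parameters())
    # One entry per parameter element, concatenated in parameter order
    assert flat.numel() == sum(p.numel() for p in model.parameters())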
def vector_to_parameters(vector, parameters):
    r"""Copy slices of one vector into the data of the parameters."""
    # Flag for the device where the parameters are located
    param_device = None

    # Pointer for slicing the vector for each parameter
    pointer = 0
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)

        # The length of the parameter
        num_param = param.numel()
        # Copy the corresponding slice of the vector into the parameter
        # in place
        param.data.copy_(vector[pointer:pointer + num_param]
                         .view_as(param).data)

        # Increment the pointer
        pointer += num_param
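# A round-trip sketch for vector_to_parameters, paired with PyTorch's
# torch.nn.utils.parameters_to_vector; the nn.Linear model is an
# illustrative assumption.
def _demo_vector_to_parameters():
    import torch.nn as nn
    from torch.nn.utils import parameters_to_vector

    model = nn.Linear(3, 2)
    flat = parameters_to_vector(model.parameters())
    # Writing back a zeroed copy of the flat vector overwrites every
    # parameter in place
    vector_to_parameters(torch.zeros_like(flat), model.parameters())
    for p in model.parameters():
        assert torch.all(p == 0)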
def get_grad(self):
    r"""Return the flattened gradient of this module and its L2 norm."""
    # Flag for the device where the parameters are located
    param_device = None

    vec = []
    for param in self.parameters():
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)
        vec.append(param.grad.view(-1))
    grad_vec = torch.cat(vec)
    norm_grad = grad_vec.norm(2)
    return {
        'grad': grad_vec.tolist(),
        'norm': norm_grad.item(),
    }
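# A sketch of get_grad bound as a method of a small module; the TinyNet
# class below is an illustrative assumption, not part of the original code.
def _demo_get_grad():
    import torch.nn as nn

    class TinyNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(3, 2)

        def forward(self, x):
            return self.fc(x)

        # Bind the module-level helper above as a method
        get_grad = get_grad

    net = TinyNet()
    net(torch.randn(4, 3)).pow(2).sum().backward()
    out = net.get_grad()
    # 'grad' is a flat Python list, 'norm' its L2 norm as a float
    assert len(out['grad']) == sum(p.numel() for p in net.parameters())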
def gradients_to_vector(parameters):
    r"""Convert gradients to one vector.

    Arguments:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The gradients of the parameters represented by a single vector.
    """
    # Flag for the device where the parameters are located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)
        # Every Tensor has a `grad` attribute, so `hasattr` would always
        # pass; check that the gradient has actually been populated
        assert param.grad is not None, "Param has no gradient"
        vec.append(param.grad.view(-1))
    return torch.cat(vec)
def grad_vector_to_parameters(vec, parameters):
    r"""Copy slices of one vector into the gradients of the parameters."""
    # Ensure vec is of type Tensor
    if not isinstance(vec, torch.Tensor):
        raise TypeError('expected torch.Tensor, but got: {}'
                        .format(torch.typename(vec)))
    # Flag for the device where the parameters are located
    param_device = None

    # Pointer for slicing the vector for each parameter
    pointer = 0
    for param in parameters:
        # Ensure the parameters are located on the same device
        param_device = _check_param_device(param, param_device)

        # The length of the parameter
        num_param = param.numel()
        # Slice the vector, reshape it, and replace the gradient of the
        # parameter with an independent copy
        param.grad = vec[pointer:pointer + num_param].view_as(param).clone()

        # Increment the pointer
        pointer += num_param
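# A sketch that combines gradients_to_vector and grad_vector_to_parameters
# to rescale gradients through the flat representation; the norm threshold
# and the model are illustrative assumptions.
def _demo_grad_roundtrip(max_norm=1.0):
    import torch.nn as nn

    model = nn.Linear(3, 2)
    model(torch.randn(4, 3)).pow(2).sum().backward()
    flat = gradients_to_vector(model.parameters())
    # Rescale the flat gradient if its L2 norm exceeds the threshold
    norm = flat.norm(2)
    if norm > max_norm:
        flat = flat * (max_norm / norm)
    grad_vector_to_parameters(flat, model.parameters())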