コード例 #1
0
ファイル: train_one_for_all.py プロジェクト: hnt4499/DPR
    def backward(
        self,
        loss: torch.Tensor,
        optimizer: torch.optim.Optimizer,
        scheduler,
        step: bool,
    ):
        """Run back-propagation, with optional apex fp16 scaling/clipping.

        :param loss: scalar loss to differentiate.
        :param optimizer: optimizer whose parameters receive gradients.
        :param scheduler: LR scheduler, advanced together with the optimizer.
        :param step: when True, apply optimizer/scheduler steps and reset grads.
        """
        max_norm = self.cfg.train.max_grad_norm
        if self.cfg.fp16:
            # apex amp path: scale the loss, clip the fp32 master params.
            from apex import amp

            with amp.scale_loss(loss, optimizer) as scaled:
                scaled.backward()
            if max_norm > 0:
                torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), max_norm
                )
        else:
            loss.backward()
            if max_norm > 0:
                torch.nn.utils.clip_grad_norm_(
                    self.model.parameters(), max_norm
                )

        if not step:
            return
        optimizer.step()
        scheduler.step()
        self.model.zero_grad()
コード例 #2
0
def optimizer_step_spottune(optimizer_main: Optimizer,
                            optimizer_policy: Optimizer, loss: torch.Tensor,
                            **params) -> torch.Tensor:
    """
    Backward pass on ``loss`` followed by a gradient step of both the main
    and the policy optimizers.

    ``params`` must provide ``lr_main`` and ``lr_policy``; these overwrite
    the optimizers' learning rates and are *not* restored afterwards.

    Returns the (now back-propagated) ``loss`` tensor unchanged.
    """
    set_params(optimizer_main, lr=params['lr_main'])
    set_params(optimizer_policy, lr=params['lr_policy'])

    for opt in (optimizer_main, optimizer_policy):
        opt.zero_grad()

    loss.backward()

    for opt in (optimizer_main, optimizer_policy):
        opt.step()

    return loss
コード例 #3
0
ファイル: apex_utils.py プロジェクト: singlasahil14/archai
 def backward(self, loss:torch.Tensor, multi_optim:MultiOptim)->None:
     """Back-propagate ``loss``; scale it via amp when mixed precision is on."""
     if not self.is_mixed():
         loss.backward()
         return
     # Mixed precision: amp scales the loss against the owning optimizer.
     optim = self._get_optim(multi_optim)
     with self._amp.scale_loss(loss, optim) as scaled_loss:
         scaled_loss.backward()
コード例 #4
0
    def _run_backward(self, tensor: Tensor, model: Optional[Module],
                      *args: Any, **kwargs: Any) -> None:
        """Lightning-independent backward logic.

        Currently only used by Lightning Lite. Subject to further refactors.
        """
        tensor.backward(*args, **kwargs)
コード例 #5
0
def policy_svg(policy: TVLinearPolicy, value: Tensor) -> lqr.Linear:
    """Differentiate the estimated return to obtain the policy SVG.

    Clears existing policy gradients, back-propagates ``value``, and returns
    clones of the gradients of the policy's standard-form matrices.
    """
    # pylint:disable=invalid-name
    policy.zero_grad(set_to_none=True)
    value.backward()
    K, k = policy.standard_form()
    grads = (K.grad.clone(), k.grad.clone())
    return grads
コード例 #6
0
 def _backpropagate(self, loss: torch.Tensor):
     self.optimizer.zero_grad()
     loss.backward()
     if self.gradient_clipping_value is not None:
         torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                        self.gradient_clipping_value)
     self.optimizer.step()
コード例 #7
0
    def backward_step(self, model: nn.Module, loss: torch.Tensor,
                      optimizer: optim.Optimizer, scaler: amp.GradScaler):
        """One (possibly accumulated) backward micro-step.

        The loss is divided by ``self.steps`` so gradients accumulated over
        a cycle average out; DDP gradient sync is suppressed on every
        micro-step except the last one of the cycle.
        """
        if optimizer is None:
            return

        scaled = loss / self.steps
        if scaler is not None:
            scaled = scaler.scale(scaled)

        if self.is_start_cycle:
            # Fresh cycle: drop gradients left over from the previous one.
            optimizer.zero_grad()

        skip_sync = (isinstance(model, nn.parallel.DistributedDataParallel)
                     and not self.is_end_cycle)
        if skip_sync:
            # Avoid an all-reduce per micro-step; sync happens on the last.
            with model.no_sync():
                scaled.backward()
        else:
            scaled.backward()

        if self.is_end_cycle:
            if scaler is None:
                optimizer.step()
            else:
                scaler.step(optimizer)
                scaler.update()

        self.inc_counter()
コード例 #8
0
ファイル: nmf.py プロジェクト: asifzubair/pytorch-NMF
def _sp_double_backward_update(pos_out: Tensor,
                               neg_out: Tensor,
                               param: Parameter,
                               gamma: float,
                               l1_reg: float,
                               l2_reg: float,
                               pos: Tensor = None):
    """Multiplicative NMF update of ``param`` via two backward passes.

    The gradient of ``neg_out`` (numerator) and of ``pos_out`` (denominator,
    unless ``pos`` is supplied) w.r.t. ``param`` form a multiplier; L1/L2
    regularisation terms are folded into the denominator.
    """
    # Numerator: gradient of the negative term, clamped positive (+eps).
    param.grad = None
    neg_out.backward()
    numerator = param.grad.relu_().add_(eps)

    if pos is None:
        # Denominator: gradient of the positive term, clamped positive.
        param.grad = None
        pos_out.backward()
        pos = param.grad.relu_().add_(eps)

    if l1_reg > 0:
        pos.add_(l1_reg)
    if l2_reg > 0:
        pos = pos.add(param.data, alpha=l2_reg)

    ratio = numerator.div_(pos)
    if gamma != 1:
        # Damped update when gamma != 1.
        ratio.pow_(gamma)
    param.data.mul_(ratio)
コード例 #9
0
ファイル: Utils.py プロジェクト: vladmosin/NeuralODE
def backprop(loss: torch.Tensor, model: torch.nn.Module, optimizer):
    """Standard update: zero grads, backward on ``loss``, optimizer step.

    ``model`` is unused (a disabled per-parameter gradient clamp once used
    it); it remains in the signature for caller compatibility.
    """
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
コード例 #10
0
def step(iteration: int, loss: Tensor, optimizer: Adam,
         scheduler: ExponentialLR) -> None:
    """Run one optimization step and sync the LR schedule to ``iteration``.

    Note: passing ``iteration`` to ``scheduler.step`` uses the (deprecated)
    epoch argument of torch schedulers.
    """
    optimizer.zero_grad()      # clear stale gradients
    loss.backward()            # accumulate fresh ones
    optimizer.step()           # apply the update
    scheduler.step(iteration)  # advance the schedule
コード例 #11
0
ファイル: train.py プロジェクト: jrobine/smaller-world-models
 def optimize_agent_network(loss: Tensor) -> None:
     """Single PPO update of the agent network.

     Logging lives in log_ppo_stats, since PPO optimizes several times per
     batch.
     """
     agent_optimizer.zero_grad()
     loss.backward()
     # Keep updates stable: clip the global gradient norm at 0.5.
     nn.utils.clip_grad_norm_(agent_network.parameters(), max_norm=0.5)
     agent_optimizer.step()
コード例 #12
0
    def update_action_values(self, guess: torch.Tensor, error, state_hash):
        """Apply a TD-style update with per-state eligibility traces.

        Back-propagates from ``guess`` to populate ``parameter.grad``, then,
        for every trainable parameter: decays all stored eligibility traces
        by ``self.lambd * self.gamma``, adds the fresh gradient to the trace
        keyed by ``state_hash``, and nudges the parameter by
        ``-self.alpha * error`` times every trace.

        NOTE(review): assumes a CUDA device is available (hard-coded
        ``'cuda'``) and that ``guess`` has shape [1, 1, 3, 3] — confirm with
        callers. ``error`` is presumably a scalar tensor or float; verify.

        :param guess: network output to differentiate; the graph is retained
            so this method can be called repeatedly on the same forward pass.
        :param error: TD error scaling the parameter update.
        :param state_hash: hashable key identifying the current state.
        """
        # error = torch.from_numpy(error).float()
        # initial_gradient = torch.randn([1, 1, 3, 3])
        # guess.retain_grad()
        # guess.backward(initial_gradient) ##  We now have the grad in each layer
        # torch.nn.utils.clip_grad_value_(self.conv1.weight, 0.1)
        # torch.nn.utils.clip_grad_value_(self.conv2.weight, 0.1)
        # torch.nn.utils.clip_grad_value_(self.conv3.weight, 0.1)
        # torch.nn.utils.clip_grad_value_(self.conv_final.weight, 0.1)



        # Seed backward with an all-ones upstream gradient; retain_graph lets
        # us backward through the same graph again on later calls.
        initial_gradient = torch.ones([1, 1, 3, 3]).to('cuda')
        guess.retain_grad()
        guess.backward(initial_gradient, retain_graph=True)


        for name, parameter in self.named_parameters():
            if not parameter.requires_grad:
                continue
            # First visit of this state: start its trace at zero.
            if state_hash not in self.e[name]:
                self.e[name][state_hash] = 0
            # Decay every stored trace (eligibility fading).
            for state in self.e[name].keys():
                self.e[name][state] = self.e[name][state] * self.lambd * self.gamma
            # The current state's trace absorbs the fresh gradient.
            self.e[name][state_hash] += parameter.grad
            # Update the parameter against every state's trace.
            for state in self.e[name].keys():
                parameter.data -= self.alpha * error * self.e[name][state]
            # Clear grads so repeated backward calls don't accumulate.
            parameter.grad.zero_()
コード例 #13
0
def compute_grad_receptive_field(mod: ModuleInfo, input: Tensor,
                                 output: Tensor,
                                 infos: ModuleInfoIndex) -> Tuple[int, ...]:
    """Estimate ``mod``'s receptive field by back-propagating a one-hot grad.

    A gradient that is 1 at the spatial center of ``output`` (batch 0,
    channel 0) and 0 elsewhere is pushed backwards; the extent of nonzero
    entries in ``input.grad`` along each spatial dim is the receptive field.

    Returns ``(-1,)`` when ``output`` is not a single tensor, since there is
    nothing to backward from.
    """
    if not isinstance(output, Tensor):
        # We cannot backward() from this. Skip, we just won't be able
        # to compute the receptive field here without some other
        # affordance to combine the tensors in this output into a
        # "loss"
        return (-1, )

    # https://github.com/rogertrullo/Receptive-Field-in-Pytorch/blob/master/Receptive_Field.ipynb
    # zeros_like keeps the fake gradient on output's device and dtype;
    # torch.zeros(output.shape) would break for CUDA or non-float32 outputs.
    fake_grad = torch.zeros_like(output)
    # batch=0, channel=0, spatial center
    center_pos = (0, 0, *[i // 2 for i in output.shape[2:]])
    fake_grad[center_pos] = 1

    # zero_grad everything before and including this module, since backward() accumulates
    for i in range(0, mod.input_order + 1):
        infos.by_input_order[i].module.zero_grad()

    # retain_graph so we can run this multiple times and not drop the
    # intermediate results from forward()
    output.backward(gradient=fake_grad, retain_graph=True)

    # Find the extent of pixels affected; drop batch/channel
    nonzero_idxs = input.grad.nonzero(as_tuple=True)[2:]
    rf_dims = [(d.max() - d.min() + 1).item() for d in nonzero_idxs]
    return tuple(rf_dims)
コード例 #14
0
    def backward(self, trainer, loss: Tensor, optimizer: Optimizer,
                 optimizer_idx: int) -> None:
        """Perform the backward step; override if you need custom behavior.

        :param trainer: Pointer to the trainer
        :param loss: Loss, already scaled for accumulated gradients
        :param optimizer: Current optimizer being used
        :param optimizer_idx: Index of the current optimizer being used

        Example override::

            def backward(self, use_amp, loss, optimizer):
                if use_amp:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
        """
        if trainer.precision != 16:
            loss.backward()
        elif not trainer.on_tpu:
            # 16-bit off-TPU goes through apex amp loss scaling;
            # on TPU, .backward is not special for 16-bit (handled elsewhere).
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
コード例 #15
0
 def _backprop_step(self, loss: Tensor) -> None:
     # Clean gradients
     self.optimizer.zero_grad()
     # Backpropate the loss
     loss.backward()
     # Update the params
     self.optimizer.step()
コード例 #16
0
ファイル: imagemodel.py プロジェクト: ndrsn0208/trojanzoo
            def after_loss_fn_new(_input: torch.Tensor, _label: torch.Tensor, _output: torch.Tensor,
                                  loss: torch.Tensor, optimizer: Optimizer, loss_fn: Callable[..., torch.Tensor] = None,
                                  amp: bool = False, scaler: torch.cuda.amp.GradScaler = None, **kwargs):
                """Adversarial-training hook run after the normal loss.

                Each PGD iteration applies the pending optimizer step, crafts
                an adversarial example from ``_input`` (model switched to eval
                mode during the attack, back to train afterwards), recomputes
                the loss on it, chains the previous ``after_loss_fn`` if one
                exists, and back-propagates the adversarial loss (scaled when
                ``amp`` is on) so the *next* iteration's step applies it.

                NOTE(review): relies on closure variables
                ``after_loss_fn_old`` and ``adv_train_epsilon`` plus
                ``self.pgd`` from the enclosing scope — confirm they are
                defined where this hook is installed.
                """
                noise = torch.zeros_like(_input)
                adv_loss_fn = functools.partial(self.adv_loss, _label=_label)

                for m in range(self.pgd.iteration):
                    # Apply the step accumulated from the previous backward.
                    if amp:
                        scaler.step(optimizer)
                        scaler.update()
                    else:
                        optimizer.step()
                    # Attack in eval mode so dropout/BN don't perturb PGD.
                    self.eval()
                    adv_x, _ = self.pgd.optimize(_input=_input, noise=noise,
                                                 loss_fn=adv_loss_fn,
                                                 iteration=1, epsilon=adv_train_epsilon)
                    self.train()
                    loss = loss_fn(adv_x, _label)
                    if callable(after_loss_fn_old):
                        after_loss_fn_old(_input=_input, _label=_label, _output=_output,
                                          loss=loss, optimizer=optimizer, loss_fn=loss_fn,
                                          amp=amp, scaler=scaler, **kwargs)
                    # Backward on the adversarial loss for the next step.
                    if amp:
                        scaler.scale(loss).backward()
                    else:
                        loss.backward()
コード例 #17
0
    def backward(
        self,
        model: 'LightningModule',
        closure_loss: torch.Tensor,
        optimizer: 'Optimizer',
        opt_idx: int,
        should_accumulate: bool,
        *args: Any,
        **kwargs: Any,
    ) -> torch.Tensor:
        """Run the actual backpropagation and return the detached loss.

        Args:
            model: the model to be optimized
            closure_loss: the loss value obtained from the closure
            optimizer: the optimizer to perform the step lateron
            opt_idx: the optimizer's index
            should_accumulate: whether to accumulate gradients or not
        """
        if model.automatic_optimization:
            # Let the LightningModule hook drive the backward pass.
            model.backward(closure_loss, optimizer, opt_idx)
        else:
            closure_loss.backward(*args, **kwargs)

        # Backward is done — detach to release the autograd graph.
        return closure_loss.detach()
コード例 #18
0
 def update_gradient(self, loss: torch.Tensor):
     """Back-propagate ``loss`` and immediately apply an optimizer step.

     Note that gradients are *not* zeroed here; callers manage that.

     :param loss: loss tensor from criterion output
     """
     loss.backward()        # 1. gradients via autograd
     self.optimizer.step()  # 2. parameter update
コード例 #19
0
def update_networks_on_loss(loss: torch.Tensor, *networks) -> None:
    """Backward ``loss`` and step each network's own optimizer.

    A falsy loss (e.g. None or a zero scalar tensor) is treated as
    "nothing to do" and skipped entirely.
    """
    if not loss:
        return
    for net in networks:
        net.zero_grad()
    loss.backward()
    for net in networks:
        net.optimizer.step()
コード例 #20
0
    def minimize(
        self,
        loss: torch.Tensor,
        optimizer_name: str = "primary",
        *,
        retain_graph: bool = False,
        checkpoint_interval: Optional[float] = None,
        clip_grad_max_norm: Optional[float] = None,
    ) -> None:
        """Compute gradients and use them to minimize a loss function.

        Lazily instantiates the named optimizer, updates its learning rate
        from any configured scheduler, zeroes grads, backwards ``loss``,
        optionally clips the global grad norm of the optimizer's first param
        group, steps, bumps the global step counter, and may write an
        automatic wall-clock-based checkpoint.

        :param loss: scalar tensor to minimize.
        :param optimizer_name: key of the optimizer in the optimizer dict.
        :param retain_graph: forwarded to ``loss.backward``.
        :param checkpoint_interval: seconds between auto-checkpoints; None
            falls back to the configured default, 0 disables checkpointing.
        :param clip_grad_max_norm: max grad norm; None disables clipping.
        """
        model = cast("Buddy", self)._model
        assert model is not None, "No model attached!"

        # Get optimizer
        self._instantiate_optimizer(optimizer_name)
        optimizer: torch.optim.Optimizer = self._optimizer_dict[optimizer_name]

        # Update learning rate using scheduler if possible
        schedulers = self._optimizer_config.learning_rate_schedulers
        if optimizer_name in schedulers:
            self._set_learning_rate(
                schedulers[optimizer_name](self._optimizer_config.global_steps),
                optimizer_name,
            )

        # Take gradient step
        optimizer.zero_grad()
        loss.backward(retain_graph=retain_graph)  # type: ignore
        if clip_grad_max_norm is not None:
            torch.nn.utils.clip_grad_norm_(
                optimizer.param_groups[0]["params"],
                max_norm=clip_grad_max_norm,
            )
        optimizer.step()

        # Update global step count
        self._optimizer_config.global_steps += 1

        # Autocheckpoint procedure
        if checkpoint_interval is None:
            checkpoint_interval = self._optimizer_checkpoint_interval

        # Disable autocheckpoint if interval is 0
        if checkpoint_interval == 0:
            return

        if self._optimizer_last_checkpoint_time is None:
            # First iteration
            self._optimizer_last_checkpoint_time = time.time()
        elif (
            time.time() - cast(float, self._optimizer_last_checkpoint_time)
            # Fixed: compare against the resolved interval (the old code read
            # self._optimizer_checkpoint_interval here, silently ignoring a
            # caller-supplied checkpoint_interval).
            > checkpoint_interval
        ):  # pragma: no cover
            # Checkpoint!
            cast("_BuddyCheckpointing", self).save_checkpoint()
            self._optimizer_last_checkpoint_time = time.time()
コード例 #21
0
 def _get_gradient_from_torch(self, f: torch.Tensor):
     """
     Get the gradient of f w.r.t. the policy's parameters.
     :param f: The parametric function.
     :return: the gradient.
     """
     f.backward()
     g = self._get_gradient()
     self.zero_grad()
     return g
コード例 #22
0
 def backward(self, loss: torch.Tensor,  optimizer: nn.Module,
              retain_graph: bool = False):
     r"""Backward pass, scaling the loss via amp in mixed precision.

     When ``retain_graph`` is False the optimizer is stepped immediately
     afterwards.
     """
     if self.precision != "mixed":
         loss.backward(retain_graph=retain_graph)
     else:
         # amp scales the loss to avoid fp16 gradient underflow.
         with amp.scale_loss(loss, optimizer) as scaled_loss:
             scaled_loss.backward(retain_graph=retain_graph)
     if not retain_graph:
         optimizer.step()
コード例 #23
0
 def _backprop_step(self, loss: Tensor, grad_clip: float = .1) -> None:
     # Clean gradients
     self.optimizer.zero_grad()
     # Backpropate the loss
     loss.backward()
     # Safeguard for Gradient explosion
     if isinstance(grad_clip, float):
         torch.nn.utils.clip_grad_norm_(self.model.parameters(), grad_clip)
     # Update the params
     self.optimizer.step()
コード例 #24
0
ファイル: runner.py プロジェクト: cnstark/easytorch
    def backward(self, loss: torch.Tensor):
        """Backward and update params.

        Args:
            loss (torch.Tensor): loss
        """
        self.optim.zero_grad()  # reset accumulated gradients
        loss.backward()         # back-propagate
        self.optim.step()       # apply the parameter update
コード例 #25
0
    def backward(self, tensor: torch.Tensor) -> None:
        """Computes the gradient of the specified tensor w.r.t. graph leaves.

        Args:
            tensor (torch.Tensor): Tensor of which the derivative will be computed.
        """
        if not self.amp_is_enabled:
            tensor.backward()
            return
        # AMP: scale first so fp16 gradients do not underflow.
        self.scaler.scale(tensor).backward()
コード例 #26
0
    def bw_step(self, loss: torch.Tensor, optimizer: optim.Optimizer):
        """One accumulated backward micro-step.

        The loss is averaged over ``self.steps`` micro-steps; gradients are
        cleared at the start of a cycle and applied at its end.

        Args:
            loss: scalar loss for this micro-step.
            optimizer: optimizer to step at cycle end; None makes this a no-op.
        """
        if optimizer is None:
            return

        if self.is_start_cycle:
            # Clear *before* backward — the old code zeroed afterwards, wiping
            # the gradients it had just computed.
            optimizer.zero_grad()

        # Scale the loss instead of passing gradient=1/self.steps:
        # Tensor.backward requires a tensor gradient, so the old float
        # argument raised a TypeError at runtime.
        (loss / self.steps).backward()

        if self.is_end_cycle:
            optimizer.step()

        self.inc_counter()
コード例 #27
0
 def _backward_pass(
     self,
     targets: Tensor,
     in_queue: GpuAwareQueue[LocalBackwardData],
     out_queue: GpuAwareQueue[LocalBackwardData],
     inputs: Tensor,
     activations: Tensor,
 ) -> None:
     """Pipeline backward stage: consume downstream end-to-end gradients,
     back-propagate through this stage's activations, and ship the resulting
     input gradients upstream.

     ``targets`` is unused here; it is kept for the stage interface.
     """
     incoming = cast(E2EBackwardData, in_queue.get(self.device))
     activations.backward(gradient=incoming.e2e_gradients)
     out_queue.put(E2EBackwardData(inputs.grad))
コード例 #28
0
ファイル: base.py プロジェクト: hilbert9221/NRI-MPM
    def optimize(opt: Optimizer, loss: torch.Tensor):
        """
        Optimize the parameters based on the loss and the optimizer.

        Args:
            opt: optimizer
            loss: loss, a scalar
        """
        opt.zero_grad()  # clear previous gradients
        loss.backward()  # compute fresh ones
        opt.step()       # take the gradient step
コード例 #29
0
    def backward(self, loss: torch.Tensor) -> None:
        """
        Compute gradients with respect to the loss.

        Calls :func:`zero_grad` first, then computes the gradient via
        `torch.Tensor.backward <https://pytorch.org/docs/stable/
        tensors.html#torch.Tensor.backward>`_. See :mod:`torch.autograd` for
        more information.
        """
        # TODO (aadcock): Add gradient accumulation logic
        self.zero_grad()
        loss.backward()
コード例 #30
0
 def _backward(
     self,
     loss: Tensor,
     opt: torch.optim.Optimizer,
     params: Optional[Iterable[Tensor]] = None,
     grad_clip: Optional[float] = None,
 ) -> None:
     opt.zero_grad()
     loss.backward()
     grad_clip = grad_clip or self.config.grad_clip
     if params is not None and grad_clip is not None:
         nn.utils.clip_grad_norm_(params, grad_clip)
     opt.step()