Example #1
    def forward(self, data: List[Tensor], state: Dict[str, Any]) -> List[Tensor]:
        results = []
        if self.model is None:
            initials = data[:len(data) // 2]
            finals = data[len(data) // 2:]
            for idx, (initial, final) in enumerate(zip(initials, finals)):
                retain_graph = self.retain_graph or idx != len(finals) - 1
                results.append(get_gradient(final, initial, tape=state['tape'], retain_graph=retain_graph))
        else:
            finals = data
            trainable_params = [p for p in self.model.parameters() if p.requires_grad] if isinstance(
                self.model, torch.nn.Module) else self.model.trainable_variables
            for idx, final in enumerate(finals):
                retain_graph = self.retain_graph or idx != len(finals) - 1
                results.append(get_gradient(final, trainable_params, tape=state['tape'], retain_graph=retain_graph))
        return results
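The `retain_graph` bookkeeping above matters because PyTorch frees the autograd graph after the first backward pass through it; only the last gradient call may release it. A minimal sketch of the underlying behavior using plain `torch.autograd.grad` (the tensors and names here are illustrative, not part of the example above):

```python
import torch

x = torch.ones(3, requires_grad=True)
y = (x * x).sum()

# The first call must retain the graph so it can be differentiated again;
# without retain_graph=True here, the second call would raise
# "Trying to backward through the graph a second time ...".
g1, = torch.autograd.grad(y, x, retain_graph=True)
# The last call can let the graph be freed to reclaim memory.
g2, = torch.autograd.grad(y, x, retain_graph=False)
```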
Example #2
    def forward(self, data: List[Tensor], state: Dict[str, Any]) -> List[Tensor]:
        results = []
        if self.model is None:
            initials = data[:len(data) // 2]
            finals = data[len(data) // 2:]
            for idx, (initial, final) in enumerate(zip(initials, finals)):
                retain_graph = self.retain_graph or idx != len(finals) - 1
                results.append(get_gradient(final, initial, tape=state['tape'], retain_graph=retain_graph))
        else:
            finals = data
            if self.framework == "tf":
                trainable_params = self.model.trainable_variables
                for idx, final in enumerate(finals):
                    gradient = get_gradient(final, trainable_params, tape=state['tape'])
                    results.append(gradient)
            elif self.framework == "torch":
                trainable_params = [p for p in self.model.parameters() if p.requires_grad]
                for idx, final in enumerate(finals):
                    # Retain the graph on every gradient call except the last one
                    retain_graph = self.retain_graph or idx != len(finals) - 1
                    gradient = get_gradient(final, trainable_params, retain_graph=retain_graph)
                    results.append(gradient)
            else:
                raise ValueError(f"Unrecognized framework {self.framework}")

        return results
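Note that the "tf" branch above passes no `retain_graph`: in TensorFlow the tape, not the graph, controls gradient lifetime, and a persistent tape can be queried multiple times. A small sketch of that behavior (variables here are placeholders):

```python
import tensorflow as tf

x = tf.Variable([1.0, 2.0, 3.0])
# persistent=True lets the tape compute gradients more than once,
# which is why the "tf" branch needs no retain_graph handling.
with tf.GradientTape(persistent=True) as tape:
    y = tf.reduce_sum(x * x)
    z = tf.reduce_sum(3.0 * x)
gy = tape.gradient(y, x)  # [2., 4., 6.]
gz = tape.gradient(z, x)  # [3., 3., 3.]
del tape  # free the tape's resources once all gradients are computed
```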
Example #3
def update_model(model: Union[tf.keras.Model, torch.nn.Module],
                 loss: Union[tf.Tensor, torch.Tensor],
                 tape: Optional[tf.GradientTape] = None,
                 retain_graph: bool = True):
    """Update `model` weights based on a given `loss`.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    x = tf.ones((3,28,28,1))  # (batch, height, width, channels)
    y = tf.constant((1, 0, 1))
    with tf.GradientTape(persistent=True) as tape:
        pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
        loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
        fe.backend.update_model(m, loss=loss, tape=tape)
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    x = torch.ones((3,1,28,28))  # (batch, channels, height, width)
    y = torch.tensor((1, 0, 1))
    pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
    loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
    fe.backend.update_model(m, loss=loss)
    ```

    Args:
        model: A neural network instance to update.
        loss: A loss value to compute gradients from.
        tape: A TensorFlow GradientTape which was recording when the `loss` was computed (iff using TensorFlow).
        retain_graph: Whether to keep the model graph in memory (applicable only for PyTorch).

    Raises:
        ValueError: If `model` is an unacceptable data type.
    """
    loss = reduce_mean(loss)
    if isinstance(model, tf.keras.Model):
        # scale up loss for mixed precision training to avoid underflow
        if isinstance(model.current_optimizer, mixed_precision.LossScaleOptimizer):
            loss = model.current_optimizer.get_scaled_loss(loss)
        # for multi-gpu training, the gradient will be combined by sum, normalize the loss
        strategy = tf.distribute.get_strategy()
        if isinstance(strategy, tf.distribute.MirroredStrategy):
            loss = loss / strategy.num_replicas_in_sync
        gradients = get_gradient(loss, model.trainable_variables, tape=tape)
        with tape.stop_recording():
            # scale down gradient to balance scale-up loss
            if isinstance(model.current_optimizer, mixed_precision.LossScaleOptimizer):
                gradients = model.current_optimizer.get_unscaled_gradients(gradients)
            model.current_optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    elif isinstance(model, torch.nn.Module):
        gradients = get_gradient(loss, model.parameters(), retain_graph=retain_graph)
        for gradient, parameter in zip(gradients, model.parameters()):
            parameter.grad = gradient
        model.current_optimizer.step()
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
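The scale-up/scale-down dance in the mixed-precision branch is symmetric: the loss is multiplied by a scale factor before differentiation so small gradients don't underflow in float16, and since differentiation is linear the resulting gradients come out multiplied by the same factor, so they are divided back down before being applied. A small numeric sketch of that identity, with a hypothetical scale factor:

```python
import torch

w = torch.tensor(2.0, requires_grad=True)
scale = 1024.0  # hypothetical loss-scale factor

loss = w * 3.0             # d(loss)/dw = 3.0
(loss * scale).backward()  # the gradient arrives multiplied by the scale
unscaled = w.grad / scale  # divide it back out before the optimizer step
assert unscaled.item() == 3.0
```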
Example #4
    def forward(self, data: List[Tensor], state: Dict[str, Any]) -> Tensor:
        data, loss = data
        grad = get_gradient(target=loss, sources=data, tape=state['tape'], retain_graph=self.retain_graph)
        adverse_data = clip_by_value(data + self.epsilon * sign(grad),
                                     min_value=self.clip_low or reduce_min(data),
                                     max_value=self.clip_high or reduce_max(data))
        return adverse_data
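This forward implements the Fast Gradient Sign Method: the input is nudged by `epsilon` along the sign of the loss gradient with respect to the input, then clipped back into a valid range. A rough standalone equivalent in plain PyTorch (the function name `fgsm_attack` and its parameters are illustrative, not part of the example above):

```python
import torch

def fgsm_attack(data: torch.Tensor, loss: torch.Tensor, epsilon: float,
                clip_low: float, clip_high: float) -> torch.Tensor:
    # `data` must have requires_grad=True when `loss` is computed.
    # Differentiate the loss with respect to the *input*, not the weights.
    grad, = torch.autograd.grad(loss, data, retain_graph=True)
    return torch.clamp(data + epsilon * grad.sign(), clip_low, clip_high)
```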
Example #5
    def _get_gradient(
            self,
            loss: Union[Tensor, List[Tensor]],
            tape: Optional[tf.GradientTape] = None
    ) -> Union[Tensor, List[Tensor]]:
        """Get gradient from loss with repect to self.model.

        Args:
            loss: Input loss.
            tape: A TensorFlow GradientTape which was recording when the `loss` was computed (iff using TensorFlow).

        Returns:
            Computed gradients.
        """
        if self.framework == "tf":
            gradients = get_gradient(loss,
                                     self.model.trainable_variables,
                                     tape=tape)

        else:  # self.framework == "torch"
            trainable_params = [
                p for p in self.model.parameters() if p.requires_grad
            ]
            try:
                gradients = get_gradient(loss,
                                         trainable_params,
                                         retain_graph=self.retain_graph)
            except RuntimeError as err:
                if err.args and isinstance(err.args[0], str) and err.args[0].startswith(
                        'one of the variables needed for gradient computation has been modified by an inplace operation'):
                    raise RuntimeError(
                        "When computing gradients for '{}', some variables it relied on during the forward pass had"
                        " been updated. Consider setting defer=True in earlier UpdateOps related to models which "
                        "interact with this one.".format(self.model.model_name))
                raise err

        return gradients
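The RuntimeError this wrapper translates comes from PyTorch's saved-tensor version check: if a tensor the graph saved during the forward pass (such as a weight updated by another model's optimizer step) is modified in place before the backward pass, autograd refuses to differentiate. A minimal reproduction, assuming standard PyTorch semantics (model and optimizer here are placeholders):

```python
import torch

lin = torch.nn.Linear(2, 2)
opt = torch.optim.SGD(lin.parameters(), lr=0.1)
params = list(lin.parameters())

x = torch.ones(1, 2, requires_grad=True)
loss = lin(x).sum()  # autograd saves lin.weight to differentiate w.r.t. x

grads = torch.autograd.grad(loss, params, retain_graph=True)
for p, g in zip(params, grads):
    p.grad = g
opt.step()  # modifies lin.weight in place

# Differentiating again now raises: "one of the variables needed for
# gradient computation has been modified by an inplace operation"
torch.autograd.grad(loss, x)
```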
Example #6
    def forward(self, data: List[Tensor], state: Dict[str, Any]) -> List[Tensor]:
        initials = data[:len(data) // 2]
        finals = data[len(data) // 2:]
        results = []
        for initial, final in zip(initials, finals):
            results.append(get_gradient(final, initial, tape=state['tape'], retain_graph=self.retain_graph))
        return results
Example #7
def update_model(model: Union[tf.keras.Model, torch.nn.Module],
                 loss: Union[None, tf.Tensor, torch.Tensor] = None,
                 gradients: Optional[List[Union[tf.Tensor, torch.Tensor]]] = None,
                 tape: Optional[tf.GradientTape] = None,
                 retain_graph: bool = True,
                 scaler: Optional[torch.cuda.amp.GradScaler] = None,
                 defer: bool = False,
                 deferred: Optional[Dict[str, List[Callable[[], None]]]] = None) -> None:
    """Update `model` weights based on a given `loss`.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    x = tf.ones((3,28,28,1))  # (batch, height, width, channels)
    y = tf.constant((1, 0, 1))
    with tf.GradientTape(persistent=True) as tape:
        pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
        loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
        fe.backend.update_model(m, loss=loss, tape=tape)
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    x = torch.ones((3,1,28,28))  # (batch, channels, height, width)
    y = torch.tensor((1, 0, 1))
    pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
    loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
    fe.backend.update_model(m, loss=loss)
    ```

    Args:
        model: A neural network instance to update.
        loss: A loss value to compute gradients from, mutually exclusive with `gradients`.
        gradients: A list of tensors to update the models, mutually exclusive with `loss`.
        tape: A TensorFlow GradientTape which was recording when the `loss` was computed (iff using TensorFlow).
        retain_graph: Whether to keep the model graph in memory (applicable only for PyTorch).
        scaler: A PyTorch loss scaler that scales loss when PyTorch mixed precision is used.
        defer: If True, then the model update function will be stored into the `deferred` dictionary rather than
            applied immediately.
        deferred: A dictionary in which model update functions are stored.

    Raises:
        ValueError: If `model` is an unacceptable data type.
        RuntimeError: If attempting to modify a PyTorch model which relied on gradients within a different PyTorch model
            which has in turn already undergone a non-deferred update.
    """
    if loss is not None:
        loss = reduce_mean(loss)
    if isinstance(model, tf.keras.Model):
        if loss is not None:
            # scale up loss for mixed precision training to avoid underflow
            if isinstance(model.current_optimizer,
                          mixed_precision.LossScaleOptimizer):
                loss = model.current_optimizer.get_scaled_loss(loss)
            # for multi-gpu training, the gradient will be combined by sum, normalize the loss
            strategy = tf.distribute.get_strategy()
            if isinstance(strategy, tf.distribute.MirroredStrategy):
                loss = loss / strategy.num_replicas_in_sync
            gradients = get_gradient(loss,
                                     model.trainable_variables,
                                     tape=tape)
        with tape.stop_recording():
            # scale down gradient to balance scale-up loss
            if isinstance(model.current_optimizer,
                          mixed_precision.LossScaleOptimizer):
                gradients = model.current_optimizer.get_unscaled_gradients(
                    gradients)
            if defer:
                deferred.setdefault(model.model_name, []).append(
                    lambda: model.current_optimizer.apply_gradients(
                        zip(gradients, model.trainable_variables)))
            else:
                model.current_optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))
    elif isinstance(model, torch.nn.Module):
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        if loss is not None:
            # scale up loss for mixed precision training to avoid underflow
            if scaler is not None:
                loss = scaler.scale(loss)
            try:
                gradients = get_gradient(loss,
                                         trainable_params,
                                         retain_graph=retain_graph)
            except RuntimeError as err:
                if err.args and isinstance(err.args[0], str) and err.args[0].startswith(
                        'one of the variables needed for gradient computation has been modified by an inplace operation'):
                    raise RuntimeError(
                        "When computing gradients for '{}', some variables it relied on during the forward pass had"
                        " been updated. Consider setting defer=True in earlier UpdateOps related to models which "
                        "interact with this one.".format(model.model_name))
                raise err
        for gradient, parameter in zip(gradients, trainable_params):
            if parameter.grad is not None:
                parameter.grad += gradient
            else:
                parameter.grad = gradient.clone()
        if defer:
            # Only need to call once per model since gradients are getting accumulated
            deferred[model.model_name] = [
                lambda: _torch_step(model.current_optimizer, scaler)
            ]
        else:
            _torch_step(model.current_optimizer, scaler)

            if deferred:
                deferred.pop(model.model_name, None)  # Don't need those deferred steps anymore
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
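The `defer`/`deferred` pair turns optimizer steps into queued closures so that every gradient is computed before any parameters change, sidestepping the in-place error shown under Example #5. A stripped-down sketch of the queue mechanics (the dictionary keys and callables here are placeholders):

```python
from typing import Callable, Dict, List

deferred: Dict[str, List[Callable[[], None]]] = {}

# During the training step, each model queues its update instead of applying it:
deferred.setdefault('model_a', []).append(lambda: print('apply model_a step'))
deferred.setdefault('model_b', []).append(lambda: print('apply model_b step'))

# Once all gradients exist, flush the queue; no forward-pass tensor
# is modified in place until this point.
for fns in deferred.values():
    for fn in fns:
        fn()
deferred.clear()
```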