Beispiel #1
0
    def _backpropagates_gradient(
        self,
        tape: tf.GradientTape,
        models: List[tf.keras.Model],
        loss: tf.float32,
        optimizer: tf.keras.optimizers.Adam,
    ) -> None:
        """Backpropagate the gradient of the loss into the given networks.

        Args:
            tape: tape that recorded the forward pass producing ``loss``.
            models: networks whose trainable variables are updated.
            loss: scalar loss recorded on ``tape``.
            optimizer: optimizer used to apply the gradients.
        """
        # Flatten every model's trainable variables into one list.
        all_variables = [
            variable
            for model in models
            for variable in model.trainable_variables
        ]
        gradients = tape.gradient(loss, all_variables)
        optimizer.apply_gradients(zip(gradients, all_variables))
Beispiel #2
0
 def _optimize(
     self,
     train_vars: List[Variable],
     loss: Tensor,
     optim: Optimizer,
     tape: tf.GradientTape,
 ) -> None:
     """Apply one optimizer step to ``train_vars``.

     When mixed precision is enabled, the loss is scaled before
     differentiation and the resulting gradients are unscaled again
     before being applied.
     """
     scaling = self.mixed_precision
     effective_loss = optim.get_scaled_loss(loss) if scaling else loss
     gradients = tape.gradient(effective_loss, train_vars)
     if scaling:
         gradients = optim.get_unscaled_gradients(gradients)
     optim.apply_gradients(zip(gradients, train_vars))
def grads_calc(tape: tf.GradientTape,
               last_cnn_output: Model,
               top_class_channel: List[List[int]]):
    """Weight each channel of the conv feature map by its pooled gradient.

    Differentiates the top-class score w.r.t. the last convolutional
    output, global-average-pools the gradients per channel, and scales
    each channel of the first example's feature map by that weight
    (Grad-CAM style).
    """
    # Gradient of the top-class score w.r.t. the conv feature map.
    channel_grads = tape.gradient(top_class_channel, last_cnn_output)
    # Global average pooling over batch and spatial axes -> one weight
    # per channel.
    pooled = tf.reduce_mean(channel_grads, axis=(0, 1, 2))
    # Work on numpy copies; take the first example of the batch.
    weighted_map = last_cnn_output.numpy()[0]
    channel_weights = pooled.numpy()
    # Scale each channel by the importance of its gradient.
    for channel in range(channel_weights.shape[-1]):
        weighted_map[:, :, channel] *= channel_weights[channel]
    return weighted_map
Beispiel #4
0
def dpg(
    q_max: tf.Tensor,
    a_max: tf.Tensor,
    tape: tf.GradientTape,
    dqda_clipping: float = None,
    clip_norm: bool = False,
) -> tf.Tensor:
    """Deterministic policy gradient loss, similar to trfl.dpg."""

    # dq/da: gradient of the max Q-value w.r.t. the greedy action.
    dqda = tape.gradient([q_max], [a_max])[0]
    if dqda is None:
        raise ValueError('q_max needs to be a function of a_max.')

    # Optionally clip dq/da, either by norm or element-wise.
    if dqda_clipping is not None:
        if dqda_clipping <= 0:
            raise ValueError(
                'dqda_clipping should be bigger than 0, {} found'.format(
                    dqda_clipping))
        if clip_norm:
            dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=-1)
        else:
            dqda = tf.clip_by_value(dqda, -1. * dqda_clipping, dqda_clipping)

    # Pseudo-target whose distance from a_max carries dq/da during
    # backprop; stop_gradient keeps the Q network out of the update.
    target_a = tf.stop_gradient(dqda + a_max)
    # Gradient only goes through the actor network:
    # (letting w be the actor network weights)
    # d(loss)/dw = 0.5 * (2 * (target_a - a_max) * d(target_a - a_max)/dw)
    #            = (target_a - a_max) * [d(target_a)/dw  - d(a_max)/dw]
    #            = dq/da * [d(target_a)/dw  - d(a_max)/dw]  # by defn of target_a
    #            = dq/da * [0 - d(a_max)/dw]                # by stop_gradient
    #            = - dq/da * da/dw
    return 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
Beispiel #5
0
  def optimizer_minimize(self, loss: tf.Tensor, tape: tf.GradientTape,
                         optimizer: tf.optimizers.Optimizer, model: k.Model):
    """Scale the loss, compute gradients and apply them to ``model``.

    The loss is divided by the number of replicas so gradients summed
    across a distribution strategy are correct; with a
    ``LossScaleOptimizer`` the replica-scaled loss is additionally
    loss-scaled before differentiation and the gradients are unscaled
    afterwards.

    Args:
        loss (tf.Tensor): per-replica loss recorded on ``tape``.
        tape (tf.GradientTape): tape that recorded the forward pass.
        optimizer (tf.optimizers.Optimizer): optimizer, possibly a
            ``LossScaleOptimizer`` for mixed precision.
        model (k.Model): model whose trainable variables are updated.

    Returns:
        The scaled loss that was actually differentiated.
    """
    with tape:
      scaled_loss = loss / self.strategy.num_replicas_in_sync
      if isinstance(optimizer,
                    tf.keras.mixed_precision.experimental.LossScaleOptimizer):
        # BUG FIX: scale the replica-averaged loss, not the raw `loss`.
        # The original called get_scaled_loss(loss), silently discarding
        # the division by num_replicas_in_sync computed above.
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    grad = tape.gradient(scaled_loss, model.trainable_variables)
    if isinstance(optimizer,
                  tf.keras.mixed_precision.experimental.LossScaleOptimizer):
      grad = optimizer.get_unscaled_gradients(grad)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    return scaled_loss
Beispiel #6
0
    def train_complete(self,
                       tape: tf.GradientTape,
                       training_info,
                       valid_masks=None):
        """Complete one iteration of training.

        Computes gradients of the (optionally masked) mean loss with
        respect to ``self.variables`` and applies them with
        ``self._optimizer``.

        Args:
            tape (tf.GradientTape): the tape which are used for calculating
                gradient. All the previous `train_interval` `train_step()`
                are called under the context of this tape.
            training_info (nested Tensor): information collected for training.
                It is batched from each `info` returned by `train_step()`
            valid_masks (tf.Tensor): masks indicating which samples are valid.
                shape=(T, B), dtype=tf.float32
        Returns:
            loss_info (LossInfo): loss information
            grads_and_vars (list[tuple]): list of gradient and variable tuples
        """
        with tape:
            loss_info = self.calc_loss(training_info)
            # Reduce each loss entry to a scalar, masking first if a
            # validity mask was supplied.
            if valid_masks is None:
                reduce_fn = lambda l: tf.reduce_mean(l)
            else:
                reduce_fn = lambda l: tf.reduce_mean(l * valid_masks)
            loss_info = tf.nest.map_structure(reduce_fn, loss_info)

        variables = self.variables
        gradients = tape.gradient(loss_info.loss, variables)
        grads_and_vars = tuple(zip(gradients, variables))
        self._optimizer.apply_gradients(grads_and_vars)
        return loss_info, grads_and_vars
Beispiel #7
0
 def update(self, tape: tf.GradientTape, loss):
     """Compute gradients of ``loss`` w.r.t. the network and apply them."""
     variables = self.net.trainable_variables
     gradients = tape.gradient(loss, variables)
     self.optimizer.apply_gradients(zip(gradients, variables))
Beispiel #8
0
    def train_complete(self,
                       tape: tf.GradientTape,
                       training_info,
                       valid_masks=None,
                       weight=1.0):
        """Complete one iteration of training.

        `train_complete` should calculate gradients and update parameters using
        those gradients.

        Args:
            tape (tf.GradientTape): the tape which are used for calculating
                gradient. All the previous `train_interval` `train_step()`
                are called under the context of this tape.
            training_info (nested Tensor): information collected for training.
                It is batched from each `info` returned by `train_step()`
            valid_masks (tf.Tensor): masks indicating which samples are valid.
                shape=(T, B), dtype=tf.float32
            weight (float): weight for this batch. Loss will be multiplied with
                this weight before calculating gradient
        Returns:
            loss_info (LossInfo): loss information
            grads_and_vars (list[tuple]): list of gradient and variable tuples
        """
        with tape:
            loss_info = self.calc_loss(training_info)
            # Reduce each loss entry to a scalar. Only rank-2 (T, B)
            # entries are masked; other entries pass through unchanged.
            if valid_masks is not None:
                loss_info = tf.nest.map_structure(
                    lambda l: tf.reduce_mean(l * valid_masks)
                    if len(l.shape) == 2 else l, loss_info)
            else:
                loss_info = tf.nest.map_structure(lambda l: tf.reduce_mean(l),
                                                  loss_info)
            # Fold any scalar_loss into the main loss before weighting.
            if isinstance(loss_info.scalar_loss, tf.Tensor):
                assert len(loss_info.scalar_loss.shape) == 0
                loss_info = loss_info._replace(
                    loss=loss_info.loss + loss_info.scalar_loss)
            loss = weight * loss_info.loss

        # Each (optimizer, variable-set) pair is updated independently
        # against the same weighted loss.
        opt_and_var_sets = self._get_cached_opt_and_var_sets()
        all_grads_and_vars = ()
        for i, (optimizer, vars) in enumerate(opt_and_var_sets):
            if len(vars) == 0:
                continue
            assert optimizer is not None, "optimizer needs to be provides at __init__()"
            grads = tape.gradient(loss, vars)
            grads_and_vars = tuple(zip(grads, vars))
            # The UNCLIPPED gradients are what gets returned to the caller.
            all_grads_and_vars = all_grads_and_vars + grads_and_vars
            if self._gradient_clipping is not None:
                if self._clip_by_global_norm:
                    grads, global_norm = tf.clip_by_global_norm(
                        grads, self._gradient_clipping)
                    grads_and_vars = tuple(zip(grads, vars))
                    # Record the pre-clip global norm, but only when
                    # summaries are being recorded.
                    alf.utils.common.run_if(
                        alf.utils.common.should_record_summaries(), lambda: tf.
                        summary.scalar("global_grad_norm/%s" % i, global_norm))
                else:
                    grads_and_vars = eager_utils.clip_gradient_norms(
                        grads_and_vars, self._gradient_clipping)

            optimizer.apply_gradients(grads_and_vars)

        self.after_train(training_info)

        return loss_info, all_grads_and_vars
def minimize_variables(this_loss, vars: tf.Tensor,
                       optimizer: tf.optimizers.Optimizer,
                       tape: tf.GradientTape):
    """Apply one optimizer step minimizing ``this_loss`` over ``vars``."""
    gradients = tape.gradient(this_loss, vars)
    pairs = zip(gradients, vars)
    optimizer.apply_gradients(pairs)
Beispiel #10
0
    def train_complete(self,
                       tape: tf.GradientTape,
                       training_info: TrainingInfo,
                       weight=1.0):
        """Complete one iteration of training.

        `train_complete` should calculate gradients and update parameters using
        those gradients.

        Args:
            tape (tf.GradientTape): the tape which are used for calculating
                gradient. All the previous `train_interval` `train_step()` for
                are called under the context of this tape.
            training_info (TrainingInfo): information collected for training.
                training_info.info are the batched from each policy_step.info
                returned by train_step()
            weight (float): weight for this batch. Loss will be multiplied with
                this weight before calculating gradient
        Returns:
            a tuple of the following:
            loss_info (LossInfo): loss information
            grads_and_vars (list[tuple]): list of gradient and variable tuples
        """
        # Mask out timesteps whose step_type is LAST so they do not
        # contribute to the mean loss.
        valid_masks = tf.cast(
            tf.not_equal(training_info.step_type, StepType.LAST), tf.float32)

        # reward shaping
        if self._reward_shaping_fn is not None:
            # record unshaped extrinsic rewards given by the environment
            self.add_reward_summary("reward/raw", training_info.reward)
            training_info = training_info._replace(
                reward=self._reward_shaping_fn(training_info.reward))

        # record shaped extrinsic rewards actually used for training
        self.add_reward_summary("reward/extrinsic", training_info.reward)

        with tape:
            loss_info = self.calc_loss(training_info)
            # Masked mean over all entries of the (nested) loss structure.
            loss_info = tf.nest.map_structure(
                lambda l: tf.reduce_mean(l * valid_masks), loss_info)
            loss = weight * loss_info.loss

        # Each cached variable set is paired positionally with its own
        # optimizer and updated independently against the same loss.
        var_sets = self._get_cached_var_sets()
        all_grads_and_vars = ()
        for i, vars, optimizer in zip(
                range(len(var_sets)), var_sets, self._optimizers):
            grads = tape.gradient(loss, vars)
            grads_and_vars = tuple(zip(grads, vars))
            # The UNCLIPPED gradients are what gets returned to the caller.
            all_grads_and_vars = all_grads_and_vars + grads_and_vars
            if self._gradient_clipping is not None:
                if self._clip_by_global_norm:
                    grads, global_norm = tf.clip_by_global_norm(
                        grads, self._gradient_clipping)
                    grads_and_vars = tuple(zip(grads, vars))
                    # Record the pre-clip global norm, but only when
                    # summaries are being recorded.
                    alf.utils.common.run_if(
                        alf.utils.common.should_record_summaries(), lambda: tf.
                        summary.scalar("global_grad_norm/%s" % i, global_norm))
                else:
                    grads_and_vars = eager_utils.clip_gradient_norms(
                        grads_and_vars, self._gradient_clipping)

            optimizer.apply_gradients(grads_and_vars)

        return loss_info, all_grads_and_vars
Beispiel #11
0
def _calculate_and_apply_gradients(model: tf.keras.Sequential,
                                   optimizer: tf.keras.optimizers,
                                   gradient_tape: tf.GradientTape,
                                   loss: tf.Tensor):
    """Compute gradients of ``loss`` w.r.t. ``model`` and apply them.

    Args:
        model: model whose trainable variables are updated.
        optimizer: optimizer used to apply the gradients.
        gradient_tape: tape that recorded the forward pass producing ``loss``.
        loss: loss value recorded on ``gradient_tape``.
    """
    # NOTE: the original annotation `loss: [float]` was a list literal,
    # not a type; a tensor loss is what `tape.gradient` differentiates.
    gradients = gradient_tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))