Exemple #1
0
  def train_step(
      self,
      inputs: Tuple[NestedTensorDict, NestedTensorDict],
      model: ModelType,
      optimizer: tf.keras.optimizers.Optimizer,
      metrics: Optional[Sequence[tf.keras.metrics.Metric]] = None
  ) -> Dict[str, tf.Tensor]:
    """Runs one forward/backward pass and applies the gradients.

    Args:
      inputs: A (features, labels) pair of nested tensor dicts.
      model: The model being trained; must expose `compute_losses`.
      optimizer: The optimizer; may be a LossScaleOptimizer under
        mixed precision.
      metrics: Optional metrics, each updated from the loss-dict entry
        that shares its name.

    Returns:
      A log dict containing the (replica- and possibly loss-scale-scaled)
      training loss under the key "loss".
    """
    features, labels = inputs
    call_kwargs = {"features": features}
    if self.task_config.model_call_needs_labels:
      call_kwargs["labels"] = labels

    uses_loss_scaling = isinstance(
        optimizer, tf.keras.mixed_precision.LossScaleOptimizer)

    with tf.GradientTape() as tape:
      outputs = model(**call_kwargs, training=True)
      loss, loss_dict = model.compute_losses(labels=labels, outputs=outputs)
      # Average across replicas: the gradient allreduce in the optimizer sums.
      loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
      if uses_loss_scaling:
        loss = optimizer.get_scaled_loss(loss)

    trainable = model.trainable_variables
    gradients = tape.gradient(loss, trainable)
    if uses_loss_scaling:
      gradients = optimizer.get_unscaled_gradients(gradients)

    optimizer.apply_gradients(list(zip(gradients, trainable)))

    logs = {"loss": loss}
    if metrics:
      for metric in metrics:
        metric.update_state(loss_dict[metric.name])
    return logs
Exemple #2
0
def step_train(conf, data: dict, model: MoCo,
               opt: tf.keras.optimizers.Optimizer, step):
    """Runs one MoCo training step; returns the step loss as a numpy scalar.

    On every 100th step (`step % 100 == 0`) the model is called with the
    real step index and clustering metrics from `hook` are written as
    summaries; otherwise `step=-1` is passed.

    Args:
        conf: Run configuration (unused here, kept for the caller's API).
        data: Batch dict; must contain a 'label' entry.
        model: MoCo model exposing `trainable_scope`, queue and momentum
            updates.
        opt: Optimizer applied to `model.trainable_scope`.
        step: Global step index.

    Returns:
        The scalar loss value as a numpy number.
    """
    label = data['label']
    _step = -1 if step % 100 > 0 else step

    if step == 0:
        # First call builds the model's variables before the initial sync.
        _, _, _ = model(data, step=_step)
        model.update_initial()

    with tf.GradientTape() as tape:
        assignment, agg_n, agg_k = model(data, step=_step)
        loss = model.losses[0]

    # Fix: compute and apply gradients OUTSIDE the tape context so the
    # backward pass and optimizer update are not themselves recorded on
    # the tape.
    gradients = tape.gradient(loss, model.trainable_scope)
    opt.apply_gradients(zip(gradients, model.trainable_scope))

    model.update_queues(agg_n, agg_k)
    model.update_momentum()

    if _step > 0:
        acc, nmi, ari, sc = hook(agg_n.numpy(), label.numpy(),
                                 assignment.numpy())

        tf.summary.scalar('eval/nmi', nmi, step)
        tf.summary.scalar('eval/acc', acc, step)
        tf.summary.scalar('eval/ari', ari, step)
        tf.summary.scalar('eval/sc', sc, step)
    return loss.numpy()
    def training(self, policy_model: tf.keras.Model,
                 value_model: tf.keras.Model,
                 policy_optimizer: tf.keras.optimizers.Optimizer,
                 value_optimizer: tf.keras.optimizers.Optimizer,
                 observations0: tf.Tensor, actions: tf.Tensor,
                 returns: tf.Tensor):
        """Performs one update of both the policy and the value networks.

        Args:
            policy_model: Outputs (means, log_stds) of a Gaussian policy.
            value_model: State-value baseline network.
            policy_optimizer: Optimizer for the policy network.
            value_optimizer: Optimizer for the value network.
            observations0: Batch of observations.
            actions: Actions taken for those observations.
            returns: Empirical returns used as value-regression targets.
        """
        with tf.GradientTape(persistent=True) as tape:
            means, log_stds = policy_model(observations0)
            stddevs = tf.math.exp(log_stds)

            values = value_model(observations0)

            # Advantage-style baseline: returns minus the learned value.
            baseline = returns - values

            dists = tfp.distributions.Normal(means, stddevs)
            log_probs = dists.log_prob(actions)

            policy_loss = -tf.math.reduce_mean(log_probs * baseline)
            values_loss = self.huber_loss(returns, values)

        policy_grads = tape.gradient(policy_loss,
                                     policy_model.trainable_variables)
        policy_optimizer.apply_gradients(
            zip(policy_grads, policy_model.trainable_variables))

        value_grads = tape.gradient(values_loss,
                                    value_model.trainable_variables)
        value_optimizer.apply_gradients(
            zip(value_grads, value_model.trainable_variables))

        # Fix: a persistent tape keeps its resources alive until garbage
        # collection; release it explicitly once both gradients are taken.
        del tape
Exemple #4
0
def step_train(mu_hat, data: dict, model: MiCE,
               opt: tf.keras.optimizers.Optimizer, step):
    """Runs one MiCE training step; returns (loss, accumulated mu_hat).

    On every 100th step (`step % 100 == 0`) the model is called with the
    real step index and clustering metrics from `hook` are written as
    summaries; otherwise `step=-1` is passed.

    Args:
        mu_hat: Running accumulator updated with `model.mu_hat(...)`.
        data: Batch dict; must contain a 'label' entry.
        model: MiCE model exposing `trainable_scope`, queue and momentum
            updates.
        opt: Optimizer applied to `model.trainable_scope`.
        step: Global step index.

    Returns:
        Tuple of (loss as a numpy scalar, updated mu_hat accumulator).
    """
    label = data['label']
    _step = -1 if step % 100 > 0 else step

    if step == 0:
        # First call builds the model's variables before the initial sync.
        _, _, _ = model(data, step=-1, training=False)
        model.update_initial()

    with tf.GradientTape() as tape:
        assignment, f, v = model(data, step=_step)
        loss = model.losses[0]

    # Fix: compute and apply gradients OUTSIDE the tape context so the
    # backward pass and optimizer update are not themselves recorded on
    # the tape.
    gradients = tape.gradient(loss, model.trainable_scope)
    opt.apply_gradients(zip(gradients, model.trainable_scope))

    model.update_queues(v)
    model.update_momentum()
    mu_hat = mu_hat + model.mu_hat(assignment, v).numpy()

    if _step > 0:
        acc, nmi, ari, sc = hook(f.numpy(), label.numpy(), assignment.numpy())

        tf.summary.scalar('eval/nmi', nmi, step)
        tf.summary.scalar('eval/acc', acc, step)
        tf.summary.scalar('eval/ari', ari, step)
        tf.summary.scalar('eval/sc', sc, step)
    return loss.numpy(), mu_hat
Exemple #5
0
    def training(self, actor_model: tf.keras.Model,
                 critic_model: tf.keras.Model,
                 target_actor_model: tf.keras.Model,
                 target_critic_model: tf.keras.Model,
                 actor_optimizer: tf.keras.optimizers.Optimizer,
                 critic_optimizer: tf.keras.optimizers.Optimizer,
                 observations0: tf.Tensor, observations1: tf.Tensor,
                 actions: tf.Tensor, rewards: tf.Tensor,
                 discount_factor: float, tau: float):
        """One DDPG-style update: critic step, actor step, target refresh."""
        # Critic update: regress Q(s0, a) onto the one-step TD target built
        # from the target networks.
        with tf.GradientTape() as critic_tape:
            next_actions = target_actor_model(observations1)
            td_target = rewards + discount_factor * target_critic_model(
                [observations1, next_actions])
            q_values = critic_model([observations0, actions])
            critic_loss = self.huber_loss(td_target, q_values)

        critic_grads = critic_tape.gradient(
            critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grads, critic_model.trainable_variables))

        # Actor update: maximize the critic's value of the actor's own
        # action (minimize its negation).
        with tf.GradientTape() as actor_tape:
            policy_actions = actor_model(observations0)
            actor_loss = -tf.math.reduce_mean(
                critic_model([observations0, policy_actions]))

        actor_grads = actor_tape.gradient(
            actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grads, actor_model.trainable_variables))

        # Refresh the target networks from the online ones at rate tau.
        self.update_target(target_actor_model, actor_model, tau)
        self.update_target(target_critic_model, critic_model, tau)
Exemple #6
0
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Runs a single forward/backward optimization step.

    Args:
      inputs: a (features, labels) tuple of input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs

        # Optionally shard the input across logical devices.
        input_partition_dims = self.task_config.train_input_partition_dims
        if input_partition_dims:
            strategy = tf.distribute.get_strategy()
            features = strategy.experimental_split_to_logical_devices(
                features, input_partition_dims)

        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        uses_loss_scale = isinstance(
            optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Cast outputs to float32 so the loss is computed in full
            # precision under mixed_float16 / mixed_bfloat16 policies.
            outputs = tf.nest.map_structure(
                lambda t: tf.cast(t, tf.float32), outputs)

            # Per-replica loss, averaged over replicas because the gradient
            # allreduce inside the optimizer performs a sum.
            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            scaled_loss = loss / num_replicas

            # A LossScaleOptimizer additionally scales the loss for
            # numerical stability under mixed precision.
            if uses_loss_scale:
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        trainable = model.trainable_variables
        gradients = tape.gradient(scaled_loss, trainable)
        # Undo the loss scaling before the gradients reach the optimizer.
        if uses_loss_scale:
            gradients = optimizer.get_unscaled_gradients(gradients)
        optimizer.apply_gradients(list(zip(gradients, trainable)))

        logs = {self.loss: loss}
        if metrics:
            self.process_metrics(metrics, labels, outputs)
            logs.update({m.name: m.result() for m in metrics})

        return logs
Exemple #7
0
    def train_step(self, inputs, model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer, metrics):
        """Runs one forward/backward pass over a batch.

    Args:
      inputs: a dictionary of input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(inputs, training=True)
            # Per-replica loss; `inputs` also serves as the labels here.
            loss = self.build_losses(labels=inputs,
                                     model_outputs=outputs,
                                     metrics=metrics,
                                     aux_losses=model.losses)
            # Average over replicas since the default gradient allreduce
            # performs a sum inside the optimizer.
            # TODO(b/154564893): enable loss scaling.
            scaled_loss = loss / num_replicas
        trainable = model.trainable_variables
        gradients = tape.gradient(scaled_loss, trainable)
        optimizer.apply_gradients(list(zip(gradients, trainable)))
        self.process_metrics(metrics, inputs, outputs)
        return {self.loss: loss}
Exemple #8
0
def train_batch(images, labels, loss: list, model: tf.keras.Model,
                optimizer: tf.keras.optimizers.Optimizer) -> tuple:
    """
    Trains the model on a single batch.

    :param images: the batch of input images
    :param labels: the labels, one collection per loss function
    :param loss: the loss functions, one per model output
    :param model: the model
    :param optimizer: the optimization technique
    :return: (list of per-output prediction losses,
              total loss including regularization)
    """
    with tf.GradientTape() as tape:
        # Trainable variables are watched by the tape automatically, so the
        # previous explicit tape.watch(...) call was redundant and removed.
        outputs = model(images)
        regularization_loss = tf.reduce_sum(model.losses)
        pred_loss = []
        if len(loss) == 1:
            # Wrap the single output so the zip below pairs it with the
            # single label set and loss function.
            outputs = (outputs, None)
        for output, label, loss_fn in zip(outputs, labels, loss):
            pred_loss.append(loss_fn(label, output))
        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return pred_loss, total_loss
Exemple #9
0
def train(model: callable, dataset: callable, optimizer: tf.keras.optimizers.Optimizer, n_epochs: int = 5):
    """
    Trains a model with plain MSE gradient descent.

    Parameters
    ----------
    model : callable
        Model must be callable as f(X) where X is the feature matrix
        for a batch.
    dataset : callable
        A method that returns an iterable when called. The iterable
        yields batches of X, y, e.g. the feature matrix and target.
    optimizer : tf.keras.optimizers.Optimizer
        An optimizer instance that supports the apply_gradients
        method.
    n_epochs : int, optional
        Number of epochs, by default 5
    """
    for epoch_i in range(n_epochs):
        print(f"epoch: {epoch_i+1:>4}", end='')
        epoch_loss, epoch_steps = 0.0, 0
        for x, y in dataset():
            # Record only the forward pass on the tape.
            with tf.GradientTape() as tape:
                loss = mean_squared_error(model(x), y)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            # Bookkeeping for the epoch-average loss printout.
            epoch_loss += np.mean(loss)
            epoch_steps += 1

        print(f", loss={epoch_loss/epoch_steps:>9.2f}")
def train_step(initial_state: tf.Tensor, model: tf.keras.Model,
               optimizer: tf.keras.optimizers.Optimizer, gamma: float,
               max_steps_per_episode: int) -> tf.Tensor:
    """Runs a model training step and returns the total episode reward."""

    with tf.GradientTape() as tape:
        # Collect one episode of experience under the current policy.
        action_probs, values, rewards = run_episode(initial_state, model,
                                                    max_steps_per_episode)

        # Discounted expected returns for every timestep.
        returns = get_expected_return(rewards, gamma)

        # Give each (T,) sequence a trailing unit dimension for the loss.
        action_probs, values, returns = (
            tf.expand_dims(seq, 1)
            for seq in (action_probs, values, returns))

        # Combined actor-critic loss for the episode.
        loss = compute_loss(action_probs, values, returns)

    # Backpropagate and update the network parameters.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    episode_reward = tf.math.reduce_sum(rewards)

    return episode_reward
Exemple #11
0
def __initialize_optimizer(model: utils.PersonalizationLayersDecorator,
                           optimizer: tf.keras.optimizers.Optimizer):
    """Forces the optimizer to create its state variables.

    Applies an all-zero gradient to the base model's trainable variables,
    which makes the optimizer build its internal slot variables.
    """
    base_vars = model.base_model.trainable_variables
    zeros = tf.nest.map_structure(tf.zeros_like, base_vars)
    optimizer.apply_gradients(zip(zeros, base_vars))
    assert optimizer.variables()
Exemple #12
0
def train_step(initial_state: tf.Tensor, model: tf.keras.Model,
               optimizer: tf.keras.optimizers.Optimizer, gamma: float,
               max_steps_per_episode: int) -> tf.Tensor:
    """
    tf.function applies the context to this function so this can be compiled
    into a callable TensorFlow graph, which will be fast. tf.GradientTape does
    automatic differentiation to loss function.

    :return: the summed reward of the collected episode
    """
    with tf.GradientTape() as tape:
        # Roll out one episode with the current policy.
        action_probs, values, rewards = run_episode(initial_state, model,
                                                    max_steps_per_episode)
        # Discounted expected returns per timestep.
        returns = get_expected_return(rewards, gamma)
        # Reshape each (T,) sequence to (T, 1) for the loss computation.
        action_probs = tf.expand_dims(action_probs, 1)
        values = tf.expand_dims(values, 1)
        returns = tf.expand_dims(returns, 1)
        # Combined actor-critic loss.
        loss = compute_loss(action_probs, values, returns)

    # Backpropagate and apply the parameter update.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    episode_reward = tf.math.reduce_sum(rewards)

    return episode_reward
def discriminator_train_step(
        generator: Generator, discriminator: Discriminator, X: tf.Tensor,
        Z: tf.Tensor, c: tf.Tensor,
        optimizer: tf.keras.optimizers.Optimizer) -> tf.Tensor:
    """Single training step of discriminator

    Arguments:
        generator {Generator} -- the generator instance
        discriminator {Discriminator} -- the discriminator instance
        X {tf.Tensor} -- the real sample input
        Z {tf.Tensor} -- the random noise
        c {tf.Tensor} -- the conditional information
        optimizer {tf.keras.optimizers.Optimizer} -- the optimizer for bp update

    Returns:
        tf.Tensor -- the discriminator loss of current step
    """
    with tf.GradientTape() as tape:
        # Score real samples and freshly generated fakes, then combine.
        fake_sample = generator(Z, c)
        real_score = discriminator(X)
        fake_score = discriminator(fake_sample)
        loss = discriminator_loss(real_score, fake_score)
    grads = tape.gradient(loss, discriminator.trainable_variables)
    optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))
    return loss
def _initialize_optimizer_vars(model: tff.learning.Model,
                               optimizer: tf.keras.optimizers.Optimizer):
    """Ensures variables holding the state of `optimizer` are created."""
    # Fetch the model weights once and reuse them for both the zero
    # "gradients" and the variables they are paired with (the original
    # called _get_weights(model) twice).
    model_weights = _get_weights(model)
    delta = tf.nest.map_structure(tf.zeros_like, model_weights.trainable)
    grads_and_vars = tf.nest.map_structure(lambda x, v: (x, v), delta,
                                           model_weights.trainable)
    optimizer.apply_gradients(grads_and_vars, name='server_update')
Exemple #15
0
def train_actor(act: tf.keras.Model, cri: tf.keras.Model,
                act_opt: tf.keras.optimizers.Optimizer, states):
    """Updates the actor to increase the critic's score of its actions."""
    with tf.GradientTape() as tape:
        chosen_actions = act(states, training=True)
        critic_score = cri([states, chosen_actions], training=True)
        # Ascend the critic value by descending its negation.
        actor_loss = -tf.reduce_mean(critic_score)
    gradients = tape.gradient(actor_loss, act.trainable_variables)
    act_opt.apply_gradients(zip(gradients, act.trainable_variables))
Exemple #16
0
  def train_step(self,
                 inputs,
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics=None):
    """Does forward and backward.

    With distribution strategies, this method runs on devices.

    Args:
      inputs: a (features, labels) tuple, or any other structure which is
        then used as both features and labels.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    # Non-tuple inputs double as their own labels (presumably for
    # self-supervised / reconstruction setups -- TODO confirm with callers).
    if isinstance(inputs, tuple) and len(inputs) == 2:
      features, labels = inputs
    else:
      features, labels = inputs, inputs
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Computes per-replica loss.
      if model.compiled_loss:
        # Keras-compiled loss (with the model's regularization losses)
        # plus the task's own loss; aux_losses=None avoids double-counting
        # model.losses here.
        loss = model.compiled_loss(
            labels, outputs, regularization_losses=model.losses)
        loss += self.build_losses(
            labels=labels, model_outputs=outputs, aux_losses=None)
      else:
        loss = self.build_losses(
            labels=labels, model_outputs=outputs, aux_losses=model.losses)
      # Scales loss as the default gradients allreduce performs sum inside the
      # optimizer.
      scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync

      # For mixed precision, when a LossScaleOptimizer is used, the loss is
      # scaled to avoid numeric underflow.
      if isinstance(optimizer,
                    tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)

    # Undo the loss scaling before the gradients reach the optimizer.
    if isinstance(optimizer,
                  tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    # NOTE(review): metric results are merged into the logs only when the
    # model has compiled metrics; plain `metrics` are updated above but not
    # logged otherwise -- confirm this asymmetry is intentional.
    if model.compiled_metrics:
      self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
      logs.update({m.name: m.result() for m in metrics or []})
      logs.update({m.name: m.result() for m in model.metrics})
    return logs
Exemple #17
0
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (images, labels); `labels` is a dict providing
        image_info, anchor_boxes, gt_boxes, gt_classes and, when the model
        includes a mask head, gt_masks.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects; each is fed the loss
        component of the same name.

    Returns:
      A dictionary of logs.
    """
        images, labels = inputs
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(
                images,
                # image_info[:, 1, :] presumably carries the resized image
                # shape -- TODO confirm against the data loader.
                image_shape=labels['image_info'][:, 1, :],
                anchor_boxes=labels['anchor_boxes'],
                gt_boxes=labels['gt_boxes'],
                gt_classes=labels['gt_classes'],
                gt_masks=(labels['gt_masks']
                          if self.task_config.model.include_mask else None),
                training=True)
            # Cast to float32 so losses are computed in full precision under
            # mixed_float16 / mixed_bfloat16 policies.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss; divide by the replica count because
            # the gradient allreduce in the optimizer performs a sum.
            losses = self.build_losses(outputs=outputs,
                                       labels=labels,
                                       aux_losses=model.losses)
            scaled_loss = losses['total_loss'] / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient when LossScaleOptimizer is used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        # Log the unscaled total loss under the task's loss key.
        logs = {self.loss: losses['total_loss']}

        if metrics:
            for m in metrics:
                m.update_state(losses[m.name])

        return logs
Exemple #18
0
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
        """Does forward and backward.

    This example assumes input is a tuple of (features, labels), which follows
    the output from data loader, i.e., Parser. The output from Parser is fed
    into train_step to perform one step forward and backward pass. Other data
    structure, such as dictionary, can also be used, as long as it is consistent
    between output from Parser and input used here.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Casting output layer as float32 is necessary when mixed_precision is
            # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss.
            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            # Scales loss as the default gradients allreduce performs sum inside the
            # optimizer.
            scaled_loss = loss / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient before apply_gradients when LossScaleOptimizer is
        # used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        # The unscaled per-replica loss is what gets logged.
        logs = {self.loss: loss}
        # NOTE(review): metric objects are updated but their results are not
        # merged into `logs` here -- presumably the reporting layer reads the
        # metric objects directly; confirm.
        if metrics:
            self.process_metrics(metrics, labels, outputs)
        return logs
def initialize_optimizer_vars(model: tf.keras.Model,
                              optimizer: tf.keras.optimizers.Optimizer):
    """Ensures variables holding the state of `optimizer` are created.

    Applies an all-zero gradient update, which forces the optimizer to build
    its slot/state variables for every trainable variable of the model.
    """
    # Consistency fix: the original zipped zeros built from
    # `trainable_variables` against `trainable_weights`; they are aliases in
    # Keras, but the same collection should be used for both sides.
    tvars = model.trainable_variables
    delta = tf.nest.map_structure(tf.zeros_like, tvars)
    grads_and_vars = tf.nest.map_structure(lambda x, v: (x, v), delta, tvars)
    optimizer.apply_gradients(grads_and_vars, name='server_update')

    assert optimizer.variables()
Exemple #20
0
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects; each is fed the loss
        component of the same name.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs

        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Casting output layer as float32 is necessary when mixed_precision is
            # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Losses are computed from the model's raw (pre-decode) output.
            losses = self.build_losses(outputs['raw_output'], labels)

            # Divide by the replica count: the gradient allreduce in the
            # optimizer performs a sum across replicas.
            scaled_loss = losses['total_loss'] / num_replicas
            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        # compute the gradient
        tvars = model.trainable_variables
        gradients = tape.gradient(scaled_loss, tvars)

        # get unscaled loss if the scaled loss was used
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            gradients = optimizer.get_unscaled_gradients(gradients)

        # Optional global-norm gradient clipping, configured per task.
        if self.task_config.gradient_clip_norm > 0.0:
            gradients, _ = tf.clip_by_global_norm(
                gradients, self.task_config.gradient_clip_norm)

        optimizer.apply_gradients(list(zip(gradients, tvars)))

        # Log the unscaled total loss under the task's loss key.
        logs = {self.loss: losses['total_loss']}

        if metrics:
            for m in metrics:
                m.update_state(losses[m.name])
                logs.update({m.name: m.result()})

        return logs
    def meta_train(self,
                   task_generator: Callable[[], Generator[tuple, None, None]],
                   n_episode: int,
                   n_way: int,
                   ks_shots: int,
                   kq_shots: int,
                   optimizer: tf.keras.optimizers.Optimizer,
                   episode_end_callback=None):
        """Trains the model on the meta-training set.

        Args:
            task_generator (callable): A callable returning a generator of few_shot tasks. Each task should be a couple
                (support_set, query_set), themselves being a tuple (data, label).
            n_episode (int): Number of episodes for meta-training.
            n_way (int): Number of ways (or classes per episode).
            ks_shots (int): Number of image per class in the support set.
            kq_shots (int): Number of image per class in the query set.
            optimizer (tf.keras.optimizer): A Keras optimizer for training.
            episode_end_callback (Callable): callback called at the end of each episode.
        """

        # NOTE(review): compile() attaches the optimizer to the encoder, but
        # gradients are applied manually below via optimizer.apply_gradients
        # -- confirm the compile call is actually needed.
        self.encoder.compile(optimizer)

        for episode in range(n_episode):
            # Open a GradientTape to record the operations run
            # during the forward pass, which enables autodifferentiation.
            with tf.GradientTape() as tape:
                # A fresh generator is created every episode and only its
                # first task is consumed.
                support_set, query_set = task_generator().__next__()

                # Run the forward pass of the layer.
                # The operations that the layer applies
                # to its inputs are going to be recorded
                # on the GradientTape.
                distrib, support_labels, query_labels = run_episode(
                    support_set, query_set, n_way, ks_shots, kq_shots,
                    self.encoder)
                distrib = tf.transpose(distrib)

                loss_value = _compute_loss(distrib, query_labels, n_way)

            # Use the gradient tape to automatically retrieve
            # the gradients of the trainable variables with respect to the loss.
            grads = tape.gradient(loss_value, self.encoder.trainable_weights)

            # Run one step of gradient descent by updating
            # the value of the variables to minimize the loss.
            optimizer.apply_gradients(
                zip(grads, self.encoder.trainable_weights))

            if episode_end_callback is not None:
                args = {
                    'episode': episode,
                    'episode_loss': loss_value,
                    'episode_gradients': grads
                }
                episode_end_callback(**args)
Exemple #22
0
def update_weights(optimizer: tf.keras.optimizers.Optimizer, network: Network, batch, weight_decay: float):
  """
  Updates the weights of the network based on gradient optimisation.

  The loss sums, over every trajectory in the batch, the value / reward /
  policy losses of the initial inference and of each recurrent inference
  step (each recurrent step weighted by 1/len(actions)), plus an L2 penalty
  on all network weights. Gradients are then applied and the network's step
  counter is advanced.

  Parameters
  ----------
  optimiser : tf.keras.optimizers.Optimizer
    The optimiser to use for the weight updates task.
  network : Network
    The network on which to perform the weight updates.
  batch :
    Iterable of (image, actions, targets) trajectories; each target is a
    (value, reward, policy) triple aligned with the predictions.
  weight_decay : float
    Coefficient of the L2 penalty applied to every network weight.

  Returns
  -------
  None
    The function mutates the network in place and prints the updated
    training-step count; it returns nothing.
  """
  loss = 0
  with tf.GradientTape() as tape:
    for image, actions, targets in batch:
      # Initial step, from the real observation.
      value, reward, policy_logits, hidden_state = network.initial_inference(image)
      predictions = [(1.0, value, reward, policy_logits)]

      # Recurrent steps, from action and previous hidden state.
      for action in actions:
        value, reward, policy_logits, hidden_state = network.recurrent_inference(hidden_state, action)
        predictions.append((1.0 / len(actions), value, reward, policy_logits))

        # Halve the gradient flowing back through the hidden state at each
        # unroll step (scale_gradient semantics -- TODO confirm helper).
        hidden_state = scale_gradient(hidden_state, 0.5)

      # Match each prediction with its search-derived target and accumulate
      # the per-step losses, scaled by the per-step gradient weight.
      for prediction, target in zip(predictions, targets):
        gradient_scale, value, reward, policy_logits = prediction
        target_value, target_reward, target_policy = target
        l = (
            scalar_loss(value, target_value)
            + scalar_loss(reward, target_reward)
            + tf.nn.softmax_cross_entropy_with_logits(logits=policy_logits, labels=target_policy)
        )

        loss += scale_gradient(l, gradient_scale)

    # L2 weight decay over every weight returned by the network.
    # NOTE(review): this assumes get_weights() returns differentiable
    # tensors/variables the tape can track -- confirm.
    all_weights = network.get_weights()
    for weights in all_weights:
        loss += weight_decay * tf.nn.l2_loss(weights)

  grad = tape.gradient(loss, all_weights)
  optimizer.apply_gradients(zip(grad, all_weights))
  network.update_steps()
  print(network.training_steps())
Exemple #23
0
def train_step(x_batch, y_batch, model:CNN_5, OP:tf.keras.optimizers.Optimizer, metrics:list, layer_lr:list):
    """Run one optimisation step with per-layer gradient scaling.

    Each gradient is multiplied by the matching factor in ``layer_lr``
    before being applied; then the loss metric and the accuracy metric
    are updated (labels are remapped from [-1, 1] to [0, 1]).
    """
    with tf.GradientTape() as recorder:
        predictions = model(x_batch)
        batch_loss = model.loss(predictions, y_batch)

    variables = model.trainable_variables
    raw_grads = recorder.gradient(batch_loss, variables)
    scaled_grads = []
    for g, factor in zip(raw_grads, layer_lr):
        scaled_grads.append(tf.multiply(g, factor))
    OP.apply_gradients(zip(scaled_grads, variables))

    metrics[0](batch_loss)
    metrics[1]((y_batch + 1) / 2, predictions)
Exemple #24
0
def __train_step(X: tf.Tensor, y: tf.Tensor, model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 loss_fn: tf.keras.losses.Loss) -> None:
    """Perform a single gradient-descent step on one batch."""
    with tf.GradientTape() as tape:
        predictions = model(X, training=True)
        batch_loss = loss_fn(y, predictions)

    trainable = model.trainable_variables
    gradients = tape.gradient(batch_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
def train_one_step(model: models.Model,
                   optimizer: tf.keras.optimizers.Optimizer, x: tf.Tensor,
                   y: tf.Tensor):
    """Run forward/backward on one batch and return (loss, accuracy)."""
    weights = model.trainable_weights
    with tf.GradientTape() as tape:
        predictions = model(x)
        batch_loss = compute_loss(predictions, y)
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
    batch_accuracy = compute_accuracy(predictions, y)
    return batch_loss, batch_accuracy
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: A tuple of of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        # One-hot encode only for single-label training; multilabel targets
        # are assumed to already be multi-hot.
        if (self.task_config.losses.one_hot
                and not self.task_config.train_data.is_multilabel):
            labels = tf.one_hot(labels, self.task_config.model.num_classes)

        replica_count = tf.distribute.get_strategy().num_replicas_in_sync
        use_loss_scaling = isinstance(
            optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)

            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            # Divide by the replica count because the default gradient
            # allreduce performs a sum inside the optimizer.
            scaled_loss = loss / replica_count

            # Under mixed precision, LossScaleOptimizer scales the loss for
            # numerical stability.
            if use_loss_scaling:
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Undo the loss scaling on the gradients before applying them.
        if use_loss_scaling:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}
        if metrics:
            self.process_metrics(metrics, labels, outputs)
        elif model.compiled_metrics:
            self.process_compiled_metrics(model.compiled_metrics, labels,
                                          outputs)
            logs.update({m.name: m.result() for m in model.metrics})
        return logs
Exemple #27
0
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a dictionary of input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        replica_count = tf.distribute.get_strategy().num_replicas_in_sync
        use_loss_scaling = isinstance(
            optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Cast all model outputs to float32 so the losses are computed
            # in full precision even under a mixed-precision policy.
            outputs = tf.nest.map_structure(lambda t: tf.cast(t, tf.float32),
                                            outputs)

            loss, cls_loss, box_loss, model_loss = self.build_losses(
                outputs=outputs, labels=labels, aux_losses=model.losses)
            # Scale down: the gradient allreduce sums across replicas.
            scaled_loss = loss / replica_count

            # LossScaleOptimizer scales the loss for numerical stability
            # under mixed precision.
            if use_loss_scaling:
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scale the gradients back when LossScaleOptimizer is used.
        if use_loss_scaling:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}

        all_losses = {
            'total_loss': loss,
            'cls_loss': cls_loss,
            'box_loss': box_loss,
            'model_loss': model_loss,
        }
        if metrics:
            for metric in metrics:
                metric.update_state(all_losses[metric.name])
                logs[metric.name] = metric.result()

        return logs
def create_optimizer_vars(
        model: model_lib.Model,
        optimizer: tf.keras.optimizers.Optimizer) -> Iterable[tf.Variable]:
    """Applies a placeholder update to optimizer to enable getting its variables.

    Keras optimizers create their state lazily on the first
    apply_gradients call; applying an all-zero gradient triggers that
    creation so the variables can be read immediately.
    """
    trainable = get_global_variables(model).trainable
    zero_delta = tf.nest.map_structure(tf.zeros_like, trainable)
    grads_and_vars = tf.nest.map_structure(
        lambda g, v: (-1.0 * g, v),
        tf.nest.flatten(zero_delta), tf.nest.flatten(trainable))
    optimizer.apply_gradients(grads_and_vars, name='server_update')
    return optimizer.variables()
def _initialize_optimizer_vars(model: tff.learning.Model,
                               optimizer: tf.keras.optimizers.Optimizer):
    """Creates optimizer variables to assign the optimizer's state.

    Keras constructs optimizer variables lazily on first use; stateful
    optimizers such as Adam, Adagrad, or momentum need slot variables
    shaped like the model weights. Applying an all-zero gradient forces
    eager construction without modifying the weights.
    """
    model_weights = tff.learning.ModelWeights.from_model(model)
    zero_gradient = list(map(tf.zeros_like, model_weights.trainable))
    optimizer.apply_gradients(zip(zero_gradient, model_weights.trainable))
    assert optimizer.variables()
Exemple #30
0
def train_critic(cri: tf.keras.Model, act_target: tf.keras.Model,
                 cri_target: tf.keras.Model,
                 cri_opt: tf.keras.optimizers.Optimizer, states, actions,
                 rewards, next_states, gamma: float):
    """One critic update toward the bootstrapped target.

    Fits ``cri`` with an MSE loss against
    ``rewards + gamma * cri_target([next_states, act_target(next_states)])``.
    """
    with tf.GradientTape() as tape:
        next_actions = act_target(next_states, training=True)
        td_target = rewards + gamma * cri_target(
            [next_states, next_actions], training=True)
        predicted_q = cri([states, actions], training=True)
        critic_loss = mse(td_target, predicted_q)
    trainable = cri.trainable_variables
    gradients = tape.gradient(critic_loss, trainable)
    cri_opt.apply_gradients(zip(gradients, trainable))