Example 1
  def train_step(
      self,
      inputs: Tuple[NestedTensorDict, NestedTensorDict],
      model: ModelType,
      optimizer: tf.keras.optimizers.Optimizer,
      metrics: Optional[Sequence[tf.keras.metrics.Metric]] = None
  ) -> Dict[str, tf.Tensor]:
    features, labels = inputs
    input_dict = {"features": features}
    if self.task_config.model_call_needs_labels:
      input_dict["labels"] = labels

    is_mixed_precision = isinstance(optimizer,
                                    tf.keras.mixed_precision.LossScaleOptimizer)

    with tf.GradientTape() as tape:
      outputs = model(**input_dict, training=True)
      loss, loss_dict = model.compute_losses(labels=labels, outputs=outputs)
      # Scales the loss: the default gradient allreduce sums across replicas
      # inside the optimizer.
      scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
      if is_mixed_precision:
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    if is_mixed_precision:
      grads = optimizer.get_unscaled_gradients(grads)

    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {"loss": loss}
    if metrics:
      for m in metrics:
        m.update_state(loss_dict[m.name])
    return logs
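
For context, the mixed-precision branch above is only exercised when the optimizer has been wrapped in a loss-scale optimizer. A minimal setup sketch, assuming a mixed_float16 policy and an SGD base optimizer (both choices are illustrative, not taken from the example):

import tensorflow as tf

# Run layers in float16 while keeping float32 variables; this is what makes
# loss scaling necessary in the first place.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Wrapping any base optimizer provides get_scaled_loss / get_unscaled_gradients,
# the two calls used by train_step above.
base_optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(base_optimizer)
assert isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
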
Example 2
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs

        input_partition_dims = self.task_config.train_input_partition_dims
        if input_partition_dims:
            strategy = tf.distribute.get_strategy()
            features = strategy.experimental_split_to_logical_devices(
                features, input_partition_dims)

        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Casts model outputs to float32: under a mixed_float16 or
            # mixed_bfloat16 policy the loss should be computed in float32.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss.
            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            # Scales the loss: the default gradient allreduce sums (rather than
            # averages) across replicas inside the optimizer.
            scaled_loss = loss / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient before apply_gradients when LossScaleOptimizer is
        # used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}
        if metrics:
            self.process_metrics(metrics, labels, outputs)
            logs.update({m.name: m.result() for m in metrics})

        return logs
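
The per-replica step above is meant to be driven by a distribution strategy. A minimal driver sketch; `task`, `train_dataset`, and the builder calls are assumptions for illustration, not part of the example:

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
  model = task.build_model()                   # hypothetical task builder
  optimizer = tf.keras.optimizers.Adam()
  metrics = task.build_metrics(training=True)  # hypothetical task builder

# train_dataset is assumed to yield (features, labels) tuples.
dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
iterator = iter(dist_dataset)

@tf.function
def train_one_step(iterator):
  # Each replica processes its shard of the global batch; dividing the loss by
  # num_replicas_in_sync inside train_step compensates for the summed gradient
  # allreduce.
  return strategy.run(
      task.train_step, args=(next(iterator), model, optimizer, metrics))

per_replica_logs = train_one_step(iterator)
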
Example 3
  def train_step(self,
                 inputs,
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics=None):
    """Does forward and backward.

    With distribution strategies, this method runs on devices.

    Args:
      inputs: a (features, labels) tuple, or a single structure used as both
        features and labels.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    if isinstance(inputs, tuple) and len(inputs) == 2:
      features, labels = inputs
    else:
      features, labels = inputs, inputs
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Computes per-replica loss.
      if model.compiled_loss:
        loss = model.compiled_loss(
            labels, outputs, regularization_losses=model.losses)
        loss += self.build_losses(
            labels=labels, model_outputs=outputs, aux_losses=None)
      else:
        loss = self.build_losses(
            labels=labels, model_outputs=outputs, aux_losses=model.losses)
      # Scales the loss: the default gradient allreduce sums (rather than
      # averages) across replicas inside the optimizer.
      scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync

      # For mixed precision, when a LossScaleOptimizer is used, the loss is
      # scaled to avoid numeric underflow.
      if isinstance(optimizer,
                    tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)

    if isinstance(optimizer,
                  tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    if model.compiled_metrics:
      self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
      logs.update({m.name: m.result() for m in metrics or []})
      logs.update({m.name: m.result() for m in model.metrics})
    return logs
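
Most of these examples delegate the loss computation to a task-level build_losses helper. A minimal sketch of such a helper for a classification task, assuming sparse integer labels and logit outputs (illustrative only; each real task defines its own losses):

def build_losses(self, labels, model_outputs, aux_losses=None):
  """Sketch: per-replica cross-entropy plus auxiliary (regularization) losses."""
  loss = tf.reduce_mean(
      tf.keras.losses.sparse_categorical_crossentropy(
          labels, model_outputs, from_logits=True))
  if aux_losses:
    # aux_losses is typically model.losses, e.g. weight-decay terms.
    loss += tf.add_n(aux_losses)
  return loss
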
Example 4
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs

        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Casts model outputs to float32: under a mixed_float16 or
            # mixed_bfloat16 policy the loss should be computed in float32.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            losses = self.build_losses(outputs['raw_output'], labels)

            scaled_loss = losses['total_loss'] / num_replicas
            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        # Computes gradients of the scaled loss.
        tvars = model.trainable_variables
        gradients = tape.gradient(scaled_loss, tvars)

        # Unscales the gradients when LossScaleOptimizer scaled the loss.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            gradients = optimizer.get_unscaled_gradients(gradients)

        if self.task_config.gradient_clip_norm > 0.0:
            gradients, _ = tf.clip_by_global_norm(
                gradients, self.task_config.gradient_clip_norm)

        optimizer.apply_gradients(list(zip(gradients, tvars)))

        logs = {self.loss: losses['total_loss']}

        if metrics:
            for m in metrics:
                m.update_state(losses[m.name])
                logs.update({m.name: m.result()})

        return logs
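
Example 4 additionally clips gradients by global norm when task_config.gradient_clip_norm is positive. A small standalone check of what tf.clip_by_global_norm returns, using made-up gradients:

grads = [tf.constant([3.0, 4.0]), tf.constant([0.0, 12.0])]   # global norm = 13
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=1.0)
# Each gradient is scaled by clip_norm / max(global_norm, clip_norm), and the
# pre-clipping norm is returned alongside the clipped list.
print(global_norm.numpy())            # 13.0
print([g.numpy() for g in clipped])   # each entry scaled by 1/13
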
Example 5
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (images, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        images, labels = inputs
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(
                images,
                image_shape=labels['image_info'][:, 1, :],
                anchor_boxes=labels['anchor_boxes'],
                gt_boxes=labels['gt_boxes'],
                gt_classes=labels['gt_classes'],
                gt_masks=(labels['gt_masks']
                          if self.task_config.model.include_mask else None),
                training=True)
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss.
            losses = self.build_losses(outputs=outputs,
                                       labels=labels,
                                       aux_losses=model.losses)
            scaled_loss = losses['total_loss'] / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient when LossScaleOptimizer is used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: losses['total_loss']}

        if metrics:
            for m in metrics:
                m.update_state(losses[m.name])

        return logs
Example 6
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
        """Does forward and backward.

    This example assumes the input is a tuple of (features, labels), matching
    the output of the data loader, i.e., the Parser. The Parser output is fed
    into train_step to perform one forward and backward pass. Other data
    structures, such as a dictionary, can also be used, as long as the Parser
    output and the input expected here stay consistent.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            # Casts model outputs to float32: under a mixed_float16 or
            # mixed_bfloat16 policy the loss should be computed in float32.
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss.
            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            # Scales the loss: the default gradient allreduce sums (rather than
            # averages) across replicas inside the optimizer.
            scaled_loss = loss / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient before apply_gradients when LossScaleOptimizer is
        # used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}
        if metrics:
            self.process_metrics(metrics, labels, outputs)
        return logs
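
Examples 2, 3, 6, and 8 route metric updates through self.process_metrics(metrics, labels, outputs). A plausible minimal implementation, assuming every metric accepts (labels, outputs) directly in update_state (a sketch, not necessarily the exact helper used by these tasks):

def process_metrics(self, metrics, labels, model_outputs):
  """Sketch: forward labels and model outputs to every metric."""
  for metric in metrics:
    metric.update_state(labels, model_outputs)
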
Example 7
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                            outputs)

            # Computes per-replica loss.
            loss, cls_loss, box_loss, model_loss = self.build_losses(
                outputs=outputs, labels=labels, aux_losses=model.losses)
            scaled_loss = loss / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient when LossScaleOptimizer is used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}

        all_losses = {
            'total_loss': loss,
            'cls_loss': cls_loss,
            'box_loss': box_loss,
            'model_loss': model_loss,
        }
        if metrics:
            for m in metrics:
                m.update_state(all_losses[m.name])
                logs.update({m.name: m.result()})

        return logs
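
In Example 7 (and similarly in Examples 4, 5, 9, and 10) each metric is updated with a scalar from the loss dictionary, so the training metrics are expected to be running means keyed by loss names. A matching build_metrics sketch, using the keys from Example 7's all_losses (illustrative):

def build_metrics(self, training=True):
  """Sketch: one running-mean metric per loss term reported by train_step."""
  del training  # this sketch uses the same metrics for train and eval
  names = ['total_loss', 'cls_loss', 'box_loss', 'model_loss']
  return [tf.keras.metrics.Mean(name=name) for name in names]
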
Example 8
    def train_step(self,
                   inputs: Tuple[Any, Any],
                   model: tf.keras.Model,
                   optimizer: tf.keras.optimizers.Optimizer,
                   metrics: Optional[List[Any]] = None):
        """Does forward and backward.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
        features, labels = inputs
        is_multilabel = self.task_config.train_data.is_multilabel
        if self.task_config.losses.one_hot and not is_multilabel:
            labels = tf.one_hot(labels, self.task_config.model.num_classes)

        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)

            # Computes per-replica loss.
            loss = self.build_losses(model_outputs=outputs,
                                     labels=labels,
                                     aux_losses=model.losses)
            # Scales the loss: the default gradient allreduce sums (rather than
            # averages) across replicas inside the optimizer.
            scaled_loss = loss / num_replicas

            # For mixed_precision policy, when LossScaleOptimizer is used, loss is
            # scaled for numerical stability.
            if isinstance(optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer):
                scaled_loss = optimizer.get_scaled_loss(scaled_loss)

        tvars = model.trainable_variables
        grads = tape.gradient(scaled_loss, tvars)
        # Scales back gradient before apply_gradients when LossScaleOptimizer is
        # used.
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(list(zip(grads, tvars)))

        logs = {self.loss: loss}
        if metrics:
            self.process_metrics(metrics, labels, outputs)
        elif model.compiled_metrics:
            self.process_compiled_metrics(model.compiled_metrics, labels,
                                          outputs)
            logs.update({m.name: m.result() for m in model.metrics})
        return logs
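
Example 8 falls back to model.compiled_metrics when the task provides no explicit metrics. That path only exists if the model was compiled with metrics beforehand; a setup sketch with a plain Keras classifier (illustrative, not taken from the example):

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])  # 10-class logits
# Compiling with metrics populates model.compiled_metrics, which a helper like
# process_compiled_metrics can then update with (labels, outputs).
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])
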
Example 9
  def train_step(
      self,
      inputs: Tuple[Any, Any],
      model: tf.keras.Model,
      optimizer: tf.keras.optimizers.Optimizer,
      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Does forward and backward.

    Args:
      inputs: a tuple of (images, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync

    with tf.GradientTape() as tape:
      outputs = model(
          inputs=images,
          image_info=labels['image_info'],
          training=True)
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      losses = self.build_losses(
          labels=labels,
          model_outputs=outputs,
          aux_losses=model.losses)
      scaled_loss = losses['total_loss'] / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient when LossScaleOptimizer is used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: losses['total_loss']}

    if metrics:
      for m in metrics:
        m.update_state(losses[m.name])

    if self.task_config.evaluation.report_train_mean_iou:
      segmentation_labels = {
          'masks': labels['category_mask'],
          'valid_masks': labels['valid_mask'],
          'image_info': labels['image_info']
      }
      self.process_metrics(
          metrics=[self.train_mean_iou],
          labels=segmentation_labels,
          model_outputs=outputs['segmentation_outputs'])
      logs.update({
          self.train_mean_iou.name:
              self.train_mean_iou.result()
      })

    return logs
Example 10
  def train_step(self,
                 inputs: Tuple,
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None):
    """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs

    input_partition_dims = self.task_config.train_input_partition_dims
    if input_partition_dims:
      strategy = tf.distribute.get_strategy()
      features = strategy.experimental_split_to_logical_devices(
          features, input_partition_dims)
    
    input_shape = self.task_config.model.input_size[:2]
    normalized_boxes = box_ops.normalize_boxes(
        labels['raw_bboxes'], input_shape)
    bbox_color = tf.constant([[1.0, 1.0, 0.0, 1.0]])
    self.image_summary_manager.write_summaries({
        'input_images': features,
        'bbox': tf.image.draw_bounding_boxes(
            features, normalized_boxes, bbox_color),
    })
    
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Casts model outputs to float32: under a mixed_float16 or
      # mixed_bfloat16 policy the loss should be computed in float32.
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      loss, giou_loss, conf_loss, prob_loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      # Scales the loss: the default gradient allreduce sums (rather than
      # averages) across replicas inside the optimizer.
      scaled_loss = loss / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)
    
    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient before apply_gradients when LossScaleOptimizer is
    # used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: loss}
    all_losses = {
        'giou_loss': giou_loss,
        'conf_loss': conf_loss,
        'prob_loss': prob_loss,
    }
    if metrics:
      # These metrics are running means keyed by loss name, so they are updated
      # with scalar loss values rather than (labels, outputs).
      for m in metrics:
        m.update_state(all_losses[m.name])
        logs.update({m.name: m.result()})

    return logs
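
Example 10 also writes image summaries through a task-owned image_summary_manager and normalizes raw boxes with a box_ops helper; both live outside this snippet. A rough sketch of the summary-writing side using stock TensorFlow ops, assuming a file writer and images scaled to [0, 1] (the real manager may behave differently):

writer = tf.summary.create_file_writer('/tmp/train_summaries')  # illustrative path

def write_image_summaries(name_to_images, step):
  # Writes each batch of images under its own tag, analogous in spirit to
  # image_summary_manager.write_summaries above.
  with writer.as_default():
    for name, images in name_to_images.items():
      tf.summary.image(name, images, step=step, max_outputs=3)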