Example #1
    def build_outputs(self, features, labels, mode):
        backbone_features = self._backbone_fn(
            features, is_training=(mode == mode_keys.TRAIN))
        fpn_features = self._fpn_fn(backbone_features,
                                    is_training=(mode == mode_keys.TRAIN))
        cls_outputs, box_outputs = self._head_fn(
            fpn_features, is_training=(mode == mode_keys.TRAIN))
        model_outputs = {
            'cls_outputs': cls_outputs,
            'box_outputs': box_outputs,
        }

        # Prints the number of parameters and FLOPs in the model.
        # dict.values() is not subscriptable in Python 3, so materialize it
        # as a list before indexing.
        batch_size, _, _, _ = list(
            backbone_features.values())[0].get_shape().as_list()
        benchmark_utils.compute_model_statistics(
            batch_size, is_training=(mode == mode_keys.TRAIN))

        if mode != mode_keys.TRAIN:
            boxes, scores, classes, valid_detections = self._generate_detections_fn(
                box_outputs, cls_outputs, labels['anchor_boxes'],
                labels['image_info'][:, 1:2, :])
            model_outputs.update({
                'num_detections': valid_detections,
                'detection_boxes': boxes,
                'detection_classes': classes,
                'detection_scores': scores,
            })
        return model_outputs
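A minimal standalone sketch of the static batch-size lookup used above. In Python 3, dict.values() is not subscriptable, hence the list() wrapper; TensorFlow 1.x (via the compat shim) is assumed, and the level keys and shapes are illustrative:

    import tensorflow.compat.v1 as tf

    # Multilevel backbone features keyed by pyramid level (illustrative shapes).
    backbone_features = {
        3: tf.zeros([8, 80, 80, 256]),
        4: tf.zeros([8, 40, 40, 256]),
    }
    # dict.values() returns a view in Python 3; materialize it as a list
    # before indexing to read the static batch size.
    first_level = list(backbone_features.values())[0]
    batch_size, _, _, _ = first_level.get_shape().as_list()  # batch_size == 8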
Example #2
    def predict(self, features):
        """Returns a TPUEstimatorSpec for prediction.

    Args:
      features: a dict of Tensors including the input images and other label
        tensors used for prediction.

    Returns:
      a TPUEstimatorSpec object used for prediction.
    """
        images = features['images']
        labels = features['labels']

        outputs = self.build_outputs(images, labels, mode=mode_keys.PREDICT)
        # Log model statistics.
        batch_size = images.get_shape().as_list()[0]
        _, _ = benchmark_utils.compute_model_statistics(
            batch_size=batch_size,
            json_file_path=os.path.join(self._model_dir,
                                        'predict_model_stats.json'))

        predictions = self.build_predictions(outputs, labels)

        tpu_estimator_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=tf.estimator.ModeKeys.PREDICT, predictions=predictions)

        if self._use_tpu:
            return tpu_estimator_spec
        else:
            return tpu_estimator_spec.as_estimator_spec()
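predict, evaluate, and train all end with the same TPU/CPU fallback: build a TPUEstimatorSpec and downgrade it to a plain EstimatorSpec when not running on a TPU. A minimal sketch of that pattern, assuming TensorFlow 1.x; make_predict_spec and its arguments are illustrative:

    import tensorflow.compat.v1 as tf

    def make_predict_spec(predictions, use_tpu):
        # Build a TPU spec; on CPU/GPU, convert it to a plain EstimatorSpec.
        spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=tf.estimator.ModeKeys.PREDICT, predictions=predictions)
        return spec if use_tpu else spec.as_estimator_spec()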
Example #3
    def evaluate(self, images, labels):
        """Returns a TPUEstimatorSpec for evaluation.

    Args:
      images: a Tensor of shape [batch_size, height, width, channel]
        representing the input image tensor.
      labels: a dict of label tensors.

    Returns:
      a TPUEstimatorSpec object used for evaluation.
    """
        outputs = self.build_outputs(images, labels, mode=mode_keys.EVAL)
        # Log model statistics.
        batch_size = images.get_shape().as_list()[0]
        _, _ = benchmark_utils.compute_model_statistics(
            batch_size=batch_size,
            json_file_path=os.path.join(self._model_dir,
                                        'eval_model_stats.json'))

        model_loss = self.build_losses(outputs, labels)

        eval_metrics = self.build_metrics(outputs, labels)

        tpu_estimator_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=model_loss,
            eval_metrics=eval_metrics)

        if self._use_tpu:
            return tpu_estimator_spec
        else:
            return tpu_estimator_spec.as_estimator_spec()
Example #4
  def build_outputs(self, images, labels, mode):
    """Builds the model forward pass and generates outputs.

    It wraps the implementation in `_build_outputs` with some code to handle
    bfloat16 scope.

    Args:
      images: a Tensor of shape [batch_size, height, width, channel],
        representing the input image.
      labels: a dict of Tensors that includes labels used for training/eval.
      mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

    Returns:
      a dict of output tensors.
    """
    if self._use_bfloat16:
      with tf.tpu.bfloat16_scope():
        def cast_outputs_to_float(d):
          for k, v in sorted(six.iteritems(d)):
            if isinstance(v, dict):
              cast_outputs_to_float(v)
            else:
              d[k] = tf.cast(v, tf.float32)

        # Casts class and box outputs to tf.float32.
        outputs = self._build_outputs(images, labels, mode)
        cast_outputs_to_float(outputs)
    else:
      outputs = self._build_outputs(images, labels, mode)

    # Log model statistics.
    batch_size = images.get_shape().as_list()[0]
    _, _ = benchmark_utils.compute_model_statistics(batch_size)

    return outputs
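The recursive cast above also works as a standalone helper. A minimal runnable sketch, assuming TensorFlow 1.x and six; the outputs dict is illustrative:

    import six
    import tensorflow.compat.v1 as tf

    def cast_outputs_to_float(d):
        # Recursively casts every tensor leaf of a (possibly nested) dict
        # to tf.float32, in place.
        for k, v in sorted(six.iteritems(d)):
            if isinstance(v, dict):
                cast_outputs_to_float(v)
            else:
                d[k] = tf.cast(v, tf.float32)

    outputs = {
        'logits': tf.zeros([2, 10], dtype=tf.bfloat16),
        'boxes': {'p3': tf.zeros([2, 4], dtype=tf.bfloat16)},
    }
    cast_outputs_to_float(outputs)  # every leaf is now tf.float32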
Example #5
    def build_outputs(self, features, labels, mode):
        backbone_features = self._backbone_fn(
            features, is_training=(mode == mode_keys.TRAIN))
        fpn_features = self._fpn_fn(backbone_features,
                                    is_training=(mode == mode_keys.TRAIN))
        logits = self._head_fn(fpn_features,
                               is_training=(mode == mode_keys.TRAIN))
        outputs = {
            'logits': logits,
        }

        # Prints the number of parameters and FLOPs in the model.
        # dict.values() is not subscriptable in Python 3, so materialize it
        # as a list before indexing.
        batch_size, _, _, _ = list(
            backbone_features.values())[0].get_shape().as_list()
        benchmark_utils.compute_model_statistics(
            batch_size, is_training=(mode == mode_keys.TRAIN))
        return outputs
Example #6
    def _build_outputs(self, images, labels, mode):
        batch_size = tf.shape(images)[0]
        if 'anchor_boxes' in labels:
            anchor_boxes = labels['anchor_boxes']
        else:
            anchor_boxes = anchor.Anchor(
                self._params.architecture.min_level,
                self._params.architecture.max_level,
                self._params.anchor.num_scales,
                self._params.anchor.aspect_ratios,
                self._params.anchor.anchor_size,
                images.get_shape().as_list()[1:3]).multilevel_boxes

            for level in anchor_boxes:
                anchor_boxes[level] = tf.tile(
                    tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

        backbone_features = self._backbone_fn(
            images, is_training=(mode == mode_keys.TRAIN))
        fpn_features = self._fpn_fn(backbone_features,
                                    is_training=(mode == mode_keys.TRAIN))

        if self._params.architecture.output_flat_fpn_features:
            flat_fpn_features_list = []
            for level in range(self._params.architecture.min_level,
                               self._params.architecture.max_level + 1):
                flat_fpn_features_list.append(
                    tf.reshape(fpn_features[level], [batch_size, -1]))
            flat_fpn_features = tf.concat(flat_fpn_features_list, axis=1)
            flat_fpn_features = tf.identity(flat_fpn_features,
                                            'RawFpnFeatures')

        cls_outputs, box_outputs = self._head_fn(
            fpn_features, is_training=(mode == mode_keys.TRAIN))
        model_outputs = {
            'cls_outputs': cls_outputs,
            'box_outputs': box_outputs,
        }

        tf.logging.info('Computing number of FLOPs before NMS...')
        static_batch_size = images.get_shape().as_list()[0]
        if static_batch_size:
            _, _ = benchmark_utils.compute_model_statistics(static_batch_size)

        if mode != mode_keys.TRAIN:
            detection_results = self._generate_detections_fn(
                box_outputs, cls_outputs, anchor_boxes,
                labels['image_info'][:, 1:2, :])
            model_outputs.update(detection_results)
        return model_outputs
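When output_flat_fpn_features is set, each per-level feature map is flattened to [batch, -1] and the levels are concatenated. A minimal sketch of that flattening, assuming TensorFlow 1.x; the levels and shapes are illustrative:

    import tensorflow.compat.v1 as tf

    # Per-level FPN feature maps keyed by pyramid level (illustrative shapes).
    fpn_features = {
        3: tf.zeros([8, 80, 80, 256]),
        4: tf.zeros([8, 40, 40, 256]),
    }
    batch_size = 8
    # Flatten each level to [batch, h * w * c], then concatenate along axis 1.
    flat_list = [
        tf.reshape(fpn_features[level], [batch_size, -1])
        for level in range(3, 5)
    ]
    flat_fpn_features = tf.concat(flat_list, axis=1)  # [8, 80*80*256 + 40*40*256]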
Example #7
    def _build_outputs(self, images, labels, mode):
        if 'anchor_boxes' in labels:
            anchor_boxes = labels['anchor_boxes']
        else:
            anchor_boxes = anchor.Anchor(
                self._anchor_params.min_level, self._anchor_params.max_level,
                self._anchor_params.num_scales,
                self._anchor_params.aspect_ratios,
                self._anchor_params.anchor_size,
                images.get_shape().as_list()[1:3]).multilevel_boxes

            batch_size = tf.shape(images)[0]
            for level in anchor_boxes:
                anchor_boxes[level] = tf.tile(
                    tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

        backbone_features = self._backbone_fn(
            images, is_training=(mode == mode_keys.TRAIN))
        fpn_features = self._fpn_fn(backbone_features,
                                    is_training=(mode == mode_keys.TRAIN))
        cls_outputs, box_outputs = self._head_fn(
            fpn_features, is_training=(mode == mode_keys.TRAIN))
        model_outputs = {
            'cls_outputs': cls_outputs,
            'box_outputs': box_outputs,
        }

        tf.logging.info('Computing number of FLOPs before NMS...')
        _, _ = benchmark_utils.compute_model_statistics(
            images.get_shape().as_list()[0])

        if mode != mode_keys.TRAIN:
            detection_results = self._generate_detections_fn(
                box_outputs, cls_outputs, anchor_boxes,
                labels['image_info'][:, 1:2, :])
            model_outputs.update(detection_results)
        return model_outputs
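Both _build_outputs variants tile the per-level anchors across a dynamic batch when labels carry no precomputed anchor_boxes. A minimal sketch of that broadcast, assuming TensorFlow 1.x graph mode; the anchor counts are illustrative:

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()
    images = tf.placeholder(tf.float32, [None, 640, 640, 3])
    batch_size = tf.shape(images)[0]  # dynamic batch size

    # Per-level anchors of shape [num_anchors, 4], keyed by pyramid level.
    anchor_boxes = {3: tf.zeros([100, 4]), 4: tf.zeros([25, 4])}
    for level in anchor_boxes:
        # [num_anchors, 4] -> [1, num_anchors, 4] -> [batch, num_anchors, 4]
        anchor_boxes[level] = tf.tile(
            tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])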
Example #8
    def train(self, images, labels):
        """Returns a TPUEstimatorSpec for training.

    Args:
      images: a Tensor of shape [batch_size, height, width, channel]
        representing the input image tensor.
      labels: a dict of label tensors.

    Returns:
      a TPUEstimatorSpec object used for training.
    """
        # If the input image is transposed, we need to revert it back to the
        # original shape before it's used in the computation.
        if self._transpose_input:
            if self._space_to_depth_block_size > 1:
                # HWNC -> NHWC
                images = tf.transpose(images, [2, 0, 1, 3])
            else:
                # HWCN -> NHWC
                images = tf.transpose(images, [3, 0, 1, 2])

        outputs = self.build_outputs(images, labels, mode=mode_keys.TRAIN)
        # Log model statistics.
        batch_size = images.get_shape().as_list()[0]
        _, _ = benchmark_utils.compute_model_statistics(
            batch_size=batch_size,
            json_file_path=os.path.join(self._model_dir,
                                        'train_model_stats.json'))

        model_loss = self.build_losses(outputs, labels)

        global_step = tf.train.get_global_step()

        learning_rate = self._learning_rate_fn(global_step)
        self.add_scalar_summary('learning_rate', learning_rate)

        # Sets up the optimizer.
        optimizer = self._optimizer_fn(learning_rate)
        if self._use_tpu:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Gets all trainable variables and applies the variable filter.
        train_var_list = filter_trainable_variables(tf.trainable_variables(),
                                                    self._frozen_var_prefix)

        # Gets the regularization variables and applies the regularization loss.
        regularization_var_list = filter_regularization_variables(
            train_var_list, self._regularization_var_regex)
        l2_regularization_loss = self._l2_weight_decay * tf.add_n(
            [tf.nn.l2_loss(v) for v in regularization_var_list])

        self.add_scalar_summary('l2_regularization_loss',
                                l2_regularization_loss)

        total_loss = model_loss + l2_regularization_loss

        grads_and_vars = optimizer.compute_gradients(total_loss,
                                                     train_var_list)
        if self._gradient_clip_norm > 0.0:
            grads = [gv[0] for gv in grads_and_vars]
            tvars = [gv[1] for gv in grads_and_vars]
            clipped_grads, _ = tf.clip_by_global_norm(grads,
                                                      self._gradient_clip_norm)
            grads_and_vars = list(zip(clipped_grads, tvars))

        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads_and_vars, global_step)

        scaffold_fn = self.restore_from_checkpoint()
        if self._enable_summary:
            host_call_fn = self.summarize()
        else:
            host_call_fn = None

        tpu_estimator_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=tf.estimator.ModeKeys.TRAIN,
            loss=total_loss,
            train_op=train_op,
            host_call=host_call_fn,
            scaffold_fn=scaffold_fn)

        if self._use_tpu:
            return tpu_estimator_spec
        else:
            return tpu_estimator_spec.as_estimator_spec()
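The training step above clips gradients by global norm before applying them. A minimal standalone sketch of that clipping pattern, assuming TensorFlow 1.x graph mode; the loss and variable are illustrative:

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()
    x = tf.Variable(3.0)
    loss = tf.square(x)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

    grads_and_vars = optimizer.compute_gradients(loss, var_list=[x])
    grads = [gv[0] for gv in grads_and_vars]
    tvars = [gv[1] for gv in grads_and_vars]
    # Rescale all gradients so their global norm does not exceed 1.0.
    clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
    train_op = optimizer.apply_gradients(zip(clipped_grads, tvars))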
Example #9
    def _log_model_statistics(self, batched_input):
        batch_size, _, _, _ = batched_input.get_shape().as_list()
        _, _ = benchmark_utils.compute_model_statistics(batch_size)