def model(inputs, is_training):
    """Creation of the model graph."""
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=64,
        kernel_size=7,
        strides=2,
        is_training=is_training,
        data_format=data_format)
    inputs = tf.identity(inputs, 'initial_conv')
    inputs = batch_norm_relu(inputs, is_training, data_format=data_format)

    pooled_inputs = tf.layers.max_pooling2d(
        inputs=inputs, pool_size=3, strides=2, padding='SAME',
        data_format=data_format)
    if is_training and FLAGS.mlperf_logging:
        resnet_log_helper.log_max_pool(input_tensor=inputs,
                                       output_tensor=pooled_inputs)
    inputs = tf.identity(pooled_inputs, 'initial_max_pool')

    inputs = block_group(
        inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
        strides=1, is_training=is_training, name='block_group1',
        data_format=data_format)
    inputs = block_group(
        inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1],
        strides=2, is_training=is_training, name='block_group2',
        data_format=data_format)
    inputs = block_group(
        inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2],
        strides=2, is_training=is_training, name='block_group3',
        data_format=data_format)
    inputs = block_group(
        inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3],
        strides=2, is_training=is_training, name='block_group4',
        data_format=data_format)

    # The activation map is 7x7 here, so this is a global average pool.
    # TODO(huangyp): reduce_mean will be faster.
    # The spatial dims sit at different indices for NCHW vs. NHWC.
    if data_format == 'channels_first':
        pool_size = (inputs.shape[2], inputs.shape[3])
    else:
        pool_size = (inputs.shape[1], inputs.shape[2])
    inputs = tf.layers.average_pooling2d(
        inputs=inputs, pool_size=pool_size, strides=1, padding='VALID',
        data_format=data_format)
    inputs = tf.identity(inputs, 'final_avg_pool')
    inputs = tf.reshape(
        inputs, [-1, 2048 if block_fn is bottleneck_block else 512])
    if is_training and FLAGS.mlperf_logging:
        mlperf_log.resnet_print(
            key=mlperf_log.MODEL_HP_DENSE, value=num_classes)
    inputs = tf.layers.dense(
        inputs=inputs,
        units=num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=.01))
    inputs = tf.identity(inputs, 'final_dense')
    if is_training and FLAGS.mlperf_logging:
        mlperf_log.resnet_print(
            key=mlperf_log.MODEL_HP_FINAL_SHAPE,
            value=inputs.shape.as_list()[1:])
    return inputs
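
A minimal usage sketch for the function above, assuming model() is a closure
produced by a generator in the style of the TPU ResNet code (the free
variables data_format, block_fn, layers, and num_classes would be bound by
the enclosing function). The generator name resnet_v1_generator and the
input shapes below are hypothetical; bottleneck_block is assumed to be
defined in the same module.

import tensorflow as tf

def resnet_v1_generator(block_fn, layers, num_classes,
                        data_format='channels_last'):
    """Returns a model(inputs, is_training) closure like the one above."""
    def model(inputs, is_training):
        ...  # body as in the example above, closing over the arguments
    return model

# Build a ResNet-50-style graph over a batch of 224x224 RGB images (NHWC).
network = resnet_v1_generator(
    block_fn=bottleneck_block, layers=[3, 4, 6, 3], num_classes=1000)
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits = network(images, is_training=True)

Example #2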
  def __call__(self, inputs, training):
    """Add operations to classify a batch of input images.

    Args:
      inputs: A Tensor representing a batch of input images.
      training: A boolean. Set to True to add operations required only when
        training the classifier.

    Returns:
      A logits Tensor with shape [<batch_size>, self.num_classes].
    """

    # Drop batch size from shape logging.
    mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_INITIAL_SHAPE,
                            value=inputs.shape.as_list()[1:])

    with self._model_variable_scope():
      if self.data_format == 'channels_first':
        # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
        # This provides a large performance boost on GPU. See
        # https://www.tensorflow.org/performance/performance_guide#data_formats
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

      if self.resnet_version in (1, 2, 24, 34):
        # Standard single-conv stem, shared by v1, v2, the v1-simple (24)
        # and the v1cs (34) variants.
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=self.num_filters,
            kernel_size=self.kernel_size, strides=self.conv_stride,
            data_format=self.data_format)

      elif self.resnet_version == 14:
        # v1d architecture: the single large conv stem is replaced by three
        # 3x3 convs (the "deep stem").
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=self.num_filters // 2, kernel_size=3,
            strides=self.conv_stride, data_format=self.data_format)
        inputs = batch_norm(inputs, training, self.data_format)
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=self.num_filters // 2, kernel_size=3,
            strides=1, data_format=self.data_format)
        inputs = batch_norm(inputs, training, self.data_format)
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=self.num_filters, kernel_size=3,
            strides=1, data_format=self.data_format)

      inputs = tf.identity(inputs, 'initial_conv')

      # We do not include batch normalization or activation functions in V2
      # for the initial conv1 because the first ResNet unit will perform these
      # for both the shortcut and non-shortcut paths as part of the first
      # block's projection. Cf. Appendix of [2].
      if self.resnet_version in (1, 14, 24, 34):
        inputs = batch_norm(inputs, training, self.data_format)

        mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU)
        inputs = tf.nn.relu(inputs)

      if self.first_pool_size:
        pooled_inputs = tf.layers.max_pooling2d(
            inputs=inputs, pool_size=self.first_pool_size,
            strides=self.first_pool_stride, padding='SAME',
            data_format=self.data_format)
        resnet_log_helper.log_max_pool(input_tensor=inputs,
                                       output_tensor=pooled_inputs)
        inputs = tf.identity(pooled_inputs, 'initial_max_pool')

      feat_s = []  # Intermediate features, returned when enable_at is set.
      if self.resnet_version == 24: # mlperf open
        import nets.manual_blocks_t as manual_blocks
        # group1
        for i, num_blocks in enumerate(self.block_sizes[0:1]):
          num_filters = self.num_filters * (2**i)
          inputs = block_layer(
              inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
              block_fn=self.block_fn, blocks=num_blocks,
              strides=self.block_strides[i], training=training,
              name='block_layer{}'.format(i + 1), data_format=self.data_format,
              version=self.resnet_version)
        # group2
        inputs = manual_blocks.block_m(inputs=inputs, filters=128,
                          training=training, strides=2,
                          data_format=self.data_format)
        inputs = manual_blocks.block_m0(inputs=inputs, filters=128,
                          training=training, strides=1,
                          data_format=self.data_format)
        # group3
        inputs = manual_blocks.block_m1(inputs=inputs, filters=256,
                          training=training, strides=2,
                          data_format=self.data_format)
        inputs = manual_blocks.block_m2(inputs=inputs, filters=256,
                          training=training, strides=1,
                          data_format=self.data_format)
        if self.enable_at:
          feat_s.append(inputs)
        # group4
        inputs = manual_blocks.block_m3(inputs=inputs, filters=512,
                          training=training, strides=2,
                          data_format=self.data_format)
        inputs = manual_blocks.block_m4(inputs=inputs, filters=512,
                          training=training, strides=1,
                          data_format=self.data_format)
        if self.enable_at:
          feat_s.append(inputs)

      elif self.resnet_version == 34: # dawnbench
        import nets.manual_blocks as manual_blocks
        # group1~2
        for i, num_blocks in enumerate(self.block_sizes[0:2]):
          num_filters = self.num_filters * (2**i)
          inputs = block_layer(
              inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
              block_fn=self.block_fn, blocks=num_blocks,
              strides=self.block_strides[i], training=training,
              name='block_layer{}'.format(i + 1), data_format=self.data_format,
              version=self.resnet_version)
        # group3
        inputs = manual_blocks.block_m1(inputs=inputs, filters=256,
                          training=training, strides=2,
                          data_format=self.data_format)
        inputs = manual_blocks.block_m2(inputs=inputs, filters=256,
                          training=training, strides=1,
                          data_format=self.data_format)
        if self.enable_at:
          feat_s.append(inputs)
        # group4
        inputs = manual_blocks.block_m3(inputs=inputs, filters=512,
                          training=training, strides=2,
                          data_format=self.data_format)
        inputs = manual_blocks.block_m4(inputs=inputs, filters=512,
                          training=training, strides=1,
                          data_format=self.data_format)
        if self.enable_at:
          feat_s.append(inputs)

      else: # standard v1, v1d, v2
        for i, num_blocks in enumerate(self.block_sizes):
          num_filters = self.num_filters * (2**i)
          inputs = block_layer(
              inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
              block_fn=self.block_fn, blocks=num_blocks,
              strides=self.block_strides[i], training=training,
              name='block_layer{}'.format(i + 1), data_format=self.data_format,
              version=self.resnet_version)
          if (i > 1) and self.enable_at:
            feat_s.append(inputs)

      # Only apply BN and ReLU here for models that do pre-activation in each
      # building/bottleneck block, e.g. ResNet v2.
      if self.pre_activation:
        inputs = batch_norm(inputs, training, self.data_format)

        mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU)
        inputs = tf.nn.relu(inputs)

      # The current top layer has shape
      # `batch_size x pool_size x pool_size x final_size`.
      # ResNet applies average pooling over the whole spatial extent, which
      # is equivalent to a reduce_mean over the spatial axes; reduce_mean is
      # used here because it performs better than AveragePooling2D.
      axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
      inputs = tf.reduce_mean(inputs, axes, keepdims=True)
      inputs = tf.identity(inputs, 'final_reduce_mean')

      inputs = tf.reshape(inputs, [-1, inputs.get_shape().as_list()[-1]])
      mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_DENSE,
                              value=self.num_classes)
      inputs = tf.layers.dense(
          inputs=inputs,
          units=self.num_classes,
          kernel_initializer=tf.random_normal_initializer(stddev=.01))
      inputs = tf.identity(inputs, 'final_dense')

      # Drop batch size from shape logging.
      mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_FINAL_SHAPE,
                              value=inputs.shape.as_list()[1:])
      return inputs, feat_s
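
A hedged construction-and-call sketch for the class owning the __call__
above. The constructor arguments mirror the upstream tensorflow/models
resnet_model.Model interface, which this variant appears to extend with
extra resnet_version codes (14, 24, 34) and an enable_at flag; the class
name and every argument below are assumptions, not confirmed by the snippet.
Unlike the stock model, this __call__ returns a (logits, feat_s) pair, where
feat_s holds the intermediate block outputs gathered when enable_at is set.

# Hypothetical instantiation; names follow the upstream Model constructor
# and may differ in this fork.
model = Model(
    resnet_size=50, bottleneck=True, num_classes=1000, num_filters=64,
    kernel_size=7, conv_stride=2, first_pool_size=3, first_pool_stride=2,
    block_sizes=[3, 4, 6, 3], block_strides=[1, 2, 2, 2],
    resnet_version=1, data_format='channels_last')

images = tf.placeholder(tf.float32, [None, 224, 224, 3])  # NHWC input
logits, feat_s = model(images, training=True)

Note that the final reduce_mean over the spatial axes is numerically
identical to a global average pool whose window covers the whole feature
map; it is simply cheaper to execute.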
Example #3
def log_max_pool(self, input_tensor, output_tensor):
    """Forwards max-pool logging to the MLPerf helper for ResNet-50 only."""
    if self.model == 'resnet50':
        resnet_log_helper.log_max_pool(input_tensor, output_tensor)
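
A small wiring sketch for the wrapper above; the owning class is not shown
in the snippet, so the class below and its model attribute are hypothetical,
while resnet_log_helper is the MLPerf compliance helper used in the earlier
examples.

class MlperfLogger(object):
    """Hypothetical owner of the log_max_pool method above."""

    def __init__(self, model):
        self.model = model  # e.g. 'resnet50'

    def log_max_pool(self, input_tensor, output_tensor):
        # Emit the compliance entry only for ResNet-50 runs.
        if self.model == 'resnet50':
            resnet_log_helper.log_max_pool(input_tensor, output_tensor)

logger = MlperfLogger('resnet50')
feature_map = tf.placeholder(tf.float32, [None, 112, 112, 64])
pooled = tf.layers.max_pooling2d(
    inputs=feature_map, pool_size=3, strides=2, padding='SAME')
logger.log_max_pool(input_tensor=feature_map, output_tensor=pooled)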