def model(inputs, is_training):
    """Builds the multiscale ResNet graph and returns the final logits.

    Stem (plain 7x7 or ResNet-D three-conv), optional stem max-pool (or its
    strided-conv replacement), four `multiscale` stages, global average pool,
    optional dropout, and a dense classification head.

    Args:
      inputs: input image tensor.
      is_training: Python bool; enables batch-norm updates and dropout.

    Returns:
      Logits tensor of shape [batch, num_classes].
    """
    if use_resnetd_stem:
        # ResNet-D stem: three 3x3 convs (32, 32, 64 filters) replace the
        # single 7x7 conv; only the first two are followed by norm+activation.
        for i, (stem_filters, stem_stride) in enumerate(
                [(32, 2), (32, 1), (64, 1)]):
            inputs = conv2d_fixed_padding(
                inputs=inputs, filters=stem_filters, kernel_size=3,
                strides=stem_stride, data_format=data_format,
                name="init_conv_%d" % i)
            if i < 2:
                inputs = norm_activation(
                    inputs, is_training, data_format=data_format,
                    layer=norm_act_layer, bn_momentum=bn_momentum,
                    name="init_norm_%d" % i)
    else:
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=64, kernel_size=7, strides=2,
            data_format=data_format, name="init_conv_0")

    inputs = tf.identity(inputs, 'initial_conv')
    if not pre_activation:
        inputs = norm_activation(inputs, is_training, data_format=data_format,
                                 layer=norm_act_layer, bn_momentum=bn_momentum,
                                 name="init_norm_2")

    if not skip_stem_max_pool:
        if replace_stem_max_pool:
            # ResNet-D style: a strided 3x3 conv stands in for the max pool.
            inputs = conv2d_fixed_padding(
                inputs=inputs, filters=64, kernel_size=3, strides=2,
                data_format=data_format, name="init_conv_3")
            inputs = norm_activation(
                inputs, is_training, data_format=data_format,
                bn_momentum=bn_momentum, name="init_norm_3")
        else:
            inputs = tf.layers.max_pooling2d(
                inputs=inputs, pool_size=3, strides=2, padding='SAME',
                data_format=data_format)
            inputs = tf.identity(inputs, 'initial_max_pool')

    custom_block_group = functools.partial(
        block_group,
        data_format=data_format,
        dropblock_size=dropblock_size,
        pre_activation=pre_activation,
        norm_act_layer=norm_act_layer,
        se_ratio=se_ratio,
        resnetd_shortcut=resnetd_shortcut,
        bn_momentum=bn_momentum)

    num_layers = len(layers) + 1
    stride_c2 = 2 if skip_stem_max_pool else 1

    # Four multiscale feature-extraction stages with doubling filter counts.
    # Only the final stage overrides use_pool (to False), exactly as before.
    for stage_idx, stage_filters in enumerate((64, 128, 256, 512)):
        extra_kwargs = {}
        if stage_idx == 3:
            extra_kwargs['use_pool'] = False
        inputs = multiscale(
            inputs=inputs,
            data_format=data_format,
            scales=scales,
            custom_block_group=custom_block_group,
            filters=stage_filters,
            block_fn=block_fn,
            layer=layers[stage_idx],
            stride_c2=stride_c2,
            is_training=is_training,
            name='block_group%d' % (stage_idx + 1),
            scope_name='multiscale%d' % (stage_idx + 1),
            dropblock_keep_prob=dropblock_keep_probs[stage_idx],
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, stage_idx + 2, num_layers),
            **extra_kwargs)

    if pre_activation:
        inputs = norm_activation(inputs, is_training, data_format=data_format,
                                 layer=norm_act_layer, bn_momentum=bn_momentum)

    # Global average pool over the spatial axes (reduce_mean is cheaper than
    # an explicit average_pooling2d here).
    if data_format == 'channels_last':
        spatial_axes = [1, 2]
    else:
        spatial_axes = [2, 3]
    inputs = tf.reduce_mean(inputs, reduction_indices=spatial_axes)
    inputs = tf.identity(inputs, 'final_avg_pool')
    inputs = tf.reshape(
        inputs, [-1, 2048 if block_fn is bottleneck_block else 512])

    if dropout_rate is not None:
        tf.logging.info('using dropout')
        inputs = tf.layers.dropout(
            inputs, rate=dropout_rate, training=is_training)
    tf.logging.info('Predense shape: {}'.format(inputs.shape))

    inputs = tf.layers.dense(
        inputs=inputs,
        units=num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=.01))
    inputs = tf.identity(inputs, 'final_dense')
    return inputs
# ---- Example #2 (score: 0) ----
    def model(inputs, is_training):
        """Builds the scale-invariant ResNet graph and returns the logits.

        Stem (7x7 conv, stride 1, then 3x3/2 max pool), four identical
        scale_invariance stages each followed by a 3-D max pool that collapses
        the scale axis, then global average pool, optional dropout, and a
        dense classification head.
        """
        # Stem: note stride 1 here (downsampling is left to the max pool).
        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=64,
                                      kernel_size=7,
                                      strides=1,
                                      data_format=data_format)
        inputs = tf.identity(inputs, 'initial_conv')
        if not pre_activation:
            inputs = norm_activation(inputs,
                                     is_training,
                                     data_format=data_format,
                                     layer=norm_act_layer,
                                     bn_momentum=bn_momentum)

        inputs = tf.layers.max_pooling2d(inputs=inputs,
                                         pool_size=3,
                                         strides=2,
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.identity(inputs, 'initial_max_pool')

        custom_block_group = functools.partial(
            block_group,
            data_format=data_format,
            dropblock_size=dropblock_size,
            pre_activation=pre_activation,
            norm_act_layer=norm_act_layer,
            se_ratio=se_ratio,
            resnetd_shortcut=resnetd_shortcut,
            bn_momentum=bn_momentum)

        num_layers = len(layers) + 1
        stride_c2 = 1  # fixed; stem max pool always performs the stride

        # Four scale-invariant stages with doubling filter counts. Each stage
        # is followed by a 3-D max pool that both halves the spatial dims and
        # reduces the scale axis to size 1, which is then squeezed away.
        for idx, stage_filters in enumerate((64, 128, 256, 512)):
            inputs = scale_invariance(
                inputs=inputs,
                scales=scales,
                is_training=is_training,
                block_fn=block_fn,
                layers=layers[idx],
                name='block_group%d' % (idx + 1),
                filters=stage_filters,
                drop_connect_rate=resnet_layers.get_drop_connect_rate(
                    drop_connect_rate, idx + 2, num_layers),
                dropblock_keep_probs=dropblock_keep_probs[idx],
                stride_c2=stride_c2,
                custom_block_group=custom_block_group,
                data_format=data_format)
            inputs = tf.layers.max_pooling3d(inputs=inputs,
                                             pool_size=(scales, 2, 2),
                                             strides=(scales, 2, 2),
                                             padding='SAME',
                                             data_format=data_format)
            # Squeeze axis 1 (the scale axis, now size 1).
            inputs = tf.squeeze(inputs, 1)

        if pre_activation:
            inputs = norm_activation(inputs,
                                     is_training,
                                     data_format=data_format,
                                     layer=norm_act_layer,
                                     bn_momentum=bn_momentum)

        # Global average pool over the remaining spatial dimensions.
        if data_format == 'channels_last':
            spatial = (inputs.shape[1], inputs.shape[2])
        else:
            spatial = (inputs.shape[2], inputs.shape[3])
        inputs = tf.layers.average_pooling2d(inputs=inputs,
                                             pool_size=spatial,
                                             strides=1,
                                             padding='VALID',
                                             data_format=data_format)
        inputs = tf.identity(inputs, 'final_avg_pool')
        inputs = tf.reshape(
            inputs, [-1, 2048 if block_fn is bottleneck_block else 512])

        if dropout_rate is not None:
            tf.logging.info('using dropout')
            inputs = tf.layers.dropout(inputs,
                                       rate=dropout_rate,
                                       training=is_training)

        inputs = tf.layers.dense(
            inputs=inputs,
            units=num_classes,
            kernel_initializer=tf.random_normal_initializer(stddev=.01))
        inputs = tf.identity(inputs, 'final_dense')
        return inputs
# ---- Example #3 (score: 0) ----
    def model(inputs, is_training):
        """Creation of the model graph.

        Scale-invariant ResNet variant with cross-stage skip connections:
        the stem output (c1) feeds both the main path and a parallel branch
        (c2b); the C2 and C2b features are later resized and concatenated
        with the C3 output before the final block group.
        """
        # NOTE: no dilation on the first conv/pool; stride 1 on the stem conv
        # leaves downsampling to the max pool below.
        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=64,
                                      kernel_size=7,
                                      strides=1,
                                      data_format=data_format)

        inputs = tf.identity(inputs, 'initial_conv')
        if not pre_activation:
            inputs = norm_activation(inputs,
                                     is_training,
                                     data_format=data_format,
                                     layer=norm_act_layer,
                                     bn_momentum=bn_momentum)

        inputs = tf.layers.max_pooling2d(inputs=inputs,
                                         pool_size=3,
                                         strides=2,
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.identity(inputs, 'initial_max_pool')

        # Shared keyword arguments for every block group below.
        custom_block_group = functools.partial(
            block_group,
            data_format=data_format,
            dropblock_size=dropblock_size,
            pre_activation=pre_activation,
            norm_act_layer=norm_act_layer,
            se_ratio=se_ratio,
            resnetd_shortcut=resnetd_shortcut,
            bn_momentum=bn_momentum)

        num_layers = len(layers) + 1
        stride_c2 = 1  #  if skip_stem_max_pool else 1

        ## Block S1/C1
        # Keep a handle on the stem output: it also feeds the parallel
        # S2b branch below.
        c1 = tf.identity(inputs)
        inputs = scale_invariance(
            inputs=c1,
            scales=scales,
            is_training=is_training,
            block_fn=block_fn,
            layers=layers[0],
            name='block_group1',
            filters=64,
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, 2, num_layers),
            dropblock_keep_probs=dropblock_keep_probs[0],
            stride_c2=stride_c2,
            custom_block_group=custom_block_group,
            data_format=data_format)
        # Spatial stride is (1, 1) here: the pool only collapses the scale
        # axis; spatial resolution is preserved.
        inputs = tf.layers.max_pooling3d(inputs=inputs,
                                         pool_size=(scales, 2, 2),
                                         strides=(scales, 1, 1),
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.squeeze(inputs, 1)  # Squeeze axis 1 (the scale axis).

        ## Block S2b — parallel branch straight from the stem output (c1),
        ## reusing layers[0] / dropblock_keep_probs[0] but with 128 filters.
        c2b = scale_invariance(
            inputs=c1,
            scales=scales,
            is_training=is_training,
            block_fn=block_fn,
            layers=layers[0],
            name='block_groups2b',
            filters=128,
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, 2, num_layers),
            dropblock_keep_probs=dropblock_keep_probs[0],
            stride_c2=stride_c2,
            custom_block_group=custom_block_group,
            data_format=data_format)
        c2b = tf.layers.max_pooling3d(inputs=c2b,
                                      pool_size=(scales, 2, 2),
                                      strides=(scales, 1, 1),
                                      padding='SAME',
                                      data_format=data_format)
        c2b = tf.squeeze(c2b, 1)  # Squeeze axis 1 (the scale axis).

        ## Block S2/C2
        inputs = scale_invariance(
            inputs=inputs,
            scales=scales,
            is_training=is_training,
            block_fn=block_fn,
            layers=layers[1],
            name='block_group2',
            filters=128,
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, 3, num_layers),
            dropblock_keep_probs=dropblock_keep_probs[1],
            stride_c2=stride_c2,
            custom_block_group=custom_block_group,
            data_format=data_format)
        inputs = tf.layers.max_pooling3d(inputs=inputs,
                                         pool_size=(scales, 2, 2),
                                         strides=(scales, 1, 1),
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.squeeze(inputs, 1)  # Squeeze axis 1 (the scale axis).
        # Keep C2 for the merge before the final block group.
        c2 = tf.identity(inputs)

        ## Block S3/C3
        inputs = scale_invariance(
            inputs=inputs,
            scales=scales,
            is_training=is_training,
            block_fn=block_fn,
            layers=layers[2],
            name='block_group3',
            filters=256,
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, 4, num_layers),
            dropblock_keep_probs=dropblock_keep_probs[2],
            stride_c2=stride_c2,
            custom_block_group=custom_block_group,
            data_format=data_format)
        inputs = tf.layers.max_pooling3d(inputs=inputs,
                                         pool_size=(scales, 2, 2),
                                         strides=(scales, 1, 1),
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.squeeze(inputs, 1)  # Squeeze axis 1 (the scale axis).

        # Prep C3 for merge: resize C3 and C2 to C2b's spatial size.
        # NOTE(review): merge_size[1:3] assumes channels_last (H, W at axes
        # 1-2) — confirm against data_format. Casts bracket tf.image.resize,
        # which operates in float32; original dtype is restored afterwards.
        merge_size = c2b.get_shape().as_list()
        inputs = tf.cast(inputs, tf.float32)
        inputs = tf.image.resize(inputs,
                                 merge_size[1:3],
                                 align_corners=True,
                                 method=RESIZE_METHOD)
        inputs = tf.cast(inputs, c2b.dtype)
        c2 = tf.cast(c2, tf.float32)
        c2 = tf.image.resize(c2,
                             merge_size[1:3],
                             align_corners=True,
                             method=RESIZE_METHOD)
        c2 = tf.cast(c2, c2b.dtype)

        # Merge C2 and C2b with C3 along the channel axis (assumes
        # channels_last; -1 is the channel axis — TODO confirm).
        inputs = tf.concat([inputs, c2, c2b], -1)

        ## Block S4/C4
        inputs = scale_invariance(
            inputs=inputs,
            scales=scales,
            is_training=is_training,
            block_fn=block_fn,
            layers=layers[3],
            name='block_group4',
            filters=512,  # Inception-style merge for C2->S4
            drop_connect_rate=resnet_layers.get_drop_connect_rate(
                drop_connect_rate, 5, num_layers),
            dropblock_keep_probs=dropblock_keep_probs[3],
            stride_c2=stride_c2,
            custom_block_group=custom_block_group,
            data_format=data_format)
        inputs = tf.layers.max_pooling3d(inputs=inputs,
                                         pool_size=(scales, 2, 2),
                                         strides=(scales, 1, 1),
                                         padding='SAME',
                                         data_format=data_format)
        inputs = tf.squeeze(inputs, 1)  # Squeeze axis 1 (the scale axis).

        if pre_activation:
            inputs = norm_activation(inputs,
                                     is_training,
                                     data_format=data_format,
                                     layer=norm_act_layer,
                                     bn_momentum=bn_momentum)

        # Global average pool: pool_size spans the full spatial extent.
        # TODO(huangyp): reduce_mean will be faster.
        if data_format == 'channels_last':
            pool_size = (inputs.shape[1], inputs.shape[2])
        else:
            pool_size = (inputs.shape[2], inputs.shape[3])
        inputs = tf.layers.average_pooling2d(inputs=inputs,
                                             pool_size=pool_size,
                                             strides=1,
                                             padding='VALID',
                                             data_format=data_format)
        inputs = tf.identity(inputs, 'final_avg_pool')
        # 2048 for bottleneck blocks (4x expansion), 512 for basic blocks.
        inputs = tf.reshape(
            inputs, [-1, 2048 if block_fn is bottleneck_block else 512])

        if dropout_rate is not None:
            tf.logging.info('using dropout')
            inputs = tf.layers.dropout(inputs,
                                       rate=dropout_rate,
                                       training=is_training)

        inputs = tf.layers.dense(
            inputs=inputs,
            units=num_classes,
            kernel_initializer=tf.random_normal_initializer(stddev=.01))
        inputs = tf.identity(inputs, 'final_dense')
        return inputs