def model(inputs, is_training):
  """Creation of the model graph (multiscale variant).

  Builds the stem (plain 7x7/2 conv or ResNet-D three-conv stem), an optional
  stem max pool (or its strided-conv replacement), four `multiscale` block
  groups with a per-group drop-connect schedule, a global average pool,
  optional dropout, and a final dense classification layer.

  Args:
    inputs: input image tensor; layout is assumed to match `data_format`
      (NOTE(review): confirm against the caller).
    is_training: bool controlling norm/dropblock/dropout behavior.

  Returns:
    `[batch, num_classes]` logits tensor, named 'final_dense'.
  """
  if use_resnetd_stem:
    # ResNet-D stem: 3x3/2 -> 3x3/1 -> 3x3/1 convs with norm+activation
    # between them, replacing the single 7x7/2 conv in the else branch.
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=32, kernel_size=3, strides=2,
        data_format=data_format, name="init_conv_0")
    inputs = norm_activation(
        inputs, is_training, data_format=data_format, layer=norm_act_layer,
        bn_momentum=bn_momentum, name="init_norm_0")
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=32, kernel_size=3, strides=1,
        data_format=data_format, name="init_conv_1")
    inputs = norm_activation(
        inputs, is_training, data_format=data_format, layer=norm_act_layer,
        bn_momentum=bn_momentum, name="init_norm_1")
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=64, kernel_size=3, strides=1,
        data_format=data_format, name="init_conv_2")
  else:
    # Classic ResNet stem: a single 7x7 conv with stride 2.
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=64, kernel_size=7, strides=2,
        data_format=data_format, name="init_conv_0")
  inputs = tf.identity(inputs, 'initial_conv')
  if not pre_activation:
    # Post-activation nets normalize right after the stem; pre-activation
    # nets instead normalize once at the end (see bottom of this function).
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum,
                             name="init_norm_2")
  if not skip_stem_max_pool:
    if replace_stem_max_pool:
      # Replace the stem max pool with a strided 3x3 conv + norm.
      # NOTE(review): unlike the sibling calls above, this norm_activation
      # does not pass layer=norm_act_layer — confirm that is intentional.
      inputs = conv2d_fixed_padding(
          inputs=inputs, filters=64, kernel_size=3, strides=2,
          data_format=data_format, name="init_conv_3")
      inputs = norm_activation(
          inputs, is_training, data_format=data_format,
          bn_momentum=bn_momentum, name="init_norm_3")
    else:
      inputs = tf.layers.max_pooling2d(
          inputs=inputs, pool_size=3, strides=2, padding='SAME',
          data_format=data_format)
      inputs = tf.identity(inputs, 'initial_max_pool')
  # Pre-bind the block-group options shared by every group below.
  custom_block_group = functools.partial(
      block_group,
      data_format=data_format,
      dropblock_size=dropblock_size,
      pre_activation=pre_activation,
      norm_act_layer=norm_act_layer,
      se_ratio=se_ratio,
      resnetd_shortcut=resnetd_shortcut,
      bn_momentum=bn_momentum)
  num_layers = len(layers) + 1
  # If the stem max pool was skipped, recover its downsampling by giving
  # the first block group stride 2.
  stride_c2 = 2 if skip_stem_max_pool else 1
  # Multiscale feature extractions
  inputs = multiscale(
      inputs=inputs,
      data_format=data_format,
      scales=scales,
      custom_block_group=custom_block_group,
      filters=64,
      block_fn=block_fn,
      layer=layers[0],
      stride_c2=stride_c2,
      is_training=is_training,
      name='block_group1',
      scope_name='multiscale1',
      dropblock_keep_prob=dropblock_keep_probs[0],
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 2, num_layers))
  inputs = multiscale(
      inputs=inputs,
      data_format=data_format,
      scales=scales,
      custom_block_group=custom_block_group,
      filters=128,
      block_fn=block_fn,
      layer=layers[1],
      stride_c2=stride_c2,
      is_training=is_training,
      name='block_group2',
      scope_name='multiscale2',
      dropblock_keep_prob=dropblock_keep_probs[1],
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 3, num_layers))
  inputs = multiscale(
      inputs=inputs,
      data_format=data_format,
      scales=scales,
      custom_block_group=custom_block_group,
      filters=256,
      block_fn=block_fn,
      layer=layers[2],
      stride_c2=stride_c2,
      is_training=is_training,
      name='block_group3',
      scope_name='multiscale3',
      dropblock_keep_prob=dropblock_keep_probs[2],
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 4, num_layers))
  # Final group disables its pooling step (use_pool=False).
  inputs = multiscale(
      inputs=inputs,
      data_format=data_format,
      scales=scales,
      custom_block_group=custom_block_group,
      filters=512,
      block_fn=block_fn,
      layer=layers[3],
      stride_c2=stride_c2,
      is_training=is_training,
      name='block_group4',
      scope_name='multiscale4',
      dropblock_keep_prob=dropblock_keep_probs[3],
      use_pool=False,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 5, num_layers))
  if pre_activation:
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum)
  # The activation is 7x7 so this is a global average pool.
  # TODO(huangyp): reduce_mean will be faster.
  if data_format == 'channels_last':
    # pool_size = (inputs.shape[1], inputs.shape[2])
    reduction_indices = [1, 2]  # NHWC: average over H and W.
  else:
    # pool_size = (inputs.shape[2], inputs.shape[3])
    reduction_indices = [2, 3]  # NCHW: average over H and W.
  inputs = tf.reduce_mean(inputs, reduction_indices=reduction_indices)
  # inputs = tf.layers.average_pooling2d(
  #     inputs=inputs, pool_size=pool_size, strides=1, padding='VALID',
  #     data_format=data_format)
  inputs = tf.identity(inputs, 'final_avg_pool')
  # Final channel width: 2048 (4 * 512) for bottleneck blocks, 512 otherwise.
  inputs = tf.reshape(
      inputs, [-1, 2048 if block_fn is bottleneck_block else 512])
  if dropout_rate is not None:
    tf.logging.info('using dropout')
    inputs = tf.layers.dropout(
        inputs, rate=dropout_rate, training=is_training)
  tf.logging.info('Predense shape: {}'.format(inputs.shape))
  inputs = tf.layers.dense(
      inputs=inputs,
      units=num_classes,
      kernel_initializer=tf.random_normal_initializer(stddev=.01))
  inputs = tf.identity(inputs, 'final_dense')
  return inputs
def model(inputs, is_training):
  """Creation of the model graph (scale-invariance variant).

  Stem conv (stride 1) + max pool, then four `scale_invariance` block groups.
  After each group, a 3-D max pool over (scales, 2, 2) collapses the
  per-scale stack and downsamples spatially, and the collapsed scales axis
  is squeezed away. Ends with a global average pool, optional dropout, and
  a dense classification layer.

  Args:
    inputs: input image tensor; layout is assumed to match `data_format`
      (NOTE(review): confirm against the caller).
    is_training: bool controlling norm/dropblock/dropout behavior.

  Returns:
    `[batch, num_classes]` logits tensor, named 'final_dense'.
  """
  # Stride 1: the stem conv itself does not downsample in this variant.
  inputs = conv2d_fixed_padding(inputs=inputs, filters=64, kernel_size=7,
                                strides=1, data_format=data_format)
  inputs = tf.identity(inputs, 'initial_conv')
  if not pre_activation:
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum)
  inputs = tf.layers.max_pooling2d(inputs=inputs, pool_size=3, strides=2,
                                   padding='SAME', data_format=data_format)
  inputs = tf.identity(inputs, 'initial_max_pool')
  # Pre-bind the block-group options shared by every group below.
  custom_block_group = functools.partial(
      block_group,
      data_format=data_format,
      dropblock_size=dropblock_size,
      pre_activation=pre_activation,
      norm_act_layer=norm_act_layer,
      se_ratio=se_ratio,
      resnetd_shortcut=resnetd_shortcut,
      bn_momentum=bn_momentum)
  num_layers = len(layers) + 1
  # Fixed at 1; the original `2 if skip_stem_max_pool else 1` schedule is
  # disabled in this variant.
  stride_c2 = 1  # if skip_stem_max_pool else 1
  ## Block 1
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[0],
      name='block_group1',
      filters=64,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 2, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[0],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  # Max-pool across the full scales axis and 2x2 spatially.
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 2, 2), padding='SAME',
                                   data_format=data_format)
  # Drop the collapsed scales axis (axis 1, size 1 after pooling).
  # NOTE(review): axis 1 is the depth axis only for channels_last 5-D
  # tensors — confirm behavior when data_format is 'channels_first'.
  inputs = tf.squeeze(inputs, 1)
  ## Block 2
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[1],
      name='block_group2',
      filters=128,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 3, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[1],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 2, 2), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  ## Block 3
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[2],
      name='block_group3',
      filters=256,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 4, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[2],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 2, 2), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  ## Block 4
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[3],
      name='block_group4',
      filters=512,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 5, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[3],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 2, 2), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  if pre_activation:
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum)
  # The activation is 7x7 so this is a global average pool.
  # TODO(huangyp): reduce_mean will be faster.
  if data_format == 'channels_last':
    pool_size = (inputs.shape[1], inputs.shape[2])
  else:
    pool_size = (inputs.shape[2], inputs.shape[3])
  inputs = tf.layers.average_pooling2d(inputs=inputs, pool_size=pool_size,
                                       strides=1, padding='VALID',
                                       data_format=data_format)
  inputs = tf.identity(inputs, 'final_avg_pool')
  # Final channel width: 2048 (4 * 512) for bottleneck blocks, 512 otherwise.
  inputs = tf.reshape(
      inputs, [-1, 2048 if block_fn is bottleneck_block else 512])
  if dropout_rate is not None:
    tf.logging.info('using dropout')
    inputs = tf.layers.dropout(inputs, rate=dropout_rate,
                               training=is_training)
  inputs = tf.layers.dense(
      inputs=inputs,
      units=num_classes,
      kernel_initializer=tf.random_normal_initializer(stddev=.01))
  inputs = tf.identity(inputs, 'final_dense')
  return inputs
def model(inputs, is_training):
  """Creation of the model graph (scale-invariance variant with skip merges).

  Like the plain scale-invariance model, but additionally:
  - computes a side branch (S2b) directly from the stem output (C1);
  - resizes the C2 and C3 features to S2b's spatial size and concatenates
    all three along channels before the final block group (S4);
  - the 3-D max pools after each group use spatial stride 1, so they only
    collapse the scales axis (no spatial downsampling inside the groups).

  Args:
    inputs: input image tensor; layout is assumed to match `data_format`
      (NOTE(review): confirm against the caller).
    is_training: bool controlling norm/dropblock/dropout behavior.

  Returns:
    `[batch, num_classes]` logits tensor, named 'final_dense'.
  """
  #####
  ## Don't use dilation on the first conv/pool. Use it everywhere else.
  #####
  # Stride 1: the stem conv itself does not downsample in this variant.
  inputs = conv2d_fixed_padding(inputs=inputs, filters=64, kernel_size=7,
                                strides=1, data_format=data_format)
  inputs = tf.identity(inputs, 'initial_conv')
  if not pre_activation:
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum)
  inputs = tf.layers.max_pooling2d(inputs=inputs, pool_size=3, strides=2,
                                   padding='SAME', data_format=data_format)
  inputs = tf.identity(inputs, 'initial_max_pool')
  #####
  # Pre-bind the block-group options shared by every group below.
  custom_block_group = functools.partial(
      block_group,
      data_format=data_format,
      dropblock_size=dropblock_size,
      pre_activation=pre_activation,
      norm_act_layer=norm_act_layer,
      se_ratio=se_ratio,
      resnetd_shortcut=resnetd_shortcut,
      bn_momentum=bn_momentum)
  num_layers = len(layers) + 1
  # Fixed at 1; the original `2 if skip_stem_max_pool else 1` schedule is
  # disabled in this variant.
  stride_c2 = 1  # if skip_stem_max_pool else 1
  ## Block S1/C1
  c1 = tf.identity(inputs)  # Keep the stem output for the S2b side branch.
  inputs = scale_invariance(
      inputs=c1,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[0],
      name='block_group1',
      filters=64,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 2, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[0],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  # Pool over the full scales axis only; spatial strides are 1 here.
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 1, 1), padding='SAME',
                                   data_format=data_format)
  # Drop the collapsed scales axis (axis 1, size 1 after pooling).
  # NOTE(review): axis 1 is the depth axis only for channels_last 5-D
  # tensors — confirm behavior when data_format is 'channels_first'.
  inputs = tf.squeeze(inputs, 1)
  ## Block S2b: side branch computed straight from C1 (reuses the
  ## layers[0]/dropblock_keep_probs[0]/rate-2 settings of block S1).
  c2b = scale_invariance(
      inputs=c1,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[0],
      name='block_groups2b',
      filters=128,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 2, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[0],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  c2b = tf.layers.max_pooling3d(inputs=c2b, pool_size=(scales, 2, 2),
                                strides=(scales, 1, 1), padding='SAME',
                                data_format=data_format)
  c2b = tf.squeeze(c2b, 1)  # Drop the collapsed scales axis (axis 1).
  ## Block S2/C2
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[1],
      name='block_group2',
      filters=128,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 3, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[1],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 1, 1), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  c2 = tf.identity(inputs)  # Keep C2 for the pre-S4 merge below.
  ## Block S3/C3
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[2],
      name='block_group3',
      filters=256,
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 4, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[2],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 1, 1), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  # Prep C3 for merge: resize C3 and C2 to S2b's spatial size.
  # NOTE(review): merge_size[1:3] as H/W and concat axis -1 as channels
  # assume channels_last — confirm when data_format is 'channels_first'.
  merge_size = c2b.get_shape().as_list()
  # Resize through float32 (image resize precision), then cast back to
  # the branch dtype.
  inputs = tf.cast(inputs, tf.float32)
  inputs = tf.image.resize(inputs, merge_size[1:3], align_corners=True,
                           method=RESIZE_METHOD)
  inputs = tf.cast(inputs, c2b.dtype)
  c2 = tf.cast(c2, tf.float32)
  c2 = tf.image.resize(c2, merge_size[1:3], align_corners=True,
                       method=RESIZE_METHOD)
  c2 = tf.cast(c2, c2b.dtype)
  # Merge C2 and C2b with C3
  inputs = tf.concat([inputs, c2, c2b], -1)
  ## Block S4/C4
  inputs = scale_invariance(
      inputs=inputs,
      scales=scales,
      is_training=is_training,
      block_fn=block_fn,
      layers=layers[3],
      name='block_group4',
      filters=512,
      # Inception-style merge for C2->S4
      drop_connect_rate=resnet_layers.get_drop_connect_rate(
          drop_connect_rate, 5, num_layers),
      dropblock_keep_probs=dropblock_keep_probs[3],
      stride_c2=stride_c2,
      custom_block_group=custom_block_group,
      data_format=data_format)
  inputs = tf.layers.max_pooling3d(inputs=inputs, pool_size=(scales, 2, 2),
                                   strides=(scales, 1, 1), padding='SAME',
                                   data_format=data_format)
  inputs = tf.squeeze(inputs, 1)  # Drop the collapsed scales axis (axis 1).
  if pre_activation:
    inputs = norm_activation(inputs, is_training, data_format=data_format,
                             layer=norm_act_layer, bn_momentum=bn_momentum)
  # The activation is 7x7 so this is a global average pool.
  # TODO(huangyp): reduce_mean will be faster.
  if data_format == 'channels_last':
    pool_size = (inputs.shape[1], inputs.shape[2])
  else:
    pool_size = (inputs.shape[2], inputs.shape[3])
  inputs = tf.layers.average_pooling2d(inputs=inputs, pool_size=pool_size,
                                       strides=1, padding='VALID',
                                       data_format=data_format)
  inputs = tf.identity(inputs, 'final_avg_pool')
  # Final channel width: 2048 (4 * 512) for bottleneck blocks, 512 otherwise.
  inputs = tf.reshape(
      inputs, [-1, 2048 if block_fn is bottleneck_block else 512])
  if dropout_rate is not None:
    tf.logging.info('using dropout')
    inputs = tf.layers.dropout(inputs, rate=dropout_rate,
                               training=is_training)
  inputs = tf.layers.dense(
      inputs=inputs,
      units=num_classes,
      kernel_initializer=tf.random_normal_initializer(stddev=.01))
  inputs = tf.identity(inputs, 'final_dense')
  return inputs