def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
    """Apply one MobileNet unit: depthwise convolution, then 1x1 convolution.

    Batch norm and ReLU are not applied explicitly in this function; they are
    presumably supplied by an enclosing slim ``arg_scope`` (TODO confirm).

    ``width_multiplier``, ``kernel_size`` and ``padding`` are not parameters:
    they are looked up in the enclosing scope at call time.

    Args:
        net: input tensor.
        num_out_channels: nominal output depth of the pointwise convolution;
            it is multiplied by ``width_multiplier`` and truncated to an int.
        stride: two-element stride of the depthwise convolution.
        scope: optional variable scope name (default name ``'block'``).

    Returns:
        The output of the pointwise (1x1) convolution.
    """
    with tf.variable_scope(scope, 'block', [net]):
        out_depth = int(num_out_channels * width_multiplier)

        # Keyword arguments shared by both depthwise variants.
        dw_kwargs = dict(depth_multiplier=1, stride=stride, scope='conv_dw')
        strided = not (stride[0] == 1 and stride[1] == 1)
        if strided:
            # Mimic CAFFE padding if stride > 1: pad explicitly, then run the
            # depthwise convolution with 'VALID' padding.
            net = custom_layers.pad2d(net, pad=padding)
            dw_kwargs['padding'] = 'VALID'
        net = custom_layers.depthwise_convolution2d(
            net, kernel_size, **dw_kwargs)

        # Pointwise convolution.
        return slim.conv2d(net, out_depth, [1, 1], scope='conv_pw')
def max_avg_pool2d(net, stride=[2, 2], scope=None):
    """Return the sum of a 3x3 max pooling and a 3x3 average pooling of `net`.

    The input is first padded with ``custom_layers.pad2d`` (pad of [1, 1]) and
    both poolings are then run with 'VALID' padding on that padded tensor.

    Args:
        net: input tensor.
        stride: stride passed to both pooling ops.
        scope: optional variable scope name (default name ``'max_avg_pool'``).

    Returns:
        ``max_pool + avg_pool`` of the padded input.
    """
    with tf.variable_scope(scope, 'max_avg_pool', [net]):
        window = [3, 3]
        # Additional Caffe padding.
        padded = custom_layers.pad2d(net, pad=[1, 1])
        # Max + Avg pooling.
        max_pooled = slim.max_pool2d(padded, window, stride, padding='VALID')
        avg_pooled = slim.avg_pool2d(padded, window, stride, padding='VALID')
        return max_pooled + avg_pooled
def mobilenet_block(net, num_out_channels, stride=[1, 1], leaders=False,
                    scope=None):
    """Apply one MobileNet unit, optionally with a "leaders" depthwise conv.

    The unit is a 3x3 depthwise convolution followed by a 1x1 convolution.
    Batch norm and ReLU are not applied explicitly here (except the
    ``activation_fn=tf.nn.relu`` passed to the leaders convolution); they are
    presumably supplied by an enclosing slim ``arg_scope`` (TODO confirm).

    ``width_multiplier`` is not a parameter: it is read from the enclosing
    scope at call time.

    Args:
        net: input tensor.
        num_out_channels: nominal output depth of the pointwise convolution;
            it is multiplied by ``width_multiplier`` and truncated to an int.
        stride: two-element stride of the depthwise convolution.
        leaders: when true and the stride is not [1, 1], use
            ``custom_layers.depthwise_leaders_convolution2d`` in place of the
            padded depthwise convolution. Ignored when the stride is [1, 1].
        scope: optional variable scope name (default name ``'block'``).

    Returns:
        The output of the pointwise (1x1) convolution.
    """
    with tf.variable_scope(scope, 'block', [net]):
        out_depth = int(num_out_channels * width_multiplier)
        dw_kernel = [3, 3]
        unit_stride = stride[0] == 1 and stride[1] == 1

        if unit_stride:
            # Classic depthwise convolution with stride=1.
            net = custom_layers.depthwise_convolution2d(
                net, dw_kernel,
                depth_multiplier=1,
                stride=stride,
                scope='conv_dw')
        elif leaders:
            # Special depthwise leader convolution when stride > 1. No
            # explicit pre-padding here: the op is called with 'SAME' padding.
            net = custom_layers.depthwise_leaders_convolution2d(
                net, dw_kernel,
                padding='SAME',
                stride=stride,
                rates=[1, 2, 3],
                pooling_sizes=[5, 3, 1],
                pooling_type='AVG',
                activation_fn=tf.nn.relu,
                scope='conv_lead_dw')
        else:
            # Mimic CAFFE padding if stride > 1: explicit pad, then 'VALID'.
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = custom_layers.depthwise_convolution2d(
                net, dw_kernel,
                padding='VALID',
                depth_multiplier=1,
                stride=stride,
                scope='conv_dw')

        # Pointwise convolution.
        return slim.conv2d(net, out_depth, [1, 1], scope='conv_pw')
def mobilenets(inputs, num_classes=1000, width_multiplier=1.0,
               is_training=True, dropout_keep_prob=0.5, scope='MobileNets'):
    """Build the MobileNets graph.

    Layout: a strided 3x3 convolution (``conv1``), thirteen depthwise/pointwise
    blocks (``block2`` .. ``block14``), a spatial mean, a 1x1 convolution used
    as the classifier (``conv_fc15``), a spatial squeeze and a logits padding.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes. The classifier convolution
            always has 1000 outputs; ``custom_layers.pad_logits`` is then
            called with ``pad=(num_classes - 1000, 0)``.
        width_multiplier: factor applied to the output depth of every block
            (not to ``conv1`` nor to the classifier).
        is_training: accepted but not used by this function.
        dropout_keep_prob: accepted but not used by this function.
        scope: optional scope for the variables.

    Returns:
        A tuple ``(net, end_points)``: the output of the logits padding, and
        a dict that is created empty and never populated here.
    """
    kernel_size = [3, 3]
    padding = [(k - 1) // 2 for k in kernel_size]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Depthwise convolution followed by a 1x1 convolution.

        BN/ReLU are not applied explicitly; presumably they come from an
        enclosing slim ``arg_scope`` (TODO confirm).
        """
        with tf.variable_scope(scope, 'block', [net]):
            out_depth = int(num_out_channels * width_multiplier)
            dw_kwargs = dict(depth_multiplier=1, stride=stride,
                             scope='conv_dw')
            if not (stride[0] == 1 and stride[1] == 1):
                # Mimic CAFFE padding if stride > 1.
                net = custom_layers.pad2d(net, pad=padding)
                dw_kwargs['padding'] = 'VALID'
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size, **dw_kwargs)
            # Pointwise convolution.
            return slim.conv2d(net, out_depth, [1, 1], scope='conv_pw')

    # (scope, nominal depth, stride) of the blocks before the 512-wide run.
    leading_blocks = (
        ('block2', 64, [1, 1]),
        ('block3', 128, [2, 2]),
        ('block4', 128, [1, 1]),
        ('block5', 256, [2, 2]),
        ('block6', 256, [1, 1]),
        ('block7', 512, [2, 2]),
    )
    # (scope, nominal depth, stride) of the blocks after the 512-wide run.
    trailing_blocks = (
        ('block13', 1024, [2, 2]),
        ('block14', 1024, [1, 1]),
    )

    with tf.variable_scope(scope, 'MobileNets', [inputs]):
        end_points = {}

        # First full convolution: explicit padding, then 'VALID' convolution.
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net, 32, kernel_size, stride=[2, 2],
                          padding='VALID', scope='conv1')

        # Then, MobileNet blocks!
        for name, depth, block_stride in leading_blocks:
            net = mobilenet_block(net, depth, stride=block_stride, scope=name)

        # Intermediate blocks: block8 .. block12.
        for offset in range(5):
            net = mobilenet_block(net, 512, scope='block%i' % (offset + 8))

        # Final blocks.
        for name, depth, block_stride in trailing_blocks:
            net = mobilenet_block(net, depth, stride=block_stride, scope=name)

        # Spatial pooling + fully connected layer (as a 1x1 convolution).
        pooled = custom_layers.spatial_mean(net, keep_dims=True,
                                            scope='spatial_mean14')
        logits = slim.conv2d(pooled, 1000, [1, 1],
                             activation_fn=None,
                             normalizer_fn=None,
                             normalizer_params=None,
                             biases_initializer=tf.zeros_initializer(),
                             scope='conv_fc15')
        logits = custom_layers.spatial_squeeze(logits)

        # Logits padding...
        logits = custom_layers.pad_logits(logits, pad=(num_classes - 1000, 0))
        return logits, end_points
def mobilenets_btree(inputs, num_classes=1000, kernel_size=[3, 3],
                     width_multiplier=1.0, dropouts=[0.5], pad_logits=True,
                     is_training=True, reuse=None, scope='MobileNets'):
    """Build a MobileNets variant with residual B-tree blocks in the middle.

    Layout: a strided convolution (``conv1``), six regular blocks
    (``block2`` .. ``block7``), eight residual units (indices 8 .. 15) each
    made of three split-pointwise blocks with channel translations in between,
    two final regular blocks (``block13``, ``block14``), a spatial mean, a 1x1
    classifier convolution (``conv_fc15``) and a spatial squeeze.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of outputs of the classifier convolution.
        kernel_size: two-element kernel size used by ``conv1`` and by every
            depthwise convolution; also determines the explicit padding.
        width_multiplier: factor applied to the output depth of every block
            (not to ``conv1`` nor to the classifier).
        dropouts: accepted but not used by this function.
        pad_logits: when true, ``custom_layers.pad_logits`` is applied to the
            logits with ``pad=(num_classes - 1000, 0)``.
        is_training: accepted but not used by this function.
        reuse: forwarded to the top-level ``tf.variable_scope``.
        scope: optional scope for the variables.

    Returns:
        A tuple ``(net, end_points)``: the logits tensor, and a dict that is
        created empty and never populated here.
    """
    # Padding used for layers with stride > 1.
    pad_h = (kernel_size[0] - 1) // 2
    pad_w = (kernel_size[1] - 1) // 2
    padding = [pad_h, pad_w]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Depthwise convolution followed by a 1x1 convolution.

        BN/ReLU are not applied explicitly; presumably they come from an
        enclosing slim ``arg_scope`` (TODO confirm).
        """
        with tf.variable_scope(scope, 'block', [net]):
            out_depth = int(num_out_channels * width_multiplier)
            dw_kwargs = dict(depth_multiplier=1, stride=stride,
                             scope='conv_dw')
            if not (stride[0] == 1 and stride[1] == 1):
                # Mimic CAFFE padding if stride > 1 => usually better
                # accuracy.
                net = custom_layers.pad2d(net, pad=padding)
                dw_kwargs['padding'] = 'VALID'
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size, **dw_kwargs)
            # Pointwise convolution.
            return slim.conv2d(net, out_depth, [1, 1], scope='conv_pw')

    # NOTE(review): this helper is defined but never called in this function.
    def mobilenet_block_btree_v1(net, num_out_channels, stride=[1, 1],
                                 split=2, scope=None):
        """Depthwise convolution followed by a split 1x1 convolution."""
        with tf.variable_scope(scope, 'block', [net]):
            out_depth = int(num_out_channels * width_multiplier)
            # Depthwise convolution.
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size,
                depth_multiplier=1,
                stride=stride,
                scope='conv_dw')
            # Split-pointwise convolution.
            return btree_layers.conv2d_1x1_split(
                net, out_depth, split=split, scope='conv_pw_split')

    def mobilenet_block_btree_v2(net, num_out_channels, stride=[1, 1],
                                 split=2, scope=None):
        """Combination of a ResNet-style block and B-tree.

        Batch norm first, then a depthwise convolution and a split 1x1
        convolution, both without activation; the split convolution also has
        its normalizer disabled.
        """
        with tf.variable_scope(scope, 'block', [net]):
            out_depth = int(num_out_channels * width_multiplier)
            # Start with Batch Norm.
            net = custom_layers.batch_norm(net)
            # Depthwise convolution, no activation.
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size,
                depth_multiplier=1,
                stride=stride,
                activation_fn=None,
                scope='conv_dw')
            # Split-pointwise convolution, no activation and no normalizer.
            return btree_layers.conv2d_1x1_split(
                net, out_depth,
                split=split,
                activation_fn=None,
                normalizer_fn=None,
                scope='conv_pw_split')

    # (scope, nominal depth, stride) of the blocks before the residual units.
    leading_blocks = (
        ('block2', 64, [1, 1]),
        ('block3', 128, [2, 2]),
        ('block4', 128, [1, 1]),
        ('block5', 256, [2, 2]),
        ('block6', 256, [1, 1]),
        ('block7', 512, [2, 2]),
    )

    with tf.variable_scope(scope, 'MobileNets', [inputs], reuse=reuse):
        end_points = {}

        # First full convolution: explicit padding, then 'VALID' convolution.
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net, 32, kernel_size, stride=[2, 2],
                          padding='VALID', scope='conv1')

        # Then, MobileNet blocks!
        for name, depth, block_stride in leading_blocks:
            net = mobilenet_block(net, depth, stride=block_stride, scope=name)

        # Intermediate blocks: eight residual units.
        for idx in range(8, 16):
            # NOTE(review): `scope` is passed as name_or_scope here, so the
            # 'resblock_%i' default name looks like it is only used when
            # `scope` is None -- confirm this is intended before changing it
            # (it affects variable names).
            with tf.variable_scope(scope, 'resblock_%i' % idx, [net]):
                shortcut = net

                net = mobilenet_block_btree_v2(
                    net, 512, split=4, scope='block%i_a' % idx)
                net = btree_layers.translate_channels(
                    net, delta=64, scope='ch_translate_%i_a' % idx)

                net = mobilenet_block_btree_v2(
                    net, 512, split=4, scope='block%i_b' % idx)
                net = btree_layers.translate_channels(
                    net, delta=-64, scope='ch_translate_%i_b' % idx)

                net = mobilenet_block_btree_v2(
                    net, 512, split=4, scope='block%i_c' % idx)

                # Residual sum, then batch norm.
                net = tf.add(shortcut, net, 'residual_sum_%i' % idx)
                net = custom_layers.batch_norm(net)

        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')

        # Spatial pooling + fully connected layer (as a 1x1 convolution).
        pooled = custom_layers.spatial_mean(net, keep_dims=True,
                                            scope='spatial_mean14')
        logits = slim.conv2d(pooled, num_classes, [1, 1],
                             activation_fn=None,
                             normalizer_fn=None,
                             normalizer_params=None,
                             biases_initializer=tf.zeros_initializer(),
                             scope='conv_fc15')
        logits = custom_layers.spatial_squeeze(logits)

        # Logits padding: get everyone to the same number of classes.
        # NOTE(review): conv_fc15 already has `num_classes` outputs here;
        # confirm that padding by (num_classes - 1000) on top of that is
        # still intended.
        if pad_logits:
            logits = custom_layers.pad_logits(
                logits, pad=(num_classes - 1000, 0))
        return logits, end_points