Example #1
def ssd300_blocks(net, end_points):
    # Block 6: dilated 3x3 conv (rate=6).
    net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
    net = slim.batch_norm(net)
    end_points['block6'] = net
    # Block 7: 1x1 conv.
    net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
    net = slim.batch_norm(net)
    end_points['block7'] = net
    # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
    end_point = 'block8'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net,
                          512, [3, 3],
                          stride=2,
                          scope='conv3x3',
                          padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block9'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net,
                          256, [3, 3],
                          stride=2,
                          scope='conv3x3',
                          padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block10'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block11'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    return net, end_points
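
These blocks rely on custom_layers.pad2d followed by padding='VALID' to reproduce Caffe's explicit symmetric padding. A minimal sketch of such a helper, assuming NHWC tensors (the name and signature mirror the calls above; this is not the library's confirmed implementation):

import tensorflow as tf

def pad2d(net, pad=(1, 1), mode='CONSTANT'):
    """Symmetric spatial padding on an NHWC tensor.

    Mimics Caffe's explicit pad parameter so that a following
    padding='VALID' convolution behaves like Caffe's padded conv.
    """
    paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
    return tf.pad(net, paddings, mode=mode)
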
Example #2
def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
    """Basic MobileNet block combining:
     - depthwise conv + BN + relu
     - 1x1 conv + BN + relu
    """
    with tf.variable_scope(scope, 'block', [net]) as sc:
        num_out_channels = int(num_out_channels * width_multiplier)
        if stride[0] == 1 and stride[1] == 1:
            # Depthwise convolution with stride=1.
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        depth_multiplier=1,
                                                        stride=stride,
                                                        scope='conv_dw')
        else:
            # Mimic Caffe padding if stride > 1 => usually better accuracy.
            net = custom_layers.pad2d(net, pad=padding)
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        padding='VALID',
                                                        depth_multiplier=1,
                                                        stride=stride,
                                                        scope='conv_dw')
        # Pointwise convolution.
        net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
        return net
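
Note that this excerpt reads width_multiplier, kernel_size and padding from an enclosing scope (Example #14 below shows the full mobilenets context). For reference, a self-contained sketch of the same depthwise-separable pattern in plain TensorFlow 1.x ops; the names are hypothetical, and the slim version additionally applies BN + ReLU via its arg_scope:

import tensorflow as tf

def separable_block(x, num_out_channels, stride=1):
    """Depthwise 3x3 conv followed by a pointwise 1x1 conv."""
    in_channels = x.get_shape().as_list()[-1]
    dw_filter = tf.get_variable('dw', [3, 3, in_channels, 1])
    pw_filter = tf.get_variable('pw', [1, 1, in_channels, num_out_channels])
    # Depthwise: one 3x3 filter per input channel.
    x = tf.nn.depthwise_conv2d(x, dw_filter,
                               strides=[1, stride, stride, 1], padding='SAME')
    # Pointwise: mix channels with a 1x1 convolution.
    return tf.nn.conv2d(x, pw_filter, strides=[1, 1, 1, 1], padding='SAME')
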
Example #3
def largenetwork(net, end_point, end_points):
    print('Large network')
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net,
                          256, [3, 3],
                          stride=2,
                          scope='conv3x3',
                          padding='VALID')
    end_points[end_point] = net
    end_point = 'block10'
    print(net)
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net
    end_point = 'block11'
    print(net)
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net
    print(net)
    return end_points
def max_avg_pool2d(net, stride=[2, 2], scope=None):
    with tf.variable_scope(scope, 'max_avg_pool', [net]) as sc:
        ksize = [3, 3]
        padding = [1, 1]
        # Additional Caffe padding.
        net = custom_layers.pad2d(net, pad=padding)
        # Max + Avg pooling.
        mnet = slim.max_pool2d(net, ksize, stride, padding='VALID')
        anet = slim.avg_pool2d(net, ksize, stride, padding='VALID')
        return mnet + anet
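
With ksize 3, pad 1 and stride 2, max_avg_pool2d halves the spatial resolution and sums (rather than averages) the two pooling branches. A quick self-contained shape check with plain TF 1.x ops and a hypothetical 32x32 input:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])  # Caffe-style pad=1
mpool = tf.nn.max_pool(padded, [1, 3, 3, 1], [1, 2, 2, 1], 'VALID')
apool = tf.nn.avg_pool(padded, [1, 3, 3, 1], [1, 2, 2, 1], 'VALID')
print((mpool + apool).shape)  # (?, 16, 16, 16): floor((32 + 2 - 3) / 2) + 1
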
def mobilenet_block(net, num_out_channels, stride=[1, 1],
                    leaders=False, scope=None):
    """Basic MobileNet block combining:
     - depthwise conv + BN + relu
     - 1x1 conv + BN + relu
    """
    with tf.variable_scope(scope, 'block', [net]) as sc:
        num_out_channels = int(num_out_channels * width_multiplier)
        kernel_size = [3, 3]
        if stride[0] == 1 and stride[1] == 1:
            # Classic depthwise convolution with stride=1.
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size,
                depth_multiplier=1, stride=stride,
                scope='conv_dw')
        else:
            if leaders:
                # Special depthwise "leaders" convolution when stride > 1.
                # net = custom_layers.pad2d(net, pad=(1, 1))
                net = custom_layers.depthwise_leaders_convolution2d(
                    net,
                    kernel_size,
                    padding='SAME',
                    stride=stride,
                    rates=[1, 2, 3],
                    pooling_sizes=[5, 3, 1],
                    pooling_type='AVG',
                    activation_fn=tf.nn.relu,
                    scope='conv_lead_dw')
            else:
                # Mimic Caffe padding if stride > 1.
                net = custom_layers.pad2d(net, pad=(1, 1))
                net = custom_layers.depthwise_convolution2d(
                    net, kernel_size, padding='VALID',
                    depth_multiplier=1, stride=stride,
                    scope='conv_dw')
        # Pointwise convolution.
        net = slim.conv2d(net, num_out_channels, [1, 1],
                          scope='conv_pw')
        return net
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Inception-style variant, kept commented out below for reference:
        '''
        net1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net1
        net1 = slim.max_pool2d(net1, [2, 2], scope='pool1')
        # Block 2.
        #net2 = slim.repeat(net1, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net2_1 = slim.conv2d(net1, 128, [3, 3], scope='conv2_0')
        net2_2 = slim.conv2d(net2_1, 128, [3, 3], scope='conv2_1')
        net2 = net2_1 + net2_2
        end_points['block2'] = net2
        net2 = slim.max_pool2d(net2, [2, 2], scope='pool2')
        # Block 3.
        #net3 = slim.repeat(net2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net3_1 = slim.conv2d(net2, 256, [3, 3], scope='conv3_1')
        #net3_1 = slim.batch_norm(net3_1, scope='bn3_1')
        net3_2 = slim.conv2d(net3_1, 256, [3, 3], scope='conv3_2')
        #net3_2 = slim.batch_norm(net3_2, scope='bn3_2')
        net3_3 = slim.conv2d(net3_2, 256, [3, 3], scope='conv3_3')
        net3 = net3_1 + net3_3
        end_points['block3'] = net3
        net3 = slim.max_pool2d(net3, [2, 2], scope='pool3')
        # Block 4.
        net4 = slim.repeat(net3, 2, slim.conv2d, 512, [3, 3], scope='conv4')
        net4 = slim.batch_norm(net4, scope='conv4_bn')
        with tf.variable_scope('Mixed_4'):
            with tf.variable_scope('Branch_0'):
                tower_conv = slim.conv2d(net4, 256, 1, scope='Conv2d_1x1')
                # 38,38,128
            with tf.variable_scope('Branch_1'):
                tower_conv1_0 = slim.conv2d(net4, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3')
                # 38,38,128
            with tf.variable_scope('Branch_2'):
                tower_conv2_0 = slim.conv2d(net4, 128, 1, scope='Conv2d_0a_1x1')
                tower_conv2_1 = slim.conv2d(tower_conv2_0, 128, 3, scope='Conv2d_0b_3x3')
                tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, scope='Conv2d_0c_3x3')
                # 38,38,128
            with tf.variable_scope('Branch_3'):
                tower_pool = slim.avg_pool2d(net4, 3, stride=1, padding='SAME', scope='AvgPool_0a_3x3')
                tower_pool_1 = slim.conv2d(tower_pool, 256, 1, scope='Conv2d_0b_1x1')
            net4 = tf.concat([tower_conv, tower_conv1_1,
                              tower_conv2_2, tower_pool_1], 1)
        end_points['block4'] = net4
        net4 = slim.max_pool2d(net4, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net4, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        net = tf.layers.batch_normalization(net, training=True)
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        # ResNet-style variant: VGG blocks with skip connections (NCHW layout).
        net1_ = slim.conv2d(inputs, 64, [3,3], scope="conv1")
        # Block1.
        net1 = slim.repeat(net1_, 3, slim.conv2d, 64, [3, 3], scope='conv1')
        #net1_ = tf.concat([net1_]*2,1)
        #print(net1_.shape)
        net1 = net1 + net1_
        end_points['block1'] = net1
        net1 = slim.max_pool2d(net1, [2, 2], scope='pool1')
        # 150,150,64
        
        # Block 2.
        net2 = slim.repeat(net1, 3, slim.conv2d, 128, [3, 3], scope='conv2')
        net1 = tf.concat([net1]*2,1)
        net2 = net2+net1
        end_points['block2'] = net2
        net2 = slim.max_pool2d(net2, [2, 2], scope='pool2')
        # 75,75,128
        
        # Block 3.
        net3 = slim.repeat(net2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net2 = tf.concat([net2]*2,1)
        net3_p = net3 + net2
        #end_points['block3'] = net3
        net3 = slim.max_pool2d(net3_p, [2, 2], scope='pool3')
        # 38,38,256
        # Block 3.
        net4 = slim.repeat(net3, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net3 = tf.concat([net3] * 2, 1)
        net4_p = net4 + net3
        end_points['block4'] = net4_p
        net4 = slim.max_pool2d(net4_p, [2, 2], scope='pool4')
        # 19,19,512
        # Block 5.
        net5 = slim.repeat(net4, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net5 = net4 + net5 
        end_points['block5'] = net5
        net5 = slim.max_pool2d(net5, [3, 3], stride=1, scope='pool5')
        # 17
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net6 = slim.conv2d(net5, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net6

        # tf.layers.dropout expects the drop rate, hence 1 - keep_prob.
        net6 = tf.layers.dropout(net6, rate=1 - dropout_keep_prob, training=is_training)
        # 17,17
        # Block 7: 1x1 conv. Because the f**k.
        net7 = slim.conv2d(net6, 1024, [1, 1], scope='conv7')
        #end_points['block7'] = net7
        net7_ = tf.layers.dropout(net7, rate=1 - dropout_keep_prob, training=is_training)
        # 17,17

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'block8'
        net8 = tf.layers.batch_normalization(net7_, training=is_training)
        with tf.variable_scope(end_point):
            net8 = slim.conv2d(net8, 256, [1, 1], scope='conv1x1')
            #17
            net8 = custom_layers.pad2d(net8, pad=(1, 1))
            #21
            net8 = slim.conv2d(net8, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
            #10,10
        #end_points[end_point] = net8
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net9 = slim.conv2d(net8, 128, [1, 1], scope='conv1x1')
            net9 = custom_layers.pad2d(net9, pad=(1, 1))
            net9 = slim.conv2d(net9, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
            #5,5
        #end_points[end_point] = net9
        
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net10 = slim.conv2d(net9, 128, [1, 1], scope='conv1x1')
            net10 = slim.conv2d(net10, 256, [3, 3], scope='conv3x3', padding='VALID')
            #3,3
        #end_points[end_point] = net10
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net11 = slim.conv2d(net10, 128, [1, 1], scope='conv1x1')
            net11 = slim.conv2d(net11, 256, [3, 3], scope='conv3x3', padding='VALID')
            #1,1
        end_points[end_point] = net11
        
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net10_a = tf.transpose(net11, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net10_a = tf.image.resize_nearest_neighbor(net10_a, (3, 3))
            net10_a = tf.transpose(net10_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net10_a = slim.conv2d(net10_a, 256, [3,3], scope='pre10_3x3')
            net10_b = slim.conv2d(net10, 256, [1, 1], scope='pre10_1x1')
            net10_o = net10_a + net10_b
        end_points[end_point] = net10_o #3
        
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net9_a = tf.transpose(net10_o, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net9_a = tf.image.resize_nearest_neighbor(net9_a, (5, 5))
            net9_a = tf.transpose(net9_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net9_a = slim.conv2d(net9_a, 256, [3,3], scope='pre9_3x3')
            net9_b = slim.conv2d(net9, 256, [1, 1], scope='pre9_1x1')
            net9_o = net9_a + net9_b
        end_points[end_point] = net9_o#5
        
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net8_a = tf.transpose(net9_o, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net8_a = tf.image.resize_nearest_neighbor(net8_a, (10, 10))
            net8_a = tf.transpose(net8_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net8_a = slim.conv2d(net8_a, 512, [3,3], padding='SAME', scope='pre8_3x3')
            net8_b = slim.conv2d(net8, 512, [1, 1], scope='pre8_1x1')#10
            net8_o = net8_a + net8_b
        end_points[end_point] = net8_o#10

        end_point = 'block7'
        with tf.variable_scope(end_point):
            net7_a = tf.transpose(net8_o, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net7_a = tf.image.resize_nearest_neighbor(net7_a, (19, 19))
            net7_a = tf.transpose(net7_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net7_a = slim.conv2d(net7_a, 1024, [3, 3], padding='SAME', scope='pre7_3x3')
            net7_b = slim.conv2d(net7, 1024, [1, 1], scope='pre7_1x1')
            net7_o = net7_a + net7_b
        end_points[end_point] = net7_o
        
        end_point = 'block4'
        with tf.variable_scope(end_point):
            net4_a = tf.transpose(net7_o, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net4_a = tf.image.resize_nearest_neighbor(net4_a, (38, 38))
            net4_a = tf.transpose(net4_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net4_a = slim.conv2d(net4_a, 512, [3, 3], padding='SAME', scope='pre4_3x3')
            net4_b = slim.conv2d(net4_p, 512, [1, 1], scope='pre4_1x1')
            #print("asdfasdfasdf", net4_a.shape, net4_b.shape)
            net4_o = net4_a + net4_b#38
        end_points[end_point] = net4_o
        
        end_point = 'block3'
        with tf.variable_scope(end_point):
            net3_a = tf.transpose(net4_o, perm=(0, 2, 3, 1))  # NCHW -> NHWC
            net3_a = tf.image.resize_nearest_neighbor(net3_a, (75, 75))
            net3_a = tf.transpose(net3_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            
            net3_a = slim.conv2d(net3_a, 512, [3, 3], padding='SAME', scope='pre3_3x3')
            net3_b = slim.conv2d(net3_p, 512, [1, 1], scope='pre3_1x1')
            print("asdfasdfasdf", net3_a.shape, net3_b.shape)
            net3_o = net3_a + net3_b#75
        end_points[end_point] = net3_o
        
        # Original VGG-16 path. Note: this redefines the conv1..conv5/pool
        # scopes already used by the ResNet-style variant above; only one of
        # the two variants should be active at a time.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
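
The spatial sizes noted in the comments above (38, 19, 10, 5, 3, 1 for a 300x300 input) follow from the standard padding/stride arithmetic; a small sketch reproducing them:

def conv_out(size, kernel, stride=1, pad=0):
    """Output size of a VALID conv/pool with explicit padding."""
    return (size + 2 * pad - kernel) // stride + 1

s = 300
for _ in range(4):            # pool1..pool4: 2x2, stride 2, SAME
    s = (s + 1) // 2          # 300 -> 150 -> 75 -> 38 -> 19
# pool5 (3x3, stride 1) and conv6/conv7 keep the 19x19 size.
s8 = conv_out(19, 3, stride=2, pad=1)   # block8: 10
s9 = conv_out(s8, 3, stride=2, pad=1)   # block9: 5
s10 = conv_out(s9, 3)                   # block10: 3
s11 = conv_out(s10, 3)                  # block11: 1
print(s8, s9, s10, s11)                 # 10 5 3 1
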
Example #7
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            # feat_layers = ['block4', 'block7', 'block8',
            #                'block9', 'block10', 'block11']
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
            # p and l are raw (pre-softmax) outputs; p predicts per-box class
            # scores, which is why prediction_fn (softmax) is applied to it.
        return predictions, localisations, logits, end_points
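
ssd_multibox_layer itself is defined elsewhere in the repository. Based on its call signature here, a minimal sketch of what such a head typically computes: an optional L2 normalization, then parallel 3x3 convolutions for box offsets and class scores. This is the usual SSD recipe under those assumptions, not the repository's verbatim code:

def multibox_head_sketch(net, num_classes, sizes, ratios, normalization=-1):
    """Sketch of an SSD multibox head (assumes the slim/custom_layers context above)."""
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)
    num_anchors = len(sizes) + len(ratios)
    # Location predictions: 4 offsets (cx, cy, w, h) per anchor.
    loc = slim.conv2d(net, num_anchors * 4, [3, 3],
                      activation_fn=None, scope='conv_loc')
    # Class predictions: one raw score per class per anchor.
    cls = slim.conv2d(net, num_anchors * num_classes, [3, 3],
                      activation_fn=None, scope='conv_cls')
    return cls, loc
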
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios = TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384'):
	end_points = {}
	with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):  # 300x300 / 384x384 inputs
		# Original VGG-16 blocks.
		net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')  # 300 384
		end_points['conv1'] = net
		net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 150
		# Block 2.
		net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')  # 150 192
		end_points['conv2'] = net
		net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 75
		# Block 3.
		net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')  # 75 81
		end_points['conv3'] = net
		net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 38
		# Block 4.
		net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')  # 38 40
		end_point = 'conv4'

		end_points[end_point] = net
		net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 19
		# Block 5.
		net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')  # 19
		end_points['conv5'] = net
		net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')  # 19

		# Additional SSD blocks.
		# Block 6: let's dilate the hell out of it!
		net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')  # 19
		end_points['conv6'] = net
		# Block 7: 1x1 conv. Because the f**k.
		net = slim.conv2d(net, 1024, [1, 1], scope='conv7')  # 19
		end_point = 'conv7'

		end_points[end_point] = net

		# Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2
		end_point = 'conv8'
		with tf.variable_scope(end_point):
			net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
			net = custom_layers.pad2d(net, pad=(1, 1))
			net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')

		end_points[end_point] = net  # 10
		end_point = 'conv9'
		with tf.variable_scope(end_point):
			net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
			net = custom_layers.pad2d(net, pad=(1, 1))
			net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')

		end_points[end_point] = net # 5
		end_point = 'conv10'
		with tf.variable_scope(end_point):
			net = slim.conv2d(net, 128, [1, 1], scope='conv1x1', padding='VALID')
			net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')

		end_points[end_point] = net  # 3
		end_point = 'conv11'
		with tf.variable_scope(end_point):
			net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
			net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')

		end_points[end_point] = net  #



		end_point = feat_layers[0]
		with tf.variable_scope(end_point):
			net_dilation1 = slim.conv2d(end_points[end_point], 128, [3, 3], stride=1, scope='dilation1')

			# net_dilation2 = custom_layers.pad2d(net, pad=(0, 4))

			net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 9], padding='SAME', stride=1, scope='dilation2')

			net_dilation3 = slim.conv2d(end_points[end_point], 128, [9, 1], stride=1, padding='SAME', scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(4, 0))
			net_inception = tf.concat(values=[net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception

		end_point= feat_layers[1]
		with tf.variable_scope(end_point):
			net_dilation1 = slim.conv2d(end_points[end_point], 1024, [1, 1], stride=1, scope='dilation1')

			net_dilation2 = slim.conv2d(end_points[end_point], 1024, [1, 7], stride=1, scope='dilation2')
			# net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))

			net_dilation3 = slim.conv2d(end_points[end_point], 1024, [7, 1], stride=1, scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))

			net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception


		end_point = 'conv8'
		with tf.variable_scope(end_point):

			net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1,scope='dilation1')

			net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
			# net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))

			net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))

			net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception


		end_point = feat_layers[3]
		with tf.variable_scope(end_point):
			net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1,scope='dilation1')

			net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
			# net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))

			net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
			net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception # 5

		end_point = 'conv10'
		with tf.variable_scope(end_point):

			net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1, scope='dilation1')

			net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
			# net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))

			net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
			net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception  # 3


		end_point = 'conv11'
		with tf.variable_scope(end_point):

			net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1,scope='dilation1')

			net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 5], stride=1, scope='dilation2')
			# net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 2))

			net_dilation3 = slim.conv2d(end_points[end_point], 128, [5, 1], stride=1, scope='dilation3')
			# net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(2, 0))
			net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)

		end_points[end_point] = net_inception  # 1

		# Prediction and localisations layers.
		predictions = []
		logits = []
		localisations = []
		for i, layer in enumerate(feat_layers):
			with tf.variable_scope(layer + '_box'):
				p, loc = text_multibox_layer(layer,
				                             end_points[layer],
				                             anchor_sizes[i],
				                             anchor_ratios[i],
				                             normalizations[i])
			prediction_fn = slim.softmax
			predictions.append(prediction_fn(p))
			logits.append(p)
			localisations.append(loc)

		return predictions, localisations, logits, end_points
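
Each refinement stage above concatenates three equal-width branches (a 1x1 or 3x3 convolution, a 1xk convolution and a kx1 convolution) along the channel axis, so a refined map carries three times the branch depth, e.g. 3 * 128 = 384 channels. A self-contained check of that concat, with hypothetical shapes:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10, 10, 512])
w1 = tf.get_variable('k1x1', [1, 1, 512, 128])
w2 = tf.get_variable('k1x7', [1, 7, 512, 128])
w3 = tf.get_variable('k7x1', [7, 1, 512, 128])
branches = [tf.nn.conv2d(x, w, [1, 1, 1, 1], 'SAME') for w in (w1, w2, w3)]
print(tf.concat(branches, axis=3).shape)  # (?, 10, 10, 384)
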
Example #9
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition.
    """
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, all stride 2 (block 12 below uses stride 1).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID')
            # Fix padding to match Caffe version (pad=1).
            # pad_shape = [(i-j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
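
In this SSD-512 variant every extra block keeps stride 2 down to 2x2, and block12's padded 4x4 VALID convolution takes 2x2 to 1x1: (2 + 2 - 4) / 1 + 1 = 1. A sketch reproducing the full ladder of feature-map sizes from the same arithmetic:

def conv_out(size, kernel, stride=1, pad=0):
    return (size + 2 * pad - kernel) // stride + 1

sizes, s = {}, 512
for i in range(1, 5):
    sizes['block%d' % i] = s      # recorded before each 2x2 pool
    s //= 2                       # 512 -> 256 -> 128 -> 64 -> 32
sizes['block7'] = s               # pool5/conv6/conv7 keep 32x32
for name, (k, st, p) in [('block8', (3, 2, 1)), ('block9', (3, 2, 1)),
                         ('block10', (3, 2, 1)), ('block11', (3, 2, 1)),
                         ('block12', (4, 1, 1))]:
    s = conv_out(s, k, stride=st, pad=p)
    sizes[name] = s
print(sizes)  # block4: 64, block7: 32, block8: 16, ..., block11: 2, block12: 1
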
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_300'):
    end_points = {}
    with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['conv2'] = net  # 150,150 128
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3. # 75 75 256
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3', padding='SAME')
        # Block 4. # 38 38 512
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['conv4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5. # 19 19 512
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3],
                              stride=1,
                              scope='pool5',
                              padding='SAME')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['conv6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['conv7'] = net

        # Blocks conv8/conv9/conv10/global: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'global'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = text_multibox_layer(layer, end_points[layer],
                                           normalizations[i])
            #predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return localisations, logits, end_points
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios=TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384',
             update_feat_shapes=False):
    end_points = {}
    with tf.compat.v1.variable_scope(scope,
                                     'text_box_300', [inputs],
                                     reuse=reuse):  # 300*300 384*383
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                          scope='conv1')  # 300 384
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 150
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          scope='conv2')  # 150 192
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 75
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          scope='conv3')  # 75 81
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 38
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv4')  # 38 40
        end_point = 'conv4'

        end_points[end_point] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 19
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv5')  # 19
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')  # 19

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')  # 19
        end_points['conv6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')  # 19
        end_point = 'conv7'

        end_points[end_point] = net

        # Blocks conv8/conv9/conv10/conv11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'conv8'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')

        end_points[end_point] = net  # 10
        end_point = 'conv9'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')

        end_points[end_point] = net  # 5
        end_point = 'conv10'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')

        end_points[end_point] = net  # 3
        end_point = 'conv11'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')

        end_points[end_point] = net  #

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        shape_list = []
        for i, layer in enumerate(feat_layers):
            with tf.compat.v1.variable_scope(layer + '_box'):
                p, loc, shape = text_multibox_layer(layer, end_points[layer],
                                                    anchor_sizes[i],
                                                    anchor_ratios[i],
                                                    normalizations[i])
            prediction_fn = slim.softmax
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(loc)
            shape_list.append(shape)

        if update_feat_shapes:
            return predictions, localisations, logits, end_points, shape_list
        else:
            return predictions, localisations, logits, end_points
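
The update_feat_shapes flag lets a caller recover the feature-map shapes actually produced by the graph instead of trusting precomputed defaults, which matters when the input resolution differs from the 300/384 defaults. A hedged usage sketch (the images tensor is hypothetical):

# images: a [batch, height, width, 3] input tensor.
predictions, localisations, logits, end_points, shape_list = text_net(
    images, is_training=True, update_feat_shapes=True)
# shape_list can then replace the default feat_shapes when generating anchors.
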
Example #12
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
        SSD网络的定义
        ssd_net(输入,类的数量,特征层的名称列表,anchor的实际大小,anchor的长宽比,正则化,是否训练,扔出网络的概率
                ,预测,重用,命名绑定)




    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(
            scope, 'ssd_300_vgg', [inputs],
            reuse=reuse):  # reuse=None/False: variables may only be created in this scope, not reused.
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                          scope='conv1')  # conv1 convolutions
        end_points['block1'] = net  # 300x300
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net  #150x150
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net  #75x75
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net  #38x38
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net  #19x19
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')  # 13x13 effective receptive field
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (the last two use stride 1).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  #10x10
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  #5x5
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  # 3x3
        end_point = 'block11'

        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  # 1x1
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)  # class scores for each feature map
            localisations.append(l)  # box locations for each feature map
        return predictions, localisations, logits, end_points
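
A hedged usage sketch for the definition above, building the SSD-300 graph for training; the placeholder input is illustrative:

images = tf.placeholder(tf.float32, [None, 300, 300, 3])
predictions, localisations, logits, end_points = ssd_net(images,
                                                         is_training=True)
# One entry per feature layer: len(predictions) == len(feat_layers) == 6.
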
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_mobilenetv1'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}

    min_depth = 32 
    depth_multiplier = 1.0
    with tf.variable_scope(scope, 'ssd_300_mobilenetv1', [inputs], reuse=reuse):
        input_shape = inputs.get_shape().as_list()
        if len(input_shape) != 4:
            raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
                             len(input_shape))

        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=is_training)):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=is_training):
                net, end_points = mobilenet_v1.mobilenet_v1_base(
                    inputs, scope='MobilenetV1',
                    min_depth=min_depth,
                    depth_multiplier=depth_multiplier,
                    conv_defs=None)
        '''
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        end_point = 'block13'
        with tf.variable_scope(end_point):
          net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='atrous_conv')
          end_points['block13_atrous'] = net
          net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
          # Block 7: 1x1 conv. Because the f**k.
          net = slim.conv2d(net, 1024, [1, 1], scope='conv1x1')
          end_points['block13'] = net
          net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        '''

        # Blocks 14/15/16: 1x1 and 3x3 convolutions, stride 2 (the last uses stride 1; block 17 is kept commented out below).
        end_point = 'block14'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], biases_initializer=None, trainable=is_training, scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], biases_initializer=None, trainable=is_training, stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block15'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], biases_initializer=None, trainable=is_training, scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], biases_initializer=None, trainable=is_training, stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block16'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], biases_initializer=None, trainable=is_training, scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], biases_initializer=None, trainable=is_training, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        end_point = 'block17'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 64, [1, 1], biases_initializer=None, trainable=is_training, scope='conv1x1')
            net = slim.conv2d(net, 128, [3, 3], biases_initializer=None, trainable=is_training, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        

        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i],
                                          is_training=is_training)
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
#        end_points['logits'] = logits
#        end_points['predictions'] = predictions
#        end_points['localisations'] = localisations

        return predictions, localisations, logits, end_points
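
The pad2d + padding='VALID' pattern used in the extra blocks above, and
throughout the examples below, reproduces Caffe-style downsampling. A minimal
sketch of the shape arithmetic in plain Python (names hypothetical, not part
of any example; figures are SSD-300's block8-11 grids):

def valid_conv_out_size(in_size, kernel, stride, pad):
    """Spatial output size of a VALID conv applied after symmetric padding."""
    return (in_size + 2 * pad - kernel) // stride + 1

# SSD-300 extra feature maps: 19 -> 10 -> 5 -> 3 -> 1.
assert valid_conv_out_size(19, kernel=3, stride=2, pad=1) == 10  # block8
assert valid_conv_out_size(10, kernel=3, stride=2, pad=1) == 5   # block9
assert valid_conv_out_size(5, kernel=3, stride=1, pad=0) == 3    # block10
assert valid_conv_out_size(3, kernel=3, stride=1, pad=0) == 1    # block11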
Beispiel #14
0
def mobilenets(inputs,
               num_classes=1000,
               width_multiplier=1.0,
               is_training=True,
               dropout_keep_prob=0.5,
               pad_logits=True,
               scope='MobileNets'):
    """MobileNets implementation.
    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes.
        is_training: whether or not the model is being trained.
        dropout_keep_prob: the probability that activations are kept in the dropout
            layers during training.
        scope: Optional scope for the variables.

    Returns:
        the last op containing the log predictions and end_points dict.
    """
    # MobileNets kernel size and padding (for layers with stride > 1).
    kernel_size = [3, 3]
    padding = [(kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Basic MobileNet block combining:
         - depthwise conv + BN + relu
         - 1x1 conv + BN + relu
        """
        with tf.variable_scope(scope, 'block', [net]) as sc:
            num_out_channels = int(num_out_channels * width_multiplier)
            if stride[0] == 1 and stride[1] == 1:
                # Depthwise convolution with stride=1
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            else:
                # Mimic CAFFE padding if stride > 1 => usually better accuracy.
                net = custom_layers.pad2d(net, pad=padding)
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            padding='VALID',
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            # Pointwise convolution.
            net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
            return net
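
    # Note: width_multiplier thins the whole network uniformly; for example,
    # width_multiplier=0.75 turns a nominal 512-channel block into
    # int(512 * 0.75) = 384 output channels. The depthwise stage keeps
    # depth_multiplier=1, so only the pointwise 1x1 widths change.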

    with tf.variable_scope(scope, 'MobileNets', [inputs]) as sc:
        end_points = {}
        # First full convolution...
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net,
                          32,
                          kernel_size,
                          stride=[2, 2],
                          padding='VALID',
                          scope='conv1')
        # net = slim.conv2d(inputs, 32, kernel_size, stride=[2, 2],
        #                   padding='SAME', scope='conv1')
        # Then, MobileNet blocks!
        net = mobilenet_block(net, 64, scope='block2')
        net = mobilenet_block(net, 128, stride=[2, 2], scope='block3')
        net = mobilenet_block(net, 128, scope='block4')
        net = mobilenet_block(net, 256, stride=[2, 2], scope='block5')
        net = mobilenet_block(net, 256, scope='block6')
        net = mobilenet_block(net, 512, stride=[2, 2], scope='block7')
        # Intermediate blocks...
        for i in range(5):
            net = mobilenet_block(net, 512, scope='block%i' % (i + 8))
        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer.
        net = custom_layers.spatial_mean(net,
                                         keep_dims=True,
                                         scope='spatial_mean14')
        net = slim.conv2d(net,
                          1000, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          normalizer_params=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)

        # Logits padding: get everyone to the same number of classes.
        if pad_logits:
            net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
        return net, end_points
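
A minimal usage sketch for the mobilenets example above (assumptions: TF1-style
graph mode, with slim and the custom_layers module it relies on importable):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = mobilenets(images, num_classes=1001, is_training=False)
# pad_logits=True (the default) pads the 1000-way head up to num_classes.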
Beispiel #15
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # print("====", inputs.shape)  # debug leftover

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original implementation, kept commented out for reference:
        '''
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net4 = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        
        # 38x38
        #end_points['block4'] = net
        net = slim.max_pool2d(net4, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        #end_points['block7'] = net
        net7 = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)
        
        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net7, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net8 = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        #end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net8, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net9 = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        #end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net9, 128, [1, 1], scope='conv1x1')
            net10 = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        #end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net11 = slim.conv2d(net10, 128, [1, 1], scope='conv1x1')
            net11 = slim.conv2d(net11, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net11
        #print("================net11 shape", net11.shape)
        # NCHW
        #################################################################################
        end_point = 'block10be'
        with tf.variable_scope(end_point):
            net10_a = tf.transpose(net11, perm=(0, 2, 3, 1))#nchw
            net10_a = tf.image.resize_nearest_neighbor(net10_a, (3,3))
            net10_a = tf.transpose(net10_a, perm=(0, 3, 1, 2))#nchw
            
            net10_a = slim.conv2d(net10_a, 256, [3,3], scope='pre10_3x3')
            net10_b = slim.conv2d(net10, 256, [1, 1], scope='pre10_1x1')
            net10_o = net10_a + net10_b
        end_points[end_point] = net10_o #3
        
        end_point = 'block9be'
        with tf.variable_scope(end_point):
            net9_a = tf.transpose(net10_o, perm=(0, 2, 3, 1))#nchw
            net9_a = tf.image.resize_nearest_neighbor(net9_a, (5,5))
            net9_a = tf.transpose(net9_a, perm=(0, 3, 1, 2))#nchw
            
            net9_a = slim.conv2d(net9_a, 256, [3,3], scope='pre9_3x3')
            net9_b = slim.conv2d(net9, 256, [1, 1], scope='pre9_1x1')
            net9_o = net9_a + net9_b
        end_points[end_point] = net9_o#5
        
        end_point = 'block8be'
        with tf.variable_scope(end_point):
            net8_a = tf.transpose(net9_o, perm=(0, 2, 3, 1))#nchw
            net8_a = tf.image.resize_nearest_neighbor(net8_a, (10,10))
            net8_a = tf.transpose(net8_a, perm=(0, 3, 1, 2))#nchw
            
            net8_a = slim.conv2d(net8_a, 512, [3,3], padding='SAME', scope='pre8_3x3')
            net8_b = slim.conv2d(net8, 512, [1, 1], scope='pre8_1x1')#10
            net8_o = net8_a + net8_b
        end_points[end_point] = net8_o#10

        end_point = 'block7be'
        with tf.variable_scope(end_point):
            net7_a = tf.transpose(net8_o, perm=(0, 2, 3, 1))#nchw
            net7_a = tf.image.resize_nearest_neighbor(net7_a, (19, 19))#
            net7_a = tf.transpose(net7_a, perm=(0, 3, 1, 2))#nchw
            
            net7_a = slim.conv2d(net7_a, 1024, [3, 3], padding='SAME', scope='pre7_3x3')
            net7_b = slim.conv2d(net7, 1024, [1, 1], scope='pre7_1x1')
            net7_o = net7_a + net7_b
        end_points[end_point] = net7_o
        
        end_point = 'block4be'
        with tf.variable_scope(end_point):
            net4_a = tf.transpose(net7_o, perm=(0, 2, 3, 1))#nchw
            net4_a = tf.image.resize_nearest_neighbor(net4_a, (38, 38))#
            net4_a = tf.transpose(net4_a, perm=(0, 3, 1, 2))#nchw
            
            net4_a = slim.conv2d(net4_a, 512, [3, 3], padding='SAME', scope='pre4_3x3')
            net4_b = slim.conv2d(net4, 512, [1, 1], scope='pre4_1x1')
            #print("asdfasdfasdf", net4_a.shape, net4_b.shape)
            net4_o = net4_a + net4_b#38
        end_points[end_point] = net4_o
        '''
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Note: tf.layers.dropout takes the drop rate, not the keep probability.
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
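
For context on the multibox loop above: in the standard SSD-300 configuration,
each location of feature layer i carries len(anchor_sizes[i]) +
len(anchor_ratios[i]) default boxes, which is where the canonical total of
8732 boxes comes from. A quick check in plain Python:

feat_sizes = [38, 19, 10, 5, 3, 1]        # block4, 7, 8, 9, 10, 11
anchors_per_loc = [4, 6, 6, 6, 4, 4]      # len(sizes) + len(ratios) per layer
assert sum(s * s * n for s, n in zip(feat_sizes, anchors_per_loc)) == 8732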
Beispiel #16
0
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios=TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384',
             update_feat_shapes=False):
    """
	Define the backbone (13 original vgg layers + 10 extra conv layers to extract multi-scale feature maps form as SSD)
	of the textboxes and the neck net -- 6 textbox layers.
	:param inputs: input image size
	:param feat_layers: feature map which connect to the textbox layer
	:param anchor_sizes: multi-scale anchor sizes
	:param anchor_ratios: multi-scale anchor aspect ratios [2.0, 1. / 2, 3.0, 1. / 3, 4.0, 1. / 4, 5., 1. / 5]
	:param normalizations: 
	:param is_training: train or not
	:param dropout_keep_prob: 
	:param reuse: 
	:param scope: 
	:param update_feat_shapes: 
	:return: [predictions, localisations, logits, end_points, shape_list(if update_feat_shapes=True)]
	"""
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'text_box_300', [inputs],
                           reuse=reuse):  # 300*300 384*384
        #######################################################
        # First five blocks: copy the VGG-16 architecture.   #
        # Note: end_points records the intermediate results. #
        #######################################################
        # ——————————————————Original VGG-16 blocks (total 13 conv layers)———————————————————————
        # Block 1.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                          scope='conv1')  # 300 384
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 150
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          scope='conv2')  # 150 192
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 75
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          scope='conv3')  # 75 81
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 38
        # Block 4.
        end_point = 'conv4'
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv4')  # 38 40
        end_points[end_point] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 19
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv5')  # 19
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1,
                              scope='pool5')  # 19  Pooling size 2 -> 3

        #################################################
        # Last six blocks: extra convolutional layers. #
        #################################################
        # ————————————Additional SSD blocks.——————————————————————
        # Block 6: let's dilate the hell out of it!  dilation -> 6
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')  # 19
        end_points['conv6'] = net
        # Block 7: 1x1 conv.
        end_point = 'conv7'
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')  # 19
        end_points[end_point] = net

        # Blocks conv8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  # 10

        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  # 5

        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  # 3

        end_point = 'conv11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  #

        #########################################################
        # end_points exposes every intermediate feature map.   #
        # Collect each layer's predictions in lists and return #
        # them to the loss/optimisation functions.             #
        #########################################################
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        shape_list = []
        # feat_layers=['conv4', 'conv7', 'conv8', 'conv9', 'conv10', 'conv11']
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                cls, loc, shape = text_multibox_layer(layer, end_points[layer],
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            prediction_fn = slim.softmax
            # Prediction of confidence and bbox location for each textbox layer.
            predictions.append(prediction_fn(cls))
            logits.append(cls)
            localisations.append(loc)
            shape_list.append(shape)

        if update_feat_shapes is True:
            return predictions, localisations, logits, end_points, shape_list
        else:
            return predictions, localisations, logits, end_points
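
A side note on the dilated conv6 these backbones all share: a 3x3 kernel with
dilation rate r has effective extent k + (k - 1) * (r - 1), so rate=6 covers a
13x13 window of SSD-300's 19x19 conv6 input at no extra parameter cost:

k, r = 3, 6
assert k + (k - 1) * (r - 1) == 13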
Beispiel #17
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """ SSD net definition. """
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net  # shape = (batch_size, 300, 300, 64)
        net = slim.max_pool2d(
            net, [2, 2], scope='pool1')  # shape = (batch_size, 150, 150, 64)
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net  # shape=(2, 150, 150, 128)
        net = slim.max_pool2d(net, [2, 2],
                              scope='pool2')  # shape=(2, 75, 75, 128)
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net  # shape=(2, 75, 75, 256)
        net = slim.max_pool2d(
            net, [2, 2],
            scope='pool3')  # shape=(2, 38, 38, 256), default padding='VALID'
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net  # shape=(2, 38, 38, 512)
        net = slim.max_pool2d(net, [2, 2],
                              scope='pool4')  # shape=(2, 19, 19, 512)
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net  # shape=(2, 19, 19, 512)
        net = slim.max_pool2d(net, [3, 3], stride=1,
                              scope='pool5')  # shape=(2, 19, 19, 512)

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net  # shape=(2, 19, 19, 1024)
        # tf.layers.dropout takes the drop rate, not the keep probability.
        net = tf.layers.dropout(
            net, rate=1 - dropout_keep_prob,
            training=is_training)  # shape=(2, 19, 19, 1024)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net  # shape=(2, 19, 19, 1024)
        net = tf.layers.dropout(
            net, rate=1 - dropout_keep_prob,
            training=is_training)  # shape=(2, 19, 19, 1024)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1],
                              scope='conv1x1')  # shape=(2, 19, 19, 256)
            net = custom_layers.pad2d(net,
                                      pad=(1, 1))  # shape=(2, 21, 21, 256)
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')  # shape=(2, 10, 10, 512)
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1],
                              scope='conv1x1')  # shape=(2, 10, 10, 128)
            net = custom_layers.pad2d(net,
                                      pad=(1, 1))  # shape=(2, 12, 12, 128)
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')  # shape=(2, 5, 5, 256)
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1],
                              scope='conv1x1')  # shape=(2, 5, 5, 128)
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')  # shape=(2, 3, 3, 256)
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1],
                              scope='conv1x1')  # shape=(2, 3, 3, 128)
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')  # shape=(2, 1, 1, 256)
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
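
For orientation before the next examples: ssd_multibox_layer (not shown here)
emits two heads per feature layer; in the reference SSD-TensorFlow code the
class scores are reshaped to (N, H, W, K, num_classes) and the offsets to
(N, H, W, K, 4), with K anchors per location (some forks below extend it with
extra heads, e.g. the pest head in a later example). A shape-only sketch
(hypothetical helper, not part of any example):

def multibox_head_shapes(n, h, w, k, num_classes=21):
    """Class-score and localisation tensor shapes for one feature layer."""
    return (n, h, w, k, num_classes), (n, h, w, k, 4)

cls_shape, loc_shape = multibox_head_shapes(2, 10, 10, 6)  # e.g. block8
assert cls_shape == (2, 10, 10, 6, 21) and loc_shape == (2, 10, 10, 6, 4)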
Beispiel #18
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    # Define the SSD network; the first half is the VGG-16 backbone.
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1: two conv layers, one max-pool layer.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        # Feature layer 1: block2 -- two conv layers, one max-pool layer.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        # Feature layer 2: block3 -- three conv layers, one max-pool layer.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        # Feature layer 3: block4 -- three conv layers, one max-pool layer.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        # Feature layer 4: block5 -- three conv layers, one max-pool layer.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Extra layers that SSD adds on top of VGG.
        # Block 6: let's dilate the hell out of it!
        # Feature layer 5: block6 -- one dilated conv layer, with dropout during training.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv.
        # Feature layer 6: block7 -- one 1x1 conv layer (a per-pixel linear
        # mix across channels), with dropout during training.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        # block8 through block11 are all feature layers.
        # With padding='VALID' the spatial size can shrink after the kernel
        # passes; padding='SAME' keeps the output the same size as the input.
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
Beispiel #19
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition.
    """

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        # print("====net.shape = {}".format(net))
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        if IS_FPN:
            stride_b8_to_b12 = 1
        else:
            stride_b8_to_b12 = 2
        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=stride_b8_to_b12,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=stride_b8_to_b12,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=stride_b8_to_b12,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        end_point = 'concat'
        with tf.variable_scope(end_point):
            high_feature_list = []
            for i in range(9, 12):
                high_feature_list.append(end_points['block' + str(i)])
            high_feature = tf.concat(high_feature_list, 1)
            end_points[end_point] = high_feature

        end_point = 'block4_concat'
        with tf.variable_scope(end_point):
            net_block4 = slim.conv2d(end_points['concat'],
                                     128, [1, 1],
                                     scope='conv1x1')
            # print("net_block4 ========{}".format(net_block4))
            net_block4 = tf.layers.conv2d_transpose(
                net_block4,
                512,
                kernel_size=3,
                strides=(4, 4),
                data_format='channels_first')
            # print("net_block4 ========{}".format(net_block4))
            end_points['block4'] += net_block4

        end_point = 'block7_concat'
        with tf.variable_scope(end_point):
            net_block7 = slim.conv2d(end_points['concat'],
                                     128, [1, 1],
                                     scope='conv1x1')
            # print("net_block4 ========{}".format(net_block4))
            net_block7 = tf.layers.conv2d_transpose(
                net_block7,
                1024,
                kernel_size=1,
                strides=(2, 2),
                data_format='channels_first')
            # print("net_block4 ========{}".format(net_block4))
            end_points['block7'] += net_block7

        end_point = 'block8_concat'
        with tf.variable_scope(end_point):
            net_block8 = slim.conv2d(end_points['concat'],
                                     512, [1, 1],
                                     scope='conv1x1')
            end_points['block8'] += net_block8
        # Prediction and localisation layers.
        predictions_pest = []
        logits_pest = []
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                # print("=debug===== "+str(i)+"  = {}".format(end_points[layer]))
                # Get the per-anchor predictions here.
                pest, p, l = ssd_vgg_300.ssd_multibox_layer(
                    end_points[layer], num_classes, anchor_sizes[i],
                    anchor_ratios[i], normalizations[i])
            predictions_pest.append(prediction_fn(pest))
            logits_pest.append(pest)

            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions_pest, logits_pest, predictions, localisations, logits, end_points
Beispiel #20
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg',
            DSSD_FLAG=False):
    """SSD net definition.
    """
    # End_points collect relevant activations for external use.
    end_points = {}
    if inputs.shape[2] == inputs.shape[3]:
        mode = 'bnwh'
    else:
        mode = 'bwhn'
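    # Note: `mode` guesses the data layout from the static shape (equal dims
    # 2 and 3 suggest NCHW); it is only consumed by the commented-out FPN
    # code further down.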
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [4, 4],
                              scope='conv4x4',
                              padding='VALID')
            # Fix padding to match Caffe version (pad=1).
            # pad_shape = [(i-j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net
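
        # Note: with the stride-2 path (feature grids 64/32/16/8/4/2 across
        # block4..block11 for a 512x512 input), the map entering block12 is
        # 2x2; pad2d makes it 4x4, so the 4x4 VALID conv above leaves a
        # final 1x1 grid.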

        # Prediction and localisation layers.
        # rever_feat_layers = list(reversed(feat_layers))
        # for i, l in enumerate(rever_feat_layers):
        #     if i == 0: continue
        #     l_ = rever_feat_layers[i - 1]
        #
        #     end_points[l] = tf.concat([upbilinear([end_points[l_], end_points[l]], name=l_), end_points[l]],axis=1)

        # with tf.variable_scope("fpn11"):
        #
        #
        #     end_points['block11'] = tf.add(upbilinear([end_points['block12'], end_points['block11']],name ="up_11",mode=mode),
        #                                    end_points['block11'],name= "fpn_block11")
        #
        # with tf.variable_scope("fpn10"):
        #
        #     end_points['block10'] = tf.add(upbilinear([end_points['block11'],end_points['block10']],name ="up_10",mode=mode),
        #                                    end_points['block10'],
        #                                    name="fpn_block10")
        # with tf.variable_scope("fpn9"):
        #     b9 = slim.conv2d(end_points['block10'], 256, [1, 1], scope='9conv1x1')
        #     b9_ = slim.conv2d(end_points['block9'], 256, [1, 1], scope='9_conv1x1')
        #     end_points['block9'] = tf.add(upbilinear([end_points['block10'],end_points['block9']],name ="up_9",mode=mode),
        #                                   end_points['block9'],
        #                                    name="fpn_block9")
        # with tf.variable_scope("fpn8"):
        #     b8 = slim.conv2d(end_points['block9'], 512, [1, 1], scope='8conv1x1')
        #
        #     end_points['block8'] = tf.add(upbilinear([b8, end_points['block8']],name ="up_8",mode=mode), end_points['block8'],
        #                                    name="fpn_block8")
        # with tf.variable_scope("fpn7"):
        #     b7 = slim.conv2d(end_points['block8'], 1024, [1, 1], scope='7conv1x1')
        #
        #     end_points['block7'] = tf.add(upbilinear([b7, end_points['block7']],name ="up_7",mode=mode), end_points['block7'],
        #                                   name="fpn_block7")
        # with tf.variable_scope("fpn4"):
        #     b4 = slim.conv2d(end_points['block7'], 512, [1, 1], scope='4conv1x1')
        #
        #     end_points['block4'] = tf.add(upbilinear([b4, end_points['block4']],name ="up_4",mode=mode), end_points['block4'],
        #                                   name="fpn_block4")

        if DSSD_FLAG:
            with tf.variable_scope("dssd11"):

                de_12 = slim.conv2d_transpose(end_points['block12'],
                                              512, [3, 3],
                                              stride=2,
                                              scope="de_12")
                con_12 = slim.conv2d(de_12, 512, [3, 3], scope='conv_12')
                bn_12 = slim.batch_norm(con_12, is_training=is_training)

                con_11 = slim.conv2d(end_points["block11"],
                                     512, [3, 3],
                                     scope="conv11")
                bn_11 = slim.batch_norm(con_11, is_training=is_training)
                relu_11 = tf.nn.relu(bn_11)
                con_11 = slim.conv2d(relu_11, 512, [3, 3], scope="conv11_2")
                bn_11 = slim.batch_norm(con_11, is_training=is_training)

                end_points["block11"] = tf.nn.relu(tf.multiply(bn_12, bn_11))

            with tf.variable_scope("dssd10"):

                de_11 = slim.conv2d_transpose(end_points['block11'],
                                              512, [3, 3],
                                              stride=2,
                                              scope="de_11")
                con_11 = slim.conv2d(de_11, 512, [3, 3], scope='conv_11')
                bn_11 = slim.batch_norm(con_11, is_training=is_training)

                con_10 = slim.conv2d(end_points["block10"],
                                     512, [3, 3],
                                     scope="conv10")
                bn_10 = slim.batch_norm(con_10, is_training=is_training)
                relu_10 = tf.nn.relu(bn_10)
                con_10 = slim.conv2d(relu_10, 512, [3, 3], scope="conv10_2")
                bn_10 = slim.batch_norm(con_10, is_training=is_training)

                end_points["block10"] = tf.nn.relu(tf.multiply(bn_11, bn_10))

            with tf.variable_scope("dssd9"):

                de_10 = slim.conv2d_transpose(end_points['block10'],
                                              512, [3, 3],
                                              stride=2,
                                              scope="de_10")
                con_10 = slim.conv2d(de_10, 512, [3, 3], scope='conv_10')
                bn_10 = slim.batch_norm(con_10, is_training=is_training)

                con_9 = slim.conv2d(end_points["block9"],
                                    512, [3, 3],
                                    scope="conv9")
                bn_9 = slim.batch_norm(con_9, is_training=is_training)
                relu_9 = tf.nn.relu(bn_9)
                con_9 = slim.conv2d(relu_9, 512, [3, 3], scope="conv9_2")
                bn_9 = slim.batch_norm(con_9, is_training=is_training)

                end_points["block9"] = tf.nn.relu(tf.multiply(bn_10, bn_9))

            with tf.variable_scope("dssd8"):

                de_9 = slim.conv2d_transpose(end_points['block9'],
                                             512, [3, 3],
                                             stride=2,
                                             scope="de_9")
                con_9 = slim.conv2d(de_9, 512, [3, 3], scope='conv_9')
                bn_9 = slim.batch_norm(con_9, is_training=is_training)

                con_8 = slim.conv2d(end_points["block8"],
                                    512, [3, 3],
                                    scope="conv8")
                bn_8 = slim.batch_norm(con_8, is_training=is_training)
                relu_8 = tf.nn.relu(bn_8)
                con_8 = slim.conv2d(relu_8, 512, [3, 3], scope="conv8_2")
                bn_8 = slim.batch_norm(con_8, is_training=is_training)

                end_points["block8"] = tf.nn.relu(tf.multiply(bn_9, bn_8))

            with tf.variable_scope("dssd7"):

                de_8 = slim.conv2d_transpose(end_points['block8'],
                                             512, [3, 3],
                                             stride=2,
                                             scope="de_8")
                con_8 = slim.conv2d(de_8, 512, [3, 3], scope='conv_8')
                bn_8 = slim.batch_norm(con_8, is_training=is_training)

                con_7 = slim.conv2d(end_points["block7"],
                                    512, [3, 3],
                                    scope="conv7")
                bn_7 = slim.batch_norm(con_7, is_training=is_training)
                relu_7 = tf.nn.relu(bn_7)
                con_7 = slim.conv2d(relu_7, 512, [3, 3], scope="conv7_2")
                bn_7 = slim.batch_norm(con_7, is_training=is_training)

                end_points["block7"] = tf.nn.relu(tf.multiply(bn_8, bn_7))

            with tf.variable_scope("dssd4"):

                de_7 = slim.conv2d_transpose(end_points['block7'],
                                             512, [3, 3],
                                             stride=2,
                                             scope="de_7")
                con_7 = slim.conv2d(de_7, 512, [3, 3], scope='conv_7')
                bn_7 = slim.batch_norm(con_7, is_training=is_training)

                con_4 = slim.conv2d(end_points["block4"],
                                    512, [3, 3],
                                    scope="conv4")
                bn_4 = slim.batch_norm(con_4, is_training=is_training)
                relu_4 = tf.nn.relu(bn_4)
                con_4 = slim.conv2d(relu_4, 512, [3, 3], scope="conv4_2")
                bn_4 = slim.batch_norm(con_4, is_training=is_training)

                end_points["block4"] = tf.nn.relu(tf.multiply(bn_7, bn_4))

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
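
The six dssd* scopes above all repeat one fusion template. A compact sketch of
it as a standalone helper (hypothetical name, written against the same tf/slim
API the example uses; note slim.conv2d_transpose defaults to padding='SAME',
so stride 2 doubles the spatial size and the two paths line up):

def dssd_fusion(deep, shallow, depth=512, is_training=True):
    """Element-wise-product DSSD module: upsample `deep`, refine `shallow`, fuse."""
    up = slim.conv2d_transpose(deep, depth, [3, 3], stride=2)
    up = slim.batch_norm(slim.conv2d(up, depth, [3, 3]), is_training=is_training)
    lat = slim.batch_norm(slim.conv2d(shallow, depth, [3, 3]),
                          is_training=is_training)
    lat = slim.conv2d(tf.nn.relu(lat), depth, [3, 3])
    lat = slim.batch_norm(lat, is_training=is_training)
    return tf.nn.relu(tf.multiply(up, lat))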
Beispiel #21
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    """
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
    """
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        #######################################################
        # First five blocks: copy the VGG-16 architecture.   #
        # Note: end_points records the intermediate results. #
        #######################################################
        # ——————————————————Original VGG-16 blocks.———————————————————————
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1,
                              scope='pool5')  # pool size changed from 2 to 3
        #################################################
        # Last six blocks: extra convolutional layers. #
        #################################################
        # ————————————Additional SSD blocks.——————————————————————
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        ######################################################
        # Read each feature layer back from end_points and   #
        # collect the per-layer predictions into lists that  #
        # are returned to the loss/optimisation code.        #
        ######################################################
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        # feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
                """
                The number of boxes per cell equals len(anchor_sizes[i]) + len(anchor_ratios[i]):
                anchor_sizes=[(21., 45.),
                              (45., 99.),
                              (99., 153.),
                              (153., 207.),
                              (207., 261.),
                              (261., 315.)]
                anchor_ratios=[[2, .5],
                               [2, .5, 3, 1./3],
                               [2, .5, 3, 1./3],
                               [2, .5, 3, 1./3],
                               [2, .5],
                               [2, .5]]
                normalizations=[20, -1, -1, -1, -1, -1]
                """
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
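
A quick sanity check of the box-count rule quoted in the docstring above: each feature layer predicts len(anchor_sizes[i]) + len(anchor_ratios[i]) default boxes per cell. A minimal sketch in plain Python, reusing the values from that docstring:

# Anchors per cell for each SSD-300 feature layer (values from the
# docstring above).
anchor_sizes = [(21., 45.), (45., 99.), (99., 153.),
                (153., 207.), (207., 261.), (261., 315.)]
anchor_ratios = [[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                 [2, .5, 3, 1. / 3], [2, .5], [2, .5]]
for i, (sizes, ratios) in enumerate(zip(anchor_sizes, anchor_ratios)):
    # len(sizes) square-ish boxes plus len(ratios) aspect-ratio boxes.
    print('layer %d: %d anchors per cell' % (i, len(sizes) + len(ratios)))
# Prints 4, 6, 6, 6, 4, 4, matching the heads on block4/7/8/9/10/11.
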
Beispiel #22
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg',
            large=True,
            medium=True):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        # `large` and `medium` are plain Python booleans here, so the branch
        # can be resolved at graph-construction time; tf.cond would require
        # scalar boolean tensor predicates.
        if large:
            end_points = largenetwork(net, end_point, end_points)
        elif medium:
            end_points = mediumnetwork(net, end_point, end_points)
        else:
            end_points = foveanetwork(net, end_point, end_points)
        # Prediction and localisations layers.
        if large:
            predictions, localisations, logits = find_predictions_large(
                end_points, num_classes, anchor_sizes, anchor_ratios,
                normalizations, prediction_fn)
        elif medium:
            predictions, localisations, logits = find_predictions_medium(
                end_points, num_classes, anchor_sizes, anchor_ratios,
                normalizations, prediction_fn)
        else:
            predictions, localisations, logits = find_predictions_fovea(
                end_points, num_classes, anchor_sizes, anchor_ratios,
                normalizations, prediction_fn)
        return predictions, localisations, logits, end_points
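
If `large` and `medium` were scalar boolean tensors rather than Python booleans, tf.cond would be the right construct: it needs a tensor predicate and branch functions that return matching structures. A minimal sketch of that usage (names are illustrative, not from the snippet):

import tensorflow as tf

# tf.cond needs a scalar boolean tensor predicate; both branches must
# return tensors with matching structure and dtypes.
use_large = tf.placeholder(tf.bool, shape=[], name='use_large')
x = tf.constant(1.0)
y = tf.cond(use_large,
            lambda: x * 2.0,    # branch taken when use_large is True
            lambda: x + 10.0)   # branch taken when use_large is False

with tf.Session() as sess:
    print(sess.run(y, feed_dict={use_large: True}))   # 2.0
    print(sess.run(y, feed_dict={use_large: False}))  # 11.0
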
Beispiel #23
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    #if data_format == 'NCHW':
    #inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')

        # transform net4 to net7
        net4_net7 = net
        net4_net7 = slim.conv2d(net4_net7, 1024, [1, 1], scope='net4_net7')

        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net  #19*19*1024
        net7_net4 = tensor_resize.tensor4_resize(net, [38, 38])
        net7_net4 = slim.conv2d(net7_net4, 512, [1, 1], scope='net7_net4')

        # Merge block4 into block7.
        end_points['block7'] = end_points['block7'] + net4_net7

        # Transform net7 to net8.
        net7_net8 = end_points['block7']
        net7_net8 = slim.max_pool2d(net7_net8, [3, 3],
                                    stride=2,
                                    scope='net7_net8_mp')
        net7_net8 = slim.conv2d(net7_net8, 512, [1, 1], scope='net7_net8')

        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        net8_net7 = tensor_resize.tensor4_resize(net, [19, 19])
        net8_net7 = slim.conv2d(net8_net7, 1024, [1, 1], scope='net8_net7')

        # merge block7 to block8
        end_points[end_point] = end_points[end_point] + net7_net8

        # Transform net8 to net9.
        net8_net9 = end_points[end_point]
        net8_net9 = slim.max_pool2d(net8_net9, [3, 3],
                                    stride=2,
                                    scope='net8_net9_mp')
        net8_net9 = slim.conv2d(net8_net9, 256, [1, 1], scope='net8_net9')

        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        net9_net8 = tensor_resize.tensor4_resize(net, [10, 10])
        net9_net8 = slim.conv2d(net9_net8, 512, [1, 1], scope='net9_net8')

        # merge block8 to block9
        end_points[end_point] = end_points[end_point] + net8_net9

        # Transform net9 to net10
        net9_net10 = end_points[end_point]
        net9_net10 = slim.max_pool2d(net9_net10, [3, 3],
                                     stride=2,
                                     scope='net9_net10_mp')
        net9_net10 = slim.conv2d(net9_net10, 256, [1, 1], scope='net9_net10')

        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  #3*3*256
        net10_net9 = tensor_resize.tensor4_resize(net, [5, 5])
        net10_net9 = slim.conv2d(net10_net9, 256, [1, 1], scope='net10_net9')

        # Merge block9 into block10.
        end_points[end_point] = end_points[end_point] + net9_net10

        # Transform net10 to net11.
        net10_net11 = end_points[end_point]
        net10_net11 = slim.max_pool2d(net10_net11, [3, 3],
                                      stride=2,
                                      scope='net10_net11_mp',
                                      padding='VALID')
        net10_net11 = slim.conv2d(net10_net11,
                                  256, [1, 1],
                                  scope='net10_net11')

        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net  #1*1*256

        # Upsample net11 and transform it back to net10's scale.
        net11_net10 = tensor_resize.tensor4_resize(net, [3, 3])
        net11_net10 = slim.conv2d(net11_net10,
                                  256, [1, 1],
                                  scope='net11_net10')

        # Merge block10 into block11.
        end_points[end_point] = end_points[end_point] + net10_net11

        end_points['block4'] = end_points['block4'] + net7_net4
        end_points['block7'] = end_points['block7'] + net8_net7
        end_points['block8'] = end_points['block8'] + net9_net8
        end_points['block9'] = end_points['block9'] + net10_net9
        end_points['block10'] = end_points['block10'] + net11_net10

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
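
The example above builds a bidirectional fusion on top of the SSD trunk: neighbouring blocks are aligned with the custom tensor_resize.tensor4_resize helper plus a 1x1 projection and merged by elementwise addition. A minimal sketch of one such merge using stock TensorFlow ops; tf.image.resize_images stands in for the custom helper, which is an assumption:

import tensorflow as tf

slim = tf.contrib.slim

def merge_into(target, source, scope):
    # Resize `source` to `target`'s spatial size, project it to the same
    # channel count with a 1x1 conv, and add the two maps elementwise.
    # tf.image.resize_images is a stand-in for tensor_resize.tensor4_resize.
    h, w = target.get_shape().as_list()[1:3]
    depth = target.get_shape().as_list()[3]
    resized = tf.image.resize_images(source, [h, w])
    projected = slim.conv2d(resized, depth, [1, 1], scope=scope)
    return target + projected

# e.g. end_points['block7'] = merge_into(end_points['block7'],
#                                        end_points['block8'], 'net8_net7')
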
Beispiel #24
0
def ssd_net(inputs,
            num_classes,
            feature_layers,
            anchor_sizes,
            anchor_ratios,
            normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_vgg'):
    # Structure of the SSD net.

    outputs = {}
    with tf.variable_scope(scope, 'ssd_vgg', [inputs], reuse=reuse):
        # Structure of vgg16
        # Block1
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        outputs['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        outputs['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        outputs['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        outputs['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        outputs['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks
        # Block 6
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        outputs['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        outputs['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block8
        with tf.variable_scope('block8'):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        outputs['block8'] = net
        # Block 9
        with tf.variable_scope('block9'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        outputs['block9'] = net
        # Block 10
        with tf.variable_scope('block10'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        outputs['block10'] = net
        # Block 11
        with tf.variable_scope('block11'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        outputs['block11'] = net
        # Block 12
        with tf.variable_scope('block12'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [4, 4],
                              stride=2,
                              scope='conv4x4',
                              padding='VALID')
        outputs['block12'] = net

        # Prediction and localization layers.
        predictions = []  # class probabilities (after softmax)
        logits = []  # raw class scores
        locations = []  # box location predictions
        for i, layer in enumerate(feature_layers):  # Block 4,7,8,9,10,11,12
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layers(outputs[layer], num_classes,
                                           anchor_sizes[i], anchor_ratios[i],
                                           normalizations[i])
                predictions.append(
                    prediction_fn(p))  # softmax over the class scores
            logits.append(p)
            locations.append(l)
    return predictions, locations, logits, outputs
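
The pad2d + padding='VALID' pattern that all of these snippets wrap around the stride-2 3x3 convolutions emulates Caffe's symmetric pad=1: on an even-sized map at stride 2, TensorFlow's SAME mode pads only the bottom/right, shifting features by a pixel relative to Caffe. A minimal sketch of what custom_layers.pad2d presumably does (the helper's exact behaviour is an assumption):

import tensorflow as tf

def pad2d(net, pad=(1, 1)):
    # Hedged sketch of custom_layers.pad2d: symmetric zero padding on the
    # spatial dims of an NHWC tensor (the real helper may differ).
    return tf.pad(net, [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])

# For block9's 10x10 input: pad to 12x12, then a 3x3/stride-2 VALID conv
# gives (12 - 3) // 2 + 1 = 5, i.e. the expected 5x5 map, with the
# padding split evenly on both sides as in Caffe.
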
Beispiel #25
0
def text_net(inputs,
             feat_layers=default_params.feat_layers,
             normalizations=default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             scope='vgg_16'):  # checked
    feature_layers = {}
    with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=None):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        feature_layers['conv4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        # Block 7: 1x1 conv, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        feature_layers['conv7'] = net

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        feature_layers['conv8'] = net

        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        feature_layers['conv9'] = net

        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        feature_layers['conv10'] = net

        end_point = 'conv11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        feature_layers['conv11'] = net
        localisations, logits = text_detect_net(feature_layers, feat_layers)
        return localisations, logits, feature_layers
Beispiel #26
0
def ssd_net(
        inputs,  # defines the SSD network structure
        num_classes=SSDNet.default_params.num_classes,  # number of classes
        feat_layers=SSDNet.default_params.feat_layers,  # feature layers
        anchor_sizes=SSDNet.default_params.anchor_sizes,
        anchor_ratios=SSDNet.default_params.anchor_ratios,
        normalizations=SSDNet.default_params.normalizations,  # normalization flags
        is_training=True,
        dropout_keep_prob=0.5,
        prediction_fn=slim.softmax,
        reuse=None,
        scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}  # collects each layer's output for external use
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                          scope='conv1')  # first VGG-16 block: two 3x3 convs, 64 channels
        end_points['block1'] = net  # store conv1_2 output as 'block1'
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          scope='conv2')  # two 3x3 convs, 128 channels
        end_points['block2'] = net  # store conv2_2 output as 'block2'
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          scope='conv3')  # three 3x3 convs, 256 channels
        end_points['block3'] = net  # store conv3_3 output as 'block3'
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv4')  # three 3x3 convs, 512 channels
        end_points['block4'] = net  # store conv4_3 output as 'block4'
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          scope='conv5')  # three 3x3 convs, 512 channels
        end_points['block5'] = net  # store conv5_3 output as 'block5'
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks (VGG's fully-connected layers are dropped).
        # Block 6: dilated (atrous) convolution over the last VGG pooling output.
        net = slim.conv2d(net, 1024, [3, 3], rate=6,
                          scope='conv6')
        end_points['block6'] = net  # store conv6 output as 'block6'
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)  # dropout layer
        # Block 7: 1x1 conv with 1024 outputs, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)  # dropout again after the conv

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(
                net, 256, [1, 1], scope='conv1x1'
            )  # 1x1 conv, then a stride-2 3x3 conv giving 512 feature maps ('block8')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(
                net, 128, [1, 1],
                scope='conv1x1')  # 1x1 conv, then a stride-2 3x3 conv giving 256 feature maps ('block9')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(
                net, 128, [1, 1],
                scope='conv1x1')  # 1x1 conv, then a 3x3 conv giving 256 feature maps ('block10')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(
                net, 128, [1, 1],
                scope='conv1x1')  # 1x1 conv, then a 3x3 conv giving 256 feature maps ('block11')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):  # iterate over the feature layers
            with tf.variable_scope(layer + '_box'):  # one named scope per layer
                p, l = ssd_multibox_layer(
                    end_points[layer],  # feature layer for multi-scale box prediction;
                                        # returns per-anchor class scores p and locations l
                    num_classes,  # number of classes
                    anchor_sizes[i],  # anchor scales (shared within one feature map)
                    anchor_ratios[i],  # anchor aspect ratios
                    normalizations[i])  # per-layer normalization; only the first feature map is L2-normalized
            # Collect each layer's predictions.
            predictions.append(prediction_fn(p))  # prediction_fn is softmax: class probabilities
            logits.append(p)  # raw per-anchor class scores
            localisations.append(l)  # predicted box locations

        return predictions, localisations, logits, end_points  # class probabilities, box locations, raw class scores, feature layers
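
The comments above describe ssd_multibox_layer as returning, for every feature-map cell and every default box, class scores p and box offsets l. A minimal sketch of such a head under those assumptions; the document's real implementation (including the optional L2 normalization controlled by normalizations[i]) is not reproduced here:

import tensorflow as tf

slim = tf.contrib.slim

def multibox_head_sketch(net, num_classes, sizes, ratios):
    # One conv for box offsets and one for class logits, reshaped to
    # expose the per-anchor dimension. The optional L2 normalization of
    # the input feature map is omitted in this sketch.
    num_anchors = len(sizes) + len(ratios)
    n, h, w = tf.shape(net)[0], tf.shape(net)[1], tf.shape(net)[2]
    # Localisation: 4 offsets (cx, cy, w, h) per anchor.
    loc = slim.conv2d(net, num_anchors * 4, [3, 3],
                      activation_fn=None, scope='conv_loc')
    loc = tf.reshape(loc, [n, h, w, num_anchors, 4])
    # Classification: num_classes raw scores per anchor.
    cls = slim.conv2d(net, num_anchors * num_classes, [3, 3],
                      activation_fn=None, scope='conv_cls')
    cls = tf.reshape(cls, [n, h, w, num_anchors, num_classes])
    return cls, loc
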
Beispiel #27
0
def ssd_net(inputs,
            num_classes,
            feat_layers,
            normalizations,
            is_training,
            dropout_keep_prob,
            prediction_fn,
            reuse,
            scope):
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_640_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        print("nnnn-block1 begin")
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='r2_crcr1')
        end_points['block1'] = net
        print("uuuu-block1 end")

        print("nnnn-block2 begin")
        net = slim.max_pool2d(net, [2, 2], scope='bbpool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='r2_crcr2')
        end_points['block2'] = net
        print("uuuu-block2 end")


        print("nnnn-block3 begin")
        net = slim.max_pool2d(net, [2, 2], scope='ddpool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='r3_crcr3')
        end_points['block3'] = net
        print("uuuu-block3 end")


        print("nnnn-block4 begin")
        net = slim.max_pool2d(net, [2, 2], scope='ffpool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='r3_crcr4')
        end_points['block4'] = net
        print("uuuu-block4 end")


        print("nnnn-block5 begin")
        net = slim.max_pool2d(net, [2, 2], scope='hhpool4')
        # `rate` corresponds to `dilation` in the prototxt; dilation 1 means rate=1 here.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], rate=1, scope='r3_crcr5')
        end_points['block5'] = net
        print("uuuu-block5 end")


        print("nnnn-block6 begin")
        # pool5 in the prototxt: kernel_size 3->2, stride 1->2, pad 1 (the pad has no direct equivalent here).
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='jjpool5')
        net = slim.conv2d(net, 1024, [3, 3], rate=1, scope='kkfc6')
        end_points['block6'] = net
        print("uuuu-block6 end")


        print("nnnn-block7 begin")
        net = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)
        net = slim.conv2d(net, 1024, [1, 1], scope='llfc7')
        end_points['block7'] = net
        print("uuuu-block7 end")


        print("nnnn-block8 begin")
        # conv61->conv62
        net = tf.layers.dropout(net, rate=1 - dropout_keep_prob, training=is_training)
        end_point = 'block8'
        with tf.variable_scope(end_point):
            # paper: 1x1x128
            net = slim.conv2d(net, 256, [1, 1], scope='mmconv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            # paper: 3x3x512-s2
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='nnconv3x3', padding='VALID')
        end_points[end_point] = net
        print("uuuu-block8 end")

        print("nnnn-block9 begin")
        end_point = 'block9'
        # conv71->conv72
        with tf.variable_scope(end_point):
            # paper: 1x1x128
            net = slim.conv2d(net, 128, [1, 1], scope='ooconv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            # paper: 3x3x256-s2
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='ppconv3x3', padding='VALID')
        end_points[end_point] = net
        print("uuuu-block9 end")


        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        addn = 1
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                print("nnnn-begin process----" + layer + '_box')
                p, l = MultiboxLayer(addn,
                                     end_points[layer],
                                     num_classes,
                                     normalizations[i])
                addn = 0
                print("uuuu-end process----" + layer + '_box')

            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        print("[final end]")
        return predictions, localisations, logits, end_points
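
A note on the dropout calls throughout these snippets: tf.layers.dropout's rate argument is the fraction of units to drop, not to keep, so a keep probability has to be passed as 1 - dropout_keep_prob. A minimal check:

import numpy as np
import tensorflow as tf

x = tf.ones([1, 10000])
keep_prob = 0.8

# Keeping 80% of the activations means dropping 20%: rate = 1 - keep_prob.
y = tf.layers.dropout(x, rate=1 - keep_prob, training=True)

with tf.Session() as sess:
    out = sess.run(y)
    print(np.mean(out == 0))  # ~0.2 of the units are zeroed
    print(out.max())          # survivors scaled by 1 / keep_prob = 1.25
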
Beispiel #28
0
    def __additional_ssd_block(self, end_points, net):
        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).

        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        net = slim.batch_norm(net)
        net = self.__dropout(net)
        end_points['block6'] = net
        # Block 7: 1x1 conv, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        net = slim.batch_norm(net)
        net = self.__dropout(net)
        end_points['block7'] = net

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = slim.batch_norm(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            net = slim.batch_norm(net)
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.batch_norm(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            net = slim.batch_norm(net)
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.batch_norm(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            net = slim.batch_norm(net)
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.batch_norm(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            net = slim.batch_norm(net)
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.batch_norm(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [4, 4],
                              scope='conv4x4',
                              padding='VALID')
            net = slim.batch_norm(net)
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(self.feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = self.ssd_multibox_layer(end_points[layer],
                                               self.num_classes,
                                               self.anchor_sizes[i],
                                               self.anchor_ratios[i],
                                               self.normalizations[i])
            predictions.append(slim.softmax(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
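
This variant inserts slim.batch_norm after every convolution by hand. An equivalent and more idiomatic slim pattern (a sketch, with illustrative parameter values) attaches batch norm as the conv's normalizer through an arg_scope:

import tensorflow as tf

slim = tf.contrib.slim

def conv_bn_scope(is_training):
    # Fold batch norm into every slim.conv2d as its normalizer_fn, so
    # `is_training` is wired up in one place. decay=0.997 is illustrative.
    params = {'is_training': is_training, 'decay': 0.997}
    return slim.arg_scope([slim.conv2d],
                          normalizer_fn=slim.batch_norm,
                          normalizer_params=params)

# Usage:
# with conv_bn_scope(is_training=True):
#     net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')  # conv + BN + ReLU
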
Beispiel #29
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """
    SSD net definition.
    """

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Base VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 150*150*64
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 75*75*128
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 38*38*256
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 19*19*512
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1,
                              scope='pool5')  # 19*19*512

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 conv, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1 - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net,
                              256, [3, 3],
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        # Predictions from each feature map.
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                # number of prediction cells times boxes per cell;
                # each feature map's anchor sizes and ratios are fixed in advance
                pred, loc = ssd_multibox_layer(end_points[layer], num_classes,
                                               anchor_sizes[i],
                                               anchor_ratios[i],
                                               normalizations[i])
            predictions.append(prediction_fn(pred))
            logits.append(pred)
            localisations.append(loc)

        return predictions, localisations, logits, end_points

Beispiel #30
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition.
    """
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (atrous convolution).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv, replacing VGG's fc7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [4, 4],
                              scope='conv4x4',
                              padding='VALID')
            # Fix padding to match Caffe version (pad=1).
            # pad_shape = [(i-j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
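
A shape check for the extra block12 that distinguishes this 512 variant (assuming the usual 512x512 input): block11 leaves a 2x2 map, pad2d(pad=(1, 1)) grows it to 4x4, and the 4x4 VALID convolution reduces it to the final 1x1 feature map:

def valid_conv_out(size, kernel, stride=1):
    # Side length after a VALID convolution.
    return (size - kernel) // stride + 1

print(valid_conv_out(2 + 2, 4))  # block12: padded 4x4 input, 4x4 kernel -> 1
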
Beispiel #31
0
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition"""
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool5')

        # Additional SSD blocks
        # Block 6
        net = slim.conv2d(net, 1024, [3, 3], rate=6,
                          scope='conv6')  # rate=6 is the dilation (atrous) rate
        end_points['block6'] = net
        # Block 7
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts)
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
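
On the rate=6 argument to conv6 used throughout this page: rate is the dilation (atrous) rate, spacing the taps of the 3x3 kernel six pixels apart so the layer sees a 13x13 region ((3 - 1) * 6 + 1 = 13) with no extra weights; this is how SSD approximates the large receptive field of VGG's fc6. A minimal sketch contrasting the two:

import tensorflow as tf

slim = tf.contrib.slim

x = tf.placeholder(tf.float32, [1, 19, 19, 512])

# Dense 3x3 conv: 3x3 receptive field.
y_dense = slim.conv2d(x, 1024, [3, 3], scope='conv_dense')

# Dilated 3x3 conv with rate=6: the same 9 weights per filter, but taps
# spaced 6 pixels apart, for a 13x13 effective receptive field.
y_atrous = slim.conv2d(x, 1024, [3, 3], rate=6, scope='conv_atrous')
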