Esempio n. 1
0
def conv_feat_layers(inputs, width, detect, training):
    """Build the shared conv trunk plus a detection or recognition head.

    The trunk lives in variable scope ``conv_comm`` and has three stages.
    Each stage calls ``layers.conv_layer`` twice, pads the third axis of
    the tensor by one element at its end (``layers.padd_layer``), calls
    ``layers.conv_layer`` once more with a (2, 2)/(2, 2) 'valid' spec (this
    replaces a max-pool, hence the ``poolN`` names) and finishes with
    ``layers.block_resnet``.  The head is built outside that scope.

    Layer specs are lists that presumably mean
    ``[filters, kernel, strides, padding, <flag>, <flag>, name]`` --
    TODO confirm against ``layers.conv_layer``.

    Args:
        inputs: tensor fed to the first ``layers.conv_layer`` call.
            Author's note: recognition inputs should have shape
            [batch, 64, width, channel] (not checked here).
        width: original image width(s); cast to float32 to derive the
            sequence length.
        detect: truthy builds the detection head (``feat_d``), falsy the
            recognition head (``conv_r1..3``, ``res_r1``, ``feat_r``).
        training: forwarded unchanged to every ``layers`` call that
            accepts it.

    Returns:
        Tuple ``(conv_feat, sequence_length)``.  ``conv_feat`` is the head
        output with axis 0 squeezed away (named ``detect_feat`` or
        ``recog_feat``).  ``sequence_length`` is ``width`` after three
        rounds of ``ceil(w / 2)``, cast to int32 and flattened to 1-D
        (named ``seq_len``).

    Notes carried over from the original author (not verified here):
        * Detection reduces width and height by 8; recognition reduces
          width by 8 and height by 64 (heights 64, 32, 16, 8 through the
          trunk, then 6, 4, 2, 1 through the recognition head).
        * Detection receptive field / stride per layer:
          [3,3; 1,1],
          [6,6; 2,2], [8,8; 2,2], [10,10; 2,2],
          [20,20; 4,4], [22,22; 4,4], [24,24; 4,4],
          [48,48; 8,8], [50,50; 8,8], [52,52; 8,8].
        * Anchor width 8; anchor heights 8, 16, 24, 32, 48.
        * Feature cell [i, j] covers [8*i:52+8*i, 8*j:52+8*j] and has its
          anchor centre at [26+8*i, 26+8*j].
    """
    # One row per trunk stage:
    # (channels, conv names, pad name, strided-conv name, res-block name).
    trunk_stages = (
        (64, ('conv1', 'conv2'), 'padd1', 'pool1', 'res1'),
        (128, ('conv3', 'conv4'), 'padd2', 'pool2', 'res2'),
        (256, ('conv5', 'conv6'), 'padd3', 'pool3', 'res3'),
    )

    with tf.variable_scope("conv_comm"):
        for channels, conv_names, pad_name, pool_name, res_name in \
                trunk_stages:
            for conv_name in conv_names:
                conv_spec = [channels, (3, 3), (1, 1), 'same', True, True,
                             conv_name]
                inputs = layers.conv_layer(inputs, conv_spec, training)

            # Pad one element at the end of the third axis before the
            # stride-2 'valid' layer.
            inputs = layers.padd_layer(
                inputs, [[0, 0], [0, 0], [0, 1], [0, 0]], name=pad_name)

            pool_spec = [channels, (2, 2), (2, 2), 'valid', True, True,
                         pool_name]
            inputs = layers.conv_layer(inputs, pool_spec, training)

            res_specs = [
                [channels, 3, (1, 1), 'same', True, True, 'conv1'],
                [channels, 3, (1, 1), 'same', True, False, 'conv2'],
            ]
            inputs = layers.block_resnet(inputs, res_specs, training,
                                         res_name)

    if detect:
        head_spec = [512, (3, 3), (1, 1), 'same', True, True, 'feat_d']
        inputs = layers.conv_layer(inputs, head_spec, training)
        conv_feat = tf.squeeze(inputs, axis=0, name='detect_feat')
    else:
        # Recognition head: three (3, 1) 'valid' layers, a residual block,
        # then a (2, 1) 'valid' layer.
        for conv_name in ('conv_r1', 'conv_r2', 'conv_r3'):
            recog_spec = [256, (3, 1), (1, 1), 'valid', True, True,
                          conv_name]
            inputs = layers.conv_layer(inputs, recog_spec, training)

        res_specs = [
            [256, 2, (1, 1), 'same', True, True, 'conv1'],
            [256, 2, (1, 1), 'same', True, False, 'conv2'],
        ]
        inputs = layers.block_resnet(inputs, res_specs, training, 'res_r1')

        head_spec = [512, (2, 1), (1, 1), 'valid', True, True, 'feat_r']
        inputs = layers.conv_layer(inputs, head_spec, training)
        conv_feat = tf.squeeze(inputs, axis=0, name='recog_feat')

    # Sequence length from the original widths: one ceil-halving per
    # trunk stage.
    halver = tf.constant(2, dtype=tf.float32, name='two')
    reduced = tf.cast(width, tf.float32)
    for _ in range(3):
        reduced = tf.ceil(tf.div(reduced, halver))
    reduced = tf.cast(reduced, tf.int32)

    # Flatten to a vector.
    sequence_length = tf.reshape(reduced, [-1], name='seq_len')

    return conv_feat, sequence_length
def conv_feat_layers(inputs, width, training):
    """Build the conv feature extractor with a single ``feat_c`` head.

    Everything up to and including the squeeze is created inside variable
    scope ``conv_comm``.  There are three stages; each calls
    ``layers.conv_layer`` twice, pads the third axis of the tensor by one
    element at its end (``layers.padd_layer``), calls ``layers.conv_layer``
    with a strided 'valid' spec (this replaces a max-pool, hence the
    ``poolN`` names) and finishes with ``layers.block_resnet_others``.
    The first two strided layers use (2, 2)/(2, 2); the third uses
    (3, 2)/(3, 2).  A final (3, 1) 'valid' layer named ``feat_c`` follows.

    Layer specs are lists that presumably mean
    ``[filters, kernel, strides, padding, <flag>, <flag>, name]`` --
    TODO confirm against ``layers.conv_layer``.

    Args:
        inputs: tensor fed to the first ``layers.conv_layer`` call.
            Author's notes say both "recog-inputs should have shape
            [batch, 64, width, channel]" and "height_norm = 36"; which one
            applies is not determinable from this function.
        width: original image width(s); cast to float32 to derive the
            sequence length.
        training: forwarded unchanged to every ``layers`` call that
            accepts it.

    Returns:
        Tuple ``(conv_feat, sequence_length)``.  ``conv_feat`` is the
        ``feat_c`` output with axis 0 squeezed away (named ``conv_feat``).
        ``sequence_length`` is ``width`` after three rounds of
        ``ceil(w / 2)``, cast to int32 and flattened to 1-D (named
        ``seq_len``).

    Notes carried over from the original author (not verified here):
        * Detection receptive field / stride per layer:
          [3,1; 1,1],
          [9,2; 3,2], [9,2; 3,2], [9,2; 3,2],
          [18,4; 6,4], [18,4; 6,4], [18,4; 6,4],
          [36,8; 12,8], [36,8; 12,8], [36,8; 12,8].
        * Anchor width 8; anchor heights 12, 24, 36, 48.
        * Feature cell [i, j] covers [12*i:36+12*i, 8*j:8+8*j] and has its
          anchor centre at [18+12*i, 4+8*j].
    """
    # One row per stage: (channels, conv names, pad name,
    # strided-conv name, strided-conv kernel/stride, res-block name).
    stages = (
        (64, ('conv1', 'conv2'), 'padd1', 'pool1', (2, 2), 'res1'),
        (128, ('conv3', 'conv4'), 'padd2', 'pool2', (2, 2), 'res2'),
        (256, ('conv5', 'conv6'), 'padd3', 'pool3', (3, 2), 'res3'),
    )

    with tf.variable_scope("conv_comm"):
        for channels, conv_names, pad_name, pool_name, window, res_name in \
                stages:
            for conv_name in conv_names:
                conv_spec = [channels, (3, 3), (1, 1), 'same', True, True,
                             conv_name]
                inputs = layers.conv_layer(inputs, conv_spec, training)

            # Pad one element at the end of the third axis before the
            # strided 'valid' layer.
            inputs = layers.padd_layer(
                inputs, [[0, 0], [0, 0], [0, 1], [0, 0]], name=pad_name)

            # Kernel and stride are the same pair for these layers.
            pool_spec = [channels, window, window, 'valid', True, True,
                         pool_name]
            inputs = layers.conv_layer(inputs, pool_spec, training)

            res_specs = [
                [channels, 3, (1, 1), 'same', True, True, 'conv1'],
                [channels, 3, (1, 1), 'same', True, False, 'conv2'],
            ]
            inputs = layers.block_resnet_others(inputs, res_specs, True,
                                                training, res_name)

        head_spec = [512, (3, 1), (1, 1), 'valid', True, True, 'feat_c']
        inputs = layers.conv_layer(inputs, head_spec, training)

        conv_feat = tf.squeeze(inputs, axis=0, name='conv_feat')

    # Sequence length from the original widths: three ceil-halvings.
    halver = tf.constant(2, dtype=tf.float32, name='two')
    reduced = tf.cast(width, tf.float32)
    for _ in range(3):
        reduced = tf.ceil(tf.div(reduced, halver))
    reduced = tf.cast(reduced, tf.int32)

    # Flatten to a vector.
    sequence_length = tf.reshape(reduced, [-1], name='seq_len')

    return conv_feat, sequence_length
Esempio n. 3
0
def conv_feat_layers(inputs, training):
    """Build a ResNet-style feature extractor for detection.

    Inside variable scope ``conv_comm`` this creates, in order: a (7, 7)
    stride-(2, 2) 'same' layer named ``conv1`` via ``layers.conv_layer``,
    a (3, 3) stride-(2, 2) 'same' max-pool named ``pool1``, and eight
    residual blocks ``res1_1`` .. ``res4_2``.  The first block of groups
    2, 3 and 4 is built with ``layers.block_resnet_half`` (presumably a
    down-sampling variant -- TODO confirm); all others use
    ``layers.block_resnet``.  Block widths are 64, 128, 256 and 512.

    An earlier revision also returned a sequence length derived from the
    image width (five ceil-halvings, tiled by the feature-map height);
    that computation is disabled and only the feature map is returned.

    Args:
        inputs: tensor fed to the first ``layers.conv_layer`` call.
        training: forwarded unchanged to ``layers.conv_layer`` and to
            every residual block.

    Returns:
        The output tensor of the last residual block (``res4_2``).

    Notes carried over from the original author (not verified here, and
    apparently written for a different layer stack):
        * Receptive field / stride per layer:
          [3,1; 1,1],
          [9,2; 3,2], [9,2; 3,2], [9,2; 3,2],
          [18,4; 6,4], [18,4; 6,4], [18,4; 6,4],
          [36,8; 12,8], [36,8; 12,8], [36,8; 12,8].
        * Anchor width 8; anchor heights 6, 12, 24, 36.
        * Feature cell [i, j] covers [12*i:36+12*i, 8*j:8+8*j] and has its
          anchor centre at [18+12*i, 4+8*j].
    """
    # (use the "half" variant?, channels, block name), in build order.
    residual_plan = (
        (False, 64, 'res1_1'),
        (False, 64, 'res1_2'),
        (True, 128, 'res2_1'),
        (False, 128, 'res2_2'),
        (True, 256, 'res3_1'),
        (False, 256, 'res3_2'),
        (True, 512, 'res4_1'),
        (False, 512, 'res4_2'),
    )

    with tf.variable_scope("conv_comm"):
        stem_spec = [64, (7, 7), (2, 2), 'same', True, True, 'conv1']
        inputs = layers.conv_layer(inputs, stem_spec, training)
        inputs = layers.maxpool_layer(inputs, (3, 3), (2, 2), 'same', 'pool1')

        for use_half, channels, block_name in residual_plan:
            if use_half:
                build_block = layers.block_resnet_half
            else:
                build_block = layers.block_resnet
            inputs = build_block(inputs,
                                 channels,
                                 relu=True,
                                 training=training,
                                 name=block_name)

        # Not consumed below; kept so the graph still contains the shape
        # op that the disabled sequence-length code relied on.
        _feat_size = tf.shape(inputs)

    return inputs