def conv_feat_layers(inputs, width, detect, training):
    """Shared convolutional feature maps for detection and recognition.

    NOTE(review): this file defines `conv_feat_layers` several times; in
    Python only the last definition survives an import, so this variant is
    shadowed dead code unless the later definitions are removed or renamed.

    Args:
        inputs: image tensor, NHWC. For recognition the original notes below
            say it should be [batch, 64, width, channel] (height_norm = 64)
            -- assumption from the comments, TODO confirm against callers.
        width: original image width(s); the returned sequence length is the
            width ceil-halved three times (one halving per stride-2 pool).
        detect: True builds the detection head ('feat_d'); False builds the
            recognition head ('conv_r1'..'conv_r3', 'res_r1', 'feat_r').
        training: training flag forwarded to every conv / resnet layer.

    Returns:
        (conv_feat, sequence_length): features with axis 0 squeezed out
        (implies batch size 1 -- TODO confirm), and an int32 vector of
        per-image sequence lengths named 'seq_len'.
    """
    #
    # convolutional features maps for detection and recognition
    #
    # detect = True for detection
    # detect = False for recognition
    #
    # width reduce: per 8, for detection
    # height reduce: per 8, for detection
    #
    # width reduce: per 8, for recognition
    # height reduce: per 64, for recognition
    #
    #
    # recog-inputs should have shape [ batch, 64, width, channel]
    #
    # height_norm = 64
    #
    # height trace through the recog column: 64, 32, 16, 8
    # then through the (3,1)/(2,1) 'valid' convs: 6, 4, 2, 1
    #
    #
    # detection receptive-field growth per layer:
    #
    # [3,3; 1,1],
    # [6,6; 2,2], [8,8; 2,2], [10,10; 2,2]
    # [20,20; 4,4], [22,22; 4,4], [24,24; 4,4]
    # [48,48; 8,8], [50,50; 8,8], [52,52; 8,8],
    #
    # anchor width: 8,
    # anchor height: 8, 16, 24, 32, 48,
    #
    # feature_layer --> receptive_field
    # [0,0] --> [0:52, 0:52]
    # [0,1] --> [0:52, 8:52+8]
    # [i,j] --> [8*i:52+8*i, 8*j:52+8*j]
    #
    # feature_layer --> anchor_center
    # [0,0] --> [26, 26]
    # [0,1] --> [26, 26+8]
    # [i,j] --> [26+8*i, 26+8*j]
    #
    # each entry: [filters, kernel, stride, padding, bn?, relu?, name]
    # -- exact meaning of the two booleans depends on layers.conv_layer;
    #    TODO confirm against the layers module.
    layer_params = [ [64, (3, 3), (1, 1), 'same', True, True, 'conv1'],
                     [64, (3, 3), (1, 1), 'same', True, True, 'conv2'],
                     [64, (2, 2), (2, 2), 'valid', True, True, 'pool1'],   # for pool
                     [128, (3, 3), (1, 1), 'same', True, True, 'conv3'],
                     [128, (3, 3), (1, 1), 'same', True, True, 'conv4'],
                     [128, (2, 2), (2, 2), 'valid', True, True, 'pool2'],  # for pool
                     [256, (3, 3), (1, 1), 'same', True, True, 'conv5'],
                     [256, (3, 3), (1, 1), 'same', True, True, 'conv6'],
                     [256, (2, 2), (2, 2), 'valid', True, True, 'pool3'] ] # for pool
    layers_detect = [[512, (3, 3), (1, 1), 'same', True, True, 'feat_d']]  # for feat
    layers_recog = [[256, (3, 1), (1, 1), 'valid', True, True, 'conv_r1'],
                    [256, (3, 1), (1, 1), 'valid', True, True, 'conv_r2'],
                    [256, (3, 1), (1, 1), 'valid', True, True, 'conv_r3'],
                    [512, (2, 1), (1, 1), 'valid', True, True, 'feat_r']]  # for feat
    #
    with tf.variable_scope("conv_comm"):
        #
        # stage 1: two 3x3 convs, zero-pad one column, then a stride-2
        # "pool" conv (a learned downsample replacing max-pooling)
        inputs = layers.conv_layer(inputs, layer_params[0], training)
        inputs = layers.conv_layer(inputs, layer_params[1], training)
        inputs = layers.padd_layer(inputs, [[0, 0], [0, 0], [0, 1], [0, 0]],
                                   name='padd1')
        inputs = layers.conv_layer(inputs, layer_params[2], training)
        #inputs = layers.maxpool_layer(inputs, (2,2), (2,2), 'valid', 'pool1')
        #
        params = [[64, 3, (1, 1), 'same', True, True, 'conv1'],
                  [64, 3, (1, 1), 'same', True, False, 'conv2']]
        inputs = layers.block_resnet(inputs, params, training, 'res1')
        #
        # stage 2: same pattern at 128 channels
        inputs = layers.conv_layer(inputs, layer_params[3], training)
        inputs = layers.conv_layer(inputs, layer_params[4], training)
        inputs = layers.padd_layer(inputs, [[0, 0], [0, 0], [0, 1], [0, 0]],
                                   name='padd2')
        inputs = layers.conv_layer(inputs, layer_params[5], training)
        #inputs = layers.maxpool_layer(inputs, (2,2), (2,2), 'valid', 'pool2')
        #
        params = [[128, 3, (1, 1), 'same', True, True, 'conv1'],
                  [128, 3, (1, 1), 'same', True, False, 'conv2']]
        inputs = layers.block_resnet(inputs, params, training, 'res2')
        #
        # stage 3: same pattern at 256 channels
        inputs = layers.conv_layer(inputs, layer_params[6], training)
        inputs = layers.conv_layer(inputs, layer_params[7], training)
        inputs = layers.padd_layer(inputs, [[0, 0], [0, 0], [0, 1], [0, 0]],
                                   name='padd3')
        inputs = layers.conv_layer(inputs, layer_params[8], training)
        #inputs = layers.maxpool_layer(inputs, (2,2), (2,2), 'valid', 'pool3')
        #
        params = [[256, 3, (1, 1), 'same', True, True, 'conv1'],
                  [256, 3, (1, 1), 'same', True, False, 'conv2']]
        inputs = layers.block_resnet(inputs, params, training, 'res3')
        #
        if detect:
            #
            # detection head: one 3x3 'same' conv at 512 channels
            inputs = layers.conv_layer(inputs, layers_detect[0], training)
            #
            conv_feat = tf.squeeze(inputs, axis=0, name='detect_feat')  # squeeze
            #
        else:  # recog
            #
            # recognition head: three (3,1) 'valid' convs collapse height,
            # then a residual block and the final (2,1) 'valid' feat conv
            inputs = layers.conv_layer(inputs, layers_recog[0], training)
            inputs = layers.conv_layer(inputs, layers_recog[1], training)
            inputs = layers.conv_layer(inputs, layers_recog[2], training)
            #
            params = [[256, 2, (1, 1), 'same', True, True, 'conv1'],
                      [256, 2, (1, 1), 'same', True, False, 'conv2']]
            inputs = layers.block_resnet(inputs, params, training, 'res_r1')
            #
            inputs = layers.conv_layer(inputs, layers_recog[3], training)
            #
            conv_feat = tf.squeeze(inputs, axis=0, name='recog_feat')  # squeeze
            #
        #
        # Calculate resulting sequence length from original image widths:
        # three ceil-halvings, matching the three stride-2 width reductions
        #
        two = tf.constant(2, dtype=tf.float32, name='two')
        #
        w = tf.cast(width, tf.float32)
        #
        w = tf.div(w, two)
        w = tf.ceil(w)
        #
        w = tf.div(w, two)
        w = tf.ceil(w)
        #
        w = tf.div(w, two)
        w = tf.ceil(w)
        #
        w = tf.cast(w, tf.int32)
        #
        # Vectorize
        sequence_length = tf.reshape(w, [-1], name='seq_len')
        #
    #
    return conv_feat, sequence_length
def conv_feat_layers(inputs, width, training):
    """Convolutional feature maps for detection and recognition.

    NOTE(review): this file defines `conv_feat_layers` more than once; only
    the last definition survives an import, so this variant is shadowed.

    Three conv-conv-pad-pool stages (each capped by a residual block) are
    built under variable scope "conv_comm", followed by a final (3, 1)
    'valid' feature conv ('feat_c') whose batch axis is squeezed out.
    The original header notes say inputs should be
    [batch, 64, width, channel] with height_norm = 36 -- the two figures
    disagree; TODO(review) confirm the expected input height upstream.

    Anchor geometry (from the original header): anchor width 8; anchor
    heights 12, 24, 36, 48; feature cell [i, j] has receptive field
    [12*i : 36+12*i, 8*j : 8+8*j] and anchor center [18+12*i, 4+8*j].

    Args:
        inputs: NHWC image tensor.
        width: original image width(s); returned sequence length is width
            ceil-halved three times (one per width-stride-2 pooling conv).
        training: training flag forwarded to every conv / resnet layer.

    Returns:
        (conv_feat, sequence_length): features with axis 0 squeezed out
        (assumes a singleton batch axis -- TODO confirm), and an int32
        vector named 'seq_len'.
    """
    # each entry: [filters, kernel, stride, padding, bn?, relu?, name]
    layer_params = [[64, (3, 3), (1, 1), 'same', True, True, 'conv1'],
                    [64, (3, 3), (1, 1), 'same', True, True, 'conv2'],
                    [64, (2, 2), (2, 2), 'valid', True, True, 'pool1'],
                    [128, (3, 3), (1, 1), 'same', True, True, 'conv3'],
                    [128, (3, 3), (1, 1), 'same', True, True, 'conv4'],
                    [128, (2, 2), (2, 2), 'valid', True, True, 'pool2'],
                    [256, (3, 3), (1, 1), 'same', True, True, 'conv5'],
                    [256, (3, 3), (1, 1), 'same', True, True, 'conv6'],
                    [256, (3, 2), (3, 2), 'valid', True, True, 'pool3'],
                    [512, (3, 1), (1, 1), 'valid', True, True, 'feat_c']]
    #
    with tf.variable_scope("conv_comm"):
        # three stages: conv, conv, zero-pad one column on the width axis,
        # strided "pool" conv (a learned downsample), then a two-layer
        # residual block at the stage's channel count
        for stage, res_channels in enumerate((64, 128, 256)):
            base = 3 * stage
            inputs = layers.conv_layer(inputs, layer_params[base], training)
            inputs = layers.conv_layer(inputs, layer_params[base + 1], training)
            inputs = layers.padd_layer(inputs,
                                       [[0, 0], [0, 0], [0, 1], [0, 0]],
                                       name='padd%d' % (stage + 1))
            inputs = layers.conv_layer(inputs, layer_params[base + 2], training)
            res_params = [[res_channels, 3, (1, 1), 'same', True, True, 'conv1'],
                          [res_channels, 3, (1, 1), 'same', True, False, 'conv2']]
            inputs = layers.block_resnet_others(inputs, res_params, True,
                                                training, 'res%d' % (stage + 1))
        #
        # final feature conv, then drop the batch axis
        inputs = layers.conv_layer(inputs, layer_params[9], training)
        conv_feat = tf.squeeze(inputs, axis=0, name='conv_feat')
        #
        # sequence length from the original image widths:
        # ceil(width / 2) applied once per width-stride-2 pooling conv
        two = tf.constant(2, dtype=tf.float32, name='two')
        w = tf.cast(width, tf.float32)
        for _ in range(3):
            w = tf.ceil(tf.div(w, two))
        w = tf.cast(w, tf.int32)
        # vectorize
        sequence_length = tf.reshape(w, [-1], name='seq_len')
    #
    return conv_feat, sequence_length
def conv_feat_layers(inputs, training):
    """ResNet-style convolutional feature maps for detection.

    Stem (7x7/2 conv + 3x3/2 max-pool) followed by four residual stages
    under variable scope "conv_comm". Anchor geometry from the original
    header: anchor width 8; anchor heights 6, 12, 24, 36; feature cell
    [i, j] has receptive field [12*i : 36+12*i, 8*j : 8+8*j] and anchor
    center [18+12*i, 4+8*j].

    Args:
        inputs: NHWC image tensor.
        training: training flag forwarded to every conv / resnet block.

    Returns:
        The stage-4 feature map tensor (the commented-out sequence-length
        computation from earlier revisions has been dropped).
    """
    with tf.variable_scope("conv_comm"):
        # stem
        inputs = layers.conv_layer(
            inputs, [64, (7, 7), (2, 2), 'same', True, True, 'conv1'],
            training)
        inputs = layers.maxpool_layer(inputs, (3, 3), (2, 2), 'same', 'pool1')
        # four residual stages; stages 2-4 open with a block_resnet_half
        # block (presumably strided -- confirm in the layers module) that
        # raises the channel count
        blocks = (
            (layers.block_resnet,      64,  'res1_1'),
            (layers.block_resnet,      64,  'res1_2'),
            (layers.block_resnet_half, 128, 'res2_1'),
            (layers.block_resnet,      128, 'res2_2'),
            (layers.block_resnet_half, 256, 'res3_1'),
            (layers.block_resnet,      256, 'res3_2'),
            (layers.block_resnet_half, 512, 'res4_1'),
            (layers.block_resnet,      512, 'res4_2'),
        )
        for block_fn, channels, block_name in blocks:
            inputs = block_fn(inputs, channels, relu=True,
                              training=training, name=block_name)
        #
        return inputs