def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): """ Creates YOLO v3 tiny model. :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. Dimension batch_size may be undefined. The channel order is RGB. :param num_classes: number of predicted classes. :param is_training: whether is training or not. :param data_format: data format NCHW or NHWC. :param reuse: whether or not the network and its variables should be reused. :return: """ # it will be needed later on img_size = inputs.get_shape().as_list()[1:3] # transpose the inputs to NCHW if data_format == 'NCHW': inputs = tf.transpose(inputs, [0, 3, 1, 2]) # normalize values to range [0..1] inputs = inputs / 255 # set batch norm params batch_norm_params = { 'decay': _BATCH_NORM_DECAY, 'epsilon': _BATCH_NORM_EPSILON, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } # Set activation_fn and parameters for conv2d, batch_norm. with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.relu(x)): with tf.variable_scope('yolo-v3-tiny'): for i in range(6): inputs = _conv2d_fixed_padding( inputs, 16 * pow(2, i), 3) if i == 4: route_1 = inputs if i == 5: inputs = slim.max_pool2d( inputs, [2, 2], stride=1, padding="SAME", scope='pool2') else: inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') inputs = _conv2d_fixed_padding(inputs, 1024, 3) inputs = _conv2d_fixed_padding(inputs, 256, 1) route_2 = inputs inputs = _conv2d_fixed_padding(inputs, 512, 3) # inputs = _conv2d_fixed_padding(inputs, 255, 1) detect_1 = _detection_layer( inputs, num_classes, _ANCHORS[3:6], img_size, data_format) detect_1 = tf.identity(detect_1, name='detect_1') inputs = _conv2d_fixed_padding(route_2, 128, 1) upsample_size = route_1.get_shape().as_list() inputs = _upsample(inputs, upsample_size, data_format) inputs = tf.concat([inputs, route_1], axis=1 if data_format == 'NCHW' else 3) inputs = _conv2d_fixed_padding(inputs, 256, 3) # inputs = _conv2d_fixed_padding(inputs, 255, 1) detect_2 = _detection_layer( inputs, num_classes, _ANCHORS[0:3], img_size, data_format) detect_2 = tf.identity(detect_2, name='detect_2') detections = tf.concat([detect_1, detect_2], axis=1) detections = tf.identity(detections, name='detections') return detections
def yolo_v3_tiny_pan(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): """ Creates YOLO v3 tiny pan model. :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. Dimension batch_size may be undefined. The channel order is RGB. :param num_classes: number of predicted classes. :param is_training: whether is training or not. :param data_format: data format NCHW or NHWC. :param reuse: whether or not the network and its variables should be reused. :return: """ _ANCHORS = [(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)] # it will be needed later on img_size = inputs.get_shape().as_list()[1:3] # transpose the inputs to NCHW if data_format == 'NCHW': inputs = tf.transpose(inputs, [0, 3, 1, 2]) # normalize values to range [0..1] inputs = inputs / 255 # set batch norm params batch_norm_params = { 'decay': _BATCH_NORM_DECAY, 'epsilon': _BATCH_NORM_EPSILON, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } # Set activation_fn and parameters for conv2d, batch_norm. with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): with tf.variable_scope('yolo-v3-tiny-pan'): # 0 conv 16 3 x 3/ 1 544 x 544 x 3 -> 544 x 544 x 16 0.256 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) # 1 max 2 x 2/ 2 544 x 544 x 16 -> 272 x 272 x 16 0.005 BF inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') route_1 = inputs #2 conv 32 3 x 3/ 1 272 x 272 x 16 -> 272 x 272 x 32 0.682 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) #3 max 2 x 2/ 2 272 x 272 x 32 -> 136 x 136 x 32 0.002 BF inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') route_3 = inputs #4 conv 64 3 x 3/ 1 136 x 136 x 32 -> 136 x 136 x 64 0.682 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) #5 max 2 x 2/ 2 136 x 136 x 64 -> 68 x 68 x 64 0.001 BF inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') route_5 = inputs #6 conv 128 3 x 3/ 1 68 x 68 x 64 -> 68 x 68 x 128 0.682 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) route_6 = inputs #7 max 2 x 2/ 2 68 x 68 x 128 -> 34 x 34 x 128 0.001 BF inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') route_7 = inputs #8 conv 256 3 x 3/ 1 34 x 34 x 128 -> 34 x 34 x 256 0.682 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) route_8 = inputs #9 max 2 x 2/ 2 34 x 34 x 256 -> 17 x 17 x 256 0.000 BF inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') route_9 = inputs #10 conv 512 3 x 3/ 1 17 x 17 x 256 -> 17 x 17 x 512 0.682 BF inputs = _conv2d_fixed_padding(inputs, 16, 3) #11 max 2 x 2/ 1 17 x 17 x 512 -> 17 x 17 x 512 0.001 BF inputs = slim.max_pool2d( inputs, [2, 2], stride=1, padding="SAME", scope='pool2') #12 conv 1024 3 x 3/ 1 17 x 17 x 512 -> 17 x 17 x1024 2.727 BF inputs = _conv2d_fixed_padding(inputs, 1024, 3) #13 conv 256 1 x 1/ 1 17 x 17 x1024 -> 17 x 17 x 256 0.152 BF inputs = _conv2d_fixed_padding(inputs, 256, 1) #14 conv 512 3 x 3/ 1 17 x 17 x 256 -> 17 x 17 x 512 0.682 BF inputs = _conv2d_fixed_padding(inputs, 512, 3) route_14 = inputs #15 conv 128 1 x 1/ 1 17 x 17 x 512 -> 17 x 17 x 128 0.038 BF inputs = _conv2d_fixed_padding(inputs, 128, 1) #16 upsample 2x 17 x 17 x 128 -> 34 x 34 x 128 inputs = _upsample(inputs, route_8.get_shape().as_list(), data_format) #17 route 16 8 inputs = tf.concat([inputs, route_8], axis=1 if data_format == 'NCHW' else 3) #18 conv 128 1 x 1/ 1 34 x 34 x 384 -> 34 x 34 x 128 0.114 BF inputs = _conv2d_fixed_padding(inputs, 128, 1) #19 conv 256 3 x 3/ 1 34 x 34 x 128 -> 34 x 34 x 256 0.682 BF inputs = _conv2d_fixed_padding(inputs, 256, 3) route_19 = inputs #20 conv 128 1 x 1/ 1 34 x 34 x 256 -> 34 x 34 x 128 0.076 BF inputs = _conv2d_fixed_padding(inputs, 128, 1) #21 upsample 2x 34 x 34 x 128 -> 68 x 68 x 128 inputs = _upsample(inputs, route_6.get_shape().as_list(), data_format) #22 route 21 6 inputs = tf.concat([inputs, route_6], axis=1 if data_format == 'NCHW' else 3) #23 conv 64 1 x 1/ 1 68 x 68 x 256 -> 68 x 68 x 64 0.152 BF inputs = _conv2d_fixed_padding(inputs, 64, 1) #24 conv 128 3 x 3/ 1 68 x 68 x 64 -> 68 x 68 x 128 0.682 BF inputs = _conv2d_fixed_padding(inputs, 128, 3) route_24 = inputs #25 route 1 inputs = route_1 #26 reorg / 2 272 x 272 x 16 -> 136 x 136 x 64 inputs = _reorg(inputs, 2) #27 route 3 26 inputs = tf.concat([route_3, inputs], axis=1 if data_format == 'NCHW' else 3) #28 reorg / 2 136 x 136 x 96 -> 68 x 68 x 384 inputs = _reorg(inputs, 2) #29 route 5 28 inputs = tf.concat([route_5, inputs], axis=1 if data_format == 'NCHW' else 3) #30 reorg / 2 68 x 68 x 448 -> 34 x 34 x1792 inputs = _reorg(inputs, 2) #31 route 7 30 inputs = tf.concat([route_7, inputs], axis=1 if data_format == 'NCHW' else 3) #32 reorg / 2 34 x 34 x1920 -> 17 x 17 x7680 inputs = _reorg(inputs, 2) #33 route 9 32 inputs = tf.concat([route_9, inputs], axis=1 if data_format == 'NCHW' else 3) route_33 = inputs #34 conv 64 1 x 1/ 1 17 x 17 x7936 -> 17 x 17 x 64 0.294 BF inputs = _conv2d_fixed_padding(inputs, 64, 1) #35 upsample 4x 17 x 17 x 64 -> 68 x 68 x 64 inputs = _upsample(inputs, route_24.get_shape().as_list(), data_format) #36 route 35 24 inputs = tf.concat([inputs, route_24], axis=1 if data_format == 'NCHW' else 3) #37 conv 128 3 x 3/ 1 68 x 68 x 192 -> 68 x 68 x 128 2.046 BF inputs = _conv2d_fixed_padding(inputs, 128, 3) #38 conv 18 1 x 1/ 1 68 x 68 x 128 -> 68 x 68 x 18 0.021 BF inputs = _conv2d_fixed_padding(inputs, 18, 1) #39 yolo detect_1 = _detection_layer(inputs, num_classes, _ANCHORS[0:3], img_size, data_format) detect_1 = tf.identity(detect_1, name='detect_1') #40 route 33 inputs = route_33 #41 conv 128 1 x 1/ 1 17 x 17 x7936 -> 17 x 17 x 128 0.587 BF inputs = _conv2d_fixed_padding(inputs, 128, 1) #42 upsample 2x 17 x 17 x 128 -> 34 x 34 x 128 inputs = _upsample(inputs, route_19.get_shape().as_list(), data_format) #43 route 42 19 inputs = tf.concat([inputs, route_19], axis=1 if data_format == 'NCHW' else 3) #44 conv 256 3 x 3/ 1 34 x 34 x 384 -> 34 x 34 x 256 2.046 BF inputs = _conv2d_fixed_padding(inputs, 256, 3) #45 conv 18 1 x 1/ 1 34 x 34 x 256 -> 34 x 34 x 18 0.011 BF inputs = _conv2d_fixed_padding(inputs, 18, 1) #46 yolo detect_2 = _detection_layer(inputs, num_classes, _ANCHORS[3:6], img_size, data_format) detect_2 = tf.identity(detect_2, name='detect_2') #47 route 33 inputs = route_33 #48 conv 256 1 x 1/ 1 17 x 17 x7936 -> 17 x 17 x 256 1.174 BF inputs = _conv2d_fixed_padding(inputs, 256, 1) #49 route 48 14 inputs = tf.concat([inputs, route_14], axis=1 if data_format == 'NCHW' else 3) #50 conv 512 3 x 3/ 1 17 x 17 x 768 -> 17 x 17 x 512 2.046 BF inputs = _conv2d_fixed_padding(inputs, 512, 3) #51 conv 18 1 x 1/ 1 17 x 17 x 512 -> 17 x 17 x 18 0.005 BF inputs = _conv2d_fixed_padding(inputs, 18, 1) #52 yolo detect_3 = _detection_layer(inputs, num_classes, _ANCHORS[6:9], img_size, data_format) detect_3 = tf.identity(detect_3, name='detect_3') detections = tf.concat([detect_1, detect_2, detect_3], axis=1) detections = tf.identity(detections, name='detections') return detections
def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): # it will be needed later on img_size = inputs.get_shape().as_list()[1:3] # transpose the inputs to NCHW if data_format == 'NCHW': inputs = tf.transpose(inputs, [0, 3, 1, 2]) # normalize values to range [0..1] inputs = inputs / 255 # set batch norm params batch_norm_params = { 'decay': _BATCH_NORM_DECAY, 'epsilon': _BATCH_NORM_EPSILON, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } # Set activation_fn and parameters for conv2d, batch_norm. with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): #with tf.variable_scope('yolo-v3-tiny'): for i in range(6): inputs = _conv2d_fixed_padding( inputs, 16 * pow(2, i), 3) if i == 4: route_1 = inputs if i == 5: inputs = slim.max_pool2d( inputs, [2, 2], stride=1, padding="SAME", scope='pool2') else: inputs = slim.max_pool2d( inputs, [2, 2], scope='pool2') inputs = _conv2d_fixed_padding(inputs, 1024, 3) inputs = _conv2d_fixed_padding(inputs, 256, 1) route_2 = inputs inputs = _conv2d_fixed_padding(inputs, 512, 3) # inputs = _conv2d_fixed_padding(inputs, 255, 1) detect_1 = _detection_layer( inputs, num_classes, _ANCHORS[3:6], img_size, data_format) detect_1 = tf.identity(detect_1, name='detect_1') inputs = _conv2d_fixed_padding(route_2, 128, 1) upsample_size = route_1.get_shape().as_list() inputs = _upsample(inputs, upsample_size, data_format) inputs = tf.concat([inputs, route_1], axis=1 if data_format == 'NCHW' else 3) inputs = _conv2d_fixed_padding(inputs, 256, 3) # inputs = _conv2d_fixed_padding(inputs, 255, 1) detect_2 = _detection_layer( inputs, num_classes, _ANCHORS[0:3], img_size, data_format) detect_2 = tf.identity(detect_2, name='detect_2') detections = tf.concat([detect_1, detect_2], axis=1) detections = tf.identity(detections, name='detections') return detections
def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): """ Creates YOLO v3 tiny model. :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. Dimension batch_size may be undefined. The channel order is RGB. :param num_classes: number of predicted classes. :param is_training: whether is training or not. :param data_format: data format NCHW or NHWC. :param reuse: whether or not the network and its variables should be reused. :return: """ # it will be needed later on img_size = inputs.get_shape().as_list()[1:3] # transpose the inputs to NCHW if data_format == 'NCHW': inputs = tf.transpose(inputs, [0, 3, 1, 2]) # normalize values to range [0..1] inputs = inputs / 255 # set batch norm params batch_norm_params = { 'decay': _BATCH_NORM_DECAY, 'epsilon': _BATCH_NORM_EPSILON, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } # Set activation_fn and parameters for conv2d, batch_norm. with slim.arg_scope( [slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.leaky_relu( x, alpha=_LEAKY_RELU)): with tf.variable_scope('yolo-v3-tiny'): #CSPDARKENT BEGIN net = _conv2d_fixed_padding(inputs, 32, kernel_size=3) net = _conv2d_fixed_padding(inputs, 64, kernel_size=3) net, _ = _tiny_res_block(net, 64, data_format) net, _ = _tiny_res_block(net, 128, data_format) net, feat = _tiny_res_block(net, 256, data_format) net = _conv2d_fixed_padding(net, 512, kernel_size=3) feat2 = net #CSPDARKNET END net = _conv2d_fixed_padding(feat2, 256, kernel_size=1) route = net net = _conv2d_fixed_padding(route, 512, kernel_size=3) detect_1 = _detection_layer(net, num_classes, _ANCHORS[3:6], img_size, data_format) detect_1 = tf.identity(detect_1, name='detect_1') net = _conv2d_fixed_padding(route, 128, kernel_size=1) upsample_size = feat.get_shape().as_list() net = _upsample(net, upsample_size, data_format) net = tf.concat([feat, net], axis=1 if data_format == 'NCHW' else 3) net = _conv2d_fixed_padding(net, 256, kernel_size=3) detect_2 = _detection_layer(net, num_classes, _ANCHORS[1:4], img_size, data_format) detect_2 = tf.identity(detect_2, name='detect_2') detections = tf.concat([detect_1, detect_2], axis=1) detections = tf.identity(detections, name='detections') return detections