def yolo3lite_predictions(feature_maps, feature_channel_nums, num_anchors, num_classes, use_spp=False):
    """Build the three YOLOv3-lite prediction heads on top of a 3-scale FPN.

    feature_maps: (f1, f2, f3) backbone outputs, coarsest first.
    feature_channel_nums: channel counts matching (f1, f2, f3).
    Returns (y1, y2, y3) raw prediction tensors, coarsest scale first.
    """
    f1, f2, f3 = feature_maps
    f1_channel_num, f2_channel_num, f3_channel_num = feature_channel_nums

    # Per-anchor prediction width: box (4) + objectness (1) + class scores.
    prediction_filters = num_anchors * (num_classes + 5)

    # Head 1 (coarsest scale, 13x13 for a 416 input), optionally with SPP.
    head_builder = (make_spp_depthwise_separable_last_layers if use_spp
                    else make_depthwise_separable_last_layers)
    x, y1 = head_builder(f1, f1_channel_num // 2, prediction_filters,
                         block_id_str='pred_1')

    # Upsample and merge with the mid-level feature map.
    x = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f2])

    # Head 2 (26x26 for a 416 input).
    x, y2 = make_depthwise_separable_last_layers(
        x, f2_channel_num // 2, prediction_filters, block_id_str='pred_2')

    # Upsample and merge with the finest feature map.
    x = compose(
        DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f3])

    # Head 3 (52x52 for a 416 input); the transformed tensor is unused here.
    _, y3 = make_depthwise_separable_last_layers(
        x, f3_channel_num // 2, prediction_filters, block_id_str='pred_3')

    return y1, y2, y3
def tiny_yolo3lite_predictions(feature_maps, feature_channel_nums, num_anchors, num_classes):
    """Build the two Tiny-YOLOv3-lite prediction heads.

    feature_maps: (f1, f2) backbone outputs, coarsest first.
    feature_channel_nums: channel counts matching (f1, f2).
    Returns (y1, y2) raw prediction tensors, coarsest scale first.
    """
    f1, f2 = feature_maps
    f1_channel_num, f2_channel_num = feature_channel_nums

    # Per-anchor prediction width: box (4) + objectness (1) + class scores.
    prediction_filters = num_anchors * (num_classes + 5)

    # Reduce channels on the coarse map; both branches start from this tensor.
    x1 = DarknetConv2D_BN_Leaky(f1_channel_num // 2, (1, 1))(f1)

    # Head 1 output (13x13 for a 416 input).
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num,
                                            kernel_size=(3, 3),
                                            block_id_str='pred_1'),
        DarknetConv2D(prediction_filters, (1, 1)))(x1)

    # Upsample and merge path toward the finer feature map.
    x2 = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x1)

    # Head 2 output (26x26 for a 416 input), built on the concatenated maps.
    y2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num,
                                            kernel_size=(3, 3),
                                            block_id_str='pred_2'),
        DarknetConv2D(prediction_filters, (1, 1)))([x2, f2])

    return y1, y2
def Spp_Conv2D_BN_Leaky(x, num_filters):
    """Spatial Pyramid Pooling block.

    Max-pools `x` at three receptive sizes (5, 9, 13) with stride 1 and
    'same' padding, concatenates the pooled maps with `x` itself, then
    projects back to `num_filters` channels via a 1x1 conv + BN + LeakyReLU.
    """
    pooled = [
        MaxPooling2D(pool_size=(k, k), strides=(1, 1), padding='same')(x)
        for k in (5, 9, 13)
    ]
    # Concatenation order matches the classic SPP head: pooled maps, then x.
    return compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(pooled + [x])
def yolo3_spp_body(inputs, num_anchors, num_classes, weights_path=None):
    """Create YOLO_V3 SPP model CNN body in Keras.

    Builds a darknet53 backbone (optionally loading pretrained weights by
    name), taps three scales from it, and attaches an SPP head at the
    coarsest scale plus two standard FPN heads.
    Returns a Model mapping `inputs` to [y1, y2, y3], coarsest first.
    """
    darknet = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        darknet.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Backbone taps (for a 416 input): f1 13x13x1024, f2 26x26x512,
    # f3 52x52x256.  Layer indices 152/92 are fixed by darknet53_body.
    f1 = darknet.output
    f2 = darknet.layers[152].output
    f3 = darknet.layers[92].output
    f1_channel_num, f2_channel_num, f3_channel_num = 1024, 512, 256

    # Per-anchor prediction width: box (4) + objectness (1) + class scores.
    prediction_filters = num_anchors * (num_classes + 5)

    # Head 1 with SPP (19x19 for a 608 input).
    x, y1 = make_spp_last_layers(f1, f1_channel_num // 2, prediction_filters)

    # Upsample, merge with f2, head 2 (38x38 for a 608 input).
    x = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f2])
    x, y2 = make_last_layers(x, f2_channel_num // 2, prediction_filters)

    # Upsample, merge with f3, head 3 (76x76 for a 608 input).
    x = compose(
        DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f3])
    _, y3 = make_last_layers(x, f3_channel_num // 2, prediction_filters)

    return Model(inputs, [y1, y2, y3])
def _main(args):
    """Convert a Darknet .cfg/.weights pair into a Keras .h5 model.

    Parses the config sequentially, rebuilding each section as a Keras
    layer while consuming the matching bytes from the weights file, then
    saves either the full model or weights-only depending on `args`.

    Fix: `np.product` was deprecated and removed in NumPy 2.0; replaced
    with the canonical `np.prod`.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    # Darknet weight-file header: major, minor, revision as int32, then a
    # "seen images" counter whose width depends on the format version.
    major, minor, revision = np.ndarray(
        shape=(3,), dtype='int32', buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1,), dtype='int64',
                          buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32',
                          buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    # unique_config_sections renames duplicate section headers so that
    # configparser accepts the Darknet-style cfg.
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3), name='image_input')
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0  # total number of float32 weight values consumed
    out_index = []  # indices into all_layers of the yolo output tensors
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            # np.prod (not the removed np.product) for the element count.
            weights_size = np.prod(weights_shape)

            print('conv2d', 'bn' if batch_normalize else ' ', activation,
                  weights_shape)

            conv_bias = np.ndarray(
                shape=(filters,),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation: advanced activations are added as separate
            # layers after the conv, so act_fn stays None here.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation == 'mish':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            # NOTE(review): 'mish' convs are parsed above but never appended
            # to all_layers here (the mish branch is commented out), which
            # would shift route/shortcut indices for cfgs that use mish —
            # confirm whether mish support is intentionally disabled.
            if activation == 'linear':
                all_layers.append(prev_layer)
            # elif activation == 'mish':
            #     act_layer = Activation(mish)(prev_layer)
            #     prev_layer = act_layer
            #     all_layers.append(act_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            all_layers.append(
                AveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('reorg'):
            block_size = int(cfg_parser[section]['stride'])
            assert block_size == 2, 'Only reorg with stride 2 supported.'
            all_layers.append(
                Lambda(
                    # space_to_depth_x2,
                    # output_shape=space_to_depth_x2_output_shape,
                    lambda x: tf.nn.space_to_depth(x, block_size=2),
                    name='space_to_depth_x2')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('region'):
            # YOLOv2-style region head: dump the anchors next to the output.
            with open('{}_anchors.txt'.format(output_root), 'w') as f:
                print(cfg_parser[section]['anchors'], file=f)

        elif section.startswith('yolo'):
            # The tensor preceding a [yolo] section is a model output;
            # append a None placeholder so layer indexing stays aligned.
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif (section.startswith('net') or section.startswith('cost') or
              section.startswith('softmax')):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)
    if args.yolo4_reorder:
        # reverse the output tensor index for YOLOv4 cfg & weights,
        # since it use a different yolo outout order
        out_index.reverse()

    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))