예제 #1
0
def yolo3lite_predictions(feature_maps, feature_channel_nums, num_anchors, num_classes, use_spp=False):
    """Build the three YOLOv3-Lite prediction outputs from three feature maps.

    Args:
        feature_maps: sequence of exactly three tensors ``(f1, f2, f3)``.
        feature_channel_nums: sequence of exactly three channel counts, one
            per entry of ``feature_maps``.
        num_anchors: factor used to size each head's output; every head is
            asked for ``num_anchors * (num_classes + 5)`` filters.
        num_classes: see ``num_anchors``.
        use_spp: when truthy, the first head is built with
            ``make_spp_depthwise_separable_last_layers`` instead of
            ``make_depthwise_separable_last_layers``.

    Returns:
        Tuple ``(y1, y2, y3)`` with the output of each head.
    """
    f1, f2, f3 = feature_maps
    f1_channel_num, f2_channel_num, f3_channel_num = feature_channel_nums
    out_filters = num_anchors * (num_classes + 5)

    # Only the first head has an SPP variant.
    build_first_head = (make_spp_depthwise_separable_last_layers if use_spp
                        else make_depthwise_separable_last_layers)

    # Head 1 (13x13 for 416 input).
    trunk, y1 = build_first_head(f1, f1_channel_num // 2, out_filters,
                                 block_id_str='pred_1')

    # 1x1 conv + 2x upsample, then merge with feature map 2.
    upsample_to_f2 = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))
    trunk = Concatenate()([upsample_to_f2(trunk), f2])

    # Head 2 (26x26 for 416 input).
    trunk, y2 = make_depthwise_separable_last_layers(trunk, f2_channel_num // 2, out_filters,
                                                     block_id_str='pred_2')

    # 1x1 conv + 2x upsample, then merge with feature map 3.
    upsample_to_f3 = compose(
        DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
        UpSampling2D(2))
    trunk = Concatenate()([upsample_to_f3(trunk), f3])

    # Head 3 (52x52 for 416 input); the trunk tensor it returns is not needed.
    _, y3 = make_depthwise_separable_last_layers(trunk, f3_channel_num // 2, out_filters,
                                                 block_id_str='pred_3')

    return y1, y2, y3
예제 #2
0
def tiny_yolo3lite_predictions(feature_maps, feature_channel_nums, num_anchors, num_classes):
    """Build the two Tiny YOLOv3-Lite prediction outputs from two feature maps.

    Args:
        feature_maps: sequence of exactly two tensors ``(f1, f2)``.
        feature_channel_nums: sequence of exactly two channel counts, one per
            entry of ``feature_maps``.
        num_anchors: factor used to size each head's output; every head ends
            in a 1x1 conv with ``num_anchors * (num_classes + 5)`` filters.
        num_classes: see ``num_anchors``.

    Returns:
        Tuple ``(y1, y2)`` with the output of each head.
    """
    f1, f2 = feature_maps
    f1_channel_num, f2_channel_num = feature_channel_nums
    out_filters = num_anchors * (num_classes + 5)

    # 1x1 transform of feature map 1; shared by head 1 and the upsample path.
    x1 = DarknetConv2D_BN_Leaky(f1_channel_num // 2, (1, 1))(f1)

    # Head 1 (13x13 for 416 input).
    head_1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num, kernel_size=(3, 3), block_id_str='pred_1'),
        DarknetConv2D(out_filters, (1, 1)))
    y1 = head_1(x1)

    # 1x1 conv + 2x upsample of the shared transform.
    upsample = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))
    x2 = upsample(x1)

    # Head 2 (26x26 for 416 input): merge with feature map 2, then predict.
    head_2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num, kernel_size=(3, 3), block_id_str='pred_2'),
        DarknetConv2D(out_filters, (1, 1)))
    y2 = head_2([x2, f2])

    return y1, y2
예제 #3
0
def Spp_Conv2D_BN_Leaky(x, num_filters):
    """Pool ``x`` at three window sizes, concatenate with ``x`` and fuse.

    Three stride-1, 'same'-padded max-poolings (5x5, 9x9, 13x13) are applied
    to ``x``; their outputs and ``x`` itself are concatenated and passed
    through a 1x1 ``DarknetConv2D_BN_Leaky`` with ``num_filters`` filters.

    Args:
        x: input tensor.
        num_filters: filter count of the fusing 1x1 convolution.

    Returns:
        The fused output tensor.
    """
    pooled = [
        MaxPooling2D(pool_size=(window, window), strides=(1, 1), padding='same')(x)
        for window in (5, 9, 13)
    ]

    fuse = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))
    # The un-pooled input goes last in the concatenation.
    return fuse(pooled + [x])
예제 #4
0
def yolo3_spp_body(inputs, num_anchors, num_classes, weights_path=None):
    """Create the YOLO_V3 SPP model CNN body in Keras.

    Args:
        inputs: input tensor fed to ``darknet53_body``.
        num_anchors: factor used to size each head's output; every head is
            asked for ``num_anchors * (num_classes + 5)`` filters.
        num_classes: see ``num_anchors``.
        weights_path: optional path of backbone weights, loaded by layer name
            when not ``None``.

    Returns:
        A ``Model`` mapping ``inputs`` to the list ``[y1, y2, y3]``.
    """
    backbone = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Backbone tap points:
    #   f1: 13 x 13 x 1024, f2: 26 x 26 x 512, f3: 52 x 52 x 256
    f1 = backbone.output
    f2 = backbone.layers[152].output
    f3 = backbone.layers[92].output
    f1_channel_num, f2_channel_num, f3_channel_num = 1024, 512, 256

    out_filters = num_anchors * (num_classes + 5)

    # Head 1 with SPP (19x19 for 608 input).
    trunk, y1 = make_spp_last_layers(f1, f1_channel_num // 2, out_filters)

    # 1x1 conv + 2x upsample, then merge with feature map 2.
    upsample_to_f2 = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                             UpSampling2D(2))
    trunk = Concatenate()([upsample_to_f2(trunk), f2])

    # Head 2 (38x38 for 608 input).
    trunk, y2 = make_last_layers(trunk, f2_channel_num // 2, out_filters)

    # 1x1 conv + 2x upsample, then merge with feature map 3.
    upsample_to_f3 = compose(DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
                             UpSampling2D(2))
    trunk = Concatenate()([upsample_to_f3(trunk), f3])

    # Head 3 (76x76 for 608 input); the trunk tensor it returns is not needed.
    _, y3 = make_last_layers(trunk, f3_channel_num // 2, out_filters)

    return Model(inputs, [y1, y2, y3])
예제 #5
0
def _read_weights_header(weights_file):
    """Read the Darknet weights header from the current file position.

    Returns:
        Tuple ``(major, minor, revision, seen)``; ``seen`` is a 1-element
        array whose width (int64 vs int32) depends on the header version.
    """
    major, minor, revision = np.ndarray(
        shape=(3,), dtype='int32', buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    return major, minor, revision, seen


def _mish(x):
    """Mish activation: ``x * tanh(softplus(x))``."""
    return x * K.tanh(K.softplus(x))


def _main(args):
    """Convert a Darknet ``.cfg``/``.weights`` pair to a Keras model and save it.

    Args:
        args: namespace providing ``config_path``, ``weights_path``,
            ``output_path`` (must end in ``.cfg``, ``.weights`` and ``.h5``
            respectively), and the flags ``yolo4_reorder`` (reverse the
            output order), ``weights_only`` (save weights instead of the full
            model) and ``plot_model`` (also write ``<output_root>.png``).

    Raises:
        AssertionError: on a wrong file extension or an unsupported
            shortcut/upsample/reorg option.
        ValueError: on an unknown activation or unsupported section type.

    Side effects:
        Writes the ``.h5`` file, ``<output_root>_anchors.txt`` for every
        ``region`` section, optionally a model plot, and prints progress.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager closes the weights file even when config parsing or
    # layer construction raises part-way through.
    with open(weights_path, 'rb') as weights_file:
        major, minor, revision, seen = _read_weights_header(weights_file)
        print('Weights Header: ', major, minor, revision, seen)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        input_layer = Input(shape=(None, None, 3), name='image_input')
        prev_layer = input_layer
        # One entry per cfg layer so that route/shortcut indices line up.
        all_layers = []

        weight_decay = float(cfg_parser['net_0']['decay']
                             ) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0  # number of 4-byte weights consumed so far
        out_index = []
        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                # Honor the option's value rather than its mere presence: an
                # explicit `batch_normalize=0` must not enable batch norm
                # (and must not shift the weight-stream layout).
                batch_normalize = cfg_parser[section].getint(
                    'batch_normalize', fallback=0) != 0

                padding = 'same' if pad == 1 and stride == 1 else 'valid'

                # Fail early on unsupported activations, before any weights
                # are consumed for this layer.
                if activation not in ('leaky', 'mish', 'linear'):
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                # np.prod: the np.product alias was removed in NumPy 2.0.
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn' if batch_normalize else '  ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters,),
                    dtype='float32',
                    buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Create Conv2D layer
                if stride > 1:
                    # Darknet uses left and top padding instead of 'same' mode
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
                # Activations are added as separate layers below, so the
                # convolution itself is always linear.
                conv_layer = (Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=None,
                    padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)
                prev_layer = conv_layer

                if activation == 'leaky':
                    prev_layer = LeakyReLU(alpha=0.1)(prev_layer)
                elif activation == 'mish':
                    # Previously 'mish' was accepted but neither applied nor
                    # recorded in all_layers, which shifted every later
                    # route/shortcut index.
                    # NOTE(review): implemented as a Lambda over the Keras
                    # backend; confirm downstream loaders accept it.
                    prev_layer = Lambda(_mish)(prev_layer)
                all_layers.append(prev_layer)

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = Concatenate()(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        pool_size=(size, size),
                        strides=(stride, stride),
                        padding='same')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('avgpool'):
                all_layers.append(
                    AveragePooling2D()(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                index = int(cfg_parser[section]['from'])
                activation = cfg_parser[section]['activation']
                assert activation == 'linear', 'Only linear activation supported.'
                all_layers.append(Add()([all_layers[index], prev_layer]))
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                assert stride == 2, 'Only stride=2 supported.'
                all_layers.append(UpSampling2D(stride)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('reorg'):
                block_size = int(cfg_parser[section]['stride'])
                assert block_size == 2, 'Only reorg with stride 2 supported.'
                all_layers.append(
                    Lambda(
                        lambda x: tf.nn.space_to_depth(x, block_size=2),
                        name='space_to_depth_x2')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('region'):
                with open('{}_anchors.txt'.format(output_root), 'w') as f:
                    print(cfg_parser[section]['anchors'], file=f)

            elif section.startswith('yolo'):
                # The layer just before a [yolo] section is a model output;
                # a placeholder keeps all_layers aligned with cfg indices.
                out_index.append(len(all_layers) - 1)
                all_layers.append(None)
                prev_layer = all_layers[-1]

            elif section.startswith(('net', 'cost', 'softmax')):
                pass

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Whatever is left in the file was not consumed by any layer.
        remaining_weights = len(weights_file.read()) / 4

    # Create and save model.
    if not out_index:
        # No [yolo] section: fall back to the last layer as the only output.
        out_index.append(len(all_layers) - 1)

    if args.yolo4_reorder:
        # reverse the output tensor index for YOLOv4 cfg & weights,
        # since it uses a different yolo output order
        out_index.reverse()

    model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
    # summary() prints by itself; wrapping it in print() emitted a stray "None".
    model.summary()
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))