Esempio n. 1
0
def prediction(x,i,num_priors,min_s,max_s,aspect,num_classes,img_size):
    """Attach the three SSD prediction branches to the feature map ``x``.

    # Arguments
        x: Feature-map tensor every branch is applied to.
        i: Identifier of the feature map; ``str(i)`` prefixes all layer names.
        num_priors: Number of prior boxes per location; sizes the conv outputs
            (``num_priors * 4`` and ``num_priors * num_classes`` filters).
        min_s: Forwarded to ``PriorBox`` as ``min_size``.
        max_s: Forwarded to ``PriorBox`` as ``max_size``.
        aspect: Forwarded to ``PriorBox`` as ``aspect_ratios``.
        num_classes: Number of classes predicted per prior.
        img_size: Forwarded to ``PriorBox`` as its first positional argument.

    # Returns
        Tuple ``(mbox_loc_flat, mbox_conf_flat, mbox_priorbox)``: the
        flattened localisation output, the flattened confidence output and
        the output of the ``PriorBox`` layer.
    """
    prefix = str(i)

    # Localisation branch: 4 values per prior.
    loc_layer = Conv2D(num_priors * 4, (3, 3), padding='same',
                       name=prefix + '_mbox_loc')
    loc_flatten = Flatten(name=prefix + '_mbox_loc_flat')
    mbox_loc_flat = loc_flatten(loc_layer(x))

    # Confidence branch: one score per class per prior.
    conf_layer = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                        name=prefix + '_mbox_conf')
    conf_flatten = Flatten(name=prefix + '_mbox_conf_flat')
    mbox_conf_flat = conf_flatten(conf_layer(x))

    # Prior-box branch, computed from the same feature map.
    prior_layer = PriorBox(img_size,
                           min_size=min_s,
                           max_size=max_s,
                           aspect_ratios=aspect,
                           variances=[0.1, 0.1, 0.2, 0.2],
                           name=prefix + '_mbox_priorbox')
    mbox_priorbox = prior_layer(x)

    return mbox_loc_flat, mbox_conf_flat, mbox_priorbox
Esempio n. 2
0
def _ssd300_conv_head(net, source, num_priors, num_classes, img_size,
                      min_size, aspect_ratios, max_size=None):
    """Add the conv-based loc/conf/prior-box head for ``net[source]``.

    The created tensors are stored back into ``net`` under the keys
    ``<source>_mbox_loc``, ``<source>_mbox_loc_flat``, ``<source>_mbox_conf``,
    ``<source>_mbox_conf_flat`` and ``<source>_mbox_priorbox``; the layers
    carry the same names, except that the confidence convolution gets a
    ``_<num_classes>`` suffix when ``num_classes != 21``.
    """
    feature = net[source]
    loc_key = source + '_mbox_loc'
    conf_key = source + '_mbox_conf'
    prior_key = source + '_mbox_priorbox'

    # Localisation: 4 offsets per prior.
    net[loc_key] = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                                 name=loc_key)(feature)
    net[loc_key + '_flat'] = Flatten(name=loc_key + '_flat')(net[loc_key])

    # Confidence: the layer name changes with a non-default class count
    # (presumably so pretrained 21-class weights are not matched by name
    # against a differently shaped layer -- TODO confirm).
    conf_name = conf_key
    if num_classes != 21:
        conf_name += '_{}'.format(num_classes)
    net[conf_key] = Convolution2D(num_priors * num_classes, 3, 3,
                                  border_mode='same',
                                  name=conf_name)(feature)
    net[conf_key + '_flat'] = Flatten(name=conf_key + '_flat')(net[conf_key])

    # Only forward max_size when the caller supplied one, so PriorBox's own
    # default applies otherwise.
    prior_kwargs = {'aspect_ratios': aspect_ratios,
                    'variances': [0.1, 0.1, 0.2, 0.2],
                    'name': prior_key}
    if max_size is not None:
        prior_kwargs['max_size'] = max_size
    net[prior_key] = PriorBox(img_size, min_size, **prior_kwargs)(feature)


def SSD300(input_shape, num_classes=21, weights=None):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.
        weights: Optional path passed to ``model.load_weights``. When it is
            ``None`` the VGG16 backbone is created with ``'imagenet'``
            weights; otherwise VGG16 is created with ``weights=None`` and
            the given file is loaded into the finished model.

    # Returns
        A Keras ``Model`` mapping the input image to the ``predictions``
        tensor (localisation, softmax confidences and prior boxes
        concatenated along axis 2).

    # References
        https://arxiv.org/abs/1512.02325
    """

    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor

    vgg_weights = 'imagenet' if weights is None else None
    ### VGG layers
    vgg = VGG16(input_tensor=input_tensor, weights=vgg_weights,
                input_shape=input_shape, include_top=False)

    # The VGG16 convolution layers are reused below and kept frozen.
    for layer in vgg.layers:
        layer.trainable = False
    #block 1
    net['conv1_1'] = vgg.layers[1](net['input'])
    net['conv1_2'] = vgg.layers[2](net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])

    #block 2
    net['conv2_1'] = vgg.layers[4](net['pool1'])
    net['conv2_2'] = vgg.layers[5](net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])

    # block 3
    net['conv3_1'] = vgg.layers[7](net['pool2'])
    net['conv3_2'] = vgg.layers[8](net['conv3_1'])
    net['conv3_3'] = vgg.layers[9](net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])

    # block 4
    net['conv4_1'] = vgg.layers[11](net['pool3'])
    net['conv4_2'] = vgg.layers[12](net['conv4_1'])
    net['conv4_3'] = vgg.layers[13](net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool4')(net['conv4_3'])

    # block 5
    net['conv5_1'] = vgg.layers[15](net['pool4'])
    net['conv5_2'] = vgg.layers[16](net['conv5_1'])
    net['conv5_3'] = vgg.layers[17](net['conv5_2'])

    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                name='pool5')(net['conv5_3'])

    ### Beginning of SSD layers
    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same', name='fc7')(net['fc6'])
    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv6_1')(net['fc7'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    # Explicit zero padding followed by a 'valid' strided convolution.
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    ### Prediction layers at different depths
    # Prediction from conv4_3 (normalised first, 3 priors, no max_size)
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    _ssd300_conv_head(net, 'conv4_3_norm', 3, num_classes, img_size,
                      30.0, [2])
    # Prediction from fc7
    _ssd300_conv_head(net, 'fc7', 6, num_classes, img_size,
                      60.0, [2, 3], max_size=114.0)
    # Prediction from conv6_2
    _ssd300_conv_head(net, 'conv6_2', 6, num_classes, img_size,
                      114.0, [2, 3], max_size=168.0)
    # Prediction from conv7_2
    _ssd300_conv_head(net, 'conv7_2', 6, num_classes, img_size,
                      168.0, [2, 3], max_size=222.0)
    # Prediction from conv8_2
    _ssd300_conv_head(net, 'conv8_2', 6, num_classes, img_size,
                      222.0, [2, 3], max_size=276.0)

    # Prediction from pool6: the pooled vector is already flat, so Dense
    # layers replace the convolution + Flatten pair.
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['pool6_mbox_conf_flat'] = Dense(num_priors * num_classes,
                                        name=name)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is fed a 1x1 feature map rebuilt from the pooled vector.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions (same source order for every branch)
    sources = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
               'pool6']
    net['mbox_loc'] = merge([net[s + '_mbox_loc_flat'] for s in sources],
                            mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge([net[s + '_mbox_conf_flat'] for s in sources],
                             mode='concat', concat_axis=1, name='mbox_conf')
    net['mbox_priorbox'] = merge([net[s + '_mbox_priorbox'] for s in sources],
                                 mode='concat', concat_axis=1,
                                 name='mbox_priorbox')

    # Every box contributes 4 localisation values. ``int_shape`` is a backend
    # function rather than a tensor attribute, so it is used as the fallback
    # whenever the tensor carries no ``_keras_shape``.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge([net['mbox_loc'],
                                net['mbox_conf'],
                                net['mbox_priorbox']],
                               mode='concat', concat_axis=2,
                               name='predictions')
    model = Model(net['input'], net['predictions'])
    if weights:
        model.load_weights(weights)
    return model
Esempio n. 3
0
def _rgbd_conv3x3(tensor, filters, name, weights=None):
    """Apply a 3x3, same-padded, ReLU ``Conv2D`` named ``name`` to ``tensor``.

    ``weights`` (a list of initial weight arrays) is only forwarded to the
    layer when given.
    """
    extra = {}
    if weights is not None:
        extra['weights'] = weights
    return Conv2D(filters, (3, 3),
                  name=name,
                  padding='same',
                  activation='relu',
                  **extra)(tensor)


def _rgbd_max_pool(tensor, name, pool_size=(2, 2), strides=(2, 2)):
    """Apply a same-padded ``MaxPooling2D`` named ``name`` to ``tensor``."""
    return MaxPooling2D(name=name,
                        pool_size=pool_size,
                        strides=strides,
                        padding='same')(tensor)


def _rgbd_mbox_head(source, prefix, num_priors, num_classes, img_size,
                    min_size, aspect_ratios, max_size=None):
    """Build the conv-based prediction head for the feature map ``source``.

    Layer names are derived from ``prefix``; the confidence convolution gets
    a ``_<num_classes>`` suffix when ``num_classes != 21``.

    Returns the tuple ``(loc_flat, conf_flat, priorbox)``.
    """
    conf_name = prefix + '_mbox_conf'
    if num_classes != 21:
        conf_name += '_{}'.format(num_classes)

    loc = Conv2D(num_priors * 4, (3, 3),
                 name=prefix + '_mbox_loc',
                 padding='same')(source)
    loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)

    conf = Conv2D(num_priors * num_classes, (3, 3),
                  name=conf_name,
                  padding='same')(source)
    conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)

    # Only forward max_size when supplied so PriorBox's own default applies.
    prior_kwargs = {'name': prefix + '_mbox_priorbox',
                    'aspect_ratios': aspect_ratios,
                    'variances': [0.1, 0.1, 0.2, 0.2]}
    if max_size is not None:
        prior_kwargs['max_size'] = max_size
    priorbox = PriorBox(img_size, min_size, **prior_kwargs)(source)

    return loc_flat, conf_flat, priorbox


def RGBD_SSD300(input_shape, depth_input_shape, num_classes=21):
    """Two-stream (RGB + depth) SSD300-style detector.

    Both streams run VGG16-shaped convolution blocks initialised from the
    ImageNet VGG16 weights (except ``depth_conv1_1``, which gets no
    pretrained weights). The ``conv4_3`` outputs of both streams are
    concatenated for the first prediction head; all later layers are fed
    from the RGB stream only.

    # Arguments
        input_shape: Shape of the RGB input; also used to derive the
            ``img_size`` handed to the ``PriorBox`` layers.
        depth_input_shape: Shape of the depth input.
        num_classes: Number of classes per prior box (sizes the confidence
            outputs and the final softmax).

    # Returns
        A Keras ``Model`` with inputs ``[rgb, depth]`` and a single
        ``predictions`` output (localisation, softmax confidences and prior
        boxes concatenated along axis 2).
    """
    vgg16 = VGG16(weights='imagenet', include_top=False)
    weights = vgg16.get_weights()

    def vgg_pair(conv_index):
        # Two consecutive weight arrays of the conv_index-th (0-based) VGG16
        # convolution -- presumably its kernel and bias.
        return [weights[2 * conv_index], weights[2 * conv_index + 1]]

    input_layer = Input(shape=input_shape)
    depth_input_layer = Input(shape=depth_input_shape)

    # Block 1 (depth_conv1_1 is the only convolution without VGG weights)
    conv1_1 = _rgbd_conv3x3(input_layer, 64, 'rgb_conv1_1', vgg_pair(0))
    depth_conv1_1 = _rgbd_conv3x3(depth_input_layer, 64, 'depth_conv1_1')
    conv1_2 = _rgbd_conv3x3(conv1_1, 64, 'conv1_2', vgg_pair(1))
    depth_conv1_2 = _rgbd_conv3x3(depth_conv1_1, 64, 'depth_conv1_2',
                                  vgg_pair(1))
    pool1 = _rgbd_max_pool(conv1_2, 'pool1')
    depth_pool1 = _rgbd_max_pool(depth_conv1_2, 'depth_pool1')

    # Block 2
    conv2_1 = _rgbd_conv3x3(pool1, 128, 'conv2_1', vgg_pair(2))
    depth_conv2_1 = _rgbd_conv3x3(depth_pool1, 128, 'depth_conv2_1',
                                  vgg_pair(2))
    conv2_2 = _rgbd_conv3x3(conv2_1, 128, 'conv2_2', vgg_pair(3))
    depth_conv2_2 = _rgbd_conv3x3(depth_conv2_1, 128, 'depth_conv2_2',
                                  vgg_pair(3))
    pool2 = _rgbd_max_pool(conv2_2, 'pool2')
    depth_pool2 = _rgbd_max_pool(depth_conv2_2, 'depth_pool2')

    # Block 3
    conv3_1 = _rgbd_conv3x3(pool2, 256, 'conv3_1', vgg_pair(4))
    depth_conv3_1 = _rgbd_conv3x3(depth_pool2, 256, 'depth_conv3_1',
                                  vgg_pair(4))
    conv3_2 = _rgbd_conv3x3(conv3_1, 256, 'conv3_2', vgg_pair(5))
    depth_conv3_2 = _rgbd_conv3x3(depth_conv3_1, 256, 'depth_conv3_2',
                                  vgg_pair(5))
    conv3_3 = _rgbd_conv3x3(conv3_2, 256, 'conv3_3', vgg_pair(6))
    depth_conv3_3 = _rgbd_conv3x3(depth_conv3_2, 256, 'depth_conv3_3',
                                  vgg_pair(6))
    pool3 = _rgbd_max_pool(conv3_3, 'pool3')
    depth_pool3 = _rgbd_max_pool(depth_conv3_3, 'depth_pool3')

    # Block 4
    conv4_1 = _rgbd_conv3x3(pool3, 512, 'conv4_1', vgg_pair(7))
    depth_conv4_1 = _rgbd_conv3x3(depth_pool3, 512, 'depth_conv4_1',
                                  vgg_pair(7))
    conv4_2 = _rgbd_conv3x3(conv4_1, 512, 'conv4_2', vgg_pair(8))
    depth_conv4_2 = _rgbd_conv3x3(depth_conv4_1, 512, 'depth_conv4_2',
                                  vgg_pair(8))
    conv4_3 = _rgbd_conv3x3(conv4_2, 512, 'conv4_3', vgg_pair(9))
    depth_conv4_3 = _rgbd_conv3x3(depth_conv4_2, 512, 'depth_conv4_3',
                                  vgg_pair(9))
    pool4 = _rgbd_max_pool(conv4_3, 'pool4')
    depth_pool4 = _rgbd_max_pool(depth_conv4_3, 'depth_pool4')

    # Block 5
    conv5_1 = _rgbd_conv3x3(pool4, 512, 'conv5_1', vgg_pair(10))
    depth_conv5_1 = _rgbd_conv3x3(depth_pool4, 512, 'depth_conv5_1',
                                  vgg_pair(10))
    conv5_2 = _rgbd_conv3x3(conv5_1, 512, 'conv5_2', vgg_pair(11))
    depth_conv5_2 = _rgbd_conv3x3(depth_conv5_1, 512, 'depth_conv5_2',
                                  vgg_pair(11))
    conv5_3 = _rgbd_conv3x3(conv5_2, 512, 'conv5_3', vgg_pair(12))
    depth_conv5_3 = _rgbd_conv3x3(depth_conv5_2, 512, 'depth_conv5_3',
                                  vgg_pair(12))
    pool5 = _rgbd_max_pool(conv5_3, 'pool5',
                           pool_size=(3, 3), strides=(1, 1))
    depth_pool5 = _rgbd_max_pool(depth_conv5_3, 'depth_pool5',
                                 pool_size=(3, 3), strides=(1, 1))

    # NOTE(review): concat_pool5 is built but never consumed -- fc6 below
    # reads pool5 -- so the depth stream past depth_conv4_3 does not reach
    # the model output. It is also joined on axis=1, unlike the axis=3 used
    # for conv4_3. Kept as-is to preserve behaviour; confirm the intent.
    concat_pool5 = concatenate([pool5, depth_pool5],
                               axis=1,
                               name='concat_pool5')

    # FC6
    fc6 = Conv2D(1024, (3, 3),
                 name='fc6',
                 dilation_rate=(6, 6),
                 padding='same',
                 activation='relu')(pool5)
    fc6 = Dropout(0.5, name='drop6')(fc6)

    # FC7
    fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same',
                 activation='relu')(fc6)
    fc7 = Dropout(0.5, name='drop7')(fc7)

    # Block 6
    conv6_1 = Conv2D(256, (1, 1),
                     name='conv6_1',
                     padding='same',
                     activation='relu')(fc7)
    conv6_2 = Conv2D(512, (3, 3),
                     name='conv6_2',
                     strides=(2, 2),
                     padding='same',
                     activation='relu')(conv6_1)

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    conv7_1 = Conv2D(128, (1, 1),
                     name='conv7_1',
                     padding='same',
                     activation='relu')(conv6_2)
    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     name='conv7_2',
                     padding='valid',
                     strides=(2, 2),
                     activation='relu')(conv7_1z)

    # Block 8
    conv8_1 = Conv2D(128, (1, 1),
                     name='conv8_1',
                     padding='same',
                     activation='relu')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     name='conv8_2',
                     padding='same',
                     strides=(2, 2),
                     activation='relu')(conv8_1)

    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    img_size = (input_shape[1], input_shape[0])

    # Prediction from conv4_3: RGB and depth features are joined first.
    # NOTE(review): axis=3 presumes a channels-last layout -- confirm.
    concat_conv4_3 = concatenate([conv4_3, depth_conv4_3], axis=3)
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(concat_conv4_3)
    (conv4_3_norm_mbox_loc_flat,
     conv4_3_norm_mbox_conf_flat,
     conv4_3_norm_mbox_priorbox) = _rgbd_mbox_head(
         conv4_3_norm, 'conv4_3_norm', 3, num_classes, img_size, 30.0, [2])

    # Prediction from fc7
    (fc7_mbox_loc_flat,
     fc7_mbox_conf_flat,
     fc7_mbox_priorbox) = _rgbd_mbox_head(
         fc7, 'fc7', 6, num_classes, img_size, 60.0, [2, 3], max_size=114.0)

    # Prediction from conv6_2
    (conv6_2_mbox_loc_flat,
     conv6_2_mbox_conf_flat,
     conv6_2_mbox_priorbox) = _rgbd_mbox_head(
         conv6_2, 'conv6_2', 6, num_classes, img_size, 114.0, [2, 3],
         max_size=168.0)

    # Prediction from conv7_2
    (conv7_2_mbox_loc_flat,
     conv7_2_mbox_conf_flat,
     conv7_2_mbox_priorbox) = _rgbd_mbox_head(
         conv7_2, 'conv7_2', 6, num_classes, img_size, 168.0, [2, 3],
         max_size=222.0)

    # Prediction from conv8_2
    (conv8_2_mbox_loc_flat,
     conv8_2_mbox_conf_flat,
     conv8_2_mbox_priorbox) = _rgbd_mbox_head(
         conv8_2, 'conv8_2', 6, num_classes, img_size, 222.0, [2, 3],
         max_size=276.0)

    # Prediction from pool6: the pooled vector is already flat, so Dense
    # layers replace the convolution + Flatten pair.
    num_priors = 6
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_mbox_loc_flat = Dense(num_priors * 4,
                                name='pool6_mbox_loc_flat')(pool6)
    pool6_mbox_conf_flat = Dense(num_priors * num_classes, name=name)(pool6)
    # PriorBox is fed a 1x1 feature map rebuilt from the pooled vector.
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(img_size,
                                   276.0,
                                   max_size=330.0,
                                   aspect_ratios=[2, 3],
                                   variances=[0.1, 0.1, 0.2, 0.2],
                                   name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions (same source order for every branch)
    mbox_loc = concatenate([
        conv4_3_norm_mbox_loc_flat, fc7_mbox_loc_flat, conv6_2_mbox_loc_flat,
        conv7_2_mbox_loc_flat, conv8_2_mbox_loc_flat, pool6_mbox_loc_flat
    ], axis=1, name='mbox_loc')
    mbox_conf = concatenate([
        conv4_3_norm_mbox_conf_flat, fc7_mbox_conf_flat,
        conv6_2_mbox_conf_flat, conv7_2_mbox_conf_flat,
        conv8_2_mbox_conf_flat, pool6_mbox_conf_flat
    ], axis=1, name='mbox_conf')
    mbox_priorbox = concatenate([
        conv4_3_norm_mbox_priorbox, fc7_mbox_priorbox, conv6_2_mbox_priorbox,
        conv7_2_mbox_priorbox, conv8_2_mbox_priorbox, pool6_mbox_priorbox
    ], axis=1, name='mbox_priorbox')

    # Every box contributes 4 localisation values. ``int_shape`` is a backend
    # function rather than a tensor attribute, so it is used as the fallback
    # whenever the tensor carries no ``_keras_shape``.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4
    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2,
                              name='predictions')
    model = Model(inputs=[input_layer, depth_input_layer], outputs=predictions)
    return model
Esempio n. 4
0
    def __init__(self, input_shape, num_classes=21):
        """Create every layer object used by this SSD300 model.

        Only layer construction happens here; none of the layers is applied
        to a tensor in this method.

        # Arguments
            input_shape: Shape of the input image. Elements 1 and 0 (in that
                order) are handed to every PriorBox layer as the image size.
            num_classes: Number of classes; each confidence head produces
                ``num_priors * num_classes`` outputs.
        """
        super(SSD300, self).__init__()

        # NOTE(review): if the base class is a Keras Layer/Model, `input_shape`
        # may be a read-only property there and this assignment would fail --
        # confirm against the class definition, which is not visible here.
        self.input_shape = input_shape
        self.num_classes = num_classes

        img_size = (self.input_shape[1], self.input_shape[0])

        # Blocks 1-5: stacked 3x3 convolutions built by the project helper
        # ConvLayers2D (defined elsewhere), each with pooling enabled.
        self.block1_conv_3x3x64 = ConvLayers2D(layers=2,
                                               filters=64,
                                               kernel_size=3,
                                               pool=True,
                                               name="block1_conv_3x3x64")
        self.block2_conv_3x3x128 = ConvLayers2D(layers=2,
                                                filters=128,
                                                kernel_size=3,
                                                pool=True,
                                                name="block2_conv_3x3x128")
        self.block3_conv_3x3x256 = ConvLayers2D(layers=3,
                                                filters=256,
                                                kernel_size=3,
                                                pool=True,
                                                name="block3_conv_3x3x256")
        self.block4_conv_3x3x512 = ConvLayers2D(layers=3,
                                                filters=512,
                                                kernel_size=3,
                                                pool=True,
                                                name="block4_conv_3x3x512")
        # Block 5 overrides the pooling window and stride (3x3, stride 1).
        self.block5_conv_3x3x512 = ConvLayers2D(layers=3,
                                                filters=512,
                                                kernel_size=3,
                                                pool=True,
                                                pool_size=(3, 3),
                                                pool_strides=(1, 1),
                                                name="block5_conv_3x3x512")

        # Block 6: dilated 3x3 convolution followed by a 1x1 convolution.
        self.block6_conv_3x3x1024 = Conv2D(1024, (3, 3),
                                           dilation_rate=(6, 6),
                                           activation='relu',
                                           padding='same',
                                           name='block6_conv_3x3x1024')
        self.block6_conv_1x1x1024 = Conv2D(1024, (1, 1),
                                           activation='relu',
                                           padding='same',
                                           name='block6_conv_1x1x1024')

        # Blocks 7-10: a 1x1 convolution followed by a stride-2 3x3
        # convolution. `strides` is the Keras 2 keyword; the Keras 1 spelling
        # `subsample` is not accepted by every Conv2D implementation.
        self.block7_conv_1x1x256 = Conv2D(256, (1, 1),
                                          activation='relu',
                                          padding='same',
                                          name='block7_conv_1x1x256')
        self.block7_conv_3x3x512 = Conv2D(512, (3, 3),
                                          strides=(2, 2),
                                          activation='relu',
                                          padding='same',
                                          name='block7_conv_3x3x512')

        self.block8_conv_1x1x128 = Conv2D(128, (1, 1),
                                          activation='relu',
                                          padding='same',
                                          name='block8_conv_1x1x128')
        self.block8_conv_3x3x256 = Conv2D(256, (3, 3),
                                          strides=(2, 2),
                                          activation='relu',
                                          padding='valid',
                                          name='block8_conv_3x3x256')

        self.block9_conv_1x1x128 = Conv2D(128, (1, 1),
                                          activation='relu',
                                          padding='same',
                                          name='block9_conv_1x1x128')
        self.block9_conv_3x3x256 = Conv2D(256, (3, 3),
                                          strides=(2, 2),
                                          activation='relu',
                                          padding='same',
                                          name='block9_conv_3x3x256')

        # The kernel size is passed as one tuple: two positional ints would be
        # read as (kernel_size, strides) by the Keras 2 Conv2D signature.
        self.block10_conv_1x1x128 = Conv2D(128, (1, 1),
                                           activation='relu',
                                           padding='same',
                                           name='block10_conv_1x1x128')
        self.block10_conv_3x3x256 = Conv2D(256, (3, 3),
                                           strides=(2, 2),
                                           activation='relu',
                                           padding='same',
                                           name='block10_conv_3x3x256')

        self.flatten = Flatten()

        # Head on the normalized block-4 output: 3 priors per location.
        num_priors = 3
        self.block4_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                           padding='same',
                                           name='block4_norm_mbox_loc')
        self.block4_norm_mbox_conf = Conv2D(num_priors * self.num_classes,
                                            (3, 3),
                                            padding='same',
                                            name='block4_norm_mbox_conf')
        self.block4_norm_mbox_priorbox = PriorBox(
            img_size,
            30.0,
            aspect_ratios=[2],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='block4_norm_mbox_priorbox')
        # Normalize is a project layer defined elsewhere; 20 is its first
        # positional argument (presumably an initial scale -- TODO confirm).
        self.block4_norm = Normalize(20, name='block4_norm')

        # Heads on blocks 6-10: 6 priors per location.
        num_priors = 6
        self.block6_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                      padding='same',
                                      name='block6_mbox_loc')
        self.block6_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                       padding='same',
                                       name='block6_mbox_conf')
        self.block6_mbox_priorbox = PriorBox(img_size,
                                             60.0,
                                             max_size=114.0,
                                             aspect_ratios=[2, 3],
                                             variances=[0.1, 0.1, 0.2, 0.2],
                                             name='block6_mbox_priorbox')

        self.block7_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                      padding='same',
                                      name='block7_mbox_loc')
        self.block7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                       padding='same',
                                       name='block7_mbox_conf')
        self.block7_mbox_priorbox = PriorBox(img_size,
                                             114.0,
                                             max_size=168.0,
                                             aspect_ratios=[2, 3],
                                             variances=[0.1, 0.1, 0.2, 0.2],
                                             name='block7_mbox_priorbox')

        self.block8_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                      padding='same',
                                      name='block8_mbox_loc')
        self.block8_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                       padding='same',
                                       name='block8_mbox_conf')
        self.block8_mbox_priorbox = PriorBox(img_size,
                                             168.0,
                                             max_size=222.0,
                                             aspect_ratios=[2, 3],
                                             variances=[0.1, 0.1, 0.2, 0.2],
                                             name='block8_mbox_priorbox')

        self.block9_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                      padding='same',
                                      name='block9_mbox_loc')
        self.block9_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                       padding='same',
                                       name='block9_mbox_conf')
        self.block9_mbox_priorbox = PriorBox(img_size,
                                             222.0,
                                             max_size=276.0,
                                             aspect_ratios=[2, 3],
                                             variances=[0.1, 0.1, 0.2, 0.2],
                                             name='block9_mbox_priorbox')

        # Block 10 uses Dense heads plus a Reshape to (1, 1, 256).
        self.block10_mbox_loc_flat = Dense(num_priors * 4,
                                           name='block10_mbox_loc_flat')
        self.block10_mbox_conf_flat = Dense(num_priors * num_classes,
                                            name='block10_mbox_conf_flat')
        self.block10_reshape = Reshape((1, 1, 256), name='block10_reshape')
        self.block10_mbox_priorbox = PriorBox(img_size,
                                              276.0,
                                              max_size=330.0,
                                              aspect_ratios=[2, 3],
                                              variances=[0.1, 0.1, 0.2, 0.2],
                                              name='block10_mbox_priorbox')

        # Layers that gather the per-block outputs into the final tensor.
        self.concat_conf = Concatenate(axis=1, name='mbox_conf')
        self.concat_loc = Concatenate(axis=1, name='mbox_loc')
        self.concat_priorbox = Concatenate(axis=1, name='mbox_priorbox')
        self.concat_predictions = Concatenate(axis=2, name='mbox_predictions')

        # NOTE(review): both target shapes hard-code a leading 4 instead of a
        # box count derived from the concatenated tensor -- confirm intended.
        self.reshape_loc = Reshape((4, 4), name='mbox_loc_final')
        self.reshape_conf = Reshape((4, self.num_classes),
                                    name='mbox_conf_logits')

        self.activate_softmax = Activation('softmax', name='mbox_conf_softmax')
Esempio n. 5
0
def _ssd300_conf_name(base, num_classes):
    """Return the layer name for a confidence head.

    The class count is appended to *base* whenever it differs from 21, e.g.
    ``('fc7_mbox_conf', 5)`` gives ``'fc7_mbox_conf_5'``.
    """
    # NOTE(review): presumably this keeps by-name weight loading from matching
    # heads whose shape depends on the class count -- confirm.
    if num_classes != 21:
        return base + '_{}'.format(num_classes)
    return base


def _ssd300_conv(net, name, source, filters, size, **conv_kwargs):
    """Store a ReLU ``Convolution2D`` of ``net[source]`` as ``net[name]``.

    The kernel is ``size`` x ``size``. ``border_mode`` defaults to ``'same'``;
    any other keyword (e.g. ``subsample``) is forwarded to the layer.
    """
    conv_kwargs.setdefault('border_mode', 'same')
    layer = Convolution2D(filters,
                          size,
                          size,
                          activation='relu',
                          name=name,
                          **conv_kwargs)
    net[name] = layer(net[source])


def _ssd300_pool(net, name, source, pool_size=(2, 2), strides=(2, 2)):
    """Store a 'same'-padded ``MaxPooling2D`` of ``net[source]`` as ``net[name]``."""
    layer = MaxPooling2D(pool_size,
                         strides=strides,
                         border_mode='same',
                         name=name)
    net[name] = layer(net[source])


def _ssd300_conv_head(net, source, num_priors, num_classes, img_size,
                      min_size, aspect_ratios, max_size=None):
    """Attach location, confidence and prior-box outputs to ``net[source]``.

    Adds ``<source>_mbox_loc``, ``<source>_mbox_conf``, their ``_flat``
    versions and ``<source>_mbox_priorbox`` to *net*. ``max_size`` is only
    forwarded to ``PriorBox`` when it is given.
    """
    features = net[source]

    # Location head: 4 values per prior at every position.
    loc = source + '_mbox_loc'
    net[loc] = Convolution2D(num_priors * 4,
                             3,
                             3,
                             border_mode='same',
                             name=loc)(features)
    net[loc + '_flat'] = Flatten(name=loc + '_flat')(net[loc])

    # Confidence head: one value per class and prior at every position.
    conf = source + '_mbox_conf'
    net[conf] = Convolution2D(num_priors * num_classes,
                              3,
                              3,
                              border_mode='same',
                              name=_ssd300_conf_name(conf,
                                                     num_classes))(features)
    net[conf + '_flat'] = Flatten(name=conf + '_flat')(net[conf])

    prior = source + '_mbox_priorbox'
    prior_kwargs = {
        'aspect_ratios': aspect_ratios,
        'variances': [0.1, 0.1, 0.2, 0.2],
        'name': prior,
    }
    if max_size is not None:
        prior_kwargs['max_size'] = max_size
    net[prior] = PriorBox(img_size, min_size, **prior_kwargs)(features)


def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A Keras ``Model`` mapping the input image to the ``predictions``
        tensor, i.e. box locations, softmax class confidences and prior boxes
        concatenated along axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    net['input'] = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # Blocks 1-5: runs of 3x3 ReLU convolutions, each followed by max pooling.
    # Entries are (block index, number of convolutions, filters).
    previous = 'input'
    for block, n_convs, filters in ((1, 2, 64), (2, 2, 128), (3, 3, 256),
                                    (4, 3, 512), (5, 3, 512)):
        for index in range(1, n_convs + 1):
            conv_name = 'conv{}_{}'.format(block, index)
            _ssd300_conv(net, conv_name, previous, filters, 3)
            previous = conv_name
        pool_name = 'pool{}'.format(block)
        if block == 5:
            # The last pooling keeps the resolution: 3x3 window, stride 1.
            _ssd300_pool(net, pool_name, previous,
                         pool_size=(3, 3), strides=(1, 1))
        else:
            _ssd300_pool(net, pool_name, previous)
        previous = pool_name

    # FC6: atrous (dilated) 3x3 convolution with rate 6.
    net['fc6'] = AtrousConvolution2D(1024,
                                     3,
                                     3,
                                     atrous_rate=(6, 6),
                                     activation='relu',
                                     border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7: 1x1 convolution.
    _ssd300_conv(net, 'fc7', 'fc6', 1024, 1)

    # Block 6
    _ssd300_conv(net, 'conv6_1', 'fc7', 256, 1)
    _ssd300_conv(net, 'conv6_2', 'conv6_1', 512, 3, subsample=(2, 2))
    # Block 7: explicit zero padding followed by a 'valid' stride-2 conv.
    _ssd300_conv(net, 'conv7_1', 'conv6_2', 128, 1)
    net['conv7_2_padded'] = ZeroPadding2D()(net['conv7_1'])
    _ssd300_conv(net, 'conv7_2', 'conv7_2_padded', 256, 3,
                 subsample=(2, 2), border_mode='valid')
    # Block 8
    _ssd300_conv(net, 'conv8_1', 'conv7_2', 128, 1)
    _ssd300_conv(net, 'conv8_2', 'conv8_1', 256, 3, subsample=(2, 2))
    # Last pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # conv4_3 passes through the project-defined Normalize layer (not batch
    # normalization) before its head is attached. 20 is the layer's first
    # positional argument, presumably an initial scale -- TODO confirm.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # Convolutional prediction heads, from the finest to the coarsest map.
    _ssd300_conv_head(net, 'conv4_3_norm', 3, num_classes, img_size,
                      30.0, [2])
    _ssd300_conv_head(net, 'fc7', 6, num_classes, img_size,
                      60.0, [2, 3], max_size=114.0)
    _ssd300_conv_head(net, 'conv6_2', 6, num_classes, img_size,
                      114.0, [2, 3], max_size=168.0)
    _ssd300_conv_head(net, 'conv7_2', 6, num_classes, img_size,
                      168.0, [2, 3], max_size=222.0)
    _ssd300_conv_head(net, 'conv8_2', 6, num_classes, img_size,
                      222.0, [2, 3], max_size=276.0)

    # Prediction from pool6: the globally pooled features are already flat,
    # so Dense layers take the place of the convolution + Flatten pair.
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name=_ssd300_conf_name('pool6_mbox_conf_flat',
                               num_classes))(net['pool6'])
    priorbox = PriorBox(img_size,
                        276.0,
                        max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is fed a 1x1 map with 256 channels, laid out according to the
    # backend's image dimension ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions, always in the same source order.
    sources = ('conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
               'pool6')
    net['mbox_loc'] = merge([net[s + '_mbox_loc_flat'] for s in sources],
                            mode='concat',
                            concat_axis=1,
                            name='mbox_loc')
    net['mbox_conf'] = merge([net[s + '_mbox_conf_flat'] for s in sources],
                             mode='concat',
                             concat_axis=1,
                             name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[s + '_mbox_priorbox'] for s in sources],
        mode='concat',
        concat_axis=1,
        name='mbox_priorbox')

    # Every box contributes 4 location values to the flat vector. Fall back to
    # the backend helper when the tensor carries no `_keras_shape`; the old
    # `hasattr(tensor, 'int_shape')` test never matched and left `num_boxes`
    # unbound.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat',
        concat_axis=2,
        name='predictions')
    return Model(net['input'], net['predictions'])
Esempio n. 6
0
def SSD(input_shape, num_classes):
    """Build an SSD-style detector on top of a truncated ResNet50 backbone.

    The ResNet50 activation at layer ``'add_7'`` is upsampled by a 2x2
    transposed convolution and fed through VGG-style blocks (conv4_x,
    conv5_x, fc6/fc7, conv6_x, conv7_x, conv8_x). Six feature maps
    (normalised conv4_3, fc7, conv6_2, conv7_2, conv8_2 and the globally
    average-pooled conv8_2) each receive a localisation head, a confidence
    head and a ``PriorBox`` layer.

    # Arguments
        input_shape: Sequence whose first two entries give the spatial size
            of the input image, e.g. (512, 512, 3). The two entries are
            swapped and the channel count is forced to 3, so any third
            entry is ignored and channels-first shapes are not supported.
        num_classes: Number of classes including background.

    # Returns
        A Keras ``Model`` mapping the image tensor to the ``'predictions'``
        tensor. For every box the last axis holds 4 localisation values,
        ``num_classes`` softmax confidences and the ``PriorBox`` output.

    # References
        https://arxiv.org/abs/1512.02325
    """

    # Backbone. Only the first two entries of the caller's shape are read.
    input_shape = (input_shape[1], input_shape[0], 3)

    input_tensor = Input(input_shape)
    resnet50 = ResNet50(input_shape=input_shape, include_top=False,
                        weights='imagenet')
    # NOTE(review): 'add_7' looks like an auto-generated layer name; it may
    # differ across Keras versions or if other models were built earlier in
    # the same session - confirm before relying on it.
    feature_extractor = Model(inputs=resnet50.input,
                              outputs=resnet50.get_layer('add_7').output)

    pool3 = feature_extractor(input_tensor)
    # `padding` is the Keras 2 keyword (the legacy `border_mode` spelling is
    # only accepted through a deprecation shim); every other layer in this
    # function already uses it.
    conv4_0 = Conv2DTranspose(512, (2, 2),
                              name='conv4_0',
                              activation='relu',
                              padding='valid')(pool3)

    # Block 4
    conv4_1 = Conv2D(512, (3, 3),
                     name='conv4_1',
                     padding='same',
                     activation='relu')(conv4_0)
    conv4_2 = Conv2D(512, (3, 3),
                     name='conv4_2',
                     padding='same',
                     activation='relu')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     name='conv4_3',
                     padding='same',
                     activation='relu')(conv4_2)
    pool4 = MaxPooling2D((2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    # Block 5
    conv5_1 = Conv2D(512, (3, 3),
                     name='conv5_1',
                     padding='same',
                     activation='relu')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     name='conv5_2',
                     padding='same',
                     activation='relu')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     name='conv5_3',
                     padding='same',
                     activation='relu')(conv5_2)
    # Stride-1 pooling keeps the spatial resolution for the dilated fc6.
    pool5 = MaxPooling2D(name='pool5',
                         pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same')(conv5_3)

    # FC6 (dilated 3x3 convolution)
    fc6 = Conv2D(1024, (3, 3),
                 name='fc6',
                 dilation_rate=(6, 6),
                 padding='same',
                 activation='relu')(pool5)

    # FC7 (1x1 convolution)
    fc7 = Conv2D(1024, (1, 1),
                 name='fc7',
                 padding='same',
                 activation='relu')(fc6)

    # Block 6
    conv6_1 = Conv2D(256, (1, 1),
                     name='conv6_1',
                     padding='same',
                     activation='relu')(fc7)
    conv6_2 = Conv2D(512, (3, 3),
                     name='conv6_2',
                     strides=(2, 2),
                     padding='same',
                     activation='relu')(conv6_1)

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    conv7_1 = Conv2D(128, (1, 1),
                     name='conv7_1',
                     padding='same',
                     activation='relu')(conv6_2)
    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     name='conv7_2',
                     padding='valid',
                     strides=(2, 2),
                     activation='relu')(conv7_1z)

    # Block 8
    conv8_1 = Conv2D(128, (1, 1),
                     name='conv8_1',
                     padding='same',
                     activation='relu')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     name='conv8_2',
                     padding='same',
                     strides=(2, 2),
                     activation='relu')(conv8_1)

    # Last pool: one 256-vector per image.
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Undoes the swap applied to `input_shape` above, i.e. this is
    # (caller_shape[0], caller_shape[1]).
    # NOTE(review): PriorBox presumably expects (width, height) - confirm.
    img_size = (input_shape[1], input_shape[0])
    prior_variances = [0.1, 0.1, 0.2, 0.2]

    # Confidence layers carry the class count in their name unless it is 21,
    # presumably so that by-name weight loading from a 21-class checkpoint
    # skips heads whose shape differs - confirm against the training script.
    conf_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)

    # Prediction from conv4_3 (3 priors per location)
    num_priors = 3
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
    conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                   name='conv4_3_norm_mbox_loc',
                                   padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_loc_flat = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
    conv4_3_norm_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                    name='conv4_3_norm_mbox_conf' + conf_suffix,
                                    padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_conf_flat = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
    conv4_3_norm_mbox_priorbox = PriorBox(
        img_size, 30.0,
        name='conv4_3_norm_mbox_priorbox',
        aspect_ratios=[2],
        variances=prior_variances)(conv4_3_norm)

    # Prediction from fc7 (6 priors per location from here on)
    num_priors = 6
    fc7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                           padding='same',
                           name='fc7_mbox_conf' + conf_suffix)(fc7)
    fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)
    fc7_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                          name='fc7_mbox_loc',
                          padding='same')(fc7)
    fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)
    fc7_mbox_priorbox = PriorBox(img_size, 60.0,
                                 name='fc7_mbox_priorbox',
                                 max_size=114.0,
                                 aspect_ratios=[2, 3],
                                 variances=prior_variances)(fc7)

    # Prediction from conv6_2
    conv6_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same',
                               name='conv6_2_mbox_conf' + conf_suffix)(conv6_2)
    conv6_2_mbox_conf_flat = Flatten(
        name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)
    conv6_2_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                              name='conv6_2_mbox_loc',
                              padding='same')(conv6_2)
    conv6_2_mbox_loc_flat = Flatten(
        name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)
    conv6_2_mbox_priorbox = PriorBox(img_size, 114.0,
                                     max_size=168.0,
                                     aspect_ratios=[2, 3],
                                     variances=prior_variances,
                                     name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2
    conv7_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same',
                               name='conv7_2_mbox_conf' + conf_suffix)(conv7_2)
    conv7_2_mbox_conf_flat = Flatten(
        name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)
    conv7_2_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                              padding='same',
                              name='conv7_2_mbox_loc')(conv7_2)
    conv7_2_mbox_loc_flat = Flatten(
        name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)
    conv7_2_mbox_priorbox = PriorBox(img_size, 168.0,
                                     max_size=222.0,
                                     aspect_ratios=[2, 3],
                                     variances=prior_variances,
                                     name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2
    conv8_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same',
                               name='conv8_2_mbox_conf' + conf_suffix)(conv8_2)
    conv8_2_mbox_conf_flat = Flatten(
        name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)
    conv8_2_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                              padding='same',
                              name='conv8_2_mbox_loc')(conv8_2)
    conv8_2_mbox_loc_flat = Flatten(
        name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)
    conv8_2_mbox_priorbox = PriorBox(img_size, 222.0,
                                     max_size=276.0,
                                     aspect_ratios=[2, 3],
                                     variances=prior_variances,
                                     name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from pool6. The pooled vector is already flat, so Dense
    # layers replace the conv heads; PriorBox gets a 1x1 feature map built
    # by reshaping the vector.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_mbox_loc_flat = Dense(num_priors * 4,
                                name='pool6_mbox_loc_flat')(pool6)
    pool6_mbox_conf_flat = Dense(num_priors * num_classes,
                                 name='pool6_mbox_conf_flat' + conf_suffix)(pool6)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(img_size, 276.0,
                                   max_size=330.0,
                                   aspect_ratios=[2, 3],
                                   variances=prior_variances,
                                   name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions along the flattened (per-box) axis.
    mbox_loc = concatenate([conv4_3_norm_mbox_loc_flat,
                            fc7_mbox_loc_flat,
                            conv6_2_mbox_loc_flat,
                            conv7_2_mbox_loc_flat,
                            conv8_2_mbox_loc_flat,
                            pool6_mbox_loc_flat],
                           axis=1,
                           name='mbox_loc')
    mbox_conf = concatenate([conv4_3_norm_mbox_conf_flat,
                             fc7_mbox_conf_flat,
                             conv6_2_mbox_conf_flat,
                             conv7_2_mbox_conf_flat,
                             conv8_2_mbox_conf_flat,
                             pool6_mbox_conf_flat],
                            axis=1,
                            name='mbox_conf')
    mbox_priorbox = concatenate([conv4_3_norm_mbox_priorbox,
                                 fc7_mbox_priorbox,
                                 conv6_2_mbox_priorbox,
                                 conv7_2_mbox_priorbox,
                                 conv8_2_mbox_priorbox,
                                 pool6_mbox_priorbox],
                                axis=1,
                                name='mbox_priorbox')

    # Every box contributes 4 localisation values to the flat vector.
    # Fall back to the backend helper when the tensor has no `_keras_shape`;
    # the previous `hasattr(..., 'int_shape')` test could never succeed and
    # left `num_boxes` undefined in that case.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4),
                       name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax',
                           name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc,
                               mbox_conf,
                               mbox_priorbox],
                              axis=2,
                              name='predictions')
    model = Model(inputs=input_tensor, outputs=predictions)
    return model
def SSD_300(
        input_shape,
        num_classes=17 + 1,
        min_scale=None,
        max_scale=None,
        aspect_ratios_per_layer=[[2.0], [2.0, 3.0], [2.0, 3.0], [2.0, 3.0],
                                 [2.0, 3.0], [2.0, 3.0]],
        variances=[0.1, 0.1, 0.2, 0.2],
        scales=[30, 60, 114, 168, 222, 276, 330],
        clip_boxes=True):
    """Build an SSD detector with a residual-block backbone.

    Six feature maps (normalised conv5_9, res6_5, conv6_9, res7_2, res7_4
    and the globally average-pooled res7_4) each receive a localisation
    head, a confidence head and a ``PriorBox`` layer; the results are
    concatenated into a single ``predictions`` tensor.

    The list defaults in the signature are only read, never mutated, inside
    this function.

    Arguments:
        input_shape (tuple): Height, width and channels of the input images.
        num_classes (int, optional): Size of the per-box softmax output.
        min_scale (float): Lower end of the scale range. Only used, together
            with `max_scale`, when `scales` is empty or `None`.
            NOTE(review): the original documentation describes these as
            fractions in [0, 1], while the values end up as
            `min_size`/`max_size` of `PriorBox` just like the default
            pixel-valued `scales` - confirm the intended unit.
        max_scale (float): Upper end of the scale range, see `min_scale`.
        aspect_ratios_per_layer (list): One list of aspect ratios per
            predictor layer (six in total). The length is validated only when
            the value is truthy, but the lists are indexed unconditionally
            further down, so a value is required in practice.
        variances (list, optional): Passed unchanged to every `PriorBox`
            (documented upstream as four floats > 0 by which the anchor box
            offsets are divided).
        scales (list, optional): Seven values. `scales[0]` is the `min_size`
            of the first predictor layer; for predictor layer k >= 1,
            `scales[k]` and `scales[k + 1]` are its `min_size` and
            `max_size`. If falsy, the list is computed with `np.linspace`
            from `min_scale` and `max_scale`.
        clip_boxes (bool, optional): NOTE(review): accepted but currently not
            forwarded to `PriorBox` or used anywhere in this function.

    Returns:
        A Keras `Model` mapping the image tensor to the predictions tensor.
        For every box the last axis holds 4 localisation values,
        `num_classes` softmax confidences and the `PriorBox` output.

    Raises:
        ValueError: If `aspect_ratios_per_layer` or `scales` has the wrong
            length, or if `scales` is not given and `min_scale`/`max_scale`
            are missing.
    """
    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300.

    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # No explicit list of scales was passed: derive it from `min_scale`
        # and `max_scale`. Fail early with a clear message instead of the
        # TypeError numpy would raise on `None` bounds.
        if min_scale is None or max_scale is None:
            raise ValueError(
                "Either `scales` or both `min_scale` and `max_scale` "
                "must be specified.")
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    # NOTE: several layers below are created without an explicit name, so
    # their auto-generated names depend on creation order. Keep the order of
    # the statements stable if weights are loaded by name.
    net = {}
    img_height, img_width, img_channels = input_shape[0], input_shape[
        1], input_shape[2]
    # (width, height) ordering, as handed to PriorBox below.
    image_size = (input_shape[1], input_shape[0])

    # Block 1
    input_tensor = Input(shape=(img_height, img_width, img_channels))
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(32,
                                   kernel_size=3,
                                   activation='relu',
                                   padding='same')(net['input'])
    net['conv1_2'] = Convolution2D(64,
                                   kernel_size=3,
                                   strides=2,
                                   activation='relu',
                                   padding='valid',
                                   name='conv1_2')(net['conv1_1'])

    # Block 2
    net['res2_1'] = Residual_Block(32, net['conv1_2'], name='res2_1')
    net['conv3_1'] = Convolution2D(128,
                                   kernel_size=3,
                                   activation='relu',
                                   padding='valid',
                                   strides=2,
                                   name='conv3_1')(net['res2_1'])

    # Block 3
    net['res4_1'] = Residual_Block(64, net['conv3_1'], name='res4_1')
    net['res4_2'] = Residual_Block(64, net['res4_1'], name='res4_2')
    net['conv4_3'] = Convolution2D(256,
                                   kernel_size=3,
                                   activation='relu',
                                   padding='valid',
                                   strides=2,
                                   name='conv4_3')(net['res4_2'])

    # Block 4
    net['res5_1'] = Residual_Block(128, net['conv4_3'], name='res5_1')
    net['res5_2'] = Residual_Block(128, net['res5_1'], name='res5_2')
    net['res5_3'] = Residual_Block(128, net['res5_2'], name='res5_3')
    net['res5_4'] = Residual_Block(128, net['res5_3'], name='res5_4')
    net['res5_5'] = Residual_Block(128, net['res5_4'], name='res5_5')
    net['res5_6'] = Residual_Block(128, net['res5_5'], name='res5_6')
    net['res5_7'] = Residual_Block(128, net['res5_6'], name='res5_7')
    net['res5_8'] = Residual_Block(128, net['res5_7'], name='res5_8')
    net['conv5_9'] = Convolution2D(512,
                                   kernel_size=3,
                                   activation='relu',
                                   padding='valid',
                                   strides=2,
                                   name='conv5_9')(net['res5_8'])

    # Block 5
    net['res6_1'] = Residual_Block(256, net['conv5_9'], name='res6_1')
    net['res6_2'] = Residual_Block(256, net['res6_1'], name='res6_2')
    net['res6_3'] = Residual_Block(256, net['res6_2'], name='res6_3')
    net['res6_4'] = Residual_Block(256, net['res6_3'], name='res6_4')
    # res6_5 is tapped by a predictor head below.
    net['res6_5'] = Residual_Block(256, net['res6_4'], name='res6_5')
    net['res6_6'] = Residual_Block(256, net['res6_5'], name='res6_6')
    net['res6_7'] = Residual_Block(256, net['res6_6'], name='res6_7')
    net['res6_8'] = Residual_Block(256, net['res6_7'], name='res6_8')
    net['conv6_9'] = Convolution2D(1024,
                                   kernel_size=3,
                                   activation='relu',
                                   padding='valid',
                                   strides=2,
                                   name='conv6_9')(net['res6_8'])

    # Block 6
    net['res7_1'] = Residual_Block(512, net['conv6_9'], name='res7_1')
    # res7_2 and res7_4 are tapped by predictor heads below.
    net['res7_2'] = Residual_Block(512, net['res7_1'], name='res7_2')
    net['res7_3'] = Residual_Block(512, net['res7_2'], name='res7_3')
    net['res7_4'] = Residual_Block(512, net['res7_3'], name='res7_4')

    # Last pool
    net['pool7_5'] = GlobalAveragePooling2D(name='pool7_5')(net['res7_4'])

    # Prediction from conv5_9 (3 priors per location, no max_size)
    net['conv5_9_norm'] = Normalize(20)(net['conv5_9'])
    num_priors = 3
    net['conv5_9_norm_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3, padding='same')(net['conv5_9_norm'])
    net['conv5_9_norm_mbox_loc_flat'] = Flatten()(net['conv5_9_norm_mbox_loc'])

    net['conv5_9_norm_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['conv5_9_norm'])
    net['conv5_9_norm_mbox_conf_flat'] = Flatten()(
        net['conv5_9_norm_mbox_conf'])

    net['conv5_9_norm_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[0],
        aspect_ratios=aspect_ratios_per_layer[0],
        variances=variances)(net['conv5_9_norm'])

    # Prediction from res6_5 (6 priors per location from here on)
    num_priors = 6
    net['res6_5_mbox_loc'] = Convolution2D(num_priors * 4,
                                           kernel_size=3,
                                           padding='same')(net['res6_5'])
    net['res6_5_mbox_loc_flat'] = Flatten()(net['res6_5_mbox_loc'])

    net['res6_5_mbox_conf'] = Convolution2D(num_priors * num_classes,
                                            kernel_size=3,
                                            padding='same')(net['res6_5'])
    net['res6_5_mbox_conf_flat'] = Flatten()(net['res6_5_mbox_conf'])

    net['res6_5_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[1],
        max_size=scales[2],
        aspect_ratios=aspect_ratios_per_layer[1],
        variances=variances)(net['res6_5'])

    # Prediction from conv6_9
    net['conv6_9_mbox_loc'] = Convolution2D(num_priors * 4,
                                            kernel_size=3,
                                            padding='same')(net['conv6_9'])
    net['conv6_9_mbox_loc_flat'] = Flatten()(net['conv6_9_mbox_loc'])

    net['conv6_9_mbox_conf'] = Convolution2D(num_priors * num_classes,
                                             kernel_size=3,
                                             padding='same')(net['conv6_9'])
    net['conv6_9_mbox_conf_flat'] = Flatten()(net['conv6_9_mbox_conf'])

    net['conv6_9_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[2],
        max_size=scales[3],
        aspect_ratios=aspect_ratios_per_layer[2],
        variances=variances)(net['conv6_9'])

    # Prediction from res7_2
    net['res7_2_mbox_loc'] = Convolution2D(num_priors * 4,
                                           kernel_size=3,
                                           padding='same')(net['res7_2'])
    net['res7_2_mbox_loc_flat'] = Flatten()(net['res7_2_mbox_loc'])

    net['res7_2_mbox_conf'] = Convolution2D(num_priors * num_classes,
                                            kernel_size=3,
                                            padding='same')(net['res7_2'])
    net['res7_2_mbox_conf_flat'] = Flatten()(net['res7_2_mbox_conf'])

    net['res7_2_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[3],
        max_size=scales[4],
        aspect_ratios=aspect_ratios_per_layer[3],
        variances=variances)(net['res7_2'])

    # Prediction from res7_4
    net['res7_4_mbox_loc'] = Convolution2D(num_priors * 4,
                                           kernel_size=3,
                                           padding='same')(net['res7_4'])
    net['res7_4_mbox_loc_flat'] = Flatten()(net['res7_4_mbox_loc'])

    net['res7_4_mbox_conf'] = Convolution2D(num_priors * num_classes,
                                            kernel_size=3,
                                            padding='same')(net['res7_4'])
    net['res7_4_mbox_conf_flat'] = Flatten()(net['res7_4_mbox_conf'])

    net['res7_4_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[4],
        max_size=scales[5],
        aspect_ratios=aspect_ratios_per_layer[4],
        variances=variances)(net['res7_4'])

    # Prediction from pool7_5. The pooled vector is already flat, so Dense
    # layers replace the conv heads.
    net['pool7_5_mbox_loc_flat'] = Dense(num_priors * 4)(net['pool7_5'])

    net['pool7_5_mbox_conf_flat'] = Dense(num_priors * num_classes)(
        net['pool7_5'])

    # PriorBox is fed a 1x1 feature map built by reshaping the pooled vector.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 1024)
    else:
        target_shape = (1024, 1, 1)

    net['pool7_5_reshaped'] = Reshape(target_shape)(net['pool7_5'])
    net['pool7_5_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[5],
        max_size=scales[6],
        aspect_ratios=aspect_ratios_per_layer[5],
        variances=variances)(net['pool7_5_reshaped'])

    # Combine predictions.
    # Each localisation head was flattened to (batch, n), with 4 values per
    # box; the heads are joined along that flat axis.
    net['mbox_loc'] = concatenate([
        net['conv5_9_norm_mbox_loc_flat'], net['res6_5_mbox_loc_flat'],
        net['conv6_9_mbox_loc_flat'], net['res7_2_mbox_loc_flat'],
        net['res7_4_mbox_loc_flat'], net['pool7_5_mbox_loc_flat']
    ],
                                  axis=1)

    # Likewise for the confidence heads, with `num_classes` values per box.
    net['mbox_conf'] = concatenate([
        net['conv5_9_norm_mbox_conf_flat'], net['res6_5_mbox_conf_flat'],
        net['conv6_9_mbox_conf_flat'], net['res7_2_mbox_conf_flat'],
        net['res7_4_mbox_conf_flat'], net['pool7_5_mbox_conf_flat']
    ],
                                   axis=1)

    # Prior boxes of all predictor layers, joined along the box axis.
    net['mbox_prior'] = concatenate([
        net['conv5_9_norm_mbox_priorbox'], net['res6_5_mbox_priorbox'],
        net['conv6_9_mbox_priorbox'], net['res7_2_mbox_priorbox'],
        net['res7_4_mbox_priorbox'], net['pool7_5_mbox_priorbox']
    ],
                                    axis=1)

    # Number of boxes, needed to un-flatten the heads with Reshape.
    # Fall back to the backend helper when the tensor has no `_keras_shape`;
    # the previous `hasattr(..., '_int_shape')` test could never succeed and
    # left `num_boxes` undefined in that case.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    net['mbox_loc'] = Reshape((num_boxes, 4))(net['mbox_loc'])

    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    net['mbox_conf'] = Reshape((num_boxes, num_classes))(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax')(net['mbox_conf'])

    # Per box: localisation, class confidences, prior box data.
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_prior']], axis=2)
    model = Model(net['input'], net['predictions'])

    return model
Esempio n. 8
0
def SSD300(input_shape, num_classes=2):
    net = {}
    # Block 1
    input_tensor = input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Conv2D(64, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv1_1')(net['input'])
    net['conv1_2'] = Conv2D(64, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Conv2D(128, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv2_1')(net['pool1'])
    net['conv2_2'] = Conv2D(128, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Conv2D(256, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_1')(net['pool2'])
    net['conv3_2'] = Conv2D(256, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Conv2D(256, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_1')(net['pool3'])
    net['conv4_2'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_1')(net['pool4'])
    net['conv5_2'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Conv2D(512, (3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3),
                                strides=(1, 1),
                                padding='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = Conv2D(1024, (3, 3),
                        activation="relu",
                        name="fc6",
                        dilation_rate=(6, 6),
                        padding="same")(net['pool5'])
    #    net['fc6'] = AtrousConvolution2D(1024, (3, 3), atrous_rate=(6, 6),
    #                                     activation='relu', padding='same',
    #                                     name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Conv2D(1024, (1, 1),
                        activation='relu',
                        padding='same',
                        name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)
    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512, (3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Conv2D(256, (3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='valid',
                            name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256, (3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])
    # Prediction from conv4_3
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Conv2D(num_priors * 4, (3, 3),
               padding='same',
               name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size,
                        30.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])
    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4, (3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes, (3, 3),
                                  padding='same',
                                  name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size,
                        60.0,
                        max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])
    # Prediction from conv6_2
    num_priors = 6
    x = Conv2D(num_priors * 4, (3, 3), padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name=name)(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        114.0,
                        max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])
    # Prediction from conv7_2
    num_priors = 6
    x = Conv2D(num_priors * 4, (3, 3), padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name=name)(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        168.0,
                        max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])
    # Prediction from conv8_2
    num_priors = 6
    x = Conv2D(num_priors * 4, (3, 3), padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name=name)(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        222.0,
                        max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])
    # Prediction from pool6
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Dense(num_priors * num_classes, name=name)(net['pool6'])
    net['pool6_mbox_conf_flat'] = x
    priorbox = PriorBox(img_size,
                        276.0,
                        max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    if K.common.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])
    # Gather all predictions
    net['mbox_loc'] = concatenate(
        [  #net['conv4_3_norm_mbox_loc_flat'],
            net['fc7_mbox_loc_flat'], net['conv6_2_mbox_loc_flat'],
            net['conv7_2_mbox_loc_flat'], net['conv8_2_mbox_loc_flat'],
            net['pool6_mbox_loc_flat']
        ],
        #                            mode='concat', concat_
        axis=1,
        name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [  #net['conv4_3_norm_mbox_conf_flat'],
            net['fc7_mbox_conf_flat'], net['conv6_2_mbox_conf_flat'],
            net['conv7_2_mbox_conf_flat'], net['conv8_2_mbox_conf_flat'],
            net['pool6_mbox_conf_flat']
        ],
        #                             mode='concat', concat_
        axis=1,
        name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [  #net['conv4_3_norm_mbox_priorbox'],
            net['fc7_mbox_priorbox'], net['conv6_2_mbox_priorbox'],
            net['conv7_2_mbox_priorbox'], net['conv8_2_mbox_priorbox'],
            net['pool6_mbox_priorbox']
        ],
        #                                 mode='concat', concat_
        axis=1,
        name='mbox_priorbox')
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    elif hasattr(net['mbox_loc'], 'int_shape'):
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        #                               mode='concat', concat_
        axis=2,
        name='predictions')
    model = Model(net['input'], net['predictions'])

    #    model = Model(net['input'], net['mbox_loc'])
    #    plot_model(model, to_file='model.png')

    return model
Esempio n. 9
0
def SSD(input_shape=(300, 300, 3), num_classes=21, segmentation_head=False, depth_head=False):
    """Build an SSD detector on a ResNet50 backbone, with optional dense heads.

    # Arguments
        input_shape: Shape of the input image, expected to be (300, 300, 3).
        num_classes: Number of classes including background.
        segmentation_head: If True, add a 'segmentation' output: a small conv
            stack on the shared `psp` features ending in a `num_classes`
            channel 1x1 conv, passed through `Interp` with the input's
            spatial shape and a sigmoid.
        depth_head: If True, add a 'depth_map' output: a conv stack on the
            shared `psp` features ending in a single-channel conv, passed
            through `Interp` with the input's spatial shape and a ReLU.

    # Returns
        A Keras `Model`. Without extra heads its single output is `ssd_out`,
        the concatenation along axis 2 of the reshaped box offsets, the
        softmax class confidences and the prior boxes. With extra heads the
        outputs are the list `[ssd_out, segmentation?, depth_map?]`, in that
        order.

    conv3_4  conv4_6   fc7   conv6_2   conv7_2   pool6
      +       +         +      +           +       +
      |       |         |      |           |       |
      |       |         v      v           |       |
      |       |                            |       |
      |       |    +----------------+      |       |
      |       +--> |                | <----+       |
      |            |  Concatenate   |              |
      +----------> |                |  <-----------+
                   +-------+--------+
                           |
                           v
                       prediction


    # References
        SSD: https://arxiv.org/abs/1512.02325
        Rainbow SSD: https://arxiv.org/abs/1705.09587
    """
    net = {}
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # Both dense heads read the shared `psp` feature stack, so the
    # interpolated feature maps must be collected when either one is enabled.
    # (Previously they were only built for `segmentation_head`, which made
    # `depth_head=True` fail on an unbound `psp`.)
    build_psp = segmentation_head or depth_head

    ###########################################################################
    # Backbone: ResNet50 ######################################################

    # Zero-padding is needed for backward compatibility.
    x = ZeroPadding2D((3, 3))(input_tensor)
    backbone = ResNet50(include_top=False, input_tensor=x)
    resnet_out = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                              name='pool5v')(backbone.get_layer('activation_49').output)

    net['conv3_4'] = backbone.get_layer("activation_22").output
    net['conv4_6'] = backbone.get_layer("activation_40").output

    ###########################################################################
    # Extra feature layers on top of the backbone #############################

    # FC6
    net['fc6'] = Conv2D(1024, (3, 3), dilation_rate=(6, 6),
                        activation='relu', padding='same',
                        name='fc6')(resnet_out)
    # FC7
    net['fc7'] = Conv2D(1024, (1, 1), activation='relu',
                        padding='same', name='fc7')(net['fc6'])
    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1), activation='relu',
                            padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512, (3, 3), strides=(2, 2),
                            activation='relu', padding='same',
                            name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128, (1, 1), activation='relu',
                            padding='same',
                            name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = Conv2D(256, (3, 3), strides=(2, 2),
                            activation='relu', padding='same',
                            name='conv7_2')(net['conv7_1'])
    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1), activation='relu',
                            padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256, (3, 3), strides=(2, 2),
                            activation='relu', padding='same',
                            name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    ###########################################################################

    asp0 = [1. / 2, 1, 1., 2.]
    asp1 = [1. / 3, 1. / 2, 1, 1., 2., 3.]
    scales = [0.1, 0.2, 0.38, 0.56, 0.74, 0.92, 1.1]

    if build_psp:
        net['psp1'] = Lambda(Interp, arguments={'shape': (60, 60)})(backbone.output)

    # NOTE: in the classifiers below the layer names ('fc7_mbox_*',
    # 'conv6_2_mbox_*', ...) do not match the feature map each classifier
    # actually reads from. They are kept unchanged so that weights saved
    # under these names still load.

    ###########################################################################
    # CLASSIFIER:1 LAYER: conv3_4 #############################################

    num_priors = len(asp0)

    cl1_input = Normalize(20, name='conv3_4_norm')(net['conv3_4'])

    x = Conv2D(num_priors * 4, (3, 3), strides=(1, 1), dilation_rate=(2, 2),
               padding='same', name='conv3_4_norm_mbox_loc')(cl1_input)
    net['conv3_4_norm_mbox_loc_flat'] = Flatten(name='conv3_4_norm_mbox_loc_flat')(x)

    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name="conv3_4_norm_mbox_conf")(cl1_input)
    if build_psp:
        # The un-flattened confidence map doubles as a PSP feature.
        net['psp6'] = Lambda(Interp, arguments={'shape': (60, 60)})(x)
    net['conv3_4_norm_mbox_conf_flat'] = Flatten(name='conv3_4_norm_mbox_conf_flat')(x)

    net['conv3_4_norm_mbox_priorbox'] = PriorBox(
        img_size, scales[0] * img_size[0], aspect_ratios=asp0,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv3_4_norm_mbox_priorbox')(cl1_input)

    ###########################################################################
    # CLASSIFIER:2 LAYER: conv4_6 #############################################

    num_priors = len(asp1)
    cl2_input = net['conv4_6']

    x = Conv2D(num_priors * 4, (3, 3), padding='same', name='fc7_mbox_loc')(cl2_input)
    net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(x)

    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name="fc7_mbox_conf")(cl2_input)
    if build_psp:
        net['psp5'] = Lambda(Interp, arguments={'shape': (60, 60)})(x)
    net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(x)

    net['fc7_mbox_priorbox'] = PriorBox(
        img_size, scales[1] * img_size[0], max_size=scales[2] * img_size[0],
        aspect_ratios=asp1,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='fc7_mbox_priorbox')(cl2_input)

    ###########################################################################
    # CLASSIFIER:3 LAYER: fc7 #################################################

    num_priors = len(asp1)

    cl3_input = Conv2D(512, (1, 1), activation='relu', padding='same',
                       name='fc7_mbox_pre')(net['fc7'])

    x = Conv2D(num_priors * 4, (3, 3), padding='same', name='conv6_2_mbox_loc')(cl3_input)
    net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(x)

    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name="conv6_2_mbox_conf")(cl3_input)
    if build_psp:
        net['psp4'] = Lambda(Interp, arguments={'shape': (60, 60)})(x)
    net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(x)

    net['conv6_2_mbox_priorbox'] = PriorBox(
        img_size, scales[2] * img_size[0], max_size=scales[3] * img_size[0],
        aspect_ratios=asp1,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv6_2_mbox_priorbox')(cl3_input)

    ###########################################################################
    # CLASSIFIER:4 LAYER: conv6_2 #############################################

    num_priors = len(asp1)

    cl4_input = Conv2D(256, (1, 1), activation='relu', padding='same',
                       name='conv6_2_mbox_pre')(net['conv6_2'])

    x = Conv2D(num_priors * 4, (3, 3), padding='same', name='conv7_2_mbox_loc')(cl4_input)
    net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(x)

    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name="conv7_2_mbox_conf")(cl4_input)
    if build_psp:
        net['psp3'] = Lambda(Interp, arguments={'shape': (60, 60)})(x)
    net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(x)

    net['conv7_2_mbox_priorbox'] = PriorBox(
        img_size, scales[3] * img_size[0], max_size=scales[4] * img_size[0],
        aspect_ratios=asp1,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv7_2_mbox_priorbox')(cl4_input)

    ###########################################################################
    # CLASSIFIER:5 LAYER: conv7_2 #############################################

    num_priors = len(asp1)
    cl5_input = net['conv7_2']

    x = Conv2D(num_priors * 4, (3, 3), padding='same', name='conv8_2_mbox_loc')(cl5_input)
    net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(x)

    x = Conv2D(num_priors * num_classes, (3, 3), padding='same',
               name="conv8_2_mbox_conf")(cl5_input)
    if build_psp:
        net['psp2'] = Lambda(Interp, arguments={'shape': (60, 60)})(x)
    net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(x)

    net['conv8_2_mbox_priorbox'] = PriorBox(
        img_size, scales[4] * img_size[0], max_size=scales[5] * img_size[0],
        aspect_ratios=asp1,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv8_2_mbox_priorbox')(cl5_input)

    ###########################################################################
    # CLASSIFIER:6 LAYER: pool6 ###############################################

    num_priors = len(asp0)
    cl6_input = net['pool6']

    # pool6 is already flat (global average pooling), so Dense layers replace
    # the conv + Flatten pair used by the other classifiers.
    net['pool6_mbox_loc_flat'] = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(cl6_input)
    net['pool6_mbox_conf_flat'] = Dense(num_priors * num_classes,
                                        name="pool6_mbox_conf_flat")(cl6_input)

    # PriorBox is fed a 1x1 feature map, so restore the spatial dimensions.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    x = Reshape(target_shape, name='pool6_reshaped')(cl6_input)

    net['pool6_mbox_priorbox'] = PriorBox(
        img_size, scales[5] * img_size[0], max_size=scales[6] * img_size[0],
        aspect_ratios=asp0,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='pool6_mbox_priorbox')(x)

    ###########################################################################
    # Gather all predictions ##################################################

    sources = ['conv3_4_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']

    net['mbox_loc'] = Concatenate(axis=1, name='mbox_loc')(
        [net[src + '_mbox_loc_flat'] for src in sources])

    net['mbox_conf'] = Concatenate(axis=1, name='mbox_conf')(
        [net[src + '_mbox_conf_flat'] for src in sources])

    net['mbox_priorbox'] = Concatenate(axis=1, name='mbox_priorbox')(
        [net[src + '_mbox_priorbox'] for src in sources])

    ###########################################################################
    # Dense heads on the shared PSP features ##################################

    if build_psp:
        psp = Concatenate(axis=-1, name='psp')([
            net['psp1'],
            net['psp2'],
            net['psp3'],
            net['psp4'],
            net['psp5'],
            net['psp6'],
        ])
        # NOTE(review): this sets an attribute on the concatenated tensor,
        # not on a layer, so it presumably does not freeze anything - confirm
        # the intent before relying on it.
        psp.trainable = False

    if segmentation_head:
        x = Conv2D(256, (3, 3), strides=(1, 1), padding="same", name="seg_conv1_1")(psp)
        x = Activation('relu')(x)
        x = Conv2D(256, (3, 3), strides=(1, 1), padding="same", name="seg_conv1_2")(x)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="seg_conv1_2_bn")(x)
        x = Activation('relu')(x)
        x = Dropout(0.1)(x)

        x = Conv2D(num_classes, (1, 1), strides=(1, 1), name="seg_conv_last")(x)
        x = Lambda(Interp, arguments={'shape': (input_shape[0], input_shape[1])})(x)
        segmentation = Activation('sigmoid', name='segmentation')(x)

    if depth_head:
        # Built after `psp` exists; the original placement referenced `psp`
        # before it was assigned.
        x = Conv2D(512, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv1_3", use_bias=False)(psp)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="depth_conv1_3_bn")(x)
        x = Activation('relu')(x)
        x = Dropout(0.1)(x)

        x = Conv2D(512, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv2_3", use_bias=False)(x)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="depth_conv2_3_bn")(x)
        x = Activation('relu')(x)
        # Named 'depth_conv3_3' because layer names must be unique within a
        # model; this layer used to reuse 'depth_conv2_3'.
        x = Conv2D(1, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv3_3", use_bias=False)(x)
        x = Lambda(Interp, arguments={'shape': (input_shape[0], input_shape[1])})(x)
        depth_map = Activation('relu', name="depth_map")(x)

    ###########################################################################
    # Final SSD output ########################################################

    # Each box contributes 4 offsets to the flattened localisation vector.
    # Older Keras tensors expose `_keras_shape`; otherwise ask the backend so
    # that `num_boxes` is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4), name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes), name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax', name='mbox_conf_final')(net['mbox_conf'])

    ssd_out = Concatenate(axis=2, name='ssd_out')([
        net['mbox_loc'],
        net['mbox_conf'],
        net['mbox_priorbox']])

    if not segmentation_head and not depth_head:
        return Model(input_tensor, ssd_out)

    out = [ssd_out]
    if segmentation_head:
        out.append(segmentation)
    if depth_head:
        out.append(depth_map)
    return Model(input_tensor, out)
Esempio n. 10
0
def ssd512(input_shape=(512, 512, 3),
           num_classes=21,
           min_scale=0.1,
           max_scale=0.9,
           scales=None,
           aspect_ratios_global=None,
           aspect_ratios_per_layer=None,
           two_boxes_for_ar1=True,
           limit_boxes=True,
           variances=[0.1, 0.1, 0.2, 0.2],
           weights_path=None,
           frozen_layers=None,
           summary=False,
           plot=False):

    n_predictor_layers = 7  # The number of predictor conv layers in the network is 6 for the original SSD300
    default_aspect_ratios = [[0.5, 1.0, 2.0],
                             [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                             [0.5, 1.0, 2.0],
                             [0.5, 1.0, 2.0]]

    # Get a few exceptions out of the way first
    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        print(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` both are None. Default aspect ratios of the paper implementation are used.")

    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(
                    n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(
                n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        aspect_ratios = default_aspect_ratios
    elif aspect_ratios_per_layer and aspect_ratios_global is None:
        aspect_ratios = aspect_ratios_per_layer
    elif aspect_ratios_per_layer is None and aspect_ratios_global:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    aspect_ratios_conv4_3 = aspect_ratios[0]
    aspect_ratios_fc7     = aspect_ratios[1]
    aspect_ratios_conv6_2 = aspect_ratios[2]
    aspect_ratios_conv7_2 = aspect_ratios[3]
    aspect_ratios_conv8_2 = aspect_ratios[4]
    aspect_ratios_conv9_2 = aspect_ratios[5]
    aspect_ratios_conv10_2 = aspect_ratios[6]

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios:
        n_boxes = []
        for aspect_ratio in aspect_ratios:
            if (1 in aspect_ratio) & two_boxes_for_ar1:
                n_boxes.append(len(aspect_ratio) + 1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(aspect_ratio))
        n_boxes_conv4_3 = n_boxes[0]
        n_boxes_fc7 = n_boxes[1]
        n_boxes_conv6_2 = n_boxes[2]
        n_boxes_conv7_2 = n_boxes[3]
        n_boxes_conv8_2 = n_boxes[4]
        n_boxes_conv9_2 = n_boxes[5]
        n_boxes_conv10_2 = n_boxes[6]

    input_layer = Input(shape=input_shape)
    img_height, img_width, img_channels = input_shape[0], input_shape[1], input_shape[2]

    # Block 1 -----------------------------------------------
    conv1_1 = Conv2D(64, (3, 3),
                     name='conv1_1',
                     padding='same',
                     activation='relu')(input_layer)

    conv1_2 = Conv2D(64, (3, 3),
                     name='conv1_2',
                     padding='same',
                     activation='relu')(conv1_1)

    pool1 = MaxPooling2D(name='pool1',
                         pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same', )(conv1_2)

    # Block 2 ----------------------------------------------
    conv2_1 = Conv2D(128, (3, 3),
                     name='conv2_1',
                     padding='same',
                     activation='relu')(pool1)

    conv2_2 = Conv2D(128, (3, 3),
                     name='conv2_2',
                     padding='same',
                     activation='relu')(conv2_1)

    pool2 = MaxPooling2D(name='pool2',
                         pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same')(conv2_2)

    # Block 3 ----------------------------------------------
    conv3_1 = Conv2D(256, (3, 3),
                     name='conv3_1',
                     padding='same',
                     activation='relu')(pool2)

    conv3_2 = Conv2D(256, (3, 3),
                     name='conv3_2',
                     padding='same',
                     activation='relu')(conv3_1)

    conv3_3 = Conv2D(256, (3, 3),
                     name='conv3_3',
                     padding='same',
                     activation='relu')(conv3_2)

    pool3 = MaxPooling2D(name='pool3',
                         pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same')(conv3_3)

    # Block 4 ---------------------------------------------
    conv4_1 = Conv2D(512, (3, 3),
                     name='conv4_1',
                     padding='same',
                     activation='relu')(pool3)

    conv4_2 = Conv2D(512, (3, 3),
                     name='conv4_2',
                     padding='same',
                     activation='relu')(conv4_1)

    conv4_3 = Conv2D(512, (3, 3),
                     name='conv4_3',
                     padding='same',
                     activation='relu')(conv4_2)

    pool4 = MaxPooling2D(name='pool4',
                         pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same')(conv4_3)

    # Block 5 --------------------------------------------
    conv5_1 = Conv2D(512, (3, 3),
                     name='conv5_1',
                     padding='same',
                     activation='relu')(pool4)

    conv5_2 = Conv2D(512, (3, 3),
                     name='conv5_2',
                     padding='same',
                     activation='relu')(conv5_1)

    conv5_3 = Conv2D(512, (3, 3),
                     name='conv5_3',
                     padding='same',
                     activation='relu')(conv5_2)

    pool5 = MaxPooling2D(name='pool5',
                         pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same')(conv5_3)

    # Block 6 --------------------------------------------
    fc6 = Conv2D(1024, (3, 3),
                 name='fc6',
                 dilation_rate=(6, 6),
                 padding='same',
                 activation='relu'
                 )(pool5)

    # Block 7 --------------------------------------------
    fc7 = Conv2D(1024, (1, 1),
                 name='fc7',
                 padding='same',
                 activation='relu'
                 )(fc6)

    # EXTRAS
    # Block 8 --------------------------------------------
    conv6_1 = Conv2D(256, (1, 1),
                     name='conv6_1',
                     padding='same',
                     activation='relu')(fc7)

    conv6_1z = ZeroPadding2D(name='conv6_1z')(conv6_1)

    conv6_2 = Conv2D(512, (3, 3),
                     name='conv6_2',
                     strides=(2, 2),
                     padding='valid',
                     activation='relu')(conv6_1z)

    # Block 9 --------------------------------------------
    conv7_1 = Conv2D(128, (1, 1),
                     name='conv7_1',
                     padding='same',
                     activation='relu')(conv6_2)

    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)

    conv7_2 = Conv2D(256, (3, 3),
                     name='conv7_2',
                     padding='valid',
                     strides=(2, 2),
                     activation='relu')(conv7_1z)

    # Block 10 -------------------------------------------
    conv8_1 = Conv2D(128, (1, 1),
                     name='conv8_1',
                     padding='same',
                     activation='relu')(conv7_2)

    conv8_2 = Conv2D(256, (3, 3),
                     name='conv8_2',
                     padding='valid',
                     strides=(1, 1),
                     activation='relu')(conv8_1)

    # Block 11 -------------------------------------------
    conv9_1 = Conv2D(128, (1, 1),
                     name='conv9_1',
                     padding='same',
                     activation='relu')(conv8_2)

    conv9_2 = Conv2D(256, (3, 3),
                     name='conv9_2',
                     padding='valid',
                     strides=(1, 1),
                     activation='relu')(conv9_1)

    # Block 12 -------------------------------------------
    conv10_1 = Conv2D(128, (1, 1),
                     name='conv10_1',
                     padding='same',
                     activation='relu')(conv9_2)

    conv10_2 = Conv2D(256, (4, 4),
                     name='conv10_2',
                     padding='valid',
                     strides=(1, 1),
                     activation='relu')(conv10_1)

    # Last Pool ------------------------------------------
    # pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Prediction from conv4_3 ----------------------------
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)

    conv4_3_norm_mbox_loc = Conv2D(n_boxes_conv4_3 * 4, (3, 3),
                                   name='conv4_3_norm_mbox_loc',
                                   padding='same')(conv4_3_norm)

    conv4_3_norm_mbox_loc_flat = Flatten(name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)

    conv4_3_norm_mbox_conf = Conv2D(n_boxes_conv4_3 * num_classes, (3, 3),
                                    name='conv4_3_norm_mbox_conf',
                                    padding='same')(conv4_3_norm)

    conv4_3_norm_mbox_conf_flat = Flatten(name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)

    conv4_3_norm_mbox_priorbox = PriorBox(img_height, img_width,
                                          this_scale=scales[0], next_scale=scales[1],
                                          aspect_ratios=aspect_ratios_conv4_3,
                                          two_boxes_for_ar1=two_boxes_for_ar1,
                                          limit_boxes=limit_boxes,
                                          variances=variances,
                                          name='conv4_3_norm_mbox_priorbox')(conv4_3_norm)

    # Prediction from fc7 ---------------------------------
    fc7_mbox_conf = Conv2D(n_boxes_fc7 * num_classes, (3, 3),
                           padding='same',
                           name='fc7_mbox_conf')(fc7)

    fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)

    fc7_mbox_loc = Conv2D(n_boxes_fc7 * 4, (3, 3),
                          name='fc7_mbox_loc',
                          padding='same')(fc7)

    fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)

    fc7_mbox_priorbox = PriorBox(img_height, img_width,
                                 this_scale=scales[1], next_scale=scales[2],
                                 aspect_ratios=aspect_ratios_fc7,
                                 two_boxes_for_ar1=two_boxes_for_ar1,
                                 limit_boxes=limit_boxes,
                                 variances=variances,
                                 name='fc7_mbox_priorbox')(fc7)

    # Prediction from conv6_2 ------------------------------
    conv6_2_mbox_conf = Conv2D(n_boxes_conv6_2 * num_classes, (3, 3),
                               padding='same',
                               name='conv6_2_mbox_conf')(conv6_2)

    conv6_2_mbox_conf_flat = Flatten(name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)

    conv6_2_mbox_loc = Conv2D(n_boxes_conv6_2 * 4, (3, 3,),
                              name='conv6_2_mbox_loc',
                              padding='same')(conv6_2)

    conv6_2_mbox_loc_flat = Flatten(name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)

    conv6_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[2], next_scale=scales[3],
                                     aspect_ratios=aspect_ratios_conv6_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes,
                                     variances=variances,
                                     name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2 --------------------------------
    conv7_2_mbox_conf = Conv2D(n_boxes_conv7_2 * num_classes, (3, 3),
                               padding='same',
                               name='conv7_2_mbox_conf')(conv7_2)

    conv7_2_mbox_conf_flat = Flatten(name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)

    conv7_2_mbox_loc = Conv2D(n_boxes_conv7_2 * 4, (3, 3),
                              padding='same',
                              name='conv7_2_mbox_loc')(conv7_2)

    conv7_2_mbox_loc_flat = Flatten(name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)

    conv7_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[3], next_scale=scales[4],
                                     aspect_ratios=aspect_ratios_conv7_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes,
                                     variances=variances,
                                     name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2 -------------------------------
    conv8_2_mbox_conf = Conv2D(n_boxes_conv8_2 * num_classes, (3, 3),
                               padding='same',
                               name='conv8_2_mbox_conf')(conv8_2)

    conv8_2_mbox_conf_flat = Flatten(name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)

    conv8_2_mbox_loc = Conv2D(n_boxes_conv8_2 * 4, (3, 3),
                              padding='same',
                              name='conv8_2_mbox_loc')(conv8_2)

    conv8_2_mbox_loc_flat = Flatten(name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)

    conv8_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[4], next_scale=scales[5],
                                     aspect_ratios=aspect_ratios_conv8_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes,
                                     variances=variances,
                                     name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from conv9_2 -------------------------------
    conv9_2_mbox_conf = Conv2D(n_boxes_conv9_2 * num_classes, (3, 3),
                               padding='same',
                               name='conv9_2_mbox_conf')(conv9_2)

    conv9_2_mbox_conf_flat = Flatten(name='conv9_2_mbox_conf_flat')(conv9_2_mbox_conf)

    conv9_2_mbox_loc = Conv2D(n_boxes_conv9_2 * 4, (3, 3),
                              padding='same',
                              name='conv9_2_mbox_loc')(conv9_2)

    conv9_2_mbox_loc_flat = Flatten(name='conv9_2_mbox_loc_flat')(conv9_2_mbox_loc)

    conv9_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[5], next_scale=scales[6],
                                     aspect_ratios=aspect_ratios_conv9_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes,
                                     variances=variances,
                                     name='conv9_2_mbox_priorbox')(conv9_2)

    # Prediction from conv10_2 --------------------------------------------
    conv10_2_mbox_conf = Conv2D(n_boxes_conv10_2 * num_classes, (3, 3),
                               padding='same',
                               name='conv10_2_mbox_conf')(conv10_2)

    conv10_2_mbox_conf_flat = Flatten(name='conv10_2_mbox_conf_flat')(conv10_2_mbox_conf)

    conv10_2_mbox_loc = Conv2D(n_boxes_conv10_2 * 4, (3, 3),
                              padding='same',
                              name='conv10_2_mbox_loc')(conv10_2)

    conv10_2_mbox_loc_flat = Flatten(name='conv10_2_mbox_loc_flat')(conv10_2_mbox_loc)

    conv10_2_mbox_priorbox = PriorBox(img_height, img_width,
                                      this_scale=scales[6], next_scale=scales[7],
                                      aspect_ratios=aspect_ratios_conv10_2,
                                      two_boxes_for_ar1=two_boxes_for_ar1,
                                      limit_boxes=limit_boxes,
                                      variances=variances,
                                      name='conv10_2_mbox_priorbox')(conv10_2)


    # Gather all predictions -------------------------------------------
    mbox_loc = concatenate([conv4_3_norm_mbox_loc_flat,
                            fc7_mbox_loc_flat,
                            conv6_2_mbox_loc_flat,
                            conv7_2_mbox_loc_flat,
                            conv8_2_mbox_loc_flat,
                            conv9_2_mbox_loc_flat,
                            conv10_2_mbox_loc_flat],
                           axis=1,
                           name='mbox_loc')

    mbox_conf = concatenate([conv4_3_norm_mbox_conf_flat,
                             fc7_mbox_conf_flat,
                             conv6_2_mbox_conf_flat,
                             conv7_2_mbox_conf_flat,
                             conv8_2_mbox_conf_flat,
                             conv9_2_mbox_conf_flat,
                             conv10_2_mbox_conf_flat],
                            axis=1,
                            name='mbox_conf')

    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape((-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape((-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape((-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape((-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape((-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape((-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)
    conv10_2_mbox_priorbox_reshape = Reshape((-1, 8), name='conv10_2_mbox_priorbox_reshape')(conv10_2_mbox_priorbox)

    mbox_priorbox = concatenate([conv4_3_norm_mbox_priorbox_reshape,
                                 fc7_mbox_priorbox_reshape,
                                 conv6_2_mbox_priorbox_reshape,
                                 conv7_2_mbox_priorbox_reshape,
                                 conv8_2_mbox_priorbox_reshape,
                                 conv9_2_mbox_priorbox_reshape,
                                 conv10_2_mbox_priorbox_reshape],
                                axis=1, name='mbox_priorbox')

    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    elif hasattr(mbox_loc, 'int_shape'):
        num_boxes = K.int_shape(mbox_loc)[-1] // 4
    mbox_loc = Reshape((num_boxes, 4),
                       name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax',
                           name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc,
                               mbox_conf,
                               mbox_priorbox],
                              axis=2,
                              name='predictions')

    model = Model(inputs=input_layer, outputs=predictions)

    if weights_path is not None:
        model.load_weights(weights_path, by_name=True)

    if frozen_layers is not None:
        for layer in model.layers:
            if layer.name in frozen_layers:
                layer.trainable = False

    if summary:
        model.summary()

    if plot:
        plot_model(model, to_file='SSD512.png')
        SVG(model_to_dot(model).create(prog='dot', format='svg'))

    return model
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    Builds the network with the Keras 1 functional API: five
    convolution/pooling blocks, an atrous ``fc6``, a 1x1 ``fc7``, three
    extra convolution blocks (6-8) and a global average pool. Six feature
    maps (``conv4_3_norm``, ``fc7``, ``conv6_2``, ``conv7_2``, ``conv8_2``
    and ``pool6``) each get a localisation head, a confidence head and a
    ``PriorBox`` layer; their outputs are concatenated into one
    ``predictions`` tensor.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A Keras ``Model`` from the input tensor to ``predictions``, which is
        the concatenation along axis 2 of the reshaped box offsets
        ``(num_boxes, 4)``, the softmax class scores
        ``(num_boxes, num_classes)`` and the merged prior-box tensor.

    # References
        https://arxiv.org/abs/1512.02325
    """
    # The SSD network: every intermediate tensor is stored under its layer
    # name so later layers can look up their inputs by key.
    net = {}

    # Input source.
    input_tensor = Input(shape=input_shape)
    # Passed to every PriorBox layer.
    # NOTE(review): built from input_shape[1] and input_shape[0], so it is
    # only (width, height) for channels-last shapes -- confirm before using
    # a (3, 300, 300) input.
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor

    # Confidence layers get a "_<num_classes>" name suffix whenever the
    # class count differs from 21.
    # NOTE(review): presumably so that by-name weight loading does not match
    # heads built for another class count -- confirm.
    class_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)

    def conv_relu(name, source, filters, size, **overrides):
        """Store a ReLU convolution of ``net[source]`` under ``net[name]``."""
        options = {'activation': 'relu', 'border_mode': 'same', 'name': name}
        options.update(overrides)
        net[name] = Convolution2D(filters, size, size,
                                  **options)(net[source])

    def max_pool(name, source, pool_size=(2, 2), strides=(2, 2)):
        """Store a 'same'-padded max pooling of ``net[source]``."""
        net[name] = MaxPooling2D(pool_size,
                                 strides=strides,
                                 border_mode='same',
                                 name=name)(net[source])

    def conv_head(prefix, num_priors, min_size, max_size, aspect_ratios):
        """Attach loc / conf / prior-box layers to feature map ``net[prefix]``."""
        feature_map = net[prefix]

        # Box offsets: 4 values per prior.
        loc_key = prefix + '_mbox_loc'
        net[loc_key] = Convolution2D(num_priors * 4,
                                     3,
                                     3,
                                     border_mode='same',
                                     name=loc_key)(feature_map)
        net[loc_key + '_flat'] = Flatten(name=loc_key + '_flat')(net[loc_key])

        # Class scores: num_classes values per prior.
        conf_key = prefix + '_mbox_conf'
        net[conf_key] = Convolution2D(num_priors * num_classes,
                                      3,
                                      3,
                                      border_mode='same',
                                      name=conf_key + class_suffix)(feature_map)
        net[conf_key + '_flat'] = Flatten(
            name=conf_key + '_flat')(net[conf_key])

        # max_size is only forwarded when given, exactly as the individual
        # PriorBox calls did (the conv4_3 head has no max_size).
        prior_options = {'aspect_ratios': aspect_ratios,
                         'variances': [0.1, 0.1, 0.2, 0.2],
                         'name': prefix + '_mbox_priorbox'}
        if max_size is not None:
            prior_options['max_size'] = max_size
        priorbox = PriorBox(img_size, min_size, **prior_options)
        net[prefix + '_mbox_priorbox'] = priorbox(feature_map)

    # Block 1: two convolutions, then pooling.
    conv_relu('conv1_1', 'input', 64, 3)
    conv_relu('conv1_2', 'conv1_1', 64, 3)
    max_pool('pool1', 'conv1_2')

    # Block 2: two convolutions, then pooling.
    conv_relu('conv2_1', 'pool1', 128, 3)
    conv_relu('conv2_2', 'conv2_1', 128, 3)
    max_pool('pool2', 'conv2_2')

    # Block 3: three convolutions, then pooling.
    conv_relu('conv3_1', 'pool2', 256, 3)
    conv_relu('conv3_2', 'conv3_1', 256, 3)
    conv_relu('conv3_3', 'conv3_2', 256, 3)
    max_pool('pool3', 'conv3_3')

    # Block 4: three convolutions, then pooling.
    conv_relu('conv4_1', 'pool3', 512, 3)
    conv_relu('conv4_2', 'conv4_1', 512, 3)
    conv_relu('conv4_3', 'conv4_2', 512, 3)
    max_pool('pool4', 'conv4_3')

    # Block 5: three convolutions, then a 3x3 stride-1 pooling.
    conv_relu('conv5_1', 'pool4', 512, 3)
    conv_relu('conv5_2', 'conv5_1', 512, 3)
    conv_relu('conv5_3', 'conv5_2', 512, 3)
    max_pool('pool5', 'conv5_3', pool_size=(3, 3), strides=(1, 1))

    # FC6: atrous (dilated) convolution. No dropout follows it; the Dropout
    # layer was already commented out in this implementation.
    net['fc6'] = AtrousConvolution2D(1024,
                                     3,
                                     3,
                                     atrous_rate=(6, 6),
                                     activation='relu',
                                     border_mode='same',
                                     name='fc6')(net['pool5'])

    # FC7: 1x1 convolution (likewise without dropout).
    conv_relu('fc7', 'fc6', 1024, 1)

    # Block 6: 1x1 convolution, then a stride-2 3x3 convolution.
    conv_relu('conv6_1', 'fc7', 256, 1)
    conv_relu('conv6_2', 'conv6_1', 512, 3, subsample=(2, 2))

    # Block 7: 1x1 convolution, explicit zero padding, then a stride-2
    # 'valid' 3x3 convolution.
    conv_relu('conv7_1', 'conv6_2', 128, 1)
    net['conv7_1_padded'] = ZeroPadding2D()(net['conv7_1'])
    conv_relu('conv7_2', 'conv7_1_padded', 256, 3,
              subsample=(2, 2), border_mode='valid')

    # Block 8: 1x1 convolution, then a stride-2 3x3 convolution.
    conv_relu('conv8_1', 'conv7_2', 128, 1)
    conv_relu('conv8_2', 'conv8_1', 256, 3, subsample=(2, 2))

    # Last pool: global average over conv8_2.
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3 (normalised first; 3 priors, no max_size).
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    conv_head('conv4_3_norm', 3, 30.0, None, [2])

    # Predictions from fc7, conv6_2, conv7_2 and conv8_2 (6 priors each).
    conv_head('fc7', 6, 60.0, 114.0, [2, 3])
    conv_head('conv6_2', 6, 114.0, 168.0, [2, 3])
    conv_head('conv7_2', 6, 168.0, 222.0, [2, 3])
    conv_head('conv8_2', 6, 222.0, 276.0, [2, 3])

    # Prediction from pool6: the pooled tensor is already flat, so Dense
    # layers replace the convolution + Flatten pairs.
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(num_priors * 4,
                                       name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name='pool6_mbox_conf_flat' + class_suffix)(net['pool6'])
    priorbox = PriorBox(img_size,
                        276.0,
                        max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is applied to a 1x1 feature map rebuilt from the pooled
    # vector; the channel axis position follows the backend dim ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions, always in the same feature-map order.
    heads = ('conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6')
    net['mbox_loc'] = merge([net[head + '_mbox_loc_flat'] for head in heads],
                            mode='concat',
                            concat_axis=1,
                            name='mbox_loc')
    net['mbox_conf'] = merge([net[head + '_mbox_conf_flat'] for head in heads],
                             mode='concat',
                             concat_axis=1,
                             name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[head + '_mbox_priorbox'] for head in heads],
        mode='concat',
        concat_axis=1,
        name='mbox_priorbox')

    # Reshape and merge. The flat loc vector holds 4 values per box.
    # `int_shape` is a backend function, not a tensor attribute, so the
    # former `hasattr(tensor, 'int_shape')` test could never succeed and
    # left `num_boxes` unbound; fall back to K.int_shape unconditionally.
    flat_loc = net['mbox_loc']
    if hasattr(flat_loc, '_keras_shape'):
        num_boxes = flat_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(flat_loc)[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat',
        concat_axis=2,
        name='predictions')

    # Build model.
    model = Model(net['input'], net['predictions'])
    return model
Esempio n. 12
0
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    Builds a stack of convolution/pooling blocks (conv1_1 ... conv5_3,
    fc6, fc7, conv6_x ... conv8_x, pool6) and attaches a localization
    head, a confidence head and a `PriorBox` layer to six of the
    intermediate feature maps.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` mapping the input tensor to the `predictions`
        tensor: the concatenation along axis 2 of the box offsets
        (reshaped to `(num_boxes, 4)`), the softmax class scores
        (reshaped to `(num_boxes, num_classes)`) and the prior-box tensor.

    # Side effects
        Calls `keras.utils.plot_model` to render the model structure to
        `./ssdmodel.png` every time the model is built.

    # References
        https://arxiv.org/abs/1512.02325
    """

    # NOTE(review): this MarkNet graph is built but never wired into the
    # SSD graph (the merge near the pool6 head is commented out) --
    # confirm whether the call is still needed.
    net2 = MarkNet(input_shape=(64, 64, 3))

    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    # Passed as an argument to every PriorBox layer below.
    img_size = (input_shape[1], input_shape[0])

    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_1')(net['pool3'])
    net['conv4_2'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_1')(net['pool4'])
    net['conv5_2'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_3')(net['conv5_2'])
    # pool5 uses a 3x3 window with stride 1, unlike pool1..pool4.
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                name='pool5')(net['conv5_3'])

    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same',
                               name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)

    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv6_1')(net['fc7'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    # Explicit zero padding followed by a 'valid' strided convolution.
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv7_2')(net['conv7_2'])

    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv8_2')(net['conv8_1'])

    # Last Pool (final feature output)
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    # The confidence layers get a class-count suffix whenever num_classes
    # differs from the default of 21.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, 3, 3,
                                        border_mode='same',
                                        name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, 3, 3,
                                         border_mode='same',
                                         name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # Prediction from conv6_2
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size, 114.0, max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # Prediction from conv7_2
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size, 168.0, max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # Prediction from conv8_2
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size, 222.0, max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # Prediction from pool6
    # pool6 is already flat, so Dense layers stand in for the
    # convolution + Flatten pairs used by the other heads.
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)

    x = Dense(num_priors * num_classes, name=name)(net['pool6'])

    # Merge with MarkNet (currently disabled)
    #    merge = Add()([x, net2['dense2m']])

    net['pool6_mbox_conf_flat'] = x  # merge#x
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is applied to a feature map, so pool6 is reshaped back to a
    # 1x1 map with 256 channels in the active dimension ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['pool6_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['pool6_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['pool6_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')

    # Each box contributes 4 values to the flattened localization tensor.
    # Use the cached `_keras_shape` when the tensor carries one, otherwise
    # ask the backend. The fallback must be unconditional: `int_shape` is a
    # backend function, not a tensor attribute, so guarding it with
    # `hasattr(tensor, 'int_shape')` would leave `num_boxes` unbound.
    mbox_loc_flat = net['mbox_loc']
    if hasattr(mbox_loc_flat, '_keras_shape'):
        num_boxes = mbox_loc_flat._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc_flat)[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    # Final output
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')

    model = Model(net['input'], net['predictions'])

    # Plot the model structure
    keras.utils.plot_model(model, "./ssdmodel.png", show_shapes=True)
    return model
Esempio n. 13
0
def SSD300(input_shape=(300, 300, 3), num_classes=21):
    """SSD300 architecture on a ResNet50-style backbone.

    Builds the residual stages through `conv_block` / `identity_block`,
    appends the fc6/fc7 and conv6_x ... conv8_x extra layers, and attaches
    a localization head, a confidence head and a `PriorBox` layer to six
    feature maps. Several heads keep the layer names of the VGG-based
    variant (e.g. `conv4_3_norm_*`, `fc7_mbox_*`) although they are fed
    from different layers here; see the inline comments.

    # Arguments
        input_shape: Shape of the input image,
            expected to be (300, 300, 3).
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` mapping the input tensor to the `predictions`
        tensor: the concatenation along axis 2 of the box offsets
        (reshaped to `(num_boxes, 4)`), the softmax class scores
        (reshaped to `(num_boxes, num_classes)`) and the prior-box tensor.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    # Passed to every PriorBox layer below.
    img_size = (input_shape[1], input_shape[0])

    ####################################################################################
    # ResNet50 architecture
    # (adapted from https://github.com/fchollet/deep-learning-models/resnet50.py)

    # input_tensor is created by Input(...) directly above; the wrapping
    # branch is kept from the adapted ResNet50 code.
    if not K.is_keras_tensor(input_tensor):
        net['input'] = Input(tensor=input_tensor)
    else:
        net['input'] = input_tensor
    # Axis handed to BatchNormalization, chosen from the dimension ordering.
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
    else:
        bn_axis = 1

    # Block 1
    x = ZeroPadding2D((3, 3))(net['input'])
    net['conv1'] = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
    net['bn_conv1'] = BatchNormalization(axis=bn_axis,
                                         name='bn_conv1')(net['conv1'])
    x = Activation('relu')(net['bn_conv1'])
    x = ZeroPadding2D((1, 1))(x)
    net['pool1'] = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Block 2
    net['conv2_1'] = conv_block(net['pool1'], 3, [64, 64, 256],
                                stage=2, block='a', strides=(1, 1))
    net['conv2_2'] = identity_block(net['conv2_1'], 3, [64, 64, 256],
                                    stage=2, block='b')
    net['conv2_3'] = identity_block(net['conv2_2'], 3, [64, 64, 256],
                                    stage=2, block='c')

    # Block 3
    net['conv3_1'] = conv_block(net['conv2_3'], 3, [128, 128, 512],
                                stage=3, block='a')
    net['conv3_2'] = identity_block(net['conv3_1'], 3, [128, 128, 512],
                                    stage=3, block='b')
    net['conv3_3'] = identity_block(net['conv3_2'], 3, [128, 128, 512],
                                    stage=3, block='c')
    net['conv3_4'] = identity_block(net['conv3_3'], 3, [128, 128, 512],
                                    stage=3, block='d')

    # Block 4
    net['conv4_1'] = conv_block(net['conv3_4'], 3, [256, 256, 1024],
                                stage=4, block='a')
    net['conv4_2'] = identity_block(net['conv4_1'], 3, [256, 256, 1024],
                                    stage=4, block='b')
    net['conv4_3'] = identity_block(net['conv4_2'], 3, [256, 256, 1024],
                                    stage=4, block='c')
    net['conv4_4'] = identity_block(net['conv4_3'], 3, [256, 256, 1024],
                                    stage=4, block='d')
    net['conv4_5'] = identity_block(net['conv4_4'], 3, [256, 256, 1024],
                                    stage=4, block='e')
    net['conv4_6'] = identity_block(net['conv4_5'], 3, [256, 256, 1024],
                                    stage=4, block='f')

    # Block 5
    net['conv5_1'] = conv_block(net['conv4_6'], 3, [512, 512, 2048],
                                stage=5, block='a')
    net['conv5_2'] = identity_block(net['conv5_1'], 3, [512, 512, 2048],
                                    stage=5, block='b')
    net['conv5_3'] = identity_block(net['conv5_2'], 3, [512, 512, 2048],
                                    stage=5, block='c')

    # net['pool5'] = AveragePooling2D((7, 7), name='pool5')(net['conv5_3'])
    # resnet uses this map directly onto the classification (top layer)
    # we will use the VGG pooling instead, which provides an appropriately sized input to fc6
    net['pool5v'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                 name='pool5v')(net['conv5_3'])

    # END ResNet50
    #####################################################################################

    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5v'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same',
                               name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)
    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv6_1')(net['fc7'])

    # Blocks 6-8 each use explicit zero padding followed by a 'valid'
    # strided convolution.
    net['conv6_2'] = ZeroPadding2D()(net['conv6_1'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv6_2')(net['conv6_2'])
    # Block 7
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = ZeroPadding2D()(net['conv8_1'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv8_2')(net['conv8_2'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv3_4 (still called conv4_3 in the remainder)
    # Will clean this up after training tests
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv3_4'])
    num_priors = 3
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    # The confidence layers get a class-count suffix whenever num_classes
    # differs from the default of 21.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Prediction from conv4_6 -- again, will replace after train test
    num_priors = 6
    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, 3, 3,
                                        border_mode='same',
                                        name='fc7_mbox_loc')(net['conv4_6'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Convolution2D(
        num_priors * num_classes, 3, 3, border_mode='same',
        name=name)(net['conv4_6'])  # changed from fc7
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')

    # Another change from fc7
    net['fc7_mbox_priorbox'] = priorbox(net['conv4_6'])

    # Prediction from this fc7 (it will still be called 6_2)
    # project it so that its channels are 512, as bounding box data
    net['fc7_mbox_pre'] = Convolution2D(512, 1, 1, activation='relu',
                                        border_mode='same',
                                        name='fc7_mbox_pre')(net['fc7'])
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv6_2_mbox_loc')(net['fc7_mbox_pre'])
    net['conv6_2_mbox_loc'] = x
    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['fc7_mbox_pre'])  # changed from conv6_2
    net['conv6_2_mbox_conf'] = x
    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size, 114.0, max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(
        net['fc7_mbox_pre'])  # changed from conv6_2

    # Prediction from conv6_2
    # Project it down to 256
    # (old conv7_2)
    net['conv6_2_mbox_pre'] = Convolution2D(256, 1, 1, activation='relu',
                                            border_mode='same',
                                            name='conv6_2_mbox_pre')(
                                                net['conv6_2'])
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv7_2_mbox_loc')(net['conv6_2_mbox_pre'])
    net['conv7_2_mbox_loc'] = x
    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(
                          net['conv6_2_mbox_pre'])  # changed from conv7_2
    net['conv7_2_mbox_conf'] = x
    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size, 168.0, max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    # old conv7_2
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv6_2_mbox_pre'])
    # Prediction from conv7_2
    # old (conv8_2)
    # no projections needed

    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv8_2_mbox_loc')(net['conv7_2'])
    net['conv8_2_mbox_loc'] = x
    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv7_2'])  # changed from conv8_2
    net['conv8_2_mbox_conf'] = x
    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size, 222.0, max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')

    net['conv8_2_mbox_priorbox'] = priorbox(
        net['conv7_2'])  # changed from conv8_2
    # Prediction from pool6
    # pool6 is already flat, so Dense layers stand in for the
    # convolution + Flatten pairs used by the other heads.
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Dense(num_priors * num_classes, name=name)(net['pool6'])
    net['pool6_mbox_conf_flat'] = x
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is applied to a feature map, so pool6 is reshaped back to a
    # 1x1 map with 256 channels in the active dimension ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])
    # Gather all predictions
    net['mbox_loc'] = merge([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['pool6_mbox_loc_flat']
    ],
                            mode='concat',
                            concat_axis=1,
                            name='mbox_loc')
    net['mbox_conf'] = merge([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['pool6_mbox_conf_flat']
    ],
                             mode='concat',
                             concat_axis=1,
                             name='mbox_conf')
    net['mbox_priorbox'] = merge([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['pool6_mbox_priorbox']
    ],
                                 mode='concat',
                                 concat_axis=1,
                                 name='mbox_priorbox')
    # Each box contributes 4 values to the flattened localization tensor.
    # Use the cached `_keras_shape` when the tensor carries one, otherwise
    # ask the backend. The fallback must be unconditional: `int_shape` is a
    # backend function, not a tensor attribute, so guarding it with
    # `hasattr(tensor, 'int_shape')` would leave `num_boxes` unbound.
    mbox_loc_flat = net['mbox_loc']
    if hasattr(mbox_loc_flat, '_keras_shape'):
        num_boxes = mbox_loc_flat._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc_flat)[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat',
        concat_axis=2,
        name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
Esempio n. 14
0
def _ssd_mobilenet_conv_bn_relu(x, layer_cls, filters, kernel_size, name,
                                strides=(1, 1)):
    """Apply ``layer_cls`` -> BatchNormalization -> ReLU to ``x``.

    # Arguments
        x: Input tensor.
        layer_cls: Convolution layer class to instantiate
            (``Conv2D`` or ``SeparableConv2D``).
        filters: Number of output filters.
        kernel_size: Kernel size tuple.
        name: Name of the convolution layer; must start with ``conv``.
            The batch-norm layer gets the same name with ``conv``
            replaced by ``bn`` (``conv14_2`` -> ``bn14_2``).
        strides: Convolution strides.

    # Returns
        The activated output tensor.
    """
    x = layer_cls(filters, kernel_size, strides=strides, padding='same',
                  name=name)(x)
    x = BatchNormalization(momentum=0.99, name='bn' + name[len('conv'):])(x)
    # The activation is intentionally left unnamed, as in the original code.
    return Activation('relu')(x)


def _ssd_mobilenet_head(x, prefix, num_priors, num_classes, img_size,
                        min_size, max_size, aspect_ratios):
    """Build the loc / conf / prior-box branches for one feature map.

    # Arguments
        x: Feature-map tensor the head is attached to.
        prefix: Layer-name prefix (the name of the source layer).
        num_priors: Multiplier for the output channels of the 1x1
            convolutions (``num_priors * 4`` for loc,
            ``num_priors * num_classes`` for conf).
        num_classes: Number of classes; when it differs from 21 the conf
            convolution name gets a ``_<num_classes>`` suffix.
        img_size: Image size forwarded to ``PriorBox``.
        min_size, max_size, aspect_ratios: Forwarded to ``PriorBox``.

    # Returns
        Tuple ``(loc_flat, conf_flat, priorbox)`` of tensors.
    """
    loc = Conv2D(num_priors * 4, (1, 1), padding='same',
                 name=prefix + '_mbox_loc')(x)
    loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)

    conf_name = prefix + '_mbox_conf'
    if num_classes != 21:
        conf_name += '_{}'.format(num_classes)
    conf = Conv2D(num_priors * num_classes, (1, 1), padding='same',
                  name=conf_name)(x)
    conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)

    priorbox = PriorBox(img_size, min_size, max_size=max_size,
                        aspect_ratios=aspect_ratios,
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name=prefix + '_mbox_priorbox')(x)
    return loc_flat, conf_flat, priorbox


def SSD(input_shape, num_classes):
    """Build an SSD detector on top of a MobileNet feature extractor.

    An ImageNet-initialised MobileNet (truncated at ``conv_dw_11_relu``)
    feeds a chain of extra convolution blocks; six of the resulting
    feature maps each get a localisation, confidence and prior-box head,
    whose outputs are concatenated into a single prediction tensor.

    # Arguments
        input_shape: Shape of the input image. Only the first two entries
            are read: the network input is created with shape
            ``(input_shape[1], input_shape[0], 3)`` and ``PriorBox`` is
            given ``(input_shape[1], input_shape[0])`` as image size.
        num_classes: Number of classes including background.

    # Returns
        A Keras ``Model`` mapping the input image to the ``predictions``
        tensor (loc, softmax conf and prior boxes concatenated on axis 2).

    # References
        https://arxiv.org/abs/1512.02325
    """
    img_size = (input_shape[1], input_shape[0])
    input_shape = (input_shape[1], input_shape[0], 3)
    mobilenet_input_shape = (224, 224, 3)

    net = {}
    net['input'] = Input(input_shape)

    # The backbone is instantiated at 224x224 and then re-applied, as a
    # nested model, to this network's own input tensor.
    mobilenet = MobileNet(input_shape=mobilenet_input_shape,
                          include_top=False, weights='imagenet')
    FeatureExtractor = Model(
        inputs=mobilenet.input,
        outputs=mobilenet.get_layer('conv_dw_11_relu').output)
    x = FeatureExtractor(net['input'])

    # Extra layers form a plain chain: each stage consumes the previous one.
    # NOTE(review): the original carried a "(19,19)" note after conv11,
    # presumably its spatial size for a 300x300 input -- confirm.
    extra_layers = (
        # (name, layer class, filters, kernel size, strides)
        ('conv11', Conv2D, 512, (1, 1), (1, 1)),
        ('conv12dw', SeparableConv2D, 512, (3, 3), (2, 2)),
        ('conv12', Conv2D, 1024, (1, 1), (1, 1)),
        ('conv13dw', SeparableConv2D, 1024, (3, 3), (1, 1)),
        ('conv13', Conv2D, 1024, (1, 1), (1, 1)),
        ('conv14_1', Conv2D, 256, (1, 1), (1, 1)),
        ('conv14_2', Conv2D, 512, (3, 3), (2, 2)),
        ('conv15_1', Conv2D, 128, (1, 1), (1, 1)),
        ('conv15_2', Conv2D, 256, (3, 3), (2, 2)),
        ('conv16_1', Conv2D, 128, (1, 1), (1, 1)),
        ('conv16_2', Conv2D, 256, (3, 3), (2, 2)),
        ('conv17_1', Conv2D, 64, (1, 1), (1, 1)),
        ('conv17_2', Conv2D, 128, (3, 3), (2, 2)),
    )
    for layer_name, layer_cls, filters, kernel_size, strides in extra_layers:
        x = _ssd_mobilenet_conv_bn_relu(x, layer_cls, filters, kernel_size,
                                        layer_name, strides=strides)
        net[layer_name] = x

    # One prediction head per source feature map.
    head_specs = (
        # (source layer, num_priors, min_size, max_size, aspect_ratios)
        ('conv11', 3, 60, None, [2]),
        ('conv13', 6, 105.0, 150.0, [2, 3]),
        ('conv14_2', 6, 150, 195.0, [2, 3]),
        ('conv15_2', 6, 195.0, 240.0, [2, 3]),
        ('conv16_2', 6, 240.0, 285.0, [2, 3]),
        ('conv17_2', 6, 285.0, 300.0, [2, 3]),
    )
    loc_parts = []
    conf_parts = []
    prior_parts = []
    for source, num_priors, min_size, max_size, aspect_ratios in head_specs:
        loc_flat, conf_flat, priorbox = _ssd_mobilenet_head(
            net[source], source, num_priors, num_classes, img_size,
            min_size, max_size, aspect_ratios)
        loc_parts.append(loc_flat)
        conf_parts.append(conf_flat)
        prior_parts.append(priorbox)

    # Gather all predictions
    net['mbox_loc'] = concatenate(loc_parts, axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(conf_parts, axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(prior_parts, axis=1,
                                       name='mbox_priorbox')

    # `int_shape` is a backend function, not a tensor attribute, so the
    # fallback must not be guarded by hasattr(tensor, 'int_shape');
    # otherwise num_boxes is left unbound when `_keras_shape` is missing.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2, name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
Esempio n. 15
0
def _ssd300v2_suffixed(name, num_classes):
    """Return ``name``, tagged with ``_<num_classes>`` unless it is 21."""
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    return name


def _ssd300v2_relu_conv(x, filters, kernel_size, name, **conv_kwargs):
    """Apply a named ReLU ``Conv2D`` to ``x`` (``padding='same'`` by default).

    Extra keyword arguments (``strides``, ``dilation_rate``, ``padding``)
    are forwarded to ``Conv2D`` unchanged.
    """
    conv_kwargs.setdefault('padding', 'same')
    return Conv2D(filters, kernel_size, name=name, activation='relu',
                  **conv_kwargs)(x)


def _ssd300v2_head(x, prefix, num_priors, num_classes, img_size,
                   min_size, max_size):
    """Build the conf / loc / prior-box branches for one feature map.

    All layers are created inside ``tf.name_scope(prefix)`` and use 3x3
    convolutions with aspect ratios ``[2, 3]``.

    # Returns
        Tuple ``(loc_flat, conf_flat, priorbox)`` of tensors.
    """
    conf_name = _ssd300v2_suffixed(prefix + '_mbox_conf', num_classes)
    with tf.name_scope(prefix):
        conf = Conv2D(num_priors * num_classes, (3, 3),
                      padding='same',
                      name=conf_name)(x)
        conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)
        loc = Conv2D(num_priors * 4, (3, 3),
                     padding='same',
                     name=prefix + '_mbox_loc')(x)
        loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)
        priorbox = PriorBox(img_size,
                            min_size,
                            max_size=max_size,
                            aspect_ratios=[2, 3],
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox')(x)
    return loc_flat, conf_flat, priorbox


def SSD300v2(input_shape, num_classes=21, featurte_map=None):
    """SSD300 architecture.

    Builds a VGG-style convolutional trunk followed by extra feature
    layers, attaches loc / conf / prior-box heads to six feature maps and
    concatenates everything into one prediction tensor. Tensor shapes are
    printed to stdout while the graph is assembled.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.
        featurte_map: Optional name of an intermediate output. When it is
            one of ``'conv4_3_norm_mbox_loc_flat'``, ``'fc7_mbox_loc_flat'``,
            ``'conv4_3_norm_mbox_conf_flat'`` or ``'fc7_mbox_conf_flat'``,
            the result of ``set_return_model`` for that tensor is returned
            instead of the full model. (The misspelled name is kept for
            backward compatibility with keyword callers.)

    # Returns
        A Keras ``Model`` from the input layer to ``predictions``, or
        whatever ``set_return_model`` returns when ``featurte_map``
        selects an intermediate output.

    # References
        https://arxiv.org/abs/1512.02325
    """
    input_layer = Input(shape=input_shape)

    # Blocks 1-5: stacks of 3x3 ReLU convolutions, each closed by a 2x2
    # max-pool. Only pool5 uses stride 1.
    trunk_blocks = (
        # (scope, filters, conv names, pool name, pool strides)
        ("Block1", 64, ('conv1_1', 'conv1_2'), 'pool1', (2, 2)),
        ("Block2", 128, ('conv2_1', 'conv2_2'), 'pool2', (2, 2)),
        ("Block3", 256, ('conv3_1', 'conv3_2', 'conv3_3'), 'pool3', (2, 2)),
        ("Block4", 512, ('conv4_1', 'conv4_2', 'conv4_3'), 'pool4', (2, 2)),
        ("Block5", 512, ('conv5_1', 'conv5_2', 'conv5_3'), 'pool5', (1, 1)),
    )
    trunk = {}
    x = input_layer
    for scope, filters, conv_names, pool_name, pool_strides in trunk_blocks:
        with tf.name_scope(scope):
            for conv_name in conv_names:
                x = _ssd300v2_relu_conv(x, filters, (3, 3), conv_name)
                trunk[conv_name] = x
            x = MaxPooling2D(name=pool_name,
                             pool_size=(2, 2),
                             strides=pool_strides,
                             padding='same')(x)
    conv4_3 = trunk['conv4_3']
    pool5 = x

    # FC6 (dilated 3x3 convolution)
    with tf.name_scope("fc6"):
        fc6 = _ssd300v2_relu_conv(pool5, 1024, (3, 3), 'fc6',
                                  dilation_rate=(6, 6))

    # FC7
    with tf.name_scope("fc7"):
        fc7 = _ssd300v2_relu_conv(fc6, 1024, (1, 1), 'fc7')

    # Block 6
    with tf.name_scope("Block6"):
        conv6_1 = _ssd300v2_relu_conv(fc7, 256, (1, 1), 'conv6_1')
        conv6_2 = _ssd300v2_relu_conv(conv6_1, 512, (3, 3), 'conv6_2',
                                      strides=(2, 2))

    # Block 7: explicit zero padding followed by a 'valid' strided conv.
    with tf.name_scope("Block7"):
        conv7_1 = _ssd300v2_relu_conv(conv6_2, 128, (1, 1), 'conv7_1')
        conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
        conv7_2 = _ssd300v2_relu_conv(conv7_1z, 256, (3, 3), 'conv7_2',
                                      padding='valid', strides=(2, 2))

    # Block 8
    with tf.name_scope("Block8"):
        conv8_1 = _ssd300v2_relu_conv(conv7_2, 128, (1, 1), 'conv8_1')
        conv8_2 = _ssd300v2_relu_conv(conv8_1, 256, (3, 3), 'conv8_2',
                                      strides=(2, 2))

    # Last Pool
    with tf.name_scope("LastPool"):
        pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    img_size = (input_shape[1], input_shape[0])

    # Prediction from conv4_3 (normalised first; no max_size is passed)
    num_priors = 3
    name = _ssd300v2_suffixed('conv4_3_norm_mbox_conf', num_classes)
    with tf.name_scope("conv4_3"):
        conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
        conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                       name='conv4_3_norm_mbox_loc',
                                       padding='same')(conv4_3_norm)
        conv4_3_norm_mbox_loc_flat = Flatten(
            name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
        conv4_3_norm_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                        name=name,
                                        padding='same')(conv4_3_norm)
        conv4_3_norm_mbox_conf_flat = Flatten(
            name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
        conv4_3_norm_mbox_priorbox = PriorBox(
            img_size,
            30.0,
            name='conv4_3_norm_mbox_priorbox',
            aspect_ratios=[2],
            variances=[0.1, 0.1, 0.2, 0.2])(conv4_3_norm)

    # Predictions from fc7, conv6_2, conv7_2 and conv8_2 share one layout.
    num_priors = 6
    fc7_mbox_loc_flat, fc7_mbox_conf_flat, fc7_mbox_priorbox = \
        _ssd300v2_head(fc7, 'fc7', num_priors, num_classes, img_size,
                       60.0, 114.0)
    conv6_2_mbox_loc_flat, conv6_2_mbox_conf_flat, conv6_2_mbox_priorbox = \
        _ssd300v2_head(conv6_2, 'conv6_2', num_priors, num_classes, img_size,
                       114.0, 168.0)
    conv7_2_mbox_loc_flat, conv7_2_mbox_conf_flat, conv7_2_mbox_priorbox = \
        _ssd300v2_head(conv7_2, 'conv7_2', num_priors, num_classes, img_size,
                       168.0, 222.0)
    conv8_2_mbox_loc_flat, conv8_2_mbox_conf_flat, conv8_2_mbox_priorbox = \
        _ssd300v2_head(conv8_2, 'conv8_2', num_priors, num_classes, img_size,
                       222.0, 276.0)

    # Prediction from pool6: the globally pooled vector feeds Dense heads
    # and is reshaped back to a 1x1 map for PriorBox.
    name = _ssd300v2_suffixed('pool6_mbox_conf_flat', num_classes)
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    with tf.name_scope("pool6"):
        pool6_mbox_loc_flat = Dense(num_priors * 4,
                                    name='pool6_mbox_loc_flat')(pool6)
        pool6_mbox_conf_flat = Dense(num_priors * num_classes,
                                     name=name)(pool6)
        pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
        pool6_mbox_priorbox = PriorBox(
            img_size,
            276.0,
            max_size=330.0,
            aspect_ratios=[2, 3],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions
    with tf.name_scope("mbox"):
        mbox_loc = concatenate([
            conv4_3_norm_mbox_loc_flat, fc7_mbox_loc_flat,
            conv6_2_mbox_loc_flat, conv7_2_mbox_loc_flat,
            conv8_2_mbox_loc_flat, pool6_mbox_loc_flat
        ], axis=1, name='mbox_loc')
        mbox_conf = concatenate([
            conv4_3_norm_mbox_conf_flat, fc7_mbox_conf_flat,
            conv6_2_mbox_conf_flat, conv7_2_mbox_conf_flat,
            conv8_2_mbox_conf_flat, pool6_mbox_conf_flat
        ], axis=1, name='mbox_conf')
        mbox_priorbox = concatenate([
            conv4_3_norm_mbox_priorbox, fc7_mbox_priorbox,
            conv6_2_mbox_priorbox, conv7_2_mbox_priorbox,
            conv8_2_mbox_priorbox, pool6_mbox_priorbox
        ], axis=1, name='mbox_priorbox')

        # K.int_shape() returns `_keras_shape` when the tensor carries it,
        # so the printed values are unchanged, but the diagnostics no
        # longer raise AttributeError on tensors without that attribute.
        print('{} conv4_3_norm_mbox_loc_flat'.format(
            K.int_shape(conv4_3_norm_mbox_loc_flat)))
        print('{} conv4_3_norm_mbox_conf_flat'.format(
            K.int_shape(conv4_3_norm_mbox_conf_flat)))
        print(
            '{} conv4_3_norm_mbox_priorbox'.format(conv4_3_norm_mbox_priorbox))

        # `int_shape` is a backend function, not a tensor attribute, so
        # the fallback must be an unconditional else; otherwise num_boxes
        # stays unbound when `_keras_shape` is missing.
        if hasattr(mbox_loc, '_keras_shape'):
            num_boxes = mbox_loc._keras_shape[-1] // 4
        else:
            num_boxes = K.int_shape(mbox_loc)[-1] // 4
        print('{} num_boxes'.format(num_boxes))
        print('{} mbox_loc'.format(K.int_shape(mbox_loc)))
        print('{} mbox_conf'.format(K.int_shape(mbox_conf)))

        mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
        mbox_conf = Reshape((num_boxes, num_classes),
                            name='mbox_conf_logits')(mbox_conf)
        mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
        print('{} locatation'.format(mbox_loc))
        print('{} conf'.format(mbox_conf))
        print('{} priorbox'.format(mbox_priorbox))

    # Optional early exit exposing one intermediate flattened output.
    intermediate_outputs = (
        ('conv4_3_norm_mbox_loc_flat', conv4_3_norm_mbox_loc_flat),
        ('fc7_mbox_loc_flat', fc7_mbox_loc_flat),
        ('conv4_3_norm_mbox_conf_flat', conv4_3_norm_mbox_conf_flat),
        ('fc7_mbox_conf_flat', fc7_mbox_conf_flat),
    )
    for output_name, output_tensor in intermediate_outputs:
        if featurte_map == output_name:
            return set_return_model(input_layer=input_layer,
                                    output_layer=output_tensor)

    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2,
                              name='predictions')
    print('{} predictions'.format(predictions.shape))
    print('{} predictions'.format(predictions))
    model = Model(inputs=input_layer, outputs=predictions)
    return model
Esempio n. 16
0
def SSD512(input_shape, num_classes=21):
    """SSD512 architecture.

    Builds a VGG-style convolutional trunk followed by extra feature layers,
    attaches a localisation / confidence / prior-box head to seven feature
    maps and concatenates all heads into one `predictions` tensor. The layer
    calls use the Keras 1 argument names (`border_mode`, `subsample`,
    `merge`).

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (512, 512, 3) or (3, 512, 512)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A `Model` from the input image to the `predictions` tensor, which is
        the concatenation along axis 2 of the reshaped box offsets, the
        softmaxed class confidences and the prior boxes.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    net['input'] = Input(shape=input_shape)
    # PriorBox is given the size as (input_shape[1], input_shape[0]).
    img_size = (input_shape[1], input_shape[0])

    def conv(name, source, filters, size, border_mode='same', **extra):
        """Store a ReLU Convolution2D called `name` applied to net[source]."""
        net[name] = Convolution2D(filters,
                                  size,
                                  size,
                                  activation='relu',
                                  border_mode=border_mode,
                                  name=name,
                                  **extra)(net[source])

    def max_pool(name, source, pool_size=(2, 2), strides=(2, 2)):
        """Store a 'same'-padded MaxPooling2D called `name` on net[source]."""
        net[name] = MaxPooling2D(pool_size,
                                 strides=strides,
                                 border_mode='same',
                                 name=name)(net[source])

    def class_suffixed(name):
        """Append `_<num_classes>` to a layer name unless num_classes is 21."""
        if num_classes != 21:
            name += '_{}'.format(num_classes)
        return name

    def mbox_head(source, num_priors, min_size, max_size, aspect_ratios):
        """Attach the loc / conf / priorbox layers to feature map net[source].

        Results are stored under `<source>_mbox_loc[_flat]`,
        `<source>_mbox_conf[_flat]` and `<source>_mbox_priorbox`.
        """
        feature = net[source]
        loc = source + '_mbox_loc'
        net[loc] = Convolution2D(num_priors * 4,
                                 3,
                                 3,
                                 border_mode='same',
                                 name=loc)(feature)
        net[loc + '_flat'] = Flatten(name=loc + '_flat')(net[loc])
        conf = source + '_mbox_conf'
        # Only the conf convolution's layer name carries the class-count
        # suffix; the dictionary key and the Flatten name stay fixed.
        net[conf] = Convolution2D(num_priors * num_classes,
                                  3,
                                  3,
                                  border_mode='same',
                                  name=class_suffixed(conf))(feature)
        net[conf + '_flat'] = Flatten(name=conf + '_flat')(net[conf])
        prior = source + '_mbox_priorbox'
        net[prior] = PriorBox(img_size,
                              min_size,
                              max_size=max_size,
                              aspect_ratios=aspect_ratios,
                              variances=[0.1, 0.1, 0.2, 0.2],
                              name=prior)(feature)

    # Block 1
    conv('conv1_1', 'input', 64, 3)
    conv('conv1_2', 'conv1_1', 64, 3)
    max_pool('pool1', 'conv1_2')
    # Block 2
    conv('conv2_1', 'pool1', 128, 3)
    conv('conv2_2', 'conv2_1', 128, 3)
    max_pool('pool2', 'conv2_2')
    # Block 3
    conv('conv3_1', 'pool2', 256, 3)
    conv('conv3_2', 'conv3_1', 256, 3)
    conv('conv3_3', 'conv3_2', 256, 3)
    max_pool('pool3', 'conv3_3')
    # Block 4
    conv('conv4_1', 'pool3', 512, 3)
    conv('conv4_2', 'conv4_1', 512, 3)
    conv('conv4_3', 'conv4_2', 512, 3)
    max_pool('pool4', 'conv4_3')
    # Block 5 (pool5 is 3x3 with stride 1)
    conv('conv5_1', 'pool4', 512, 3)
    conv('conv5_2', 'conv5_1', 512, 3)
    conv('conv5_3', 'conv5_2', 512, 3)
    max_pool('pool5', 'conv5_3', pool_size=(3, 3), strides=(1, 1))
    # FC6 (atrous convolution)
    net['fc6'] = AtrousConvolution2D(1024,
                                     3,
                                     3,
                                     atrous_rate=(6, 6),
                                     activation='relu',
                                     border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7
    conv('fc7', 'fc6', 1024, 1)
    # Block 6
    conv('conv6_1', 'fc7', 256, 1)
    conv('conv6_2', 'conv6_1', 512, 3, subsample=(2, 2))
    # Block 7: explicit zero padding followed by a 'valid' strided conv
    conv('conv7_1', 'conv6_2', 128, 1)
    net['conv7_1_padded'] = ZeroPadding2D()(net['conv7_1'])
    conv('conv7_2', 'conv7_1_padded', 256, 3,
         border_mode='valid', subsample=(2, 2))
    # Block 8
    conv('conv8_1', 'conv7_2', 128, 1)
    conv('conv8_2', 'conv8_1', 256, 3, subsample=(2, 2))
    # Block 9
    conv('conv9_1', 'conv8_2', 128, 1)
    conv('conv9_2', 'conv9_1', 256, 3, subsample=(2, 2))
    # Block 10
    conv('conv10_1', 'conv9_2', 128, 1)
    conv('conv10_2', 'conv10_1', 256, 3, subsample=(2, 2))
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv10_2'])

    # Prediction from conv4_3 (normalised first)
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    mbox_head('conv4_3_norm', 4, 35.84, 76.8, [2])
    # Predictions from fc7 and the extra feature layers
    mbox_head('fc7', 6, 76.8, 153.6, [2, 3])
    mbox_head('conv6_2', 6, 153.6, 230.4, [2, 3])
    mbox_head('conv7_2', 6, 230.4, 307.2, [2, 3])
    mbox_head('conv8_2', 6, 307.2, 384.0, [2, 3])
    mbox_head('conv9_2', 4, 384.0, 460.8, [2])

    # Prediction from pool6: the globally pooled vector feeds Dense heads
    # directly, and is reshaped to a 1x1 map only for PriorBox.
    num_priors = 4
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name=class_suffixed('pool6_mbox_conf_flat'))(net['pool6'])
    priorbox = PriorBox(img_size,
                        460.8,
                        max_size=537.6,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # 256 matches the filter count of conv10_2, which pool6 averages.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    sources = ('conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
               'conv9_2', 'pool6')

    def gather(key_suffix, name):
        """Concatenate net[<source><key_suffix>] of every head on axis 1."""
        net[name] = merge([net[source + key_suffix] for source in sources],
                          mode='concat',
                          concat_axis=1,
                          name=name)

    gather('_mbox_loc_flat', 'mbox_loc')
    gather('_mbox_conf_flat', 'mbox_conf')
    gather('_mbox_priorbox', 'mbox_priorbox')

    # Four offsets per box. Fall back to the backend's static shape whenever
    # the tensor carries no `_keras_shape`, so num_boxes is always bound.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat',
        concat_axis=2,
        name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
Esempio n. 17
0
def SSD(input_shape, num_classes=21):
    """SSD300 architecture.

    Builds a VGG-style convolutional trunk followed by extra feature layers,
    attaches a localisation / confidence / prior-box head to six feature
    maps and concatenates all heads into one `predictions` tensor.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A `Model` from the input image to the `predictions` tensor, which is
        the concatenation along axis 2 of the reshaped box offsets, the
        softmaxed class confidences and the prior boxes.

    # References
        https://arxiv.org/abs/1512.02325
    """
    input0 = Input(shape=input_shape)
    # PriorBox is given the size as (input_shape[1], input_shape[0]).
    img_size = (input_shape[1], input_shape[0])

    def conv(x, filters, size, name, padding='same', **extra):
        """Apply a ReLU Conv2D with a square `size` kernel to `x`."""
        return Conv2D(filters, (size, size),
                      activation='relu',
                      padding=padding,
                      name=name,
                      **extra)(x)

    def max_pool(x, name, pool_size=(2, 2), strides=(2, 2)):
        """Apply a 'same'-padded MaxPooling2D to `x`."""
        return MaxPooling2D(pool_size,
                            strides=strides,
                            padding='same',
                            name=name)(x)

    def class_suffixed(name):
        """Append `_<num_classes>` to a layer name unless num_classes is 21."""
        if num_classes != 21:
            name += '_{}'.format(num_classes)
        return name

    def mbox_head(feature, prefix, num_priors, min_size, aspect_ratios,
                  max_size=None):
        """Build the heads for one feature map.

        Returns the tuple (flattened loc, flattened conf, priorbox).
        `max_size` is forwarded to PriorBox only when it is given.
        """
        loc = Conv2D(num_priors * 4, (3, 3),
                     padding='same',
                     name=prefix + '_mbox_loc')(feature)
        loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)
        # Only the conf convolution's layer name carries the class-count
        # suffix; the Flatten name stays fixed.
        conf = Conv2D(num_priors * num_classes, (3, 3),
                      padding='same',
                      name=class_suffixed(prefix + '_mbox_conf'))(feature)
        conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)
        prior_kwargs = {
            'aspect_ratios': aspect_ratios,
            'variances': [0.1, 0.1, 0.2, 0.2],
            'name': prefix + '_mbox_priorbox',
        }
        if max_size is not None:
            prior_kwargs['max_size'] = max_size
        priorbox = PriorBox(img_size, min_size, **prior_kwargs)(feature)
        return loc_flat, conf_flat, priorbox

    # Block 1
    conv1_1 = conv(input0, 64, 3, 'conv1_1')
    conv1_2 = conv(conv1_1, 64, 3, 'conv1_2')
    pool1 = max_pool(conv1_2, 'pool1')
    # Block 2
    conv2_1 = conv(pool1, 128, 3, 'conv2_1')
    conv2_2 = conv(conv2_1, 128, 3, 'conv2_2')
    pool2 = max_pool(conv2_2, 'pool2')
    # Block 3
    conv3_1 = conv(pool2, 256, 3, 'conv3_1')
    conv3_2 = conv(conv3_1, 256, 3, 'conv3_2')
    conv3_3 = conv(conv3_2, 256, 3, 'conv3_3')
    pool3 = max_pool(conv3_3, 'pool3')
    # Block 4
    conv4_1 = conv(pool3, 512, 3, 'conv4_1')
    conv4_2 = conv(conv4_1, 512, 3, 'conv4_2')
    conv4_3 = conv(conv4_2, 512, 3, 'conv4_3')
    pool4 = max_pool(conv4_3, 'pool4')
    # Block 5 (pool5 is 3x3 with stride 1)
    conv5_1 = conv(pool4, 512, 3, 'conv5_1')
    conv5_2 = conv(conv5_1, 512, 3, 'conv5_2')
    conv5_3 = conv(conv5_2, 512, 3, 'conv5_3')
    pool5 = max_pool(conv5_3, 'pool5', pool_size=(3, 3), strides=(1, 1))
    # FC6 (dilated convolution)
    fc6 = conv(pool5, 1024, 3, 'fc6', dilation_rate=(6, 6))
    # FC7
    fc7 = conv(fc6, 1024, 1, 'fc7')
    # Block 6
    conv6_1 = conv(fc7, 256, 1, 'conv6_1')
    conv6_2 = conv(conv6_1, 512, 3, 'conv6_2', strides=(2, 2))
    # Block 7: explicit zero padding followed by a 'valid' strided conv
    conv7_1 = conv(conv6_2, 128, 1, 'conv7_1')
    conv7_1_padded = ZeroPadding2D()(conv7_1)
    conv7_2 = conv(conv7_1_padded, 256, 3, 'conv7_2',
                   padding='valid', strides=(2, 2))
    # Block 8
    conv8_1 = conv(conv7_2, 128, 1, 'conv8_1')
    conv8_2 = conv(conv8_1, 256, 3, 'conv8_2', strides=(2, 2))
    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Prediction from conv4_3 (normalised first).
    # NOTE(review): the Normalize argument is tied to num_classes here, while
    # the SSD512 builder passes a constant 20 - confirm this is intended.
    conv4_3_norm = Normalize(num_classes - 1, name='conv4_3_norm')(conv4_3)
    heads = [
        mbox_head(conv4_3_norm, 'conv4_3_norm', 3, 30.0, [2]),
        mbox_head(fc7, 'fc7', 6, 60.0, [2, 3], max_size=114.0),
        mbox_head(conv6_2, 'conv6_2', 6, 114.0, [2, 3], max_size=168.0),
        mbox_head(conv7_2, 'conv7_2', 6, 168.0, [2, 3], max_size=222.0),
        mbox_head(conv8_2, 'conv8_2', 6, 222.0, [2, 3], max_size=276.0),
    ]

    # Prediction from pool6: the globally pooled vector feeds Dense heads
    # directly, and is reshaped to a 1x1 map only for PriorBox.
    num_priors = 6
    pool6_mbox_loc_flat = Dense(num_priors * 4,
                                name='pool6_mbox_loc_flat')(pool6)
    pool6_mbox_conf_flat = Dense(
        num_priors * num_classes,
        name=class_suffixed('pool6_mbox_conf_flat'))(pool6)
    # Prefer image_data_format(); keep the older image_dim_ordering() call
    # for backends that do not provide it.
    if hasattr(K, 'image_data_format'):
        channels_last = K.image_data_format() == 'channels_last'
    else:
        channels_last = K.image_dim_ordering() == 'tf'
    # 256 matches the filter count of conv8_2, which pool6 averages.
    target_shape = (1, 1, 256) if channels_last else (256, 1, 1)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(img_size,
                                   276.0,
                                   max_size=330.0,
                                   aspect_ratios=[2, 3],
                                   variances=[0.1, 0.1, 0.2, 0.2],
                                   name='pool6_mbox_priorbox')(pool6_reshaped)
    heads.append(
        (pool6_mbox_loc_flat, pool6_mbox_conf_flat, pool6_mbox_priorbox))

    # Gather all predictions
    loc_parts = [head[0] for head in heads]
    conf_parts = [head[1] for head in heads]
    prior_parts = [head[2] for head in heads]
    mbox_loc = concatenate(loc_parts, axis=1, name='mbox_loc')
    mbox_conf = concatenate(conf_parts, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(prior_parts, axis=1, name='mbox_priorbox')

    # Four offsets per box. Fall back to the backend's static shape whenever
    # the tensor carries no `_keras_shape`, so num_boxes is always bound.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4
    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2,
                              name='predictions')
    model = Model(input0, predictions)
    return model
Esempio n. 18
0
def _mobilenet_ssd_head(source, prefix, num_priors, num_classes, img_size,
                        min_size, max_size, aspect_ratios):
    """Attach one SSD prediction head to the feature map `source`.

    Creates, in this order, the layers `<prefix>_mbox_loc`,
    `<prefix>_mbox_loc_flat`, `<prefix>_mbox_conf`,
    `<prefix>_mbox_conf_flat` and `<prefix>_mbox_priorbox`.

    # Arguments
        source: Feature-map tensor the head reads from.
        prefix: Layer-name prefix, e.g. 'conv11'.
        num_priors: Multiplier for the convolution widths
            (`num_priors * 4` and `num_priors * num_classes` filters).
            Presumably has to equal the number of boxes `PriorBox`
            generates per cell for the given sizes/ratios - verify
            against the `PriorBox` implementation.
        num_classes: Number of classes scored per prior box.
        img_size: Image size forwarded unchanged to `PriorBox`.
        min_size: Second positional argument of `PriorBox`.
        max_size: `max_size` keyword of `PriorBox` (may be None).
        aspect_ratios: `aspect_ratios` keyword of `PriorBox`.

    # Returns
        Tuple `(loc_flat, conf_flat, priorbox)` of output tensors.
    """
    loc = Conv2D(num_priors * 4, (1, 1), padding='same',
                 name=prefix + '_mbox_loc')(source)
    loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)
    conf = Conv2D(num_priors * num_classes, (1, 1), padding='same',
                  name=prefix + '_mbox_conf')(source)
    conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)
    priorbox = PriorBox(img_size,
                        min_size,
                        max_size=max_size,
                        aspect_ratios=aspect_ratios,
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name=prefix + '_mbox_priorbox')(source)
    return loc_flat, conf_flat, priorbox


def SSD(input_shape, num_classes):
    """Build an SSD detector on a depthwise-separable convolution backbone.

    The backbone is assembled from the file's `_conv_block`,
    `_depthwise_conv_block*` and `_conv_blockSSD*` helpers. Six feature
    maps (conv11, conv13, conv14_2 ... conv17_2) each get a prediction
    head; the flattened head outputs are concatenated, reshaped per box
    and joined with the prior boxes.

    # Arguments
        input_shape: Sequence whose first two entries give the image
            size. They are swapped to form the Keras input shape
            `(input_shape[1], input_shape[0], 3)`; the channel count is
            always 3.
        num_classes: Number of classes scored per prior box.

    # Returns
        A Keras `Model` mapping the image input to the `predictions`
        tensor: box offsets `(num_boxes, 4)`, class softmax
        `(num_boxes, num_classes)` and the `PriorBox` outputs,
        concatenated along axis 2.
    """
    # NOTE(review): both the PriorBox image size and the network input use
    # the swapped order (index 1 first). Harmless for square inputs;
    # confirm the intended (width, height) convention for others.
    img_size = (input_shape[1], input_shape[0])
    input_shape = (input_shape[1], input_shape[0], 3)
    alpha = 1.0
    depth_multiplier = 1
    input0 = Input(input_shape)

    # Backbone.
    x = _conv_block(input0, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                              strides=(2, 2), block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                              strides=(2, 2), block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=6)
    for block_id in (7, 8, 9, 10):
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                                  block_id=block_id)
    x = _depthwise_conv_block_f(x, depth_multiplier, strides=(1, 1),
                                block_id=11)
    # NOTE(review): the third argument here is depth_multiplier while the
    # matching call for block 13 passes alpha. Both are 1 today, so the
    # call is kept as written - confirm against _conv_blockSSD_f.
    x, conv11 = _conv_blockSSD_f(x, 512, depth_multiplier, kernel=(1, 1),
                                 strides=(1, 1), block_id=11)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=12)
    x = _depthwise_conv_block_f(x, depth_multiplier, strides=(1, 1),
                                block_id=13)
    x, conv13 = _conv_blockSSD_f(x, 512, alpha, kernel=(1, 1),
                                 strides=(1, 1), block_id=13)

    # Extra feature layers; each call also returns the map used for
    # prediction.
    x, conv14_2 = _conv_blockSSD(x, 256, alpha, block_id=14)
    x, conv15_2 = _conv_blockSSD(x, 128, alpha, block_id=15)
    x, conv16_2 = _conv_blockSSD(x, 128, alpha, block_id=16)
    _, conv17_2 = _conv_blockSSD(x, 64, alpha, block_id=17)

    # (source, name prefix, num_priors, min_size, max_size, aspect_ratios)
    head_specs = (
        (conv11, 'conv11', 3, 60, None, [2]),
        (conv13, 'conv13', 6, 105.0, 150.0, [2, 3]),
        (conv14_2, 'conv14_2', 6, 150, 195.0, [2, 3]),
        (conv15_2, 'conv15_2', 6, 195.0, 240.0, [2, 3]),
        (conv16_2, 'conv16_2', 6, 240.0, 285.0, [2, 3]),
        (conv17_2, 'conv17_2', 6, 285.0, 300.0, [2, 3]),
    )
    heads = [
        _mobilenet_ssd_head(source, prefix, num_priors, num_classes,
                            img_size, min_size, max_size, ratios)
        for source, prefix, num_priors, min_size, max_size, ratios
        in head_specs
    ]
    loc_flats, conf_flats, priorboxes = (list(group) for group in zip(*heads))

    # Gather all predictions.
    mbox_loc = concatenate(loc_flats, axis=1, name='mbox_loc')
    mbox_conf = concatenate(conf_flats, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(priorboxes, axis=1, name='mbox_priorbox')

    # Multi-backend Keras tensors carry `_keras_shape`; otherwise ask the
    # backend. An unconditional fallback guarantees num_boxes is always
    # bound (the former `elif hasattr(..., 'int_shape')` could leave it
    # undefined).
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2,
                              name='predictions')
    model = Model(inputs=input0, outputs=predictions)
    return model
def _ssd300_padded_conv(x, filters, name, strides=(1, 1)):
    """Apply a 3x3 ReLU convolution preceded by explicit 1-pixel padding.

    Creates the layers `<name>_zp` (ZeroPadding2D) and `<name>` (Conv2D
    with its default padding mode).

    # Arguments
        x: Input tensor.
        filters: Number of convolution filters.
        name: Name of the convolution layer; the padding layer is named
            `name + '_zp'`.
        strides: Convolution strides.

    # Returns
        Output tensor of the convolution.
    """
    x = ZeroPadding2D(padding=(1, 1), name=name + '_zp')(x)
    return Conv2D(filters, (3, 3), activation='relu', strides=strides,
                  name=name)(x)


def _ssd300_head(source, prefix, num_priors, num_classes, min_size, max_size,
                 aspect_ratios):
    """Attach one SSD prediction head to the feature map `source`.

    Creates, in this order, the layers `<prefix>_mbox_loc_zp`,
    `<prefix>_mbox_loc`, `<prefix>_mbox_loc_flat`,
    `<prefix>_mbox_conf_zp`, `<prefix>_mbox_conf`,
    `<prefix>_mbox_conf_flat` and `<prefix>_mbox_priorbox`.

    # Arguments
        source: Feature-map tensor the head reads from.
        prefix: Layer-name prefix, e.g. 'fc7'.
        num_priors: Multiplier for the convolution widths
            (`4 * num_priors` and `num_classes * num_priors` filters).
            Presumably has to equal the number of boxes `PriorBox`
            generates per cell - verify against `PriorBox`.
        num_classes: Number of classes scored per prior box.
        min_size: `min_size` keyword of `PriorBox`.
        max_size: `max_size` keyword of `PriorBox`.
        aspect_ratios: `aspect_ratios` keyword of `PriorBox`.

    # Returns
        Tuple `(loc_flat, conf_flat, priorbox)` of output tensors.
    """
    # Both convolutions are deliberately linear (no activation): SSD box
    # offsets are signed regression values and the confidences are logits
    # that the caller feeds to a softmax. A ReLU here would clamp both to
    # >= 0.
    loc = ZeroPadding2D(padding=(1, 1), name=prefix + '_mbox_loc_zp')(source)
    loc = Conv2D(4 * num_priors, (3, 3), strides=(1, 1),
                 name=prefix + '_mbox_loc')(loc)
    loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)

    conf = ZeroPadding2D(padding=(1, 1),
                         name=prefix + '_mbox_conf_zp')(source)
    conf = Conv2D(num_classes * num_priors, (3, 3), strides=(1, 1),
                  name=prefix + '_mbox_conf')(conf)
    conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)

    # NOTE(review): the image size is fixed at (300, 300) regardless of the
    # model's input_shape - confirm before using other input sizes.
    priorbox = PriorBox((300, 300), min_size=min_size, max_size=max_size,
                        aspect_ratios=aspect_ratios,
                        variances=[0.10, 0.10, 0.20, 0.20],
                        flip=True, clip=False,
                        name=prefix + '_mbox_priorbox')(source)
    return loc_flat, conf_flat, priorbox


def SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Build the SSD300 detector with explicit zero-padding layers.

    A VGG-16 style stack (conv1_1 ... conv5_3, fc6, fc7) is extended with
    conv6_x ... conv9_x. Prediction heads are attached to conv4_3 (after
    `Normalize`), fc7, conv6_2, conv7_2, conv8_2 and conv9_2.

    # Arguments
        input_shape: Shape passed to the Keras `Input` layer.
        num_classes: Number of classes scored per prior box.

    # Returns
        A Keras `Model` mapping `inputs` to `detection_out`: box offsets
        `(num_boxes, 4)`, class softmax `(num_boxes, num_classes)` and
        the `PriorBox` outputs, concatenated along axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    inputs = Input(shape=input_shape, name='inputs')

    # Block 1
    x = _ssd300_padded_conv(inputs, 64, 'conv1_1')
    x = _ssd300_padded_conv(x, 64, 'conv1_2')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)

    # Block 2
    x = _ssd300_padded_conv(x, 128, 'conv2_1')
    x = _ssd300_padded_conv(x, 128, 'conv2_2')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(x)

    # Block 3 (pool3 is the only 2x2 pooling that uses 'same' padding)
    x = _ssd300_padded_conv(x, 256, 'conv3_1')
    x = _ssd300_padded_conv(x, 256, 'conv3_2')
    x = _ssd300_padded_conv(x, 256, 'conv3_3')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='same',
                  name='pool3')(x)

    # Block 4
    x = _ssd300_padded_conv(x, 512, 'conv4_1')
    x = _ssd300_padded_conv(x, 512, 'conv4_2')
    conv4_3 = _ssd300_padded_conv(x, 512, 'conv4_3')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name='pool4')(conv4_3)

    # Block 5 (3x3 stride-1 pooling on an explicitly padded map)
    x = _ssd300_padded_conv(x, 512, 'conv5_1')
    x = _ssd300_padded_conv(x, 512, 'conv5_2')
    x = _ssd300_padded_conv(x, 512, 'conv5_3')
    x = ZeroPadding2D(padding=(1, 1), name='pool5_zp')(x)
    x = MaxPool2D(pool_size=(3, 3), strides=(1, 1), name='pool5')(x)

    # fc6 (dilated 3x3 convolution) and fc7 (1x1 convolution)
    x = ZeroPadding2D(padding=(6, 6), name='fc6_zp')(x)
    x = Conv2D(1024, (3, 3), activation='relu', strides=(1, 1),
               dilation_rate=(6, 6), name='fc6')(x)
    fc7 = Conv2D(1024, (1, 1), activation='relu', strides=(1, 1),
                 name='fc7')(x)

    # Extra feature layers
    x = Conv2D(256, (1, 1), activation='relu', strides=(1, 1),
               name='conv6_1')(fc7)
    conv6_2 = _ssd300_padded_conv(x, 512, 'conv6_2', strides=(2, 2))

    x = Conv2D(128, (1, 1), activation='relu', strides=(1, 1),
               name='conv7_1')(conv6_2)
    conv7_2 = _ssd300_padded_conv(x, 256, 'conv7_2', strides=(2, 2))

    # conv8_2 and conv9_2 have no padding layer in front of them.
    x = Conv2D(128, (1, 1), activation='relu', strides=(1, 1),
               name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3), activation='relu', strides=(1, 1),
                     name='conv8_2')(x)

    x = Conv2D(128, (1, 1), activation='relu', strides=(1, 1),
               name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3), activation='relu', strides=(1, 1),
                     name='conv9_2')(x)

    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)

    # (source, name prefix, num_priors, min_size, max_size, aspect_ratios)
    head_specs = (
        (conv4_3_norm, 'conv4_3_norm', 4, 30.0, 60.0, [2.0]),
        (fc7, 'fc7', 6, 60.0, 111.0, [2.0, 3.0]),
        (conv6_2, 'conv6_2', 6, 111.0, 162.0, [2.0, 3.0]),
        (conv7_2, 'conv7_2', 6, 162.0, 213.0, [2.0, 3.0]),
        (conv8_2, 'conv8_2', 4, 213.0, 264.0, [2.0]),
        (conv9_2, 'conv9_2', 4, 264.0, 315.0, [2.0]),
    )
    heads = [
        _ssd300_head(source, prefix, num_priors, num_classes,
                     min_size, max_size, ratios)
        for source, prefix, num_priors, min_size, max_size, ratios
        in head_specs
    ]
    loc_flats, conf_flats, priorboxes = (list(group) for group in zip(*heads))

    mbox_loc = concatenate(inputs=loc_flats, axis=1, name='mbox_loc')
    mbox_conf = concatenate(inputs=conf_flats, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(inputs=priorboxes, axis=1,
                                name='mbox_priorbox')

    # Prefer the `_keras_shape` attribute when the tensor has it; otherwise
    # fall back to the tensor's static shape so the builder does not fail
    # with AttributeError.
    static_shape = getattr(mbox_loc, '_keras_shape', None)
    if static_shape is None:
        static_shape = mbox_loc.shape
    num_boxes = int(static_shape[-1]) // 4

    mbox_conf_reshape = Reshape(target_shape=(num_boxes, num_classes),
                                name='mbox_conf_reshape')(mbox_conf)
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf_reshape)
    mbox_loc_reshape = Reshape(target_shape=(num_boxes, 4),
                               name='mbox_loc_reshape')(mbox_loc)
    detection_out = concatenate(inputs=[mbox_loc_reshape,
                                        mbox_conf_softmax,
                                        mbox_priorbox],
                                axis=2, name='detection_out')
    model = Model(inputs, detection_out)
    return model
Esempio n. 20
0
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Conv2D(64,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv1_1')(net['input'])

    net['conv1_2'] = Conv2D(64,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv1_2')(net['conv1_1'])

    net['pool1'] = MaxPooling2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Conv2D(128,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv2_1')(net['pool1'])

    net['conv2_2'] = Conv2D(128,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv2_2')(net['conv2_1'])

    net['pool2'] = MaxPooling2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Conv2D(256,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_1')(net['pool2'])

    net['conv3_2'] = Conv2D(256,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_2')(net['conv3_1'])

    net['conv3_3'] = Conv2D(256,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv3_3')(net['conv3_2'])

    net['pool3'] = MaxPooling2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_1')(net['pool3'])

    net['conv4_2'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_2')(net['conv4_1'])

    net['conv4_3'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv4_3')(net['conv4_2'])

    net['pool4'] = MaxPooling2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_1')(net['pool4'])

    net['conv5_2'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_2')(net['conv5_1'])

    net['conv5_3'] = Conv2D(512,
                            kernel_size=(3, 3),
                            activation='relu',
                            padding='same',
                            name='conv5_3')(net['conv5_2'])

    net['pool5'] = MaxPooling2D(pool_size=(3, 3),
                                strides=(1, 1),
                                padding='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = Conv2D(1024,
                        kernel_size=(3, 3),
                        dilation_rate=(6, 6),
                        activation='relu',
                        padding='same',
                        name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)

    # FC7
    net['fc7'] = Conv2D(1024,
                        kernel_size=(1, 1),
                        activation='relu',
                        padding='same',
                        name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)

    # Block 6
    net['conv6_1'] = Conv2D(256,
                            kernel_size=(1, 1),
                            activation='relu',
                            padding='same',
                            name='conv6_1')(net['fc7'])

    net['conv6_2'] = Conv2D(512,
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128,
                            kernel_size=(1, 1),
                            activation='relu',
                            padding='same',
                            name='conv7_1')(net['conv6_2'])

    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])

    net['conv7_2'] = Conv2D(256,
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='valid',
                            name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Conv2D(128,
                            kernel_size=(1, 1),
                            activation='relu',
                            padding='same',
                            name='conv8_1')(net['conv7_2'])

    net['conv8_2'] = Conv2D(256,
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    num_priors = 3
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x

    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])

    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)

    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size,
                        30.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')

    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])

    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)

    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  name=name)(net['fc7'])

    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])

    priorbox = PriorBox(img_size,
                        60.0,
                        max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # Prediction from conv6_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x

    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])

    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name=name)(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x

    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        114.0,
                        max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # Prediction from conv7_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x

    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])

    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name=name)(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x

    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        168.0,
                        max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # Prediction from conv8_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x

    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])

    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name=name)(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x

    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        222.0,
                        max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # Prediction from pool6
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Dense(num_priors * num_classes, name=name)(net['pool6'])
    net['pool6_mbox_conf_flat'] = x

    priorbox = PriorBox(img_size,
                        276.0,
                        max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')

    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)

    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions

    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['pool6_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')

    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['pool6_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')

    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['pool6_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')

    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    elif hasattr(net['mbox_loc'], 'int_shape'):
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')
    model = Model(inputs=(net['input'], ), outputs=(net['predictions'], ))
    return model
Esempio n. 21
0
def ssd_prior_box_layer(img_size, conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2,
                        pool6):
    """Build one PriorBox layer per SSD source tensor and concatenate them.

    A ``PriorBox`` layer is applied to each of the six inputs (``pool6`` is
    first reshaped to a 1x1 spatial map), and the six outputs are joined
    along axis 1 under the layer name ``'mbox_priorbox'``.

    # Arguments
        img_size: Forwarded unchanged as the first argument of every
            ``PriorBox`` (presumably the input image size -- confirm
            against the ``PriorBox`` definition).
        conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2: Feature-map tensors
            the prior boxes are generated for.
        pool6: Tensor reshaped to a single 1x1 location with 256 channels
            before its ``PriorBox`` is applied; it must therefore hold
            exactly 256 values per sample.

    # Returns
        The tensor produced by ``concatenate(..., axis=1,
        name='mbox_priorbox')`` over the six prior-box outputs, in the
        order conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2, pool6.
    """
    # conv4_3: the only branch without a max_size and with a single extra
    # aspect ratio.
    conv4_3_norm_mbox_priorbox = PriorBox(
        img_size,
        30.0,
        aspect_ratios=[2],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm)

    # fc7
    fc7_mbox_priorbox = PriorBox(
        img_size,
        60.0,
        max_size=114.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='fc7_mbox_priorbox')(fc7)

    # conv6_2
    conv6_2_mbox_priorbox = PriorBox(
        img_size,
        114.0,
        max_size=168.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv6_2_mbox_priorbox')(conv6_2)

    # conv7_2
    conv7_2_mbox_priorbox = PriorBox(
        img_size,
        168.0,
        max_size=222.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv7_2_mbox_priorbox')(conv7_2)

    # conv8_2
    conv8_2_mbox_priorbox = PriorBox(
        img_size,
        222.0,
        max_size=276.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv8_2_mbox_priorbox')(conv8_2)

    # pool6: give the tensor explicit 1x1 spatial dimensions so PriorBox can
    # be applied to it like the other feature maps.  The channel axis
    # position follows the backend's data format.
    # K.image_data_format() is the Keras 2 replacement for the Keras 1 call
    # K.image_dim_ordering(), which newer Keras releases no longer provide;
    # 'channels_last' corresponds to the old 'tf' ordering.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)

    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(
        img_size,
        276.0,
        max_size=330.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather the prior boxes of all source layers into a single tensor.
    return concatenate(
        [
            conv4_3_norm_mbox_priorbox,
            fc7_mbox_priorbox,
            conv6_2_mbox_priorbox,
            conv7_2_mbox_priorbox,
            conv8_2_mbox_priorbox,
            pool6_mbox_priorbox,
        ],
        axis=1,
        name='mbox_priorbox')