Example 1
0
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
        use_objectness=False, use_iou=False, normalizations=[], use_batchnorm=True, lr_mult=1,
        use_scale=True, min_sizes=[], max_sizes=[], prior_variance = [0.1],
        aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True,
        flip=True, clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0,
        conf_postfix='', loc_postfix='', **bn_param):
    """Attach SSD-style multibox prediction heads to ``net`` (modified in place).

    For each source feature map in ``from_layers`` this adds, in order:
      * an optional L2 Normalize layer (when ``normalizations[i] != -1``),
      * an optional intermediate 3x3 conv (when ``inter_layer_depth[i] > 0``),
      * a location conv (4 coords per prior; times ``num_classes`` when
        ``share_location`` is False),
      * a confidence conv (``num_classes`` scores per prior),
      * an optional IoU conv (one score per prior, when ``use_iou``),
      * a Python PriorBoxLayer producing the prior boxes,
      * an optional objectness conv (2 scores per prior, when ``use_objectness``).
    Each prediction conv output is permuted NCHW -> NHWC and flattened so the
    per-source tensors can be concatenated along axis 1 at the end.

    Args:
        net: caffe NetSpec-like object supporting ``net[name] = layer``.
        data_layer: name of an existing blob in ``net`` (sanity-checked only).
        num_classes: number of classes including background; must be > 0.
        from_layers: names of the source feature-map layers.
        normalizations: per-source Normalize scale, or -1 to skip that source.
        use_batchnorm, use_scale, lr_mult, **bn_param: forwarded to ConvBNLayer.
        min_sizes, max_sizes, aspect_ratios, steps: per-source prior geometry;
            each entry may be a scalar or a list.
        prior_variance: accepted for interface compatibility but not referenced
            in this body (the Python PriorBoxLayer below is not passed it).
        img_height, img_width: optional fixed image size forwarded to the
            prior-box layer; a single ``img_size`` is used when they are equal.
        share_location: share bbox regression across classes.
        flip: count the reciprocal of each aspect ratio as an extra prior.
        clip, offset: prior-box clipping flag and anchor-center offset.
        inter_layer_depth: per-source channel count of the optional inter conv.
        kernel_size, pad: geometry of the prediction convs.
        conf_postfix, loc_postfix: name suffixes for the conf/loc layers.

    Returns:
        List of concatenated top blobs, in order:
        [mbox_loc_reshape, mbox_conf_reshape, (mbox_iou_reshape,)
         mbox_priorbox, (mbox_objectness,)].
    """
    # NOTE(review): the list defaults in the signature are mutable but are
    # never mutated in this body, so the classic shared-default pitfall does
    # not apply; kept as-is to preserve the public signature.
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    iou_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalizations[i]),
                    across_spatial=False, channel_shared=False)
                # Subsequent heads for this source read the normalized blob.
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                inter_name = "{}_inter".format(from_layer)
                ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, lr_mult=lr_mult,
                      num_output=inter_layer_depth[i], kernel_size=3, pad=1, stride=1, **bn_param)
                from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        # Scalars are normalized to one-element lists so len() counts priors.
        min_size = min_sizes[i]
        if type(min_size) is not list: min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list: aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list: max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        # A max_size adds one extra (geometric-mean) prior per min_size.
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        # Flipping counts the reciprocal of every aspect ratio as well.
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i: step = steps[i]

        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4;
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        # NCHW -> NHWC so flattening interleaves predictions per location.
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes;
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create iou prediction layer.
        if use_iou:
            name = "{}_mbox_iou{}".format(from_layer, conf_postfix)
            num_iou_output = num_priors_per_location
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_iou_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            iou_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        priorbox_param = {'min_size': min_size,
                          'clip': clip,
                          'offset': offset}
        if max_size:
            priorbox_param.update({'max_size': max_size})
        if aspect_ratio:
            priorbox_param.update({'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            priorbox_param.update({'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                priorbox_param.update({'img_size': img_height})
            else:
                priorbox_param.update({'img_h': img_height, 'img_w': img_width})
        # Python prior-box layer; also consumes the 'im_info' blob, so the
        # net must already contain one (see the BoxDataLayer in callers).
        net[name] = L.Python(net[from_layer], net['im_info'], module='layers.prior_box_layer',
                             layer='PriorBoxLayer', param_str=str(priorbox_param))
        priorbox_layers.append(net[name])

        # Create objectness prediction layer.
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2;
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    # Reshape to (batch, num_priors, 4) for the loss/decode layers.
    net['mbox_loc_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1, 4]})
    mbox_layers.append(net['mbox_loc_reshape'])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    net['mbox_conf_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1, num_classes]})
    mbox_layers.append(net['mbox_conf_reshape'])
    if use_iou:
        name = "mbox_iou"
        net[name] = L.Concat(*iou_layers, axis=1)
        net['mbox_iou_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1]})
        mbox_layers.append(net['mbox_iou_reshape'])
    name = "mbox_priorbox"
    # Prior boxes concatenate along axis 0, unlike the per-image predictions.
    net[name] = L.Concat(*priorbox_layers, axis=0)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])

    return mbox_layers
def make(split='train'):
    """Build the SSD300/VGG16 detection net and write its prototxt.

    For ``split == 'test'`` this writes ``test.prototxt`` — the Python data
    layer is stripped and replaced by declared ``data``/``im_info`` inputs,
    and a softmax over the class scores is appended — then returns early.
    Otherwise it writes ``train.prototxt`` with the matching, target and
    loss layers attached.

    Args:
        split: 'train' (default) or 'test'.

    Returns:
        None; the prototxt is written to the current directory.
    """
    net = caffe.NetSpec()
    # Python data layer supplies the image, ground-truth boxes and im_info.
    net.data, net.gt_boxes, net.im_info = L.Python(
        ntop=3, module='layers.box_data_layer', layer='BoxDataLayer')
    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    mbox_source_layers = [
        'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
    ]
    use_batchnorm = False
    lr_mult = 1
    # Standard SSD300 prior scales: conv4_3 gets 10%/20% of the input size,
    # the remaining sources get an even spread from min_ratio to max_ratio.
    min_sizes = []
    max_sizes = []
    min_ratio = 20
    max_ratio = 90
    step = int(
        math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
    min_dim = 300
    # BUGFIX: was ``xrange`` (Python-2 only); ``range`` is equivalent here.
    for ratio in range(min_ratio, max_ratio + 1, step):
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)
    min_sizes = [min_dim * 10 / 100.] + min_sizes
    max_sizes = [min_dim * 20 / 100.] + max_sizes
    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    steps = [8, 16, 32, 64, 100, 300]
    normalizations = [20, -1, -1, -1, -1, -1]  # L2-normalize conv4_3 only
    num_classes = 21  # 20 PASCAL VOC classes + background
    share_location = True
    flip = True
    clip = False
    prior_variance = [0.1, 0.1, 0.2, 0.2]
    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    # Matching parameters for the train net. (Removed: unused locals
    # ``neg_pos_ratio``/``neg_overlap`` and a redundant ``num_classes = 21``.)
    overlap_threshold = 0.5
    ignore_cross_boundary_bbox = False

    if split == 'test':
        with open('test.prototxt', 'w') as f:
            # Create the SoftmaxLayer
            name = "mbox_prob"
            softmax_inputs = [net.mbox_conf_reshape]
            # axis=2 normalizes over the class dimension of (N, priors, C).
            net.mbox_prob = L.Softmax(*softmax_inputs,
                                      name=name,
                                      softmax_param={'axis': 2})
            net_param = net.to_proto()
            # Drop the Python data layer and declare plain inputs instead,
            # so the deploy net can be fed arbitrary blobs.
            del net_param.layer[0]
            net_param.input.extend(['data'])
            net_param.input_shape.extend(
                [caffe_pb2.BlobShape(dim=[1, 3, 300, 300])])
            net_param.input.extend(['im_info'])
            net_param.input_shape.extend([caffe_pb2.BlobShape(dim=[2])])
            f.write(str(net_param))
        return

    multibox_match_param = {
        'num_classes': num_classes,
        'overlap_threshold': overlap_threshold,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    # Create the MultiBoxMatchLayer.
    name = "mbox_match"
    match_inputs = [net.mbox_priorbox, net.gt_boxes]
    net.match_inds, net.match_labels = L.Python(
        *match_inputs,
        name=name,
        module='layers.multibox_match_layer',
        layer='MultiBoxMatchLayer',
        param_str=str(multibox_match_param),
        ntop=2)

    # Create the LossLayer for cls
    name = "cls_loss"
    cls_loss_inputs = [net.mbox_conf_reshape, net.match_labels]
    net.cls_loss = L.SoftmaxWithFocalLoss(*cls_loss_inputs,
                                          name=name,
                                          loss_param={'ignore_label': -1},
                                          softmax_param={'axis': 2},
                                          focal_loss_param={
                                              'alpha': 0.25,
                                              'gamma': 2.0
                                          })

    # Create the MultiBoxTargetLayer for bbox
    name = "mbox_target"
    bbox_target_inputs = [
        net.match_inds, net.match_labels, net.mbox_priorbox, net.gt_boxes
    ]
    net.bbox_targets, net.bbox_inside_weights, net.bbox_outside_weights = \
        L.Python(*bbox_target_inputs, name=name, ntop=3,
                 module='layers.multibox_target_layer', layer='MultiBoxTargetLayer')

    # Create the LossLayer for bbox
    name = "bbox_loss"
    bbox_loss_inputs = [
        net.mbox_loc_reshape, net.bbox_targets, net.bbox_inside_weights,
        net.bbox_outside_weights
    ]
    net.bbox_loss = L.SmoothL1Loss(*bbox_loss_inputs, name=name, loss_weight=1)

    with open('train.prototxt', 'w') as f:
        f.write(str(net.to_proto()))
Example 3
0
def fcn(split):
    """Build the FCN-32s (VGG16) segmentation net spec for *split*.

    Returns the NetParameter proto: a Python data layer (SBDD for 'train',
    PASCAL VOC 2011 otherwise), the VGG16 convolutional body, a fully
    convolutional classifier head, 32x deconvolution upsampling, a crop back
    to the input geometry, and a softmax loss.
    """
    n = caffe.NetSpec()

    # Data layer: dataset directory and layer class depend on the split.
    layer_params = dict(split=split,
                        mean=(104.00699, 116.66877, 122.67892),
                        seed=1337)
    if split == 'train':
        layer_params['sbdd_dir'] = '../data/sbdd/dataset'
        data_layer = 'SBDDSegDataLayer'
    else:
        layer_params['voc_dir'] = '../data/pascal/VOC2011'
        data_layer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=data_layer,
                               ntop=2,
                               param_str=str(layer_params))

    # VGG16 body as (stage, conv count, channels) triples; the very first
    # conv pads by 100 so the final crop can realign with the input.
    bottom = n.data
    for stage, repeats, channels in [(1, 2, 64), (2, 2, 128), (3, 3, 256),
                                     (4, 3, 512), (5, 3, 512)]:
        for k in range(1, repeats + 1):
            extra = {'pad': 100} if stage == 1 and k == 1 else {}
            conv, relu = conv_relu(bottom, channels, **extra)
            setattr(n, 'conv%d_%d' % (stage, k), conv)
            setattr(n, 'relu%d_%d' % (stage, k), relu)
            bottom = relu
        pooled = max_pool(bottom)
        setattr(n, 'pool%d' % stage, pooled)
        bottom = pooled

    # Fully convolutional classifier head (fc6/fc7 as convolutions).
    n.fc6, n.relu6 = conv_relu(bottom, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    # 32x upsampling with a fixed (lr_mult=0), bias-free deconvolution.
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=21,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
Example 4
0
# SSD @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------

import dragon.vm.caffe as caffe
from dragon.vm.caffe.model_libs import *
from dragon.vm.caffe import layers as L
from dragon.vm.caffe import params as P
import math
from backbone import AirBody

if __name__ == '__main__':

    net = caffe.NetSpec()
    net.data, net.gt_boxes, net.im_info = L.Python(
        ntop=3, module='layers.box_data_layer', layer='BoxDataLayer')
    AirBody(net, from_layer='data', use_conv5=False)
    mbox_source_layers = ['conv3b', 'conv4b']
    lr_mult = 1
    input_dim = 300
    min_sizes = [input_dim * 0.1, input_dim * 0.2]
    max_sizes = [input_dim * 0.3, input_dim * 0.5]
    aspect_ratios = [[2, 3], [2, 3]]
    steps = [8, 16]
    normalizations = [-1, -1]
    num_classes = 2
    mbox_layers = CreateMultiQuadHead(net,
                                      data_layer='data',
                                      from_layers=mbox_source_layers,
                                      use_batchnorm=False,
                                      min_sizes=min_sizes,