def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
        use_objectness=False, use_iou=False, normalizations=[], use_batchnorm=True,
        lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[], prior_variance=[0.1],
        aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True,
        flip=True, clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0,
        conf_postfix='', loc_postfix='', **bn_param):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    iou_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer],
                    scale_filler=dict(type="constant", value=normalizations[i]),
                    across_spatial=False, channel_shared=False)
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                inter_name = "{}_inter".format(from_layer)
                ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                    use_relu=True, lr_mult=lr_mult, num_output=inter_layer_depth[i],
                    kernel_size=3, pad=1, stride=1, **bn_param)
                from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
            lr_mult=lr_mult, num_output=num_loc_output, kernel_size=kernel_size,
            pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
            lr_mult=lr_mult, num_output=num_conf_output, kernel_size=kernel_size,
            pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create iou prediction layer.
        if use_iou:
            name = "{}_mbox_iou{}".format(from_layer, conf_postfix)
            num_iou_output = num_priors_per_location
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                lr_mult=lr_mult, num_output=num_iou_output, kernel_size=kernel_size,
                pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            iou_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        priorbox_param = {'min_size': min_size, 'clip': clip, 'offset': offset}
        if max_size:
            priorbox_param.update({'max_size': max_size})
        if aspect_ratio:
            priorbox_param.update({'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            priorbox_param.update({'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                priorbox_param.update({'img_size': img_height})
            else:
                priorbox_param.update({'img_h': img_height, 'img_w': img_width})
        net[name] = L.Python(net[from_layer], net['im_info'],
            module='layers.prior_box_layer', layer='PriorBoxLayer',
            param_str=str(priorbox_param))
        priorbox_layers.append(net[name])

        # Create objectness prediction layer.
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                lr_mult=lr_mult, num_output=num_obj_output, kernel_size=kernel_size,
                pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    net['mbox_loc_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1, 4]})
    mbox_layers.append(net['mbox_loc_reshape'])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    net['mbox_conf_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1, num_classes]})
    mbox_layers.append(net['mbox_conf_reshape'])
    if use_iou:
        name = "mbox_iou"
        net[name] = L.Concat(*iou_layers, axis=1)
        net['mbox_iou_reshape'] = L.Reshape(net[name], shape={'dim': [0, -1]})
        mbox_layers.append(net['mbox_iou_reshape'])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=0)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])
    return mbox_layers
def make(split='train'):
    net = caffe.NetSpec()
    net.data, net.gt_boxes, net.im_info = L.Python(
        ntop=3, module='layers.box_data_layer', layer='BoxDataLayer')
    VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
               dilated=True, dropout=False)

    mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
    use_batchnorm = False
    lr_mult = 1

    # Prior box sizes follow the SSD300 schedule: 10% of the input dimension
    # for conv4_3, then 20%..90% spread evenly across the remaining sources.
    min_sizes = []
    max_sizes = []
    min_ratio = 20
    max_ratio = 90
    step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
    min_dim = 300
    for ratio in xrange(min_ratio, max_ratio + 1, step):
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)
    min_sizes = [min_dim * 10 / 100.] + min_sizes
    max_sizes = [min_dim * 20 / 100.] + max_sizes

    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    steps = [8, 16, 32, 64, 100, 300]
    normalizations = [20, -1, -1, -1, -1, -1]
    num_classes = 21
    share_location = True
    flip = True
    clip = False
    prior_variance = [0.1, 0.1, 0.2, 0.2]

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
    mbox_layers = CreateMultiBoxHead(net, data_layer='data',
        from_layers=mbox_source_layers, use_batchnorm=use_batchnorm,
        min_sizes=min_sizes, max_sizes=max_sizes, aspect_ratios=aspect_ratios,
        steps=steps, normalizations=normalizations, num_classes=num_classes,
        share_location=share_location, flip=flip, clip=clip,
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)

    overlap_threshold = 0.5
    neg_pos_ratio = 3.
    neg_overlap = 0.5
    ignore_cross_boundary_bbox = False

    if split == 'test':
        with open('test.prototxt', 'w') as f:
            # Create the SoftmaxLayer.
            name = "mbox_prob"
            softmax_inputs = [net.mbox_conf_reshape]
            net.mbox_prob = L.Softmax(*softmax_inputs, name=name,
                                      softmax_param={'axis': 2})
            net_param = net.to_proto()
            # Strip the Python data layer and expose static data/im_info
            # inputs for deployment.
            del net_param.layer[0]
            net_param.input.extend(['data'])
            net_param.input_shape.extend(
                [caffe_pb2.BlobShape(dim=[1, 3, 300, 300])])
            net_param.input.extend(['im_info'])
            net_param.input_shape.extend([caffe_pb2.BlobShape(dim=[2])])
            f.write(str(net_param))
        return

    multibox_match_param = {
        'num_classes': num_classes,
        'overlap_threshold': overlap_threshold,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }

    # Create the MultiBoxMatchLayer.
    name = "mbox_match"
    match_inputs = [net.mbox_priorbox, net.gt_boxes]
    net.match_inds, net.match_labels = L.Python(*match_inputs, name=name,
        module='layers.multibox_match_layer', layer='MultiBoxMatchLayer',
        param_str=str(multibox_match_param), ntop=2)

    # Create the LossLayer for cls.
    name = "cls_loss"
    cls_loss_inputs = [net.mbox_conf_reshape, net.match_labels]
    net.cls_loss = L.SoftmaxWithFocalLoss(*cls_loss_inputs, name=name,
        loss_param={'ignore_label': -1}, softmax_param={'axis': 2},
        focal_loss_param={'alpha': 0.25, 'gamma': 2.0})

    # Create the MultiBoxTargetLayer for bbox.
    name = "mbox_target"
    bbox_target_inputs = [net.match_inds, net.match_labels,
                          net.mbox_priorbox, net.gt_boxes]
    net.bbox_targets, net.bbox_inside_weights, net.bbox_outside_weights = \
        L.Python(*bbox_target_inputs, name=name, ntop=3,
                 module='layers.multibox_target_layer', layer='MultiBoxTargetLayer')

    # Create the LossLayer for bbox.
    name = "bbox_loss"
    bbox_loss_inputs = [net.mbox_loc_reshape, net.bbox_targets,
                        net.bbox_inside_weights, net.bbox_outside_weights]
    net.bbox_loss = L.SmoothL1Loss(*bbox_loss_inputs, name=name, loss_weight=1)

    with open('train.prototxt', 'w') as f:
        f.write(str(net.to_proto()))
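

# A minimal driver for make() (a sketch, not in the original file; it assumes
# the imports used above -- caffe, caffe_pb2, math, and the model_libs
# helpers -- are in scope).
if __name__ == '__main__':
    make('train')  # writes train.prototxt with the matching/target/loss layers
    make('test')   # writes test.prototxt with static data/im_info inputs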
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
            seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
            ntop=2, param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=21, kernel_size=64, stride=32,
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
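

# A typical driver for fcn() (a sketch following the upstream FCN convention;
# the output filenames and the 'val' split name are assumptions).
def make_net():
    with open('train.prototxt', 'w') as f:
        f.write(str(fcn('train')))
    with open('val.prototxt', 'w') as f:
        f.write(str(fcn('val')))

if __name__ == '__main__':
    make_net()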
# --------------------------------------------------------
# SSD @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------

import dragon.vm.caffe as caffe
from dragon.vm.caffe.model_libs import *
from dragon.vm.caffe import layers as L
from dragon.vm.caffe import params as P
import math

from backbone import AirBody

if __name__ == '__main__':
    net = caffe.NetSpec()
    net.data, net.gt_boxes, net.im_info = L.Python(
        ntop=3, module='layers.box_data_layer', layer='BoxDataLayer')
    AirBody(net, from_layer='data', use_conv5=False)

    mbox_source_layers = ['conv3b', 'conv4b']
    lr_mult = 1
    input_dim = 300
    min_sizes = [input_dim * 0.1, input_dim * 0.2]
    max_sizes = [input_dim * 0.3, input_dim * 0.5]
    aspect_ratios = [[2, 3], [2, 3]]
    steps = [8, 16]
    normalizations = [-1, -1]
    num_classes = 2

    mbox_layers = CreateMultiQuadHead(net, data_layer='data',
        from_layers=mbox_source_layers, use_batchnorm=False,
        min_sizes=min_sizes,
        # NOTE: the source was cut off at this point; the remaining arguments
        # below are an assumed completion that passes the variables defined
        # above, mirroring the CreateMultiBoxHead call pattern.
        max_sizes=max_sizes, aspect_ratios=aspect_ratios, steps=steps,
        normalizations=normalizations, num_classes=num_classes, lr_mult=lr_mult)