def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(
        n.score_fr,
        convolution_param=dict(num_output=21, kernel_size=64, stride=32,
                               bias_term=False),
        param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
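# --- Sketch: helpers assumed by fcn() above ----------------------------------
# fcn() uses conv_relu(), max_pool() and crop(), which are not shown in this
# excerpt. The definitions below follow the reference FCN code and are an
# assumption here, as are the imports.
import caffe
from caffe import layers as L, params as P
from caffe.coord_map import crop  # computes the crop offset between two blobs


def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    # Convolution followed by an in-place ReLU; the bias gets a 2x learning
    # rate and no weight decay.
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)


def max_pool(bottom, ks=2, stride=2):
    # 2x2 max pooling with stride 2.
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)


# Example use: serialize the train-phase definition to a prototxt file.
#   with open('train.prototxt', 'w') as f:
#       f.write(str(fcn('train')))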
        out_layer, use_batchnorm, use_relu, 256, 3, 0, 1, lr_mult=lr_mult)
    return net


if __name__ == '__main__':
    imdb_name = 'voc_2007_trainval'
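# --- Sketch: imports assumed by the detection script below -------------------
# Only the tail of AddExtraLayers() appears above. The rest of this script
# relies on the imports below; the location of the SSD-style helpers
# (VGGNetBody, ConvBNLayer, CreateMultiBoxHead) is an assumption, with
# caffe.model_libs being where the stock SSD code keeps them.
import math

import caffe
from caffe import layers as L
from caffe.proto import caffe_pb2

# from caffe.model_libs import VGGNetBody, ConvBNLayer, CreateMultiBoxHead  # assumed path

# ConvBNLayer is assumed to follow the usual SSD helper signature,
#   ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu,
#               num_output, kernel_size, pad, stride, lr_mult=1, ...)
# so the call closed above adds a 256-channel 3x3 convolution with pad 0 and
# stride 1.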
def make(split='train'):
    net = caffe.NetSpec()
    net.data, net.gt_boxes, net.im_info = L.Python(
        ntop=3, module='layers.box_data_layer', layer='BoxDataLayer')
    VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
               dilated=True, dropout=False)

    mbox_source_layers = [
        'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
    ]
    use_batchnorm = False
    lr_mult = 1

    min_sizes = []
    max_sizes = []
    min_ratio = 20
    max_ratio = 90
    step = int(
        math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
    min_dim = 300
    for ratio in xrange(min_ratio, max_ratio + 1, step):
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)
    min_sizes = [min_dim * 10 / 100.] + min_sizes
    max_sizes = [min_dim * 20 / 100.] + max_sizes

    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    steps = [8, 16, 32, 64, 100, 300]
    normalizations = [20, -1, -1, -1, -1, -1]
    num_classes = 21
    share_location = True
    flip = True
    clip = False
    prior_variance = [0.1, 0.1, 0.2, 0.2]

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
    mbox_layers = CreateMultiBoxHead(
        net, data_layer='data', from_layers=mbox_source_layers,
        use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
        aspect_ratios=aspect_ratios, steps=steps,
        normalizations=normalizations, num_classes=num_classes,
        share_location=share_location, flip=flip, clip=clip,
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)

    num_classes = 21
    overlap_threshold = 0.5
    neg_pos_ratio = 3.
    neg_overlap = 0.5
    ignore_cross_boundary_bbox = False

    if split == 'test':
        with open('test.prototxt', 'w') as f:
            # Create the SoftmaxLayer
            name = "mbox_prob"
            softmax_inputs = [net.mbox_conf_reshape]
            net.mbox_prob = L.Softmax(*softmax_inputs, name=name,
                                      softmax_param={'axis': 2})
            net_param = net.to_proto()
            del net_param.layer[0]
            net_param.input.extend(['data'])
            net_param.input_shape.extend(
                [caffe_pb2.BlobShape(dim=[1, 3, 300, 300])])
            net_param.input.extend(['im_info'])
            net_param.input_shape.extend([caffe_pb2.BlobShape(dim=[2])])
            f.write(str(net_param))
        return

    multibox_match_param = {
        'num_classes': num_classes,
        'overlap_threshold': overlap_threshold,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    # Create the MultiBoxMatchLayer.
    name = "mbox_match"
    match_inputs = [net.mbox_priorbox, net.gt_boxes]
    net.match_inds, net.match_labels = L.Python(
        *match_inputs, name=name, module='layers.multibox_match_layer',
        layer='MultiBoxMatchLayer', param_str=str(multibox_match_param),
        ntop=2)

    # Create the LossLayer for cls
    name = "cls_loss"
    cls_loss_inputs = [net.mbox_conf_reshape, net.match_labels]
    net.cls_loss = L.SoftmaxWithFocalLoss(
        *cls_loss_inputs, name=name, loss_param={'ignore_label': -1},
        softmax_param={'axis': 2},
        focal_loss_param={'alpha': 0.25, 'gamma': 2.0})

    # Create the MultiBoxTargetLayer for bbox
    name = "mbox_target"
    bbox_target_inputs = [
        net.match_inds, net.match_labels, net.mbox_priorbox, net.gt_boxes
    ]
    net.bbox_targets, net.bbox_inside_weights, net.bbox_outside_weights = \
        L.Python(*bbox_target_inputs, name=name, ntop=3,
                 module='layers.multibox_target_layer',
                 layer='MultiBoxTargetLayer')

    # Create the LossLayer for bbox
    name = "bbox_loss"
    bbox_loss_inputs = [
        net.mbox_loc_reshape, net.bbox_targets, net.bbox_inside_weights,
        net.bbox_outside_weights
    ]
    net.bbox_loss = L.SmoothL1Loss(*bbox_loss_inputs, name=name,
                                   loss_weight=1)

    with open('train.prototxt', 'w') as f:
        f.write(str(net.to_proto()))
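# --- Usage sketch (assumption, not part of the original excerpt) -------------
# make() writes test.prototxt itself when split == 'test' and train.prototxt
# otherwise, so generating both network definitions is just:
#
#   make('train')   # writes train.prototxt
#   make('test')    # writes test.prototxt
#
# Note that the custom layer types referenced above (SoftmaxWithFocalLoss,
# SmoothL1Loss, and the Python layers in the layers package) are only resolved
# when Caffe loads the generated prototxt, not while NetSpec builds it.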