def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=None,
               dilate_pool4=False):
    """Append a VGG-16 style body (conv1_1 ... conv5_3, optional fc6/fc7) to `net`.

    Args:
        net: network spec being built. It must support `keys()`, attribute and
            item access/assignment of layers, and `update(name, params)`
            (presumably a caffe NetSpec -- confirm against the caller).
        from_layer: name of an existing entry in `net` that feeds conv1_1.
        need_fc: when true, add the stage-5 downsampling layer and fc6/fc7.
        fully_conv: build fc6/fc7 as Convolution layers instead of
            InnerProduct layers.
        reduced: with `fully_conv`, use 1024 outputs and a 3x3 fc6 kernel
            instead of 4096 outputs and a 7x7 kernel.
        dilated: use stride 1 for the stage-5 downsampling layer and enlarge
            the fc6 dilation accordingly (only affects the `fully_conv` path).
        nopool: replace every max-pooling layer by a 3x3 convolution
            (stride 2, or stride 1 for stage 5 when `dilated`).
        dropout: add in-place Dropout (ratio 0.5) after relu6 and relu7.
        freeze_layers: iterable of layer names; each one present in `net` is
            updated with lr_mult=0 / decay_mult=0 params. `None` means none.
        dilate_pool4: when pooling is used, make pool4 a 3x3 stride-1 pooling
            and run the conv5 stage with dilation 2.

    Returns:
        The same `net` object, with the new layers added.
    """
    if freeze_layers is None:
        freeze_layers = []

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys(), 'unknown from_layer: %r' % (from_layer,)

    def same_pad(kernel_size, dilation):
        """Padding that keeps spatial size for a stride-1 dilated kernel."""
        effective_kernel = kernel_size + (dilation - 1) * (kernel_size - 1)
        return (effective_kernel - 1) // 2

    def add_conv_stack(stage, bottom, num_output, depth, **conv_args):
        """Add `depth` conv/ReLU pairs named conv<stage>_<i> / relu<stage>_<i>."""
        for idx in range(1, depth + 1):
            suffix = '{}_{}'.format(stage, idx)
            net['conv' + suffix] = L.Convolution(
                bottom, num_output=num_output, **conv_args)
            net['relu' + suffix] = L.ReLU(net['conv' + suffix], in_place=True)
            bottom = net['relu' + suffix]
        return bottom

    def add_strided_conv(name, bottom, num_output, stride):
        """Add the 3x3 convolution that stands in for a pooling layer."""
        net[name] = L.Convolution(bottom, num_output=num_output, pad=1,
                                  kernel_size=3, stride=stride, **kwargs)

    def add_downsample(stage, bottom, num_output, conv_index):
        """Halve the resolution after a stage; return the new layer's name."""
        if nopool:
            name = 'conv{}_{}'.format(stage, conv_index)
            add_strided_conv(name, bottom, num_output, stride=2)
        else:
            name = 'pool{}'.format(stage)
            net[name] = L.Pooling(bottom, pool=P.Pooling.MAX,
                                  kernel_size=2, stride=2)
        return name

    plain_conv = dict(pad=1, kernel_size=3, **kwargs)

    # Stages 1-3: conv stacks, each followed by a 2x downsampling layer.
    top = add_conv_stack(1, net[from_layer], 64, 2, **plain_conv)
    name = add_downsample(1, top, 64, conv_index=3)

    top = add_conv_stack(2, net[name], 128, 2, **plain_conv)
    name = add_downsample(2, top, 128, conv_index=3)

    top = add_conv_stack(3, net[name], 256, 3, **plain_conv)
    name = add_downsample(3, top, 256, conv_index=4)

    # Stage 4: same pattern, except pool4 may keep the resolution
    # (dilate_pool4), in which case stage 5 compensates with dilation 2.
    top = add_conv_stack(4, net[name], 512, 3, **plain_conv)
    # Default covers the nopool path, which previously left `dilation`
    # unassigned and crashed when the conv5 padding was computed.
    dilation = 1
    if nopool:
        name = 'conv4_4'
        add_strided_conv(name, top, 512, stride=2)
    else:
        name = 'pool4'
        if dilate_pool4:
            net[name] = L.Pooling(top, pool=P.Pooling.MAX,
                                  kernel_size=3, stride=1, pad=1)
            dilation = 2
        else:
            net[name] = L.Pooling(top, pool=P.Pooling.MAX,
                                  kernel_size=2, stride=2)

    # Stage 5: dilation is always passed explicitly here, even when it is 1.
    kernel_size = 3
    top = add_conv_stack(
        5, net[name], 512, 3,
        **dict(pad=same_pad(kernel_size, dilation), kernel_size=kernel_size,
               dilation=dilation, **kwargs))

    if need_fc:
        # `dilated` keeps the resolution (stride 1); otherwise downsample by 2.
        if nopool:
            name = 'conv5_4'
            add_strided_conv(name, top, 512, stride=1 if dilated else 2)
        else:
            name = 'pool5'
            if dilated:
                net[name] = L.Pooling(top, pool=P.Pooling.MAX, pad=1,
                                      kernel_size=3, stride=1)
            else:
                net[name] = L.Pooling(top, pool=P.Pooling.MAX,
                                      kernel_size=2, stride=2)

        if fully_conv:
            if reduced:
                dilation *= 6 if dilated else 3
                kernel_size = 3
                num_output = 1024
            else:
                if dilated:
                    dilation *= 2
                kernel_size = 7
                num_output = 4096

            net.fc6 = L.Convolution(
                net[name], num_output=num_output,
                pad=same_pad(kernel_size, dilation), kernel_size=kernel_size,
                dilation=dilation, **kwargs)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5,
                                      in_place=True)

            net.fc7 = L.Convolution(
                net.relu6, num_output=1024 if reduced else 4096,
                kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5,
                                      in_place=True)
        else:
            # Use net[name] rather than net.pool5: with nopool the stage-5
            # downsampling layer is called conv5_4 and pool5 does not exist.
            net.fc6 = L.InnerProduct(net[name], num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5,
                                      in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5,
                                      in_place=True)

    # Update freeze layers: same fillers, but zero learning-rate and
    # weight-decay multipliers for both param blobs.
    frozen_kwargs = dict(
        kwargs,
        param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, frozen_kwargs)

    return net
def fcn(split):
    """Build the fully convolutional segmentation net for `split`.

    For `split == 'train'` the Python data layer is `SBDDSegDataLayer` reading
    from `sbdd_dir`; for any other split it is `VOCSegDataLayer` reading from
    `voc_dir`. Returns the result of `NetSpec.to_proto()`.
    """
    n = caffe.NetSpec()

    if split == 'train':
        dir_key, dir_path = 'sbdd_dir', '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        dir_key, dir_path = 'voc_dir', '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'

    # The dict is handed to the layer via str(), so it is built in the same
    # key order as before: split, mean, seed, then the dataset directory.
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    pydata_params[dir_key] = dir_path

    data, label = L.Python(module='voc_layers', layer=pylayer, ntop=2,
                           param_str=str(pydata_params))
    n.data = data
    n.label = label

    # the base net: (num_output, number of conv/relu pairs) per stage,
    # each stage closed by a max-pool
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    bottom = data
    for stage, (num_output, depth) in enumerate(stages, 1):
        for idx in range(1, depth + 1):
            # only the very first convolution is given pad=100
            extra = {'pad': 100} if (stage, idx) == (1, 1) else {}
            conv, relu = conv_relu(bottom, num_output, **extra)
            setattr(n, 'conv%d_%d' % (stage, idx), conv)
            setattr(n, 'relu%d_%d' % (stage, idx), relu)
            bottom = relu
        pooled = max_pool(bottom)
        setattr(n, 'pool%d' % stage, pooled)
        bottom = pooled

    # fully conv: fc6 (7x7) and fc7 (1x1), each followed by in-place dropout
    for fc_index, ks in ((6, 7), (7, 1)):
        fc, relu = conv_relu(bottom, 4096, ks=ks, pad=0)
        setattr(n, 'fc%d' % fc_index, fc)
        setattr(n, 'relu%d' % fc_index, relu)
        dropped = L.Dropout(relu, dropout_ratio=0.5, in_place=True)
        setattr(n, 'drop%d' % fc_index, dropped)
        bottom = dropped

    score_params = [dict(lr_mult=1, decay_mult=1),
                    dict(lr_mult=2, decay_mult=0)]
    n.score_fr = L.Convolution(bottom, num_output=21, kernel_size=1, pad=0,
                               param=score_params)

    deconv_params = dict(num_output=21, kernel_size=64, stride=32,
                         bias_term=False)
    n.upscore = L.Deconvolution(n.score_fr, convolution_param=deconv_params,
                                param=[dict(lr_mult=0)])

    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(
        n.score, n.label,
        loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()