def ResNet152Body(net, from_layer, use_pool5=True, use_dilation_conv5=False, **bn_param):
    conv_prefix = ''
    conv_postfix = ''
    bn_prefix = 'bn_'
    bn_postfix = ''
    scale_prefix = 'scale_'
    scale_postfix = ''
    ConvBNLayer(net, from_layer, 'conv1', use_bn=True, use_relu=True,
        num_output=64, kernel_size=7, pad=3, stride=2,
        conv_prefix=conv_prefix, conv_postfix=conv_postfix,
        bn_prefix=bn_prefix, bn_postfix=bn_postfix,
        scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)

    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3, stride=2)

    # conv2_x: 3 bottleneck blocks.
    ResBody(net, 'pool1', '2a', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=True, **bn_param)
    ResBody(net, 'res2a', '2b', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=False, **bn_param)
    ResBody(net, 'res2b', '2c', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=False, **bn_param)

    # conv3_x: 8 bottleneck blocks (3a, 3b1-3b7).
    ResBody(net, 'res2c', '3a', out2a=128, out2b=128, out2c=512, stride=2, use_branch1=True, **bn_param)

    from_layer = 'res3a'
    for i in range(1, 8):
        block_name = '3b{}'.format(i)
        ResBody(net, from_layer, block_name, out2a=128, out2b=128, out2c=512, stride=1, use_branch1=False, **bn_param)
        from_layer = 'res{}'.format(block_name)

    # conv4_x: 36 bottleneck blocks (4a, 4b1-4b35).
    ResBody(net, from_layer, '4a', out2a=256, out2b=256, out2c=1024, stride=2, use_branch1=True, **bn_param)

    from_layer = 'res4a'
    for i in range(1, 36):
        block_name = '4b{}'.format(i)
        ResBody(net, from_layer, block_name, out2a=256, out2b=256, out2c=1024, stride=1, use_branch1=False, **bn_param)
        from_layer = 'res{}'.format(block_name)

    # conv5_x: either stride as usual, or keep resolution and dilate the 3x3 convs.
    stride = 2
    dilation = 1
    if use_dilation_conv5:
        stride = 1
        dilation = 2

    ResBody(net, from_layer, '5a', out2a=512, out2b=512, out2c=2048, stride=stride, use_branch1=True, dilation=dilation, **bn_param)
    ResBody(net, 'res5a', '5b', out2a=512, out2b=512, out2c=2048, stride=1, use_branch1=False, dilation=dilation, **bn_param)
    ResBody(net, 'res5b', '5c', out2a=512, out2b=512, out2c=2048, stride=1, use_branch1=False, dilation=dilation, **bn_param)

    if use_pool5:
        net.pool5 = L.Pooling(net.res5c, pool=P.Pooling.AVE, global_pooling=True)

    return net
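
# Usage sketch (not part of the original library): dump a standalone prototxt for the
# ResNet-152 body above. Assumes pycaffe is importable; the helper name, the plain
# Input data layer, and the 224x224 shape are illustrative choices only.
def _example_resnet152_prototxt(prototxt_path, **bn_param):
    import caffe
    net = caffe.NetSpec()
    # A bare Input layer stands in for whatever data layer a real setup would use.
    net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
    ResNet152Body(net, 'data', use_pool5=True, **bn_param)
    with open(prototxt_path, 'w') as f:
        f.write(str(net.to_proto()))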
def InceptionTower(net, from_layer, tower_name, layer_params, **bn_param):
    use_scale = False
    for param in layer_params:
        tower_layer = '{}/{}'.format(tower_name, param['name'])
        # 'name' only picks the layer name; it is removed (in place) before the
        # remaining entries are forwarded as layer parameters.
        del param['name']
        if 'pool' in tower_layer:
            net[tower_layer] = L.Pooling(net[from_layer], **param)
        else:
            param.update(bn_param)
            ConvBNLayer(net, from_layer, tower_layer, use_bn=True, use_relu=True,
                use_scale=use_scale, **param)
        from_layer = tower_layer
    return net[from_layer]
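
# Illustrative call (assumption: a blob named 'conv1' already exists in net): builds a
# 1x1 -> 3x3 branch named 'example/conv' and 'example/conv_1'. The tower name and
# filter counts are made up for demonstration; since InceptionTower pops 'name' from
# each dict in place, pass throwaway dicts as shown.
def _example_inception_tower(net, from_layer='conv1', **bn_param):
    return InceptionTower(net, from_layer, 'example', [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        ], **bn_param)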
def AirBody(net, from_layer='data', use_conv5=False):
    # conv1
    ConvNormLayer(net, from_layer, 'conv1', ks=7, p=3, s=2, num_output=64)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    # conv2
    ResBlock(net, 'pool1', 'conv2a', 64, force_branch1=True)
    ResBlock(net, 'conv2a', 'conv2b', 64)

    # conv3
    ResBlock(net, 'conv2b', 'conv3a', 128, stride=2)
    InceptionResBlock(net, 'conv3a', 'conv3b', 128)

    # conv4
    ResBlock(net, 'conv3b', 'conv4a', 256, stride=2)
    InceptionResBlock(net, 'conv4a', 'conv4b', 256)

    # conv5
    if use_conv5:
        ResBlock(net, 'conv4b', 'conv5a', 384, stride=2)
        InceptionResBlock(net, 'conv5a', 'conv5b', 384)
def InceptionV3Body(net, from_layer, output_pred=False, **bn_param):
    # scale is fixed to 1, thus we ignore it.
    use_scale = False

    out_layer = 'conv'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
        num_output=32, kernel_size=3, pad=0, stride=2, use_scale=use_scale, **bn_param)
    from_layer = out_layer

    out_layer = 'conv_1'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
        num_output=32, kernel_size=3, pad=0, stride=1, use_scale=use_scale, **bn_param)
    from_layer = out_layer

    out_layer = 'conv_2'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
        num_output=64, kernel_size=3, pad=1, stride=1, use_scale=use_scale, **bn_param)
    from_layer = out_layer

    out_layer = 'pool'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    out_layer = 'conv_3'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
        num_output=80, kernel_size=1, pad=0, stride=1, use_scale=use_scale, **bn_param)
    from_layer = out_layer

    out_layer = 'conv_4'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
        num_output=192, kernel_size=3, pad=0, stride=1, use_scale=use_scale, **bn_param)
    from_layer = out_layer

    out_layer = 'pool_1'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    # inceptions with 1x1, 3x3, 5x5 convolutions
    for inception_id in range(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
            tower_2_conv_num_output = 32
        else:
            out_layer = 'mixed_{}'.format(inception_id)
            tower_2_conv_num_output = 64
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=48, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=64, kernel_size=5, pad=2, stride=1),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
            dict(name='conv_2', num_output=96, kernel_size=3, pad=1, stride=1),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=tower_2_conv_num_output, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3(in sequence) convolutions
    out_layer = 'mixed_3'
    towers = []
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=384, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        dict(name='conv_2', num_output=96, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in range(4, 8):
        if inception_id == 4:
            num_output = 128
        elif inception_id == 5 or inception_id == 6:
            num_output = 160
        elif inception_id == 7:
            num_output = 192
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_2', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_3', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_4', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            ], **bn_param)
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    towers = []
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=320, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    tower_name = '{}/tower_1'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        dict(name='conv_3', num_output=192, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
        ], **bn_param)
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    for inception_id in range(9, 11):
        num_output = 384
        num_output2 = 448
        if inception_id == 9:
            pool = P.Pooling.AVE
        else:
            pool = P.Pooling.MAX
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=320, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)

        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
            ], **bn_param)
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
            ], **bn_param)
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output2, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=3, pad=1, stride=1),
            ], **bn_param)
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
            ], **bn_param)
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
            ], **bn_param)
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=pool, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
            ], **bn_param)
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    if output_pred:
        net.pool_3 = L.Pooling(net[from_layer], pool=P.Pooling.AVE, kernel_size=8, pad=0, stride=1)
        net.softmax = L.InnerProduct(net.pool_3, num_output=1008)
        net.softmax_prob = L.Softmax(net.softmax)

    return net
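
# Usage sketch (not part of the original library): with output_pred=True the final
# 8x8 average pool assumes a 299x299 input. The helper name and the plain Input
# data layer are illustrative assumptions.
def _example_inception_v3_prototxt(prototxt_path, **bn_param):
    import caffe
    net = caffe.NetSpec()
    net.data = L.Input(shape=dict(dim=[1, 3, 299, 299]))
    InceptionV3Body(net, 'data', output_pred=True, **bn_param)
    with open(prototxt_path, 'w') as f:
        f.write(str(net.to_proto()))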
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
        dilated=False, nopool=False, dropout=True, freeze_layers=[], dilate_pool4=False):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
        # Keep conv5 un-dilated in this branch; without this, `dilation` would be
        # undefined below when nopool=True.
        dilation = 1
    else:
        name = 'pool4'
        if dilate_pool4:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=3, stride=1, pad=1)
            dilation = 2
        else:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)
            dilation = 1

    # Effective padding that keeps the (possibly dilated) 3x3 convs size-preserving.
    kernel_size = 3
    pad = (kernel_size + (dilation - 1) * (kernel_size - 1) - 1) // 2
    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

        if fully_conv:
            # fc6/fc7 as convolutions, optionally reduced (1024 channels, 3x3 dilated
            # kernel) instead of the full 4096-channel 7x7 version.
            if dilated:
                if reduced:
                    dilation = dilation * 6
                    kernel_size = 3
                    num_output = 1024
                else:
                    dilation = dilation * 2
                    kernel_size = 7
                    num_output = 4096
            else:
                if reduced:
                    dilation = dilation * 3
                    kernel_size = 3
                    num_output = 1024
                else:
                    kernel_size = 7
                    num_output = 4096
            pad = (kernel_size + (dilation - 1) * (kernel_size - 1) - 1) // 2
            net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net.pool5, num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
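
# Usage sketch (not part of the original library): the reduced/dilated/fully-conv
# combination below is the flavour commonly used as an SSD-style VGG-16 backbone
# (fc6/fc7 become 1024-channel convolutions). Assumes the pycaffe fork whose
# NetSpec provides the keys()/update() used above; the helper name, the Input
# data layer, and the 300x300 shape are illustrative assumptions.
def _example_vgg16_prototxt(prototxt_path):
    import caffe
    net = caffe.NetSpec()
    net.data = L.Input(shape=dict(dim=[1, 3, 300, 300]))
    VGGNetBody(net, 'data', need_fc=True, fully_conv=True, reduced=True,
        dilated=True, dropout=False)
    with open(prototxt_path, 'w') as f:
        f.write(str(net.to_proto()))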
def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)