# Common imports for the pycaffe snippets below (each function originally
# lives in its own module, so this set is an approximation):
import copy
import json
import math

import numpy as np

import caffe
from caffe import layers as L, params as P
from caffe import to_proto

# aliases used by some of the snippets below
layers = L
cl = L


def ConvBNLayer(net, from_layer, out_name, use_bn, use_relu, num_output,
                kernel_size, pad, stride, use_scale=False,
                moving_average_fraction=0.99, eps=0.0001,
                conv_prefix='', conv_postfix='',
                bn_prefix='', bn_postfix='/bn',
                scale_prefix='', scale_postfix='/scale',
                bias_prefix='', bias_postfix='/bias',
                group=1, dilation=1, in_place=True, use_bias=False,
                lr_mult=1, engine=None):
    # parameters for the convolution layer (the conv keeps its bias term even
    # when it is followed by batchnorm; note the lr_mult argument is not
    # applied here, the param lr_mults are hardcoded)
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='msra'),
        'bias_term': True,
        'bias_filler': dict(type='constant', value=0),
        'group': group,
        'dilation': dilation
    }
    if engine is not None:
        kwargs['engine'] = engine
    if use_scale:
        # parameters for scale/bias layer after batchnorm.
        sb_kwargs = {
            'bias_term': True
            #,'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)],
            #'filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
        }
    if use_bn and use_scale:
        # parameters for batchnorm layer.
        bn_kwargs = {
            # not needed (and wrong order) for caffe-0.16
            #'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
            #'scale_filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
            'moving_average_fraction': moving_average_fraction,
            'eps': eps
            #,'scale_bias': True
        }
    elif use_bn:
        bn_kwargs = {
            # not needed (and wrong order) for caffe-0.16
            #'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
            #'scale_filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
            'moving_average_fraction': moving_average_fraction,
            'eps': eps,
            'scale_bias': True
        }
    if use_bias:
        bias_kwargs = {
            'param': [dict(lr_mult=1, decay_mult=0)],
            'filler': dict(type='constant', value=0.0),
        }

    out_layer = None
    conv_name = '{}{}{}'.format(conv_prefix, out_name, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    kwargs_conv = copy.deepcopy(kwargs)
    # lower wd for dw layers as per mobilenet paper - not working - harder to train
    #decay_mult = 0.01 if group == num_output else 1
    #param = {'decay_mult': decay_mult}
    #kwargs_conv['param'][0]['decay_mult'] = decay_mult
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h,
                                       stride=stride_h, **kwargs_conv)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w,
                                       **kwargs_conv)
    out_layer = conv_name
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_name, bn_postfix)
        net[bn_name] = L.BatchNorm(net[out_layer], in_place=in_place, **bn_kwargs)
        out_layer = conv_name if in_place else bn_name
    if use_scale:
        sb_name = '{}{}{}'.format(scale_prefix, out_name, scale_postfix)
        net[sb_name] = L.Scale(net[out_layer], in_place=True, **sb_kwargs)
        out_layer = sb_name
    if use_bias:
        bias_name = '{}{}{}'.format(bias_prefix, out_name, bias_postfix)
        net[bias_name] = L.Bias(net[out_layer], in_place=True, **bias_kwargs)
        out_layer = bias_name
    if use_relu:
        relu_name = '{}/relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[out_layer], in_place=True)
        out_layer = relu_name
    return out_layer
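
# ConvBNLayer depends on an UnpackVariable helper that is not shown in this
# file. A minimal sketch of its presumed behavior (an assumption, modeled on
# the SSD-style model_libs), plus a small, hypothetical usage example:
def UnpackVariable(var, num):
    # broadcast a scalar, or unpack a list, to `num` values
    if isinstance(var, (list, tuple)):
        assert len(var) == num
        return list(var)
    return [var] * num


def _demo_conv_bn():
    net = caffe.NetSpec()
    net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
    # builds conv1, conv1/bn, conv1/scale and conv1/relu on top of 'data'
    out = ConvBNLayer(net, 'data', 'conv1', use_bn=True, use_relu=True,
                      num_output=32, kernel_size=3, pad=1, stride=2,
                      use_scale=True)
    return net.to_proto()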
def generate_topologies():
    # TopologyElement, path_origin and save_proto come from the surrounding module.
    # LeNet structure
    # conv_pool1
    conv_pool1 = TopologyElement('conv_pool1', ['conv', 'pool'], None)
    conv_pool1_path = path_origin + '/train_' + conv_pool1.getName() + '.prototxt'
    conv_pool1.setPath(conv_pool1_path)
    conv_pool1_n = caffe.NetSpec()
    conv_pool1_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_pool1_n.conv1 = L.Convolution(conv_pool1_n.data, kernel_size=5,
                                       num_output=20, stride=1,
                                       weight_filler=dict(type='xavier'),
                                       bias_filler=dict(type='constant'))
    conv_pool1_n.pool1 = L.Pooling(conv_pool1_n.conv1, pool=P.Pooling.MAX,
                                   kernel_size=2, stride=2)
    save_proto(conv_pool1_n.to_proto(), conv_pool1_path)
    conv_pool1.setOutput('pool1')

    # conv_pool2
    conv_pool2 = TopologyElement('conv_pool2', ['conv', 'pool'], None)
    conv_pool2_path = path_origin + '/train_' + conv_pool2.getName() + '.prototxt'
    conv_pool2.setPath(conv_pool2_path)
    conv_pool2_n = caffe.NetSpec()
    conv_pool2_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_pool2_n.conv1 = L.Convolution(conv_pool2_n.data, kernel_size=5,
                                       num_output=50, stride=1,
                                       weight_filler=dict(type='xavier'),
                                       bias_filler=dict(type='constant'))
    conv_pool2_n.pool1 = L.Pooling(conv_pool2_n.conv1, pool=P.Pooling.MAX,
                                   kernel_size=2, stride=2)
    save_proto(conv_pool2_n.to_proto(), conv_pool2_path)
    conv_pool2.setOutput('pool1')

    # relu_softmax1
    relu_softmax1 = TopologyElement('relu_softmax1', ['relu', 'softmax'], None)
    relu_softmax1_path = path_origin + '/train_' + relu_softmax1.getName() + '.prototxt'
    relu_softmax1.setPath(relu_softmax1_path)
    relu_softmax1_n = caffe.NetSpec()
    relu_softmax1_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    relu_softmax1_n.relu1 = L.ReLU(relu_softmax1_n.data)
    relu_softmax1_n.softmax1 = L.Softmax(relu_softmax1_n.relu1)
    save_proto(relu_softmax1_n.to_proto(), relu_softmax1_path)
    relu_softmax1.setOutput('softmax1')

    # VGG structure
    # conv_relu_pool1
    conv_relu_pool1 = TopologyElement('conv_relu_pool1', ['conv', 'relu', 'pool'], None)
    conv_relu_pool1_path = path_origin + '/train_' + conv_relu_pool1.getName() + '.prototxt'
    conv_relu_pool1.setPath(conv_relu_pool1_path)
    conv_relu_pool1_n = caffe.NetSpec()
    conv_relu_pool1_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_relu_pool1_n.conv1 = L.Convolution(conv_relu_pool1_n.data, kernel_size=3,
                                            num_output=64, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool1_n.relu1 = L.ReLU(conv_relu_pool1_n.conv1)
    conv_relu_pool1_n.conv2 = L.Convolution(conv_relu_pool1_n.relu1, kernel_size=3,
                                            num_output=64, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool1_n.relu2 = L.ReLU(conv_relu_pool1_n.conv2)
    conv_relu_pool1_n.pool1 = L.Pooling(conv_relu_pool1_n.relu2, pool=P.Pooling.MAX,
                                        kernel_size=2, stride=2)
    save_proto(conv_relu_pool1_n.to_proto(), conv_relu_pool1_path)
    conv_relu_pool1.setOutput('pool1')

    # conv_relu_pool2
    conv_relu_pool2 = TopologyElement('conv_relu_pool2', ['conv', 'relu', 'pool'], None)
    conv_relu_pool2_path = path_origin + '/train_' + conv_relu_pool2.getName() + '.prototxt'
    conv_relu_pool2.setPath(conv_relu_pool2_path)
    conv_relu_pool2_n = caffe.NetSpec()
    conv_relu_pool2_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_relu_pool2_n.conv1 = L.Convolution(conv_relu_pool2_n.data, kernel_size=3,
                                            num_output=128, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool2_n.relu1 = L.ReLU(conv_relu_pool2_n.conv1)
    conv_relu_pool2_n.conv2 = L.Convolution(conv_relu_pool2_n.relu1, kernel_size=3,
                                            num_output=128, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool2_n.relu2 = L.ReLU(conv_relu_pool2_n.conv2)
    conv_relu_pool2_n.pool1 = L.Pooling(conv_relu_pool2_n.relu2, pool=P.Pooling.MAX,
                                        kernel_size=2, stride=2)
    save_proto(conv_relu_pool2_n.to_proto(), conv_relu_pool2_path)
    conv_relu_pool2.setOutput('pool1')

    # conv_relu_pool3
    conv_relu_pool3 = TopologyElement('conv_relu_pool3', ['conv', 'relu', 'pool'], None)
    conv_relu_pool3_path = path_origin + '/train_' + conv_relu_pool3.getName() + '.prototxt'
    conv_relu_pool3.setPath(conv_relu_pool3_path)
    conv_relu_pool3_n = caffe.NetSpec()
    conv_relu_pool3_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_relu_pool3_n.conv1 = L.Convolution(conv_relu_pool3_n.data, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool3_n.relu1 = L.ReLU(conv_relu_pool3_n.conv1)
    conv_relu_pool3_n.conv2 = L.Convolution(conv_relu_pool3_n.relu1, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool3_n.relu2 = L.ReLU(conv_relu_pool3_n.conv2)
    conv_relu_pool3_n.conv3 = L.Convolution(conv_relu_pool3_n.relu2, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool3_n.relu3 = L.ReLU(conv_relu_pool3_n.conv3)
    conv_relu_pool3_n.pool1 = L.Pooling(conv_relu_pool3_n.relu3, pool=P.Pooling.MAX,
                                        kernel_size=2, stride=2)
    save_proto(conv_relu_pool3_n.to_proto(), conv_relu_pool3_path)
    conv_relu_pool3.setOutput('pool1')

    # conv_relu_pool4
    conv_relu_pool4 = TopologyElement('conv_relu_pool4', ['conv', 'relu', 'pool'], None)
    conv_relu_pool4_path = path_origin + '/train_' + conv_relu_pool4.getName() + '.prototxt'
    conv_relu_pool4.setPath(conv_relu_pool4_path)
    conv_relu_pool4_n = caffe.NetSpec()
    conv_relu_pool4_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_relu_pool4_n.conv1 = L.Convolution(conv_relu_pool4_n.data, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool4_n.relu1 = L.ReLU(conv_relu_pool4_n.conv1)
    conv_relu_pool4_n.conv2 = L.Convolution(conv_relu_pool4_n.relu1, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool4_n.relu2 = L.ReLU(conv_relu_pool4_n.conv2)
    conv_relu_pool4_n.conv3 = L.Convolution(conv_relu_pool4_n.relu2, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool4_n.relu3 = L.ReLU(conv_relu_pool4_n.conv3)
    conv_relu_pool4_n.conv4 = L.Convolution(conv_relu_pool4_n.relu3, kernel_size=3,
                                            num_output=256, stride=1,
                                            weight_filler=dict(type='xavier'),
                                            bias_filler=dict(type='constant'))
    conv_relu_pool4_n.relu4 = L.ReLU(conv_relu_pool4_n.conv4)
    conv_relu_pool4_n.pool1 = L.Pooling(conv_relu_pool4_n.relu4, pool=P.Pooling.MAX,
                                        kernel_size=2, stride=2)
    save_proto(conv_relu_pool4_n.to_proto(), conv_relu_pool4_path)
    conv_relu_pool4.setOutput('pool1')

    # SE-BN-Inception
    # conv_sigmoid1
    conv_sigmoid1 = TopologyElement('conv_sigmoid1', ['conv', 'sigmoid'], None)
    conv_sigmoid1_path = path_origin + '/train_' + conv_sigmoid1.getName() + '.prototxt'
    conv_sigmoid1.setPath(conv_sigmoid1_path)
    conv_sigmoid1_n = caffe.NetSpec()
    conv_sigmoid1_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_sigmoid1_n.conv1 = L.Convolution(conv_sigmoid1_n.data, kernel_size=1,
                                          num_output=320, stride=1,
                                          weight_filler=dict(type='xavier'),
                                          bias_filler=dict(type='constant'))
    conv_sigmoid1_n.sigmoid1 = L.Sigmoid(conv_sigmoid1_n.conv1)
    save_proto(conv_sigmoid1_n.to_proto(), conv_sigmoid1_path)
    conv_sigmoid1.setOutput('sigmoid1')

    # SE-ResNet-50
    # conv_sigmoid2
    conv_sigmoid2 = TopologyElement('conv_sigmoid2', ['conv', 'sigmoid'], None)
    conv_sigmoid2_path = path_origin + '/train_' + conv_sigmoid2.getName() + '.prototxt'
    conv_sigmoid2.setPath(conv_sigmoid2_path)
    conv_sigmoid2_n = caffe.NetSpec()
    conv_sigmoid2_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_sigmoid2_n.conv1 = L.Convolution(conv_sigmoid2_n.data, kernel_size=1,
                                          num_output=256, stride=1,
                                          weight_filler=dict(type='xavier'),
                                          bias_filler=dict(type='constant'))
    conv_sigmoid2_n.sigmoid1 = L.Sigmoid(conv_sigmoid2_n.conv1)
    save_proto(conv_sigmoid2_n.to_proto(), conv_sigmoid2_path)
    conv_sigmoid2.setOutput('sigmoid1')

    # conv_sigmoid3
    conv_sigmoid3 = TopologyElement('conv_sigmoid3', ['conv', 'sigmoid'], None)
    conv_sigmoid3_path = path_origin + '/train_' + conv_sigmoid3.getName() + '.prototxt'
    conv_sigmoid3.setPath(conv_sigmoid3_path)
    conv_sigmoid3_n = caffe.NetSpec()
    conv_sigmoid3_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_sigmoid3_n.conv1 = L.Convolution(conv_sigmoid3_n.data, kernel_size=1,
                                          num_output=512, stride=1,
                                          weight_filler=dict(type='xavier'),
                                          bias_filler=dict(type='constant'))
    conv_sigmoid3_n.sigmoid1 = L.Sigmoid(conv_sigmoid3_n.conv1)
    save_proto(conv_sigmoid3_n.to_proto(), conv_sigmoid3_path)
    conv_sigmoid3.setOutput('sigmoid1')

    # conv_sigmoid4
    conv_sigmoid4 = TopologyElement('conv_sigmoid4', ['conv', 'sigmoid'], None)
    conv_sigmoid4_path = path_origin + '/train_' + conv_sigmoid4.getName() + '.prototxt'
    conv_sigmoid4.setPath(conv_sigmoid4_path)
    conv_sigmoid4_n = caffe.NetSpec()
    conv_sigmoid4_n.data = L.DummyData(shape=dict(dim=[1, 3, 28, 28]))
    conv_sigmoid4_n.conv1 = L.Convolution(conv_sigmoid4_n.data, kernel_size=1,
                                          num_output=1024, stride=1,
                                          weight_filler=dict(type='xavier'),
                                          bias_filler=dict(type='constant'))
    conv_sigmoid4_n.sigmoid1 = L.Sigmoid(conv_sigmoid4_n.conv1)
    save_proto(conv_sigmoid4_n.to_proto(), conv_sigmoid4_path)
    conv_sigmoid4.setOutput('sigmoid1')

    return (conv_pool1, conv_pool2, relu_softmax1, conv_relu_pool1,
            conv_relu_pool2, conv_relu_pool3, conv_relu_pool4, conv_sigmoid1,
            conv_sigmoid2, conv_sigmoid3, conv_sigmoid4)
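
# generate_topologies() relies on a save_proto helper that is not shown in
# this file. A minimal sketch of what it presumably does (an assumption, not
# the original implementation):
def save_proto(proto, path):
    # serialize a NetParameter message to a prototxt file
    with open(path, 'w') as f:
        f.write(str(proto))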
def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         weight_filler=dict(type='msra'),
                         bias_filler=dict(type='constant'),
                         param=[dict(lr_mult=1), dict(lr_mult=2)])
    return conv, L.ReLU(conv, in_place=True)
def eltsum_relu(bottom1, bottom2):
    # operation=1 is elementwise SUM (P.Eltwise.SUM)
    eltsum = L.Eltwise(bottom1, bottom2, eltwise_param=dict(operation=1))
    relu = L.ReLU(eltsum, in_place=True)
    return eltsum, relu
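
# A minimal sketch of how conv_relu and eltsum_relu combine into a basic
# residual unit (illustrative only; the layer names are hypothetical):
def _demo_residual_unit():
    n = caffe.NetSpec()
    n.data = L.Input(shape=dict(dim=[1, 64, 56, 56]))
    n.conv1, n.relu1 = conv_relu(n.data, 64)
    # the second conv has no ReLU of its own; the activation comes after the sum
    n.conv2 = L.Convolution(n.relu1, kernel_size=3, stride=1, num_output=64,
                            pad=1, weight_filler=dict(type='msra'),
                            bias_filler=dict(type='constant'))
    # identity shortcut: sum the input with the branch output, then ReLU
    n.eltsum, n.relu2 = eltsum_relu(n.data, n.conv2)
    return n.to_proto()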
def inception_v1_proto(self, batch_size, phase='TRAIN'):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        mirror = True
    else:
        source_data = self.test_data
        mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB,
                             batch_size=batch_size, ntop=2,
                             transform_param=dict(crop_size=227,
                                                  mean_value=[104, 117, 123],
                                                  mirror=mirror))
    # FillerParameter's field is 'std' (it is ignored by the xavier filler)
    n.conv1_7x7_s2 = L.Convolution(n.data, num_output=64, kernel_size=7,
                                   stride=2, pad=3,
                                   param=[dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)],
                                   weight_filler=dict(type='xavier', std=1),
                                   bias_filler=dict(type='constant', value=0.2))
    n.conv1_relu_7x7 = L.ReLU(n.conv1_7x7_s2, in_place=True)
    # note: despite the `_s2` names, these two pooling layers use stride 1 with pad 1
    n.pool1_3x3_s2 = L.Pooling(n.conv1_7x7_s2, kernel_size=3, stride=1, pad=1,
                               pool=P.Pooling.MAX)
    n.pool1_norm1 = L.LRN(n.pool1_3x3_s2, local_size=5, alpha=1e-4, beta=0.75)
    n.conv2_3x3_reduce = L.Convolution(n.pool1_norm1, kernel_size=1,
                                       num_output=64, stride=1,
                                       param=[dict(lr_mult=1, decay_mult=1),
                                              dict(lr_mult=2, decay_mult=0)],
                                       weight_filler=dict(type='xavier', std=1),
                                       bias_filler=dict(type='constant', value=0.2))
    n.conv2_relu_3x3_reduce = L.ReLU(n.conv2_3x3_reduce, in_place=True)
    n.conv2_3x3 = L.Convolution(n.conv2_3x3_reduce, num_output=192,
                                kernel_size=3, stride=1, pad=1,
                                param=[dict(lr_mult=1, decay_mult=1),
                                       dict(lr_mult=2, decay_mult=0)],
                                weight_filler=dict(type='xavier', std=1),
                                bias_filler=dict(type='constant', value=0.2))
    n.conv2_relu_3x3 = L.ReLU(n.conv2_3x3, in_place=True)
    n.conv2_norm2 = L.LRN(n.conv2_3x3, local_size=5, alpha=1e-4, beta=0.75)
    n.pool2_3x3_s2 = L.Pooling(n.conv2_norm2, kernel_size=3, stride=1, pad=1,
                               pool=P.Pooling.MAX)
    n.inception_3a_1x1, n.inception_3a_relu_1x1, n.inception_3a_3x3_reduce, n.inception_3a_relu_3x3_reduce, \
        n.inception_3a_3x3, n.inception_3a_relu_3x3, n.inception_3a_5x5_reduce, n.inception_3a_relu_5x5_reduce, \
        n.inception_3a_5x5, n.inception_3a_relu_5x5, n.inception_3a_pool, n.inception_3a_pool_proj, \
        n.inception_3a_relu_pool_proj, n.inception_3a_output = \
        inception(n.pool2_3x3_s2, dict(conv_1x1=64, conv_3x3_reduce=96,
                                       conv_3x3=128, conv_5x5_reduce=16,
                                       conv_5x5=32, pool_proj=32))
    n.inception_3b_1x1, n.inception_3b_relu_1x1, n.inception_3b_3x3_reduce, n.inception_3b_relu_3x3_reduce, \
        n.inception_3b_3x3, n.inception_3b_relu_3x3, n.inception_3b_5x5_reduce, n.inception_3b_relu_5x5_reduce, \
        n.inception_3b_5x5, n.inception_3b_relu_5x5, n.inception_3b_pool, n.inception_3b_pool_proj, \
        n.inception_3b_relu_pool_proj, n.inception_3b_output = \
        inception(n.inception_3a_output, dict(conv_1x1=128, conv_3x3_reduce=128,
                                              conv_3x3=192, conv_5x5_reduce=32,
                                              conv_5x5=96, pool_proj=64))
    n.pool3_3x3_s2 = L.Pooling(n.inception_3b_output, kernel_size=3, stride=2,
                               pool=P.Pooling.MAX)
    n.inception_4a_1x1, n.inception_4a_relu_1x1, n.inception_4a_3x3_reduce, n.inception_4a_relu_3x3_reduce, \
        n.inception_4a_3x3, n.inception_4a_relu_3x3, n.inception_4a_5x5_reduce, n.inception_4a_relu_5x5_reduce, \
        n.inception_4a_5x5, n.inception_4a_relu_5x5, n.inception_4a_pool, n.inception_4a_pool_proj, \
        n.inception_4a_relu_pool_proj, n.inception_4a_output = \
        inception(n.pool3_3x3_s2, dict(conv_1x1=192, conv_3x3_reduce=96,
                                       conv_3x3=208, conv_5x5_reduce=16,
                                       conv_5x5=48, pool_proj=64))
    # loss 1 (first auxiliary classifier)
    n.loss1_ave_pool = L.Pooling(n.inception_4a_output, kernel_size=5, stride=3,
                                 pool=P.Pooling.AVE)
    n.loss1_conv = L.Convolution(n.loss1_ave_pool, num_output=128,
                                 kernel_size=1, stride=1,
                                 param=[dict(lr_mult=1, decay_mult=1),
                                        dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=dict(type='xavier', std=1),
                                 bias_filler=dict(type='constant', value=0.2))
    n.loss1_relu_conv = L.ReLU(n.loss1_conv, in_place=True)
    n.loss1_fc, n.loss1_relu_fc, n.loss1_drop_fc = \
        fc_relu_drop(n.loss1_conv, dict(num_output=1024, weight_type='xavier',
                                        weight_std=1, bias_type='constant',
                                        bias_value=0.2), dropout_ratio=0.7)
    n.loss1_classifier = L.InnerProduct(n.loss1_fc, num_output=self.classifier_num,
                                        param=[dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier'),
                                        bias_filler=dict(type='constant', value=0))
    n.loss1_loss = L.SoftmaxWithLoss(n.loss1_classifier, n.label, loss_weight=0.3)
    if phase != 'TRAIN':
        n.loss1_accuracy_top1 = L.Accuracy(n.loss1_classifier, n.label,
                                           include=dict(phase=1))
        n.loss1_accuracy_top5 = L.Accuracy(n.loss1_classifier, n.label,
                                           include=dict(phase=1),
                                           accuracy_param=dict(top_k=5))
    n.inception_4b_1x1, n.inception_4b_relu_1x1, n.inception_4b_3x3_reduce, n.inception_4b_relu_3x3_reduce, \
        n.inception_4b_3x3, n.inception_4b_relu_3x3, n.inception_4b_5x5_reduce, n.inception_4b_relu_5x5_reduce, \
        n.inception_4b_5x5, n.inception_4b_relu_5x5, n.inception_4b_pool, n.inception_4b_pool_proj, \
        n.inception_4b_relu_pool_proj, n.inception_4b_output = \
        inception(n.inception_4a_output, dict(conv_1x1=160, conv_3x3_reduce=112,
                                              conv_3x3=224, conv_5x5_reduce=24,
                                              conv_5x5=64, pool_proj=64))
    n.inception_4c_1x1, n.inception_4c_relu_1x1, n.inception_4c_3x3_reduce, n.inception_4c_relu_3x3_reduce, \
        n.inception_4c_3x3, n.inception_4c_relu_3x3, n.inception_4c_5x5_reduce, n.inception_4c_relu_5x5_reduce, \
        n.inception_4c_5x5, n.inception_4c_relu_5x5, n.inception_4c_pool, n.inception_4c_pool_proj, \
        n.inception_4c_relu_pool_proj, n.inception_4c_output = \
        inception(n.inception_4b_output, dict(conv_1x1=128, conv_3x3_reduce=128,
                                              conv_3x3=256, conv_5x5_reduce=24,
                                              conv_5x5=64, pool_proj=64))
    n.inception_4d_1x1, n.inception_4d_relu_1x1, n.inception_4d_3x3_reduce, n.inception_4d_relu_3x3_reduce, \
        n.inception_4d_3x3, n.inception_4d_relu_3x3, n.inception_4d_5x5_reduce, n.inception_4d_relu_5x5_reduce, \
        n.inception_4d_5x5, n.inception_4d_relu_5x5, n.inception_4d_pool, n.inception_4d_pool_proj, \
        n.inception_4d_relu_pool_proj, n.inception_4d_output = \
        inception(n.inception_4c_output, dict(conv_1x1=112, conv_3x3_reduce=144,
                                              conv_3x3=288, conv_5x5_reduce=32,
                                              conv_5x5=64, pool_proj=64))
    # loss 2 (second auxiliary classifier)
    n.loss2_ave_pool = L.Pooling(n.inception_4d_output, kernel_size=5, stride=3,
                                 pool=P.Pooling.AVE)
    n.loss2_conv = L.Convolution(n.loss2_ave_pool, num_output=128,
                                 kernel_size=1, stride=1,
                                 param=[dict(lr_mult=1, decay_mult=1),
                                        dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=dict(type='xavier', std=1),
                                 bias_filler=dict(type='constant', value=0.2))
    n.loss2_relu_conv = L.ReLU(n.loss2_conv, in_place=True)
    n.loss2_fc, n.loss2_relu_fc, n.loss2_drop_fc = \
        fc_relu_drop(n.loss2_conv, dict(num_output=1024, weight_type='xavier',
                                        weight_std=1, bias_type='constant',
                                        bias_value=0.2), dropout_ratio=0.7)
    n.loss2_classifier = L.InnerProduct(n.loss2_fc, num_output=self.classifier_num,
                                        param=[dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier'),
                                        bias_filler=dict(type='constant', value=0))
    n.loss2_loss = L.SoftmaxWithLoss(n.loss2_classifier, n.label, loss_weight=0.3)
    if phase != 'TRAIN':
        n.loss2_accuracy_top1 = L.Accuracy(n.loss2_classifier, n.label,
                                           include=dict(phase=1))
        n.loss2_accuracy_top5 = L.Accuracy(n.loss2_classifier, n.label,
                                           include=dict(phase=1),
                                           accuracy_param=dict(top_k=5))
    n.inception_4e_1x1, n.inception_4e_relu_1x1, n.inception_4e_3x3_reduce, n.inception_4e_relu_3x3_reduce, \
        n.inception_4e_3x3, n.inception_4e_relu_3x3, n.inception_4e_5x5_reduce, n.inception_4e_relu_5x5_reduce, \
        n.inception_4e_5x5, n.inception_4e_relu_5x5, n.inception_4e_pool, n.inception_4e_pool_proj, \
        n.inception_4e_relu_pool_proj, n.inception_4e_output = \
        inception(n.inception_4d_output, dict(conv_1x1=256, conv_3x3_reduce=160,
                                              conv_3x3=320, conv_5x5_reduce=32,
                                              conv_5x5=128, pool_proj=128))
    n.pool4_3x3_s2 = L.Pooling(n.inception_4e_output, kernel_size=3, stride=2,
                               pool=P.Pooling.MAX)
    n.inception_5a_1x1, n.inception_5a_relu_1x1, n.inception_5a_3x3_reduce, n.inception_5a_relu_3x3_reduce, \
        n.inception_5a_3x3, n.inception_5a_relu_3x3, n.inception_5a_5x5_reduce, n.inception_5a_relu_5x5_reduce, \
        n.inception_5a_5x5, n.inception_5a_relu_5x5, n.inception_5a_pool, n.inception_5a_pool_proj, \
        n.inception_5a_relu_pool_proj, n.inception_5a_output = \
        inception(n.pool4_3x3_s2, dict(conv_1x1=256, conv_3x3_reduce=160,
                                       conv_3x3=320, conv_5x5_reduce=32,
                                       conv_5x5=128, pool_proj=128))
    n.inception_5b_1x1, n.inception_5b_relu_1x1, n.inception_5b_3x3_reduce, n.inception_5b_relu_3x3_reduce, \
        n.inception_5b_3x3, n.inception_5b_relu_3x3, n.inception_5b_5x5_reduce, n.inception_5b_relu_5x5_reduce, \
        n.inception_5b_5x5, n.inception_5b_relu_5x5, n.inception_5b_pool, n.inception_5b_pool_proj, \
        n.inception_5b_relu_pool_proj, n.inception_5b_output = \
        inception(n.inception_5a_output, dict(conv_1x1=384, conv_3x3_reduce=192,
                                              conv_3x3=384, conv_5x5_reduce=48,
                                              conv_5x5=128, pool_proj=128))
    n.pool5_7x7_s1 = L.Pooling(n.inception_5b_output, kernel_size=7, stride=1,
                               pool=P.Pooling.AVE)
    n.pool5_drop_7x7_s1 = L.Dropout(n.pool5_7x7_s1, in_place=True,
                                    dropout_param=dict(dropout_ratio=0.4))
    n.loss3_classifier = L.InnerProduct(n.pool5_7x7_s1, num_output=self.classifier_num,
                                        param=[dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier'),
                                        bias_filler=dict(type='constant', value=0))
    n.loss3_loss = L.SoftmaxWithLoss(n.loss3_classifier, n.label, loss_weight=1)
    if phase != 'TRAIN':
        n.loss3_accuracy_top1 = L.Accuracy(n.loss3_classifier, n.label,
                                           include=dict(phase=1))
        n.loss3_accuracy_top5 = L.Accuracy(n.loss3_classifier, n.label,
                                           include=dict(phase=1),
                                           accuracy_param=dict(top_k=5))
    return n.to_proto()
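
# Sketch: emit train and test prototxts from inception_v1_proto. It assumes
# an instance (`builder`) whose train_data/test_data/classifier_num attributes
# are set; names and paths are illustrative.
def _demo_inception_protos(builder, out_dir='.'):
    for phase, batch in (('TRAIN', 32), ('TEST', 50)):
        path = '%s/inception_v1_%s.prototxt' % (out_dir, phase.lower())
        with open(path, 'w') as f:
            f.write(str(builder.inception_v1_proto(batch, phase=phase)))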
def define_model(self):
    n = caffe.NetSpec()
    pylayer = 'ClsDataLayer'
    pydata_params = dict(
        phase='train',
        data_root=opt.cls_data_root,
        batch_size=16,
        ratio=5,
        augument=True,
    )
    # three input volumes (one per sub-network) plus labels, all produced by
    # a Python data layer
    n.arch1_data, n.arch2_data, n.arch3_data, n.label = L.Python(
        module='data.ClsDataLayer', layer=pylayer, ntop=4,
        param_str=str(pydata_params))

    # arch1; SingleConv is an external helper, and the 3-element kernel sizes
    # imply 3D convolutions (a Caffe build with 3D support)
    n.arch1_conv1 = SingleConv(n.arch1_data, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch1_conv2 = SingleConv(n.arch1_conv1, 64, kernel_size=2, stride=2, padding=0)
    n.arch1_conv3 = SingleConv(n.arch1_conv2, 64, kernel_size=1, stride=1, padding=0)
    n.arch1_conv4 = SingleConv(n.arch1_conv3, 64, kernel_size=[2, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch1_conv5 = SingleConv(n.arch1_conv4, 64, kernel_size=[1, 4, 4],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch1_flat = L.Flatten(n.arch1_conv5)
    n.arch1_fc1 = L.InnerProduct(n.arch1_flat, num_output=150,
                                 weight_filler=dict(type='xavier'))
    # engine=3 assumes a custom ReLU engine in this Caffe fork
    n.fc1_act = L.ReLU(n.arch1_fc1, engine=3)
    n.arch1 = L.InnerProduct(n.fc1_act, num_output=2,
                             weight_filler=dict(type='xavier'))
    n.arch1_loss = L.SoftmaxWithLoss(n.arch1, n.label)

    # arch2
    n.arch2_conv1 = SingleConv(n.arch2_data, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch2_conv2 = SingleConv(n.arch2_conv1, 64, kernel_size=2, stride=2, padding=0)
    n.arch2_conv3 = SingleConv(n.arch2_conv2, 64, kernel_size=[1, 2, 2],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch2_conv4 = SingleConv(n.arch2_conv3, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch2_conv5 = SingleConv(n.arch2_conv4, 64, kernel_size=[2, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch2_flat = L.Flatten(n.arch2_conv5)
    n.arch2_fc1 = L.InnerProduct(n.arch2_flat, num_output=250,
                                 weight_filler=dict(type='xavier'))
    n.fc2_act = L.ReLU(n.arch2_fc1, engine=3)
    n.arch2 = L.InnerProduct(n.fc2_act, num_output=2,
                             weight_filler=dict(type='xavier'))
    n.arch2_loss = L.SoftmaxWithLoss(n.arch2, n.label)

    # arch3
    n.arch3_conv1 = SingleConv(n.arch3_data, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch3_conv2 = SingleConv(n.arch3_conv1, 64, kernel_size=2, stride=2, padding=0)
    n.arch3_conv3 = SingleConv(n.arch3_conv2, 64, kernel_size=[2, 2, 2],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch3_conv4 = SingleConv(n.arch3_conv3, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch3_conv5 = SingleConv(n.arch3_conv4, 64, kernel_size=[3, 5, 5],
                               stride=[1, 1, 1], padding=[0, 0, 0])
    n.arch3_flat = L.Flatten(n.arch3_conv5)
    n.arch3_fc1 = L.InnerProduct(n.arch3_flat, num_output=250,
                                 weight_filler=dict(type='xavier'))
    n.fc3_act = L.ReLU(n.arch3_fc1, engine=3)
    n.arch3 = L.InnerProduct(n.fc3_act, num_output=2,
                             weight_filler=dict(type='xavier'))
    n.arch3_loss = L.SoftmaxWithLoss(n.arch3, n.label)

    with open(self.model_def, 'w') as f:
        f.write(str(n.to_proto()))
def ReLU(self, bottom, name='relu'):
    return L.ReLU(bottom, name=name)
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer',
    #     param_str=mode_str, ntop=5)
    n.data, n.cont, n.img_feature, n.label = L.Python(
        module='vqa_data_provider_layer', layer='VQADataProviderLayer',
        param_str=mode_str, ntop=4)
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.embed = L.TanH(n.embed_ba)
    # concat_word_embed = [n.embed, n.glove]
    # n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2})  # T x N x 600

    # LSTM1
    n.lstm1 = L.LSTM(n.embed, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)))
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    # keep only the last timestep; silence the earlier slices
    for i in range(T - 1):
        setattr(n, 'slice_first' + str(i), tops1[i])
        setattr(n, 'silence_data_first' + str(i), L.Silence(tops1[i], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,
                                 reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})

    # LSTM2
    n.lstm2 = L.LSTM(n.lstm1_droped, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)))
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        setattr(n, 'slice_second' + str(i), tops2[i])
        setattr(n, 'silence_data_second' + str(i), L.Silence(tops2[i], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,
                                 reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)

    # tile the question embedding over the 14x14 image-feature grid
    n.q_emb_tanh_droped_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.q_emb_tanh_droped_resh, axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1,
                                num_output=512, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1,
                                num_output=2, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    n.lstm_12_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.lstm_12_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)
    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))
    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
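
# Sketch: generate the train-mode VQA prototxt. T is the padded question
# length; the values below are illustrative, and the custom layers
# (CompactBilinear, SignedSqrt, L2Normalize, SoftAttention) require the
# MCB-attention Caffe fork this function appears to target.
def _demo_qlstm(path='vqa_train.prototxt'):
    with open(path, 'w') as f:
        f.write(str(qlstm(mode='train', batchsize=64, T=15,
                          question_vocab_size=10000)))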
def add_relu(net, bottom, name, in_place=True):
    """Add ReLU activation."""
    net[name] = L.ReLU(bottom, in_place=in_place)
def add_activate(self, bottom):
    return cl.ReLU(bottom, in_place=True)
def fc_relu(bottom, nout, param=learned_param):
    fc = L.InnerProduct(bottom, num_output=nout, param=param)
    return fc, L.ReLU(fc, in_place=True)
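
# fc_relu's default argument binds a module-level learned_param at def time,
# so a definition like the following (hypothetical values) must appear before
# the def above:
# learned_param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
# A small usage sketch:
def _demo_fc_head():
    n = caffe.NetSpec()
    n.data = L.Input(shape=dict(dim=[1, 256]))
    n.fc1, n.relu1 = fc_relu(n.data, 128)
    n.score = L.InnerProduct(n.relu1, num_output=10)
    return n.to_proto()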
def DeconvBNUnitLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                      kernel_size, pad, stride, lr_mult=1, decay_mult=1,
                      dilation=1, use_conv_bias=False, use_scale=True, eps=0.001,
                      conv_prefix='', conv_postfix='', bn_prefix='',
                      bn_postfix='_bn', scale_prefix='', scale_postfix='_scale',
                      bias_prefix='', bias_postfix='_bias', leaky=False,
                      leaky_ratio=0.1, init_xavier=True):
    # L.Deconvolution takes its geometry through convolution_param, so every
    # branch builds kwargs in that form.
    if use_bn:
        # parameters for the deconvolution layer followed by batchnorm.
        if use_conv_bias:
            if init_xavier:
                kwargs = {
                    'param': [dict(lr_mult=lr_mult, decay_mult=decay_mult)],
                    'convolution_param': {
                        'num_output': num_output,
                        'kernel_size': kernel_size,
                        'pad': pad,
                        'stride': stride,
                        'weight_filler': dict(type='xavier'),
                        'bias_filler': dict(type='constant', value=0)
                    }
                }
            else:
                kwargs = {
                    'param': [dict(lr_mult=lr_mult, decay_mult=decay_mult)],
                    'convolution_param': {
                        'num_output': num_output,
                        'kernel_size': kernel_size,
                        'pad': pad,
                        'stride': stride,
                        'weight_filler': dict(type='gaussian', std=0.01),
                        'bias_filler': dict(type='constant', value=0)
                    }
                }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=decay_mult)],
                'convolution_param': {
                    'num_output': num_output,
                    'kernel_size': kernel_size,
                    'pad': pad,
                    'stride': stride,
                    'weight_filler': dict(type='xavier'),
                    'bias_term': False
                }
            }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)
            ],
            'eps': eps,
        }
        # parameters for scale/bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=0),
                    dict(lr_mult=lr_mult, decay_mult=0)
                ],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        if init_xavier:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=decay_mult),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)
                ],
                'convolution_param': {
                    'num_output': num_output,
                    'kernel_size': kernel_size,
                    'pad': pad,
                    'stride': stride,
                    'weight_filler': dict(type='xavier'),
                    'bias_filler': dict(type='constant', value=0)
                }
            }
        else:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=decay_mult),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)
                ],
                'convolution_param': {
                    'num_output': num_output,
                    'kernel_size': kernel_size,
                    'pad': pad,
                    'stride': stride,
                    'weight_filler': dict(type='gaussian', std=0.01),
                    'bias_filler': dict(type='constant', value=0)
                }
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    # the unpacked h/w values are currently unused; the square kernel/pad/
    # stride are taken from convolution_param above
    [kernel_h, kernel_w] = Upar.UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = Upar.UnpackVariable(pad, 2)
    [stride_h, stride_w] = Upar.UnpackVariable(stride, 2)
    net[conv_name] = L.Deconvolution(net[from_layer], **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        if leaky:
            leaky_kwargs = {'negative_slope': leaky_ratio}
            net[relu_name] = L.ReLU(net[conv_name], in_place=True, **leaky_kwargs)
        else:
            net[relu_name] = L.ReLU(net[conv_name], in_place=True)
def reduceVGGNetBody(net, from_layer, need_fc=True, fully_conv=False,
                     reduced=False, dilated=False, nopool=False, dropout=True,
                     freeze_layers=[], dilate_pool4=False):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=32, pad=1, kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=32, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)
    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=32, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)
    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)
    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)
    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
        dilation = 1
    else:
        name = 'pool4'
        if dilate_pool4:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=3, stride=1, pad=1)
            dilation = 2
        else:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)
            dilation = 1

    kernel_size = 3
    pad = ((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) // 2
    net.conv5_1 = L.Convolution(net[name], num_output=256, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=256, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=256, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=256, pad=1, kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)
        if fully_conv:
            if dilated:
                if reduced:
                    dilation = dilation * 6
                    kernel_size = 3
                    num_output = 512
                else:
                    dilation = dilation * 2
                    kernel_size = 7
                    num_output = 4096
            else:
                if reduced:
                    dilation = dilation * 3
                    kernel_size = 3
                    num_output = 512
                else:
                    kernel_size = 7
                    num_output = 4096
            pad = ((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) // 2
            net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad,
                                    kernel_size=kernel_size, dilation=dilation, **kwargs)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            # fc7 is the same 1x1, 512-output convolution whether or not
            # `reduced` is set
            net.fc7 = L.Convolution(net.relu6, num_output=512, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net[name], num_output=512)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=512)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
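
# Sketch: attach the reduced VGG trunk to an input blob, mirroring how the
# SSD-style scripts drive their VGGNetBody helpers (assumes the SSD fork of
# pycaffe, which provides NetSpec.keys()/update()):
def _demo_reduced_vgg():
    net = caffe.NetSpec()
    net.data = L.Input(shape=dict(dim=[1, 3, 300, 300]))
    reduceVGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
                     dilated=True, dropout=False)
    return net.to_proto()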
def create_deploy():
    # the first (data) layer is omitted in a deploy net
    # layer 2: convolution
    conv1 = L.Convolution(
        bottom='data', kernel_size=11, stride=4, num_output=96, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # layer 3: activation (ReLU)
    relu1 = L.ReLU(conv1, in_place=True)
    # layer 4: pooling
    pool1 = L.Pooling(relu1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 5: LRN
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    # layer 6: convolution
    conv2 = L.Convolution(
        norm1, kernel_size=5, stride=1, num_output=256, pad=2, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 7: activation (ReLU)
    relu2 = L.ReLU(conv2, in_place=True)
    # layer 8: pooling
    pool2 = L.Pooling(relu2, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 9: LRN
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    # layer 10: convolution
    conv3 = L.Convolution(
        norm2, kernel_size=3, stride=1, num_output=384, pad=1,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # layer 11: activation (ReLU)
    relu3 = L.ReLU(conv3, in_place=True)
    # layer 12: convolution
    conv4 = L.Convolution(
        relu3, kernel_size=3, stride=1, num_output=384, pad=1, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 13: activation (ReLU)
    relu4 = L.ReLU(conv4, in_place=True)
    # layer 14: convolution
    conv5 = L.Convolution(
        relu4, kernel_size=3, stride=1, num_output=256, pad=1, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 15: activation (ReLU)
    relu5 = L.ReLU(conv5, in_place=True)
    # layer 16: pooling
    pool5 = L.Pooling(relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 17: fully connected
    fc6 = L.InnerProduct(
        pool5, num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # layer 18: activation (ReLU)
    relu6 = L.ReLU(fc6, in_place=True)
    # layer 19: dropout
    drop6 = L.Dropout(relu6, dropout_ratio=0.5, in_place=True)
    # layer 20: fully connected
    fc7 = L.InnerProduct(
        drop6, num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # layer 21: activation (ReLU)
    relu7 = L.ReLU(fc7, in_place=True)
    # layer 22: dropout
    drop7 = L.Dropout(relu7, dropout_ratio=0.5)
    # layer 23: fully connected
    fc8 = L.InnerProduct(
        drop7, num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # no accuracy layer at the end, just a Softmax layer
    prob = L.Softmax(fc8)
    return to_proto(prob)
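
# Sketch: write the generated AlexNet-style deploy definition to disk (the
# path is a placeholder). The input blob named 'data' must still be declared
# in the prototxt header or via an Input layer.
def _demo_write_deploy(path='deploy.prototxt'):
    with open(path, 'w') as f:
        f.write(str(create_deploy()))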
def relu(prev):
    return layers.ReLU(prev, in_place=True)
from pathlib import Path

import tensorflow as tf


def create_caffe_net_struct(keras_model_path, prototxt_path):
    """
    Walk the Keras model and create the prototxt of an equivalent Caffe one.

    Arguments:
        keras_model_path: the path to a file containing the Keras model
        prototxt_path: the path to the prototxt file to create
    """
    # Create the output directory (if it doesn't exist)
    for i in range(len(prototxt_path) - 1, -1, -1):
        if prototxt_path[i] == '/':
            output_dir = prototxt_path[:i + 1]
            break
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # load the keras model
    keras_model = tf.keras.models.load_model(keras_model_path)
    # create a Caffe NetSpec
    caffe_net = caffe.NetSpec()
    # read the keras model layer by layer
    types = []
    for i in range(len(keras_model.layers)):
        layer = keras_model.layers[i]
        layer_type = layer.__class__.__name__
        name = layer.name
        if layer_type not in types:
            types.append(layer_type)
        if layer_type == 'InputLayer':
            # position 0 because output_shape returns a list with one tuple of
            # shape dimensions; make that tuple a list (to potentially modify
            # its items)
            shape = list(layer.output_shape[0])
            with open(prototxt_path, 'w') as prototxt:
                prototxt.write(f'input: "{name}"\n')
                shape[0] = shape[0] if shape[0] is not None else 1
                # Keras is NHWC; Caffe wants NCHW
                prototxt.write(f'input_dim: {shape[0]}\ninput_dim: {shape[3]}\n')
                prototxt.write(f'input_dim: {shape[1]}\ninput_dim: {shape[2]}\n')
            caffe_net.tops[name] = L.Input()
        elif layer_type in ('Conv2D', 'ReLU', 'MaxPooling2D', 'PReLU'):
            # To get the bottom, first access the node which connects the two
            # layers, then take the node's inbound layer
            bottom_name = layer._inbound_nodes[0].inbound_layers.name
            bottom = caffe_net.tops[bottom_name]
            # For padding, kernel_size and pool_size, only the 1st number of
            # the tuple from layer.get_config() is used, because Caffe only
            # accepts spatially square kernels
            if layer_type == 'Conv2D':
                config = layer.get_config()
                filters = layer.get_weights()[0]
                biases = layer.get_weights()[1]
                num_output = config['filters']  # equivalent to: np.shape(biases)[0]
                kernel_size = config['kernel_size'][0]  # equivalent to: np.shape(filters)[0]
                if config['padding'] == 'same':  # maintain the same spatial size
                    stride = config['strides'][0]
                    layer_input_size = np.shape(
                        layer._inbound_nodes[0].inbound_layers.output)[1]
                    pad = (stride * (np.shape(layer.output)[1] - 1)
                           - layer_input_size + kernel_size) // 2
                elif config['padding'] == 'valid':
                    pad = 0
                caffe_net.tops[name] = L.Convolution(bottom,
                                                     num_output=num_output,
                                                     kernel_size=kernel_size,
                                                     pad=pad)
            elif layer_type == 'ReLU':
                caffe_net.tops[name] = L.ReLU(bottom)
            elif layer_type == 'PReLU':
                caffe_net.tops[name] = L.PReLU(bottom)
            elif layer_type == 'MaxPooling2D':
                config = layer.get_config()
                pool_size = config['pool_size'][0]
                stride = config['strides'][0]
                caffe_net.tops[name] = L.Pooling(bottom, pool=P.Pooling.MAX,
                                                 stride=stride,
                                                 kernel_size=pool_size)
        elif layer_type == 'Concatenate':
            # To get the bottoms, first access the node which connects the
            # layers, then take each inbound layer (each one is a bottom)
            bottoms_list = []
            bottoms_shapes = []
            for j in range(np.shape(layer._inbound_nodes[0].inbound_layers)[0]):
                current = layer._inbound_nodes[0].inbound_layers[j]
                """# In case a layer is followed by an activation layer, even if the top
                # does not take the name of the activation layer, the inbound layer will be that.
                # So in this case we take the "bottom of the bottom", because on the prototxt the concat layer
                # wants the name of the "top" field of the layers to connect.
                # e.g.
                # layer {name: "conv1", top: "conv1" ...} layer {name: "relu1", type: "ReLU" top: "conv1" ...}
                # layer {name: concat, type: "Concat", bottom: "conv1" ...}
                if current.__class__.__name__ in ('ReLU', 'PReLU'):
                    bottom_name = current._inbound_nodes[0].inbound_layers.name
                else:
                    bottom_name = current.name"""
                # pick the bottom
                bottom_name = current.name
                for k in caffe_net.tops.keys():
                    if k == bottom_name:
                        bottom = caffe_net.tops[k]
                bottoms_list.append(bottom)
                bottoms_shapes.append(current.output_shape)
            # Check the concat axis
            if layer.get_config()['axis'] == -1:
                axis = find_concat_axis(bottoms_shapes)
            # unfortunately the following currently works only with a
            # concatenation of 2 or 3 layers
            if len(bottoms_list) == 2:
                caffe_net.tops[name] = L.Concat(bottoms_list[0],
                                                bottoms_list[1], axis=axis)
            elif len(bottoms_list) == 3:
                caffe_net.tops[name] = L.Concat(bottoms_list[0],
                                                bottoms_list[1],
                                                bottoms_list[2], axis=axis)
            else:
                print("\n\nE: found concat layer with more than 3 bottoms. "
                      "This program cannot handle it\t", len(bottoms_list))
    print('All types present: ', types)
    with open(prototxt_path, 'a') as prototxt:
        prototxt.write(str(caffe_net.to_proto()))
    fix_prototxt(prototxt_path)
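
# Sketch: convert a saved Keras model to a Caffe prototxt (paths are
# placeholders; find_concat_axis and fix_prototxt come from the same module
# as create_caffe_net_struct and are not shown here).
def _demo_keras_to_caffe():
    create_caffe_net_struct('models/keras_model.h5',
                            'converted/deploy.prototxt')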
def make_resnet(training_data='train_data_path', test_data='test_data_path',
                mean_file='mean.binaryproto', depth=50):
    # num_feature_maps = np.array([16, 32, 64])  # feature map size: [32, 16, 8]
    configs = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
        200: [3, 24, 36, 3],
    }
    block_config = configs[depth]
    num_feature_maps = [64, 128, 256, 512]
    n_stage = len(num_feature_maps)

    n = caffe.NetSpec()
    # make the training data layer
    n.data, n.label = L.Data(source=training_data, backend=P.Data.LMDB,
                             batch_size=256, ntop=2,
                             transform_param=dict(crop_size=224,
                                                  mean_file=mean_file,
                                                  mirror=True),
                             image_data_param=dict(shuffle=True),
                             include=dict(phase=0))
    # make the test data layer
    n.test_data, n.test_label = L.Data(source=test_data, backend=P.Data.LMDB,
                                       batch_size=100, ntop=2,
                                       transform_param=dict(crop_size=224,
                                                            mean_file=mean_file,
                                                            mirror=False),
                                       include=dict(phase=1))
    # conv1 should accept both the training and test data layers, but this is
    # inconvenient to code in pycaffe: you would have to write two conv layers
    # for them. To deal with this, temporarily ignore the test data layer and
    # let conv1 accept the output of the training data layer. Then, after
    # making the whole prototxt, postprocess the top names of the two data
    # layers, renaming them to the same name.
    # weight_filler and bias_filler are module-level fillers here.
    n.conv = L.Convolution(n.data, kernel_size=7, stride=2, num_output=64,
                           pad=3,
                           param=[dict(lr_mult=1, decay_mult=1),
                                  dict(lr_mult=2, decay_mult=0)],
                           weight_filler=weight_filler,
                           bias_filler=bias_filler)
    n.bn = L.BatchNorm(n.conv, in_place=True)
    n.scale = L.Scale(n.bn, scale_param=dict(bias_term=True), in_place=True)
    n.relu = L.ReLU(n.scale, in_place=True)
    n.max_pooling = L.Pooling(n.relu, pool=P.Pooling.MAX, kernel_size=3,
                              stride=2, pad=0)
    # set up a checkpoint so we know where we are
    checkpoint = 'n.max_pooling'
    # start making blocks.
    # num_feature_maps: the number of feature maps for each stage; with
    # [64, 128, 256, 512] the network has four stages.
    # block_config: from the original paper, how many blocks each stage has.
    # The many tops of each block are assigned by building the statement as a
    # string and exec'ing it.
    for i in range(n_stage):
        num_map = num_feature_maps[i]
        nblocks = block_config[i]
        if i == 0:
            stride = 1
        else:
            stride = 2
        for res in range(nblocks):
            # stage name
            stage = 'blk' + str(res + 1) + '_stg' + str(i + 1)
            # use the projection block when downsampling the feature map
            if res == 0:
                make_res = 'n.' + 'conv_' + stage + '_proj, ' + \
                           'n.' + 'bn_' + stage + '_proj, ' + \
                           'n.' + 'scale_' + stage + '_proj, ' + \
                           'n.' + 'conv_' + stage + '_a, ' + \
                           'n.' + 'bn_' + stage + '_a, ' + \
                           'n.' + 'scale_' + stage + '_a, ' + \
                           'n.' + 'relu_' + stage + '_a, ' + \
                           'n.' + 'conv_' + stage + '_b, ' + \
                           'n.' + 'bn_' + stage + '_b, ' + \
                           'n.' + 'scale_' + stage + '_b, ' + \
                           'n.' + 'relu_' + stage + '_b, ' + \
                           'n.' + 'conv_' + stage + '_c, ' + \
                           'n.' + 'bn_' + stage + '_c, ' + \
                           'n.' + 'scale_' + stage + '_c, ' + \
                           'n.' + 'se_pool_' + stage + '_c, ' + \
                           'n.' + 'se_reduce_' + stage + ', ' + \
                           'n.' + 'se_relu_' + stage + ', ' + \
                           'n.' + 'se_recover_' + stage + ', ' + \
                           'n.' + 'se_sigmoid_' + stage + ', ' + \
                           'n.' + 'se_scale_' + stage + ', ' + \
                           'n.' + 'eltsum_' + stage + ', ' + \
                           'n.' + 'relu_after_sum_' + stage + \
                           ' = project_residual(' + checkpoint + \
                           ', num_out=num_map, stride=' + str(stride) + ')'
                exec(make_res)
                checkpoint = 'n.' + 'relu_after_sum_' + stage  # where we are
                continue
            # most blocks have this shape
            make_res = 'n.' + 'conv_' + stage + '_a, ' + \
                       'n.' + 'bn_' + stage + '_a, ' + \
                       'n.' + 'scale_' + stage + '_a, ' + \
                       'n.' + 'relu_' + stage + '_a, ' + \
                       'n.' + 'conv_' + stage + '_b, ' + \
                       'n.' + 'bn_' + stage + '_b, ' + \
                       'n.' + 'scale_' + stage + '_b, ' + \
                       'n.' + 'relu_' + stage + '_b, ' + \
                       'n.' + 'conv_' + stage + '_c, ' + \
                       'n.' + 'bn_' + stage + '_c, ' + \
                       'n.' + 'scale_' + stage + '_c, ' + \
                       'n.' + 'se_pool_' + stage + '_d, ' + \
                       'n.' + 'se_reduce_' + stage + ', ' + \
                       'n.' + 'se_relu_' + stage + ', ' + \
                       'n.' + 'se_recover_' + stage + ', ' + \
                       'n.' + 'se_sigmoid_' + stage + ', ' + \
                       'n.' + 'se_scale_' + stage + ', ' + \
                       'n.' + 'eltsum_' + stage + ', ' + \
                       'n.' + 'relu_after_sum_' + stage + \
                       ' = identity_residual(' + checkpoint + ', num_out=num_map, stride=1)'
            exec(make_res)
            checkpoint = 'n.' + 'relu_after_sum_' + stage  # where we are

    # add the global pooling layer
    exec('n.pool_global = L.Pooling(' + checkpoint +
         ', pool=P.Pooling.AVE, global_pooling=True)')
    n.score = L.InnerProduct(n.pool_global, num_output=1000,
                             param=[dict(lr_mult=1, decay_mult=1),
                                    dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.acc = L.Accuracy(n.score, n.label)
    return n.to_proto()
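
# Sketch: materialize the SE-ResNet train/test definition (the paths are
# placeholders; project_residual/identity_residual and the module-level
# filler globals must be importable for this to run).
def _demo_make_resnet(path='se_resnet50_train_test.prototxt'):
    proto = make_resnet(training_data='examples/imagenet/ilsvrc12_train_lmdb',
                        test_data='examples/imagenet/ilsvrc12_val_lmdb',
                        mean_file='mean.binaryproto', depth=50)
    with open(path, 'w') as f:
        f.write(str(proto))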
def setLayers_twoBranches(data_source, batch_size, layername, kernel, stride, outCH, label_name,
                          transform_param_in, deploy=False, batchnorm=0, lr_mult_distro=[1, 1, 1]):
    # Producing the training and testing prototxt files is straightforward. The deploy
    # prototxt is trickier: its input does not come from a layer, so we create a
    # placeholder data layer and strip it from the serialized net string afterwards.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if len(label_name) == 1:
            n.data, n.tops[label_name[0]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size, source=data_source), ntop=2)
        elif len(label_name) == 2:
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size, source=data_source), ntop=3)
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1, source=data_source, batch_size=batch_size),
            cpm_transform_param=transform_param_in, ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[label_name[4]], n.tops[label_name[5]] = \
            L.Slice(n.label, slice_param=dict(axis=1, slice_point=[38, num_parts + 1, num_parts + 39]), ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]], n.tops[label_name[4]], operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]], n.tops[label_name[5]], operation=P.Eltwise.PROD)
    else:
        # produce the data definition for the deploy net
        input = "data"
        dim1, dim2, dim3, dim4 = 1, 4, 368, 368
        # Make an empty "data" layer so the next layer accepting input can take the
        # correct blob name "data". This placeholder is removed from the
        # serialization string later, since it is only a workaround.
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data, slice_param=dict(axis=1, slice_point=3), ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # just follow the layer-spec arrays .. CPCPCPCPCCCC ....
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be 'image' or 'fuse'
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  # pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l], num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1), dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print(ReLUname)
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                # no image state in subsequent stages; same lr for all stages
                conv_name = 'conv%d_%d_CPM' % (pool_counter, local_counter)
                lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l], num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1), dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))

            if layername[l + 1] != 'L':
                if state == 'image':
                    if batchnorm == 1:
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                        last_layer[0] = batchnorm_name
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
                else:
                    if batchnorm == 1:
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
                print(ReLUname)
            local_counter += 1
        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    # no image state in subsequent stages; same lr for all stages
                    conv_name = 'conv%d_%d_CPM_L%d' % (pool_counter, local_counter, level + 1)
                    lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                    lr_m = lr_mult_distro[2]
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    # two-branch heads (e.g. 38-channel PAF and 19-channel heatmap branches)
                    outCH[l] = 38 if level == 0 else 19
                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]], kernel_size=kernel[l], num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[dict(lr_mult=lr_m, decay_mult=1), dict(lr_mult=lr_m * 2, decay_mult=0)],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print('%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m))

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if state == 'image':
                        if batchnorm == 1:
                            batchnorm_name = 'bn%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                            last_layer[level] = batchnorm_name
                        ReLUname = 'relu%d_%d_CPM_L%d' % (pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                    else:
                        if batchnorm == 1:
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                    print(ReLUname)
            conv_counter += 1
            local_counter += 1
        elif layername[l] == 'P':  # pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]], kernel_size=kernel[l], stride=stride[l], pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print(last_layer[0])
        elif layername[l] == 'L':
            # Loss: the n.loss layer exists only in the training and testing nets, not in the deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]], n.tops[label_name[level + 2]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(n.tops[name], n.tops[label_name[level]])
            print('loss %d' % stage)
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L2':
            # Loss: only in the training and testing nets, not in the deploy net.
            # Requires lr_mult_distro to carry a fourth entry for the loss weight.
            weight = [lr_mult_distro[3], 1]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' % (stage, level + 1)] = L.Flatten(n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                        n.tops['map_vec_stage%d_L%d' % (stage, level + 1)],
                        n.tops[label_name[level]], loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]], n.tops[label_name[level + 2]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                        n.tops[name], n.tops[label_name[level]], loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L3':
            # Loss: only in the training and testing nets, not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.Euclidean2Loss(
                    n.tops[last_layer[level]], n.tops[label_name[level]],
                    n.tops[label_name[2]], loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
                level = 1
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                    n.tops[last_layer[level]], n.tops[label_name[level]], loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]], in_place=True, dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]], n.tops[last_layer[1]], n.tops[share_point],
                concat_param=dict(axis=1))
            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print(last_layer)
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print('share')

    # final process
    stage -= 1
    if deploy == False:
        return str(n.to_proto())
    else:
        # Generate the input information header string, then assemble it with the net
        # layers string, removing the first placeholder layer from the net string.
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(str(n.to_proto()).split('layer {')[2:])
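# Hypothetical usage sketch (not from the original source): the layername string drives
# construction -- 'V' pretrained VGG conv, 'P' max-pool, 'C'/'C2' stage convs,
# 'L'/'L2'/'L3' losses, '@' concat, '$' share point. The spec lists below are
# illustrative only; real configs are much longer and must keep all four lists the
# same length. lr_mult_distro gets a fourth entry because 'L2'/'L3' read index 3.
def example_write_two_branch_net():
    layername = ['V', 'P', 'C2', 'L2']   # VGG conv, pool, two-branch conv, two-branch loss
    kernel    = [3,   2,   3,    0]
    stride    = [1,   2,   1,    0]
    outCH     = [64,  0,   128,  0]
    label_name = ['label_heat', 'label_vec']   # tops provided by the HDF5 source
    txt = setLayers_twoBranches('train.h5list', 8, layername, kernel, stride, outCH,
                                label_name, {'num_parts': 18}, deploy=False,
                                batchnorm=0, lr_mult_distro=[1, 1, 4, 1])
    with open('pose_train.prototxt', 'w') as f:
        f.write(txt)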
def DeconvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                  kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1,
                  conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                  scale_prefix='', scale_postfix='_scale', bias_prefix='',
                  bias_postfix='_bias', **bn_params):
    if use_bn:
        # parameters for deconvolution layer with batchnorm.
        kwargs = {
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
        }
        param = [dict(lr_mult=lr_mult, decay_mult=1)]
        eps = bn_params.get('eps', 0.001)
        moving_average_fraction = bn_params.get('moving_average_fraction', 0.999)
        use_global_stats = bn_params.get('use_global_stats', False)
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            'moving_average_fraction': moving_average_fraction,
        }
        bn_lr_mult = lr_mult
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0),
                          dict(lr_mult=0, decay_mult=0)],
                'eps': eps,
                'use_global_stats': use_global_stats,
            }
            # not updating scale/bias parameters
            bn_lr_mult = 0
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0),
                          dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        kwargs = {
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
        }
        param = [dict(lr_mult=lr_mult, decay_mult=1),
                 dict(lr_mult=2 * lr_mult, decay_mult=0)]

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        convolution_param = dict(num_output=num_output, kernel_size=kernel_h,
                                 pad=pad_h, stride=stride_h)
    else:
        convolution_param = dict(num_output=num_output, kernel_h=kernel_h, kernel_w=kernel_w,
                                 pad_h=pad_h, pad_w=pad_w, stride_h=stride_h, stride_w=stride_w)
    convolution_param.update(kwargs)
    net[conv_name] = L.Deconvolution(net[from_layer], convolution_param=convolution_param,
                                     param=param)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
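# Hypothetical usage sketch (not from the original source): assuming `net` is a
# caffe.NetSpec that already holds a 'conv5' top, this adds a deconv + batchnorm +
# scale + ReLU chain named 'upsample5'. kernel_size=2 with stride=2 and pad=0
# exactly doubles the spatial resolution.
def example_upsample(net):
    DeconvBNLayer(net, 'conv5', 'upsample5', use_bn=True, use_relu=True,
                  num_output=256, kernel_size=2, pad=0, stride=2,
                  eps=0.001, moving_average_fraction=0.99)
    return net['upsample5_relu']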
def resnet_module(net, bot, name, ninput, kernel_size, stride, pad,
                  bottleneck_nout, expand_nout, use_batch_norm, train, parname_stem):
    # bypass: project the input with a 1x1 conv when the channel count changes
    if ninput != expand_nout:
        bypass_conv = L.Convolution(bot, kernel_size=1, stride=1, num_output=expand_nout, pad=0,
                                    bias_term=False, weight_filler=dict(type="msra"),
                                    param=[dict(name="par_%s_bypass_conv_w" % (parname_stem))])
        if use_batch_norm:
            if train:
                bypass_bn = L.BatchNorm(bypass_conv, in_place=True,
                                        batch_norm_param=dict(use_global_stats=False),
                                        param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
            else:
                bypass_bn = L.BatchNorm(bypass_conv, in_place=True,
                                        batch_norm_param=dict(use_global_stats=True))
            bypass_scale = L.Scale(bypass_bn, in_place=True, scale_param=dict(bias_term=True))
            net.__setattr__(name + "_bypass", bypass_conv)
            net.__setattr__(name + "_bypass_bn", bypass_bn)
            net.__setattr__(name + "_bypass_scale", bypass_scale)
        else:
            net.__setattr__(name + "_bypass", bypass_conv)
        bypass_layer = bypass_conv
    else:
        bypass_layer = bot

    # bottleneck 1x1 conv
    bottleneck_layer = L.Convolution(bot, num_output=bottleneck_nout, kernel_size=1, stride=1,
                                     pad=0, bias_term=False, weight_filler=dict(type="msra"),
                                     param=[dict(name="par_%s_bottleneck_conv_w" % (parname_stem))])
    if use_batch_norm:
        if train:
            bottleneck_bn = L.BatchNorm(bottleneck_layer, in_place=True,
                                        batch_norm_param=dict(use_global_stats=False),
                                        param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
        else:
            bottleneck_bn = L.BatchNorm(bottleneck_layer, in_place=True,
                                        batch_norm_param=dict(use_global_stats=True))
        bottleneck_scale = L.Scale(bottleneck_bn, in_place=True, scale_param=dict(bias_term=True))
        bottleneck_relu = L.ReLU(bottleneck_scale, in_place=True)
    else:
        bottleneck_relu = L.ReLU(bottleneck_layer, in_place=True)
    net.__setattr__(name + "_btlnk", bottleneck_layer)
    if use_batch_norm:
        net.__setattr__(name + "_btlnk_bn", bottleneck_bn)
        net.__setattr__(name + "_btlnk_scale", bottleneck_scale)
    net.__setattr__(name + "_btlnk_relu", bottleneck_relu)

    # 3x3 conv
    conv_layer = L.Convolution(bottleneck_relu, num_output=bottleneck_nout, kernel_size=3,
                               stride=1, pad=1, bias_term=False, weight_filler=dict(type="msra"),
                               param=[dict(name="par_%s_conv_w" % (parname_stem))])
    if use_batch_norm:
        if train:
            conv_bn = L.BatchNorm(conv_layer, in_place=True,
                                  batch_norm_param=dict(use_global_stats=False),
                                  param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
        else:
            conv_bn = L.BatchNorm(conv_layer, in_place=True,
                                  batch_norm_param=dict(use_global_stats=True))
        conv_scale = L.Scale(conv_bn, in_place=True, scale_param=dict(bias_term=True))
        conv_relu = L.ReLU(conv_scale, in_place=True)
    else:
        conv_relu = L.ReLU(conv_layer, in_place=True)
    net.__setattr__(name + "_conv", conv_layer)
    if use_batch_norm:
        net.__setattr__(name + "_conv_bn", conv_bn)
        net.__setattr__(name + "_conv_scale", conv_scale)
    net.__setattr__(name + "_conv_relu", conv_relu)

    # expand 1x1 conv
    expand_layer = L.Convolution(conv_relu, num_output=expand_nout, kernel_size=1, stride=1,
                                 pad=0, bias_term=False, weight_filler=dict(type="msra"),
                                 param=[dict(name="par_%s_expand_conv_w" % (parname_stem))])
    ex_last_layer = expand_layer
    if use_batch_norm:
        if train:
            expand_bn = L.BatchNorm(expand_layer, in_place=True,
                                    batch_norm_param=dict(use_global_stats=False),
                                    param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
        else:
            expand_bn = L.BatchNorm(expand_layer, in_place=True,
                                    batch_norm_param=dict(use_global_stats=True))
        expand_scale = L.Scale(expand_bn, in_place=True, scale_param=dict(bias_term=True))
        ex_last_layer = expand_scale
    net.__setattr__(name + "_expnd", expand_layer)
    if use_batch_norm:
        net.__setattr__(name + "_expnd_bn", expand_bn)
        net.__setattr__(name + "_expnd_scale", expand_scale)

    # elementwise sum + ReLU
    elt_layer = L.Eltwise(bypass_layer, ex_last_layer, eltwise_param=dict(operation=P.Eltwise.SUM))
    elt_relu = L.ReLU(elt_layer, in_place=True)
    net.__setattr__(name + "_eltwise", elt_layer)
    net.__setattr__(name + "_eltwise_relu", elt_relu)
    return elt_relu
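# Hypothetical usage sketch (not from the original source): stacking two bottleneck
# modules. The first changes the channel width (64 -> 256), so it gets a projection
# bypass; the second keeps the width, so its bypass is the identity.
def example_res_stack(net, bottom, train=True):
    top = resnet_module(net, bottom, "res2a", 64, 3, 1, 1, 64, 256, True, train, "res2a")
    top = resnet_module(net, top, "res2b", 256, 3, 1, 1, 64, 256, True, train, "res2b")
    return top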
def test_relu2(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([10, 4, 64, 64]))
    n.relu1 = L.ReLU(n.input1, negative_slope=0.1)
    self._test_model(*self._netspec_to_model(n, 'relu2'))
def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)
def create_net(phase):
    global train_transform_param
    global test_transform_param
    train_transform_param = {'mirror': False, 'mean_file': Params['mean_file']}
    test_transform_param = {'mean_file': Params['mean_file']}
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    #include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
    net.data, net.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb_file,
                                 transform_param=transform_param, ntop=2)

    kwargs = {
        'param': [dict(lr_mult=1), dict(lr_mult=2)],
        'weight_filler': dict(type='gaussian', std=0.0001),
        'bias_filler': dict(type='constant')
    }
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3, **kwargs)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant')
    }
    net.fc2 = L.InnerProduct(net.pool1, num_output=128, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2, in_place=True, dropout_param=dict(dropout_ratio=0.5))

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=100), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)
    }
    net.fc3 = L.InnerProduct(net.fc2, num_output=45, **kwargs)

    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    if phase == 'deploy':
        # drop the data layer and declare a plain input blob instead
        del net_proto.layer[0]
        net_proto.input.extend(['data'])
        net_proto.input_dim.extend([1, 3, 12, 36])
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
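# Hypothetical usage sketch (not from the original source): emit all three phase
# variants to disk. Assumes Params is the module-level config dict used above.
def example_write_all_phases():
    for phase in ('train', 'test', 'deploy'):
        with open('{}_{}.prototxt'.format(Params['model_name'], phase), 'w') as f:
            f.write(str(create_net(phase)))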
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)
def gen_train_proto_multiple(target_interface, shape=(28, 28, 3)):
    path = path_origin + '/train_' + target_interface + 'm.prototxt'
    n = caffe.NetSpec()
    n.data = L.DummyData(shape=dict(dim=[1, 3, shape[0], shape[1]]))
    if target_interface == 'conv1':
        filler = {"type": "constant", "value": 1}
        n.conv1_1 = L.Convolution(n.data, kernel_size=11, stride=4, num_output=1, weight_filler=filler)
        n.conv1_2 = L.Convolution(n.conv1_1, kernel_size=11, stride=4, num_output=1, weight_filler=filler)
        n.conv1_3 = L.Convolution(n.conv1_2, kernel_size=11, stride=4, num_output=1, weight_filler=filler)
        n.conv1_4 = L.Convolution(n.conv1_3, kernel_size=11, stride=4, num_output=1, weight_filler=filler)
        n.conv1_5 = L.Convolution(n.conv1_4, kernel_size=11, stride=4, num_output=1, weight_filler=filler)
    elif target_interface == 'pool1':
        n.pool1 = L.Pooling(n.data, kernel_size=2, stride=2, pool=P.Pooling.MAX)
        n.pool2 = L.Pooling(n.pool1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
        n.pool3 = L.Pooling(n.pool2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
        n.pool4 = L.Pooling(n.pool3, kernel_size=2, stride=2, pool=P.Pooling.MAX)
        n.pool5 = L.Pooling(n.pool4, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    elif target_interface == 'pool2':
        n.pool2_1 = L.Pooling(n.data, kernel_size=2, stride=2, pool=P.Pooling.AVE)
        n.pool2_2 = L.Pooling(n.pool2_1, kernel_size=2, stride=2, pool=P.Pooling.AVE)
        n.pool2_3 = L.Pooling(n.pool2_2, kernel_size=2, stride=2, pool=P.Pooling.AVE)
        n.pool2_4 = L.Pooling(n.pool2_3, kernel_size=2, stride=2, pool=P.Pooling.AVE)
        n.pool2_5 = L.Pooling(n.pool2_4, kernel_size=2, stride=2, pool=P.Pooling.AVE)
    elif target_interface == 'relu1':
        n.relu1_1 = L.ReLU(n.data)
        n.relu1_2 = L.ReLU(n.relu1_1)
        n.relu1_3 = L.ReLU(n.relu1_2)
        n.relu1_4 = L.ReLU(n.relu1_3)
        n.relu1_5 = L.ReLU(n.relu1_4)
    elif target_interface == 'sigmoid1':
        n.sigmoid1_1 = L.Sigmoid(n.data)
        n.sigmoid1_2 = L.Sigmoid(n.sigmoid1_1)
        n.sigmoid1_3 = L.Sigmoid(n.sigmoid1_2)
        n.sigmoid1_4 = L.Sigmoid(n.sigmoid1_3)
        n.sigmoid1_5 = L.Sigmoid(n.sigmoid1_4)
    elif target_interface == 'softmax1':
        n.softmax1_1 = L.Softmax(n.data)
        n.softmax1_2 = L.Softmax(n.softmax1_1)
        n.softmax1_3 = L.Softmax(n.softmax1_2)
        n.softmax1_4 = L.Softmax(n.softmax1_3)
        n.softmax1_5 = L.Softmax(n.softmax1_4)
    elif target_interface == 'tanh1':
        n.tanh1_1 = L.TanH(n.data)
        n.tanh1_2 = L.TanH(n.tanh1_1)
        n.tanh1_3 = L.TanH(n.tanh1_2)
        n.tanh1_4 = L.TanH(n.tanh1_3)
        n.tanh1_5 = L.TanH(n.tanh1_4)
    save_proto(n.to_proto(), path)
    return path
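# Hypothetical usage sketch (not from the original source): generate the five-layer
# variant for each supported interface. Assumes path_origin and save_proto are the
# module-level helpers used above.
def example_gen_all_interfaces():
    for iface in ('conv1', 'pool1', 'pool2', 'relu1', 'sigmoid1', 'softmax1', 'tanh1'):
        print(gen_train_proto_multiple(iface))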
def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)
def fc_relu_dropout(bottom, nout, dropout):
    fc = L.InnerProduct(bottom, num_output=nout,
                        param=[dict(lr_mult=1), dict(lr_mult=2)],
                        weight_filler=dict(type='gaussian', std=0.001),
                        bias_filler=dict(type='constant'))
    return fc, L.ReLU(fc, in_place=True), L.Dropout(fc, dropout_ratio=dropout, in_place=True)
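# Hypothetical usage sketch (not from the original source): composing the small
# helpers above into a LeNet-style classifier. NetSpec only serializes tops that
# are assigned onto the net, so each returned top is attached explicitly.
def example_mlp(lmdb_path, batch_size=64):
    n = caffe.NetSpec()
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                             source=lmdb_path, ntop=2)
    n.conv1, n.relu1 = conv_relu(n.data, ks=5, nout=32)
    n.pool1 = L.Pooling(n.relu1, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    n.fc1, n.relu2, n.drop1 = fc_relu_dropout(n.pool1, 256, 0.5)
    n.fc2, n.relu3 = fc_relu(n.drop1, 64)
    n.score = L.InnerProduct(n.relu3, num_output=10)
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    return n.to_proto()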
def write_layer():
    # NOTE: froozen_param is a module-level list of param dicts (lr_mult/decay_mult)
    # that is merged into each learnable layer's param field, i.e. param=[...].
    filters = [16, 32, 64, 128, 192, 256]
    nClasses = 2
    net = caffe.NetSpec()
    net.data, net.label = L.HDF5Data(batch_size=16, source='train.h5list', ntop=2)

    def conv_bn_relu(bottom, prefix, idx, nout):
        # 3x3 conv -> BatchNorm -> Scale -> ReLU, with the normalization applied in place.
        net[prefix + '_conv%d' % idx] = L.Convolution(
            bottom, param=froozen_param, num_output=nout, pad=1, kernel_size=3, stride=1,
            weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
        net[prefix + '_norm%d' % idx] = L.BatchNorm(net[prefix + '_conv%d' % idx],
                                                    moving_average_fraction=0.9,
                                                    in_place=True, use_global_stats=False)
        net[prefix + '_scale%d' % idx] = L.Scale(net[prefix + '_norm%d' % idx],
                                                 bias_term=True, in_place=True)
        net[prefix + '_relu%d' % idx] = L.ReLU(net[prefix + '_scale%d' % idx], in_place=True)
        return net[prefix + '_relu%d' % idx]

    # encoder: six conv-conv-pool blocks (enc1 .. enc6)
    bottom = net.data
    for i in range(1, 7):
        prefix = 'enc%d' % i
        top = conv_bn_relu(bottom, prefix, 1, filters[i - 1])
        top = conv_bn_relu(top, prefix, 2, filters[i - 1])
        net[prefix + '_pool1'] = L.Pooling(top, pool=caffe.params.Pooling.MAX,
                                           kernel_size=2, stride=2)
        bottom = net[prefix + '_pool1']

    # middle block
    top = conv_bn_relu(bottom, 'mid', 1, filters[5])
    top = conv_bn_relu(top, 'mid', 2, filters[5])

    # decoder: six deconv-concat-conv-conv blocks with skip connections (dec1 .. dec6)
    for i in range(1, 7):
        prefix = 'dec%d' % i
        nout = filters[6 - i]
        net[prefix + '_deconv1'] = L.Deconvolution(
            top, param=froozen_param,
            convolution_param=dict(num_output=nout, pad=0, kernel_size=2, stride=2,
                                   weight_filler=dict(type='xavier'),
                                   bias_filler=dict(type='constant')))
        net[prefix + '_norm0'] = L.BatchNorm(net[prefix + '_deconv1'],
                                             moving_average_fraction=0.9,
                                             in_place=True, use_global_stats=False)
        net[prefix + '_scale0'] = L.Scale(net[prefix + '_norm0'], bias_term=True, in_place=True)
        # concatenate with the matching encoder output (dec1 <- enc6, ..., dec6 <- enc1)
        net[prefix + '_concat1'] = L.Concat(net[prefix + '_scale0'],
                                            net['enc%d_relu2' % (7 - i)],
                                            concat_param=dict(concat_dim=1))
        top = conv_bn_relu(net[prefix + '_concat1'], prefix, 1, nout)
        top = conv_bn_relu(top, prefix, 2, nout)

    # per-pixel classifier head
    net.conv_out = L.Convolution(top, param=froozen_param, num_output=nClasses, pad=1,
                                 kernel_size=3, stride=1, weight_filler=dict(type='xavier'),
                                 bias_filler=dict(type='constant'))
    net.loss = L.SoftmaxWithLoss(net.conv_out, net.label)
    net.accuracy = L.Accuracy(net.conv_out, net.label)
    return net.to_proto()
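# Hypothetical usage sketch (not from the original source): serialize the U-Net
# style training definition above. froozen_param is assumed to be defined at module
# level; the value below is illustrative (fully trainable weights and biases).
froozen_param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

def example_write_unet():
    with open('unet_train.prototxt', 'w') as f:
        f.write(str(write_layer()))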
def residual_factory1(bottom, num_filter):
    conv1 = conv_factory_relu(bottom, 3, num_filter, 1, 1)
    conv2 = conv_factory(conv1, 3, num_filter, 1, 1)
    addition = L.Eltwise(bottom, conv2, operation=P.Eltwise.SUM)
    relu = L.ReLU(addition, in_place=True)
    return relu
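# conv_factory / conv_factory_relu are assumed to be defined elsewhere in this file;
# a minimal hypothetical sketch of the (ks, nout, stride, pad) interface that
# residual_factory1 expects is:
def conv_factory(bottom, ks, nout, stride=1, pad=0):
    return L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad,
                         weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

def conv_factory_relu(bottom, ks, nout, stride=1, pad=0):
    conv = conv_factory(bottom, ks, nout, stride, pad)
    return L.ReLU(conv, in_place=True)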
def ResBody(net, from_layer, block_name, out2a, out2b, out2c, stride, use_branch1):
    # e.g. ResBody(net, 'pool1', '2a', 64, 64, 256, 1, True)
    conv_prefix = 'res{}_'.format(block_name)
    conv_postfix = ''
    bn_prefix = 'bn{}_'.format(block_name)
    bn_postfix = ''
    scale_prefix = 'scale{}_'.format(block_name)
    scale_postfix = ''
    use_scale = True

    if use_branch1:
        branch_name = 'branch1'
        ConvBNLayer(net, from_layer, branch_name, use_bn=True, use_relu=False,
                    num_output=out2c, kernel_size=1, pad=0, stride=stride, use_scale=use_scale,
                    conv_prefix=conv_prefix, conv_postfix=conv_postfix,
                    bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                    scale_prefix=scale_prefix, scale_postfix=scale_postfix)
        branch1 = '{}{}'.format(conv_prefix, branch_name)
    else:
        branch1 = from_layer

    branch_name = 'branch2a'
    ConvBNLayer(net, from_layer, branch_name, use_bn=True, use_relu=True,
                num_output=out2a, kernel_size=1, pad=0, stride=stride, use_scale=use_scale,
                conv_prefix=conv_prefix, conv_postfix=conv_postfix,
                bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                scale_prefix=scale_prefix, scale_postfix=scale_postfix)
    out_name = '{}{}'.format(conv_prefix, branch_name)

    branch_name = 'branch2b'
    ConvBNLayer(net, out_name, branch_name, use_bn=True, use_relu=True,
                num_output=out2b, kernel_size=3, pad=1, stride=1, use_scale=use_scale,
                conv_prefix=conv_prefix, conv_postfix=conv_postfix,
                bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                scale_prefix=scale_prefix, scale_postfix=scale_postfix)
    out_name = '{}{}'.format(conv_prefix, branch_name)

    branch_name = 'branch2c'
    ConvBNLayer(net, out_name, branch_name, use_bn=True, use_relu=False,
                num_output=out2c, kernel_size=1, pad=0, stride=1, use_scale=use_scale,
                conv_prefix=conv_prefix, conv_postfix=conv_postfix,
                bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                scale_prefix=scale_prefix, scale_postfix=scale_postfix)
    branch2 = '{}{}'.format(conv_prefix, branch_name)

    res_name = 'res{}'.format(block_name)
    net[res_name] = L.Eltwise(net[branch1], net[branch2])
    relu_name = '{}_relu'.format(res_name)
    net[relu_name] = L.ReLU(net[res_name], in_place=True)
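# Hypothetical usage sketch (not from the original source): the first block of a
# ResNet 'res2' group projects the bypass (use_branch1=True); the following blocks
# add onto the previous block's relu top with identity bypasses.
def example_res2_group(net):
    ResBody(net, 'pool1', '2a', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=True)
    ResBody(net, 'res2a_relu', '2b', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=False)
    ResBody(net, 'res2b_relu', '2c', out2a=64, out2b=64, out2c=256, stride=1, use_branch1=False)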