Example No. 1
def _conv_block(net, bottom, name, num_output, use_relu=True, kernel_size=3,
                stride=1, pad=1, bn_prefix='', bn_postfix='/bn',
                scale_prefix='', scale_postfix='/scale', depthwise=False,
                weight_filler='xavier'):
    # Convolution (regular or depthwise) without bias; the Scale layer below
    # supplies the learnable per-channel bias.
    if not depthwise:
        conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride,
                             num_output=num_output, pad=pad, bias_term=False,
                             weight_filler=dict(type=weight_filler),
                             bias_filler=dict(type='constant'))
    else:
        conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride,
                             group=num_output, engine=1,
                             type='ConvolutionDepthwise',
                             num_output=num_output, pad=pad, bias_term=False,
                             weight_filler=dict(type=weight_filler),
                             bias_filler=dict(type='constant'))
    net[name] = conv

    # BatchNorm with its three internal blobs (mean, variance, scale factor)
    # excluded from learning.
    bn_name = '{}{}{}'.format(bn_prefix, name, bn_postfix)
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)],
        'eps': 0.001,
        'moving_average_fraction': 0.999,
    }
    batch_norm = L.BatchNorm(conv, in_place=True, **bn_kwargs)
    net[bn_name] = batch_norm

    # Learnable per-channel scale and bias after BatchNorm.
    scale_kwargs = {
        'param': [
            dict(lr_mult=1, decay_mult=0),
            dict(lr_mult=2, decay_mult=0)],
    }
    scale = L.Scale(batch_norm, bias_term=True, in_place=True,
                    filler=dict(value=1), bias_filler=dict(value=0),
                    **scale_kwargs)
    sb_name = '{}{}{}'.format(scale_prefix, name, scale_postfix)
    net[sb_name] = scale

    if use_relu:
        out_layer = L.ReLU6(scale, in_place=True)
        relu_name = '{}/relu'.format(name)
        net[relu_name] = out_layer
    else:
        out_layer = scale

    return out_layer
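A minimal usage sketch for this block. It assumes the standard pycaffe NetSpec API plus the custom ConvolutionDepthwise and ReLU6 layers the snippet already relies on; the layer names and input shape below are illustrative only.

import caffe
from caffe import layers as L

net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))

# Regular 3x3 conv -> BN -> Scale -> ReLU6, stride 2 (illustrative names).
out = _conv_block(net, net.data, 'conv1', num_output=32, stride=2)
# Depthwise 3x3 conv on the previous output (group == num_output).
out = _conv_block(net, out, 'conv1/dw', num_output=32, depthwise=True)

print(net.to_proto())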
Example No. 2
def BuildBiFPNLayer(net,
                    feats,
                    feat_sizes,
                    fpn_nodes,
                    layerPrefix='',
                    fpn_out_filters=88,
                    min_level=3,
                    max_level=7,
                    use_global_stats=True,
                    use_relu=False,
                    use_swish=True,
                    concat_method="fast_attention",
                    con_bn_act_pattern=False,
                    apply_bn=True,
                    is_training=True,
                    conv_after_downsample=False,
                    separable_conv=True,
                    use_nearest_resize=False,
                    pooling_type=None):
    """Builds a feature pyramid given previous feature pyramid and config."""
    # Start from a copy of the incoming feature list; new BiFPN nodes are
    # appended to it as they are built.
    temp_feats = list(feats)
    for i, fnode in enumerate(fpn_nodes):
        new_node_height = feat_sizes[fnode['feat_level']]['height']
        new_node_width = feat_sizes[fnode['feat_level']]['width']
        nodes = []
        for idx, input_offset in enumerate(fnode['inputs_offsets']):
            input_node = temp_feats[input_offset]
            #print("length temp_feats: {} temp_feats[{}]: {}, target height: {}".format(len(temp_feats), input_offset, temp_feats[input_offset], new_node_height))
            #print("temp_feats[input_offset]['height']: {}, new_node_height: {}\n".format(temp_feats[input_offset]['height'], new_node_height))

            input_node = resample_feature_map(
                net,
                from_layer=input_node,
                use_global_stats=use_global_stats,
                use_relu=False,
                use_swish=False,
                target_height=new_node_height,
                target_width=new_node_width,
                target_channels=fpn_out_filters,
                layerPrefix='{}_{}_{}_{}_{}'.format(layerPrefix, i, idx,
                                                    input_offset,
                                                    len(temp_feats)),
                apply_bn=apply_bn,
                is_training=is_training,
                conv_after_downsample=conv_after_downsample,
                use_nearest_resize=use_nearest_resize,
                pooling_type=pooling_type)
            nodes.append(net[input_node['layer']])
        # Combine all nodes.
        if concat_method == "fast_attention":
            out_layer = "{}_{}_concat_fast_attention".format(layerPrefix, i)
            net[out_layer] = L.WightEltwise(
                *nodes,
                wighted_eltwise_param=dict(operation=P.WightedEltwise.FASTER,
                                           weight_filler=dict(type="msra")))
        elif concat_method == "softmax_attention":
            out_layer = "{}_{}_concat_softmax_attention".format(layerPrefix, i)
            net[out_layer] = L.WightEltwise(
                *nodes,
                wighted_eltwise_param=dict(operation=P.WightedEltwise.SOFTMAX,
                                           weight_filler=dict(type="msra")))
        elif concat_method == "sum_attention":
            out_layer = "{}_{}_concat_sum_attention".format(layerPrefix, i)
            # Note: as written, this branch also uses the FASTER operation.
            net[out_layer] = L.WightEltwise(
                *nodes,
                wighted_eltwise_param=dict(operation=P.WightedEltwise.FASTER,
                                           weight_filler=dict(type="msra")))
        else:
            raise ValueError('unknown weight_method {}'.format(concat_method))
        # operation after combine, like conv & bn
        print(out_layer)
        if not con_bn_act_pattern:
            if use_swish:
                Swish_Name = "{}_{}_swish".format(layerPrefix, i)
                net[Swish_Name] = L.Swish(net[out_layer], in_place=True)
                out_layer = Swish_Name
            elif use_relu:
                Relu_Name = "{}_{}_relu6".format(layerPrefix, i)
                net[Relu_Name] = L.ReLU6(net[out_layer], in_place=True)
                out_layer = Relu_Name
            if separable_conv:  # need batch-norm
                Deconv_Name = "{}_{}_Deconv_3x3".format(layerPrefix, i)
                ConvBNLayer(net,
                            out_layer,
                            Deconv_Name,
                            use_bn=apply_bn,
                            use_relu=False,
                            use_swish=False,
                            num_output=fpn_out_filters,
                            kernel_size=3,
                            pad=1,
                            stride=1,
                            group=fpn_out_filters,
                            lr_mult=1,
                            use_scale=apply_bn,
                            use_global_stats=use_global_stats,
                            Use_DeConv=False)
                out_layer = Deconv_Name
                Point_Name = "{}_{}_conv_1x1".format(layerPrefix, i)
                ConvBNLayer(net,
                            out_layer,
                            Point_Name,
                            use_bn=apply_bn,
                            use_relu=use_relu,
                            use_swish=use_swish,
                            num_output=fpn_out_filters,
                            kernel_size=1,
                            pad=0,
                            stride=1,
                            lr_mult=1,
                            use_scale=apply_bn,
                            use_global_stats=use_global_stats,
                            Use_DeConv=False)
                out_layer = Point_Name
            else:
                # Plain 3x3 conv (+ BN) after the weighted fusion.
                Conv_name = "{}_{}_conv_3x3".format(layerPrefix, i)
                ConvBNLayer(net,
                            out_layer,
                            Conv_name,
                            use_bn=apply_bn,
                            use_relu=use_relu,
                            use_swish=use_swish,
                            num_output=fpn_out_filters,
                            kernel_size=3,
                            pad=1,
                            stride=1,
                            lr_mult=1,
                            use_scale=apply_bn,
                            use_global_stats=use_global_stats,
                            Use_DeConv=False)
                out_layer = Conv_name
        temp_feats.append({
            "layer": out_layer,
            "height": new_node_height,
            "width": new_node_width,
            "channel": fpn_out_filters
        })

    output_feats = {}
    for l in range(min_level, max_level + 1):
        for i, fnode in enumerate(reversed(fpn_nodes)):
            if fnode['feat_level'] == l:
                output_feats[l] = temp_feats[-1 - i]
                break
    return output_feats
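A minimal sketch of the inputs this builder expects. It assumes the same extended Caffe fork that provides resample_feature_map, ConvBNLayer and the WightEltwise layer, and a NetSpec `net` that already contains the backbone tops; the top names 'C3'..'C5', the sizes and the two-node fusion list are illustrative only.

# Each backbone feature is described by its top name, spatial size and channels.
feats = [
    {'layer': 'C3', 'height': 64, 'width': 64, 'channel': 40},
    {'layer': 'C4', 'height': 32, 'width': 32, 'channel': 112},
    {'layer': 'C5', 'height': 16, 'width': 16, 'channel': 320},
]
# feat_sizes maps a feature level to its spatial size.
feat_sizes = {3: {'height': 64, 'width': 64},
              4: {'height': 32, 'width': 32},
              5: {'height': 16, 'width': 16}}
# Each fusion node names its output level; 'inputs_offsets' index into the
# growing feature list (original feats first, new nodes appended after them).
fpn_nodes = [
    {'feat_level': 4, 'inputs_offsets': [1, 2]},   # fuse C4 with C5
    {'feat_level': 3, 'inputs_offsets': [0, 3]},   # fuse C3 with the node above
]
# `net` must already hold the tops named in `feats`.
output_feats = BuildBiFPNLayer(net, feats, feat_sizes, fpn_nodes,
                               layerPrefix='bifpn_0', fpn_out_filters=88,
                               min_level=3, max_level=4)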
Example No. 3
def ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn,
                num_output,
                kernel_size,
                pad,
                stride,
                group=1,
                dilation=1,
                use_scale=True,
                lr_mult=1,
                conv_prefix='',
                conv_postfix='',
                bn_prefix='',
                bn_postfix='_bn',
                scale_prefix='',
                scale_postfix='_scale',
                bias_prefix='',
                bias_postfix='_bias',
                bn_eps=0.001,
                bn_moving_avg_fraction=0.999,
                Use_DeConv=False,
                use_global_stats=False,
                use_relu=False,
                use_swish=False,
                use_bias=False,
                use_merge_bn=False,
                **bn_params):
    if use_merge_bn and use_bn:
        raise ValueError(
            "use_merge_bn and use_bn should not both be True at the same time")
    if use_merge_bn:
        # parameters for convolution layer with batchnorm.
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)
                ],
                'weight_filler':
                dict(type='msra'),
                'bias_filler':
                dict(type='constant', value=0)
            }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
            }
        eps = bn_params.get('eps', bn_eps)
        moving_average_fraction = bn_params.get('moving_average_fraction',
                                                bn_moving_avg_fraction)
        use_global_stats = bn_params.get('use_global_stats', use_global_stats)
        # parameters for batchnorm layer.
        bn_lr_mult = lr_mult

        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=bn_lr_mult, decay_mult=0),
                dict(lr_mult=bn_lr_mult * 2, decay_mult=0)
            ],
            'batch_norm_param':
            [dict(eps=eps, moving_average_fraction=moving_average_fraction)],
            'scale_param': [
                dict(filler=dict(value=1.0),
                     bias_term=True,
                     bias_filler=dict(value=0.0))
            ],
        }

        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult * 2, decay_mult=0)
                ],
                'batch_norm_param':
                [dict(eps=eps, use_global_stats=use_global_stats)],
                'scale_param': [
                    dict(filler=dict(value=1.0),
                         bias_term=True,
                         bias_filler=dict(value=0.0))
                ],
            }
    if use_bn:
        # parameters for convolution layer with batchnorm.
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)
                ],
                'weight_filler':
                dict(type='msra'),
                'bias_filler':
                dict(type='constant', value=0)
            }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
            }
        eps = bn_params.get('eps', bn_eps)
        moving_average_fraction = bn_params.get('moving_average_fraction',
                                                bn_moving_avg_fraction)
        use_global_stats = bn_params.get('use_global_stats', use_global_stats)
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)
            ],
            'eps':
            eps,
            'moving_average_fraction':
            moving_average_fraction,
        }
        bn_lr_mult = lr_mult
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0)
                ],
                'eps':
                eps,
                'use_global_stats':
                use_global_stats,
            }
            # not updating scale/bias parameters
            bn_lr_mult = 0
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term':
                True,
                'param': [
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult * 2, decay_mult=0)
                ],
                'filler':
                dict(value=1.0),
                'bias_filler':
                dict(value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)
                ],
                'weight_filler':
                dict(type='msra'),
                'bias_filler':
                dict(type='constant', value=0)
            }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        if Use_DeConv:
            net[conv_name] = L.Deconvolution(
                net[from_layer],
                param=[dict(lr_mult=lr_mult, decay_mult=1)],
                convolution_param=dict(bias_term=False,
                                       num_output=num_output,
                                       kernel_size=kernel_h,
                                       stride=stride_h,
                                       pad=pad_h,
                                       weight_filler=dict(type="msra")))
        else:
            net[conv_name] = L.Convolution(net[from_layer],
                                           num_output=num_output,
                                           kernel_size=kernel_h,
                                           pad=pad_h,
                                           stride=stride_h,
                                           **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer],
                                       num_output=num_output,
                                       kernel_h=kernel_h,
                                       kernel_w=kernel_w,
                                       pad_h=pad_h,
                                       pad_w=pad_w,
                                       stride_h=stride_h,
                                       stride_w=stride_w,
                                       **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if group > 1:
        net.update(conv_name, {'group': group})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_merge_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNormScale(
            net[conv_name],
            in_place=True,
            param=[
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=bn_lr_mult, decay_mult=0),
                dict(lr_mult=bn_lr_mult * 2, decay_mult=0)
            ],
            batch_norm_param=dict(
                eps=eps,
                use_global_stats=use_global_stats,
                moving_average_fraction=moving_average_fraction))
    if use_relu:
        relu_name = '{}_relu6'.format(conv_name)
        net[relu_name] = L.ReLU6(net[conv_name], in_place=True)
    if use_swish:
        swish_name = '{}_swish'.format(conv_name)
        net[swish_name] = L.Swish(net[conv_name], in_place=True)
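A minimal usage sketch for ConvBNLayer. It assumes the pycaffe NetSpec API plus the UnpackVariable helper and the ReLU6/Swish layers this function already depends on; the layer names and input shape are illustrative only.

import caffe
from caffe import layers as L

net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))

# 3x3 conv -> BatchNorm -> Scale -> ReLU6, stride 2.
ConvBNLayer(net, 'data', 'conv1',
            use_bn=True, use_scale=True, use_relu=True,
            num_output=32, kernel_size=3, pad=1, stride=2)
# Tops created: 'conv1', 'conv1_bn', 'conv1_scale', 'conv1_relu6'.
print(net.to_proto())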
Example No. 4
def generate_net(train_lmdb, val_lmdb, train_batch_size, test_batch_size):
    net = caffe.NetSpec()

    net.data, net.label = L.Data(source=train_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=train_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68]),
                                 scale=0.017,
                                 include=dict(phase=caffe.TRAIN))
    # note: serialize the TRAIN-phase data layer now; the TEST-phase data
    # layer defined next replaces it in `net`, and the saved string is
    # prepended to the final prototxt on return.
    train_data_layer_str = str(net.to_proto())

    net.data, net.label = L.Data(source=val_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=test_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68]),
                                 scale=0.017,
                                 include=dict(phase=caffe.TEST))
    # backbone
    net.conv1 = L.Convolution(
        net.data,
        num_output=32,
        kernel_size=3,
        stride=2,
        pad=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    net.tops['conv1/bn'] = L.BatchNorm(net.conv1,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)

    net.tops['conv1/scale'] = L.Scale(
        net.tops['conv1/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)

    net.conv1_relu = L.ReLU6(net.tops['conv1/scale'], in_place=True)

    bottleneck(net, net.conv1_relu, 'conv2', 32, 1, 16, 1)

    bottleneck(net, net.tops['conv2/1x1_down/scale'], 'conv3_1', 16, 6, 24, 2)

    bottleneck(net, net.tops['conv3_1/1x1_down/scale'], 'conv3_2', 24, 6, 24,
               1)

    bottleneck(net, net.tops['conv3_2/add'], 'conv4_1', 24, 6, 32, 2)

    bottleneck(net, net.tops['conv4_1/1x1_down/scale'], 'conv4_2', 32, 6, 32,
               1)

    bottleneck(net, net.tops['conv4_2/add'], 'conv4_3', 32, 6, 32, 1)

    bottleneck(net, net.tops['conv4_3/add'], 'conv5_1', 32, 6, 64, 2)

    bottleneck(net, net.tops['conv5_1/1x1_down/scale'], 'conv5_2', 64, 6, 64,
               1)

    bottleneck(net, net.tops['conv5_2/add'], 'conv5_3', 64, 6, 64, 1)

    bottleneck(net, net.tops['conv5_3/add'], 'conv5_4', 64, 6, 64, 1)

    bottleneck(net, net.tops['conv5_4/add'], 'conv6_1', 64, 6, 96, 1)

    bottleneck(net, net.tops['conv6_1/1x1_down/scale'], 'conv6_2', 96, 6, 96,
               1)

    bottleneck(net, net.tops['conv6_2/add'], 'conv6_3', 96, 6, 96, 1)

    bottleneck(net, net.tops['conv6_3/add'], 'conv7_1', 96, 6, 160, 2)

    bottleneck(net, net.tops['conv7_1/1x1_down/scale'], 'conv7_2', 160, 6, 160,
               1)

    bottleneck(net, net.tops['conv7_2/add'], 'conv7_3', 160, 6, 160, 1)

    bottleneck(net, net.tops['conv7_3/add'], 'conv8', 160, 6, 320, 1)

    net.conv9 = L.Convolution(
        net.tops['conv8/1x1_down/scale'],
        num_output=1280,
        kernel_size=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    net.tops['conv9/bn'] = L.BatchNorm(net.conv9,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)

    net.tops['conv9/scale'] = L.Scale(
        net.tops['conv9/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)
    net.conv9_relu = L.ReLU6(net.tops['conv9/scale'], in_place=True)

    # global average pooling
    net.pool10 = L.Pooling(net.conv9_relu,
                           pool=caffe.params.Pooling.AVE,
                           global_pooling=True)

    # 1000 cls
    net.conv11 = L.Convolution(
        net.pool10,
        num_output=1000,
        kernel_size=1,
        weight_filler={
            "type": "gaussian",
            "mean": 0,
            "std": 0.01
        },
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    # softmax loss
    net.loss = L.SoftmaxWithLoss(net.conv11,
                                 net.label,
                                 include=dict(phase=caffe.TRAIN))

    # accuracy
    net.accuracy = L.Accuracy(net.conv11,
                              net.label,
                              include=dict(phase=caffe.TEST))
    net.accuracy_top5 = L.Accuracy(net.conv11,
                                   net.label,
                                   include=dict(phase=caffe.TEST),
                                   accuracy_param=dict(top_k=5))

    return train_data_layer_str + str(net.to_proto())
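A minimal driver sketch for this generator; the LMDB paths, batch sizes and output file name are illustrative placeholders.

if __name__ == '__main__':
    # Build the train/val prototxt and write it to disk.
    proto_str = generate_net('path/to/train_lmdb', 'path/to/val_lmdb',
                             train_batch_size=64, test_batch_size=25)
    with open('mobilenetv2_train_val.prototxt', 'w') as f:
        f.write(proto_str)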
Example No. 5
def bottleneck(net, net_bottom, prefix, input_channel, time, out, step):
    # MobileNetV2 inverted residual block: 1x1 expansion (by factor `time`,
    # skipped for 'conv2' whose expansion factor is 1), 3x3 depthwise conv
    # with stride `step`, 1x1 linear projection to `out` channels, and an
    # element-wise residual add when stride is 1 and channels are unchanged.
    if (prefix != 'conv2'):
        net.tops[prefix + '/1x1_up'] = L.Convolution(
            net_bottom,
            num_output=input_channel * time,
            kernel_size=1,
            weight_filler={"type": "xavier"},
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ])
        net.tops[prefix + '/1x1_up/bn'] = L.BatchNorm(net.tops[prefix +
                                                               '/1x1_up'],
                                                      param=[
                                                          dict(lr_mult=0,
                                                               decay_mult=0),
                                                          dict(lr_mult=0,
                                                               decay_mult=0),
                                                          dict(lr_mult=0,
                                                               decay_mult=0)
                                                      ],
                                                      in_place=False)
        net.tops[prefix + '/1x1_up/scale'] = L.Scale(net.tops[prefix +
                                                              '/1x1_up/bn'],
                                                     param=[
                                                         dict(lr_mult=1,
                                                              decay_mult=0),
                                                         dict(lr_mult=2,
                                                              decay_mult=0)
                                                     ],
                                                     scale_param={
                                                         'filler': {
                                                             'value': 1
                                                         },
                                                         'bias_term': True,
                                                         'bias_filler': {
                                                             'value': 0
                                                         }
                                                     },
                                                     in_place=True)
        net.tops[prefix + '/1x1_up/relu6'] = L.ReLU6(net.tops[prefix +
                                                              '/1x1_up/scale'],
                                                     in_place=True)

        net.tops[prefix + '/3x3_dw'] = L.ConvolutionDepthwise(
            net.tops[prefix + '/1x1_up/relu6'],
            num_output=input_channel * time,
            kernel_size=3,
            stride=step,
            pad=1,
            weight_filler={"type": "xavier"},
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ])
    else:
        net.tops[prefix + '/3x3_dw'] = L.ConvolutionDepthwise(
            net_bottom,
            num_output=input_channel * time,
            kernel_size=3,
            stride=step,
            pad=1,
            weight_filler={"type": "xavier"},
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ])

    net.tops[prefix + '/3x3_dw/bn'] = L.BatchNorm(net.tops[prefix + '/3x3_dw'],
                                                  param=[
                                                      dict(lr_mult=0,
                                                           decay_mult=0),
                                                      dict(lr_mult=0,
                                                           decay_mult=0),
                                                      dict(lr_mult=0,
                                                           decay_mult=0)
                                                  ],
                                                  in_place=False)
    net.tops[prefix + '/3x3_dw/scale'] = L.Scale(
        net.tops[prefix + '/3x3_dw/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)
    net.tops[prefix + '/3x3_dw/relu6'] = L.ReLU6(net.tops[prefix +
                                                          '/3x3_dw/scale'],
                                                 in_place=True)

    net.tops[prefix + '/1x1_down'] = L.Convolution(
        net.tops[prefix + '/3x3_dw/relu6'],
        num_output=out,
        kernel_size=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.tops[prefix + '/1x1_down/bn'] = L.BatchNorm(net.tops[prefix +
                                                             '/1x1_down'],
                                                    param=[
                                                        dict(lr_mult=0,
                                                             decay_mult=0),
                                                        dict(lr_mult=0,
                                                             decay_mult=0),
                                                        dict(lr_mult=0,
                                                             decay_mult=0)
                                                    ],
                                                    in_place=False)
    net.tops[prefix + '/1x1_down/scale'] = L.Scale(
        net.tops[prefix + '/1x1_down/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)

    # Residual connection only when stride is 1 and the block keeps the
    # channel count ('conv2', 'conv6_1' and 'conv8' change the number of
    # channels, so they have no shortcut).
    if (prefix != 'conv2' and prefix != 'conv6_1' and prefix != 'conv8'
            and step == 1):
        print("prefix: ", prefix)
        net.tops[prefix + '/add'] = L.Eltwise(
            net_bottom, net.tops[prefix + '/1x1_down/scale'])
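A minimal standalone sketch that builds a single block just to inspect the generated top names (Example No. 4 above shows full-network usage). It assumes the same pycaffe fork with ConvolutionDepthwise and ReLU6; the 'conv3_1' prefix and input shape are illustrative only.

import caffe
from caffe import layers as L

net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 16, 56, 56]))
bottleneck(net, net.data, 'conv3_1', input_channel=16, time=6, out=24, step=2)
# Expected tops: conv3_1/1x1_up (+ bn/scale/relu6), conv3_1/3x3_dw (+ bn/scale/relu6),
# conv3_1/1x1_down (+ bn/scale); no 'conv3_1/add' because step == 2.
print('\n'.join(net.tops.keys()))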