def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                kernel_size, pad, stride, use_scale=True, eps=0.001,
                conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                scale_prefix='', scale_postfix='_scale',
                bias_prefix='', bias_postfix='_bias'):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
            }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [dict(lr_mult=1, decay_mult=1),
                          dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h, stride=stride_h,
                                       **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w, **kwargs)
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
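
# Usage sketch (illustrative, not part of the original file): wiring ConvBNLayer
# into a NetSpec. Assumes the module's usual imports (caffe, and
# `from caffe import layers as L`) plus the UnpackVariable helper are in scope;
# the blob names 'data'/'conv1' and the shape are arbitrary.
def _example_conv_bn_usage():
    net = caffe.NetSpec()
    net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
    # Emits conv1, conv1_bn, conv1_scale, and an in-place conv1_relu.
    ConvBNLayer(net, 'data', 'conv1', use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=2)
    return net.to_proto()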
def ConvBNUnitLayer_decomp(net, from_layer, out_layer, use_bn, use_relu, num_output,
                           kernel_size, pad, stride, lr=1, decay=1,
                           R1_channels=12, R2_channels=12, use_scale=True, eps=0.001,
                           conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                           scale_prefix='', scale_postfix='_scale',
                           bias_prefix='', bias_postfix='_bias',
                           leaky=False, leaky_ratio=0.1, init_xavier=False):
    if use_bn:
        use_bias = False
        bn_kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            }
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [dict(lr_mult=lr, decay_mult=0),
                          dict(lr_mult=lr, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=lr, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        use_bias = True

    # Conv layer (expanded into low-rank factors by Decomp_ConvLayer).
    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    conv_name = Decomp_ConvLayer(net, from_layer=from_layer, out_layer=conv_name,
                                 num_output=num_output, kernel_size=kernel_size,
                                 pad=pad, stride=stride,
                                 R1=R1_channels, R2=R2_channels,
                                 lr=lr, decay=decay, use_bias=use_bias,
                                 init_xavier=init_xavier)
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(out_layer)
        if leaky:
            leaky_kwargs = {"negative_slope": leaky_ratio}
            net[relu_name] = L.ReLU(net[conv_name], in_place=True, **leaky_kwargs)
        else:
            net[relu_name] = L.ReLU(net[conv_name], in_place=True)
    return conv_name
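
# Illustrative call (an assumption based on the signature above, not original
# code): Decomp_ConvLayer, defined elsewhere in this repo, is expected to
# expand the conv into factorized stages whose intermediate widths are
# R1_channels/R2_channels, returning the final blob name that BN/ReLU attach to.
def _example_decomp_unit(net):
    return ConvBNUnitLayer_decomp(net, 'data', 'conv1', use_bn=True, use_relu=True,
                                  num_output=64, kernel_size=3, pad=1, stride=1,
                                  R1_channels=12, R2_channels=12)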
def DeconvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                  kernel_size, pad, stride, use_scale=True, lr_mult=1,
                  deconv_prefix='', deconv_postfix='', bn_prefix='', bn_postfix='_bn',
                  scale_prefix='', scale_postfix='_scale',
                  bias_prefix='', bias_postfix='_bias', **bn_params):
    # bn_params is accepted for call-site compatibility but unused: the
    # BatchNorm layer below is created with its defaults.
    if use_bn:
        # parameters for deconvolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=lr_mult, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
            }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        kwargs = {
            'param': [
                dict(lr_mult=lr_mult, decay_mult=1),
                dict(lr_mult=2 * lr_mult, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
            }

    deconv_name = '{}{}{}'.format(deconv_prefix, out_layer, deconv_postfix)
    # L.Deconvolution does not map bare keyword arguments onto
    # ConvolutionParameter, so route everything except 'param' through
    # convolution_param (same workaround as the DeconvBNLayer variant below).
    deconv_param = dict(num_output=num_output, kernel_size=kernel_size,
                        pad=pad, stride=stride)
    param = {'param': kwargs.pop('param')}
    deconv_param.update(kwargs)
    net[deconv_name] = L.Deconvolution(net[from_layer],
                                       convolution_param=deconv_param, **param)
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[deconv_name], in_place=True)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(deconv_name)
        net[relu_name] = L.ReLU(net[deconv_name], in_place=True)
def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1,
                conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                scale_prefix='', scale_postfix='_scale',
                bias_prefix='', bias_postfix='_bias', **bn_params):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=lr_mult, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
            }
        eps = bn_params.get('eps', 0.001)
        moving_average_fraction = bn_params.get('moving_average_fraction', 0.999)
        use_global_stats = bn_params.get('use_global_stats', False)
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            'moving_average_fraction': moving_average_fraction,
            }
        bn_lr_mult = lr_mult
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0)],
                'eps': eps,
                'use_global_stats': use_global_stats,
                }
            # not updating scale/bias parameters
            bn_lr_mult = 0
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        kwargs = {
            'param': [
                dict(lr_mult=lr_mult, decay_mult=1),
                dict(lr_mult=2 * lr_mult, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h, stride=stride_h,
                                       **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w, **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
def ConvBNUnitLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                    kernel_size, pad, stride, lr_mult=1, decay_mult=1,
                    dilation=1, use_conv_bias=False, use_scale=True, eps=0.001,
                    conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                    scale_prefix='', scale_postfix='_scale',
                    bias_prefix='', bias_postfix='_bias', leaky=False, leaky_ratio=0.1,
                    init_xavier=True, n_group=1, flag_bninplace=True, engine="CUDNN",
                    flag_withparamname=False, pose_string='',
                    constant_value=0.1, truncvalue=-1, use_global_stats=None):
    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    if truncvalue > 0:
        use_bn = False

    def _param(suffix, **p):
        # Attach explicit parameter names only when sharing is requested, so
        # branches built from the same out_layer reuse the same weights.
        if flag_withparamname:
            return dict(name=conv_name + suffix, **p)
        return p

    if use_bn:
        # parameters for convolution layer with batchnorm.
        if use_conv_bias:
            weight_filler = dict(type='xavier') if init_xavier else dict(type='gaussian', std=0.01)
            kwargs = {
                'param': [_param('_paramconv0', lr_mult=lr_mult, decay_mult=decay_mult),
                          _param('_paramconv1', lr_mult=2 * lr_mult, decay_mult=0)],
                'weight_filler': weight_filler,
                'bias_filler': dict(type='constant', value=0)
                }
        else:
            # bias-free conv before batchnorm (always gaussian-initialized).
            kwargs = {
                'param': [_param('_paramconv0', lr_mult=lr_mult, decay_mult=decay_mult)],
                'weight_filler': dict(type='gaussian', std=0.01),
                'bias_term': False,
                }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [_param('_parambn0', lr_mult=0, decay_mult=0),
                      _param('_parambn1', lr_mult=0, decay_mult=0),
                      _param('_parambn2', lr_mult=0, decay_mult=0)],
            'eps': eps,
            }
        if use_global_stats is not None:
            bn_kwargs['use_global_stats'] = use_global_stats
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [_param('_paramsb0', lr_mult=lr_mult, decay_mult=0),
                          _param('_paramsb1', lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=constant_value),
                }
        else:
            bias_kwargs = {
                'param': [_param('_parambias0', lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        weight_filler = dict(type='xavier') if init_xavier else dict(type='gaussian', std=0.01)
        kwargs = {
            'param': [_param('_paramconv0', lr_mult=lr_mult, decay_mult=decay_mult),
                      _param('_paramconv1', lr_mult=2 * lr_mult, decay_mult=0)],
            'weight_filler': weight_filler,
            'bias_filler': dict(type='constant', value=0)
            }

    if engine == "CAFFE":
        engine_param = P.Convolution.CAFFE
    else:
        # engine_param is consumed only by the disabled depthwise path below.
        engine_param = P.Convolution.CUDNN
    [kernel_h, kernel_w] = Upar.UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = Upar.UnpackVariable(pad, 2)
    [stride_h, stride_w] = Upar.UnpackVariable(stride, 2)
    conv_name_pose = conv_name + pose_string
    if kernel_h == kernel_w:
        if truncvalue > 0:
            # weight_satvalue is a custom weight-saturation field of this
            # fork's ConvolutionParameter.
            net[conv_name_pose] = L.Convolution(net[from_layer], num_output=num_output,
                                                kernel_size=kernel_h, pad=pad_h,
                                                stride=stride_h, group=n_group,
                                                weight_satvalue=truncvalue, **kwargs)
        else:
            net[conv_name_pose] = L.Convolution(net[from_layer], num_output=num_output,
                                                kernel_size=kernel_h, pad=pad_h,
                                                stride=stride_h, group=n_group, **kwargs)
    else:
        # (A disabled variant here routed large-group convs through a custom
        # ConvolutionDepthwise layer using engine_param.)
        if truncvalue > 0:
            net[conv_name_pose] = L.Convolution(net[from_layer], num_output=num_output,
                                                kernel_h=kernel_h, kernel_w=kernel_w,
                                                pad_h=pad_h, pad_w=pad_w,
                                                stride_h=stride_h, stride_w=stride_w,
                                                group=n_group, weight_satvalue=truncvalue,
                                                **kwargs)
        else:
            net[conv_name_pose] = L.Convolution(net[from_layer], num_output=num_output,
                                                kernel_h=kernel_h, kernel_w=kernel_w,
                                                pad_h=pad_h, pad_w=pad_w,
                                                stride_h=stride_h, stride_w=stride_w,
                                                group=n_group, **kwargs)
    if dilation > 1:
        net.update(conv_name_pose, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix) + pose_string
        net[bn_name] = L.BatchNorm(net[conv_name_pose], in_place=flag_bninplace, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix) + pose_string
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix) + pose_string
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name) + pose_string
        if use_bn:
            conv_name = sb_name if use_scale else bias_name
        else:
            conv_name = conv_name_pose
        if leaky:
            leaky_kwargs = {"negative_slope": leaky_ratio}
            net[relu_name] = L.ReLU(net[conv_name], in_place=True, **leaky_kwargs)
        else:
            net[relu_name] = L.ReLU(net[conv_name], in_place=True)
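
# Weight-sharing sketch (illustrative, not original code): with
# flag_withparamname=True the parameter names are derived from conv_name
# *without* pose_string, while layer/blob names get pose_string appended, so
# two branches built from the same out_layer share conv/BN/scale parameters
# (a Siamese-style pattern). Assumes net already has 'img_a'/'img_b' blobs.
def _example_shared_branches(net):
    for pose in ('_a', '_b'):
        ConvBNUnitLayer(net, 'img' + pose, 'conv1', use_bn=True, use_relu=True,
                        num_output=32, kernel_size=3, pad=1, stride=1,
                        flag_withparamname=True, pose_string=pose)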
def DeconvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                  kernel_size, pad, stride, dilation=1, use_scale=True, eps=0.001,
                  conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                  scale_prefix='', scale_postfix='_scale',
                  bias_prefix='', bias_postfix='_bias'):
    if use_bn:
        # parameters for deconvolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
            }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0),
                      dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [dict(lr_mult=1, decay_mult=0),
                          dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        deconv_param = {
            'num_output': num_output,
            'kernel_size': kernel_h,
            'pad': pad_h,
            'stride': stride_h,
            'dilation': dilation,
            }
    else:
        deconv_param = {
            'num_output': num_output,
            'kernel_h': kernel_h,
            'kernel_w': kernel_w,
            'pad_h': pad_h,
            'pad_w': pad_w,
            'stride_h': stride_h,
            'stride_w': stride_w,
            'dilation': dilation,
            }
    # L.Deconvolution does not map bare keyword arguments onto
    # ConvolutionParameter, so everything except 'param' goes through
    # convolution_param.
    param = {'param': kwargs['param']}
    del kwargs['param']
    deconv_param.update(kwargs)
    net[conv_name] = L.Deconvolution(net[from_layer],
                                     convolution_param=deconv_param, **param)
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
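
# Usage sketch (illustrative, not original code): a learned 2x upsampling
# stage. kernel_size=4, stride=2, pad=1 exactly doubles the spatial size,
# since output = stride*(in - 1) + kernel - 2*pad = 2*in.
def _example_upsample_2x(net, from_layer, out_layer, num_output):
    DeconvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                  num_output=num_output, kernel_size=4, pad=1, stride=2)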
def ConvBNLayer(net, from_layer, out_layer, use_bn, num_output,
                kernel_size, pad, stride, group=1, dilation=1, use_scale=True,
                lr_mult=1, conv_prefix='', conv_postfix='', bn_prefix='',
                bn_postfix='_bn', scale_prefix='', scale_postfix='_scale',
                bias_prefix='', bias_postfix='_bias', bn_eps=0.001,
                bn_moving_avg_fraction=0.999, Use_DeConv=False,
                use_global_stats=False, use_relu=False, use_swish=False,
                use_bias=False, use_merge_bn=False, **bn_params):
    if use_merge_bn and use_bn:
        raise ValueError("use_merge_bn and use_bn must not both be True")
    if use_merge_bn:
        # parameters for convolution layer with merged batchnorm.
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)],
                'weight_filler': dict(type='msra'),
                'bias_filler': dict(type='constant', value=0)
                }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
                }
        eps = bn_params.get('eps', bn_eps)
        moving_average_fraction = bn_params.get('moving_average_fraction',
                                                bn_moving_avg_fraction)
        use_global_stats = bn_params.get('use_global_stats', use_global_stats)
        # parameters for the fused batchnorm layer (kept for reference; the
        # BatchNormScale call at the bottom passes its arguments inline).
        bn_lr_mult = lr_mult
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=bn_lr_mult, decay_mult=0),
                dict(lr_mult=bn_lr_mult * 2, decay_mult=0)],
            'batch_norm_param': [dict(eps=eps,
                                      moving_average_fraction=moving_average_fraction)],
            'scale_param': [dict(filler=dict(value=1.0), bias_term=True,
                                 bias_filler=dict(value=0.0))],
            }
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult * 2, decay_mult=0)],
                'batch_norm_param': [dict(eps=eps,
                                          use_global_stats=use_global_stats)],
                'scale_param': [dict(filler=dict(value=1.0), bias_term=True,
                                     bias_filler=dict(value=0.0))],
                }
    if use_bn:
        # parameters for convolution layer with batchnorm.
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)],
                'weight_filler': dict(type='msra'),
                'bias_filler': dict(type='constant', value=0)
                }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
                }
        eps = bn_params.get('eps', bn_eps)
        moving_average_fraction = bn_params.get('moving_average_fraction',
                                                bn_moving_avg_fraction)
        use_global_stats = bn_params.get('use_global_stats', use_global_stats)
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            'moving_average_fraction': moving_average_fraction,
            }
        bn_lr_mult = lr_mult
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0)],
                'eps': eps,
                'use_global_stats': use_global_stats,
                }
            # not updating scale/bias parameters
            bn_lr_mult = 0
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult * 2, decay_mult=0)],
                'filler': dict(value=1.0),
                'bias_filler': dict(value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        if use_bias:
            kwargs = {
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=1),
                    dict(lr_mult=2 * lr_mult, decay_mult=0)],
                'weight_filler': dict(type='msra'),
                'bias_filler': dict(type='constant', value=0)
                }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=1)],
                'weight_filler': dict(type='msra'),
                'bias_term': False,
                }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        if Use_DeConv:
            net[conv_name] = L.Deconvolution(
                net[from_layer],
                param=[dict(lr_mult=lr_mult, decay_mult=1)],
                convolution_param=dict(bias_term=False, num_output=num_output,
                                       kernel_size=kernel_h, stride=stride_h,
                                       pad=pad_h, weight_filler=dict(type="msra")))
        else:
            net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                           kernel_size=kernel_h, pad=pad_h,
                                           stride=stride_h, **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w, **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if group > 1:
        net.update(conv_name, {'group': group})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_merge_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNormScale(
            net[conv_name], in_place=True,
            param=[
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=bn_lr_mult, decay_mult=0),
                dict(lr_mult=bn_lr_mult * 2, decay_mult=0)],
            batch_norm_param=dict(eps=eps,
                                  use_global_stats=use_global_stats,
                                  moving_average_fraction=moving_average_fraction))
    if use_relu:
        relu_name = '{}_relu6'.format(conv_name)
        net[relu_name] = L.ReLU6(net[conv_name], in_place=True)
    if use_swish:
        swish_name = '{}_swish'.format(conv_name)
        net[swish_name] = L.Swish(net[conv_name], in_place=True)
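
# Sketch (illustrative; BatchNormScale and ReLU6 are custom layers assumed to
# exist in this Caffe fork): use_merge_bn=True emits a single fused
# BatchNormScale instead of separate BatchNorm + Scale layers, here paired
# with ReLU6 as in MobileNet-style units.
def _example_merged_bn_unit(net, from_layer, out_layer, num_output, stride=1):
    ConvBNLayer(net, from_layer, out_layer, use_bn=False, num_output=num_output,
                kernel_size=3, pad=1, stride=stride,
                use_merge_bn=True, use_relu=True)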
def test_bias6(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.input2 = L.Input(shape=make_shape([64, 64]))
    # broadcast-add input2 ([64, 64]) over axes 2-3 of input1 ([6, 4, 64, 64])
    n.bias1 = L.Bias(n.input1, n.input2, axis=2)
    self._test_model(*self._netspec_to_model(n, 'bias6'))
def DeconvBNUnitLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                      kernel_size, pad, stride, lr_mult=1, decay_mult=1,
                      dilation=1, use_conv_bias=False, use_scale=True, eps=0.001,
                      conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                      scale_prefix='', scale_postfix='_scale',
                      bias_prefix='', bias_postfix='_bias', leaky=False, leaky_ratio=0.1,
                      init_xavier=True):
    if use_bn:
        # parameters for deconvolution layer with batchnorm.
        if use_conv_bias:
            weight_filler = dict(type='xavier') if init_xavier else dict(type='gaussian', std=0.01)
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=decay_mult)],
                'convolution_param': {
                    'num_output': num_output,
                    'kernel_size': kernel_size,
                    'pad': pad,
                    'stride': stride,
                    'weight_filler': weight_filler,
                    'bias_filler': dict(type='constant', value=0)
                    }
                }
        else:
            kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=decay_mult)],
                'convolution_param': {
                    'num_output': num_output,
                    'kernel_size': kernel_size,
                    'pad': pad,
                    'stride': stride,
                    'weight_filler': dict(type='xavier'),
                    'bias_term': False
                    }
                }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [
                    dict(lr_mult=lr_mult, decay_mult=0),
                    dict(lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        # bare keyword arguments are not mapped for L.Deconvolution, so the
        # geometry and fillers go through convolution_param here as well.
        weight_filler = dict(type='xavier') if init_xavier else dict(type='gaussian', std=0.01)
        kwargs = {
            'param': [
                dict(lr_mult=lr_mult, decay_mult=decay_mult),
                dict(lr_mult=2 * lr_mult, decay_mult=0)],
            'convolution_param': {
                'num_output': num_output,
                'kernel_size': kernel_size,
                'pad': pad,
                'stride': stride,
                'weight_filler': weight_filler,
                'bias_filler': dict(type='constant', value=0)
                }
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = Upar.UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = Upar.UnpackVariable(pad, 2)
    [stride_h, stride_w] = Upar.UnpackVariable(stride, 2)
    net[conv_name] = L.Deconvolution(net[from_layer], **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        if leaky:
            leaky_kwargs = {"negative_slope": leaky_ratio}
            net[relu_name] = L.ReLU(net[conv_name], in_place=True, **leaky_kwargs)
        else:
            net[relu_name] = L.ReLU(net[conv_name], in_place=True)
def ConvBNLayer(net, from_layer, out_name, use_bn, use_relu, num_output,
                kernel_size, pad, stride, use_scale=False,
                moving_average_fraction=0.99, eps=0.0001,
                conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='/bn',
                scale_prefix='', scale_postfix='/scale',
                bias_prefix='', bias_postfix='/bias', group=1, dilation=1,
                in_place=True, use_bias=False, lr_mult=1, engine=None):
    # parameters for the convolution layer (this variant always gives the conv
    # its own bias term, with or without batchnorm).
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='msra'),
        'bias_term': True,
        'bias_filler': dict(type='constant', value=0),
        'group': group,
        'dilation': dilation
        }
    if engine is not None:
        kwargs['engine'] = engine
    if use_scale:
        # parameters for scale bias layer after batchnorm.
        sb_kwargs = {
            'bias_term': True
            #,'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)],
            #'filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
            }
    if use_bn and use_scale:
        # parameters for batchnorm layer.
        bn_kwargs = {
            # not needed (and wrong order) for caffe-0.16
            #'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
            #'scale_filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
            'moving_average_fraction': moving_average_fraction,
            'eps': eps
            #,'scale_bias': True
            }
    elif use_bn:
        bn_kwargs = {
            # not needed (and wrong order) for caffe-0.16
            #'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
            #'scale_filler': dict(type='constant', value=1.0),
            #'bias_filler': dict(type='constant', value=0.0),
            'moving_average_fraction': moving_average_fraction,
            'eps': eps,
            'scale_bias': True
            }
    if use_bias:
        bias_kwargs = {
            'param': [dict(lr_mult=1, decay_mult=0)],
            'filler': dict(type='constant', value=0.0),
            }

    conv_name = '{}{}{}'.format(conv_prefix, out_name, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    kwargs_conv = copy.deepcopy(kwargs)
    # lower wd for dw layers as per mobilenet paper - not working - harder to train
    #decay_mult = 0.01 if group == num_output else 1
    #kwargs_conv['param'][0]['decay_mult'] = decay_mult
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h, stride=stride_h,
                                       **kwargs_conv)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w,
                                       **kwargs_conv)
    out_layer = conv_name
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_name, bn_postfix)
        net[bn_name] = L.BatchNorm(net[out_layer], in_place=in_place, **bn_kwargs)
        out_layer = conv_name if in_place else bn_name
    if use_scale:
        sb_name = '{}{}{}'.format(scale_prefix, out_name, scale_postfix)
        net[sb_name] = L.Scale(net[out_layer], in_place=True, **sb_kwargs)
        out_layer = sb_name
    if use_bias:
        bias_name = '{}{}{}'.format(bias_prefix, out_name, bias_postfix)
        net[bias_name] = L.Bias(net[out_layer], in_place=True, **bias_kwargs)
        out_layer = bias_name
    if use_relu:
        relu_name = '{}/relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[out_layer], in_place=True)
        out_layer = relu_name
    return out_layer
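
# Chaining sketch (illustrative, not original code): this variant returns the
# name of its top blob, so callers should thread the return value instead of
# assuming the conv name; that matters when in_place=False inserts distinct
# /bn and /scale blobs into the chain.
def _example_chain(net, bottom='data'):
    top = bottom
    for i, nout in enumerate([32, 64, 128]):
        top = ConvBNLayer(net, top, 'conv{}'.format(i + 1), use_bn=True,
                          use_relu=True, num_output=nout, kernel_size=3,
                          pad=1, stride=2)
    return top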
def DWConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output, group,
                  kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1,
                  conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                  scale_prefix='', scale_postfix='_scale',
                  bias_prefix='', bias_postfix='_bias', **bn_params):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        # (kwargs is kept for reference; the ConvolutionDepthwise calls below
        # pass an explicit convolution_param instead.)
        kwargs = {
            'param': [dict(lr_mult=lr_mult, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
            }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
                }
    else:
        kwargs = {
            'param': [
                dict(lr_mult=lr_mult, decay_mult=1),
                dict(lr_mult=2 * lr_mult, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
            }

    conv_name = '{}{}'.format(conv_prefix, out_layer)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    # ConvolutionDepthwise (a custom layer) takes its settings through
    # convolution_param rather than bare keyword arguments.
    if kernel_h == kernel_w:
        net[conv_name] = L.ConvolutionDepthwise(
            net[from_layer],
            convolution_param=dict(num_output=num_output, group=group,
                                   pad=pad_h, kernel_size=kernel_h, stride=stride_h,
                                   bias_term=False,
                                   weight_filler=dict(type='gaussian', std=0.01)),
            param=[dict(lr_mult=lr_mult, decay_mult=1)])
    else:
        net[conv_name] = L.ConvolutionDepthwise(
            net[from_layer],
            convolution_param=dict(num_output=num_output, group=group,
                                   kernel_h=kernel_h, kernel_w=kernel_w,
                                   pad_h=pad_h, pad_w=pad_w,
                                   stride_h=stride_h, stride_w=stride_w,
                                   bias_term=False,
                                   weight_filler=dict(type='gaussian', std=0.01)),
            param=[dict(lr_mult=lr_mult, decay_mult=1)])
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        # note: the ReLU name is derived from conv_postfix, not conv_name.
        relu_name = 'relu{}'.format(conv_postfix)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
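
# Sketch (illustrative, not original code): a MobileNet-style depthwise-
# separable unit pairing DWConvBNLayer (3x3, group == input channels) with a
# 1x1 pointwise ConvBNLayer; the '/dw' and '/sep' suffixes are arbitrary.
def _example_dw_separable(net, from_layer, out_layer, num_input, num_output, stride=1):
    DWConvBNLayer(net, from_layer, out_layer + '/dw', use_bn=True, use_relu=True,
                  num_output=num_input, group=num_input, kernel_size=3,
                  pad=1, stride=stride)
    ConvBNLayer(net, out_layer + '/dw', out_layer + '/sep', use_bn=True,
                use_relu=True, num_output=num_output, kernel_size=1,
                pad=0, stride=1)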