from google.protobuf import text_format
from caffe.proto import caffe_pb2


def change_lr_mult(model, output):
    with open(model) as n:
        str1 = n.read()
    msg = caffe_pb2.NetParameter()
    text_format.Merge(str1, msg)
    for i, l in enumerate(msg.layer):
        if l.type in ("Convolution", "DepthwiseConvolution", "ConvolutionDepthwise"):
            if l.param._values == []:
                lr_param = caffe_pb2.ParamSpec()
                lr_param.lr_mult = 0
                l.param._values.append(lr_param)
                if l.convolution_param.bias_term:
                    lr_param = caffe_pb2.ParamSpec()
                    lr_param.lr_mult = 0
                    l.param._values.append(lr_param)
                    # You should ensure the param count and blob count are equal
            else:
                for num in range(len(l.param)):
                    l.param[num].lr_mult = 0
    print("Saving output model %s..." % output)
    with open(output, 'w') as m:
        m.write(text_format.MessageToString(msg))
    return output
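# Minimal usage sketch (the file names below are hypothetical): freeze every
# convolution's learning rates in a deploy prototxt and write the result out.
# frozen_path = change_lr_mult('deploy.prototxt', 'deploy_frozen.prototxt')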
def add_BN_layer(net_msg, name, bottom):
    # norm layer
    batchnormlayer = net_msg.layer.add()
    batchnormlayer.name = name + '_norm'
    batchnormlayer.type = 'BatchNorm'
    batchnormlayer.bottom._values.append(bottom)
    batchnormlayer.top._values.append(batchnormlayer.name)
    # the three BatchNorm blobs (mean, variance, moving-average factor) are
    # maintained by the layer itself, so the solver must not update them
    for i in range(0, 3):
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 0
        batchnormlayer.param._values.append(lr_param)
    # scale layer
    scalelayer = net_msg.layer.add()
    scalelayer.name = name + '_scale'
    scalelayer.type = 'Scale'
    scalelayer.bottom._values.append(batchnormlayer.name)
    scalelayer.top._values.append(name)
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    scalelayer.param._values.append(lr_param)
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 2
    lr_param.decay_mult = 0
    scalelayer.param._values.append(lr_param)
    scalelayer.scale_param.bias_term = True
    scalelayer.scale_param.filler.type = 'msra'
def add_conv_layer(net_msg, name, bottom, num_output, pad, kernel_size, stride,
                   bias_term=True, learn_depth=False, input_channel=1,
                   connectivity_mode=0):
    conv_layer = net_msg.layer.add()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom._values.append(bottom)
    conv_layer.top._values.append(conv_layer.name)
    if connectivity_mode == 1:
        conv_layer.connectivity_mode = caffe_pb2.LayerParameter.DISCONNECTED_ELTWISE
    elif connectivity_mode == 2:
        conv_layer.connectivity_mode = caffe_pb2.LayerParameter.DISCONNECTED_GRPWISE
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    if learn_depth:
        blk_param = caffe_pb2.BlockGroupLassoSpec()
        blk_param.xdimen = kernel_size * kernel_size * input_channel
        blk_param.ydimen = num_output
        lr_param.block_group_lasso._values.append(blk_param)
    conv_layer.param._values.append(lr_param)
    if bias_term:
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 2
        conv_layer.param._values.append(lr_param)
    # conv parameters
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad._values.append(pad)
    conv_layer.convolution_param.kernel_size._values.append(kernel_size)
    conv_layer.convolution_param.stride._values.append(stride)
    conv_layer.convolution_param.weight_filler.type = 'msra'
    conv_layer.convolution_param.bias_term = bias_term
    if bias_term:
        conv_layer.convolution_param.bias_filler.type = 'constant'
def add_BN_layer(net_msg, bottom):
    # norm layer (in-place: top == bottom, so downstream blob names are unchanged)
    batchnormlayer = net_msg.layer.add()
    batchnormlayer.name = bottom + '_bn'
    batchnormlayer.type = 'BatchNorm'
    batchnormlayer.bottom._values.append(bottom)
    batchnormlayer.top._values.append(bottom)
    # compatible with old caffe version
    for i in range(0, 3):
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 0
        lr_param.decay_mult = 0
        batchnormlayer.param._values.append(lr_param)
    # scale layer
    scalelayer = net_msg.layer.add()
    scalelayer.name = bottom + '_scale'
    scalelayer.type = 'Scale'
    scalelayer.bottom._values.append(bottom)
    scalelayer.top._values.append(bottom)
    # optional: modify lr mult
    # lr_param = caffe_pb2.ParamSpec()
    # lr_param.lr_mult = 1
    # scalelayer.param._values.append(lr_param)
    # lr_param = caffe_pb2.ParamSpec()
    # lr_param.lr_mult = 2
    # lr_param.decay_mult = 0
    # scalelayer.param._values.append(lr_param)
    scalelayer.scale_param.bias_term = True
    scalelayer.scale_param.filler.value = 1
    scalelayer.scale_param.bias_filler.value = 0
    return bottom
def rename_Upsample(self, source_node):
    attr = source_node.attrs
    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"

    assert attr['height_scale'] == attr['width_scale']
    factor = int(attr['height_scale'])
    c = int(attr['channel'])
    k = 2 * factor - factor % 2

    layer.convolution_param.num_output = c
    layer.convolution_param.kernel_size.extend([k])
    layer.convolution_param.stride.extend([factor])
    layer.convolution_param.pad.extend([int(math.ceil((factor - 1) / 2.))])
    layer.convolution_param.group = c
    layer.convolution_param.weight_filler.type = 'bilinear'
    layer.convolution_param.bias_term = False

    learning_param = pb2.ParamSpec()
    learning_param.lr_mult = 0
    learning_param.decay_mult = 0
    layer.param.extend([learning_param])

    # Init weight blob of filter kernel
    blobs_weight = FillBilinear(c, k)
    layer.blobs.extend([as_blob(blobs_weight)])

    for b in source_node.in_edges:
        layer.bottom.append(b)
    layer.top.append(source_node.name)
    layer.name = source_node.real_name
    return layer
def UpsampleBilinear(pytorch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"

    assert pytorch_layer.scale_factor[0] == pytorch_layer.scale_factor[1]
    factor = int(pytorch_layer.scale_factor[0])
    c = int(pytorch_layer.input_size[1])
    k = 2 * factor - factor % 2

    layer.convolution_param.num_output = c
    layer.convolution_param.kernel_size.extend([k])
    layer.convolution_param.stride.extend([factor])
    layer.convolution_param.pad.extend([int(math.ceil((factor - 1) / 2.))])
    layer.convolution_param.group = c
    layer.convolution_param.weight_filler.type = 'bilinear'
    layer.convolution_param.bias_term = False

    learning_param = pb2.ParamSpec()
    learning_param.lr_mult = 0
    learning_param.decay_mult = 0
    layer.param.extend([learning_param])

    # Init weight blob of filter kernel
    blobs_weight = FillBilinear(c, k)
    layer.blobs.extend([as_blob(blobs_weight)])
    return layer
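# Both upsampling converters above call FillBilinear(c, k), which is not
# defined in this section. Below is a minimal sketch of the usual FCN-style
# bilinear-kernel initializer, given as an assumption about that helper; the
# converters' actual implementation may differ.
import numpy as np

def FillBilinear(ch, k):
    """Build a (ch, 1, k, k) blob holding a bilinear upsampling kernel."""
    blob = np.zeros((ch, 1, k, k), dtype=np.float32)
    # center and normalization of the bilinear interpolation kernel
    scale_factor = (k + 1) // 2
    center = scale_factor - 1 if k % 2 == 1 else scale_factor - 0.5
    for x in range(k):
        for y in range(k):
            blob[:, 0, x, y] = (1 - abs(x - center) / scale_factor) * \
                               (1 - abs(y - center) / scale_factor)
    return blob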
def bn(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "BN"

    param_scale = pb2.ParamSpec()
    param_scale.lr_mult = 1
    param_scale.decay_mult = 0
    param_bias = pb2.ParamSpec()
    param_bias.lr_mult = 1
    param_bias.decay_mult = 0
    layer.param.extend([param_scale, param_bias])

    layer.bn_param.slope_filler.value = 1
    layer.bn_param.bias_filler.value = 0

    layer.blobs.extend([
        as_blob(torch_layer[name][None, :])
        for name in ['weight', 'bias', 'running_mean', 'running_var']
    ])
    return layer
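# as_blob, used by several converters above, is likewise not defined here.
# A plausible sketch, assuming numpy arrays and the stock BlobProto message
# (with pb2 being the compiled caffe proto module used throughout):

def as_blob(array):
    """Wrap a numpy array in a BlobProto, preserving its shape."""
    blob = pb2.BlobProto()
    blob.shape.dim.extend(array.shape)
    blob.data.extend(array.astype(float).flat)
    return blob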
def _get_param(num_param, lr_mult=1):
    if num_param == 1:
        # only weight
        param = caffe_pb2.ParamSpec()
        param.lr_mult = 1 * lr_mult
        param.decay_mult = 1 * lr_mult
        return [param]
    elif num_param == 2:
        # weight and bias
        param_w = caffe_pb2.ParamSpec()
        param_w.lr_mult = 1 * lr_mult
        param_w.decay_mult = 1 * lr_mult
        param_b = caffe_pb2.ParamSpec()
        param_b.lr_mult = 2 * lr_mult
        param_b.decay_mult = 0
        return [param_w, param_b]
    else:
        raise ValueError("Unknown num_param {}".format(num_param))
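# Hypothetical call sites, following the common Caffe convention the helper
# encodes (bias at twice the weight learning rate, excluded from weight decay):
# conv.param.extend(_get_param(2))           # weight + bias multipliers
# fc.param.extend(_get_param(1, lr_mult=0))  # single, frozen weight blob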
def __init__(self, name, num_filters, kernel_size, stride=1, pad=0,
             weight_filler=GaussianFiller(std=0.01), lr_mult=1):
    super(ConvolutionLayer, self).__init__(name, 'Convolution', 1, 1)
    self._inplace = False
    self._params.convolution_param.num_output = num_filters
    self._params.convolution_param.kernel_size.extend([kernel_size])
    self._params.convolution_param.pad.extend([pad])
    self._params.convolution_param.stride.extend([stride])
    self._params.convolution_param.weight_filler.MergeFrom(weight_filler.to_proto())
    self._params.convolution_param.bias_filler.MergeFrom(ConstantFiller().to_proto())
    weight_blob_param = caffe_pb2.ParamSpec(lr_mult=1 * lr_mult)
    bias_blob_param = caffe_pb2.ParamSpec(lr_mult=2 * lr_mult)
    self._params.param.extend([weight_blob_param, bias_blob_param])
def add_ip_layer(net_msg, name, bottom, num):
    ip_layer = net_msg.layer.add()
    ip_layer.name = name
    ip_layer.type = 'InnerProduct'
    ip_layer.bottom._values.append(bottom)
    ip_layer.top._values.append(name)
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    lr_param.decay_mult = 1
    ip_layer.param._values.append(lr_param)
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 2
    lr_param.decay_mult = 0
    ip_layer.param._values.append(lr_param)
    # inner product parameters
    ip_layer.inner_product_param.num_output = num
    ip_layer.inner_product_param.weight_filler.type = 'msra'
    ip_layer.inner_product_param.bias_filler.type = 'constant'
    ip_layer.inner_product_param.bias_filler.value = 0.0
def _simple_conv_layer(name, bottom, top, num_output, kernel_size, pad,
                       dilation=1, std=0.01, bias=0.0, param_type=0):
    """
    param_type:
        0 -> do nothing (default multipliers)
        1 -> (lr 1, decay 0; lr 2, decay 0)
        2 -> (lr 1, decay 1; lr 2, decay 0)
        3 -> (lr 10, decay 1; lr 20, decay 0)
        4 -> (lr 1, decay 1; lr 2, decay 1)
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom.append(bottom)
    conv_layer.top.append(top)
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad.append(pad)
    conv_layer.convolution_param.kernel_size.append(kernel_size)
    conv_layer.convolution_param.weight_filler.type = "gaussian"
    conv_layer.convolution_param.weight_filler.std = std
    conv_layer.convolution_param.bias_filler.type = "constant"
    conv_layer.convolution_param.bias_filler.value = bias
    conv_layer.convolution_param.dilation.append(dilation)
    conv_layer.ClearField('param')
    conv_layer.param.extend([caffe_pb2.ParamSpec()] * 2)
    if param_type == 1:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 0.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 2:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 3:
        conv_layer.param[0].lr_mult = 10.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 20.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 4:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 1.0
    return conv_layer
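# Example call (hypothetical names): a 3x3 convolution with the usual
# multipliers, weights decayed and bias at double learning rate (param_type=2):
# conv1 = _simple_conv_layer('conv1', 'data', 'conv1', num_output=64,
#                            kernel_size=3, pad=1, param_type=2)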
def add_conv_layer(net_msg, name, bottom, num_output, pad, kernel_size, stride,
                   bias_term=True):
    conv_layer = net_msg.layer.add()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom._values.append(bottom)
    conv_layer.top._values.append(conv_layer.name)
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    conv_layer.param._values.append(lr_param)
    if bias_term:
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 2
        conv_layer.param._values.append(lr_param)
    # conv parameters
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad._values.append(pad)
    conv_layer.convolution_param.kernel_size._values.append(kernel_size)
    conv_layer.convolution_param.stride._values.append(stride)
    conv_layer.convolution_param.weight_filler.type = 'msra'
    conv_layer.convolution_param.bias_term = bias_term
    if bias_term:
        conv_layer.convolution_param.bias_filler.type = 'constant'
def create_inner_product_layer(input, out_name, output_number):
    inner_product_layer = caffe_pb2.LayerParameter(
        name=out_name,
        type="InnerProduct",
        bottom=[input],
        top=[out_name],
        # bias_term is False below, so the layer has a single weight blob;
        # a second (bias) ParamSpec would trip Caffe's "Too many params" check
        param=[
            caffe_pb2.ParamSpec(lr_mult=1.0, decay_mult=1.0),
        ],
        inner_product_param=caffe_pb2.InnerProductParameter(
            num_output=output_number,
            bias_term=False,
            weight_filler=caffe_pb2.FillerParameter(type="xavier"),
            bias_filler=caffe_pb2.FillerParameter(type="constant", value=0.0)))
    net = caffe_pb2.NetParameter()
    net.layer.extend([inner_product_layer])
    return net
def deconvolution(torch_layer):
    log.info("do deconvolution")
    # num_output is derived from the layer's forward output, so a forward
    # pass must have been run before conversion
    output = torch_layer["output"]
    assert len(output) > 0

    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"

    factor = int(torch_layer["scale_factor"])
    layer.convolution_param.num_output = len(output[0])
    layer.convolution_param.group = len(output[0])
    layer.convolution_param.stride_w = factor
    layer.convolution_param.stride_h = factor
    layer.convolution_param.kernel_w = 2 * factor - factor % 2
    layer.convolution_param.kernel_h = 2 * factor - factor % 2
    layer.convolution_param.pad.append(int(np.ceil((factor - 1) / 2.)))
    layer.convolution_param.bias_term = False
    layer.convolution_param.weight_filler.type = 'bilinear'

    # the bilinear kernel is fixed: never updated during training
    param_spec = pb2.ParamSpec()
    param_spec.lr_mult = 0
    param_spec.decay_mult = 0
    layer.param.extend([param_spec])

    # Alternative path for a learned deconvolution with explicit weights:
    # weight = torch_layer["weight"]
    # assert len(weight.shape) == 4, weight.shape
    # (nOutputPlane, nInputPlane, kH, kW) = weight.shape
    # (dW, dH, padW, padH) = [int(torch_layer.get(f, 0))
    #                         for f in ["dW", "dH", "padW", "padH"]]
    # layer.convolution_param.num_output = nOutputPlane
    # layer.convolution_param.kernel_w = kW
    # layer.convolution_param.stride_w = dW
    # layer.convolution_param.pad_w = padW
    # layer.convolution_param.kernel_h = kH
    # layer.convolution_param.stride_h = dH
    # layer.convolution_param.pad_h = padH
    # if "bias" in torch_layer:
    #     layer.blobs.extend([as_blob(weight), as_blob(torch_layer["bias"])])
    # else:
    #     layer.convolution_param.bias_term = False
    #     layer.blobs.extend([as_blob(weight)])
    return layer
def _get_primal_dual_param(param_name, lr_mult):
    param = caffe_pb2.ParamSpec()
    param.lr_mult = lr_mult
    param.decay_mult = 0
    param.name = param_name
    return [param]
def main(args):
    # Set default output file names
    if args.output_model is None:
        file_name = osp.splitext(args.model)[0]
        args.output_model = file_name + '_inference.prototxt'
    if args.output_weights is None:
        file_name = osp.splitext(args.weights)[0]
        args.output_weights = file_name + '_inference.caffemodel'
    with open(args.model) as f:
        model = caffe_pb2.NetParameter()
        pb.text_format.Parse(f.read(), model)

    # Determine the BN layers to be absorbed or replaced
    # Create the new layers
    new_layers = []
    absorbed = {}
    replaced = {}
    for i, layer in enumerate(model.layer):
        if layer.type != 'BN':
            new_layers.append(layer)
            continue
        assert len(layer.bottom) == 1
        assert len(layer.top) == 1
        bottom_blob = layer.bottom[0]
        top_blob = layer.top[0]
        # Check whether the BN can be absorbed. In-place layers may sit in
        # between, e.g. conv -> relu -> bn; in that case the BN cannot be
        # absorbed.
        can_be_absorbed = False
        for j in xrange(i - 1, -1, -1):
            if bottom_blob in model.layer[j].top:
                if model.layer[j].type not in ['Convolution', 'InnerProduct']:
                    can_be_absorbed = False
                    break
                else:
                    can_be_absorbed = True
                    bottom_layer = model.layer[j]
        if can_be_absorbed:
            # Rename the blob in the top layers
            for j in xrange(i + 1, len(model.layer)):
                update_blob_name(model.layer[j].bottom, top_blob, bottom_blob)
                update_blob_name(model.layer[j].top, top_blob, bottom_blob)
            if bottom_layer.type == 'Convolution':
                bottom_layer.convolution_param.bias_term = True
            elif bottom_layer.type == 'InnerProduct':
                bottom_layer.inner_product_param.bias_term = True
            absorbed[layer.name] = bottom_layer.name
        elif args.replace_by == 'affine':
            # Replace with a scale-bias layer
            new_layer = caffe_pb2.LayerParameter()
            new_layer.name = layer.name + '_affine'
            new_layer.type = 'Scale'
            new_layer.bottom.extend([bottom_blob])
            new_layer.top.extend([top_blob])
            new_layer.scale_param.bias_term = True
            replaced[layer.name] = new_layer.name
            new_layers.append(new_layer)
        elif args.replace_by == 'frozen':
            # Freeze the BN layer
            layer.bn_param.frozen = True
            del layer.param[:]
            param = caffe_pb2.ParamSpec()
            param.lr_mult = 0
            param.decay_mult = 0
            layer.param.extend([param] * 2)
            new_layers.append(layer)

    # Save the prototxt
    output_model = caffe_pb2.NetParameter()
    output_model.CopyFrom(model)
    del output_model.layer[:]
    output_model.layer.extend(new_layers)
    with open(args.output_model, 'w') as f:
        f.write(pb.text_format.MessageToString(output_model))

    # Copy the parameters
    weights = caffe.Net(args.model, args.weights, caffe.TEST)
    output_weights = caffe.Net(args.output_model, caffe.TEST)
    for name in np.intersect1d(weights.params.keys(),
                               output_weights.params.keys()):
        # Some original conv / inner product layers do not have bias_term
        for i in xrange(min(len(weights.params[name]),
                            len(output_weights.params[name]))):
            output_weights.params[name][i].data[...] = \
                weights.params[name][i].data.copy()

    # Absorb the BN parameters
    for old, new in absorbed.iteritems():
        scale, bias, mean, tmp = [p.data.ravel() for p in weights.params[old]]
        invstd = tmp if args.bn_style == 'invstd' else \
            np.power(tmp + args.epsilon, -0.5)
        W, b = output_weights.params[new]
        assert W.data.ndim == 4 or W.data.ndim == 2
        assert b.data.ndim == 1
        if W.data.ndim == 4:
            W.data[...] = (W.data * scale[:, None, None, None]
                           * invstd[:, None, None, None])
        elif W.data.ndim == 2:
            W.data[...] = W.data * scale[:, None] * invstd[:, None]
        b.data[...] = (b.data[...] - mean) * scale * invstd + bias

    # Fill up the affine layers
    for old, new in replaced.iteritems():
        scale, bias, mean, tmp = [p.data.ravel() for p in weights.params[old]]
        invstd = tmp if args.bn_style == 'invstd' else \
            np.power(tmp + args.epsilon, -0.5)
        W, b = output_weights.params[new]
        assert W.data.ndim == 1
        assert b.data.ndim == 1
        W.data[...] = scale * invstd
        b.data[...] = bias - scale * mean * invstd

    # Check if the conversion is correct
    check(weights, output_weights)

    # Save the caffemodel
    output_weights.save(args.output_weights)
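# Note on the absorption math: the 'BN' layer stores (scale, bias, mean,
# var-or-invstd) blobs and computes y = scale * (x - mean) * invstd + bias.
# When x is itself affine, x = W * input + b from the preceding Convolution
# or InnerProduct, the two maps compose into one affine map:
#     W' = scale * invstd * W   (broadcast per output channel)
#     b' = (b - mean) * scale * invstd + bias
# which is exactly what the "Absorb the BN parameters" loop writes back.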
conv_positions = []
position_idx = 0
for cur_layer in loop_layers:
    if 'Convolution' == cur_layer.type:
        print 'generating:', cur_layer.name, cur_layer.type
        conv_positions.append(position_idx)
        # p conv layer
        conv_p = net_msg.layer.add()
        # copy the whole layer so the other parameters stay consistent
        conv_p.CopyFrom(cur_layer)
        conv_p.name = conv_p.name + 'p'
        conv_p.bottom._values[0] = cur_layer.bottom._values[0]
        conv_p.top._values[0] = conv_p.name
        if len(conv_p.param) == 0:
            lr_param = caffe_pb2.ParamSpec()
            lr_param.lr_mult = 0
            lr_param.decay_mult = 0
            conv_p.param._values.append(lr_param)
        else:
            conv_p.param._values[0].lr_mult = 0
            conv_p.param._values[0].decay_mult = 0
        try:
            # conv_p.param._values[1].lr_mult = 0
            # conv_p.param._values[1].decay_mult = 0
            del conv_p.param._values[1]
        except:
            print "Failed to operate on param field: {}".format(conv_p.name)
        conv_p.convolution_param.num_output = m_all[layer_idx]