from caffe.proto import caffe_pb2
from google.protobuf import text_format


def change_lr_mult(model, output):
    with open(model) as f:
        proto_str = f.read()
    msg = caffe_pb2.NetParameter()
    text_format.Merge(proto_str, msg)

    for layer in msg.layer:
        if layer.type in ('Convolution', 'DepthwiseConvolution',
                          'ConvolutionDepthwise'):
            if len(layer.param) == 0:
                # No explicit param specs yet: add frozen ones.
                lr_param = caffe_pb2.ParamSpec()
                lr_param.lr_mult = 0
                layer.param.extend([lr_param])
                if layer.convolution_param.bias_term:
                    lr_param = caffe_pb2.ParamSpec()
                    lr_param.lr_mult = 0
                    layer.param.extend([lr_param])
            else:
                # You should ensure the number of param specs equals the
                # number of blobs before relying on this branch.
                for spec in layer.param:
                    spec.lr_mult = 0

    print("Saving output model %s..." % output)
    with open(output, 'w') as m:
        m.write(text_format.MessageToString(msg))

    return output
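A quick usage sketch (the prototxt names below are hypothetical): the call rewrites every convolution layer's param specs so the convolutions stay frozen during fine-tuning.

# Freeze all conv weights in an existing prototxt for fine-tuning.
change_lr_mult('mobilenet_train.prototxt', 'mobilenet_frozen.prototxt')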
Example #2
def add_BN_layer(net_msg, name, bottom):
    # norm layer
    batchnormlayer = net_msg.layer.add()
    batchnormlayer.name = name + '_norm'
    batchnormlayer.type = 'BatchNorm'
    batchnormlayer.bottom.append(bottom)
    batchnormlayer.top.append(batchnormlayer.name)
    # BatchNorm keeps three blobs (mean, variance, moving-average factor);
    # none of them is learned by SGD, so freeze all three.
    for _ in range(3):
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 0
        batchnormlayer.param.extend([lr_param])
    # scale layer
    scalelayer = net_msg.layer.add()
    scalelayer.name = name + '_scale'
    scalelayer.type = 'Scale'
    scalelayer.bottom.append(batchnormlayer.name)
    scalelayer.top.append(name)

    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    scalelayer.param.extend([lr_param])
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 2
    lr_param.decay_mult = 0
    scalelayer.param.extend([lr_param])

    scalelayer.scale_param.bias_term = True
    scalelayer.scale_param.filler.type = 'msra'
def add_conv_layer(net_msg, name, bottom, num_output, pad, kernel_size, stride,
                   bias_term=True, learn_depth=False, input_channel=1,
                   connectivity_mode=0):
    conv_layer = net_msg.layer.add()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom.append(bottom)
    conv_layer.top.append(conv_layer.name)
    # connectivity_mode comes from structured-sparsity Caffe forks,
    # not upstream Caffe
    if connectivity_mode == 1:
        conv_layer.connectivity_mode = caffe_pb2.LayerParameter.DISCONNECTED_ELTWISE
    elif connectivity_mode == 2:
        conv_layer.connectivity_mode = caffe_pb2.LayerParameter.DISCONNECTED_GRPWISE
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    if learn_depth:
        # group-lasso block over the flattened (k*k*c_in) x num_output weights
        blk_param = caffe_pb2.BlockGroupLassoSpec()
        blk_param.xdimen = kernel_size * kernel_size * input_channel
        blk_param.ydimen = num_output
        lr_param.block_group_lasso.extend([blk_param])
    conv_layer.param.extend([lr_param])
    if bias_term:
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 2
        conv_layer.param.extend([lr_param])
    # conv parameters
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad.append(pad)
    conv_layer.convolution_param.kernel_size.append(kernel_size)
    conv_layer.convolution_param.stride.append(stride)
    conv_layer.convolution_param.weight_filler.type = 'msra'
    conv_layer.convolution_param.bias_term = bias_term
    if bias_term:
        conv_layer.convolution_param.bias_filler.type = 'constant'
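A minimal sketch chaining the two helpers above into a fresh NetParameter (layer names are illustrative; caffe_pb2 and text_format imports as in the first example):

net_msg = caffe_pb2.NetParameter()
add_conv_layer(net_msg, 'conv1', 'data', num_output=32,
               pad=1, kernel_size=3, stride=1, bias_term=False)
add_BN_layer(net_msg, 'conv1_bn', 'conv1')
print(text_format.MessageToString(net_msg))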
def add_BN_layer(net_msg, bottom):
    # norm layer
    batchnormlayer = net_msg.layer.add()
    batchnormlayer.name = bottom + '_bn'
    batchnormlayer.type = 'BatchNorm'
    batchnormlayer.bottom.append(bottom)
    batchnormlayer.top.append(bottom)
    # freeze the three statistics blobs; also keeps the prototxt compatible
    # with older Caffe versions
    for _ in range(3):
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 0
        lr_param.decay_mult = 0
        batchnormlayer.param.extend([lr_param])

    # scale layer (in-place: top == bottom)
    scalelayer = net_msg.layer.add()
    scalelayer.name = bottom + '_scale'
    scalelayer.type = 'Scale'
    scalelayer.bottom.append(bottom)
    scalelayer.top.append(bottom)

    # optional: explicit lr_mult specs for the scale layer
    # lr_param = caffe_pb2.ParamSpec()
    # lr_param.lr_mult = 1
    # scalelayer.param.extend([lr_param])
    # lr_param = caffe_pb2.ParamSpec()
    # lr_param.lr_mult = 2
    # lr_param.decay_mult = 0
    # scalelayer.param.extend([lr_param])

    scalelayer.scale_param.bias_term = True
    scalelayer.scale_param.filler.value = 1
    scalelayer.scale_param.bias_filler.value = 0
    return bottom
Example #5
    def rename_Upsample(self, source_node):
        attr = source_node.attrs
        layer = pb2.LayerParameter()
        layer.type = "Deconvolution"

        assert attr['height_scale'] == attr['width_scale']
        factor = int(attr['height_scale'])
        c = int(attr['channel'])
        k = 2 * factor - factor % 2

        layer.convolution_param.num_output = c
        layer.convolution_param.kernel_size.extend([k])
        layer.convolution_param.stride.extend([factor])
        layer.convolution_param.pad.extend([int(math.ceil((factor - 1) / 2.))])
        layer.convolution_param.group = c
        layer.convolution_param.weight_filler.type = 'bilinear'
        layer.convolution_param.bias_term = False

        learning_param = pb2.ParamSpec()
        learning_param.lr_mult = 0
        learning_param.decay_mult = 0
        layer.param.extend([learning_param])
        """ Init weight blob of filter kernel """
        blobs_weight = FillBilinear(c, k)
        layer.blobs.extend([as_blob(blobs_weight)])

        for b in source_node.in_edges:
            layer.bottom.append(b)

        layer.top.append(source_node.name)

        layer.name = source_node.real_name
        return layer
Example #6
def UpsampleBilinear(pytorch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"

    assert pytorch_layer.scale_factor[0] == pytorch_layer.scale_factor[1]
    factor = int(pytorch_layer.scale_factor[0])
    c = int(pytorch_layer.input_size[1])
    k = 2 * factor - factor % 2

    layer.convolution_param.num_output = c
    layer.convolution_param.kernel_size.extend([k])
    layer.convolution_param.stride.extend([factor])
    layer.convolution_param.pad.extend([int(math.ceil((factor - 1) / 2.))])
    layer.convolution_param.group = c
    layer.convolution_param.weight_filler.type = 'bilinear'
    layer.convolution_param.bias_term = False

    learning_param = pb2.ParamSpec()
    learning_param.lr_mult = 0
    learning_param.decay_mult = 0
    layer.param.extend([learning_param])
    """ Init weight blob of filter kernel """
    blobs_weight = FillBilinear(c, k)
    layer.blobs.extend([as_blob(blobs_weight)])

    return layer
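Both upsample converters rely on FillBilinear and as_blob (presumably a helper that wraps a NumPy array into a Caffe BlobProto), which are defined elsewhere in the converter. A minimal sketch of the usual bilinear-kernel construction, assuming NumPy; the real helper may differ:

import numpy as np

def FillBilinear(c, k):
    """Return a (c, 1, k, k) bilinear upsampling kernel for grouped deconv."""
    factor = (k + 1) // 2
    center = factor - 1 if k % 2 == 1 else factor - 0.5
    og = np.ogrid[:k, :k]
    filt = (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)
    weight = np.zeros((c, 1, k, k), dtype=np.float32)
    weight[:, 0] = filt  # same kernel for every channel group
    return weight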
Example #7
def bn(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "BN"
    param_scale = pb2.ParamSpec()
    param_scale.lr_mult = 1
    param_scale.decay_mult = 0
    param_bias = pb2.ParamSpec()
    param_bias.lr_mult = 1
    param_bias.decay_mult = 0
    layer.param.extend([param_scale, param_bias])
    layer.bn_param.slope_filler.value = 1
    layer.bn_param.bias_filler.value = 0
    layer.blobs.extend([
        as_blob(torch_layer[name][None, :])
        for name in ['weight', 'bias', 'running_mean', 'running_var']
    ])
    return layer
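Here "BN" is a converter-specific fused layer type (not upstream Caffe's BatchNorm), carrying weight, bias, running mean and running variance as four 1xC blobs. A sketch of how it might be driven from a PyTorch BatchNorm2d module (torch and the converter's as_blob helper are assumed):

import torch.nn as nn

m = nn.BatchNorm2d(64)
torch_layer = {name: m.state_dict()[name].detach().numpy()
               for name in ['weight', 'bias', 'running_mean', 'running_var']}
caffe_layer = bn(torch_layer)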
Example #8
def _get_param(num_param, lr_mult=1):
    if num_param == 1:
        # only weight
        param = caffe_pb2.ParamSpec()
        param.lr_mult = 1 * lr_mult
        param.decay_mult = 1 * lr_mult
        return [param]
    elif num_param == 2:
        # weight and bias
        param_w = caffe_pb2.ParamSpec()
        param_w.lr_mult = 1 * lr_mult
        param_w.decay_mult = 1 * lr_mult
        param_b = caffe_pb2.ParamSpec()
        param_b.lr_mult = 2 * lr_mult
        param_b.decay_mult = 0
        return [param_w, param_b]
    else:
        raise ValueError("Unknown num_param {}".format(num_param))
Example #9
    def __init__(self, name, num_filters,
                 kernel_size, stride=1, pad=0,
                 weight_filler=GaussianFiller(std=0.01),
                 lr_mult=1):
        super(ConvolutionLayer, self).__init__(name, 'Convolution', 1, 1)
        self._inplace = False

        self._params.convolution_param.num_output = num_filters
        self._params.convolution_param.kernel_size.extend([kernel_size])
        self._params.convolution_param.pad.extend([pad])
        self._params.convolution_param.stride.extend([stride])

        self._params.convolution_param.weight_filler.MergeFrom(weight_filler.to_proto())
        self._params.convolution_param.bias_filler.MergeFrom(ConstantFiller().to_proto())

        weight_blob_param = caffe_pb2.ParamSpec(lr_mult=1 * lr_mult)
        bias_blob_param = caffe_pb2.ParamSpec(lr_mult=2 * lr_mult)

        self._params.param.extend([weight_blob_param, bias_blob_param])
Example #10
def add_ip_layer(net_msg, name, bottom, num):
    ip_layer = net_msg.layer.add()
    ip_layer.name = name
    ip_layer.type = 'InnerProduct'
    ip_layer.bottom.append(bottom)
    ip_layer.top.append(name)
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    lr_param.decay_mult = 1
    ip_layer.param.extend([lr_param])
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 2
    lr_param.decay_mult = 0
    ip_layer.param.extend([lr_param])
    # inner product parameters
    ip_layer.inner_product_param.num_output = num
    ip_layer.inner_product_param.weight_filler.type = 'msra'
    ip_layer.inner_product_param.bias_filler.type = 'constant'
    ip_layer.inner_product_param.bias_filler.value = 0.0
def _simple_conv_layer(name,
                       bottom,
                       top,
                       num_output,
                       kernel_size,
                       pad,
                       dilation=1,
                       std=0.01,
                       bias=0.0,
                       param_type=0):
    """
    param_type:
    0 -> do nothing
    1 -> (lr 1, decay 0; lr 2, decay 0)
    2 -> (lr 1, decay 1; lr 2, decay 0)
    3 -> (lr 10, decay 1; lr 20, decay 0)
    4 -> (lr 1, decay 1; lr 1, decay 1)
    """
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom.append(bottom)
    conv_layer.top.append(top)
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad.append(pad)
    conv_layer.convolution_param.kernel_size.append(kernel_size)
    conv_layer.convolution_param.weight_filler.type = "gaussian"
    conv_layer.convolution_param.weight_filler.std = std
    conv_layer.convolution_param.bias_filler.type = "constant"
    conv_layer.convolution_param.bias_filler.value = bias
    conv_layer.convolution_param.dilation.append(dilation)
    conv_layer.ClearField('param')
    conv_layer.param.extend([caffe_pb2.ParamSpec()] * 2)
    if param_type == 1:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 0.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 2:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 3:
        conv_layer.param[0].lr_mult = 10.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 20.0
        conv_layer.param[1].decay_mult = 0.0
    elif param_type == 4:
        conv_layer.param[0].lr_mult = 1.0
        conv_layer.param[0].decay_mult = 1.0
        conv_layer.param[1].lr_mult = 2.0
        conv_layer.param[1].decay_mult = 1.0
    return conv_layer
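For instance, param_type=3 gives the common "freshly initialized head" recipe with 10x/20x learning rates:

score_layer = _simple_conv_layer('score', 'fc7', 'score', num_output=21,
                                 kernel_size=1, pad=0, param_type=3)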
Example #12
def add_conv_layer(net_msg, name, bottom, num_output, pad, kernel_size, stride,
                   bias_term=True):
    conv_layer = net_msg.layer.add()
    conv_layer.name = name
    conv_layer.type = 'Convolution'
    conv_layer.bottom.append(bottom)
    conv_layer.top.append(conv_layer.name)
    # param info for weight and bias
    lr_param = caffe_pb2.ParamSpec()
    lr_param.lr_mult = 1
    conv_layer.param.extend([lr_param])
    if bias_term:
        lr_param = caffe_pb2.ParamSpec()
        lr_param.lr_mult = 2
        conv_layer.param.extend([lr_param])
    # conv parameters
    conv_layer.convolution_param.num_output = num_output
    conv_layer.convolution_param.pad.append(pad)
    conv_layer.convolution_param.kernel_size.append(kernel_size)
    conv_layer.convolution_param.stride.append(stride)
    conv_layer.convolution_param.weight_filler.type = 'msra'
    conv_layer.convolution_param.bias_term = bias_term
    if bias_term:
        conv_layer.convolution_param.bias_filler.type = 'constant'
Example #13
def create_inner_product_layer(input, out_name, output_number):
    inner_product_layer = caffe_pb2.LayerParameter(
        name=out_name,
        type="InnerProduct",
        bottom=[input],
        top=[out_name],
        param=[
            caffe_pb2.ParamSpec(
                lr_mult=1.0,
                decay_mult=1.0,
            ),
            caffe_pb2.ParamSpec(
                lr_mult=2.0,
                decay_mult=0.0,
            )
        ],
        inner_product_param=caffe_pb2.InnerProductParameter(
            num_output=output_number,
            bias_term=True,  # enable the bias blob to match the second ParamSpec and bias_filler
            weight_filler=caffe_pb2.FillerParameter(type="xavier"),
            bias_filler=caffe_pb2.FillerParameter(type="constant", value=0.0)))
    net = caffe_pb2.NetParameter()
    net.layer.extend([inner_product_layer])
    return net
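The returned message is a complete (single-layer) NetParameter, so it can be dumped straight to prototxt text (the blob names are illustrative; assumes the text_format import from the first example):

net = create_inner_product_layer('pool5', 'fc1000', 1000)
print(text_format.MessageToString(net))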
Example #14
def deconvolution(torch_layer):
    log.info("do deconvolution")
    # log.info(torch_layer)
    # print_all(torch_layer["output"])
    # print_all(torch_layer["gradInput"])

    # read the forward output to determine num_output (requires a forward
    # pass through the torch net)
    output = torch_layer["output"]
    size = len(output)
    assert size > 0
    # print(len(output[0]))

    # gradinput = torch_layer["gradInput"]
    # print(len(output))
    # print(len(gradinput))
    # print(output[0].size[1])
    # print(gradinput[0].size[1])
    # assert torch_layer is None

    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"
    factor = int(torch_layer["scale_factor"])
    layer.convolution_param.num_output = len(output[0])
    layer.convolution_param.group = len(output[0])
    # layer.convolution_param.stride = factor
    # layer.convolution_param.kernel_size = (2 * factor - factor % 2)
    layer.convolution_param.stride_w = factor
    layer.convolution_param.stride_h = factor
    layer.convolution_param.kernel_w = (2 * factor - factor % 2)
    layer.convolution_param.kernel_h = (2 * factor - factor % 2)
    layer.convolution_param.pad.append(int(np.ceil((factor - 1) / 2.)))
    layer.convolution_param.bias_term = False
    layer.convolution_param.weight_filler.type = 'bilinear'

    param_spec = pb2.ParamSpec()
    param_spec.lr_mult = 0
    param_spec.decay_mult = 0
    layer.param.extend([param_spec])

    # weight = torch_layer["weight"]
    # bias = torch_layer["bias"]
    # assert len(weight.shape) == 4, weight.shape
    # (nOutputPlane, nInputPlane, kH_, kW_) = weight.shape
    #
    # (kW, kH, dW, dH, padW, padH) = [
    #     int(torch_layer.get(f, 0))
    #     for f in ["kW", "kH", "dW", "dH", "padW", "padH"]]
    # assert kH_ == kH
    # assert kW_ == kW
    # layer.convolution_param.num_output = nOutputPlane
    # layer.convolution_param.kernel_w = kW
    # layer.convolution_param.stride_w = dW
    # layer.convolution_param.pad_w = padW
    # layer.convolution_param.kernel_h = kH
    # layer.convolution_param.stride_h = dH
    # layer.convolution_param.pad_h = padH
    #
    # if "bias" in torch_layer:
    #     bias = torch_layer["bias"]
    #     layer.blobs.extend([as_blob(weight), as_blob(bias)])
    # else:
    #     layer.convolution_param.bias_term = False
    #     layer.blobs.extend([as_blob(weight), as_blob(bias)])
    return layer
def _get_primal_dual_param(param_name, lr_mult):
    param = caffe_pb2.ParamSpec()
    param.lr_mult = lr_mult
    param.decay_mult = 0
    param.name = param_name
    return [param]
Example #16
def main(args):
    # Set default output file names
    if args.output_model is None:
        file_name = osp.splitext(args.model)[0]
        args.output_model = file_name + '_inference.prototxt'
    if args.output_weights is None:
        file_name = osp.splitext(args.weights)[0]
        args.output_weights = file_name + '_inference.caffemodel'
    with open(args.model) as f:
        model = caffe_pb2.NetParameter()
        pb.text_format.Parse(f.read(), model)

    # Determine the BN layers to be absorbed or replaced
    # Create the new layers
    new_layers = []
    absorbed = {}
    replaced = {}
    for i, layer in enumerate(model.layer):
        if layer.type != 'BN':
            new_layers.append(layer)
            continue
        assert len(layer.bottom) == 1
        assert len(layer.top) == 1
        bottom_blob = layer.bottom[0]
        top_blob = layer.top[0]
        # Check whether the BN can be absorbed into the layer below. There
        # may be in-place layers in between (e.g., conv -> relu -> bn); in
        # that case the BN cannot be absorbed.
        can_be_absorbed = False
        for j in range(i - 1, -1, -1):
            if bottom_blob in model.layer[j].top:
                if model.layer[j].type not in ['Convolution', 'InnerProduct']:
                    can_be_absorbed = False
                    break
                else:
                    can_be_absorbed = True
                    bottom_layer = model.layer[j]
        if can_be_absorbed:
            # Rename the blob in the top layers
            for j in range(i + 1, len(model.layer)):
                update_blob_name(model.layer[j].bottom, top_blob, bottom_blob)
                update_blob_name(model.layer[j].top, top_blob, bottom_blob)
            if bottom_layer.type == 'Convolution':
                bottom_layer.convolution_param.bias_term = True
            elif bottom_layer.type == 'InnerProduct':
                bottom_layer.inner_product_param.bias_term = True
            absorbed[layer.name] = bottom_layer.name
        elif args.replace_by == 'affine':
            # Replace with a Scale layer that has a bias term
            new_layer = caffe_pb2.LayerParameter()
            new_layer.name = layer.name + '_affine'
            new_layer.type = 'Scale'
            new_layer.bottom.extend([bottom_blob])
            new_layer.top.extend([top_blob])
            new_layer.scale_param.bias_term = True
            replaced[layer.name] = new_layer.name
            new_layers.append(new_layer)
        elif args.replace_by == 'frozen':
            # Freeze the BN layer
            layer.bn_param.frozen = True
            del layer.param[:]
            param = caffe_pb2.ParamSpec()
            param.lr_mult = 0
            param.decay_mult = 0
            layer.param.extend([param] * 2)
            new_layers.append(layer)

    # Save the prototxt
    output_model = caffe_pb2.NetParameter()
    output_model.CopyFrom(model)
    del output_model.layer[:]
    output_model.layer.extend(new_layers)
    with open(args.output_model, 'w') as f:
        f.write(pb.text_format.MessageToString(output_model))

    # Copy the parameters
    weights = caffe.Net(args.model, args.weights, caffe.TEST)
    output_weights = caffe.Net(args.output_model, caffe.TEST)
    for name in np.intersect1d(list(weights.params.keys()),
                               list(output_weights.params.keys())):
        # Some original conv / inner product layers do not have bias_term
        for i in range(
                min(len(weights.params[name]),
                    len(output_weights.params[name]))):
            output_weights.params[name][i].data[...] = \
                weights.params[name][i].data.copy()

    # Absorb the BN parameters
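    # Folding identity: the BN layer computes y = scale * (x - mean) * invstd
    # + bias, so applying it after an affine layer W x + b is equivalent to
    # W' = W * scale * invstd and b' = (b - mean) * scale * invstd + bias.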
    for old, new in absorbed.items():
        scale, bias, mean, tmp = [p.data.ravel() for p in weights.params[old]]
        invstd = tmp if args.bn_style == 'invstd' else \
                 np.power(tmp + args.epsilon, -0.5)
        W, b = output_weights.params[new]
        assert W.data.ndim == 4 or W.data.ndim == 2
        assert b.data.ndim == 1
        if W.data.ndim == 4:
            W.data[...] = (W.data * scale[:, None, None, None] *
                           invstd[:, None, None, None])
        elif W.data.ndim == 2:
            W.data[...] = W.data * scale[:, None] * invstd[:, None]
        b.data[...] = (b.data[...] - mean) * scale * invstd + bias

    # Fill up the affine layers
    for old, new in replaced.items():
        scale, bias, mean, tmp = [p.data.ravel() for p in weights.params[old]]
        invstd = tmp if args.bn_style == 'invstd' else \
                 np.power(tmp + args.epsilon, -0.5)
        W, b = output_weights.params[new]
        assert W.data.ndim == 1
        assert b.data.ndim == 1
        W.data[...] = scale * invstd
        b.data[...] = bias - scale * mean * invstd

    # Check if the conversion is correct
    check(weights, output_weights)

    # Save the caffemodel
    output_weights.save(args.output_weights)
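The original script's argument parsing is not shown; below is a plausible wiring for the flags main() reads (the choices and defaults are guesses, not the author's):

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Absorb or replace BN layers to make a net inference-ready')
    parser.add_argument('model', help='input .prototxt')
    parser.add_argument('weights', help='input .caffemodel')
    parser.add_argument('--output_model')
    parser.add_argument('--output_weights')
    parser.add_argument('--replace_by', choices=['affine', 'frozen'],
                        default='frozen')
    parser.add_argument('--bn_style', choices=['var', 'invstd'], default='var')
    parser.add_argument('--epsilon', type=float, default=1e-5)
    main(parser.parse_args())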
Example #17
conv_positions = []
position_idx = 0
for cur_layer in loop_layers:
    if cur_layer.type == 'Convolution':
        print('generating:', cur_layer.name, cur_layer.type)
        conv_positions.append(position_idx)
        # add a parallel "p" copy of this conv layer
        conv_p = net_msg.layer.add()
        # copy the whole layer so every other parameter stays consistent
        conv_p.CopyFrom(cur_layer)
        conv_p.name = conv_p.name + 'p'
        conv_p.bottom[0] = cur_layer.bottom[0]
        conv_p.top[0] = conv_p.name

        # freeze the copied layer's weights
        if len(conv_p.param) == 0:
            lr_param = caffe_pb2.ParamSpec()
            lr_param.lr_mult = 0
            lr_param.decay_mult = 0
            conv_p.param.extend([lr_param])
        else:
            conv_p.param[0].lr_mult = 0
            conv_p.param[0].decay_mult = 0

        try:
            # conv_p.param[1].lr_mult = 0
            # conv_p.param[1].decay_mult = 0
            del conv_p.param[1]
        except IndexError:
            print("Failed to modify the param field of {}".format(conv_p.name))

        conv_p.convolution_param.num_output = m_all[layer_idx]