def BN_AC(data, name=None):
    layers = []
    # BatchNorm
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = name + '_bn'
    bn_layer.type = 'BatchNorm'
    bn_layer.bottom.append(data)
    bn_layer.top.append(name + 'bn_ac_data')
    layers.append(bn_layer)
    # Scale
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = name + '_scale'
    scale_layer.type = 'Scale'
    scale_layer.bottom.append(name + 'bn_ac_data')
    scale_layer.top.append(name + 'bn_ac_data')
    scale_layer.scale_param.filler.value = 1
    scale_layer.scale_param.bias_term = True
    scale_layer.scale_param.bias_filler.value = 0
    layers.append(scale_layer)
    # ReLU
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = name + '_relu'
    relu_layer.type = 'ReLU'
    relu_layer.bottom.append(name + 'bn_ac_data')
    relu_layer.top.append(name + 'bn_ac_data')
    layers.append(relu_layer)
    return layers

def Bn_Sc(name, bottom, keep_name=False):
    top_name = name
    name = name.replace('res', '')
    # BN
    bn_layer = caffe_pb2.LayerParameter()
    if not keep_name:
        bn_layer.name = 'bn' + name
    else:
        bn_layer.name = name
    bn_layer.type = 'BatchNorm'
    bn_layer.bottom.extend([bottom])
    bn_layer.top.extend([top_name])
    # Scale
    scale_layer = caffe_pb2.LayerParameter()
    if not keep_name:
        scale_layer.name = 'scale' + name
    else:
        scale_layer.name = name
    scale_layer.type = 'Scale'
    scale_layer.bottom.extend([top_name])
    scale_layer.top.extend([top_name])
    scale_layer.scale_param.filler.value = 1
    scale_layer.scale_param.bias_term = True
    scale_layer.scale_param.bias_filler.value = 0
    return [bn_layer, scale_layer]

def Shuffle_Channel(data, num_group=3, name=None):
    layers = []
    Reshape_pre_layer = caffe_pb2.LayerParameter()
    Reshape_pre_layer.name = name + '_reshape_pre_layer'
    Reshape_pre_layer.type = 'Reshape'
    Reshape_pre_layer.bottom.append(data)
    Reshape_pre_layer.top.append(name + 'pre_shuffle_data')
    Reshape_pre_layer.reshape_param.shape.dim.extend([num_group, -1])
    Reshape_pre_layer.reshape_param.axis = 1
    Reshape_pre_layer.reshape_param.num_axes = 1
    layers.append(Reshape_pre_layer)

    Permute_layer = caffe_pb2.LayerParameter()
    Permute_layer.name = name + '_Permute_layer'
    Permute_layer.type = 'Permute'
    Permute_layer.bottom.append(name + 'pre_shuffle_data')
    Permute_layer.top.append(name + 'shuffle_data')
    Permute_layer.permute_param.order.extend([0, 2, 1, 3, 4])
    layers.append(Permute_layer)

    Reshape_post_layer = caffe_pb2.LayerParameter()
    Reshape_post_layer.name = name + '_reshape_post_layer'
    Reshape_post_layer.type = 'Reshape'
    Reshape_post_layer.bottom.append(name + 'shuffle_data')
    Reshape_post_layer.top.append(name + 'post_shuffle_data')
    Reshape_post_layer.reshape_param.shape.dim.append(-1)
    Reshape_post_layer.reshape_param.axis = 1
    Reshape_post_layer.reshape_param.num_axes = 2
    layers.append(Reshape_post_layer)
    return layers

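# Sanity check (an illustration, not part of the converter): the three layers
# above implement ShuffleNet-style channel shuffle. A NumPy equivalent of the
# Reshape -> Permute -> Reshape chain, assuming NCHW input:
import numpy as np

def _shuffle_channel_ref(x, num_group):
    n, c, h, w = x.shape
    x = x.reshape(n, num_group, c // num_group, h, w)  # Reshape, axis=1, num_axes=1
    x = x.transpose(0, 2, 1, 3, 4)                     # Permute, order [0, 2, 1, 3, 4]
    return x.reshape(n, c, h, w)                       # Reshape, axis=1, num_axes=2

# _shuffle_channel_ref(np.arange(6).reshape(1, 6, 1, 1), 3) reorders the six
# channels to [0, 2, 4, 1, 3, 5].
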
def batchnorm(pytorch_layer):
    layer_bn = pb2.LayerParameter()
    layer_bn.type = "BatchNorm"
    layer_bn.batch_norm_param.use_global_stats = 1
    layer_bn.batch_norm_param.eps = pytorch_layer.eps
    layer_bn.blobs.extend([
        as_blob(pytorch_layer.running_mean.cpu().numpy()),
        as_blob(pytorch_layer.running_var.cpu().numpy()),
        as_blob(np.array([1.]))
    ])

    layer_scale = pb2.LayerParameter()
    layer_scale.type = "Scale"
    blobs_weight = pytorch_layer.state_dict()['weight'].cpu().numpy()
    try:
        bias = pytorch_layer.state_dict()['bias'].cpu().numpy()
    except KeyError:  # BatchNorm without a learnable bias term
        bias = []
    if len(bias):
        layer_scale.scale_param.bias_term = True
        layer_scale.blobs.extend([as_blob(blobs_weight), as_blob(bias)])
    else:
        layer_scale.scale_param.bias_term = False
        layer_scale.blobs.extend([as_blob(blobs_weight)])
    return [layer_bn, layer_scale]

def batchnorm(pytorch_layer):
    layer_bn = pb2.LayerParameter()
    layer_bn.type = "BatchNorm"
    layer_bn.batch_norm_param.use_global_stats = 1
    layer_bn.batch_norm_param.eps = pytorch_layer.eps
    layer_bn.blobs.extend([
        as_blob(pytorch_layer.running_mean.cpu().numpy()),
        as_blob(pytorch_layer.running_var.cpu().numpy()),
        as_blob(np.array([1.]))
    ])

    layer_scale = pb2.LayerParameter()
    layer_scale.type = "Scale"
    blobs_weight = pytorch_layer.next_functions[1][0].variable.data.cpu().numpy()
    if pytorch_layer.next_functions[2][0]:
        layer_scale.scale_param.bias_term = True
        bias = pytorch_layer.next_functions[2][0].variable.data.cpu().numpy()
        layer_scale.blobs.extend([as_blob(blobs_weight), as_blob(bias)])
    else:
        layer_scale.scale_param.bias_term = False
        layer_scale.blobs.extend([as_blob(blobs_weight)])
    return [layer_bn, layer_scale]

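# Why both converters write np.array([1.]) as the third blob: Caffe's
# BatchNorm layer keeps a moving-average scale factor there and divides the
# stored statistics by it at inference, so writing exactly 1.0 makes the
# exported running_mean/running_var usable verbatim. A minimal NumPy sketch
# of that convention (my illustration, not converter code):
import numpy as np

def _caffe_bn_forward(x, blobs, eps):
    scale = blobs[2][0]                     # exactly 1.0 as exported above
    mean = blobs[0] / scale
    var = blobs[1] / scale
    return (x - mean) / np.sqrt(var + eps)  # the Scale layer then applies gamma/beta
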
def _arith_with_const_tensor(input, const, order, opr, context):
    topB = const.np_data
    if input.ndim >= 2 and (topB.squeeze().shape == (input.shape[1], )
                            or topB.squeeze().shape == (1, )):
        topA = [context.get_blob_name(input)]
        topB = topB.squeeze()
        shape = topB.shape
        layer_param = cp.ScaleParameter()
    else:
        topA, topB, shape = _broadcast_for_eltwiseopr(
            opr.out_tensors[0].name, input, const, context)
        layer_param = cp.ScaleParameter(axis=len(shape) - topB.ndim,
                                        num_axes=topB.ndim)

    if isinstance(opr, (AddOpr, SubOpr)):
        layer_param.bias_term = True
        param_b = topB
        param_k = np.ones(shape=param_b.shape)
        if isinstance(opr, SubOpr):
            if order == 0:
                param_b = -param_b  # pylint: disable=invalid-unary-operand-type
            else:
                param_k = -param_k  # pylint: disable=invalid-unary-operand-type
        blobs = [
            context.gen_blob_proto(param_k),
            context.gen_blob_proto(param_b)
        ]
    else:
        param_k = topB
        if isinstance(opr, TrueDivOpr):
            if order == 0:
                param_k = 1.0 / param_k
            else:
                bottom = topA
                name = opr.out_tensors[0].name + context.gen_name
                topA = [name]
                context.add_layer(
                    cp.LayerParameter(
                        name=name,
                        type="Power",
                        bottom=bottom,
                        top=topA,
                        power_param=cp.PowerParameter(scale=1, shift=0,
                                                      power=-1),
                    ))
        blobs = [context.gen_blob_proto(param_k)]

    bottom = topA
    top = [context.set_blob_name(opr.out_tensors[0], opr.out_tensors[0].name)]
    context.add_layer(
        cp.LayerParameter(
            name=opr.out_tensors[0].name,
            type="Scale",
            bottom=bottom,
            top=top,
            scale_param=layer_param,
            blobs=blobs,
        ))

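# Quick reference for the Scale/Power mapping above (order == 0 means the
# tensor is the left operand); a NumPy sanity check, not converter code:
import numpy as np

_x, _c = np.array([2.0, 4.0]), 4.0
assert np.allclose(_x - _c, 1.0 * _x + (-_c))          # SubOpr, order == 0: bias negated
assert np.allclose(_c - _x, -1.0 * _x + _c)            # SubOpr, order != 0: scale negated
assert np.allclose(_x / _c, _x * (1.0 / _c))           # TrueDivOpr, order == 0: k = 1/c
assert np.allclose(_c / _x, np.power(_x, -1.0) * _c)   # TrueDivOpr, order != 0: Power(-1), then Scale(k=c)
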
def Permute(pytorch_layer):
    layer_permute = pb2.LayerParameter()
    layer_permute.type = 'Permute'
    layer_permute.permute_param.order.extend([0, 2, 3, 1])

    layer_flat = pb2.LayerParameter()
    layer_flat.type = 'Reshape'
    layer_flat.reshape_param.shape.dim.extend([0, -1, 1, 1])
    return [layer_permute, layer_flat]

def _fake_repeat(opr, context):
    unsqueeze_shape = list(opr.inp_tensors[0].shape)
    unsqueeze_shape.insert(opr.axis + 1, 1)
    fake_unsqueeze_out = IRTensor(
        opr.inp_tensors[0].name + "_unsqueeze",
        unsqueeze_shape,
        opr.inp_tensors[0].dtype,
        q_type=opr.inp_tensors[0].q_dtype,
        scale=opr.inp_tensors[0].scale,
        zero_point=opr.inp_tensors[0].zero_point,
    )
    context.update_quantize_dict(fake_unsqueeze_out)
    param = cp.ReshapeParameter(shape=cp.BlobShape(dim=unsqueeze_shape))
    bottom = [context.get_blob_name(opr.inp_tensors[0])]
    top = [context.set_blob_name(fake_unsqueeze_out, fake_unsqueeze_out.name)]
    context.add_layer(
        cp.LayerParameter(
            name=fake_unsqueeze_out.name,
            type="Reshape",
            bottom=bottom,
            top=top,
            reshape_param=param,
        ))

    param = cp.TileParameter(axis=opr.axis + 1, tiles=opr.repeats)
    unsqueeze_shape[opr.axis + 1] = unsqueeze_shape[opr.axis + 1] * opr.repeats
    fake_tile = IRTensor(
        opr.inp_tensors[0].name + "_unsqueeze_tile",
        unsqueeze_shape,
        opr.inp_tensors[0].dtype,
        q_type=opr.inp_tensors[0].q_dtype,
        scale=opr.inp_tensors[0].scale,
        zero_point=opr.inp_tensors[0].zero_point,
    )
    context.update_quantize_dict(fake_tile)
    bottom = top
    top = [context.set_blob_name(fake_tile, fake_tile.name)]
    context.add_layer(
        cp.LayerParameter(name=fake_tile.name,
                          type="Tile",
                          bottom=bottom,
                          top=top,
                          tile_param=param))

    param = cp.ReshapeParameter(shape=cp.BlobShape(dim=opr.out_tensors[0].shape))
    bottom = top
    top = [context.set_blob_name(opr.out_tensors[0], opr.out_tensors[0].name)]
    context.add_layer(
        cp.LayerParameter(
            name=opr.out_tensors[0].name,
            type="Reshape",
            bottom=bottom,
            top=top,
            reshape_param=param,
        ))

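# The Reshape -> Tile -> Reshape chain above emulates repeat because Caffe
# has no Repeat layer. A NumPy sketch of the equivalence (an illustration,
# not part of the converter):
import numpy as np

_x = np.arange(6).reshape(2, 3)
_axis, _repeats = 1, 2
_y = np.expand_dims(_x, _axis + 1)          # Reshape: insert a unit axis after _axis
_y = np.tile(_y, (1, 1, _repeats))          # Tile along the new axis
_y = _y.reshape(2, 3 * _repeats)            # Reshape to the output shape
assert (_y == np.repeat(_x, _repeats, axis=_axis)).all()
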
def sample_channel(pytorch_layer):
    layer_slice = pb2.LayerParameter()
    layer_slice.type = 'Slice'
    layer_slice.slice_param.axis = 1
    layer_slice.slice_param.slice_point.extend([1])

    layer_concat = pb2.LayerParameter()
    layer_concat.type = "Concat"
    layer_concat.concat_param.axis = 1
    return [layer_slice, layer_concat]

def f(layer):
    if layer.type == "Flatten":
        new_layer = pb2.LayerParameter()
        new_layer.CopyFrom(layer)
        new_layer.type = "Reshape"
        new_layer.reshape_param.shape.dim.extend([0, 0, 0, 0])
        return new_layer
    if layer.type == "InnerProduct":
        new_layer = pb2.LayerParameter()
        new_layer.CopyFrom(layer)
        new_layer.inner_product_param.axis = 1
        return new_layer
    return layer

def rename_BatchNormalization(self, source_node):
    attr = source_node.attrs
    layer_bn = pb2.LayerParameter()
    layer_bn.type = "BatchNorm"
    layer_bn.batch_norm_param.use_global_stats = 1
    layer_bn.batch_norm_param.eps = attr['epsilon']

    mean_name = '{0}.running_mean'.format(source_node.weights_name)
    var_name = '{0}.running_var'.format(source_node.weights_name)
    mean = self.state_dict[mean_name].numpy()
    variance = self.state_dict[var_name].numpy()
    layer_bn.blobs.extend(
        [as_blob(mean), as_blob(variance), as_blob(np.array([1.]))])

    for b in source_node.in_edges:
        layer_bn.bottom.append(b)
    layer_bn.top.append(source_node.name)
    layer_bn.name = source_node.real_name + '_bn'

    layer_scale = pb2.LayerParameter()
    layer_scale.type = "Scale"
    bias_name = '{0}.bias'.format(source_node.weights_name)
    weights_name = '{0}.weight'.format(source_node.weights_name)
    weight = self.state_dict[weights_name].numpy()
    if bias_name in self.state_dict:
        bias = self.state_dict[bias_name].numpy()
        layer_scale.scale_param.bias_term = True
        layer_scale.blobs.extend([as_blob(weight), as_blob(bias)])
    else:
        layer_scale.scale_param.bias_term = False
        layer_scale.blobs.extend([as_blob(weight)])

    layer_scale.bottom.append(source_node.real_name)
    layer_scale.top.append(source_node.name)
    layer_scale.name = source_node.real_name + "_scale"
    return [layer_bn, layer_scale]

def _simple_relu_layer(name, bottom, top=None):
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = name
    relu_layer.type = 'ReLU'
    relu_layer.bottom.append(bottom)
    # Default to an in-place ReLU when no explicit top is given.
    relu_layer.top.append(top if top is not None else bottom)
    return relu_layer

def Add(name, bottoms):
    layer = caffe_pb2.LayerParameter()
    layer.name = name
    layer.type = 'Eltwise'  # EltwiseParameter defaults to SUM, i.e. elementwise add
    layer.bottom.extend(bottoms)
    layer.top.extend([name])
    return layer

def Bilinear_upsample_3d(name, bottom, num_output, factor, temporal_factor,
                         lr_mult=1, weight_filler='bilinear'):
    layer = caffe_pb2.LayerParameter()
    layer.name = name
    layer.type = 'Deconvolution3D'
    layer.bottom.extend([bottom])
    layer.top.extend([name])

    kernel_size = int(2 * factor - factor % 2)
    stride = factor
    pad = int(math.ceil((factor - 1) / 2.))
    kernel_depth = int(2 * temporal_factor - temporal_factor % 2)
    temporal_stride = temporal_factor
    temporal_pad = int(math.ceil((temporal_factor - 1) / 2.))

    layer.convolution3d_param.num_output = num_output
    # layer.convolution3d_param.group = num_output
    layer.convolution3d_param.kernel_size = kernel_size
    layer.convolution3d_param.kernel_depth = kernel_depth
    layer.convolution3d_param.stride = stride
    layer.convolution3d_param.temporal_stride = temporal_stride
    layer.convolution3d_param.pad = pad
    layer.convolution3d_param.temporal_pad = temporal_pad
    # layer.convolution3d_param.dilation = dilation
    layer.convolution3d_param.weight_filler.type = weight_filler
    layer.convolution3d_param.bias_term = False
    layer.param.extend(cbm._get_param(1, lr_mult=lr_mult))
    return layer

def Bilinear_upsample(name, bottom, num_output, factor, lr_mult=1,
                      weight_filler='bilinear', dilation=1):
    layer = caffe_pb2.LayerParameter()
    layer.name = name
    layer.type = 'Deconvolution'
    layer.bottom.extend([bottom])
    layer.top.extend([name])

    kernel_size = int(2 * factor - factor % 2)
    stride = factor
    pad = int(math.ceil((factor - 1) / 2.))

    layer.convolution_param.num_output = num_output
    # layer.convolution_param.group = num_output
    layer.convolution_param.kernel_size.extend([kernel_size])
    layer.convolution_param.stride.extend([stride])
    layer.convolution_param.pad.extend([pad])
    layer.convolution_param.dilation.extend([dilation])
    layer.convolution_param.weight_filler.type = weight_filler
    layer.convolution_param.bias_term = False
    layer.param.extend(_get_param(1, lr_mult=lr_mult))
    return layer

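# For integer factors the formulas above reproduce the standard FCN-style
# bilinear upsampling parameters, and the deconvolution output size
# (in - 1) * stride + kernel - 2 * pad works out to exactly factor * in.
# A quick check (illustration, not converter code):
import math

for _f, _n in [(2, 7), (3, 7), (4, 7)]:
    _k = int(2 * _f - _f % 2)
    _p = int(math.ceil((_f - 1) / 2.))
    assert (_n - 1) * _f + _k - 2 * _p == _f * _n
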
def generate_bn_scale(layer, layers, nv_bn_names):
    bn_param = layer.batch_norm_param
    if bn_param.HasField('use_global_stats'):
        bn_param.ClearField('use_global_stats')
    if bn_param.HasField('scale_filler'):
        bn_param.ClearField('scale_filler')
    if bn_param.HasField('bias_filler'):
        bn_param.ClearField('bias_filler')
    if bn_param.HasField('scale_bias'):
        if bn_param.scale_bias:
            bn_param.ClearField('scale_bias')
            layers.append(layer)
            scale_layer = caffe_pb2.LayerParameter()
            scale_layer.name = layer.name + '_scale'
            scale_layer.type = 'Scale'
            scale_layer.bottom.append(layer.top[0])
            scale_layer.top.append(layer.top[0])
            scale_layer.scale_param.filler.value = 1
            scale_layer.scale_param.bias_term = True
            scale_layer.scale_param.bias_filler.value = 0
            layers.append(scale_layer)
            nv_bn_names.append(layer.name)
        else:
            bn_param.ClearField('scale_bias')
            layers.append(layer)
    else:
        layers.append(layer)

def Conv3d(name, bottom, num_output, kernel_size, kernel_depth, stride,
           temporal_stride, pad, temporal_pad, lr_mult=1,
           weight_filler='msra', have_bias=False):
    layer = caffe_pb2.LayerParameter()
    layer.name = name
    layer.type = 'Convolution3D'
    layer.bottom.extend([bottom])
    layer.top.extend([name])
    layer.convolution3d_param.num_output = num_output
    layer.convolution3d_param.kernel_size = kernel_size
    layer.convolution3d_param.kernel_depth = kernel_depth
    layer.convolution3d_param.stride = stride
    layer.convolution3d_param.temporal_stride = temporal_stride
    layer.convolution3d_param.pad = pad
    layer.convolution3d_param.temporal_pad = temporal_pad
    layer.convolution3d_param.weight_filler.type = weight_filler
    layer.convolution3d_param.bias_term = have_bias
    layer.param.extend(cbm._get_param(1, lr_mult))
    return layer

def _fully_connected(opr, context):
    assert opr.inp_tensors[1].np_data is not None
    param_W = opr.inp_tensors[1].np_data
    assert not opr.transpose_a
    # Caffe's InnerProduct stores weights as (num_output, input_dim).
    if not opr.transpose_b:
        param_W = param_W.T
    blobs = [context.gen_blob_proto(param_W)]
    bias_term = False
    if isinstance(opr, LinearOpr) and opr.has_bias:
        bias_term = True
        blobs.append(
            context.gen_blob_proto(opr.inp_tensors[2].np_data.reshape(-1, )))
    param = cp.InnerProductParameter(bias_term=bias_term,
                                     num_output=opr.out_tensors[0].shape[1])
    bottom = [context.get_blob_name(opr.inp_tensors[0])]
    top = [context.set_blob_name(opr.out_tensors[0], opr.out_tensors[0].name)]
    context.add_layer(
        cp.LayerParameter(
            name=opr.out_tensors[0].name,
            type="InnerProduct",
            bottom=bottom,
            top=top,
            inner_product_param=param,
            blobs=blobs,
        ))

def rename_Permute(self, source_node):
    attr = source_node.attrs
    layer = pb2.LayerParameter()
    layer.type = "Permute"
    if len(attr['perm']) == 4:
        layer.permute_param.order.extend(attr['perm'])

    weights_name = '{0}.weight'.format(source_node.weights_name)
    weight = self.state_dict[weights_name].numpy()
    layer.blobs.extend([as_blob(weight[0])])

    for b in source_node.in_edges:
        layer.bottom.append(b)
    layer.top.append(source_node.name)
    layer.name = source_node.real_name
    return layer

def batchnorm_scale(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Scale"
    layer.scale_param.bias_term = True
    layer.blobs.extend(
        [as_blob(torch_layer["weight"]), as_blob(torch_layer["bias"])])
    return layer

def leaky(torch_layer):
    log.info('print LeakyReLU')
    log.info(torch_layer)
    layer = pb2.LayerParameter()
    layer.type = "ReLU"
    layer.relu_param.negative_slope = float(torch_layer["negval"])
    return layer

def _to_proto(self, layers, names, autonames):
    if self in layers:
        return
    bottom_names = []
    for inp in self.inputs:
        inp._to_proto(layers, names, autonames)
        bottom_names.append(layers[inp.fn].top[inp.n])
    layer = caffe_pb2.LayerParameter()
    layer.type = self.type_name
    layer.bottom.extend(bottom_names)

    if self.in_place:
        layer.top.extend(layer.bottom)
    else:
        for top in self.tops:
            layer.top.append(self._get_top_name(top, names, autonames))
    layer.name = self._get_name(names, autonames)

    for k, v in six.iteritems(self.params):
        # special case to handle generic *params
        if k.endswith('param'):
            assign_proto(layer, k, v)
        else:
            try:
                assign_proto(
                    getattr(layer,
                            _param_names[self.type_name] + '_param'), k, v)
            except (AttributeError, KeyError):
                assign_proto(layer, k, v)
    layers[self] = layer

def power(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Power"
    # Caffe computes y = (scale * x + shift) ^ power, so this is y = (1 - p) * x.
    layer.power_param.power = 1
    layer.power_param.scale = 1 - torch_layer["p"]
    layer.power_param.shift = 0
    return layer

def spatial_convolution(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Convolution"

    weight = torch_layer["weight"]
    assert len(weight.shape) == 4, weight.shape
    (nOutputPlane, nInputPlane, kH_, kW_) = weight.shape

    (kW, kH, dW, dH, padW, padH, dilation) = [
        int(torch_layer.get(f, 0))
        for f in ["kW", "kH", "dW", "dH", "padW", "padH", "dilationW"]
    ]
    assert kH_ == kH
    assert kW_ == kW

    layer.convolution_param.num_output = nOutputPlane
    layer.convolution_param.kernel_w = kW
    layer.convolution_param.stride_w = dW
    layer.convolution_param.pad_w = padW
    layer.convolution_param.kernel_h = kH
    layer.convolution_param.stride_h = dH
    layer.convolution_param.pad_h = padH
    layer.convolution_param.dilation.append(dilation if dilation else 1)

    if "bias" in torch_layer:
        bias = torch_layer["bias"]
        layer.blobs.extend([as_blob(weight), as_blob(bias)])
    else:
        layer.convolution_param.bias_term = False
        layer.blobs.extend([as_blob(weight)])
    return layer

def rename_PRelu(self, source_node):
    layer = pb2.LayerParameter()
    layer.type = "PReLU"

    weights_name = '{0}.weight'.format(source_node.weights_name)
    weight = self.state_dict[weights_name].numpy()
    dim = weight.ndim

    layer.prelu_param.channel_shared = (dim == 1)
    layer.blobs.extend([as_blob(weight[0])])

    for b in source_node.in_edges:
        layer.bottom.append(b)
    layer.top.append(source_node.name)
    layer.name = source_node.real_name
    return layer

def _to_proto(self):
    bottom_names = []
    for inp in self.inputs:
        bottom_names.append(inp)
    layer = caffe_pb2.LayerParameter()
    layer.type = self.type_name
    layer.bottom.extend(bottom_names)

    if self.in_place:
        layer.top.extend(layer.bottom)
    else:
        for top in self.outputs:
            layer.top.append(top)
    layer.name = self.layer_name

    for k, v in six.iteritems(self.params):
        # special case to handle generic *params
        if k.endswith('param'):
            assign_proto(layer, k, v)
        else:
            try:
                assign_proto(
                    getattr(layer,
                            _param_names[self.type_name] + '_param'), k, v)
            except (AttributeError, KeyError):
                assign_proto(layer, k, v)
    return layer

def pooling(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Pooling"
    pool = {
        "MAX": pb2.PoolingParameter.MAX,
        "AVE": pb2.PoolingParameter.AVE
    }[torch_layer["operation"]]
    layer.pooling_param.pool = pool

    (kW, kH, dW, dH, padW, padH) = [
        int(torch_layer.get(f, 0))
        for f in ["kW", "kH", "dW", "dH", "padW", "padH"]
    ]
    layer.pooling_param.pad_w = padW
    layer.pooling_param.pad_h = padH
    layer.pooling_param.kernel_h = kH
    layer.pooling_param.kernel_w = kW
    layer.pooling_param.stride_h = dH
    layer.pooling_param.stride_w = dW

    # Default to torch_pooling, but override with the ceil_mode
    if "ceil_mode" not in torch_layer:
        return layer
    if not torch_layer["ceil_mode"]:
        # layer.pooling_param.torch_pooling = True
        if dH > 1 and padH > 0:
            layer.pooling_param.pad_h = padH - 1
        if dW > 1 and padW > 0:
            layer.pooling_param.pad_w = padW - 1
    return layer

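# The pad reduction above compensates for Caffe always pooling with ceil
# while Torch's ceil_mode=False pools with floor. A worked check of the two
# output-size formulas (illustration, not converter code):
import math

def _caffe_pool_out(h, k, s, pad):
    return int(math.ceil((h + 2. * pad - k) / s)) + 1

def _torch_floor_out(h, k, s, pad):
    return (h + 2 * pad - k) // s + 1

_h, _k, _s, _pad = 112, 3, 2, 1
assert _caffe_pool_out(_h, _k, _s, _pad) == 57       # ceil adds one extra window
assert _torch_floor_out(_h, _k, _s, _pad) == 56
assert _caffe_pool_out(_h, _k, _s, _pad - 1) == 56   # pad - 1 restores agreement
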
def rename_Constant(self, source_node):
    layer = pb2.LayerParameter()
    layer.type = "Normalize"
    layer.norm_param.across_spatial = False
    layer.norm_param.scale_filler.type = "constant"
    layer.norm_param.scale_filler.value = 20
    layer.norm_param.channel_shared = False

    weights_name = '{0}.weight'.format(source_node.weights_name)
    weight = self.state_dict[weights_name].numpy()
    layer.blobs.extend([as_blob(weight)])

    for b in source_node.in_edges:
        layer.bottom.append(b)
    layer.top.append(source_node.name)
    layer.name = source_node.real_name
    return layer

def slice(torch_layer):
    layer = pb2.LayerParameter()
    layer.type = "Slice"
    layer.slice_param.axis = int(torch_layer["axis"])
    # num_slices outputs require num_slices - 1 slice points.
    layer.slice_param.slice_point.extend(
        range(1, int(torch_layer["num_slices"])))
    return layer

def rename_Upsample(self, source_node):
    attr = source_node.attrs
    layer = pb2.LayerParameter()
    layer.type = "Deconvolution"

    assert attr['height_scale'] == attr['width_scale']
    factor = int(attr['height_scale'])
    c = int(attr['channel'])
    k = 2 * factor - factor % 2

    layer.convolution_param.num_output = c
    layer.convolution_param.kernel_size.extend([k])
    layer.convolution_param.stride.extend([factor])
    layer.convolution_param.pad.extend([int(math.ceil((factor - 1) / 2.))])
    layer.convolution_param.group = c
    layer.convolution_param.weight_filler.type = 'bilinear'
    layer.convolution_param.bias_term = False

    learning_param = pb2.ParamSpec()
    learning_param.lr_mult = 0
    learning_param.decay_mult = 0
    layer.param.extend([learning_param])

    # Init weight blob of filter kernel
    blobs_weight = FillBilinear(c, k)
    layer.blobs.extend([as_blob(blobs_weight)])

    for b in source_node.in_edges:
        layer.bottom.append(b)
    layer.top.append(source_node.name)
    layer.name = source_node.real_name
    return layer

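# FillBilinear is referenced above but not defined in this section. A
# conventional implementation (an assumption here, matching the usual FCN
# bilinear filler; the blob shape is (c, 1, k, k) because group == num_output):
import numpy as np

def FillBilinear(c, k):
    blob = np.zeros((c, 1, k, k), dtype=np.float32)
    f = int(np.ceil(k / 2.0))
    center = (2 * f - 1 - f % 2) / (2.0 * f)
    for y in range(k):
        for x in range(k):
            blob[:, 0, y, x] = ((1 - abs(x / float(f) - center))
                                * (1 - abs(y / float(f) - center)))
    return blob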