def compute_size(self):
    """Return the largest scratch size requested by any op of the net.

    For every op in ``self.net_def.op`` the function registered for the
    op's type in ``self._scratch_map`` is called with the op; the maximum
    of the results (never less than 1) is printed and returned. An op type
    without a registered function is reported through ``mace_check``.
    """
    largest = 1
    for op_def in self.net_def.op:
        mace_check(op_def.type in self._scratch_map,
                   "The %s's scratch func is lost." % op_def.type)
        required = self._scratch_map[op_def.type](op_def)
        largest = max(largest, required)
    print("micro scatch buffer size is: %s" % largest)
    return largest
def add_size_tensor_from_arg(self, op, keyword):
    """Append the two-int argument ``keyword`` of ``op`` as a const input.

    The argument is looked up with ``ConverterUtil.get_arg`` and must hold
    exactly two ints. A new INT32 tensor of dims ``[2]`` named
    ``<op.name>/<keyword>:0`` is added to ``self._model.tensors`` and its
    name is appended to ``op.input``.
    """
    size_arg = ConverterUtil.get_arg(op, keyword)
    mace_check(len(size_arg.ints) == 2,
               '%s: %s value does not have size 2' % (op.name, keyword))

    tensor = self._model.tensors.add()
    tensor.name = '%s/%s:0' % (op.name, keyword)
    tensor.data_type = mace_pb2.DT_INT32
    tensor.dims.extend([2])
    tensor.int32_data.extend(size_arg.ints)

    op.input.extend([tensor.name])
def add_output_shape(self, op, shapes):
    """Attach one output shape per output of ``op`` and cache it by name.

    ``shapes`` must contain exactly as many entries as ``op.output``. Each
    shape is appended to ``op.output_shape`` and also stored in
    ``self._output_shape_cache`` under the matching output name.
    """
    mismatch_msg = (
        'Op {} ({}) output count is different from output shape count'
    ).format(op.name, op.type)
    mace_check(len(op.output) == len(shapes), mismatch_msg)

    for position, shape in enumerate(shapes):
        tensor_name = op.output[position]
        op.output_shape.add().dims.extend(shape)
        self._output_shape_cache[tensor_name] = shape
def convert_elementwise(self, op):
    """Rewrite an element-wise op in place as a Hexagon quantized op.

    Three special cases are handled first and return early:

    * DIV whose first input is a constant holding a single value that
      dequantizes to 1 becomes ``QuantizedRecip_8`` applied to the second
      input.
    * POW with a scalar input of 0.5 becomes ``QuantizedSqrt_8``.
    * CLIP becomes ``QuantizedClamp_8`` with float min/max const inputs
      taken from the coeff argument.

    Every other type is looked up in ``self.eltwise_type``; a type missing
    from that table is reported through ``mace_check``.
    """
    element_type = ConverterUtil.get_arg(
        op, MaceKeyword.mace_element_type_str).i
    if element_type == EltwiseType.DIV.value and \
            op.input[0] in self._consts:
        tensor = self._consts[op.input[0]]
        if len(tensor.int32_data) == 1:
            # Dequantize the single constant value to test whether it is 1.
            f = tensor.scale * (tensor.int32_data[0] - tensor.zero_point)
            if abs(f - 1) < 1e-6:  # recip
                # Keep only the original second input (the divisor).
                op_input = op.input[1]
                del op.input[:]
                op.input.append(op_input)
                self.add_min_max_const_node(op, op.input[0])
                op.type = HexagonOp.QuantizedRecip_8.name
                return
    if element_type == EltwiseType.POW.value and \
            ConverterUtil.get_arg(
                op, MaceKeyword.mace_scalar_input_str).f == 0.5:
        self.add_min_max_const_node(op, op.input[0])
        op.type = HexagonOp.QuantizedSqrt_8.name
        return
    if element_type == EltwiseType.CLIP.value:
        self.add_min_max_const_node(op, op.input[0])
        # coeff holds the clamp bounds as [min, max].
        coeff = ConverterUtil.get_arg(op, MaceKeyword.mace_coeff_str).floats
        min_value, max_value = coeff[0], coeff[1]
        self.add_arg_const_node(op, "/min:0", [1], [min_value],
                                data_type=mace_pb2.DT_FLOAT)
        self.add_arg_const_node(op, "/max:0", [1], [max_value],
                                data_type=mace_pb2.DT_FLOAT)
        op.type = HexagonOp.QuantizedClamp_8.name
        return
    if len(op.input) == 1:
        # Single-input op: the second operand comes from the scalar argument.
        # NOTE(review): presumably add_quantized_scalar_const_node appends
        # the new const to op.input, since op.input[1] is read just below.
        scalar_input = ConverterUtil.get_arg(
            op, MaceKeyword.mace_scalar_input_str).f
        self.add_quantized_scalar_const_node("/b:0", scalar_input, op)
    self.add_min_max_const_node(op, op.input[0])
    self.add_min_max_const_node(op, op.input[1])
    if element_type in [
            EltwiseType.SUM.value, EltwiseType.SUB.value,
            EltwiseType.MIN.value, EltwiseType.MAX.value,
            EltwiseType.DIV.value
    ]:
        # These types additionally get a min/max node for the output tensor.
        self.add_min_max_const_node(op, op.output[0], True, True, False)
    try:
        op.type = self.eltwise_type[element_type]
    except KeyError:
        mace_check(
            False, "Hexagon does not support elementwise %s" %
            EltwiseType(element_type).name)
def add_padding_tensor_from_arg(self, op):
    """Append the padding-values argument of ``op`` as a const input.

    The argument named ``MaceKeyword.mace_padding_values_str`` must hold
    exactly four ints. They are copied into a new INT32 tensor of dims
    ``[4]`` named ``<op.name>/padding:0`` that is added to
    ``self._model.tensors`` and appended to ``op.input``.
    """
    padding_arg = ConverterUtil.get_arg(
        op, MaceKeyword.mace_padding_values_str)
    mace_check(len(padding_arg.ints) == 4,
               '%s: padding value does not have size 4' % op.name)

    tensor = self._model.tensors.add()
    tensor.name = '%s/padding:0' % op.name
    tensor.data_type = mace_pb2.DT_INT32
    tensor.dims.extend([4])
    tensor.int32_data.extend(padding_arg.ints)

    op.input.extend([tensor.name])
def get_data_bytes(self, data_type):
    """Return the element size in bytes for a ``mace_pb2`` data type.

    FLOAT and INT32 map to 4, HALF and FLOAT16 to 2, UINT8 to 1. Any other
    value is reported through ``mace_check``.
    """
    if data_type in (mace_pb2.DT_FLOAT, mace_pb2.DT_INT32):
        return 4
    if data_type in (mace_pb2.DT_HALF, mace_pb2.DT_FLOAT16):
        return 2
    if data_type == mace_pb2.DT_UINT8:
        return 1
    mace_check(False, "Invalid data type: %s" % data_type)
def convert_interp(self, caffe_op):
    """Convert a Caffe Interp layer into a MACE ResizeBilinear op.

    Only layers whose ``interp_param`` sets both ``height`` and ``width``
    are accepted; those two values are stored, as int32, in the op's
    resize-size argument.
    """
    op = self.convert_general_op(caffe_op)
    interp = caffe_op.layer.interp_param
    has_target_size = interp.HasField("height") and interp.HasField("width")
    mace_check(has_target_size,
               'Only support bilinear interp with height and width')
    op.type = MaceOp.ResizeBilinear.name

    target_size = np.array([interp.height, interp.width], dtype=np.int32)
    resize_arg = op.arg.add()
    resize_arg.name = MaceKeyword.mace_resize_size_str
    resize_arg.ints.extend(target_size)
def is_consumer_group_conv(mge_symvar, var2oprs, map_oprs):
    """Tell whether ``mge_symvar`` feeds a group convolution/deconvolution.

    The consumers recorded for the variable's id in ``var2oprs`` are
    resolved through ``map_oprs``. As soon as one of them is a
    ``ConvolutionForward`` or ``ConvolutionBackwardData`` operator whose
    ``sparse`` parameter is ``"GROUP"``, it is checked that this is the
    variable's only consumer and ``True`` is returned. Otherwise the
    result is ``False``.
    """
    conv_types = ("ConvolutionForward", "ConvolutionBackwardData")
    consumers = var2oprs[mge_symvar.id]
    consumer_count = len(consumers)
    for entry in consumers:
        opr = map_oprs[entry[0]]
        if mgb.cgtools.get_opr_type(opr) not in conv_types:
            continue
        if opr.params["sparse"] != "GROUP":
            continue
        mace_check(consumer_count == 1,
                   "This tensor should only feed depthwise conv/deconv")
        return True
    return False
def scratch_xtensa_depthwise_conv_2d(mace_op, mace_net):
    """Compute the scratch size requested for an xtensa depthwise conv2d op.

    Args:
        mace_op: the op; its first output shape, its "strides" and "T"
            arguments and its first two inputs are read.
        mace_net: the net used to resolve the input and filter dims and
            the padding.

    Returns:
        ``(state_size + circular_buffer_size) * 4 + bias_bytes`` where
        ``bias_bytes`` is four times the last output dimension.

    Only ``DT_FLOAT`` is supported; any other data type is reported through
    ``mace_check``. Indices 1 and 3 of the input/output dims are read as
    height and channels, and indices 0, 1, 2 of the filter dims as channel
    multiplier, kernel height and kernel width.
    """
    output_dims = mace_op.output_shape[0].dims
    output_height = output_dims[1]
    bias_bytes = output_dims[3] * 4

    input_dims = NetUtil.get_input_dims(mace_op, mace_net, 0)
    input_height = input_dims[1]
    input_channels = input_dims[3]

    filter_dims = NetUtil.get_input_dims(mace_op, mace_net, 1)
    channels_multiplier = filter_dims[0]
    kernel_height = filter_dims[1]
    kernel_width = filter_dims[2]

    y_stride = NetUtil.get_arg(mace_op, "strides").ints[1]
    y_padding = NetUtil.calc_padding(mace_op, mace_net)[1]

    # The remainder follows xa_nn_conv2d_depthwise_getsize.
    data_type = NetUtil.get_arg(mace_op, "T").i
    if data_type == mace_pb2.DT_FLOAT:
        bytewidth = 4  # bytes per element of the circular buffer
    else:
        mace_check(False, "Unsupported")

    state_size = aligned_size(24, ALIGNMENT)

    # The circular buffer must hold every row the kernel can touch.
    circ_buf_height = max(kernel_height + (output_height - 1) * y_stride,
                          y_padding + input_height)
    channel_alignment = 2 if bytewidth == 4 else 4
    circ_buf_channels = aligned_size(input_channels * channels_multiplier,
                                     channel_alignment)
    circ_buf_size = (bytewidth * circ_buf_height * circ_buf_channels *
                     kernel_width)

    xtensa_total_size = state_size + circ_buf_size
    return xtensa_total_size * 4 + bias_bytes
def infer_shape_resize_bilinear(self, op):
    """Infer the output shape of a resize-bilinear op and record it.

    The two spatial dimensions come from the op's resize-size argument;
    batch and channel dimensions are copied from the cached shape of the
    first input. NCHW and NHWC layouts are supported; any other layout is
    reported through ``mace_check``.
    """
    in_shape = self._output_shape_cache[op.input[0]]
    target = ConverterUtil.get_arg(op, MaceKeyword.mace_resize_size_str).ints
    layout = ConverterUtil.data_format(op)

    if layout == DataFormat.NCHW:
        out_shape = [in_shape[0], in_shape[1], target[0], target[1]]
    elif layout == DataFormat.NHWC:
        out_shape = [in_shape[0], target[0], target[1], in_shape[3]]
    else:
        out_shape = []
        mace_check(False, "format %s is not supported" % layout)

    self.add_output_shape(op, [out_shape])
def build_engine(model_name, data_type):
    """Build the micro engine for ``model_name`` on the host via cmake.

    Args:
        model_name: non-empty model name passed as ``MICRO_MODEL_NAME``.
        data_type: ``mace_pb2`` data type; ``DT_BFLOAT16`` switches the
            bfloat16 build flag on, anything else switches it off.

    The argument itself is validated. The previous check looked at the
    global ``flags.model_name``, so ``model_name=None`` reached
    ``len(None)`` instead of producing the intended message.
    """
    mace_check(model_name is not None and len(model_name) > 0,
               "you should specify model name for build.")
    # NOTE(review): model_name is interpolated into a shell command without
    # quoting; callers should pass trusted names only.
    command = ("micro/tools/cmake/cmake-build-host.sh"
               " -DMICRO_MODEL_NAME=%s -DMACE_MICRO_ENABLE_CMSIS=ON"
               " -DCMAKE_BUILD_TYPE=Release" % model_name)
    if data_type == mace_pb2.DT_BFLOAT16:
        command += " -DMACE_MICRO_ENABLE_BFLOAT16=ON"
        print("The current engine's data type is bfloat16.")
    else:
        command += " -DMACE_MICRO_ENABLE_BFLOAT16=OFF"
    device.execute(command)
def convert_ops(self):
    """Run the registered converter for every Caffe layer not skipped.

    Layers whose op is in ``self._skip_ops`` are ignored. For the others
    the layer name must be unique and the layer type must have an entry in
    ``self._op_converters``; both conditions are enforced with
    ``mace_check`` before the converter is called.
    """
    seen_names = set()
    for layer in self._caffe_layers.layer:
        caffe_op = self._caffe_net.get_op(layer.name)
        if caffe_op in self._skip_ops:
            continue
        mace_check(layer.name not in seen_names,
                   "There is duplicate layer name '%s' in your model"
                   % layer.name)
        mace_check(layer.type in self._op_converters,
                   "Mace does not support caffe op type %s yet"
                   % layer.type)
        seen_names.add(layer.name)
        converter = self._op_converters[layer.type]
        converter(caffe_op)
def get_input_dims(mace_op, mace_net, idx):
    """Return the dims of the ``idx``-th input of ``mace_op``.

    The input name is searched, in this order, among the net's constant
    tensors, the outputs of the net's ops and the net's input infos; the
    dims of the first match are returned. If nothing matches, the failure
    is reported through ``mace_check``.
    """
    wanted = mace_op.input[idx]

    for tensor in mace_net.tensors:
        if tensor.name == wanted:
            return tensor.dims

    for producer in mace_net.op:
        for position, output_name in enumerate(producer.output):
            if output_name == wanted:
                return producer.output_shape[position].dims

    for info in mace_net.input_info:
        if info.name == wanted:
            return info.dims

    mace_check(False, "unreachable")
def add_paddings_tensor_from_arg(self, op):
    """Append the paddings argument of ``op`` as an ``[N, 2]`` const input.

    The ints of the argument named ``MaceKeyword.mace_paddings_str`` must
    come in pairs. They are copied into a new INT32 tensor named
    ``<op.name>/padding:0`` with dims ``[len / 2, 2]``; the tensor is added
    to ``self._model.tensors`` and its name appended to ``op.input``.
    """
    paddings_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_paddings_str)
    tensor = self._model.tensors.add()
    tensor.name = '%s/padding:0' % op.name
    tensor.data_type = mace_pb2.DT_INT32

    value_count = len(paddings_arg.ints)
    mace_check(value_count % 2 == 0,
               '%s: the rank of paddings should be even' % op.name)
    tensor.dims.extend([value_count // 2, 2])
    tensor.int32_data.extend(paddings_arg.ints)

    op.input.extend([tensor.name])
def convert_Bias(self, caffe_op):
    """Convert a Caffe Bias layer into a MACE BiasAdd op.

    The layer's ``bias_param.axis`` must be unset/falsy, 0 or 1. The op's
    axis argument starts at 1 and is overwritten with ``bias_param.axis``
    whenever that attribute is not ``None``.
    """
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.BiasAdd.name

    bias_param = caffe_op.layer.bias_param
    axis_message = "BiasAdd only support axis with 0 or 1."
    mace_check(not bias_param.axis or bias_param.axis in (0, 1),
               axis_message)

    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis_arg.i = 1
    if bias_param.axis is None:
        return
    mace_check(bias_param.axis in (0, 1), axis_message)
    axis_arg.i = bias_param.axis
def parse_data_type(str):
    """Translate a data type name into the matching ``mace_pb2`` constant.

    Recognised names are "float32", "float16", "int32", "int16" and
    "uint8". Any other value is reported through ``mace_check``.
    """
    # (name, attribute of mace_pb2); the attribute is resolved only on a
    # match, as in an if/elif chain.
    known_types = (
        ("float32", "DT_FLOAT"),
        ("float16", "DT_FLOAT16"),
        ("int32", "DT_INT32"),
        ("int16", "DT_INT16"),
        ("uint8", "DT_UINT8"),
    )
    for type_name, proto_attr in known_types:
        if str == type_name:
            return getattr(mace_pb2, proto_attr)
    mace_check(False, "data type %s not supported" % str)
def convert_ops(self):
    """Convert every op of the model and swap in the converted op list.

    For each op, ``pre_convert`` runs first, then the converter registered
    for the op's type. ``post_convert`` runs unless the converter returned
    a truthy value. Finally the model's op list is replaced by
    ``self._new_ops``.
    """
    print("Convert mace graph to hexagon.")
    for op in self._model.op:
        mace_check(op.type in self._op_converters,
                   "Mace Hexagon does not support op type %s yet" % op.type)
        self.pre_convert(op)
        skip_post_convert = self._op_converters[op.type](op)
        # None and any other falsy result both mean "run post_convert".
        if not skip_post_convert:
            self.post_convert(op)

    del self._model.op[:]
    self._model.op.extend(self._new_ops)
def convert_activation(self, op):
    """Rewrite an activation op in place as its Hexagon counterpart.

    A min/max const node is added for the first input. For RELUX the max
    limit argument is additionally added as the scalar const ``/x:0``. The
    new op type comes from ``self.activation_type``; an activation missing
    from that table is reported through ``mace_check``.
    """
    self.add_min_max_const_node(op, op.input[0])

    activation = ConverterUtil.get_arg(
        op, MaceKeyword.mace_activation_type_str).s.decode()
    if activation == ActivationType.RELUX.name:
        max_limit = ConverterUtil.get_arg(
            op, MaceKeyword.mace_activation_max_limit_str).f
        self.add_scalar_const_node("/x:0", max_limit, op)

    try:
        op.type = self.activation_type[activation]
    except KeyError:
        mace_check(False,
                   "Hexagon does not support activation %s" % activation)
def conv_output_length(input_length, filter_size, padding, stride,
                       dilation=1):
    """Return the output length of a convolution along one dimension.

    Args:
        input_length: input size along the dimension, or ``None``.
        filter_size: kernel size along the dimension.
        padding: one of 'same', 'valid', 'full' or 'causal'.
        stride: stride along the dimension.
        dilation: dilation rate, 1 for an ordinary convolution.

    Returns:
        ``None`` when ``input_length`` is ``None``; otherwise the padded
        length divided by ``stride``, rounded up.

    An unknown ``padding`` is reported through ``mace_check``.
    """
    if input_length is None:
        return None
    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if padding in ('same', 'causal'):
        output_length = input_length
    elif padding == 'valid':
        output_length = input_length - dilated_filter_size + 1
    elif padding == 'full':
        output_length = input_length + dilated_filter_size - 1
    else:
        mace_check(False, "Not supported padding type: %s" % padding)
    # Ceiling division by the stride; without it the stride had no effect
    # on the result.
    return (output_length + stride - 1) // stride
def validate(platform, model_file, weight_file, input_file, mace_out_file,
             input_shape, output_shape, input_data_format,
             output_data_format, input_node, output_node,
             validation_threshold, input_data_type, backend,
             validation_outputs_data, log_file):
    """Validate MACE output against reference files or the source framework.

    ``validation_outputs_data`` is used as-is when it is a list; otherwise
    it is treated as a path and kept only if that file exists. When this
    yields a non-empty list the outputs are compared with those files
    through ``validate_with_file``. Otherwise the validator matching
    ``platform`` is called; an unknown platform is reported through
    ``mace_check``.
    """
    if isinstance(validation_outputs_data, list):
        validation_outputs = validation_outputs_data
    elif os.path.isfile(validation_outputs_data):
        validation_outputs = [validation_outputs_data]
    else:
        validation_outputs = []

    if validation_outputs:
        validate_with_file(platform, output_node, output_shape,
                           mace_out_file, validation_outputs,
                           validation_threshold, log_file,
                           output_data_format)
        return

    # Argument groups shared by the framework validators, in call order.
    files = (platform, model_file, input_file, mace_out_file)
    io_spec = (input_node, input_shape, input_data_format,
               output_node, output_shape, output_data_format,
               validation_threshold)
    typed_args = files + io_spec + (input_data_type, log_file)

    if platform == Platform.TENSORFLOW:
        validate_tf_model(*typed_args)
        return
    if platform == Platform.PYTORCH:
        validate_pytorch_model(*typed_args)
        return
    if platform == Platform.CAFFE:
        caffe_args = files + (weight_file,) + io_spec + (log_file,)
        validate_caffe_model(*caffe_args)
        return
    if platform == Platform.ONNX:
        onnx_args = files + io_spec + (input_data_type, backend, log_file)
        validate_onnx_model(*onnx_args)
        return
    if platform == Platform.MEGENGINE:
        validate_megengine_model(*typed_args)
        return
    if platform == Platform.KERAS:
        validate_keras_model(*typed_args)
        return
    mace_check(False, "Unsupported platform")
def convert_reduce(self, op):
    """Rewrite a Reduce-Mean op in place as ``QuantizedAvgPool_8``.

    Only MEAN with keep-dims set to 1 over axis 1 and/or axis 2 is
    accepted. The window and strides consts are both ``[1, d1, d2, 1]``,
    where ``d1``/``d2`` are the input's dims 1 and 2 for the reduced axes
    and 1 for an axis that is not reduced.
    """
    self.add_min_max_const_node(op, op.input[0])

    reduce_type = ConverterUtil.get_arg(op, MaceKeyword.mace_reduce_type_str)
    mace_check(reduce_type.i == ReduceType.MEAN.value,
               "Hexagon Reduce only supports Mean now.")
    keep_dims = ConverterUtil.get_arg(op, MaceKeyword.mace_keepdims_str)
    mace_check(keep_dims.i == 1,
               "Hexagon Reduce Mean only supports keep dims now.")
    axes = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str).ints
    mace_check(1 <= len(axes) <= 2,
               "Hexagon Reduce Mean only supports spatial now.")
    for axis in axes:
        mace_check(1 <= axis <= 2,
                   "Hexagon Reduce Mean only supports spatial now")

    input_shape = get_input_shape(op.input[0], self._model)
    if len(axes) != 1:
        window_h, window_w = input_shape[1], input_shape[2]
    elif axes[0] == 1:
        window_h, window_w = input_shape[1], 1
    else:
        window_h, window_w = 1, input_shape[2]

    self.add_arg_const_node(op, '/window:0', [1, window_h, window_w, 1])
    self.add_arg_const_node(op, '/strides:0', [1, window_h, window_w, 1])
    op.type = HexagonOp.QuantizedAvgPool_8.name
def convert_splitv(self, tf_op):
    """Convert a TensorFlow SplitV op, accepting only equal split sizes.

    The conversion is delegated to ``convert_split``; nothing more happens
    when it reports that the op is not a split. Otherwise the size-splits
    input (input 1) is evaluated, removed from the op's inputs, marked as
    a skipped tensor and stored in the op's size-splits argument. Every
    size must equal the first one.
    """
    op, is_split = self.convert_split(tf_op, 2)
    if not is_split:
        return

    splits_arg = op.arg.add()
    splits_arg.name = MaceKeyword.mace_size_splits_str
    split_sizes = tf_op.inputs[1].eval().astype(np.int32)
    del op.input[1]
    self._skip_tensor.add(tf_op.inputs[1].name)

    # todo(luxuhui): support size_splits
    for split_size in split_sizes:
        mace_check(split_size == split_sizes[0],
                   "SplitV Only support even distribution")
    splits_arg.ints.extend(split_sizes)
def convert_slice(self, caffe_op):
    """Convert a Caffe Slice layer into a MACE Split op on axis 1.

    When the layer carries a ``slice_param``, its axis must be unset, 1 or
    -3 and it must not define any ``slice_point``. The resulting op always
    gets an axis argument of 1.
    """
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.Split.name

    if caffe_op.layer.HasField('slice_param'):
        slice_param = caffe_op.layer.slice_param
        axis_supported = (not slice_param.HasField('axis')
                          or slice_param.axis in (1, -3))
        mace_check(axis_supported,
                   "Mace do not support slice with axis %d"
                   % slice_param.axis)
        mace_check(len(slice_param.slice_point) == 0,
                   "Mace do not support slice with slice_point")

    split_axis = op.arg.add()
    split_axis.name = MaceKeyword.mace_axis_str
    split_axis.i = 1
def convert_cast(self, tf_op):
    """Convert a TensorFlow Cast op into a MACE Cast op.

    The destination type is read from the ``DstT`` attribute: int32 and
    float32 map to ``DT_INT32`` and ``DT_FLOAT``; any other type is
    reported through ``mace_check``. If a ``ValueError`` is raised while
    doing so, the output type falls back to ``DT_FLOAT``.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Cast.name
    try:
        dst_type = tf_op.get_attr('DstT')
        if dst_type == tf.int32:
            op.output_type.extend([mace_pb2.DT_INT32])
        elif dst_type == tf.float32:
            op.output_type.extend([mace_pb2.DT_FLOAT])
        else:
            mace_check(False, "data type %s not supported" % dst_type)
    except ValueError:
        # Fall back to float when the lookup above raised ValueError.
        op.output_type.extend([mace_pb2.DT_FLOAT])
def scratch_xtensa_conv_2d(mace_op, mace_net):
    """Compute the scratch size requested for an xtensa standard conv2d op.

    Args:
        mace_op: the op; its first output shape, its "strides" and "T"
            arguments and its first two inputs are read.
        mace_net: the net used to resolve the input and filter dims and
            the padding.

    Returns:
        ``int(mem_req * 4 + bias_bytes)`` where ``mem_req`` is the state
        size plus the circular buffer size plus ``BUS_WIDTH`` and
        ``bias_bytes`` is four times the last output dimension.

    Only ``DT_FLOAT`` is supported; any other data type is reported through
    ``mace_check``. Indices 1 and 3 of the input/output dims are read as
    height and channels, and indices 1, 2 of the filter dims as kernel
    height and kernel width.
    """
    output_dims = mace_op.output_shape[0].dims
    out_height = output_dims[1]
    bias_bytes = output_dims[3] * 4

    input_dims = NetUtil.get_input_dims(mace_op, mace_net, 0)
    input_height = input_dims[1]
    input_channels = input_dims[3]

    filter_dims = NetUtil.get_input_dims(mace_op, mace_net, 1)
    kernel_height = filter_dims[1]
    kernel_width = filter_dims[2]

    y_stride = NetUtil.get_arg(mace_op, "strides").ints[1]
    y_padding = NetUtil.calc_padding(mace_op, mace_net)[1]

    # The remainder follows xa_nn_conv2d_std_getsize.
    mem_req = 12 + ALIGNMENT - 1
    input_size = 0
    align_size = 0
    data_type = NetUtil.get_arg(mace_op, "T").i
    if data_type == mace_pb2.DT_FLOAT:
        input_size = 4
        align_size = ALIGNMENT >> 2
    else:
        mace_check(False, "Unsupported")

    # Rows needed below the input so the last kernel window fits.
    y_b_pad = max(0, kernel_height + (out_height - 1) * y_stride -
                  (y_padding + input_height))
    input_channels_pad = aligned_size(input_channels, align_size)
    cir_buf_size_bytes = ((y_padding + input_height + y_b_pad) *
                          kernel_width * input_channels_pad * input_size)

    mem_req += cir_buf_size_bytes
    mem_req += BUS_WIDTH
    return int(mem_req * 4 + bias_bytes)
def convert_input_output_node(self):
    """Merge all quantize/dequantize ops into one input and one output op.

    The model must start with a ``QuantizeINPUT_f_to_8`` op and end with a
    ``DequantizeOUTPUT_8tof`` op. Every further quantize op in between is
    folded into the first op (outputs, shapes, types, byte sizes) and
    every further dequantize op into the last op (inputs); the folded ops
    are removed from the model. For the HTA device the two ops are then
    renamed to ``INPUT``/``OUTPUT`` and trimmed to a single entry.
    """
    quantize_input_op = self._model.op[0]
    mace_check(
        quantize_input_op.type == HexagonOp.QuantizeINPUT_f_to_8.name,
        "Not started with Quantize op.")
    del quantize_input_op.input[:]

    dequantize_output_op = self._model.op[-1]
    mace_check(
        dequantize_output_op.type == HexagonOp.DequantizeOUTPUT_8tof.name,
        "Not ended with Dequantize op.")
    dequantize_input = list(dequantize_output_op.input)
    del dequantize_output_op.input[:]
    del dequantize_output_op.output_shape[:]
    del dequantize_output_op.output_type[:]
    del dequantize_output_op.out_max_byte_size[:]

    index = 1
    while index < len(self._model.op) - 1:
        op = self._model.op[index]
        if op.type == HexagonOp.QuantizeINPUT_f_to_8.name:
            quantize_input_op.output.extend(op.output)
            quantize_input_op.output_shape.extend(op.output_shape)
            quantize_input_op.output_type.extend(op.output_type)
            quantize_input_op.out_max_byte_size.extend(
                op.out_max_byte_size)
            del self._model.op[index]
        elif op.type == HexagonOp.DequantizeOUTPUT_8tof.name:
            dequantize_output_op.input.extend(op.input)
            del self._model.op[index]
        else:
            # Advance only when nothing was removed: after a deletion the
            # next op already sits at `index` and must be examined too.
            index += 1
    # input order matters
    dequantize_output_op.input.extend(dequantize_input)

    if self._option.device == DeviceType.HTA.value:
        # replace QuantizeINPUT_f_to_8 with INPUT
        quantize_input_op.type = HexagonOp.INPUT.name
        del quantize_input_op.output_shape[1:]
        del quantize_input_op.output_type[1:]
        del quantize_input_op.out_max_byte_size[1:]
        # replace first op's input min max with constant
        self.add_constant_min_max_for_first_op(self._model.op[1])
        # replace DequantizeOUTPUT_8tof with OUTPUT
        dequantize_output_op.type = HexagonOp.OUTPUT.name
        del dequantize_output_op.input[1:]
def __init__(self, option, src_model_file):
    """Load a TorchScript model and prepare the converter state.

    Args:
        option: converter options, stored as ``self._option``.
        src_model_file: path handed to ``torch.jit.load``.

    The constructor installs ``_node_getitem`` as
    ``torch._C.Node.__getitem__``, creates an empty ``NetDef`` configured
    for OIHW filters, NCHW data and the PyTorch framework, registers the
    per-node-kind converters, loads the model in eval mode and extracts
    its graph. The first graph output must be a tensor, list or tuple.
    """
    torch._C.Node.__getitem__ = _node_getitem
    self._param_converts = (
        NodeKind.Constant,
        NodeKind.List,
        NodeKind.Size,
        NodeKind.NumToTensor,
        NodeKind.Int,
    )

    self._option = option
    self._converter_info = dict()

    net_def = mace_pb2.NetDef()
    self._mace_net_def = net_def
    ConverterUtil.set_filter_format(net_def, DataFormat.OIHW)
    ConverterUtil.add_data_format_arg(net_def, DataFormat.NCHW)
    ConverterUtil.set_framework_type(net_def, FrameworkType.PYTORCH.value)

    self._op_converters = {
        NodeKind.AdaptiveAvgPool2D: self.convert_pool,
        NodeKind.Add: self.convert_add,
        NodeKind.Add_: self.convert_add,
        NodeKind.Addmm: self.convert_addmm,
        NodeKind.AvgPool2D: self.convert_pool,
        NodeKind.BatchNorm: self.convert_batch_norm,
        NodeKind.Cat: self.convert_cat,
        NodeKind.Convolution: self.convert_conv2d,
        NodeKind.Dropout: self.convert_dropout,
        NodeKind.Flatten: self.convert_flatten,
        NodeKind.HardTanh_: self.convert_hardtanh,
        NodeKind.HardTanh: self.convert_hardtanh,
        NodeKind.Matmul: self.convert_matmul,
        NodeKind.MaxPool2D: self.convert_pool,
        NodeKind.Relu: self.convert_relu,
        NodeKind.Relu_: self.convert_relu,
        NodeKind.Reshape: self.convert_reshape,
        NodeKind.T: self.convert_t,
    }

    self._loaded_model = torch.jit.load(src_model_file)
    self._loaded_model.eval()
    self._graph, self._params_dict = self.model_to_graph()

    # Only the first graph output is inspected.
    self._output_node_name = list(self._graph.outputs())[0].debugName()
    self._output_value_type = list(self._graph.outputs())[0].type()
    supported_outputs = (ValueType.TensorType, ValueType.ListType,
                         ValueType.TupleType)
    mace_check(
        isinstance(self._output_value_type, supported_outputs),
        'return type {} not supported'.format(self._output_value_type))

    self._node_map = {}
    self.init_output_shape_cache()
def add_resize_args(self, op):
    """Add the const inputs describing how a resize op samples pixels.

    The align-corners argument is always added as ``/align_corners:0``.
    When a coordinate-transformation-mode argument is present it must be
    HALF_PIXEL, and ``/half_pixel_centers:0`` is added with value 1.
    """
    align_corners = ConverterUtil.get_arg(
        op, MaceKeyword.mace_align_corners_str)
    self.add_arg_const_node(op, '/align_corners:0', [1],
                            [align_corners.i])

    mode_arg = ConverterUtil.get_arg(
        op, MaceKeyword.mace_coordinate_transformation_mode_str)
    if mode_arg is None:
        return

    mode = CoordinateTransformationMode(mode_arg.i)
    mode_value = mode_arg.i
    mace_check(mode_value == CoordinateTransformationMode.HALF_PIXEL.value,
               "Hexagon does not support resize %s" % mode)
    self.add_arg_const_node(op, '/half_pixel_centers:0', [1], [1])
def gen_code_from_model(self, model_name, pb_model, model_weights):
    """Generate the micro engine source files for one model.

    The directory ``<gen_folder>models/<model_name>/`` is recreated from
    scratch and filled with the net-def data, the operator list, the
    graph data, the engine config and the model weights.

    Args:
        model_name: name used for the output folder and handed to every
            generator.
        pb_model: the net definition to serialize.
        model_weights: raw weights, written out as a byte array.
    """
    net_def = pb_model
    out_dir = self.gen_folder + 'models/' + model_name + '/'
    shutil.rmtree(out_dir, ignore_errors=True)
    util.mkdir_p(out_dir)

    # Compute memory sizes and block offsets first: this updates net_def
    # and has to happen before the ProtoConverter serializes it.
    tensor_mem_size = MemComputer(net_def, self.np_data_type).compute()

    # C++ NetDef struct.
    net_def_bytes = ProtoConverter(
        self.offset16, self.write_magic,
        NetDefExcludeFields).proto_to_bytes(net_def)
    mace_check(net_def_bytes is not None, "proto_to_bytes failed.")
    self.code_gen.gen_net_def_data(model_name, net_def_bytes,
                                   out_dir + 'micro_net_def_data.h')

    # Operator array.
    op_src_paths, op_class_names = \
        self.op_resolver.get_op_desc_list_from_model()
    self.code_gen.gen_ops_data(model_name, op_src_paths, op_class_names,
                               out_dir + 'micro_ops_list.h')

    # C++ Graph struct.
    graph = GraphBuilder(net_def, self.op_resolver).build()
    graph_bytes = ProtoConverter(
        self.offset16, self.write_magic).proto_to_bytes(graph)
    self.code_gen.gen_graph_data(model_name, graph_bytes,
                                 out_dir + 'micro_graph_data.h')

    scratch_buffer_size = ScratchComputer(
        net_def, self.model_conf).compute_size()

    # Micro engine config.
    engine_data = {
        'tensor_mem_size': tensor_mem_size,
        'input_size': len(net_def.input_info),
        'scratch_buffer_size': scratch_buffer_size,
    }
    self.code_gen.gen_engin_config(model_name, engine_data,
                                   out_dir + 'micro_engine_config.cc')

    # Micro model tensor data.
    self.code_gen.gen_model_data(model_name, bytearray(model_weights),
                                 out_dir + 'micro_model_data.h')
def infer_shape_general(self, op):
    """Give ``op`` the same output shape as its first input.

    Nothing happens for an op without inputs. Otherwise the first input
    must already have a cached shape, and a BatchNorm op must have a 4-D
    input; both conditions are enforced with ``mace_check``.
    """
    if len(op.input) == 0:
        return

    first_input = op.input[0]
    mace_check(
        first_input in self._output_shape_cache,
        "Op {} input {} does not exist".format(op.name, first_input))
    # The initial entries of _output_shape_cache come from:
    # 1: input_shape in the .yml file (may be transposed to NCHW)
    # 2: tensor.dims, for tensors added by add_tensor_and_shape
    in_shape = self._output_shape_cache[first_input]

    # PyTorch uses the same function for 1D/2D/3D BatchNorm, so the input
    # rank is the only way to tell which one this is.
    if op.type == MaceOp.BatchNorm.name:
        rank = len(in_shape)
        mace_check(
            rank == 4,
            'only 2D BatchNorm is supported,'
            ' but {}D input found'.format(rank))

    self.add_output_shape(op, [in_shape])