def set_tensor_layout(self, tensor_name, data_layout):
    """Sets the data layout annotation of tensor with given name.
    See get_tensor_layout for examples.

    :param tensor_name: name of the tensor to annotate
    :param data_layout: list of layout dimension labels, one per tensor dim
    """
    tensor_shape = self.get_tensor_shape(tensor_name)
    # isinstance is the idiomatic check; type(...) == list would reject list subclasses
    assert isinstance(data_layout, list), "data_layout must be a list"
    if tensor_shape is not None:
        assert len(tensor_shape) == len(data_layout), (
            "Mismatch between number of dimensions of tensor shape and "
            "data layout annotation."
        )
    graph = self._model_proto.graph
    qnt_annotations = graph.quantization_annotation
    ret = util.get_by_name(qnt_annotations, tensor_name, "tensor_name")
    if ret is not None:
        # tensor already has an annotation record: update or add the layout entry
        ret_tl = util.get_by_name(
            ret.quant_parameter_tensor_names, "tensor_layout", "key"
        )
        if ret_tl is not None:
            ret_tl.value = str(data_layout)
        else:
            tl = onnx.StringStringEntryProto()
            tl.key = "tensor_layout"
            tl.value = str(data_layout)
            ret.quant_parameter_tensor_names.append(tl)
    else:
        # no annotation record yet for this tensor: create one
        qa = onnx.TensorAnnotation()
        dt = onnx.StringStringEntryProto()
        dt.key = "tensor_layout"
        dt.value = str(data_layout)
        qa.tensor_name = tensor_name
        qa.quant_parameter_tensor_names.append(dt)
        qnt_annotations.append(qa)
def _move_quant_attributes_into_annotations(model):
    """Move quantization info in attributes into quantization_annotation.

    Returns a deep copy of *model* in which each node's "weight_qnt" /
    "activation_qnt" attribute has been converted to a TensorAnnotation
    (key "finn_datatype") on the corresponding tensor and removed from
    the node's attribute list.
    """
    if onnx is None:
        raise ModuleNotFoundError("Installation of ONNX is required.")
    model = copy.deepcopy(model)
    qaname = "finn_datatype"
    for n in model.graph.node:
        # BUGFIX: the original called n.attribute.remove(a) inside the
        # `for a in n.attribute` loop, mutating the sequence while iterating
        # it — that skips the element after each removal. Collect the
        # attributes first and remove them after the scan.
        to_remove = []
        for a in n.attribute:
            if a.name == "weight_qnt":
                # assume second input is weight, make sure it has an initializer
                w_tensor_name = n.input[1]
                assert w_tensor_name in [
                    x.name for x in model.graph.initializer
                ]
                tq = onnx.StringStringEntryProto(key=qaname, value=a.s)
                ta = onnx.TensorAnnotation(
                    tensor_name=w_tensor_name,
                    quant_parameter_tensor_names=[tq],
                )
                model.graph.quantization_annotation.append(ta)
                to_remove.append(a)
            elif a.name == "activation_qnt":
                # activation datatype annotates the node's first output
                a_tensor_name = n.output[0]
                tq = onnx.StringStringEntryProto(key=qaname, value=a.s)
                ta = onnx.TensorAnnotation(
                    tensor_name=a_tensor_name,
                    quant_parameter_tensor_names=[tq],
                )
                model.graph.quantization_annotation.append(ta)
                to_remove.append(a)
        for a in to_remove:
            n.attribute.remove(a)
    return model
def set_tensor_datatype(self, tensor_name, datatype):
    """Sets the FINN DataType of tensor with given name."""
    annotations = self._model_proto.graph.quantization_annotation
    tensor_ann = util.get_by_name(annotations, tensor_name, "tensor_name")
    if tensor_ann is None:
        # no annotation record for this tensor yet; only create one when a
        # real datatype is being set (None would be a no-op)
        if datatype is not None:
            new_ann = onnx.TensorAnnotation()
            new_ann.tensor_name = tensor_name
            entry = onnx.StringStringEntryProto()
            entry.key = "finn_datatype"
            entry.value = datatype.name
            new_ann.quant_parameter_tensor_names.append(entry)
            annotations.append(new_ann)
        return
    dt_entry = util.get_by_name(
        tensor_ann.quant_parameter_tensor_names, "finn_datatype", "key"
    )
    if dt_entry is not None:
        if datatype is None:
            # wipe the existing protobuf entry in place instead of deleting it
            dt_entry.Clear()
        else:
            dt_entry.value = datatype.name
    elif datatype is not None:
        entry = onnx.StringStringEntryProto()
        entry.key = "finn_datatype"
        entry.value = datatype.name
        tensor_ann.quant_parameter_tensor_names.append(entry)
def main():
    """Per-backend ONNX export driver.

    For every requested backend: load the source model, run optimization
    passes, write the generated code, export initializers as weight
    binaries into dst_dir, attach WebDNN2 metadata to the model, save it,
    and run the yarn shader/build steps. Generated code is removed again
    at the end of each iteration.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("onnx_model")
    parser.add_argument("dst_dir")
    parser.add_argument(
        "--compression",
        type=int,
        default=0,
        help="compression algorithm number (0=no compression)",
    )
    parser.add_argument("--backends", default=",".join(ALL_BACKENDS))
    args = parser.parse_args()
    backends = args.backends.split(",")
    os.makedirs(args.dst_dir, exist_ok=True)
    for backend in backends:
        # reload the model fresh for each backend so per-backend passes
        # don't accumulate across iterations
        src_model = onnx.load_model(args.onnx_model)
        optimization_result = run_passes(src_model, backend)
        # print(optimization_result)
        optimization_result.write_code(ROOT_DIR)
        # NOTE(review): this aliases src_model — presumably run_passes
        # mutates the model in place; confirm against run_passes
        optimized_model = src_model
        # split initializers into weight files of at most 4 MiB each
        tensor_pathes = export_initializers(
            os.path.join(args.dst_dir, f"weight-{backend}-{{}}.bin"),
            optimized_model, optimization_result.initializers,
            4 * 1024 * 1024, args.compression)
        # colon-separated basenames of the weight files, recorded as metadata
        weight_paths = ":".join(
            [os.path.basename(tensor_path) for tensor_path in tensor_pathes])
        optimized_model.metadata_props.append(
            onnx.StringStringEntryProto(key="WebDNN2.WeightPaths",
                                        value=weight_paths))
        optimized_model.metadata_props.append(
            onnx.StringStringEntryProto(
                key="WebDNN2.TensorMoveOptions",
                value=json.dumps(optimization_result.tensor_move_options)))
        onnx.save_model(optimized_model,
                        os.path.join(args.dst_dir, f"model-{backend}.onnx"))
        # backend-specific shader generation must happen before the build
        if backend == "wasm":
            subprocess.check_call(["yarn", "shader:wasm"],
                                  shell=SUBPROCESS_SHELL)
        if backend == "webgpu":
            subprocess.check_call(["yarn", "shader:webgpu"],
                                  shell=SUBPROCESS_SHELL)
        subprocess.check_call(["yarn", "makeShaderList"],
                              shell=SUBPROCESS_SHELL)
        subprocess.check_call(
            ["yarn", f"build:{backend}", "-o", os.path.abspath(args.dst_dir)],
            shell=SUBPROCESS_SHELL)
        optimization_result.remove_code(ROOT_DIR)
        # reset shader list file (remove autogen entry)
        subprocess.check_call(["yarn", "makeShaderList"],
                              shell=SUBPROCESS_SHELL)
def set_metadata_prop(self, key, value):
    """Sets metadata property with given key to the given value."""
    existing = util.get_by_name(self.model.metadata_props, key, "key")
    if existing is not None:
        # key already present: overwrite its value in place
        existing.value = value
        return
    # key not present: create a fresh entry and attach it to the model
    entry = onnx.StringStringEntryProto()
    entry.key = key
    entry.value = value
    self.model.metadata_props.append(entry)
def set_tensor_sparsity(self, tensor_name, sparsity_dict):
    """Sets the sparsity annotation of a tensor with given name."""
    annotations = self._model_proto.graph.quantization_annotation
    # annotation value is the stringified sparsity dict
    encoded = str(sparsity_dict)
    existing = util.get_by_name(annotations, tensor_name, "tensor_name")
    if existing is None:
        # first annotation for this tensor: build and append a new record
        new_ann = onnx.TensorAnnotation()
        new_ann.tensor_name = tensor_name
        entry = onnx.StringStringEntryProto()
        entry.key = "tensor_sparsity"
        entry.value = encoded
        new_ann.quant_parameter_tensor_names.append(entry)
        annotations.append(new_ann)
        return
    sparsity_entry = util.get_by_name(
        existing.quant_parameter_tensor_names, "tensor_sparsity", "key"
    )
    if sparsity_entry is None:
        entry = onnx.StringStringEntryProto()
        entry.key = "tensor_sparsity"
        entry.value = encoded
        existing.quant_parameter_tensor_names.append(entry)
    else:
        sparsity_entry.value = encoded
def main():
    """Embed key/value pairs from metadata.json into an ONNX model.

    Reads metadata.json from the current working directory, copies each
    metadata entry into the model's metadata_props, sets the model's
    doc_string (auto-generated from the metadata pairs or taken from the
    JSON's "docstring" field), and saves the annotated model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_model')
    parser.add_argument('output_model')
    # --auto_docstring: build the doc_string from the metadata pairs
    # instead of using the "docstring" field of metadata.json
    parser.add_argument('--auto_docstring', action='store_true')
    args = parser.parse_args()
    json_path = Path('metadata.json')
    if not json_path.exists():
        raise RuntimeError('metadata.json is not found.')
    input_path = Path(args.input_model)
    if not input_path.exists():
        raise RuntimeError('Input model is not found.({})'.format(input_path))
    model = onnx.load_model(str(input_path))
    with json_path.open('r', encoding='utf-8') as f:
        tmp = json.load(f)
    metadata = tmp['metadata']
    if args.auto_docstring:
        # e.g. "author:foo, version:1" joined from the metadata dict
        docstring = ', '.join([
            '{}:{}'.format(key, value) for key, value in metadata.items()
        ])
    else:
        docstring = tmp['docstring']
    for key, value in metadata.items():
        # NOTE(review): assumes every JSON metadata value is a string; a
        # non-string value would fail on protobuf string-field assignment
        # — confirm the metadata.json schema
        s = onnx.StringStringEntryProto()
        s.key = key
        s.value = value
        model.metadata_props.append(s)
    model.doc_string = docstring
    output_path = Path(args.output_model)
    if not output_path.parent.exists():
        output_path.parent.mkdir(parents=True)
    onnx.save_model(model, str(output_path))