def export_tracing(torch_model, inputs):
    assert TORCH_VERSION >= (1, 8)
    # RetinaNet is supported but needs a slightly different wrapper.
    # TODO: the wrapper should be automatically generated.
    assert isinstance(torch_model, GeneralizedRCNN)
    image = inputs[0]["image"]

    class WrapModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.torch_model = torch_model

        def forward(self, image):
            inputs = [{"image": image}]
            outputs = self.torch_model.inference(inputs, do_postprocess=False)[0]
            outputs = outputs.get_fields()
            from detectron2.utils.analysis import _flatten_to_tuple

            return _flatten_to_tuple(outputs)

    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        assert (
            args.format == "torchscript"
        ), "Tracing method only supports torchscript format for now."
        ts_model = torch.jit.trace(WrapModel(), (image,))
        ts_model.save(os.path.join(args.output, "model.ts"))
        dump_torchscript_IR(ts_model, args.output)
        # NOTE: onnx export fails in pytorch
        # if args.format == "onnx":
        #     torch.onnx.export(WrapModel(), (image,), os.path.join(args.output, "model.onnx"))
    # TODO: inference in Python is still missing the postprocessing glue code
    return None

def _test_model(self, config_path, inference_func):
    model = model_zoo.get(config_path, trained=True)
    image = get_sample_coco_image()

    class Wrapper(nn.ModuleList):
        # a wrapper to make the model traceable
        def forward(self, image):
            outputs = inference_func(self[0], image)
            flattened_outputs, schema = flatten_to_tuple(outputs)
            if not hasattr(self, "schema"):
                self.schema = schema
            return flattened_outputs

        def rebuild(self, flattened_outputs):
            return self.schema(flattened_outputs)

    wrapper = Wrapper([model])
    wrapper.eval()
    with torch.no_grad(), patch_builtin_len():
        small_image = nn.functional.interpolate(image, scale_factor=0.5)
        # trace with a different image, and the trace must still work
        traced_model = torch.jit.trace(wrapper, (small_image,))

        output = inference_func(model, image)
        traced_output = wrapper.rebuild(traced_model(image))
    assert_instances_allclose(output, traced_output, size_as_tensor=True)

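# A minimal sketch of the flatten/rebuild round trip the Wrapper above relies
# on, assuming flatten_to_tuple is importable from detectron2.export.flatten:
# it turns a nested structure into a flat tuple of tensors plus a schema, and
# calling the schema on the flat tuple reconstructs the original structure.
# (Illustrative only; the dict below is a placeholder, not a model output.)
import torch
from detectron2.export.flatten import flatten_to_tuple

outputs = {"scores": torch.rand(3), "boxes": torch.rand(3, 4)}
flat, schema = flatten_to_tuple(outputs)  # flat: a plain tuple of tensors
rebuilt = schema(flat)                    # same nested structure as `outputs`
assert torch.equal(rebuilt["scores"], outputs["scores"])
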
def trace_and_save_torchscript(
    model: nn.Module,
    inputs: Tuple[torch.Tensor],
    output_path: str,
    _extra_files: Optional[Dict[str, bytes]] = None,
):
    logger.info("Tracing and saving TorchScript to {} ...".format(output_path))
    # TODO: patch_builtin_len depends on D2; we should either copy the function
    # or dynamically register D2's version.
    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        script_model = torch.jit.trace(model, inputs)

    if _extra_files is None:
        _extra_files = {}

    model_file = os.path.join(output_path, "model.jit")
    PathManager.mkdirs(output_path)
    with PathManager.open(model_file, "wb") as f:
        torch.jit.save(script_model, f, _extra_files=_extra_files)

    data_file = os.path.join(output_path, "data.pth")
    with PathManager.open(data_file, "wb") as f:
        torch.save(inputs, f)

    # NOTE: the new API doesn't require a return value
    return model_file

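# Hypothetical call site for the helper above (all names and paths are
# placeholders): trace a wrapped module on example inputs, then reload the
# saved file for inference.
model_file = trace_and_save_torchscript(
    wrapped_model,                   # an nn.Module whose forward takes tensors
    (torch.rand(1, 3, 224, 224),),   # example inputs used for tracing
    "/tmp/exported",                 # output directory for model.jit and data.pth
)
reloaded = torch.jit.load(model_file)  # ready to run on new inputs
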
def testKeypointHead(self):
    class M(nn.Module):
        def __init__(self):
            super().__init__()
            self.model = KRCNNConvDeconvUpsampleHead(
                ShapeSpec(channels=4, height=14, width=14), num_keypoints=17, conv_dims=(4,)
            )

        def forward(self, x, predbox1, predbox2):
            inst = [
                Instances((100, 100), pred_boxes=Boxes(predbox1)),
                Instances((100, 100), pred_boxes=Boxes(predbox2)),
            ]
            ret = self.model(x, inst)
            return tuple(x.pred_keypoints for x in ret)

    model = M()
    model.eval()

    def gen_input(num1, num2):
        feat = torch.randn((num1 + num2, 4, 14, 14))
        box1 = random_boxes(num1)
        box2 = random_boxes(num2)
        return feat, box1, box2

    with torch.no_grad(), patch_builtin_len():
        trace = torch.jit.trace(model, gen_input(15, 15), check_trace=False)

        inputs = gen_input(12, 10)
        trace_outputs = trace(*inputs)
        true_outputs = model(*inputs)
        for trace_output, true_output in zip(trace_outputs, true_outputs):
            self.assertTrue(torch.allclose(trace_output, true_output))

def test_predict_probs_tracing(self):
    class Model(torch.nn.Module):
        def __init__(self, output_layer):
            super(Model, self).__init__()
            self._output_layer = output_layer

        def forward(self, scores, proposal_boxes):
            instances = Instances((10, 10))
            instances.proposal_boxes = Boxes(proposal_boxes)
            return self._output_layer.predict_probs((scores, None), [instances])

    box_head_output_size = 8
    box_predictor = FastRCNNOutputLayers(
        ShapeSpec(channels=box_head_output_size),
        box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
        num_classes=5,
    )

    model = Model(box_predictor)
    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        func = torch.jit.trace(model, (torch.randn(10, 6), torch.rand(10, 4)))

        o = func(torch.randn(10, 6), torch.randn(10, 4))
        self.assertEqual(o[0].shape, (10, 6))
        o = func(torch.randn(5, 6), torch.randn(5, 4))
        self.assertEqual(o[0].shape, (5, 6))
        o = func(torch.randn(20, 6), torch.randn(20, 4))
        self.assertEqual(o[0].shape, (20, 6))

def trace_and_save_torchscript(
    model: nn.Module,
    inputs: Tuple[torch.Tensor],
    output_path: str,
    mobile_optimization: Optional[MobileOptimizationConfig] = None,
    _extra_files: Optional[Dict[str, bytes]] = None,
):
    logger.info("Tracing and saving TorchScript to {} ...".format(output_path))
    PathManager.mkdirs(output_path)
    if _extra_files is None:
        _extra_files = {}

    # TODO: patch_builtin_len depends on D2; we should either copy the function
    # or dynamically register D2's version.
    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        script_model = torch.jit.trace(model, inputs)

    with make_temp_directory("trace_and_save_torchscript") as tmp_dir:

        @contextlib.contextmanager
        def _synced_local_file(rel_path):
            remote_file = os.path.join(output_path, rel_path)
            local_file = os.path.join(tmp_dir, rel_path)
            yield local_file
            PathManager.copy_from_local(local_file, remote_file, overwrite=True)

        with _synced_local_file("model.jit") as model_file:
            torch.jit.save(script_model, model_file, _extra_files=_extra_files)

        with _synced_local_file("data.pth") as data_file:
            torch.save(inputs, data_file)

        if mobile_optimization is not None:
            logger.info("Applying optimize_for_mobile ...")
            liteopt_model = optimize_for_mobile(
                script_model,
                optimization_blocklist=mobile_optimization.optimization_blocklist,
                preserved_methods=mobile_optimization.preserved_methods,
                backend=mobile_optimization.backend,
            )
            with _synced_local_file("mobile_optimized.ptl") as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path)
            # liteopt_model(*inputs)  # sanity check
            op_names = torch.jit.export_opnames(liteopt_model)
            logger.info(
                "Operator names from lite interpreter:\n{}".format("\n".join(op_names))
            )

            logger.info("Applying augment_model_with_bundled_inputs ...")
            augment_model_with_bundled_inputs(liteopt_model, [inputs])
            liteopt_model.run_on_bundled_input(0)  # sanity check
            with _synced_local_file("mobile_optimized_bundled.ptl") as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path)

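# A sketch of invoking the mobile path above. The MobileOptimizationConfig
# fields are inferred from how the helper reads them (optimization_blocklist,
# preserved_methods, backend); all values and names here are placeholders.
trace_and_save_torchscript(
    wrapped_model,
    (torch.rand(1, 3, 224, 224),),
    "/tmp/exported",
    mobile_optimization=MobileOptimizationConfig(
        optimization_blocklist=None,  # run every optimize_for_mobile pass
        preserved_methods=[],         # no extra methods to keep in the module
        backend="CPU",                # optimize_for_mobile's default backend
    ),
)
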
def _test_model(self, config_path, inference_func):
    model = model_zoo.get(config_path, trained=True)
    image = get_sample_coco_image()

    wrapper = TracingAdapter(model, image, inference_func)
    wrapper.eval()
    with torch.no_grad(), patch_builtin_len():
        small_image = nn.functional.interpolate(image, scale_factor=0.5)
        # trace with a different image, and the trace must still work
        traced_model = torch.jit.trace(wrapper, (small_image,))

        output = inference_func(model, image)
        traced_output = wrapper.outputs_schema(traced_model(image))
    assert_instances_allclose(output, traced_output, size_as_tensor=True)

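# Sketch of reusing such a trace outside the test (assumed workflow, paths are
# placeholders): the traced module returns a flat tuple of tensors, so the
# adapter's outputs_schema must be kept alongside the saved TorchScript file
# in order to rebuild structured outputs such as Instances.
traced_model.save("/tmp/model.ts")
ts = torch.jit.load("/tmp/model.ts")
structured = wrapper.outputs_schema(ts(image))  # rebuild Instances from tensors
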
def _test_model(self, config_path, WrapperCls):
    # TODO wrapper should be handled by export API in the future
    model = model_zoo.get(config_path, trained=True)
    image = get_sample_coco_image()

    model = WrapperCls([model])
    model.eval()
    with torch.no_grad(), patch_builtin_len():
        small_image = nn.functional.interpolate(image, scale_factor=0.5)
        # trace with a different image, and the trace must still work
        traced_model = torch.jit.trace(model, (small_image,))

        output = WrapperCls.convert_output(model(image))
        traced_output = WrapperCls.convert_output(traced_model(image))
    assert_instances_allclose(output, traced_output)

def export_tracing(torch_model, inputs):
    assert TORCH_VERSION >= (1, 8)
    image = inputs[0]["image"]
    inputs = [{"image": image}]  # remove other unused keys

    if isinstance(torch_model, GeneralizedRCNN):

        def inference(model, inputs):
            # use do_postprocess=False so it returns ROI mask
            inst = model.inference(inputs, do_postprocess=False)[0]
            return [{"instances": inst}]

    else:
        inference = None  # assume that we just call the model directly

    traceable_model = TracingAdapter(torch_model, inputs, inference)

    from detectron2.export.torchscript_patch import patch_builtin_len

    with patch_builtin_len():
        if args.format == "torchscript":
            ts_model = torch.jit.trace(traceable_model, (image,))
            with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
                torch.jit.save(ts_model, f)
            dump_torchscript_IR(ts_model, args.output)
        elif args.format == "onnx":
            # NOTE: onnx export currently fails in pytorch
            with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f:
                torch.onnx.export(traceable_model, (image,), f)
    logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
    logger.info("Outputs schema: " + str(traceable_model.outputs_schema))

    if args.format != "torchscript":
        return None
    if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
        return None

    def eval_wrapper(inputs):
        """
        The exported model does not contain the final resize step, which is
        typically useless for deployment but needed for evaluation. We add it
        manually here.
        """
        input = inputs[0]
        instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"]
        postprocessed = detector_postprocess(instances, input["height"], input["width"])
        return [{"instances": postprocessed}]

    return eval_wrapper

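# Hypothetical use of the returned eval_wrapper during evaluation; the input
# dict mirrors detectron2's standard dataset format, where "height"/"width"
# are the original image size that detector_postprocess resizes back to.
wrapper = export_tracing(torch_model, inputs)
if wrapper is not None:
    preds = wrapper([{"image": image, "height": 480, "width": 640}])
    instances = preds[0]["instances"]  # post-processed to the original size
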
def test_PointRend_mask_head_tracing(self):
    cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml")
    point_rend.add_pointrend_config(cfg)
    cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3"]
    cfg.MODEL.ROI_MASK_HEAD.NAME = "PointRendMaskHead"
    cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE = ""
    cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = True
    chan = 256
    head = point_rend.PointRendMaskHead(
        cfg,
        {
            "p2": ShapeSpec(channels=chan, stride=4),
            "p3": ShapeSpec(channels=chan, stride=8),
        },
    )

    def gen_inputs(h, w, N):
        p2 = torch.rand(1, chan, h, w)
        p3 = torch.rand(1, chan, h // 2, w // 2)
        boxes = random_boxes(N, max_coord=h)
        return p2, p3, boxes

    class Wrap(nn.ModuleDict):
        def forward(self, p2, p3, boxes):
            features = {
                "p2": p2,
                "p3": p3,
            }
            inst = Instances((p2.shape[2] * 4, p2.shape[3] * 4))
            inst.pred_boxes = Boxes(boxes)
            inst.pred_classes = torch.zeros(inst.__len__(), dtype=torch.long)
            out = self.head(features, [inst])[0]
            return out.pred_masks

    model = Wrap({"head": head})
    model.eval()
    with torch.no_grad(), patch_builtin_len():
        traced = torch.jit.trace(model, gen_inputs(302, 208, 20))
        inputs = gen_inputs(100, 120, 30)
        out_eager = model(*inputs)
        out_trace = traced(*inputs)
        self.assertTrue(torch.allclose(out_eager, out_trace))

def export(cls, model, input_args, save_path, export_method, **export_kwargs):
    with patch_builtin_len():
        return super().export(model, input_args, save_path, export_method, **export_kwargs)

def d2_meta_arch_prepare_for_export(self, cfg, inputs, predictor_type):
    if "torchscript" in predictor_type and "@tracing" in predictor_type:

        def inference_func(model, image):
            inputs = [{"image": image}]
            return model.inference(inputs, do_postprocess=False)[0]

        def data_generator(x):
            return (x[0]["image"],)

        image = data_generator(inputs)[0]
        wrapper = TracingAdapter(self, image, inference_func)
        wrapper.eval()

        # HACK: outputs_schema can only be obtained after running tracing, but
        # PredictorExportConfig requires a pre-defined postprocessing function,
        # so tracing has to run twice.
        logger.info("tracing the model to get outputs_schema ...")
        with torch.no_grad(), patch_builtin_len():
            _ = torch.jit.trace(wrapper, (image,))
        outputs_schema_json = json.dumps(
            wrapper.outputs_schema, default=dataclass_object_dump
        )

        return PredictorExportConfig(
            model=wrapper,
            data_generator=data_generator,
            preprocess_info=FuncInfo.gen_func_info(
                D2TracingAdapterPreprocessFunc, params={}
            ),
            postprocess_info=FuncInfo.gen_func_info(
                D2TracingAdapterPostFunc,
                params={"outputs_schema_json": outputs_schema_json},
            ),
        )

    if cfg.MODEL.META_ARCHITECTURE in META_ARCH_CAFFE2_EXPORT_TYPE_MAP:
        C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE]
        c2_compatible_model = C2MetaArch(cfg, self)

        preprocess_info = FuncInfo.gen_func_info(
            D2Caffe2MetaArchPreprocessFunc,
            params=D2Caffe2MetaArchPreprocessFunc.get_params(cfg, c2_compatible_model),
        )
        postprocess_info = FuncInfo.gen_func_info(
            D2Caffe2MetaArchPostprocessFunc,
            params=D2Caffe2MetaArchPostprocessFunc.get_params(cfg, c2_compatible_model),
        )

        preprocess_func = preprocess_info.instantiate()

        return PredictorExportConfig(
            model=c2_compatible_model,
            # Caffe2MetaArch takes a single tuple as input (which is the return of
            # preprocess_func); data_generator requires all positional args as a tuple.
            data_generator=lambda x: (preprocess_func(x),),
            preprocess_info=preprocess_info,
            postprocess_info=postprocess_info,
        )

    raise NotImplementedError("Can't determine prepare_for_tracing!")