def run_onnx_model(filename, input):
    model = onnx.load(filename)
    try:
        rep = backend.prepare(model, device="CUDA:0")
        outputs = rep.run(input)
    except Exception:
        print("Couldn't run on CUDA, running on CPU:")
        rep = backend.prepare(model, device="CPU")
        outputs = rep.run(input)
    return outputs
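# A minimal usage sketch for run_onnx_model above. The model path and input
# shape are placeholders; it assumes onnx and the Caffe2 ONNX backend are
# importable as in the function body.
import numpy as np
import onnx
import caffe2.python.onnx.backend as backend

dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
outputs = run_onnx_model("model.onnx", dummy)
print(outputs[0].shape)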
def _test_net(self, net_name, input_blob_dims=(1, 3, 224, 224), decimal=7):
    np.random.seed(seed=0)
    try:
        c2_init_net, c2_predict_net, value_info, debug_str = \
            self.model_downloader.get_c2_model_dbg(net_name)
    except (OSError, IOError) as e:
        # Catch IOError/OSError caused by FileNotFoundError and PermissionError.
        # This is helpful because sometimes we get errors when gfs is not available.
        print("\n_test_net exception: ", e)
        self.skipTest(str(e))

    # Run the model and compare outputs.
    n, c, h, w = input_blob_dims
    data = np.random.randn(n, c, h, w).astype(np.float32)
    inputs = [data]
    _, c2_outputs = c2_native_run_net(c2_init_net, c2_predict_net, inputs, debug_str)
    del _

    model = c2_onnx.caffe2_net_to_onnx_model(
        predict_net=c2_predict_net,
        init_net=c2_init_net,
        value_info=value_info,
    )
    c2_ir = c2.prepare(model)
    onnx_outputs = c2_ir.run(inputs)
    self.assertSameOutputs(c2_outputs, onnx_outputs, decimal=decimal)
def onnx_inference(parser):
    args = parser.parse_args()
    # Load the ONNX model
    model = onnx.load("models/deepspeech_{}.onnx".format(args.continue_from))
    # Check that the IR is well formed
    onnx.checker.check_model(model)
    # Print a human-readable representation of the graph
    print(onnx.helper.printable_graph(model.graph))
    print("model checked, preparing backend!")
    rep = backend.prepare(model, device="CPU")  # or "CUDA:0"
    # For the Caffe2 backend:
    #     rep.predict_net is the Caffe2 protobuf for the network
    #     rep.workspace is the Caffe2 workspace for the network
    #     (see the class caffe2.python.onnx.backend.Workspace)
    print("running inference!")
    # Hard-coded input dims
    input = np.random.randn(16, 1, 161, 129).astype(np.float32)
    start = time.time()
    outputs = rep.run(input)
    print("time used: {}".format(time.time() - start))
    # To run networks with more than one input, pass a tuple
    # rather than a single numpy ndarray.
    print(outputs[0])
def test_onnx_to_caffe2_loop(self):
    body_nodes = [helper.make_node("MatMul", ["_X", "W"], ["_Y"])]
    nodes = self._make_fake_loop_op(
        body_nodes,
        [(TensorProto.FLOAT, (2, 2), "X")],
        [(TensorProto.FLOAT, (2, 2), "Y")])
    X = np.random.rand(2, 2).astype(np.float32)
    W = np.random.rand(2, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 2)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (2, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W", TensorProto.FLOAT, [2, 2], W.tolist())])
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    Y = X
    for _ in range(10):
        Y = np.matmul(Y, W.reshape(2, 2))
    p = c2.prepare(model_def)
    out = p.run(X)
    np.testing.assert_allclose(out.Y, Y)
def main():
    args = parser.parse_args()

    if not args.checkpoint:
        args.pretrained = True
    else:
        args.pretrained = False

    # create model
    geffnet.config.set_exportable(True)
    print("==> Creating PyTorch {} model".format(args.model))
    model = geffnet.create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=3,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint)
    model.eval()

    x = torch.randn((1, 3, args.img_size or 224, args.img_size or 224), requires_grad=True)
    model(x)  # run model once before export trace

    print("==> Exporting model to ONNX format at '{}'".format(args.output))
    input_names = ["input0"]
    output_names = ["output0"]
    optional_args = dict(keep_initializers_as_inputs=True)  # pytorch 1.3 needs this for export to succeed
    try:
        torch_out = torch.onnx._export(
            model, x, args.output, export_params=True, verbose=False,
            input_names=input_names, output_names=output_names, **optional_args)
    except TypeError:
        # fall back to not passing keep_initializers_as_inputs for pytorch < 1.3
        torch_out = torch.onnx._export(
            model, x, args.output, export_params=True, verbose=False,
            input_names=input_names, output_names=output_names)

    print("==> Loading and checking exported model from '{}'".format(args.output))
    onnx_model = onnx.load(args.output)
    onnx.checker.check_model(onnx_model)  # assumed to throw on error
    print("==> Passed")

    print("==> Loading model into Caffe2 backend and comparing forward pass.")
    caffe2_backend = onnx_caffe2.prepare(onnx_model)
    B = {onnx_model.graph.input[0].name: x.data.numpy()}
    c2_out = caffe2_backend.run(B)[0]
    np.testing.assert_almost_equal(torch_out.data.numpy(), c2_out, decimal=5)
    print("==> Passed")
def __init__(self, model, dummy_input, net_type):
    super(C2InferenceWrapper, self).__init__()
    from caffe2.python import core as c2_core
    import caffe2.python.onnx.backend as onnx_c2_backend
    import onnx

    self.c2_core = c2_core
    self.onnx_model_file = tempfile.NamedTemporaryFile()
    torch.onnx.export(
        model, dummy_input, self.onnx_model_file.name,
        keep_initializers_as_inputs=True)  # see https://github.com/onnx/onnx/issues/2417
    onnx_model = onnx.load(self.onnx_model_file.name)
    onnx.checker.check_model(onnx_model)
    self.backend = onnx_c2_backend.prepare(onnx_model, device="CUDA:0")
    self.backend.predict_net.type = net_type
    for op in self.backend.predict_net.op:
        op.engine = 'CUDNN'
    # for initialization (e.g., create net)
    self.backend.run(dummy_input.cpu().numpy())
    with self.c2_core.DeviceScope(self.backend.predict_net.device_option):
        self.backend.workspace.FeedBlob(self.backend.uninitialized[0],
                                        dummy_input.cpu().numpy())
def pytorch_to_caffe2(
    model,
    export_input,
    external_input_names,
    output_names,
    export_path,
    export_onnx_path=None,
):
    num_tensors = 0
    for inp in export_input:
        num_tensors += len(inp) if isinstance(inp, (tuple, list)) else 1
    assert len(external_input_names) == num_tensors

    all_input_names = external_input_names[:]
    for name, _ in model.named_parameters():
        all_input_names.append(name)

    # export the pytorch model to ONNX
    if export_onnx_path:
        print(f"Saving onnx model to: {export_onnx_path}")
    else:
        export_onnx_path = export_path
    model.eval()
    with torch.no_grad():
        torch.onnx.export(
            model,
            export_input,
            export_onnx_path,
            input_names=all_input_names,
            output_names=output_names,
            export_params=True,
        )
    onnx_model = onnx.load(export_onnx_path)
    onnx.checker.check_model(onnx_model)

    # Convert the ONNX model to a caffe2 net
    c2_prepared = caffe2_backend.prepare(onnx_model)
    return c2_prepared
def _test_net(self, net_name, input_blob_dims=(1, 3, 224, 224), decimal=7):
    np.random.seed(seed=0)
    model_dir = self._model_dir(net_name)
    if not os.path.exists(model_dir):
        self._download(net_name)

    c2_predict_pb = os.path.join(model_dir, 'predict_net.pb')
    c2_predict_net = caffe2_pb2.NetDef()
    with open(c2_predict_pb, 'rb') as f:
        c2_predict_net.ParseFromString(f.read())
    c2_predict_net.name = net_name

    c2_init_pb = os.path.join(model_dir, 'init_net.pb')
    c2_init_net = caffe2_pb2.NetDef()
    with open(c2_init_pb, 'rb') as f:
        c2_init_net.ParseFromString(f.read())
    c2_init_net.name = net_name + '_init'

    n, c, h, w = input_blob_dims
    data = np.random.randn(n, c, h, w).astype(np.float32)
    inputs = [data]
    _, c2_outputs = c2_native_run_net(c2_init_net, c2_predict_net, inputs)
    del _

    model = c2_onnx.caffe2_net_to_onnx_model(
        predict_net=c2_predict_net,
        init_net=c2_init_net,
        value_info=json.load(open(os.path.join(model_dir, 'value_info.json'))))
    c2_ir = c2.prepare(model)
    onnx_outputs = c2_ir.run(inputs)
    self.assertSameOutputs(c2_outputs, onnx_outputs, decimal=decimal)
def test_initializer(self):
    X = np.array([[1, 2], [3, 4]]).astype(np.float32)
    Y = np.array([[1, 2], [3, 4]]).astype(np.float32)
    weight = np.array([[1, 0], [0, 1]])
    graph_def = make_graph(
        [make_node("Add", ["X", "Y"], ["Z0"]),
         make_node("Cast", ["Z0"], ["Z"], to=onnx.TensorProto.FLOAT),
         make_node("Mul", ["Z", "weight"], ["W0"]),
         make_node("Tanh", ["W0"], ["W1"]),
         make_node("Sigmoid", ["W1"], ["W2"]),
         make_node("Scale", ["W2"], ["W3"], scale=-1.0)],
        name="test_initializer",
        inputs=[
            make_tensor_value_info("X", onnx.TensorProto.FLOAT, (2, 2)),
            make_tensor_value_info("Y", onnx.TensorProto.FLOAT, (2, 2)),
            make_tensor_value_info("weight", onnx.TensorProto.FLOAT, (2, 2)),
        ],
        outputs=[
            make_tensor_value_info("W3", onnx.TensorProto.FLOAT, (2, 2))
        ],
        initializer=[make_tensor("weight",
                                 onnx.TensorProto.FLOAT,
                                 [2, 2],
                                 weight.flatten().astype(float))])

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    W_ref = -sigmoid(np.tanh((X + Y) * weight))
    c2_rep = c2.prepare(make_model(graph_def, producer_name='caffe2-ref-test'))
    output = c2_rep.run({"X": X, "Y": Y})
    np.testing.assert_almost_equal(output["W3"], W_ref)
def load(self, model_path, inputs=None, outputs=None):
    self.model = onnx.load(model_path)

    # find inputs from the model if not passed in by config
    if inputs:
        self.inputs = inputs
    else:
        self.inputs = []
        initializers = set()
        for i in self.model.graph.initializer:
            initializers.add(i.name)
        for i in self.model.graph.input:
            if i.name not in initializers:
                self.inputs.append(i.name)

    # find outputs from the model if not passed in by config
    if outputs:
        self.outputs = outputs
    else:
        self.outputs = []
        for i in self.model.graph.output:
            self.outputs.append(i.name)

    # prepare the backend
    device = "CUDA:0" if torch.cuda.is_available() else "CPU"
    self.sess = pt_backend.prepare(self.model, device)
    return self
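# A minimal usage sketch for the load() method above, assuming it belongs to a
# backend-model wrapper class; BackendModelWrapper and the model path are
# hypothetical names used only for illustration.
wrapper = BackendModelWrapper()
wrapper.load("model.onnx")
print(wrapper.inputs, wrapper.outputs)
# self.sess is a prepared Caffe2 backend rep; run() accepts a list of inputs.
results = wrapper.sess.run([np.random.randn(1, 3, 224, 224).astype(np.float32)])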
def test_dynamicslice_4inputs_graph(self):
    node_def = make_node("DynamicSlice", ["X1", "X2", "X3", "axes"], ["Y"])
    graph_def = make_graph(
        [node_def],
        name="test",
        inputs=[
            make_tensor_value_info("X1", onnx.TensorProto.FLOAT, (2, 4)),
            make_tensor_value_info("X2", onnx.TensorProto.INT32, (1, 2)),
            make_tensor_value_info("X3", onnx.TensorProto.INT32, (1, 2)),
            make_tensor_value_info("axes", onnx.TensorProto.INT32, (1, 2)),
        ],
        outputs=[
            make_tensor_value_info("Y", onnx.TensorProto.FLOAT, (1, 2))
        ])
    model_def = make_model(graph_def, producer_name='caffe2-ref-test')

    x = [[1, 2, 3, 4], [5, 6, 7, 8]]
    start = [0, 1]
    end = [4, 5]
    axes = [1, 0]
    prepared = c2.prepare(model_def)
    output = prepared.run(inputs=[
        np.array(x), np.array(start), np.array(end), np.array(axes)
    ])
    self.assertSameOutputs(output[0], np.array(x)[1:5, 0:4])
def run_embed_params(proto, model, input, state_dict=None, use_gpu=True):
    """
    This is only a helper debug function, so we can also test the
    embed_params=False case on the pytorch front.
    It should likely be removed from the release version of the code.
    """
    device = "CPU"
    if use_gpu:
        device = "CUDA"
    model_def = onnx.ModelProto.FromString(proto)
    onnx.checker.check_model(model_def)
    prepared = c2.prepare(model_def, device=device)
    if state_dict:
        parameters = []
        # The passed-in state_dict may have a different order. Make
        # sure our order is consistent with the model's order.
        # TODO: Even better: keyword arguments!
        for k in model.state_dict():
            if k in state_dict:
                parameters.append(state_dict[k])
    else:
        parameters = list(model.state_dict().values())

    W = {}
    for k, v in zip(model_def.graph.input, flatten((input, parameters))):
        if isinstance(v, Variable):
            W[k.name] = v.data.cpu().numpy()
        else:
            W[k.name] = v.cpu().numpy()

    caffe2_out = prepared.run(inputs=W)
    return caffe2_out
def main():
    args = parser.parse_args()

    if not args.checkpoint:
        args.pretrained = True

    # create model
    model = create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=3,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint)
    model.eval()

    x = torch.randn((1, 3, args.img_size or 224, args.img_size or 224), requires_grad=True)
    torch_out = torch.onnx._export(model, x, args.output, export_params=True)

    onnx_model = onnx.load(args.output)
    caffe2_backend = onnx_caffe2.prepare(onnx_model)
    B = {onnx_model.graph.input[0].name: x.data.numpy()}
    c2_out = caffe2_backend.run(B)[0]
    np.testing.assert_almost_equal(torch_out.data.numpy(), c2_out, decimal=5)
def test_onnx_to_caffe2_if(self):
    true_nodes = [helper.make_node("MatMul", ["X", "W"], ["Y"])]
    false_nodes = [helper.make_node("Slice", ["X"], ["Y"], axes=[0, 1],
                                    starts=[0, 0], ends=[2, 2])]
    nodes = self._make_fake_if_op(true_nodes, false_nodes,
                                  [(TensorProto.FLOAT, (2, 2), "Y")])
    X = np.random.rand(2, 3).astype(np.float32)
    W = np.random.rand(3, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W", TensorProto.FLOAT, [3, 2], W.tolist())])
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    p = c2.prepare(model_def)
    Y = np.matmul(X, W.reshape(3, 2))
    out = p.run(X)
    np.testing.assert_allclose(out.Y, Y)
def _test_full_ensemble_export(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(models.build_model(test_args, src_dict, tgt_dict))
    encoder_ensemble = EncoderEnsemble(model_list)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderStepEnsemble(model_list, beam_size=5)

    tmp_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
    decoder_step_ensemble.onnx_export(decoder_step_pb_path, pytorch_encoder_outputs)

    # single EOS
    input_token = torch.LongTensor(np.array([[model_list[0].dst_dict.eos()]]))
    timestep = torch.LongTensor(np.array([[0]]))

    pytorch_decoder_outputs = decoder_step_ensemble(input_token, timestep,
                                                    *pytorch_encoder_outputs)

    with open(decoder_step_pb_path, "r+b") as f:
        onnx_model = onnx.load(f)
    onnx_decoder = caffe2_backend.prepare(onnx_model)

    decoder_inputs_numpy = [input_token.numpy(), timestep.numpy()]
    for tensor in pytorch_encoder_outputs:
        decoder_inputs_numpy.append(tensor.detach().numpy())

    caffe2_decoder_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i in range(len(pytorch_decoder_outputs)):
        caffe2_out_value = caffe2_decoder_outputs[i]
        pytorch_out_value = pytorch_decoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(caffe2_out_value, pytorch_out_value,
                                   rtol=1e-4, atol=1e-6)

    decoder_step_ensemble.save_to_db(
        os.path.join(tmp_dir, "decoder_step.predictor_export"),
        pytorch_encoder_outputs,
    )
def evaluate(args):
    print('Prepare input data')
    rep = backend.prepare(model, device="CUDA:0")  # or "CPU"
    prepared_backend = rep
    c2_workspace = prepared_backend.workspace
    c2_model = prepared_backend.predict_net
    init_net, predict_net = mobile_exporter.Export(c2_workspace, c2_model,
                                                   c2_model.external_input)
    with open('init_net.pb', "wb") as fopen:
        fopen.write(init_net.SerializeToString())
    with open('predict_net.pb', "wb") as fopen:
        fopen.write(predict_net.SerializeToString())

    args.image = 'Menpo51220/val/0000018.jpg'
    aim = args.image
    im = cv2.imread(aim)
    imshape = im.shape
    args.face = [0, 0, imshape[0], imshape[1]]
    image = normalize(im)

    # network forward
    c_locs, c_scors, heatmap = rep.run(image)
    # obtain the locations on the image in the original size
    print(c_locs)
    # print(c_scors, '\n\n\n')
    print(heatmap)
    c_locations = c_locs[:-1, :]
    c_locations[:, 0], c_locations[:, 1] = \
        c_locations[:, 0] * imshape[1] / 256., c_locations[:, 1] * imshape[0] / 256.
    c_scores = np.expand_dims(c_scors[:-1], -1)
    print(c_locations.shape)
    print(c_scores.shape)
    c_pred_pts = np.concatenate((c_locations, c_scores), axis=1).transpose(1, 0)
    c_pred_pts = np.transpose(c_pred_pts, [1, 0])
    c_pred_pts = c_pred_pts[:, :-1]
    # print(c_pred_pts, '\n\n\n')
    sim = draw_pts(im, pred_pts=c_pred_pts, get_l1e=False)
    cv2.imwrite('caf_0.jpg', sim)

    if args.save:
        json_file = os.path.splitext(aim)[0] + '.jpg'
        save_path = os.path.join(args.save, 'caf' + json_file)
        sim = draw_pts(im, pred_pts=c_pred_pts, get_l1e=False)
        # print(pred_pts)
        cv2.imwrite(save_path, sim)
        input('save1')
        # image.save(args.save)
        # print('save the visualization results into {:}'.format(args.save))
    else:
        print('ignore the visualization procedure')
def __init__(self, onnx_model_pb_path, backend="caffe2"):
    """Init."""
    if backend == "caffe2":
        from caffe2.python.onnx.backend import prepare
    else:
        raise RuntimeError(
            "Backend {} is not supported.".format(backend))
    onnx_model = onnx.load(onnx_model_pb_path)
    self.model = prepare(onnx_model)
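# A minimal usage sketch for the constructor above. Only __init__ is shown in
# the snippet, so the class name OnnxPredictor and the model path are
# hypothetical placeholders.
predictor = OnnxPredictor("model.onnx", backend="caffe2")
# predictor.model is a prepared Caffe2 backend rep; run() accepts numpy inputs.
outputs = predictor.model.run(np.random.randn(1, 3, 224, 224).astype(np.float32))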
def testAll(self):
    dcr = dc.lstm(self.dc_x, self.dc_w, self.dc_r)
    print(dcr[0])
    # print(dcr)
    # np.testing.assert_allclose(self.onnx_npr_su.astype(np.float32),
    #                            np.array(dcr.data()).astype(np.float32),
    #                            rtol=1e-3, atol=1e-3)
    # print(self.dc_h)
    model = onnx.load('./parser/unit_operators/testcases/LSTM/LSTM.onnx')
    rep = backend.prepare(model, device='CPU')
def run_on_caffe2():
    import onnx
    import caffe2.python.onnx.backend as onnx_caffe2_backend

    model = onnx.load('shapenet.onnx')
    onnx.checker.check_model(model)
    onnx.helper.printable_graph(model.graph)
    # print('model ', model)
    prepared_backend = onnx_caffe2_backend.prepare(model)
    W = {model.graph.input[0].name:
         get_img_data('/home/tamvm/Downloads/ibug_300W_large_face_landmark_dataset')}
    # W = {model.graph.input[0].name: get_dummy_data().data.numpy()}
    c2_out = prepared_backend.run(W)[0]
def check_onnx_recognition():
    model = onnx.load("/world/data-gpu-94/fenghui/mobilefacenet_3.onnx")
    predictor = backend.prepare(model, device="CUDA")
    x = cv2.imread('/world/data-gpu-94/fenghui/onnx_model/wyq_128_128.jpg')
    img = x.copy()
    batch = []
    x = cv2.resize(x, (112, 112))
    x = x / 256.0
    x = x.transpose(2, 0, 1)
    x = np.float32([x])
    res = predictor.run(x)
    print(res)
def __init__(self, path):
    # parameters
    self.path = path

    # config from path
    try:
        yaml_path = self.path + "/cfg.yaml"
        print("Opening config file %s" % yaml_path)
        self.CFG = yaml.load(open(yaml_path, 'r'))
    except Exception as e:
        print(e)
        print("Error opening cfg.yaml file from trained model.")
        quit()

    # make a colorizer
    self.colorizer = Colorizer(self.CFG["dataset"]["color_map"])

    # get the data
    parserModule = imp.load_source(
        "parserModule",
        booger.TRAIN_PATH + '/tasks/segmentation/dataset/' +
        self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(
        img_prop=self.CFG["dataset"]["img_prop"],
        img_means=self.CFG["dataset"]["img_means"],
        img_stds=self.CFG["dataset"]["img_stds"],
        classes=self.CFG["dataset"]["labels"],
        train=False)

    # some useful data
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = np.array(self.means, dtype=np.float32)
    self.stds = np.array(self.stds, dtype=np.float32)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights?
    try:
        self.onnx_path = os.path.join(self.path, "model.onnx")
        self.model = onnx.load(self.onnx_path)
        print("Successfully loaded ONNX weights from ", self.onnx_path)
    except Exception as e:
        print("Couldn't load ONNX network. Error: ", e)
        quit()

    # prepare caffe2 model on the proper device
    if torch.cuda.is_available():
        self.device = "CUDA"
    else:
        self.device = "CPU"
    print("Building ONNX Caffe2 backend with device ", self.device)
    self.engine = backend.prepare(self.model, device=self.device)
def verify_onnx(self, torch_out, input_data, precision, model_path):
    import onnx
    import caffe2.python.onnx.backend as onnx_caffe2_backend
    import numpy as np

    model = onnx.load(model_path)
    prepared_backend = onnx_caffe2_backend.prepare(model)
    W = {model.graph.input[0].name: input_data.data.numpy()}
    c2_out = prepared_backend.run(W)[0]
    np.testing.assert_almost_equal(torch_out.data.cpu().numpy(), c2_out,
                                   decimal=precision)
    print("Model passed precision test")
def caffe2_evaluation(self):
    # Load the onnx model with the onnx module
    model = onnx.load(self.onnx_model)
    # For caffe2, a backend must be prepared (similar to a "session" in tensorflow)
    prepared_backend = onnx_caffe2_backend.prepare(model, device="CPU")
    # Initialize the static graph
    W = {model.graph.input[0].name: self.x}
    # Inference
    caffe2_predictions = prepared_backend.run(W)[0]
    # Calculate accuracy
    score = Evaluation.score(y_true=self.y, y_pred=caffe2_predictions)
    print(f"Caffe2 accuracy: {score}")
def __init__(self, onnx_model_path, cfg):
    super(ONNX_FCOS, self).__init__()
    self.onnx_model = backend.prepare(onnx.load(onnx_model_path),
                                      device=cfg.MODEL.DEVICE.upper())
    # Note that we still use PyTorch for postprocessing
    self.postprocessing = FCOSPostProcessor(
        pre_nms_thresh=cfg.MODEL.FCOS.INFERENCE_TH,
        pre_nms_top_n=cfg.MODEL.FCOS.PRE_NMS_TOP_N,
        nms_thresh=cfg.MODEL.FCOS.NMS_TH,
        fpn_post_nms_top_n=cfg.TEST.DETECTIONS_PER_IMG,
        min_size=0,
        num_classes=cfg.MODEL.FCOS.NUM_CLASSES)
    self.cfg = cfg
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
def main():
    # Load the ONNX model
    model = onnx.load(args.onnx_import_path)
    # Check that the IR is well formed
    onnx.checker.check_model(model)
    # Print a human-readable representation of the graph
    print(onnx.helper.printable_graph(model.graph))
    # import to caffe2
    rep = backend.prepare(model, device="CPU")
    outputs = rep.run(np.random.randn(96, 3, 32, 32).astype(np.float32))
    print(outputs)
def run_generated_test(model_file, data_dir, device='CPU'):
    model = onnx.load(model_file)
    input_num = len(glob.glob(os.path.join(data_dir, "input_*.pb")))
    inputs = []
    for i in range(input_num):
        inputs.append(numpy_helper.to_array(load_tensor_as_numpy_array(
            os.path.join(data_dir, "input_{}.pb".format(i)))))
    output_num = len(glob.glob(os.path.join(data_dir, "output_*.pb")))
    outputs = []
    for i in range(output_num):
        outputs.append(numpy_helper.to_array(load_tensor_as_numpy_array(
            os.path.join(data_dir, "output_{}.pb".format(i)))))
    prepared = c2.prepare(model, device=device)
    c2_outputs = prepared.run(inputs)
    assert_similar(outputs, c2_outputs)
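# A hypothetical invocation of run_generated_test above: data_dir is expected
# to contain serialized TensorProto files named input_0.pb, input_1.pb, ... and
# output_0.pb, output_1.pb, ... (the paths below are placeholders).
run_generated_test("node_test/model.onnx", "node_test/test_data_set_0", device="CPU")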
def transform(self, X, interpret_fn: Callable = None, return_logits: bool = False,
              *args, **kwargs):
    if self._model is None and self._onnx_model is None:
        return
    is_pytorch = self.is_pytorch_module()
    rep = None
    if is_pytorch:
        self._model.eval()
    elif self._onnx_model is not None:
        import caffe2.python.onnx.backend as backend
        rep = backend.prepare(self._onnx_model, device='CPU')

    with torch.no_grad():
        if not torch.is_tensor(X) and self._featurizer is not None:
            X = self._featurizer.transform(X)
        X = self.preprocess_input(X)
        if X is None:
            logits = None
        else:
            if self._predict_fn is None:
                if self._onnx_model is None:
                    logits = self._model(X)
                else:
                    if torch.is_tensor(X):
                        X = X.numpy()
                    warnings.warn('Running inference as onnx model')
                    logits = rep.run(X)[0]
                    logits = torch.from_numpy(logits)
            else:
                logits = self._predict_fn(X)

    if is_pytorch:
        self._model.train()
    if return_logits or logits is None:
        return logits
    elif interpret_fn is not None:
        return interpret_fn(logits, *args, **kwargs)
    else:
        return self.infer_predict(logits, *args, **kwargs)
def onnx_eval(args, onnx_model, eval_data, verbose=False):
    # Run prediction for full data
    args.eval_batch_size = 1
    if verbose:
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_data))
        logger.info("  Batch size = %d", args.eval_batch_size)
    eval_dataloader = DataLoader(eval_data, shuffle=False,
                                 batch_size=args.eval_batch_size)

    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    all_logits = []

    if args.onnx_framework == 'caffe2':
        import caffe2.python.onnx.backend as backend
        prepared_backend = backend.prepare(onnx_model)
        model = lambda x, y, z: prepared_backend.run((x, y, z))
    else:
        raise NotImplementedError(args.onnx_framework)

    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", leave=verbose, dynamic_ncols=True):
        input_ids = input_ids.numpy()
        input_mask = input_mask.numpy()
        segment_ids = segment_ids.numpy()
        label_ids = label_ids.numpy()

        logits = model(input_ids, segment_ids, input_mask)
        tmp_eval_accuracy = accuracy(logits, label_ids)

        if verbose:
            all_logits.append(logits)
        eval_accuracy += tmp_eval_accuracy
        nb_eval_examples += input_ids.shape[0]
        nb_eval_steps += 1

    eval_accuracy = eval_accuracy / nb_eval_examples
    eval_probs = None
    if all_logits:
        eval_probs = np_softmax(np.concatenate(all_logits)).squeeze()
    return None, eval_accuracy, eval_probs
def _test_full_beam_decoder(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    sample = next(samples)
    src_tokens = sample['net_input']['src_tokens'][0:1].t()
    src_lengths = sample['net_input']['src_lengths'][0:1].int()

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(models.build_model(test_args, src_dict, tgt_dict))

    bs = BeamSearch(model_list, src_tokens, src_lengths, beam_size=6)
    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(11)
    prev_hypos_indices = torch.zeros(6, dtype=torch.int64)

    outs = bs(src_tokens, src_lengths, prev_token, prev_scores,
              attn_weights, prev_hypos_indices, torch.LongTensor([20]))

    import io
    f = io.BytesIO()
    torch.onnx._export(
        bs,
        (src_tokens, src_lengths, prev_token, prev_scores,
         attn_weights, prev_hypos_indices, torch.LongTensor([20])),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs)

    torch.onnx._export_to_pretty_string(
        bs,
        (src_tokens, src_lengths, prev_token, prev_scores,
         attn_weights, prev_hypos_indices, torch.LongTensor([20])),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs)

    f.seek(0)
    import onnx
    onnx_model = onnx.load(f)
    c2_model = caffe2_backend.prepare(onnx_model)
    c2_model.run(
        (src_tokens.numpy(), src_lengths.numpy(), prev_token.numpy(),
         prev_scores.numpy(), attn_weights.numpy(),
         prev_hypos_indices.numpy(), np.array([20])))
def _test_ensemble_encoder_export(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(models.build_model(test_args, src_dict, tgt_dict))
    encoder_ensemble = EncoderEnsemble(model_list)

    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, 'encoder.pb')
    encoder_ensemble.onnx_export(encoder_pb_path)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample['net_input']['src_tokens'][0:1].t()
    src_lengths = sample['net_input']['src_lengths'][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    with open(encoder_pb_path, 'r+b') as f:
        onnx_model = onnx.load(f)
    onnx_encoder = caffe2_backend.prepare(onnx_model)

    caffe2_encoder_outputs = onnx_encoder.run(
        (
            src_tokens.numpy(),
            src_lengths.numpy(),
        ),
    )

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].data.numpy()
        np.testing.assert_allclose(
            caffe2_out_value,
            pytorch_out_value,
            rtol=1e-4,
            atol=1e-6,
        )

    encoder_ensemble.save_to_db(
        os.path.join(tmp_dir, 'encoder.predictor_export'),
    )
def test_rnn_init_predict_split(self):
    model = nn.LSTM(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, 3, bidirectional=True)
    seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=7)
    seq_lengths = list(reversed(sorted(map(int, seq_lengths))))
    input = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
    input = rnn_utils.pad_sequence(input)

    # Test that we are correctly splitting between init and
    # predict net. When we embed parameters, there should be more
    # ops in the init net.
    mp = onnx.ModelProto.FromString(
        do_export(model, input, export_params=self.embed_params)[0])
    prepared = c2.prepare(mp, device='CPU')
    if self.embed_params:
        assert len(prepared.init_net.op) == 1038
        assert len(prepared.predict_net.op) == 101
    else:
        assert len(prepared.init_net.op) == 27
        assert len(prepared.predict_net.op) == 1112
def test_relu_graph(self):
    X = np.random.randn(3, 2).astype(np.float32)
    Y_ref = np.clip(X, 0, np.inf)

    node_def = make_node("Relu", ["X"], ["Y"])
    output = c2.run_node(node_def, {"X": X})
    np.testing.assert_almost_equal(output.Y, Y_ref)

    graph_def = make_graph(
        [node_def],
        name="test",
        inputs=[make_tensor_value_info("X", onnx.TensorProto.FLOAT, [3, 2])],
        outputs=[make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [3, 2])])
    c2_rep = c2.prepare(make_model(graph_def, producer_name='caffe2-ref-test'))
    output = c2_rep.run(X)
    np.testing.assert_almost_equal(output.Y, Y_ref)
def run_embed_params(proto, model, input, state_dict=None, use_gpu=True):
    """
    This is only a helper debug function, so we can also test the
    embed_params=False case on the pytorch front.
    It should likely be removed from the release version of the code.
    """
    device = 'CPU'
    if use_gpu:
        device = 'CUDA'
    model_def = onnx.ModelProto.FromString(proto)
    onnx.checker.check_model(model_def)
    prepared = c2.prepare(model_def, device=device)
    if state_dict:
        parameters = []
        # The passed-in state_dict may have a different order. Make
        # sure our order is consistent with the model's order.
        # TODO: Even better: keyword arguments!
        for k in model.state_dict():
            if k not in state_dict:
                # If the PyTorch module gained a new parameter, an old
                # pre-trained model will not have it; simply use the new one.
                # TODO: Please don't export unnecessary parameters.
                parameters.append(model.state_dict()[k])
            else:
                parameters.append(state_dict[k])
    else:
        parameters = list(model.state_dict().values())

    W = {}
    for k, v in zip(model_def.graph.input, flatten((input, parameters))):
        if isinstance(v, Variable):
            W[k.name] = v.data.cpu().numpy()
        else:
            W[k.name] = v.cpu().numpy()

    caffe2_out = prepared.run(inputs=W)
    return caffe2_out
def test_onnx_to_caffe2_if(self):
    true_nodes = [helper.make_node("MatMul", ["X", "W"], ["_Y"])]
    false_nodes = [helper.make_node("Slice", ["X"], ["_Y"], axes=[0, 1],
                                    starts=[0, 0], ends=[0, 2])]
    nodes = self._make_fake_if_op(true_nodes, false_nodes,
                                  [(TensorProto.FLOAT, (2, 2), "Y")])
    X = np.random.rand(2, 3).astype(np.float32)
    W = np.random.rand(3, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        nodes,
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        initializer=[helper.make_tensor("W", TensorProto.FLOAT, [3, 2], W.tolist())])
    model_def = helper.make_model(graph_def, producer_name='onnx-to-caffe2-test')
    p = c2.prepare(model_def)
    Y = np.matmul(X, W.reshape(3, 2))
    out = p.run(X)
    np.testing.assert_allclose(out.Y, Y)