def test_model_knn_regressor_radius(self):
    """
    Compares the ONNX conversion of a *RadiusNeighborsRegressor*
    (converted with option ``'optim': 'cdist'``) with scikit-learn
    predictions, up to 3 decimals.

    If the ONNX output contains nan values, the output of every
    intermediate node is gathered into a report. The test then returns
    silently on onnxruntime older than 1.4.0 and raises an
    AssertionError holding the report otherwise.
    """
    model, X = self._fit_model(RadiusNeighborsRegressor())
    initial_types = [("input", FloatTensorType([None, 4]))]
    model_onnx = convert_sklearn(
        model, "KNN regressor", initial_types,
        target_opset=TARGET_OPSET,
        options={id(model): {'optim': 'cdist'}})
    X32 = X.astype(numpy.float32)
    feeds = {'input': X32}
    got = InferenceSession(
        model_onnx.SerializeToString()).run(None, feeds)[0]
    exp = model.predict(X32)

    if not any(numpy.isnan(got.ravel())):
        assert_almost_equal(exp.ravel(), got.ravel(), decimal=3)
        return

    # The model is unexpectedly producing nan values
    # not on all platforms.
    report = ['--EXP--', str(exp), '--GOT--', str(got),
              '--EVERY-OUTPUT--']
    for name in enumerate_model_node_outputs(model_onnx, add_node=False):
        partial = select_model_inputs_outputs(model_onnx, name)
        partial_sess = InferenceSession(partial.SerializeToString())
        partial_res = partial_sess.run(None, feeds)
        report.extend(['--{}--'.format(name), str(partial_res)])
    too_old = (StrictVersion(onnxruntime.__version__) <
               StrictVersion("1.4.0"))
    if too_old:
        return
    raise AssertionError('\n'.join(report))
def _display_intermediate_steps(model_onnx, inputs, disable_optimisation):
    """
    Prints, for debugging purposes, every initializer and every
    intermediate node output of an ONNX model.

    For each node output, a sub-model ending at that output is built with
    *select_model_inputs_outputs* and loaded into onnxruntime. The inputs
    and outputs of the resulting session are printed and, when *inputs*
    is not empty, the sub-model is run and its results are printed.

    :param model_onnx: ONNX model, or a string given to ``onnx.load``
    :param inputs: input feed handed to ``InferenceSession.run``
        (presumably a dictionary ``{input name: array}`` -- confirm
        against callers); nothing is run when it is falsy
    :param disable_optimisation: if true and the installed onnxruntime
        exposes ``GraphOptimizationLevel``, sessions are created with
        ``ORT_DISABLE_ALL``
    :raises RuntimeError: if onnxruntime cannot load one of the
        sub-models; the original exception is chained
    """
    import onnxruntime
    print("[_display_intermediate_steps] BEGIN")
    if isinstance(model_onnx, str):
        import onnx
        model_onnx = onnx.load(model_onnx)

    for name, node in enumerate_model_initializers(model_onnx, add_node=True):
        print("INIT: {} - {}".format(name, _guess_type(node)))

    for out, node in enumerate_model_node_outputs(model_onnx, add_node=True):
        print('-')
        print("OUTPUT: {} from {}".format(out, node.name))
        step = select_model_inputs_outputs(model_onnx, out)
        if (disable_optimisation and
                hasattr(onnxruntime, 'GraphOptimizationLevel')):
            opts = onnxruntime.SessionOptions()
            opts.graph_optimization_level = (
                onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL)
        else:
            opts = None
        try:
            step_sess = onnxruntime.InferenceSession(
                step.SerializeToString(), sess_options=opts)
        except Exception as e:
            raise RuntimeError("Unable to load ONNX model with onnxruntime. "
                               "Last added node is:\n{}".format(node)) from e
        for o in step_sess.get_inputs():
            print("IN :", o)
        for o in step_sess.get_outputs():
            print("OUT: ", o)
        if inputs:
            # InferenceSession.run expects (output_names, input_feed):
            # None requests every output of the sub-model.
            res = step_sess.run(None, inputs)
            print(res)
    print("[_display_intermediate_steps] END")
def _display_intermediate_steps(model_onnx, inputs):
    """
    Prints, for debugging purposes, every initializer and every
    intermediate node output of an ONNX model.

    For each node output, a sub-model ending at that output is built with
    *select_model_inputs_outputs* and loaded into onnxruntime. The inputs
    and outputs of the resulting session are printed and, when *inputs*
    is not empty, the sub-model is run and its results are printed.

    :param model_onnx: ONNX model, or a string given to ``onnx.load``
    :param inputs: input feed handed to ``InferenceSession.run``
        (presumably a dictionary ``{input name: array}`` -- confirm
        against callers); nothing is run when it is falsy
    :raises RuntimeError: if onnxruntime cannot load one of the
        sub-models; the original exception is chained
    """
    import onnxruntime
    print("[_display_intermediate_steps] BEGIN")
    if isinstance(model_onnx, str):
        import onnx
        model_onnx = onnx.load(model_onnx)

    for name, node in enumerate_model_initializers(model_onnx, add_node=True):
        print("INIT: {} - {}".format(name, _guess_type(node)))

    for out, node in enumerate_model_node_outputs(model_onnx, add_node=True):
        print('-')
        print("OUTPUT: {} from {}".format(out, node.name))
        step = select_model_inputs_outputs(model_onnx, out)
        try:
            step_sess = onnxruntime.InferenceSession(
                step.SerializeToString())
        except Exception as e:
            raise RuntimeError("Unable to load ONNX model with onnxruntime. "
                               "Last added node is:\n{}".format(node)) from e
        for o in step_sess.get_inputs():
            print("IN :", o)
        for o in step_sess.get_outputs():
            print("OUT: ", o)
        if inputs:
            # InferenceSession.run expects (output_names, input_feed):
            # None requests every output of the sub-model.
            res = step_sess.run(None, inputs)
            print(res)
    print("[_display_intermediate_steps] END")
def test_model_knn_regressor2_1_radius(self):
    """
    Checks the ONNX conversion of a two-target *RadiusNeighborsRegressor*
    (brute force algorithm) against scikit-learn, on all rows but the
    last one, up to 5 decimals.

    When onnxruntime returns nan values, the output of every intermediate
    node is gathered into a report. With onnxruntime 1.4.x or 1.5.x and a
    report containing ``"[ nan"``, the report is emitted as a warning and
    the test stops. In every other case the report is raised as an
    AssertionError.
    """
    model, X = self._fit_model_simple(
        RadiusNeighborsRegressor(algorithm="brute"), n_targets=2)
    X = X[:-1]
    model_onnx = convert_sklearn(
        model, "KNN regressor",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)
    sess = InferenceSession(model_onnx.SerializeToString())
    got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
    exp = model.predict(X.astype(numpy.float32))
    if any(numpy.isnan(got.ravel())):
        # The model is unexpectedly producing nan values
        # not on all platforms.
        # It happens when two matrices are multiplied,
        # one is (2, 20, 20), second is (20, 20)
        # and contains only 0 or 1 values.
        # The output contains nan values on the first row
        # but not on the second one.
        rows = [
            '--EXP--', str(exp), '--GOT--', str(got), '--EVERY-OUTPUT--'
        ]
        for out in enumerate_model_node_outputs(model_onnx, add_node=False):
            onx = select_model_inputs_outputs(model_onnx, out)
            sess = InferenceSession(onx.SerializeToString())
            res = sess.run(None, {'input': X.astype(numpy.float32)})
            rows.append('--{}--'.format(out))
            rows.append(str(res))
        # Joined once, before any branch: the 1.3 / win32 branch below
        # used to reference this name without it ever being assigned.
        whole = '\n'.join(rows)
        if onnxruntime.__version__.startswith(('1.4.', '1.5.')):
            # TODO: investigate the regression in onnxruntime 1.4
            # One broadcasted multiplication unexpectedly produces nan.
            if "[ nan" in whole:
                warnings.warn(whole)
                return
            raise AssertionError(whole)
        if (onnxruntime.__version__.startswith('1.3.') and
                sys.platform == 'win32'):
            # Same error but different line number for further
            # investigation.
            raise AssertionError(whole)
        raise AssertionError(whole)
    assert_almost_equal(exp, got, decimal=5)
def _modify_model_add_outputs_nodes(self, model_dir):
    """
    Build and save a copy of the model that exposes every node output.

    The model at ``self.args.model_path`` is loaded, and each node without
    a name is named ``<op_type>_<index>`` in place. A new model whose
    outputs are all the node outputs is then created with
    *select_model_inputs_outputs* and saved in *model_dir* under the
    original file name prefixed with ``new_``.

    :param model_dir: directory receiving the modified model
    :return: tuple ``(loaded original model, path of the new model)``
    """
    source_path = self.args.model_path
    old_onnx_model = onnx.load(source_path)
    utils.print_info_log("load model success")

    # Anonymous nodes receive a name derived from their operator type
    # and their position in the graph.
    for position, graph_node in enumerate(old_onnx_model.graph.node):
        if graph_node.name:
            continue
        graph_node.name = "{}_{}".format(graph_node.op_type, position)

    outputs_name = list(enumerate_model_node_outputs(old_onnx_model))
    new_onnx_model = select_model_inputs_outputs(
        old_onnx_model, outputs_name)

    target_name = "new_" + os.path.basename(source_path)
    new_onnx_model_path = os.path.join(model_dir, target_name)
    save_onnx_model(new_onnx_model, new_onnx_model_path)
    utils.print_info_log("modify model outputs success")
    return old_onnx_model, new_onnx_model_path
def test_onnx_helper_load_save(self):
    """
    Round-trips a converted StandardScaler + Binarizer pipeline through
    *save_onnx_model* / *load_onnx_model*, cuts it at output
    ``'variable'`` and checks that both the loaded and the cut model
    return a ``(2, 2)`` array.
    """
    X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
    pipeline = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
    pipeline.fit(X)
    initial_types = [('input', FloatTensorType([1, 2]))]
    model_onnx = convert_sklearn(pipeline, 'binarizer', initial_types)

    filename = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(model_onnx, filename)
    loaded = load_onnx_model(filename)

    # The enumeration is only checked to run without error.
    list(enumerate_model_node_outputs(loaded))
    truncated = select_model_inputs_outputs(loaded, 'variable')
    assert truncated.graph is not None

    run_loaded = self.get_model(loaded)
    run_truncated = self.get_model(truncated)
    X32 = X.astype(numpy.float32)
    out_loaded = run_loaded(X32)
    out_truncated = run_truncated(X32)
    assert out_loaded.shape == (2, 2)
    assert out_truncated.shape == (2, 2)
def test_onnx_helper_load_save_init(self):
    """
    Round-trips a converted Binarizer + OneHotEncoder + StandardScaler
    pipeline through *save_onnx_model* / *load_onnx_model*, cuts it at
    output ``'variable'`` and checks that both the loaded and the cut
    model return a ``(4, 2)`` array.
    """
    X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
    pipeline = make_pipeline(
        Binarizer(), OneHotEncoder(sparse=False), StandardScaler())
    pipeline.fit(X)
    initial_types = [('input', FloatTensorType([1, 2]))]
    model_onnx = convert_sklearn(pipeline, 'pipe3', initial_types)

    filename = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(model_onnx, filename)
    loaded = load_onnx_model(filename)

    # The enumeration is only checked to run without error.
    list(enumerate_model_node_outputs(loaded))
    truncated = select_model_inputs_outputs(loaded, 'variable')
    self.assertIsNotNone(truncated.graph)

    run_loaded = self.get_model(loaded)
    run_truncated = self.get_model(truncated)
    X32 = X.astype(numpy.float32)
    out_loaded = run_loaded(X32)
    out_truncated = run_truncated(X32)
    self.assertEqual(out_loaded.shape, (4, 2))
    self.assertEqual(out_truncated.shape, (4, 2))
def test_onnx_helper_load_save(self):
    """
    Round-trips a converted StandardScaler + Binarizer pipeline through
    *save_onnx_model* / *load_onnx_model* after setting its IR version
    from ``TARGET_OPSET``, cuts it at output ``'variable'`` and checks
    that both the loaded and the cut model return a ``(2, 2)`` array.
    """
    X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
    pipeline = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
    pipeline.fit(X)
    initial_types = [('input', FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(pipeline, 'binarizer', initial_types)
    model_onnx.ir_version = get_ir_version(TARGET_OPSET)

    filename = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(model_onnx, filename)
    loaded = load_onnx_model(filename)

    # The enumeration is only checked to run without error.
    list(enumerate_model_node_outputs(loaded))
    truncated = select_model_inputs_outputs(loaded, 'variable')
    self.assertIsNotNone(truncated.graph)  # pylint: disable=E1101

    run_loaded = self.get_model(loaded)
    run_truncated = self.get_model(truncated)
    X32 = X.astype(numpy.float32)
    out_loaded = run_loaded(X32)
    out_truncated = run_truncated(X32)
    self.assertEqual(out_loaded.shape, (2, 2))
    self.assertEqual(out_truncated.shape, (2, 2))
def enumerate_create(onnx_bytes, output_names=None, enforce_float32=True):
    """
    Creates multiple *OnnxTransformer*,
    one for each requested intermediate node.

    onnx_bytes : bytes
    output_names: string or iterable of strings
        requested output names or None to request all and
        have method *transform* to store all of them in a dataframe;
        a single string is treated as one output name
    enforce_float32 : boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :return: iterator on OnnxTransformer *('output name', OnnxTransformer)*
    """
    if output_names is None:
        selected = None
    elif isinstance(output_names, str):
        # set("name") would split the name into its characters.
        selected = {output_names}
    else:
        selected = set(output_names)
    model = load_onnx_model(onnx_bytes)
    for out in enumerate_model_node_outputs(model):
        if selected is not None and out not in selected:
            # Skip before building the sub-model: it would be discarded.
            continue
        m = select_model_inputs_outputs(model, out)
        tr = OnnxTransformer(m.SerializeToString(),
                             enforce_float32=enforce_float32)
        yield out, tr
def test_model_knn_regressor2_1_radius(self):
    """
    Compares the ONNX conversion of a two-target
    *RadiusNeighborsRegressor* (brute force algorithm) with scikit-learn
    predictions, up to 5 decimals.

    If the ONNX output contains nan values, the output of every
    intermediate node is gathered into a report. The test then returns
    silently on onnxruntime older than 1.4.0 and raises an
    AssertionError holding the report otherwise.
    """
    model, X = self._fit_model_simple(
        RadiusNeighborsRegressor(algorithm="brute"), n_targets=2)
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model, "KNN regressor", initial_types,
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)
    X32 = X.astype(numpy.float32)
    feeds = {'input': X32}
    got = InferenceSession(
        model_onnx.SerializeToString()).run(None, feeds)[0]
    exp = model.predict(X32)

    if not any(numpy.isnan(got.ravel())):
        assert_almost_equal(exp, got, decimal=5)
        return

    # The model is unexpectedly producing nan values
    # not on all platforms.
    # It happens when two matrices are multiplied,
    # one is (2, 20, 20), second is (20, 20)
    # and contains only 0 or 1 values.
    # The output contains nan values on the first row
    # but not on the second one.
    report = ['--EXP--', str(exp), '--GOT--', str(got),
              '--EVERY-OUTPUT--']
    for name in enumerate_model_node_outputs(model_onnx, add_node=False):
        partial = select_model_inputs_outputs(model_onnx, name)
        partial_sess = InferenceSession(partial.SerializeToString())
        partial_res = partial_sess.run(None, feeds)
        report.extend(['--{}--'.format(name), str(partial_res)])
    too_old = (StrictVersion(onnxruntime.__version__) <
               StrictVersion("1.4.0"))
    if too_old:
        return
    raise AssertionError('\n'.join(report))
pred_onx = sess.run(None, inputs)
print("predict", pred_onx[0][:5])
print("predict_proba", pred_onx[1][:1])

####################################
# Compute intermediate outputs
# ++++++++++++++++++++++++++++
#
# Unfortunately, there is actually no way to ask
# *onnxruntime* to retrieve the output of intermediate nodes.
# We need to modify the *ONNX* model before it is given to *onnxruntime*.
# Let's first see the list of intermediate outputs.

model_onnx = load_onnx_model("pipeline_titanic.onnx")
for out in enumerate_model_node_outputs(model_onnx):
    print(out)

################################
# It is not that easy to tell which one is what as the *ONNX* graph
# has more operators than the original *scikit-learn* pipelines.
# The graph at :ref:`l-plot-complex-pipeline-graph`
# helps us to find the outputs of both the numerical
# and textual pipelines: *variable1*, *variable2*.
# Let's look into the numerical pipeline first.

num_onnx = select_model_inputs_outputs(model_onnx, 'variable1')
save_onnx_model(num_onnx, "pipeline_titanic_numerical.onnx")

################################
# Let's compute the numerical features.
def print_model_outputs(model_path, input_tensor, print_tensor=False):
    """
    Call *print_specific_output* for every node output of a model.

    :param model_path: location of the ONNX model, given to
        *load_onnx_model* and forwarded to *print_specific_output*
    :param input_tensor: forwarded unchanged to *print_specific_output*
    :param print_tensor: forwarded unchanged to *print_specific_output*
    """
    loaded = load_onnx_model(model_path)
    for output_name in enumerate_model_node_outputs(loaded):
        print_specific_output(
            model_path, input_tensor, output_name, print_tensor)
def get_io_shapes(model):
    """
    Return a dictionary ``name -> shape`` covering the model input and
    every node output.

    The model must have exactly one non-initializer input, of element
    type FLOAT (both are asserted). Every node output is appended to the
    graph outputs, the model is run once with onnxruntime on a zero
    tensor, and the shapes of the produced arrays are recorded. Input
    dimensions whose ``dim_value`` is 0 are replaced by 1.
    """
    node_outputs = list(enumerate_model_node_outputs(model))
    initializer_names = [init.name for init in model.graph.initializer]
    inputs = [
        inp for inp in model.graph.input
        if inp.name not in initializer_names
    ]
    assert len(inputs) == 1
    assert inputs[0].type.tensor_type.elem_type == onnx.TensorProto.FLOAT
    dtype = np.float32

    # Feed the model with zero tensors and remember the input shapes.
    shapes = {}
    feeds = {}
    for inp in inputs:
        dims = tuple(
            d.dim_value or 1 for d in inp.type.tensor_type.shape.dim)
        feeds[inp.name] = np.zeros(dims, dtype=dtype)
        shapes[inp.name] = dims

    # Node outputs whose name is not already recorded as an input.
    pending = [name for name in node_outputs if name not in shapes]

    # Existing outputs first, then one bare value info per node output.
    new_out = list(model.graph.output)
    for name in pending:
        info = ValueInfoProto()
        info.name = name
        new_out.append(info)

    graph = make_graph(model.graph.node, model.graph.name,
                       model.graph.input, new_out,
                       model.graph.initializer)
    # The added outputs carry no type / shape information, hence the
    # check_model step is skipped; the model is still executed below.
    new_onnx_model = make_model_with_graph(model, graph, check_model=False)
    sess = ort.InferenceSession(new_onnx_model.SerializeToString())
    results = sess.run(None, feeds)
    by_name = dict(zip((o.name for o in sess.get_outputs()), results))

    for name in pending:
        if name not in shapes:
            shapes[name] = by_name[name].shape
    return shapes
def stan_select_model_inputs_outputs(model, dtype, inputs, outputs, io_shapes):
    """
    A modification of select_model_inputs_outputs from sklearn-onnx.

    Takes a model and changes its inputs and outputs.
    The function removes unneeded nodes.

    :param model: *ONNX* model
    :param dtype: ``np.float32`` or ``np.float64``; selects the element
        type (FLOAT or DOUBLE) declared on every new input and output
    :param inputs: new inputs, one name or a list of names
    :param outputs: new outputs, one name or a list of names
    :param io_shapes: mapping ``name -> shape``, looked up for every name
        in *inputs* and *outputs*
    :return: modified model
    :raises NotImplementedError: if *inputs* or *outputs* is None
    :raises ValueError: if a requested name is neither a graph input
        nor a node output of *model*
    """
    # Validate the arguments before doing any work.
    if inputs is None:
        raise NotImplementedError("Parameter inputs cannot be empty.")
    if outputs is None:
        raise NotImplementedError("Parameter outputs cannot be empty.")
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(outputs, list):
        outputs = [outputs]

    if dtype == np.float32:
        elem_type = onnx.TensorProto.FLOAT
    else:
        assert dtype == np.float64
        elem_type = onnx.TensorProto.DOUBLE

    ##########

    # keys are (input or node output) names, vals 1 = keep, 0 = delete
    mark_var = {}
    for out in enumerate_model_node_outputs(model):
        mark_var[out] = 0
    for inp in model.graph.input:
        mark_var[inp.name] = 0
    for out in outputs:
        if out not in mark_var:
            raise ValueError(
                "Desired Output '{}' not found in model.".format(out))

    initializers = [i.name for i in model.graph.initializer]

    for inp in inputs:
        if inp not in mark_var:
            raise ValueError(
                "Desired Input '{}' not found in model.".format(inp))
        if inp not in initializers:
            mark_var[inp] = 1

    nodes = list(enumerate(model.graph.node))

    # these are the marks for the node indices, 1 = keep, 0 = delete
    mark_op = {}
    for node in nodes:
        mark_op[node[0]] = 0

    # We mark all the nodes we need to keep. The sweep over the nodes is
    # repeated until one full pass marks nothing new.
    nb = 1  # number marked... used as a termination condition
    keep_initializers = []
    while nb > 0:
        nb = 0
        for index, node in nodes:
            if mark_op[index] == 1:  # node was already processed, skip
                continue
            mod = False  # is this a newly-marked node?
            node_initializers = []
            for inp in node.input:
                if inp in outputs:
                    continue
                if inp not in mark_var or mark_var.get(inp, 0) == 0:
                    node_initializers.append(inp)  # was initializer
                elif mark_var[inp] == 1:
                    # mark the node because its input was marked
                    mark_op[index] = 1
                    mod = True
            for out in node.output:
                if out in inputs:
                    continue
                if mark_var[out] == 1:
                    # mark the node because the output was marked
                    mark_op[index] = 1
                    mod = True
            if not mod:  # nothing around this node is marked, skip it
                continue
            keep_initializers += node_initializers
            nb += 1
            # mark the node and all its inputs / outputs
            for out in node.output:
                if mark_var.get(out, 0) == 1:
                    continue
                if out in outputs:
                    continue
                mark_var[out] = 1
                nb += 1
            for inp in node.input:
                if mark_var.get(inp, 0) == 1:
                    continue
                if inp in inputs:
                    continue
                mark_var[inp] = 1
                nb += 1

    # Keep every node whose mark is 1.
    keep_nodes = [node[1] for node in nodes if mark_op[node[0]] == 1]

    var_in = []
    for inp in inputs:
        nt = onnx.TypeProto()
        nt.tensor_type.elem_type = elem_type
        # inputs need shape info, which is not in the graph!
        shape = io_shapes[inp]
        for s in shape:
            nt.tensor_type.shape.dim.add()
            nt.tensor_type.shape.dim[-1].dim_value = s
        value_info = ValueInfoProto(type=nt)
        value_info.name = inp
        var_in.append(value_info)

    # add initializers to inputs
    for i in model.graph.input:
        if i.name in keep_initializers:
            var_in.append(i)

    var_out = []
    for out in outputs:
        nt = onnx.TypeProto()
        nt.tensor_type.elem_type = elem_type
        # outputs need shape info, which is not in the graph!
        shape = io_shapes[out]
        for s in shape:
            nt.tensor_type.shape.dim.add()
            nt.tensor_type.shape.dim[-1].dim_value = s
        value_info = ValueInfoProto(type=nt)
        value_info.name = out
        var_out.append(value_info)

    init_out = [
        init for init in model.graph.initializer
        if init.name in keep_initializers
    ]
    graph = make_graph(keep_nodes, model.graph.name, var_in, var_out,
                       init_out)
    onnx_model = make_model_with_graph(model, graph)
    return onnx_model