def test_simple_column_transformer(self):
    if ColumnTransformer is None:
        return
    data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                       dtype=numpy.float32)
    model = ColumnTransformer([("scaler1", StandardScaler(), [0]),
                               ("scaler2", RobustScaler(), [1])])
    model.fit(data)
    all_models = list(enumerate_pipeline_models(model))
    steps = collect_intermediate_steps(
        model, "column transformer",
        [("input", FloatTensorType([None, 2]))])
    assert len(steps) == 2
    assert len(all_models) == 3
    model.transform(data)
    for step in steps:
        onnx_step = step['onnx_step']
        sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
        onnx_outputs = sess.run(None, {'input': data})
        onnx_output = onnx_outputs[0]
        skl_outputs = step['model']._debug.outputs['transform']
        assert_almost_equal(onnx_output, skl_outputs)
        compare_objects(onnx_output.tolist(), skl_outputs.tolist())
def test_simple_pipeline_predict_proba(self):
    data = load_iris()
    X, y = data.data, data.target
    model = Pipeline([("scaler1", StandardScaler()),
                      ("lr", LogisticRegression())])
    model.fit(X, y)
    all_models = list(enumerate_pipeline_models(model))
    steps = collect_intermediate_steps(
        model, "pipeline",
        [("input", FloatTensorType([None, X.shape[1]]))])
    assert len(steps) == 2
    assert len(all_models) == 3
    model.predict_proba(X)
    for step in steps:
        onnx_step = step['onnx_step']
        sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
        onnx_outputs = sess.run(None, {'input': X.astype(numpy.float32)})
        dbg_outputs = step['model']._debug.outputs
        if 'transform' in dbg_outputs:
            onnx_output = onnx_outputs[0]
            skl_outputs = dbg_outputs['transform']
        else:
            onnx_output = onnx_outputs[1]
            skl_outputs = dbg_outputs['predict_proba']
        assert_almost_equal(onnx_output, skl_outputs, decimal=6)
        compare_objects(onnx_output, skl_outputs)
def test_simple_pipeline(self):
    data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                       dtype=numpy.float32)
    model = Pipeline([("scaler1", StandardScaler()),
                      ("scaler2", StandardScaler())])
    model.fit(data)
    all_models = list(enumerate_pipeline_models(model))
    steps = collect_intermediate_steps(
        model, "pipeline", [("input", FloatTensorType([None, 2]))])
    assert len(steps) == 2
    assert len(all_models) == 3
    model.transform(data)
    for step in steps:
        onnx_step = step['onnx_step']
        sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
        onnx_outputs = sess.run(None, {'input': data})
        onnx_output = onnx_outputs[0]
        skl_outputs = step['model']._debug.outputs['transform']
        assert str(step['model']._debug) is not None
        sdt = step['model']._debug.display(data, 5)
        assert 'shape' in sdt
        assert_almost_equal(onnx_output, skl_outputs)
        compare_objects(onnx_output, skl_outputs)
def test_missing_converter(self):
    data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                       dtype=numpy.float32)
    model = Pipeline([("scaler1", StandardScaler()),
                      ("scaler2", StandardScaler()),
                      ("scaler3", MyScaler())])
    model.fit(data)
    all_models = list(enumerate_pipeline_models(model))
    try:
        collect_intermediate_steps(
            model, "pipeline",
            [("input", FloatTensorType([None, 2]))],
            target_opset=TARGET_OPSET)
    except MissingShapeCalculator as e:
        assert "MyScaler" in str(e)
        assert "gallery" in str(e)

    _alter_model_for_debugging(model, recursive=True)
    model.transform(data)
    all_models = list(enumerate_pipeline_models(model))

    for ind, step, last in all_models:
        if ind == (0,):
            # whole pipeline
            continue
        step_model = step
        data_in = step_model._debug.inputs['transform']
        t = guess_data_type(data_in)
        try:
            onnx_step = convert_sklearn(step_model, initial_types=t,
                                        target_opset=TARGET_OPSET)
        except MissingShapeCalculator as e:
            if "MyScaler" in str(e):
                continue
            raise
        sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
        onnx_outputs = sess.run(None, {'input': data_in})
        onnx_output = onnx_outputs[0]
        skl_outputs = step_model._debug.outputs['transform']
        assert_almost_equal(onnx_output, skl_outputs)
        compare_objects(onnx_output, skl_outputs)
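# The test above relies on MyScaler, a transformer skl2onnx knows nothing
# about, which is why MissingShapeCalculator is raised. The sketch below is a
# hypothetical reconstruction of such a class (its real definition is not part
# of this excerpt) and shows how registering a shape calculator and a
# converter with update_registered_converter would make the error disappear;
# my_scaler_shape_calculator and my_scaler_converter are names assumed here.

import numpy
from sklearn.base import BaseEstimator, TransformerMixin
from skl2onnx import update_registered_converter
from skl2onnx.common.data_types import FloatTensorType


class MyScaler(BaseEstimator, TransformerMixin):
    """Centers the data; no converter is registered for this class."""

    def fit(self, X, y=None):
        self.mean_ = numpy.mean(X, axis=0)
        return self

    def transform(self, X):
        return X - self.mean_


def my_scaler_shape_calculator(operator):
    # The output keeps the shape of the input.
    operator.outputs[0].type = FloatTensorType(operator.inputs[0].type.shape)


def my_scaler_converter(scope, operator, container):
    # The transform is a plain subtraction, expressed with the ONNX Sub node.
    from skl2onnx.algebra.onnx_ops import OnnxSub
    op = operator.raw_operator
    sub = OnnxSub(operator.inputs[0], op.mean_.astype(numpy.float32),
                  output_names=operator.outputs,
                  op_version=container.target_opset)
    sub.add_to(scope, container)


# Once both functions are registered, convert_sklearn and
# collect_intermediate_steps can handle pipelines containing MyScaler.
update_registered_converter(
    MyScaler, "CustomMyScaler",
    my_scaler_shape_calculator, my_scaler_converter)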
def test_simple_pipeline(self):
    for opset in (11, TARGET_OPSET):
        if opset > TARGET_OPSET:
            continue
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        model = Pipeline([("scaler1", StandardScaler()),
                          ("scaler2", StandardScaler())])
        model.fit(data)
        all_models = list(enumerate_pipeline_models(model))
        steps = collect_intermediate_steps(
            model, "pipeline",
            [("input", FloatTensorType([None, 2]))],
            target_opset=opset)
        assert len(steps) == 2
        assert len(all_models) == 3
        expected = 'version:%d}' % opset
        expected1 = 'version:1}'
        model.transform(data)
        for step in steps:
            onnx_step = step['onnx_step']
            text = str(onnx_step).replace('\n', ' ').replace(' ', '')
            if expected not in text and expected1 not in text:
                raise AssertionError("Unable to find '{}'\n'{}'\n".format(
                    expected, text))
            sess = onnxruntime.InferenceSession(
                onnx_step.SerializeToString())
            onnx_outputs = sess.run(None, {'input': data})
            onnx_output = onnx_outputs[0]
            skl_outputs = step['model']._debug.outputs['transform']
            assert str(step['model']._debug) is not None
            sdt = step['model']._debug.display(data, 5)
            assert 'shape' in sdt
            assert_almost_equal(onnx_output, skl_outputs)
            compare_objects(onnx_output, skl_outputs)
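# Side note (a sketch, not part of the original test): instead of searching
# the string form of the model for 'version:...', the declared opsets can be
# read directly from the ModelProto produced by collect_intermediate_steps.

def declared_opsets(model_proto):
    """Return {domain: version} for every opset imported by an ONNX model."""
    return {imp.domain or "ai.onnx": imp.version
            for imp in model_proto.opset_import}

# Inside the loop above, one could for instance check:
#     assert declared_opsets(onnx_step)["ai.onnx"] <= opset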
def test_simple_feature_union(self):
    data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                       dtype=numpy.float32)
    model = FeatureUnion([("scaler1", StandardScaler()),
                          ("scaler2", RobustScaler())])
    model.fit(data)
    all_models = list(enumerate_pipeline_models(model))
    steps = collect_intermediate_steps(
        model, "feature union",
        [("input", FloatTensorType([None, 2]))],
        target_opset=TARGET_OPSET)
    assert len(steps) == 2
    assert len(all_models) == 3
    model.transform(data)
    for step in steps:
        onnx_step = step['onnx_step']
        sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
        onnx_outputs = sess.run(None, {'input': data})
        onnx_output = onnx_outputs[0]
        skl_outputs = step['model']._debug.outputs['transform']
        assert_almost_equal(onnx_output, skl_outputs)
        compare_objects(onnx_output, skl_outputs)
print("onnxruntime") print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]", number=10000, globals=globals())) ############################################### # Intermediate steps # ++++++++++++++++++ # # Let's imagine the final output is wrong and we need # to look into each component of the pipeline which one # is failing. The following method modifies the scikit-learn # pipeline to steal the intermediate outputs and produces # an smaller ONNX graph for every operator. steps = collect_intermediate_steps(pipe, "pipeline", initial_types) assert len(steps) == 2 pipe.predict_proba(X_digits[:2]) for i, step in enumerate(steps): onnx_step = step['onnx_step'] sess = rt.InferenceSession(onnx_step.SerializeToString()) onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)}) skl_outputs = step['model']._debug.outputs if 'transform' in skl_outputs: compare_objects(skl_outputs['transform'], onnx_outputs[0]) print("benchmark", step['model'].__class__) print("scikit-learn") print(timeit("step['model'].transform(X_digits[:1])",
from skl2onnx.common.data_types import FloatTensorType

###########################
# The pipeline.

data = load_iris()
X = data.data

pipe = Pipeline(steps=[('std', StandardScaler()), ('km', KMeans(3))])
pipe.fit(X)

#################################
# The function goes through every step,
# overloads the method *transform* and
# returns an ONNX graph for every step.
steps = collect_intermediate_steps(
    pipe, "pipeline",
    [("X", FloatTensorType([None, X.shape[1]]))])

#####################################
# We call the method *transform* to populate the cache
# the overloaded *transform* methods keep.
pipe.transform(X)

#######################################
# We compute every step and compare
# ONNX and scikit-learn outputs.

for step in steps:
    print('----------------------------')
    print(step['model'])
    onnx_step = step['onnx_step']
    sess = InferenceSession(onnx_step.SerializeToString())
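# A possible continuation (a sketch, the original script is truncated above):
# for every step, run the small ONNX graph on the same data and compare it
# with the output cached by the overloaded *transform* method. It assumes the
# matching ONNX output is the last one when the graph returns several outputs
# (the KMeans converter is expected to return the labels first and the
# distances last).
import numpy
from numpy.testing import assert_almost_equal

for step in steps:
    onnx_step = step['onnx_step']
    sess = InferenceSession(onnx_step.SerializeToString())
    onnx_outputs = sess.run(None, {'X': X.astype(numpy.float32)})
    skl_outputs = step['model']._debug.outputs['transform']
    onnx_output = (onnx_outputs[0] if len(onnx_outputs) == 1
                   else onnx_outputs[-1])
    assert_almost_equal(onnx_output, skl_outputs, decimal=4)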
timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]", number=10000, globals=globals())) ############################################### # Intermediate steps # ++++++++++++++++++ # # Let's imagine the final output is wrong and we need # to look into each component of the pipeline which one # is failing. The following method modifies the scikit-learn # pipeline to steal the intermediate outputs and produces # an smaller ONNX graph for every operator. steps = collect_intermediate_steps(pipe, "pipeline", initial_types, target_opset=11) assert len(steps) == 2 pipe.predict_proba(X_digits[:2]) for i, step in enumerate(steps): onnx_step = step['onnx_step'] sess = rt.InferenceSession(onnx_step.SerializeToString()) onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)}) skl_outputs = step['model']._debug.outputs if 'transform' in skl_outputs: compare_objects(skl_outputs['transform'], onnx_outputs[0]) print("benchmark", step['model'].__class__) print("scikit-learn")