def test_simple_column_transformer(self):
        """Check every ColumnTransformer step against its ONNX graph.

        Fits a two-column ``ColumnTransformer``, asks
        ``collect_intermediate_steps`` for one ONNX graph per step and
        compares the first output of each graph with the output the
        matching scikit-learn step recorded during ``transform``.
        """
        if ColumnTransformer is None:
            # Presumably ColumnTransformer could not be imported
            # (TODO confirm); there is nothing to check in that case.
            return
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        model = ColumnTransformer([("scaler1", StandardScaler(), [0]),
                                   ("scaler2", RobustScaler(), [1])])
        model.fit(data)
        all_models = list(enumerate_pipeline_models(model))

        # The second argument is the name given to the conversion; the
        # original spelling "coulmn transformer" was a typo.
        steps = collect_intermediate_steps(model, "column transformer",
                                           [("input",
                                             FloatTensorType([None, 2]))])

        # One entry per scaler; the enumeration also counts the
        # ColumnTransformer itself, hence three models.
        assert len(steps) == 2
        assert len(all_models) == 3

        # The result is discarded: the loop below reads what each step
        # stored in ``_debug.outputs`` during this call.
        model.transform(data)
        for step in steps:
            onnx_step = step['onnx_step']
            sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
            onnx_outputs = sess.run(None, {'input': data})
            onnx_output = onnx_outputs[0]
            skl_outputs = step['model']._debug.outputs['transform']
            assert_almost_equal(onnx_output, skl_outputs)
            compare_objects(onnx_output.tolist(), skl_outputs.tolist())
    def test_simple_pipeline_predict_proba(self):
        """Compare per-step ONNX outputs of a scaler + classifier pipeline.

        A ``StandardScaler`` / ``LogisticRegression`` pipeline is fitted
        on iris. For every intermediate step, the ONNX result is checked
        against what the scikit-learn step recorded: the first ONNX
        output for a step that recorded ``transform``, the second ONNX
        output against ``predict_proba`` otherwise.
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        model = Pipeline([("scaler1", StandardScaler()),
                          ("lr", LogisticRegression())])
        model.fit(X, y)
        all_models = list(enumerate_pipeline_models(model))

        input_types = [("input", FloatTensorType([None, X.shape[1]]))]
        steps = collect_intermediate_steps(model, "pipeline", input_types)

        assert len(steps) == 2
        assert len(all_models) == 3

        # Called for its side effect: the loop reads ``_debug.outputs``
        # of every step afterwards.
        model.predict_proba(X)
        feeds = {'input': X.astype(numpy.float32)}
        for step in steps:
            session = onnxruntime.InferenceSession(
                step['onnx_step'].SerializeToString())
            results = session.run(None, feeds)
            recorded = step['model']._debug.outputs
            if 'transform' in recorded:
                key, position = 'transform', 0
            else:
                key, position = 'predict_proba', 1
            got = results[position]
            expected = recorded[key]
            assert_almost_equal(got, expected, decimal=6)
            compare_objects(got, expected)
    def test_simple_pipeline(self):
        """Compare per-step ONNX outputs of a two-scaler pipeline.

        Besides matching each ONNX output with the recorded
        scikit-learn ``transform`` output, the test checks that the
        step's ``_debug`` object can be turned into a string and that
        its ``display`` text mentions ``'shape'``.
        """
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        model = Pipeline([("scaler1", StandardScaler()),
                          ("scaler2", StandardScaler())])
        model.fit(data)
        all_models = list(enumerate_pipeline_models(model))

        input_types = [("input", FloatTensorType([None, 2]))]
        steps = collect_intermediate_steps(model, "pipeline", input_types)

        assert len(steps) == 2
        assert len(all_models) == 3

        # Called for its side effect: every step's ``_debug.outputs``
        # is read in the loop below.
        model.transform(data)
        for step in steps:
            serialized = step['onnx_step'].SerializeToString()
            session = onnxruntime.InferenceSession(serialized)
            got = session.run(None, {'input': data})[0]
            debug_info = step['model']._debug
            expected = debug_info.outputs['transform']
            assert str(debug_info) is not None
            summary = debug_info.display(data, 5)
            assert 'shape' in summary
            assert_almost_equal(got, expected)
            compare_objects(got, expected)
Example #4
0
    def test_missing_converter(self):
        """Check step-by-step conversion when one step has no converter.

        The pipeline ends with ``MyScaler``. If
        ``collect_intermediate_steps`` raises ``MissingShapeCalculator``,
        its message must mention ``MyScaler`` and ``gallery``. The model
        is then instrumented manually and every step is converted on its
        own and compared with the recorded scikit-learn output; steps
        whose conversion fails because of ``MyScaler`` are skipped.
        """
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        model = Pipeline([("scaler1", StandardScaler()),
                          ("scaler2", StandardScaler()),
                          ("scaler3", MyScaler())])
        model.fit(data)

        # NOTE(review): this also passes when no exception is raised;
        # confirm whether the conversion is required to fail here.
        try:
            collect_intermediate_steps(model,
                                       "pipeline",
                                       [("input", FloatTensorType([None, 2]))],
                                       target_opset=TARGET_OPSET)
        except MissingShapeCalculator as e:
            message = str(e)
            assert "MyScaler" in message
            assert "gallery" in message

        _alter_model_for_debugging(model, recursive=True)
        # Called for its side effect: the loop reads ``_debug.inputs``
        # and ``_debug.outputs`` of every step afterwards.
        model.transform(data)
        all_models = list(enumerate_pipeline_models(model))

        for ind, step, _last in all_models:
            if ind == (0, ):
                # whole pipeline
                continue
            data_in = step._debug.inputs['transform']
            t = guess_data_type(data_in)
            try:
                onnx_step = convert_sklearn(step,
                                            initial_types=t,
                                            target_opset=TARGET_OPSET)
            except MissingShapeCalculator as e:
                if "MyScaler" in str(e):
                    # Expected: the custom scaler has no converter.
                    continue
                raise
            sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
            onnx_outputs = sess.run(None, {'input': data_in})
            onnx_output = onnx_outputs[0]
            skl_outputs = step._debug.outputs['transform']
            assert_almost_equal(onnx_output, skl_outputs)
            compare_objects(onnx_output, skl_outputs)
Example #5
0
    def test_simple_pipeline(self):
        """Compare per-step ONNX outputs of a two-scaler pipeline per opset.

        Runs once for opset 11 and once for ``TARGET_OPSET`` (opsets
        above ``TARGET_OPSET`` are skipped). Each intermediate ONNX
        graph must mention either the requested opset version or
        version 1 in its text form, and its first output must match the
        recorded scikit-learn ``transform`` output.
        """
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        for opset in (11, TARGET_OPSET):
            if opset > TARGET_OPSET:
                continue
            model = Pipeline([("scaler1", StandardScaler()),
                              ("scaler2", StandardScaler())])
            model.fit(data)
            all_models = list(enumerate_pipeline_models(model))

            input_types = [("input", FloatTensorType([None, 2]))]
            steps = collect_intermediate_steps(
                model, "pipeline", input_types, target_opset=opset)

            assert len(steps) == 2
            assert len(all_models) == 3

            expected = 'version:%d}' % opset
            expected1 = 'version:1}'
            # Called for its side effect: every step's
            # ``_debug.outputs`` is read in the loop below.
            model.transform(data)
            for step in steps:
                onnx_step = step['onnx_step']
                # Strip newlines and spaces so the markers can be found
                # regardless of how the graph text is laid out.
                text = str(onnx_step).replace('\n', ' ').replace(' ', '')
                if not (expected in text or expected1 in text):
                    raise AssertionError("Unable to find '{}'\n'{}'\n".format(
                        expected, text))
                session = onnxruntime.InferenceSession(
                    onnx_step.SerializeToString())
                got = session.run(None, {'input': data})[0]
                debug_info = step['model']._debug
                recorded = debug_info.outputs['transform']
                assert str(debug_info) is not None
                summary = debug_info.display(data, 5)
                assert 'shape' in summary
                assert_almost_equal(got, recorded)
                compare_objects(got, recorded)
Example #6
0
    def test_simple_feature_union(self):
        """Compare per-step ONNX outputs of a two-scaler FeatureUnion.

        For each intermediate step, the first output of its ONNX graph
        must match the ``transform`` output recorded by the
        corresponding scikit-learn step.
        """
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        union = FeatureUnion([("scaler1", StandardScaler()),
                              ("scaler2", RobustScaler())])
        union.fit(data)
        all_models = list(enumerate_pipeline_models(union))
        input_types = [("input", FloatTensorType([None, 2]))]
        steps = collect_intermediate_steps(
            union, "feature union", input_types,
            target_opset=TARGET_OPSET)

        assert len(steps) == 2
        assert len(all_models) == 3

        # Called for its side effect: every step's ``_debug.outputs``
        # is read in the loop below.
        union.transform(data)
        for step in steps:
            serialized = step['onnx_step'].SerializeToString()
            session = onnxruntime.InferenceSession(serialized)
            got = session.run(None, {'input': data})[0]
            expected = step['model']._debug.outputs['transform']
            assert_almost_equal(got, expected)
            compare_objects(got, expected)
# Time 10000 calls of ``sess.run`` on the first row of ``X_digits`` cast
# to float32; ``[1]`` picks the second output returned by the session.
print("onnxruntime")
print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]",
             number=10000, globals=globals()))

###############################################
# Intermediate steps
# ++++++++++++++++++
#
# Let's imagine the final output is wrong and we need
# to look into each component of the pipeline to find which one
# is failing. The following method modifies the scikit-learn
# pipeline to steal the intermediate outputs and produces
# a smaller ONNX graph for every operator.


# One entry per pipeline step; each entry is read below through its
# 'onnx_step' and 'model' keys.
steps = collect_intermediate_steps(pipe, "pipeline",
                                   initial_types)

# The pipeline is expected to expose exactly two steps.
assert len(steps) == 2

# Run the pipeline on two samples; the loop that follows reads the
# outputs each step stored in ``_debug.outputs``.
pipe.predict_proba(X_digits[:2])

for i, step in enumerate(steps):
    onnx_step = step['onnx_step']
    sess = rt.InferenceSession(onnx_step.SerializeToString())
    onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)})
    skl_outputs = step['model']._debug.outputs
    if 'transform' in skl_outputs:
        compare_objects(skl_outputs['transform'], onnx_outputs[0])
        print("benchmark", step['model'].__class__)
        print("scikit-learn")
        print(timeit("step['model'].transform(X_digits[:1])",
from skl2onnx.common.data_types import FloatTensorType

###########################
# The pipeline.

# Only the feature matrix is used; the pipeline is fitted without labels.
data = load_iris()
X = data.data

pipe = Pipeline(steps=[('std', StandardScaler()), ('km', KMeans(3))])
pipe.fit(X)

#################################
# The function goes through every step,
# overloads the method *transform* and
# returns an ONNX graph for every step.
steps = collect_intermediate_steps(
    pipe, "pipeline", [("X", FloatTensorType([None, X.shape[1]]))])

#####################################
# We call method *transform* to populate the
# cache kept by the overloaded *transform* methods.
pipe.transform(X)

#######################################
# We compute every step and compare
# ONNX and scikit-learn outputs.

for step in steps:
    print('----------------------------')
    print(step['model'])
    onnx_step = step['onnx_step']
    sess = InferenceSession(onnx_step.SerializeToString())
Example #9
0
    timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]",
           number=10000,
           globals=globals()))

###############################################
# Intermediate steps
# ++++++++++++++++++
#
# Let's imagine the final output is wrong and we need
# to look into each component of the pipeline to find which one
# is failing. The following method modifies the scikit-learn
# pipeline to steal the intermediate outputs and produces
# a smaller ONNX graph for every operator.

# One entry per pipeline step, converted with opset 11; each entry is
# read below through its 'onnx_step' and 'model' keys.
steps = collect_intermediate_steps(pipe,
                                   "pipeline",
                                   initial_types,
                                   target_opset=11)

# The pipeline is expected to expose exactly two steps.
assert len(steps) == 2

# Run the pipeline on two samples; the loop that follows reads the
# outputs each step stored in ``_debug.outputs``.
pipe.predict_proba(X_digits[:2])

for i, step in enumerate(steps):
    onnx_step = step['onnx_step']
    sess = rt.InferenceSession(onnx_step.SerializeToString())
    onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)})
    skl_outputs = step['model']._debug.outputs
    if 'transform' in skl_outputs:
        compare_objects(skl_outputs['transform'], onnx_outputs[0])
        print("benchmark", step['model'].__class__)
        print("scikit-learn")