def common_test_cast_regressor(self, dtype, input_type):
    model = CastRegressor(DecisionTreeRegressor(max_depth=2), dtype=dtype)
    data = numpy.array(
        [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2],
         [0.1, 2.1, 1.1], [1.1, 0.1, 2.2], [-0.1, -2.1, -1.1],
         [-1.1, -0.1, -2.2], [0.2, 2.2, 1.2], [1.2, 0.2, 2.2]],
        dtype=numpy.float32)
    y = (numpy.sum(data, axis=1, keepdims=0) +
         numpy.random.randn(data.shape[0]))
    model.fit(data, y)
    pred = model.predict(data)
    assert pred.dtype == dtype
    model_onnx = convert_sklearn(
        model, "cast",
        [("input", FloatTensorType([None, 3]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data, model, model_onnx,
        basename="SklearnCastRegressor{}".format(
            input_type.__class__.__name__))
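# A sketch of how the helper above might be invoked; the method name and
# skip condition are illustrative, not taken from the actual test suite
# (CastRegressor may be None when mlinsights is missing or too old).
@unittest.skipIf(CastRegressor is None, reason="mlinsights too old")
def test_cast_regressor_float32(self):
    self.common_test_cast_regressor(numpy.float32, FloatTensorType())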
def test_pipeline(self):
    def maxdiff(a1, a2):
        d = numpy.abs(a1.ravel() - a2.ravel())
        return d.max()

    X, y = make_regression(10000, 10, random_state=3)
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3)
    Xi_train, yi_train = X_train.copy(), y_train.copy()
    Xi_test = X_test.copy()
    for i in range(X.shape[1]):
        Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(
            numpy.int64)
        Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(
            numpy.int64)
    max_depth = 10
    Xi_test = Xi_test.astype(numpy.float32)

    # model 1: no explicit casts, discrepancies expected
    model1 = Pipeline([
        ('scaler', StandardScaler()),
        ('dt', DecisionTreeRegressor(max_depth=max_depth))])
    model1.fit(Xi_train, yi_train)
    exp1 = model1.predict(Xi_test)
    onx1 = to_onnx(model1, X_train[:1].astype(numpy.float32),
                   target_opset=TARGET_OPSET)
    sess1 = InferenceSession(onx1.SerializeToString(),
                             providers=['CPUExecutionProvider'])
    got1 = sess1.run(None, {'X': Xi_test})[0]
    md1 = maxdiff(exp1, got1)

    # model 2: casts inserted around the scaler and the regressor
    model2 = Pipeline([
        ('cast64', CastTransformer(dtype=numpy.float64)),
        ('scaler', StandardScaler()),
        ('cast', CastTransformer()),
        ('dt', CastRegressor(DecisionTreeRegressor(max_depth=max_depth),
                             dtype=numpy.float32))])
    model2.fit(Xi_train, yi_train)
    exp2 = model2.predict(Xi_test)
    # div_cast makes the scaler's division happen in double precision
    onx2 = to_onnx(model2, X_train[:1].astype(numpy.float32),
                   options={StandardScaler: {'div': 'div_cast'}},
                   target_opset=TARGET_OPSET)
    sess2 = InferenceSession(onx2.SerializeToString(),
                             providers=['CPUExecutionProvider'])
    got2 = sess2.run(None, {'X': Xi_test})[0]
    md2 = maxdiff(exp2, got2)
    assert md2 <= md1
    assert md2 <= 0.0
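# For readers unfamiliar with mlinsights, a minimal sketch of what the two
# wrappers used above do (illustrative reimplementation, not the actual
# mlinsights code): a cast transformer casts the features flowing through
# the pipeline, a cast regressor casts the wrapped model's predictions.
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin


class SimpleCastTransformer(TransformerMixin, BaseEstimator):
    "Casts X to a fixed dtype in transform (hypothetical sketch)."

    def __init__(self, dtype=numpy.float32):
        self.dtype = dtype

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X.astype(self.dtype)


class SimpleCastRegressor(RegressorMixin, BaseEstimator):
    "Casts the wrapped regressor's predictions (hypothetical sketch)."

    def __init__(self, estimator, dtype=numpy.float32):
        self.estimator = estimator
        self.dtype = dtype

    def fit(self, X, y):
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        return self.estimator.predict(X).astype(self.dtype)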
############################################
# Perfect, no discrepancies at all.

print(diff(skl5, ort5))

##############################################
# CastRegressor
# +++++++++++++
#
# The previous example demonstrated that the type difference for
# the predicted values explains the small discrepancies between
# :epkg:`scikit-learn` and :epkg:`onnxruntime`, but that difference
# cannot be removed with the current ONNX specification. Another
# option is to cast the predictions into floats in the
# :epkg:`scikit-learn` pipeline.

ctree = CastRegressor(DecisionTreeRegressor(max_depth=max_depth))
ctree.fit(Xi_train, yi_train)

onx6 = to_onnx(ctree, Xi_train[:1].astype(numpy.float32))

sess6 = InferenceSession(onx6.SerializeToString(),
                         providers=['CPUExecutionProvider'])

skl6 = ctree.predict(X32)
ort6 = sess6.run(None, {'X': X32})[0]

print(diff(skl6, ort6))

##############################
# Success!
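##############################
# As a quick extra check (not part of the original example), the
# operator types of the converted graph can be listed to see how
# CastRegressor was translated by the converter.

print([node.op_type for node in onx6.graph.node])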