def _test_gaussian_mixture_full_black_op_noargmax(self):
    """Check operator blacklisting for a full-covariance GaussianMixture.

    Fits a two-component mixture on the iris features, expects
    ``convert_sklearn`` to raise ``RuntimeError`` when ``'Add'`` is
    blacklisted, then converts with ``'ReduceLogSumExp'`` and ``'ArgMax'``
    blacklisted and verifies that ``'ArgMax'`` does not appear in the
    textual form of the converted model.
    """
    features = load_iris().data
    gmm = GaussianMixture(n_components=2, covariance_type='full')
    gmm.fit(features)

    # Blacklisting 'Add' is expected to make the conversion fail.
    with self.assertRaises(RuntimeError):
        convert_sklearn(
            gmm, "GM", [("input", DoubleTensorType([None, 4]))],
            target_opset=TARGET_OPSET, black_op={'Add'})

    onx = convert_sklearn(
        gmm, "GM", [("input", DoubleTensorType([None, 4]))],
        target_opset=TARGET_OPSET,
        black_op={'ReduceLogSumExp', 'ArgMax'})
    self.assertIsNotNone(onx)
    self.assertNotIn('ArgMax', str(onx))

    # Only rows 40..59 are passed to the dump helper.
    sample = features.astype(np.float64)[40:60]
    dump_data_and_model(
        sample, gmm, onx,
        basename="GaussianMixtureC2FullBLNMDouble",
        intermediate_steps=False)
    self._test_score(gmm, features, TARGET_OPSET)
def test_gaussian_mixture_full(self):
    """Convert a full-covariance GaussianMixture with double inputs.

    A two-component mixture is fitted on the iris features, converted
    with a ``DoubleTensorType([None, 4])`` input, passed to
    ``dump_data_and_model`` with rows 40..59 and finally to
    ``self._test_score``.
    """
    features = load_iris().data
    gmm = GaussianMixture(n_components=2, covariance_type='full')
    gmm.fit(features)

    input_types = [("input", DoubleTensorType([None, 4]))]
    onx = convert_sklearn(
        gmm, "GM", input_types, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onx)

    sample = features.astype(np.float64)[40:60]
    dump_data_and_model(
        sample, gmm, onx,
        basename="GaussianMixtureC2FullDouble",
        intermediate_steps=False)
    self._test_score(gmm, features, TARGET_OPSET)
def test_kmeans_clustering(self):
    """Convert a 3-cluster KMeans fitted on iris with a double input.

    The converted model and rows 40..59 of the features are handed to
    ``dump_data_and_model``.
    """
    features = load_iris().data
    kmeans = KMeans(n_clusters=3)
    kmeans.fit(features)

    input_types = [("input", DoubleTensorType([None, 4]))]
    onx = convert_sklearn(
        kmeans, "kmeans", input_types, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onx)

    dump_data_and_model(
        features[40:60], kmeans, onx,
        basename="SklearnKMeansDoubleGemm-Dec4")
def test_model_knn_regressor_double(self):
    """Convert a 2-neighbour KNeighborsRegressor with double inputs.

    The conversion is requested with the ``'optim': 'cdist'`` option,
    keyed by the id of the fitted model; the first seven rows (cast to
    float64) are passed to ``dump_data_and_model``.
    """
    # Original note:
    # Could not find an implementation for the node To_TopK:TopK(11)
    knn, X = fit_regression_model(KNeighborsRegressor(n_neighbors=2))

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    conversion_options = {id(knn): {'optim': 'cdist'}}
    onx = convert_sklearn(
        knn, "KNN regressor", input_types,
        target_opset=TARGET_OPSET, options=conversion_options)

    head = X.astype(np.float64)[:7]
    dump_data_and_model(
        head, knn, onx, basename="SklearnKNeighborsRegressorDouble")
def test_model_voting_regression(self):
    """Convert a VotingRegressor (LinearRegression + SGDRegressor).

    The ensemble is fitted through ``fit_regression_model``, converted
    with a double input and passed to ``dump_data_and_model`` with
    ``comparable_outputs=[0]``.
    """
    # Original note:
    # Could not find an implementation for the node Sum:Sum(8)
    estimators = [('lr', LinearRegression()), ('dt', SGDRegressor())]
    ensemble, X = fit_regression_model(VotingRegressor(estimators))

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(
        ensemble, "voting regression", input_types,
        target_opset=TARGET_OPSET)

    dump_data_and_model(
        X.astype(np.float64), ensemble, onx,
        basename="SklearnVotingRegressorDouble",
        comparable_outputs=[0])
def test_gpr_rbf_fitted_true_double(self):
    """Convert a fitted GaussianProcessRegressor with double inputs.

    The regressor uses ``alpha=1e-7``, 15 optimizer restarts and
    ``normalize_y=True``; the input is declared as a double tensor with
    both dimensions unknown.
    """
    regressor = GaussianProcessRegressor(
        alpha=1e-7, n_restarts_optimizer=15, normalize_y=True)
    regressor, X = fit_regression_model(regressor)

    input_types = [('X', DoubleTensorType([None, None]))]
    onx = to_onnx(
        regressor, initial_types=input_types, target_opset=TARGET_OPSET)

    dump_data_and_model(
        X.astype(np.float64), regressor, onx, verbose=False,
        basename="SklearnGaussianProcessRBFTDouble")
def test_grid_search_gaussian_regressor_double(self):
    """Convert a GridSearchCV wrapping a GaussianProcessRegressor.

    The grid explores four ``alpha`` values produced by
    ``np.logspace(-4, -0.5, 4)`` with 3-fold cross-validation; the
    fitted search object is converted with a double input.
    """
    param_grid = [{'alpha': np.logspace(-4, -0.5, 4)}]
    search = GridSearchCV(GaussianProcessRegressor(), param_grid, cv=3)
    fitted, X = fit_regression_model(search)

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(
        fitted, "GridSearchCV", input_types, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onx)

    dump_data_and_model(
        X.astype(np.float64), fitted, onx,
        basename="SklearnGridSearchGaussianRegressionDouble"
                 "-OneOffArray-Dec4")
def test_issue_789(self):
    """Compare a MinMaxScaler + GaussianProcessRegressor pipeline.

    The pipeline is trained on a random regression problem
    (10000 samples, 10 features), converted with a double input named
    ``data_in`` and run through ``InferenceSession``; the flattened
    runtime output must match the flattened pipeline predictions on the
    held-out split.
    """
    n_samples, n_features = 10000, 10
    X, y = make_regression(n_samples, n_features)
    x_train, x_valid, y_train, _ = train_test_split(X, y)

    pipeline = make_pipeline(
        MinMaxScaler(feature_range=(-1, 1)), GaussianProcessRegressor())
    pipeline.fit(x_train, y_train)

    input_types = [('data_in', DoubleTensorType([None, X.shape[1]]))]
    onx = to_onnx(
        pipeline, initial_types=input_types, target_opset=_TARGET_OPSET_)

    session = InferenceSession(onx.SerializeToString())
    x_valid64 = x_valid.astype(np.float64)
    runtime_outputs = session.run(None, {'data_in': x_valid64})
    assert_almost_equal(
        pipeline.predict(x_valid64).ravel(), runtime_outputs[0].ravel())
def test_model_gaussian_mixture_multiclass(self):
    """Convert a default GaussianMixture fitted via the multiclass helper.

    The model comes from ``self._fit_model_multiclass_classification``
    applied to the iris dataset and is converted with a double input
    whose width is ``X.shape[1]``.
    """
    mixture, X = self._fit_model_multiclass_classification(
        GaussianMixture(), load_iris())

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(
        mixture, "gaussian_mixture", input_types,
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(onx)

    dump_data_and_model(
        X, mixture, onx, basename="SklearnMclGaussianMixtureDouble")
    self._test_score(mixture, X, TARGET_OPSET)
def test_model_gaussian_mixture_binary_classification(self):
    """Convert a GaussianMixture for every opset from 9 to TARGET_OPSET.

    The loop starts at ``min(9, TARGET_OPSET)``. For opsets below 11 the
    conversion is expected to raise ``RuntimeError``; for the others the
    converted model is dumped and scored with the same opset.
    """
    mixture, X = self._fit_model_binary_classification(
        GaussianMixture(), load_iris())

    first_opset = min(9, TARGET_OPSET)
    for tg in range(first_opset, TARGET_OPSET + 1):
        with self.subTest(target_opset=tg):
            input_types = [
                ("input", DoubleTensorType([None, X.shape[1]]))]

            if tg < 11:
                # Conversion is expected to be rejected for these opsets.
                with self.assertRaises(RuntimeError):
                    convert_sklearn(
                        mixture, "gaussian_mixture", input_types,
                        target_opset=tg)
                continue

            onx = convert_sklearn(
                mixture, "gaussian_mixture", input_types,
                target_opset=tg)
            self.assertIsNotNone(onx)
            dump_data_and_model(
                X, mixture, onx,
                basename="SklearnBinGaussianMixtureDouble")
            self._test_score(mixture, X, tg)
def test_model_bayesian_mixture_binary_classification(self):
    """Convert a BayesianGaussianMixture for each covariance type.

    One sub-test per value in ``full``, ``tied``, ``diag`` and
    ``spherical``; the value is forwarded as ``covariance_type`` to
    ``self._fit_model_binary_classification``.
    """
    covariance_types = ["full", "tied", "diag", "spherical"]
    for cov in covariance_types:
        with self.subTest(cov=cov):
            mixture, X = self._fit_model_binary_classification(
                BayesianGaussianMixture(), load_iris(),
                covariance_type=cov)

            input_types = [
                ("input", DoubleTensorType([None, X.shape[1]]))]
            onx = convert_sklearn(
                mixture, "gaussian_mixture", input_types,
                target_opset=TARGET_OPSET)
            self.assertIsNotNone(onx)

            dump_data_and_model(
                X, mixture, onx,
                basename="SklearnBinBayesianGaussianMixtureDouble")
            self._test_score(mixture, X, TARGET_OPSET)
def test_sklearn_nca_double(self):
    """Convert a NeighborhoodComponentsAnalysis model with double inputs.

    The transformer (2 components, 4 iterations, ``random_state=42``) is
    fitted through ``fit_classification_model`` with ``3`` as second
    argument; the test data is cast to float64 before the conversion.
    """
    nca = NeighborhoodComponentsAnalysis(
        n_components=2, max_iter=4, random_state=42)
    fitted, X_test = fit_classification_model(nca, 3)
    X_test = X_test.astype(numpy.float64)

    # The shape is given as a tuple here, not as a list.
    input_types = [("input", DoubleTensorType((None, X_test.shape[1])))]
    onx = convert_sklearn(
        fitted, "NCA", input_types, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onx)

    dump_data_and_model(
        X_test, fitted, onx, basename="SklearnNCADouble")
def convert_dataframe_schema(df, drop=None):
    """Build the list of ``(column, tensor type)`` pairs for a dataframe.

    Each column of *df* (read through ``df.columns`` and ``df.dtypes``)
    is mapped to a tensor type of shape ``[1, 1]``: ``'int64'`` gives
    ``Int64TensorType``, ``'float32'`` gives ``FloatTensorType``,
    ``'float64'`` gives ``DoubleTensorType`` and anything else gives
    ``StringTensorType``.

    :param df: object exposing ``columns`` and ``dtypes``
    :param drop: optional container of column names to skip
    :return: list of ``(name, type)`` tuples in column order
    """
    # Checked in order with ``==`` so the dtype comparison semantics of
    # the original if/elif chain are kept.
    known_types = (
        ('int64', Int64TensorType),
        ('float32', FloatTensorType),
        ('float64', DoubleTensorType),
    )

    schema = []
    for column, dtype in zip(df.columns, df.dtypes):
        if drop is not None and column in drop:
            continue
        tensor_cls = StringTensorType
        for dtype_name, candidate in known_types:
            if dtype == dtype_name:
                tensor_cls = candidate
                break
        schema.append((column, tensor_cls([1, 1])))
    return schema
def test_update_onnx_initializers(self):
    """Overwrite the initializers of a converted StandardScaler pipeline.

    The converted model is expected to expose two initializers. Both are
    filled with 1.5 and written back with ``update_onnx_initializers``,
    then read again to check ``'Di_Divcst'`` and ``'Su_Subcst'``.
    """
    pipeline = make_pipeline(StandardScaler())
    X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
    pipeline.fit(X)

    input_types = [("input", DoubleTensorType([None, 2]))]
    onx = convert_sklearn(
        pipeline, "pipe3", input_types, target_opset=TARGET_OPSET)

    initializers = get_initializers(onx)
    self.assertEqual(len(initializers), 2)
    for values in initializers.values():
        values[:] = 1.5  # in-place overwrite of every coefficient
    update_onnx_initializers(onx, initializers)

    # Read the initializers back from the updated model.
    initializers = get_initializers(onx)
    for key in ('Di_Divcst', 'Su_Subcst'):
        assert_almost_equal(initializers[key], numpy.array([1.5, 1.5]))
def test_gpr_rbf_fitted_true(self):
    """Convert a fitted GaussianProcessRegressor declared with double input.

    The regressor uses ``alpha=1e-7``, 15 optimizer restarts and
    ``normalize_y=True``. The data returned by ``fit_regression_model``
    is passed to ``dump_data_and_model`` without any cast.
    """
    gp = GaussianProcessRegressor(
        alpha=1e-7, n_restarts_optimizer=15, normalize_y=True)
    gp, X = fit_regression_model(gp)

    # return_cov=False, return_std=False
    model_onnx = to_onnx(
        gp, initial_types=[('X', DoubleTensorType([None, None]))],
        target_opset=TARGET_OPSET)
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    # NOTE(review): X is not cast to float64 although the input is
    # declared as a double tensor - confirm what dtype
    # fit_regression_model returns.
    dump_data_and_model(
        X, gp, model_onnx, verbose=False,
        basename="SklearnGaussianProcessRBFT")
def test_model_linear_regression64(self):
    """Convert a LinearRegression with a double input of fixed shape.

    The input type is built from the full ``X.shape``, and the textual
    form of the model must contain ``"elem_type: 11"``. No target opset
    is passed to the converter.
    """
    regressor, X = fit_regression_model(linear_model.LinearRegression())

    input_types = [("input", DoubleTensorType(X.shape))]
    onx = convert_sklearn(regressor, "linear regression", input_types)
    self.assertIsNotNone(onx)
    self.assertIn("elem_type: 11", str(onx))

    dump_data_and_model(
        X.astype(numpy.float64), regressor, onx,
        basename="SklearnLinearRegression64-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
def ptype2vttype(it, shape):
    """Return the tensor type matching a ``TensorProto`` element type.

    :param it: element type, compared with ``TensorProto.FLOAT``,
        ``DOUBLE``, ``INT64``, ``INT32``, ``BOOL`` and ``STRING``
    :param shape: shape forwarded to the tensor type constructor
    :return: instance of the matching ``*TensorType`` class
    :raises NotImplementedError: if *it* matches none of the types above
    """
    # pylint: disable=E1101
    proto_to_type = (
        (TensorProto.FLOAT, FloatTensorType),
        (TensorProto.DOUBLE, DoubleTensorType),
        (TensorProto.INT64, Int64TensorType),
        (TensorProto.INT32, Int32TensorType),
        (TensorProto.BOOL, BooleanTensorType),
        (TensorProto.STRING, StringTensorType),
    )
    # pylint: enable=E1101

    # Compared in the same order as the original chain of tests.
    for proto_type, tensor_cls in proto_to_type:
        if it == proto_type:
            return tensor_cls(shape)
    raise NotImplementedError(  # pragma: no cover
        "Unrecognized proto type {} with shape {}".format(it, shape))
def test_gpr_cosine_fitted_true_double(self):
    """Convert a GaussianProcessRegressor using a cosine pairwise kernel.

    The regressor (``alpha=1e-5``, 25 optimizer restarts, no target
    normalisation) is fitted through ``fit_regression_model`` with
    2 features, 20 samples and ``factor=0.01``, then converted with a
    double input whose dimensions are both unknown.
    """
    gp = GaussianProcessRegressor(
        alpha=1e-5, n_restarts_optimizer=25, normalize_y=False,
        kernel=PairwiseKernel(metric='cosine'))
    gp, X = fit_regression_model(
        gp, n_features=2, n_samples=20, factor=0.01)

    # return_cov=False, return_std=False
    model_onnx = to_onnx(
        gp, initial_types=[('X', DoubleTensorType([None, None]))],
        target_opset=TARGET_OPSET)
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X.astype(np.float64), gp, model_onnx, verbose=False,
        basename="SklearnGaussianProcessCosineDouble")
def test_onnxrt_gather0_double(self):
    """Check ``OnnxGather`` along axis 0 on float64 data.

    The expected result is ``numpy.take(data, indices, axis=0)`` for a
    random ``(5, 4, 3, 2)`` array and the indices ``[0, 1, 3]``; the
    graph is executed with ``OnnxInference``.
    """
    data = numpy.random.randn(5, 4, 3, 2).astype(numpy.float64)
    indices = numpy.array([0, 1, 3], dtype=numpy.int64)
    expected = numpy.take(data, indices, axis=0)

    gather = OnnxGather(
        'X', 'I', op_version=get_opset_number_from_onnx(),
        axis=0, output_names=['out'])
    # Both inputs are declared without an explicit shape.
    input_types = [('X', DoubleTensorType()), ('I', Int64TensorType())]
    onx = gather.to_onnx(inputs=input_types)

    results = OnnxInference(onx).run({'X': data, 'I': indices})
    self.assertEqualArray(expected, results['out'])
def test_model_bayesian_ridge_return_std_normalize_double(self):
    """Check BayesianRidge conversion with ``return_std`` on doubles.

    The model is built with ``normalize=True`` and fitted on 50 samples
    with 2 features. The two outputs of the ``InferenceSession`` are
    compared with ``model.predict(X, return_std=True)``; the standard
    deviation is compared to 4 decimals.
    """
    # NOTE(review): ``normalize=`` is no longer accepted by recent
    # scikit-learn releases - confirm the supported version range.
    ridge, X = fit_regression_model(
        linear_model.BayesianRidge(normalize=True),
        n_features=2, n_samples=50)

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    conversion_options = {
        linear_model.BayesianRidge: {'return_std': True}}
    onx = convert_sklearn(
        ridge, "bayesian ridge", input_types,
        options=conversion_options)
    self.assertIsNotNone(onx)

    X = X.astype(numpy.float64)
    session = InferenceSession(onx.SerializeToString())
    runtime_outputs = session.run(None, {'input': X})

    expected_pred, expected_std = ridge.predict(X, return_std=True)
    assert_almost_equal(expected_pred, runtime_outputs[0].ravel())
    assert_almost_equal(
        expected_std, runtime_outputs[1].ravel(), decimal=4)
def test_model_mlpregressor_64(self):
    """Convert an MLPRegressor with double inputs for three activations.

    One sub-test per activation (``relu``, ``tanh``, ``logistic``); the
    textual form of each converted model must contain
    ``"elem_type: 11"``.
    """
    # Original notes:
    # Could not find an implementation for the node Relu:Relu(6)
    # Could not find an implementation for the node Tanh:Tanh(6)
    # Could not find an implementation for the node Sigmoid:Sigmoid(6)
    activations = ['relu', 'tanh', 'logistic']
    for activation in activations:
        with self.subTest(activation=activation):
            regressor, X = fit_regression_model(
                MLPRegressor(activation=activation))

            input_types = [
                ("input", DoubleTensorType([None, X.shape[1]]))]
            onx = convert_sklearn(
                regressor, "linear regression", input_types,
                target_opset=TARGET_OPSET)
            self.assertIn("elem_type: 11", str(onx))

            dump_data_and_model(
                X.astype(np.float64), regressor, onx,
                basename="SklearnMLPRegressorDouble%s" % activation)
def test_model_knn_regressor_double_radius(self):
    """Convert a RadiusNeighborsRegressor with double inputs.

    The model comes from ``self._fit_model`` and is converted with the
    ``'optim': 'cdist'`` option. It is dumped twice under the same
    basename: once with the first seven rows and once with the same
    rows shifted by 0.1.
    """
    regressor, X = self._fit_model(RadiusNeighborsRegressor())

    input_types = [("input", DoubleTensorType([None, 4]))]
    conversion_options = {id(regressor): {'optim': 'cdist'}}
    onx = convert_sklearn(
        regressor, "KNN regressor", input_types,
        target_opset=TARGET_OPSET, options=conversion_options)
    self.assertIsNotNone(onx)

    head = X.astype(numpy.float64)[:7]
    dump_data_and_model(
        head, regressor, onx,
        basename="SklearnRadiusNeighborsRegressor64")

    shifted_head = (X + 0.1).astype(numpy.float64)[:7]
    dump_data_and_model(
        shifted_head, regressor, onx,
        basename="SklearnRadiusNeighborsRegressor64")
def test_pca_default(self):
    """Convert a default PCA fitted on the diabetes dataset.

    The data is split 80/20 with ``random_state=42``; the PCA is fitted
    on the training part and the test part, cast to float64, is passed
    to ``dump_data_and_model``. No target opset is given to the
    converter.
    """
    pca = PCA(random_state=42)

    dataset = load_diabetes()
    # Only the two feature splits are needed; targets are discarded.
    X_train, X_test, *_ = train_test_split(
        dataset.data, dataset.target, test_size=0.2, random_state=42)
    pca.fit(X_train)
    X_test = X_test.astype(np.float64)

    input_types = [("input", DoubleTensorType([None, X_test.shape[1]]))]
    onx = convert_sklearn(pca, initial_types=input_types)

    dump_data_and_model(
        X_test, pca, onx, basename="SklearnPCADoubleDefault")
def test_gpr_fitted_partial_float64_operator_cdist_quad(self):
    """Check the ``optim`` option of the GaussianProcessRegressor converter.

    Steps:

    * ``'optim': 'CDIST'`` (upper case) must raise ``ValueError``;
    * ``'optim': 'cdist'`` must convert with a float input; the model is
      written to ``<test name>.onnx`` in the current directory;
    * the float model is checked with ``self.check_outputs``. The test
      stops early if the runtime reports that CDist is not registered,
      and a precision failure mentioning "Max relative difference:" is
      tolerated;
    * the model is converted again with a double input and checked
      without any tolerance for failure.
    """
    data = load_iris()
    X = data.data
    y = data.target
    X_train, X_test, y_train, _ = train_test_split(X, y)
    gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.)
    gp.fit(X_train, y_train)

    # The upper-case spelling of the option value must be rejected.
    with self.assertRaises(ValueError):
        to_onnx(
            gp, initial_types=[('X', FloatTensorType([None, None]))],
            options={GaussianProcessRegressor: {'optim': 'CDIST'}},
            target_opset=TARGET_OPSET)

    model_onnx = to_onnx(
        gp, initial_types=[('X', FloatTensorType([None, None]))],
        options={GaussianProcessRegressor: {'optim': 'cdist'}},
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # The file is named after this test method, so this lookup has to
    # stay in the method body.
    name_save = inspect.currentframe().f_code.co_name + '.onnx'
    with open(name_save, 'wb') as f:
        f.write(model_onnx.SerializeToString())

    try:
        self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
    except RuntimeError as e:
        if "CDist is not a registered" in str(e):
            # The runtime does not provide the operator: nothing more
            # can be checked.
            return
        # Any other runtime error is a real failure and must not be
        # swallowed.
        raise
    except AssertionError as e:
        # Float32 discrepancies are tolerated, other failures are not.
        self.assertIn("Max relative difference:", str(e))

    model_onnx = to_onnx(
        gp, initial_types=[('X', DoubleTensorType([None, None]))],
        dtype=np.float64, target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)
    self.check_outputs(gp, model_onnx, X_test, {})
def test_model_pls_regression64(self):
    """Convert a 2-component PLSRegression trained on float64 data.

    The model is fitted on a 4x3 matrix with a 4x2 target, converted
    with a double input (no target opset is given) and dumped with
    ``methods=['predict']`` and an ``allow_failure`` expression.
    """
    X = numpy.array(
        [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]],
        numpy.float64)
    Y = numpy.array(
        [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]],
        numpy.float64)
    pls2 = PLSRegression(n_components=2)
    pls2.fit(X, Y)

    model_onnx = convert_sklearn(
        pls2, "scikit-learn pls64",
        [("input", DoubleTensorType([None, X.shape[1]]))])
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X, pls2, model_onnx, methods=['predict'],
        basename="SklearnPLSRegression64",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)<= StrictVersion('0.2.1')")
def test_max_abs_scaler_double(self):
    """Convert a MaxAbsScaler fitted on a 4x3 matrix with double inputs.

    The training rows, converted to a float64 array, are also the data
    passed to ``dump_data_and_model``.
    """
    model = MaxAbsScaler()
    data = [
        [0.0, 0.0, -3.0],
        [1.0, 1.0, 0.0],
        [0.0, 2.0, 1.0],
        [1.0, 0.0, 2.0],
    ]
    model.fit(data)

    model_onnx = convert_sklearn(
        model, "scaler", [("input", DoubleTensorType([None, 3]))],
        target_opset=TARGET_OPSET)
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        numpy.array(data, dtype=numpy.float64), model, model_onnx,
        basename="SklearnMaxAbsScalerDouble")
def test_model_pls_regression64(self):
    """Convert a 2-component PLSRegression trained on float64 data.

    The model is fitted on a 4x3 matrix with a 4x2 target, converted
    with a double input for ``TARGET_OPSET`` and dumped with
    ``methods=['predict']``.
    """
    X = numpy.array(
        [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]],
        numpy.float64)
    Y = numpy.array(
        [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]],
        numpy.float64)
    pls2 = PLSRegression(n_components=2)
    pls2.fit(X, Y)

    model_onnx = convert_sklearn(
        pls2, "scikit-learn pls64",
        [("input", DoubleTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X, pls2, model_onnx, methods=['predict'],
        basename="SklearnPLSRegression64")
def test_random_forest_classifier_double(self):
    """Convert a RandomForestClassifier for ``ai.onnx.ml`` opsets 1 to 3.

    The forest (5 trees, ``random_state=42``) is fitted through
    ``fit_classification_model`` with ``3`` as second argument and
    ``is_double=True``. Each conversion pins the default domain to
    ``TARGET_OPSET`` and ``ai.onnx.ml`` to the opset being tested.
    """
    forest = RandomForestClassifier(n_estimators=5, random_state=42)
    fitted, X = fit_classification_model(forest, 3, is_double=True)

    for opv in [1, 2, 3]:
        input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
        opsets = {'ai.onnx.ml': opv, '': TARGET_OPSET}
        onx = convert_sklearn(
            fitted, "random forest classifier", input_types,
            target_opset=opsets)
        self.assertIsNotNone(onx)
        dump_data_and_model(
            X, fitted, onx,
            basename="SklearnRandomForestClassifierDouble")
def test_model_bayesian_ridge_return_std_normalize_double(self):
    """Check BayesianRidge conversion with ``return_std`` on doubles.

    The model is built with ``normalize=True`` and fitted on 50 samples
    with 2 features. The converted graph is run with ``OnnxInference``
    and its ``'variable'`` and ``'std'`` outputs are compared with
    ``model.predict(X, return_std=True)``; the standard deviation is
    compared with ``decimal=4``.
    """
    ridge, X = fit_regression_model(
        BayesianRidge(normalize=True), n_features=2, n_samples=50)

    input_types = [("input", DoubleTensorType([None, X.shape[1]]))]
    conversion_options = {BayesianRidge: {'return_std': True}}
    onx = convert_sklearn(
        ridge, "bayesian ridge", input_types,
        options=conversion_options)
    self.assertIsNotNone(onx)

    X = X.astype(numpy.float64)
    runtime_outputs = OnnxInference(onx).run({'input': X})

    expected_pred, expected_std = ridge.predict(X, return_std=True)
    self.assertEqualArray(
        expected_pred, runtime_outputs['variable'].ravel())
    self.assertEqualArray(
        expected_std, runtime_outputs['std'].ravel(), decimal=4)
def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):
    """Convert an ExpSineSquared GaussianProcessRegressor with return_std.

    Training data is drawn from ``np.random.RandomState(0)``. The
    regressor is converted with the ``return_std`` option and a double
    input, dumped, and checked with ``self.check_outputs`` to 4
    decimals; both checks disable optimisation.
    """
    state = np.random.RandomState(0)
    X = 15 * state.rand(100, 2)
    y = np.sin(X[:, 0] - X[:, 1]).ravel()
    y += 0.5 * (0.5 - state.rand(X.shape[0]))
    y /= 10
    X_train, X_test, y_train, _ = train_test_split(X, y)

    gp = GaussianProcessRegressor(
        kernel=ExpSineSquared(periodicity_bounds=(1e-10, 1e10)),
        alpha=1e-7, n_restarts_optimizer=25, normalize_y=True,
        random_state=1)
    try:
        gp.fit(X_train, y_train)
    except (AttributeError, TypeError):
        # unstable bug in scikit-learn, fixed in 0.24
        return

    # return_cov=False, return_std=False
    options = {GaussianProcessRegressor: {"return_std": True}}
    # NOTE(review): the result is discarded; the call presumably
    # initialises state the converter needs - confirm before removing.
    gp.predict(X_train, return_std=True)

    model_onnx = to_onnx(
        gp, initial_types=[('X', DoubleTensorType([None, None]))],
        options=options, target_opset=TARGET_OPSET)
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X_test.astype(np.float64), gp, model_onnx, verbose=False,
        basename="SklearnGaussianProcessExpSineSquaredStdT-Out0-Dec2",
        disable_optimisation=True)
    self.check_outputs(
        gp, model_onnx, X_test.astype(np.float64),
        predict_attributes=options[GaussianProcessRegressor],
        decimal=4, disable_optimisation=True)