def test_lightgbm_pytorch_extra_config(self):
    """Check that the ONNX output model name passed via ``extra_config`` ends up as the graph name."""
    warnings.filterwarnings("ignore")

    # Tiny regression problem: three samples, two features.
    features = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    targets = np.array([100, -10, 50], dtype=np.float32)
    regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    regressor.fit(features, targets)

    # Create ONNX-ML model
    n_rows, n_cols = features.shape[0], features.shape[1]
    onnx_ml_model = convert_lightgbm(
        regressor,
        initial_types=[("input", FloatTensorType([n_rows, n_cols]))],
        target_opset=9)

    # Create ONNX model
    expected_name = "hummingbird.ml.test.lightgbm"
    extra_config = {
        constants.ONNX_OUTPUT_MODEL_NAME: expected_name,
        constants.ONNX_INITIAL_TYPES: [
            ("input", FloatTensorType([n_rows, n_cols]))
        ],
    }
    onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

    assert onnx_model.model.graph.name == expected_name
def _parse_libsvm_simple_model(scope, model, inputs):
    '''
    Declare the operator and output variables for a single (non-pipeline) libsvm model.

    SVM types 0 and 1 are mapped to a "LibSvmSVC" operator with two outputs
    ('label' and 'probabilities'); types 3 and 4 are mapped to a "LibSvmSVR"
    operator with a single 'variable' output.

    :param scope: Scope object
    :param model: A libsvm object exposing ``get_svm_type()``
    :param inputs: A list of variables
    :return: A list of output variables which will be passed to next stage
    :raises ValueError: if the SVM type is none of 0, 1, 3, 4
    '''
    svm_type = model.get_svm_type()

    if svm_type in (0, 1):
        operator_name = "LibSvmSVC"
        output_names = ('label', 'probabilities')
    elif svm_type in (4, 3):
        # A regressor is assumed to produce a single float tensor.
        operator_name = "LibSvmSVR"
        output_names = ('variable',)
    else:
        raise ValueError("Unknown SVM type '{0}'".format(svm_type))

    # Variables are declared before the operator, as in the original flow.
    declared = [
        scope.declare_local_variable(name, FloatTensorType())
        for name in output_names
    ]
    this_operator = scope.declare_local_operator(operator_name, model)
    this_operator.inputs = inputs
    for output_variable in declared:
        this_operator.outputs.append(output_variable)
    return this_operator.outputs
def _parse_xgboost_simple_model(scope, model, inputs):
    '''
    Declare the operator and output variables for a single (non-pipeline) xgboost model.

    :param scope: Scope object
    :param model: A xgboost object
    :param inputs: A list of variables
    :return: A list of output variables which will be passed to next stage
    '''
    this_operator = scope.declare_local_operator(
        _get_xgboost_operator_name(model), model)
    this_operator.inputs = inputs

    is_classifier = (
        type(model) in xgboost_classifier_list
        or getattr(model, 'operator_name', None) == 'XGBClassifier')

    if is_classifier:
        # Classifiers expose a label and the probabilities of all classes.
        # The types declared here are placeholders; they are not necessarily
        # correct and are fixed in the shape inference phase.
        output_names = ('label', 'probabilities')
    else:
        # Anything else is assumed to produce a single float tensor.
        output_names = ('variable',)

    declared = [
        scope.declare_local_variable(name, FloatTensorType())
        for name in output_names
    ]
    for output_variable in declared:
        this_operator.outputs.append(output_variable)
    return this_operator.outputs
def test_non_ascii_variable_name_pipeline(self):
    """Convert a ColumnTransformer whose inputs include a non-ASCII column name.

    The 'age' column is renamed to '年齢' before fitting; the test only checks
    that the converted model can be loaded into an InferenceSession.
    """
    data = dedent("""
        pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
        1,1,"A",female,29.0,0,0,24160,211.3375,B5,S,2,,"MO"
        1,1,"B",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Can"
        1,0,"C",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Can"
        1,0,"D",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Can"
        1,0,"E",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Can"
        1,1,"F",male,48.0,0,0,19952,26.55,E12,S,3,,"NY"
        1,1,"G",female,63.0,1,0,13502,77.9583,D7,S,10,,"NY"
        1,0,"H",male,39.0,0,0,112050,0.0,A36,S,,,"NI"
        1,1,"I",female,53.0,2,0,11769,51.4792,C101,S,D,,"NY"
        1,0,"J",male,71.0,0,0,PC 17609,49.5042,,C,,22.0,"Uruguay"
        1,0,"K",male,47.0,1,0,PC 17757,227.525,C62 C64,C,,124.0,"NY"
        1,1,"L",female,18.0,1,0,PC 17757,227.525,C62 C64,C,4,,"NY"
        1,1,"M",female,24.0,0,0,PC 17477,69.3,B35,C,9,,"F"
        1,1,"N",female,26.0,0,0,19877,78.85,,S,6,,
        1,1,"L",male,80.0,0,0,27042,30.0,A23,S,B,,"Yorks"
        1,0,"O",male,,0,0,PC 17318,25.925,,S,,,"NY"
        1,0,"P",male,24.0,0,1,PC 17558,247.5208,B58 B60,C,,,"PQ"
        1,1,"Q",female,50.0,0,1,PC 17558,247.5208,B58 B60,C,6,,"PQ"
        1,1,"R",female,32.0,0,0,11813,76.2917,D15,C,8,,
        1,0,"S",male,36.0,0,0,13050,75.2417,C6,C,A,,"MN"
        """).strip(" \n")
    data = pd.read_csv(StringIO(data))
    data.rename(columns={"age": "年齢"}, inplace=True)

    X = data.drop('survived', axis=1)
    cols = ['embarked', 'sex', 'pclass', '年齢', 'fare']
    # Work on an explicit copy so the column assignments below modify X itself
    # and do not trigger chained-assignment warnings.
    X = X[cols].copy()
    for cat in ['embarked', 'sex', 'pclass']:
        # Assign the result back: an in-place fillna on the temporary X[cat]
        # is not guaranteed to propagate to X.
        X[cat] = X[cat].fillna('missing')

    numeric_features = ['年齢', 'fare']
    numeric_transformer = Pipeline(
        steps=[('imputer', SimpleImputer(strategy='median')),
               ('scaler', StandardScaler())])

    categorical_features = ['embarked', 'sex', 'pclass']
    categorical_transformer = Pipeline(
        steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[('num', numeric_transformer, numeric_features),
                      ('cat', categorical_transformer, categorical_features)])
    preprocessor.fit_transform(X)

    # One ONNX input per column, each declared with shape [None, 1].
    initial_type = [('pclass', Int64TensorType(shape=[None, 1])),
                    ('sex', StringTensorType(shape=[None, 1])),
                    ('年齢', FloatTensorType(shape=[None, 1])),
                    ('fare', FloatTensorType(shape=[None, 1])),
                    ('embarked', StringTensorType(shape=[None, 1]))]

    onnx_object = convert_sklearn(preprocessor, initial_types=initial_type,
                                  target_opset=TARGET_OPSET)
    sess = InferenceSession(onnx_object.SerializeToString())
    self.assertIsNotNone(sess)
def test_to_onnx_type(self):
    """Check the string form of a FloatTensorType and of its ONNX type proto."""
    tensor_type = FloatTensorType((1, 5))
    assert str(tensor_type) == 'FloatTensorType(shape=(1, 5))'

    onnx_type = tensor_type.to_onnx_type()
    assert "dim_value: 5" in str(onnx_type)

    tensor_proto = onnx_type.tensor_type
    assert "dim_value: 5" in str(tensor_proto)
    assert tensor_proto.elem_type == 1

    # The sequence part of the proto is expected to print as empty.
    sequence_proto = onnx_type.sequence_type
    assert str(sequence_proto) == ""
def test_variable_names_distinct(self):
    """Convert a pass-through pipeline with distinct input/output names and check the output equals the input."""
    model_onnx = convert_sklearn(
        Pipeline([("passthrough", Passthrough())]),
        initial_types=[("INPUTA", FloatTensorType([None, 2]))],
        target_opset=TARGET_OPSET,
        final_types=[("OUTPUTA", FloatTensorType([None, 2]))],
        verbose=0)

    sample = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
    session = InferenceSession(model_onnx.SerializeToString())
    outputs = session.run(None, {'INPUTA': sample})
    assert_almost_equal(sample, outputs[0])
def test_lightgbm_onnx_pytorch(self):
    """Compare ONNX-ML predictions of a LightGBM regressor with its "torch" conversion."""
    warnings.filterwarnings("ignore")
    X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    # Create ONNX-ML model
    onnx_ml_model = convert_lightgbm(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9)

    pt_model = convert(onnx_ml_model, "torch", X)
    assert pt_model

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [output.name for output in session.get_outputs()]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = session.run(output_names, inputs)

    np.testing.assert_allclose(onnx_ml_pred[0].flatten(), pt_model.predict(X))
def _test_lgbm(self, X, model, extra_config=None):
    """Run a LightGBM model through ONNX-ML and through the "onnx" conversion.

    :param X: input array; its two dimensions define the declared input shape
    :param model: fitted LightGBM model
    :param extra_config: optional dict forwarded to ``convert``; a fresh empty
        dict is used when omitted so that no state is shared between calls
    :return: ``(onnx_ml_pred, onnx_pred, output_names)`` where ``onnx_ml_pred``
        holds the label output at index 1 and the other output at index 0,
        and ``onnx_pred`` is either the regression prediction or
        ``[predict_proba(X), predict(X)]``
    """
    if extra_config is None:
        extra_config = {}

    # Create ONNX-ML model
    onnx_ml_model = convert_lightgbm(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9)

    # Create ONNX model
    onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [output.name for output in session.get_outputs()]
    inputs = {session.get_inputs()[0].name: X}
    pred = session.run(output_names, inputs)

    # Reorder so that index 1 always holds the label output.
    onnx_ml_pred = [[] for _ in output_names]
    for name, value in zip(output_names, pred):
        if "label" in name:
            onnx_ml_pred[1] = value
        else:
            onnx_ml_pred[0] = value

    # Get the predictions for the ONNX model
    if len(output_names) == 1:  # regression
        onnx_pred = onnx_model.predict(X)
    else:  # classification
        onnx_pred = [[] for _ in output_names]
        onnx_pred[0] = onnx_model.predict_proba(X)
        onnx_pred[1] = onnx_model.predict(X)

    return onnx_ml_pred, onnx_pred, output_names
def test_onnx_deafault_n_threads(self):
    """Check the thread settings of the session created by the "onnx" conversion.

    intra-op threads are expected to equal the number of physical cores and
    inter-op threads are expected to be 1.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=100)

    model.fit(X, y)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9)

    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

    self.assertIsNotNone(hb_model)
    # assertEqual reports both values when the check fails.
    self.assertEqual(
        hb_model._session.get_session_options().intra_op_num_threads,
        psutil.cpu_count(logical=False))
    self.assertEqual(
        hb_model._session.get_session_options().inter_op_num_threads, 1)
def _guess_type_proto(data_type, dims):
    """
    Map an ONNX ``TensorProto`` element type code to a tensor type built with *dims*.

    Raises RuntimeError if any dimension equals 0 and NotImplementedError
    if *data_type* is not one of the handled codes.
    """
    # This could be moved to onnxconverter_common.
    if any(d == 0 for d in dims):
        raise RuntimeError("Dimension should not be null: {}.".format(
            list(dims)))

    tensor_proto = onnx_proto.TensorProto
    candidates = (
        (tensor_proto.FLOAT, FloatTensorType),
        (tensor_proto.DOUBLE, DoubleTensorType),
        (tensor_proto.STRING, StringTensorType),
        (tensor_proto.INT64, Int64TensorType),
        (tensor_proto.INT32, Int32TensorType),
        (tensor_proto.BOOL, BooleanTensorType),
        (tensor_proto.INT8, Int8TensorType),
        (tensor_proto.UINT8, UInt8TensorType),
    )
    for proto_code, tensor_class in candidates:
        if data_type == proto_code:
            return tensor_class(dims)

    # Complex types are only considered when they are available.
    if Complex64TensorType is not None:
        if data_type == tensor_proto.COMPLEX64:
            return Complex64TensorType(dims)
        if data_type == tensor_proto.COMPLEX128:
            return Complex128TensorType(dims)

    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def _guess_type_proto_str(data_type, dims):
    """
    Map a type string such as ``"tensor(float)"`` to a tensor type built with *dims*.

    Raises NotImplementedError if *data_type* is not one of the handled strings.
    """
    # This could be moved to onnxconverter_common.
    candidates = (
        ("tensor(float)", FloatTensorType),
        ("tensor(double)", DoubleTensorType),
        ("tensor(string)", StringTensorType),
        ("tensor(int64)", Int64TensorType),
        ("tensor(int32)", Int32TensorType),
        ("tensor(bool)", BooleanTensorType),
        ("tensor(int8)", Int8TensorType),
        ("tensor(uint8)", UInt8TensorType),
    )
    for type_string, tensor_class in candidates:
        if data_type == type_string:
            return tensor_class(dims)

    # Complex types are only considered when they are available.
    if Complex64TensorType is not None:
        if data_type == "tensor(complex64)":
            return Complex64TensorType(dims)
        if data_type == "tensor(complex128)":
            return Complex128TensorType(dims)

    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def calculate_aad_forest_output_shapes(operator):
    """
    Set the output type of the operator to a 1-D float tensor.

    The operator must have exactly one input and one output, and the input
    must be a FloatTensorType. The output length is taken from the first
    dimension of the input shape.
    """
    check_input_and_output_numbers(
        operator, input_count_range=1, output_count_range=1)
    check_input_and_output_types(
        operator, good_input_types=[FloatTensorType])

    first_dim = operator.inputs[0].type.shape[0]
    operator.outputs[0].type = FloatTensorType(shape=[first_dim])
def test_variable_names_output(self):
    """Conversion must raise RuntimeError when final_types reuses the initial_types list."""
    pipeline = Pipeline([("passthrough", Passthrough())])
    shared_types = [("input", FloatTensorType([None, 2]))]

    # The very same list object is passed as both input and output types.
    with self.assertRaises(RuntimeError):
        convert_sklearn(
            pipeline,
            initial_types=shared_types,
            target_opset=TARGET_OPSET,
            final_types=shared_types)
def test_lightgbm(self):
    """Convert a small LightGBM classifier with target_opset=9 and check the reported ir_version."""
    features = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
    labels = [0, 1, 0, 1]
    classifier = lightgbm.LGBMClassifier(n_estimators=3, min_child_samples=1)
    classifier.fit(features, labels)

    input_signature = [('X', FloatTensorType([None, features.shape[1]]))]
    onx = xmlt.convert(
        classifier, 'dummy',
        initial_types=input_signature,
        target_opset=9)

    printed = str(onx).lower()
    assert "ir_version: 4" in printed
def _parse_aad(scope, model, inputs):
    """
    Declare the operator for an AAD model with a single 'score' output.

    :param scope: Scope object
    :param model: the model to wrap in an operator
    :param inputs: A list of variables
    :return: the operator's list of output variables
    """
    operator_name = _get_aad_operator_name(model)
    this_operator = scope.declare_local_operator(operator_name, model)
    this_operator.inputs = inputs

    # FIXME: a second output variable is probably required for the anomaly label
    score_variable = scope.declare_local_variable('score', FloatTensorType())
    this_operator.outputs.append(score_variable)
    return this_operator.outputs
def _declare_input_variables(topology, raw_model_container, extra_config):
    """
    Declare the input variables of the topology and register them on the container.

    :param topology: object providing ``declare_logical_variable``
    :param raw_model_container: object providing ``add_input``; it receives every declared input
    :param extra_config: dict that may contain ``constants.N_INPUTS``,
        ``constants.INPUT_NAMES`` and ``constants.TEST_INPUT``
    :return: the list of declared input variables
    :raises NotImplementedError: if a test input has an unsupported dtype
    """
    # Declare input variables.
    inputs = []
    n_inputs = extra_config[constants.N_INPUTS] if constants.N_INPUTS in extra_config else 1
    has_input_names = constants.INPUT_NAMES in extra_config
    if has_input_names:
        assert n_inputs == len(extra_config[constants.INPUT_NAMES])

    if constants.TEST_INPUT in extra_config:
        from onnxconverter_common.data_types import (
            FloatTensorType,
            DoubleTensorType,
            Int32TensorType,
            Int64TensorType,
            StringTensorType,
        )

        # With a single input the test data is the array itself, so wrap it.
        test_input = extra_config[constants.TEST_INPUT] if n_inputs > 1 else [
            extra_config[constants.TEST_INPUT]
        ]
        for i in range(n_inputs):
            test_array = test_input[i]
            input_name = (extra_config[constants.INPUT_NAMES][i]
                          if has_input_names else "input_{}".format(i))
            if test_array.dtype == np.float32:
                input_type = FloatTensorType(test_array.shape)
            elif test_array.dtype == np.float64:
                input_type = DoubleTensorType(test_array.shape)
            elif test_array.dtype == np.int32:
                input_type = Int32TensorType(test_array.shape)
            elif test_array.dtype == np.int64:
                input_type = Int64TensorType(test_array.shape)
            elif test_array.dtype.kind in constants.SUPPORTED_STRING_TYPES:
                input_type = StringTensorType(test_array.shape)
            else:
                raise NotImplementedError(
                    "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                    .format(test_array.dtype))
            inputs.append(
                topology.declare_logical_variable(input_name, type=input_type))
    else:
        # We have no information on the input. Sklearn/Spark-ML always gets as input a single dataframe,
        # therefore by default we start with a single `input` variable.
        # The user-provided name is honored when available. The guard must test
        # INPUT_NAMES: TEST_INPUT is never present in this branch.
        input_name = extra_config[constants.INPUT_NAMES][0] if has_input_names else "input"
        var = topology.declare_logical_variable(input_name)
        inputs.append(var)

    # The object raw_model_container is a part of the topology we're going to return.
    # We use it to store the inputs of the Sklearn/Spark-ML's computational graph.
    for variable in inputs:
        raw_model_container.add_input(variable)

    return inputs
def _test_non_ascii_variable_name(self):
    """Convert a linear regression whose input is named '年齢' and check the model loads."""
    model, X = fit_regression_model(LinearRegression())
    input_signature = [("年齢", FloatTensorType([None, X.shape[1]]))]
    model_onnx = to_onnx(
        model,
        name="linear regression",
        initial_types=input_signature,
        target_opset=TARGET_OPSET)

    sess = InferenceSession(model_onnx.SerializeToString())
    # Running the model is left out; the original note records the failure
    # "Invalid Feed Input Name:\u5e74\u9f62" for:
    # sess.run(None, {'年齢': X})
    self.assertTrue(sess is not None)
def _parse_h2o(scope, model, inputs):
    '''
    Declare the "H2OTreeMojo" operator and its output variables.

    :param scope: Scope object
    :param model: A h2o model data object; ``model["params"]["classifier"]``
        selects between classifier and non-classifier outputs
    :param inputs: A list of variables
    :return: A list of output variables which will be passed to next stage
    '''
    this_operator = scope.declare_local_operator("H2OTreeMojo", model)
    this_operator.inputs = inputs

    if model["params"]["classifier"]:
        output_names = ('label', 'probabilities')
    else:
        output_names = ('variable',)

    declared = [
        scope.declare_local_variable(name, FloatTensorType())
        for name in output_names
    ]
    for output_variable in declared:
        this_operator.outputs.append(output_variable)
    return this_operator.outputs
def _guess_numpy_type(data_type, dims):
    """
    Map a numpy / Python data type to a tensor type built with *dims*.

    float64 and ``float`` are deliberately mapped to FloatTensorType.
    Raises NotImplementedError for any type that is not handled.
    """
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type in (np.float64, float):
        # onnxruntime does not support double.
        return FloatTensorType(dims)
    # np.str and np.bool were aliases of the builtins and have been removed
    # from NumPy; use the scalar types np.str_ / np.bool_ plus the builtins.
    if data_type in (np.str_, str, object):
        return StringTensorType(dims)
    if data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type in (np.int32, np.uint32) or str(data_type) in ('<U4', '<U1'):  # noqa
        return Int32TensorType(dims)
    if data_type in (np.bool_, bool):
        return BooleanTensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def test_variable_names(self):
    """Convert a pass-through pipeline and check it contains an Identity node and returns its input."""
    model_onnx = convert_sklearn(
        Pipeline([("passthrough", Passthrough())]),
        initial_types=[("input", FloatTensorType([None, 2]))],
        target_opset=TARGET_OPSET,
        verbose=0)
    self.assertIn('Identity', str(model_onnx))

    sample = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
    session = InferenceSession(model_onnx.SerializeToString())
    # Feed using whatever name the session reports for its first input.
    feed_name = session.get_inputs()[0].name
    outputs = session.run(None, {feed_name: sample})
    assert_almost_equal(sample, outputs[0])
def test_lightgbm_pytorch(self):
    """Converting an ONNX-ML model to "torch" without extra arguments must raise RuntimeError."""
    warnings.filterwarnings("ignore")

    features = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    targets = np.array([100, -10, 50], dtype=np.float32)
    regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    regressor.fit(features, targets)

    # Create ONNX-ML model
    input_signature = [
        ("input", FloatTensorType([features.shape[0], features.shape[1]]))
    ]
    onnx_ml_model = convert_lightgbm(
        regressor, initial_types=input_signature, target_opset=9)

    with self.assertRaises(RuntimeError):
        convert(onnx_ml_model, "torch")
def guess_tensor_type(data_type):
    """
    Guess the corresponding variable type based on input type.

    A DoubleTensorType stays a DoubleTensorType, a DictionaryType is resolved
    through its value type, and every other supported tensor type yields a
    FloatTensorType. Anything else raises TypeError.
    """
    if isinstance(data_type, DoubleTensorType):
        return DoubleTensorType()
    if isinstance(data_type, DictionaryType):
        return guess_tensor_type(data_type.value_type)

    supported = (Int64TensorType, Int32TensorType, BooleanTensorType,
                 FloatTensorType, StringTensorType, DoubleTensorType)
    if isinstance(data_type, supported):
        return FloatTensorType()

    raise TypeError("data_type is not a tensor type but '{}'.".format(
        type(data_type)))
def _guess_type_proto(data_type, dims):
    """
    Map an ONNX ``TensorProto`` element type code to a tensor type built with *dims*.

    Raises NotImplementedError if *data_type* is not one of the handled codes.
    """
    # This could be moved to onnxconverter_common.
    tensor_proto = onnx_proto.TensorProto
    candidates = (
        (tensor_proto.FLOAT, FloatTensorType),
        (tensor_proto.DOUBLE, DoubleTensorType),
        (tensor_proto.STRING, StringTensorType),
        (tensor_proto.INT64, Int64TensorType),
        (tensor_proto.INT32, Int32TensorType),
        (tensor_proto.BOOL, BooleanTensorType),
    )
    for proto_code, tensor_class in candidates:
        if data_type == proto_code:
            return tensor_class(dims)

    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def guess_tensor_type(data_type):
    """
    Guess the corresponding variable output type based on input type.

    A DoubleTensorType stays a DoubleTensorType, a DictionaryType is resolved
    through its value type, and complex types (when available) keep their own
    class. Every other supported tensor type, integers included, yields a
    *FloatTensorType*. Anything else raises TypeError.
    """
    if isinstance(data_type, DoubleTensorType):
        return DoubleTensorType()
    if isinstance(data_type, DictionaryType):
        return guess_tensor_type(data_type.value_type)

    # Complex types are only considered when they are available.
    if Complex64TensorType is not None and isinstance(
            data_type, (Complex64TensorType, Complex128TensorType)):
        return data_type.__class__()

    supported = (Int64TensorType, Int32TensorType, BooleanTensorType,
                 FloatTensorType, StringTensorType, DoubleTensorType,
                 Int8TensorType, UInt8TensorType)
    if isinstance(data_type, supported):
        return FloatTensorType()

    raise TypeError("data_type is not a tensor type but '{}'.".format(
        type(data_type)))
def _guess_numpy_type(data_type, dims):
    """
    Map a numpy / Python data type to a tensor type built with *dims*.

    Raises NotImplementedError for any type that is not handled.
    """
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type == np.float64:
        return DoubleTensorType(dims)
    # np.str and np.bool were aliases of the builtins and have been removed
    # from NumPy; use the scalar types np.str_ / np.bool_ plus the builtins.
    if data_type in (np.str_, str, object) or str(data_type) in ('<U1', ) or (
            hasattr(data_type, 'type') and data_type.type is np.str_):  # noqa
        return StringTensorType(dims)
    if data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type in (np.int32, np.uint32) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    if data_type in (np.bool_, bool):
        return BooleanTensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def test_onnx_no_test_data_float(self):
    """The "onnx" conversion of a float-input model must succeed without test data."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)

    # Seeded random data: features are drawn first, then labels.
    np.random.seed(0)
    features = np.array(np.random.rand(100, 200), dtype=np.float32)
    labels = np.random.randint(num_classes, size=100)
    classifier.fit(features, labels)

    # Create ONNX-ML model
    input_signature = [
        ("input", FloatTensorType([features.shape[0], features.shape[1]]))
    ]
    onnx_ml_model = convert_sklearn(
        classifier, initial_types=input_signature, target_opset=11)

    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def _guess_numpy_type(data_type, dims):
    """
    Map a numpy / Python data type to a tensor type built with *dims*.

    Raises NotImplementedError for any type that is not handled.
    """
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type == np.float64:
        return DoubleTensorType(dims)
    # String check: covers np.str_, str and object, so no later string
    # branch is needed.
    if data_type in (np.str_, str, object) or str(data_type) in ('<U1', ) or (
            hasattr(data_type, 'type') and data_type.type is np.str_):  # noqa
        return StringTensorType(dims)
    if data_type in (np.int64, ) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type in (np.int32, ) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    if data_type == np.uint8:
        return UInt8TensorType(dims)
    if data_type in (np.bool_, bool):
        return BooleanTensorType(dims)
    if data_type == np.int8:
        return Int8TensorType(dims)
    if data_type == np.int16:
        return Int16TensorType(dims)
    if data_type == np.uint64:
        return UInt64TensorType(dims)
    if data_type == np.uint32:
        return UInt32TensorType(dims)
    if data_type == np.uint16:
        return UInt16TensorType(dims)
    if data_type == np.float16:
        return Float16TensorType(dims)
    # Complex types are only considered when they are available.
    if Complex64TensorType is not None:
        if data_type == np.complex64:
            return Complex64TensorType(dims)
        if data_type == np.complex128:
            return Complex128TensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type %r (type=%r). You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "" % (data_type, type(data_type)))
def from_pb(obj):
    """
    Creates a data type from a protobuf object.

    An object exposing ``extend`` is treated as a collection and converted
    element by element. Otherwise a Variable is built from the object's name
    and tensor type; NotImplementedError is raised for unsupported element
    types or when no tensor type is set.
    """
    if hasattr(obj, 'extend'):
        return [Variable.from_pb(item) for item in obj]

    name = obj.name
    if not obj.type.tensor_type:
        raise NotImplementedError("Unsupported type '{}' as "
                                  "a string ({}).".format(type(obj), obj))

    tt = obj.type.tensor_type
    elem = tt.elem_type
    shape = [dim.dim_value for dim in tt.shape.dim]

    tensor_proto = onnx_proto.TensorProto
    candidates = (
        (tensor_proto.FLOAT, FloatTensorType),
        (tensor_proto.BOOL, BooleanTensorType),
        (tensor_proto.DOUBLE, DoubleTensorType),
        (tensor_proto.STRING, StringTensorType),
        (tensor_proto.INT64, Int64TensorType),
        (tensor_proto.INT32, Int32TensorType),
    )
    for proto_code, tensor_class in candidates:
        if elem == proto_code:
            ty = tensor_class(shape)
            break
    else:
        raise NotImplementedError("Unsupported type '{}' "
                                  "(elem_type={}).".format(
                                      type(obj.type.tensor_type), elem))

    return Variable(name, name, None, ty)
def _parse_sparkml_simple_model(spark, scope, model, global_inputs, output_dict):
    '''
    Declare the operator and output variables for a single (non-pipeline) spark-ml model.

    Nothing is returned by this function: each declared output variable is
    appended to the operator and recorded in *output_dict*.

    :param spark: stored on the operator as ``raw_params['SparkSession']``
    :param scope: Scope object
    :param model: A spark-ml Transformer/Evaluator (e.g., OneHotEncoder and LogisticRegression)
    :param global_inputs: A list of variables
    :param output_dict: An accumulated mapping output_original_name->[ref_count, variable];
        updated in place with ``[0, variable]`` for each new output
    '''
    operator_name = get_sparkml_operator_name(type(model))
    this_operator = scope.declare_local_operator(operator_name, model)
    this_operator.raw_params = {'SparkSession': spark}

    # Resolve every raw input name to a variable.
    resolved_inputs = []
    for input_name in get_input_names(model):
        resolved_inputs.append(
            _get_variable_for_input(scope, input_name, global_inputs, output_dict))
    this_operator.inputs = resolved_inputs

    for output_name in get_output_names(model):
        variable = scope.declare_local_variable(output_name, FloatTensorType())
        this_operator.outputs.append(variable)
        output_dict[variable.raw_name] = [0, variable]
import onnxmltools
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_xgboost

# Load the iris data, split it into train and test parts, and fit an
# XGBClassifier with its default parameters on the training part.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = XGBClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

# A single float input named 'float_input' with 4 columns is declared;
# the first dimension is left as None.
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_xgboost(clr, initial_types=initial_type)

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession(onx.SerializeToString())
# Use the names reported by the session for its first input and first output.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The test data is cast to float32 before being fed to the session.
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

###############################################
# With DMatrix