def test_lightgbm_pytorch_extra_config(self):
        """Check that ONNX_OUTPUT_MODEL_NAME in extra_config names the output graph."""
        warnings.filterwarnings("ignore")
        features = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        targets = np.array([100, -10, 50], dtype=np.float32)
        regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        regressor.fit(features, targets)
        n_rows, n_cols = features.shape[0], features.shape[1]

        # Step 1: export the fitted regressor as an ONNX-ML model.
        onnx_ml_model = convert_lightgbm(
            regressor,
            initial_types=[("input", FloatTensorType([n_rows, n_cols]))],
            target_opset=9)

        # Step 2: convert the ONNX-ML model to ONNX, requesting an explicit
        # model name and input signature through extra_config.
        model_name = "hummingbird.ml.test.lightgbm"
        extra_config = {
            constants.ONNX_OUTPUT_MODEL_NAME: model_name,
            constants.ONNX_INITIAL_TYPES: [
                ("input", FloatTensorType([n_rows, n_cols]))
            ],
        }
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        assert onnx_model.model.graph.name == model_name
Exemple #2
0
def _parse_libsvm_simple_model(scope, model, inputs):
    '''
    This function handles all non-pipeline models.

    :param scope: Scope object
    :param model: A libsvm object (e.g., OneHotEncoder and LogisticRegression)
    :param inputs: A list of variables
    :return: A list of output variables which will be passed to next stage
    '''

    if model.get_svm_type() in (0, 1):
        label_variable = scope.declare_local_variable('label',
                                                      FloatTensorType())
        probability_map_variable = scope.declare_local_variable(
            'probabilities', FloatTensorType())
        this_operator = scope.declare_local_operator("LibSvmSVC", model)
        this_operator.inputs = inputs
        this_operator.outputs.append(label_variable)
        this_operator.outputs.append(probability_map_variable)
    elif model.get_svm_type() in (4, 3):
        # We assume that all scikit-learn operator can only produce a single float tensor.
        variable = scope.declare_local_variable('variable', FloatTensorType())
        this_operator = scope.declare_local_operator("LibSvmSVR", model)
        this_operator.inputs = inputs
        this_operator.outputs.append(variable)
    else:
        raise ValueError("Unknown SVM type '{0}'".format(model.get_svm_type()))
    return this_operator.outputs
Exemple #3
0
def _parse_xgboost_simple_model(scope, model, inputs):
    '''
    Declare the operator and output variables for a single xgboost model.

    :param scope: Scope object used to declare variables and operators
    :param model: A xgboost object
    :param inputs: A list of variables used as the operator inputs
    :return: The list of output variables of the declared operator
    '''
    xgb_operator = scope.declare_local_operator(_get_xgboost_operator_name(model), model)
    xgb_operator.inputs = inputs

    is_classifier = (type(model) in xgboost_classifier_list
                     or getattr(model, 'operator_name', None) == 'XGBClassifier')
    if is_classifier:
        # Classifiers expose a label and the probabilities of all classes.
        # The declared types are placeholders that may be wrong; they are
        # expected to be fixed in the shape inference phase.
        output_names = ('label', 'probabilities')
    else:
        # Any other model is assumed to produce a single float tensor.
        output_names = ('variable',)

    declared = [scope.declare_local_variable(raw_name, FloatTensorType())
                for raw_name in output_names]
    for output_variable in declared:
        xgb_operator.outputs.append(output_variable)
    return xgb_operator.outputs
Exemple #4
0
    def test_non_ascii_variable_name_pipeline(self):
        """Convert a ColumnTransformer whose inputs include a non-ASCII column name.

        The 'age' column is renamed to '年齢' before fitting; the test passes
        when the converted model can be loaded into an InferenceSession.
        """

        data = dedent("""
            pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
            1,1,"A",female,29.0,0,0,24160,211.3375,B5,S,2,,"MO"
            1,1,"B",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Can"
            1,0,"C",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Can"
            1,0,"D",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Can"
            1,0,"E",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Can"
            1,1,"F",male,48.0,0,0,19952,26.55,E12,S,3,,"NY"
            1,1,"G",female,63.0,1,0,13502,77.9583,D7,S,10,,"NY"
            1,0,"H",male,39.0,0,0,112050,0.0,A36,S,,,"NI"
            1,1,"I",female,53.0,2,0,11769,51.4792,C101,S,D,,"NY"
            1,0,"J",male,71.0,0,0,PC 17609,49.5042,,C,,22.0,"Uruguay"
            1,0,"K",male,47.0,1,0,PC 17757,227.525,C62 C64,C,,124.0,"NY"
            1,1,"L",female,18.0,1,0,PC 17757,227.525,C62 C64,C,4,,"NY"
            1,1,"M",female,24.0,0,0,PC 17477,69.3,B35,C,9,,"F"
            1,1,"N",female,26.0,0,0,19877,78.85,,S,6,,
            1,1,"L",male,80.0,0,0,27042,30.0,A23,S,B,,"Yorks"
            1,0,"O",male,,0,0,PC 17318,25.925,,S,,,"NY"
            1,0,"P",male,24.0,0,1,PC 17558,247.5208,B58 B60,C,,,"PQ"
            1,1,"Q",female,50.0,0,1,PC 17558,247.5208,B58 B60,C,6,,"PQ"
            1,1,"R",female,32.0,0,0,11813,76.2917,D15,C,8,,
            1,0,"S",male,36.0,0,0,13050,75.2417,C6,C,A,,"MN"
        """).strip(" \n")
        data = pd.read_csv(StringIO(data))
        data.rename(columns={"age": "年齢"}, inplace=True)

        numeric_features = ['年齢', 'fare']
        categorical_features = ['embarked', 'sex', 'pclass']
        cols = ['embarked', 'sex', 'pclass', '年齢', 'fare']

        # Take an explicit copy of the selected columns so the assignments
        # below operate on an independent frame.
        X = data.drop('survived', axis=1)[cols].copy()
        for cat in categorical_features:
            # Assign the filled column back instead of calling
            # fillna(inplace=True) on X[cat]: the in-place call acts on a
            # temporary selection and is not guaranteed to update X.
            X[cat] = X[cat].fillna('missing')

        numeric_transformer = Pipeline(
            steps=[('imputer', SimpleImputer(
                strategy='median')), ('scaler', StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
        preprocessor = ColumnTransformer(
            transformers=[('num', numeric_transformer, numeric_features),
                          ('cat', categorical_transformer,
                           categorical_features)])
        preprocessor.fit_transform(X)

        # One [None, 1] input per column, keyed by the column name.
        initial_type = [('pclass', Int64TensorType(shape=[None, 1])),
                        ('sex', StringTensorType(shape=[None, 1])),
                        ('年齢', FloatTensorType(shape=[None, 1])),
                        ('fare', FloatTensorType(shape=[None, 1])),
                        ('embarked', StringTensorType(shape=[None, 1]))]

        onnx_object = convert_sklearn(preprocessor,
                                      initial_types=initial_type,
                                      target_opset=TARGET_OPSET)
        sess = InferenceSession(onnx_object.SerializeToString())
        self.assertIsNotNone(sess)
Exemple #5
0
 def test_to_onnx_type(self):
     """Check the string form of FloatTensorType and of its ONNX type proto."""
     tensor_type = FloatTensorType((1, 5))
     assert str(tensor_type) == 'FloatTensorType(shape=(1, 5))'

     onnx_type = tensor_type.to_onnx_type()
     assert "dim_value: 5" in str(onnx_type)

     tensor_proto = onnx_type.tensor_type
     assert "dim_value: 5" in str(tensor_proto)
     assert tensor_proto.elem_type == 1

     # The sequence_type field is expected to print as an empty string here.
     sequence_proto = onnx_type.sequence_type
     assert str(sequence_proto) == ""
Exemple #6
0
 def test_variable_names_distinct(self):
     """Convert a passthrough pipeline with distinct input and output names.

     The converted model is fed through the 'INPUTA' name and must return
     the data unchanged as its first output.
     """
     model_onnx = convert_sklearn(
         Pipeline([("passthrough", Passthrough())]),
         initial_types=[("INPUTA", FloatTensorType([None, 2]))],
         target_opset=TARGET_OPSET,
         final_types=[("OUTPUTA", FloatTensorType([None, 2]))],
         verbose=0)
     sample = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
     session = InferenceSession(model_onnx.SerializeToString())
     results = session.run(None, {'INPUTA': sample})
     assert_almost_equal(sample, results[0])
Exemple #7
0
    def test_lightgbm_onnx_pytorch(self):
        """Compare ONNX-ML predictions with those of the model converted to "torch"."""
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model.  The result of
        # session.run is used directly; no placeholder list is needed.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(),
                                   pt_model.predict(X))
Exemple #8
0
    def _test_lgbm(self, X, model, extra_config=None):
        """Convert *model* to ONNX-ML and ONNX and collect both sets of predictions.

        :param X: input array; its two dimensions define the declared input shape
        :param model: fitted model passed to ``convert_lightgbm``
        :param extra_config: optional configuration dict forwarded to ``convert``;
            a fresh empty dict is used when omitted, so nothing is shared
            between calls
        :return: ``(onnx_ml_pred, onnx_pred, output_names)``.  ``onnx_ml_pred``
            stores the output whose name contains "label" at index 1 and any
            other output at index 0.  ``onnx_pred`` is the result of
            ``predict`` when there is a single output, otherwise a list with
            ``predict_proba`` at index 0 and ``predict`` at index 1.
        """
        if extra_config is None:
            extra_config = {}

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)

        onnx_ml_pred = [[] for _ in output_names]
        for name, values in zip(output_names, pred):
            onnx_ml_pred[1 if "label" in name else 0] = values

        # Get the predictions for the ONNX model
        onnx_pred = [[] for _ in output_names]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names
    def test_onnx_deafault_n_threads(self):
        """Check the thread counts of the session behind a converted ONNX model.

        intra_op_num_threads must equal the physical core count reported by
        psutil, and inter_op_num_threads must be 1.
        """
        warnings.filterwarnings("ignore")
        num_classes = 2
        max_depth = 10
        classifier = GradientBoostingClassifier(n_estimators=10,
                                                max_depth=max_depth)
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200), dtype=np.float32)
        labels = np.random.randint(num_classes, size=100)
        classifier.fit(features, labels)

        # Create ONNX-ML model
        input_signature = [
            ("input", FloatTensorType([features.shape[0], features.shape[1]]))
        ]
        onnx_ml_model = convert_sklearn(classifier,
                                        initial_types=input_signature,
                                        target_opset=9)

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", features)
        self.assertIsNotNone(hb_model)

        intra_threads = (
            hb_model._session.get_session_options().intra_op_num_threads)
        self.assertTrue(intra_threads == psutil.cpu_count(logical=False))

        inter_threads = (
            hb_model._session.get_session_options().inter_op_num_threads)
        self.assertTrue(inter_threads == 1)
Exemple #10
0
def _guess_type_proto(data_type, dims):
    # This could be moved to onnxconverter_common.
    for d in dims:
        if d == 0:
            raise RuntimeError("Dimension should not be null: {}.".format(
                list(dims)))
    if data_type == onnx_proto.TensorProto.FLOAT:
        return FloatTensorType(dims)
    if data_type == onnx_proto.TensorProto.DOUBLE:
        return DoubleTensorType(dims)
    if data_type == onnx_proto.TensorProto.STRING:
        return StringTensorType(dims)
    if data_type == onnx_proto.TensorProto.INT64:
        return Int64TensorType(dims)
    if data_type == onnx_proto.TensorProto.INT32:
        return Int32TensorType(dims)
    if data_type == onnx_proto.TensorProto.BOOL:
        return BooleanTensorType(dims)
    if data_type == onnx_proto.TensorProto.INT8:
        return Int8TensorType(dims)
    if data_type == onnx_proto.TensorProto.UINT8:
        return UInt8TensorType(dims)
    if Complex64TensorType is not None:
        if data_type == onnx_proto.TensorProto.COMPLEX64:
            return Complex64TensorType(dims)
        if data_type == onnx_proto.TensorProto.COMPLEX128:
            return Complex128TensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
Exemple #11
0
def _guess_type_proto_str(data_type, dims):
    """
    Return the tensor type matching a type string such as ``"tensor(float)"``.

    :param data_type: type string to look up
    :param dims: dimensions handed to the tensor type constructor
    :return: a tensor type instance built with *dims*
    :raises NotImplementedError: if *data_type* matches no known type string
    """
    # This could be moved to onnxconverter_common.
    candidates = [
        ("tensor(float)", FloatTensorType),
        ("tensor(double)", DoubleTensorType),
        ("tensor(string)", StringTensorType),
        ("tensor(int64)", Int64TensorType),
        ("tensor(int32)", Int32TensorType),
        ("tensor(bool)", BooleanTensorType),
        ("tensor(int8)", Int8TensorType),
        ("tensor(uint8)", UInt8TensorType),
    ]
    # Complex types are only considered when Complex64TensorType is set.
    if Complex64TensorType is not None:
        candidates.append(("tensor(complex64)", Complex64TensorType))
        candidates.append(("tensor(complex128)", Complex128TensorType))

    for type_string, tensor_class in candidates:
        if data_type == type_string:
            return tensor_class(dims)

    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
Exemple #12
0
def calculate_aad_forest_output_shapes(operator):
    """
    Set the output type of an operator with one float input and one output.

    The single output becomes a ``FloatTensorType`` of shape ``[N]``, where
    ``N`` is the first dimension of the input's shape.

    :param operator: operator whose first output type is overwritten
    """
    check_input_and_output_numbers(
        operator, input_count_range=1, output_count_range=1)
    check_input_and_output_types(
        operator, good_input_types=[FloatTensorType])

    first_input_shape = operator.inputs[0].type.shape
    batch_size = first_input_shape[0]
    operator.outputs[0].type = FloatTensorType(shape=[batch_size])
Exemple #13
0
 def test_variable_names_output(self):
     """Passing the same list as initial_types and final_types must raise RuntimeError."""
     pipeline = Pipeline([("passthrough", Passthrough())])
     shared_types = [("input", FloatTensorType([None, 2]))]
     self.assertRaises(
         RuntimeError,
         convert_sklearn,
         pipeline,
         initial_types=shared_types,
         target_opset=TARGET_OPSET,
         final_types=shared_types)
 def test_lightgbm(self):
     """Convert a small LGBMClassifier at opset 9 and check the reported IR version."""
     features = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
     labels = [0, 1, 0, 1]
     classifier = lightgbm.LGBMClassifier(n_estimators=3, min_child_samples=1)
     classifier.fit(features, labels)

     n_features = features.shape[1]
     onx = xmlt.convert(
         classifier,
         'dummy',
         initial_types=[('X', FloatTensorType([None, n_features]))],
         target_opset=9)

     model_text = str(onx).lower()
     assert "ir_version: 4" in model_text
Exemple #15
0
def _parse_aad(scope, model, inputs):
    """
    Declare the operator for an AAD model together with its 'score' output.

    :param scope: Scope object used to declare variables and operators
    :param model: model object; its operator name comes from
        ``_get_aad_operator_name``
    :param inputs: A list of variables used as the operator inputs
    :return: The list of output variables of the declared operator
    """
    operator_name = _get_aad_operator_name(model)
    aad_operator = scope.declare_local_operator(operator_name, model)
    aad_operator.inputs = inputs

    # FIXME: probably another variable is required for anomality label
    aad_operator.outputs.append(
        scope.declare_local_variable('score', FloatTensorType()))

    return aad_operator.outputs
Exemple #16
0
def _declare_input_variables(topology, raw_model_container, extra_config):
    """
    Declare the input variables of *topology* and register them on the container.

    When ``constants.TEST_INPUT`` is present in *extra_config*, one typed
    variable is declared per input, with the tensor type chosen from the
    dtype of the matching test array.  Otherwise a single untyped variable
    is declared.  Names come from ``constants.INPUT_NAMES`` when supplied;
    the defaults are ``"input_<i>"`` with test input and ``"input"`` without.

    :param topology: object exposing ``declare_logical_variable``
    :param raw_model_container: object exposing ``add_input``; receives every
        declared variable
    :param extra_config: configuration mapping read for ``N_INPUTS``,
        ``INPUT_NAMES`` and ``TEST_INPUT``
    :return: the list of declared input variables
    :raises NotImplementedError: if a test input has an unsupported dtype
    """
    # Declare input variables.
    inputs = []
    n_inputs = extra_config.get(constants.N_INPUTS, 1)
    has_input_names = constants.INPUT_NAMES in extra_config
    if has_input_names:
        assert n_inputs == len(extra_config[constants.INPUT_NAMES])

    if constants.TEST_INPUT in extra_config:
        from onnxconverter_common.data_types import (
            FloatTensorType,
            DoubleTensorType,
            Int32TensorType,
            Int64TensorType,
            StringTensorType,
        )

        # With a single input the test data is the array itself; wrap it so
        # that it can be indexed like the multi-input case.
        test_input = extra_config[constants.TEST_INPUT] if n_inputs > 1 else [
            extra_config[constants.TEST_INPUT]
        ]
        for i in range(n_inputs):
            sample = test_input[i]
            input_name = (extra_config[constants.INPUT_NAMES][i]
                          if has_input_names else "input_{}".format(i))
            if sample.dtype == np.float32:
                input_type = FloatTensorType(sample.shape)
            elif sample.dtype == np.float64:
                input_type = DoubleTensorType(sample.shape)
            elif sample.dtype == np.int32:
                input_type = Int32TensorType(sample.shape)
            elif sample.dtype == np.int64:
                input_type = Int64TensorType(sample.shape)
            elif sample.dtype.kind in constants.SUPPORTED_STRING_TYPES:
                input_type = StringTensorType(sample.shape)
            else:
                raise NotImplementedError(
                    "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                    .format(sample.dtype))
            inputs.append(
                topology.declare_logical_variable(input_name, type=input_type))
    else:
        # We have no information on the input. Sklearn/Spark-ML always gets as input a single dataframe,
        # therefore by default we start with a single `input` variable.
        # The name lookup must be guarded by INPUT_NAMES: TEST_INPUT is known
        # to be absent in this branch, so testing for it would always fall
        # back to "input" and ignore a user-supplied name.
        input_name = (extra_config[constants.INPUT_NAMES][0]
                      if has_input_names else "input")
        var = topology.declare_logical_variable(input_name)
        inputs.append(var)

    # The object raw_model_container is a part of the topology we're going to return.
    # We use it to store the inputs of the Sklearn/Spark-ML's computational graph.
    for variable in inputs:
        raw_model_container.add_input(variable)

    return inputs
Exemple #17
0
 def _test_non_ascii_variable_name(self):
     """Convert a linear regression whose input has a non-ASCII name.

     Only session creation is checked; running the session is left
     disabled (see the note below).
     """
     regressor, features = fit_regression_model(LinearRegression())
     input_signature = [("年齢", FloatTensorType([None, features.shape[1]]))]
     model_onnx = to_onnx(regressor,
                          name="linear regression",
                          initial_types=input_signature,
                          target_opset=TARGET_OPSET)
     session = InferenceSession(model_onnx.SerializeToString())
     # Feeding by the non-ASCII name was noted to fail with
     # "Invalid Feed Input Name:\u5e74\u9f62", so this call stays disabled:
     # session.run(None, {'年齢': features})
     self.assertTrue(session is not None)
Exemple #18
0
def _parse_h2o(scope, model, inputs):
    '''
    Declare the "H2OTreeMojo" operator and its output variables.

    Classifiers (``model["params"]["classifier"]`` is truthy) get a label
    and a probabilities output; any other model gets a single output.

    :param scope: Scope object used to declare variables and operators
    :param model: A h2o model data object
    :param inputs: A list of variables used as the operator inputs
    :return: The list of output variables of the declared operator
    '''
    mojo_operator = scope.declare_local_operator("H2OTreeMojo", model)
    mojo_operator.inputs = inputs

    if model["params"]["classifier"]:
        output_names = ('label', 'probabilities')
    else:
        output_names = ('variable',)

    declared = [scope.declare_local_variable(raw_name, FloatTensorType())
                for raw_name in output_names]
    for output_variable in declared:
        mojo_operator.outputs.append(output_variable)
    return mojo_operator.outputs
def _guess_numpy_type(data_type, dims):
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    elif data_type in (np.float64, float):
        # onnxruntime does not support double.
        return FloatTensorType(dims)
    elif data_type in (np.str, str, object):
        return StringTensorType(dims)
    elif data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    elif data_type in (np.int32,
                       np.uint32) or str(data_type) in ('<U4', '<U1'):  # noqa
        return Int32TensorType(dims)
    elif data_type == np.bool:
        return BooleanTensorType(dims)
    else:
        raise NotImplementedError(
            "Unsupported data_type '{}'. You may raise an issue "
            "at https://github.com/onnx/sklearn-onnx/issues."
            "".format(data_type))
Exemple #20
0
 def test_variable_names(self):
     """Convert a passthrough pipeline and check that it returns its input.

     The serialized model text must mention an 'Identity' node.
     """
     model_onnx = convert_sklearn(
         Pipeline([("passthrough", Passthrough())]),
         initial_types=[("input", FloatTensorType([None, 2]))],
         target_opset=TARGET_OPSET,
         verbose=0)
     self.assertIn('Identity', str(model_onnx))

     sample = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
     session = InferenceSession(model_onnx.SerializeToString())
     feed_name = session.get_inputs()[0].name
     results = session.run(None, {feed_name: sample})
     assert_almost_equal(sample, results[0])
Exemple #21
0
    def test_lightgbm_pytorch(self):
        """Converting an ONNX-ML model to "torch" without test data must raise RuntimeError."""
        warnings.filterwarnings("ignore")
        features = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        targets = np.array([100, -10, 50], dtype=np.float32)
        regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        regressor.fit(features, targets)

        # Create ONNX-ML model
        input_signature = [
            ("input", FloatTensorType([features.shape[0], features.shape[1]]))
        ]
        onnx_ml_model = convert_lightgbm(
            regressor, initial_types=input_signature, target_opset=9)

        with self.assertRaises(RuntimeError):
            convert(onnx_ml_model, "torch")
def guess_tensor_type(data_type):
    """
    Guess the output tensor type from an input type.

    A double tensor type gives ``DoubleTensorType``; a dictionary type is
    resolved through its ``value_type``; every other accepted tensor type
    gives ``FloatTensorType``.  Anything else raises ``TypeError``.
    """
    # Unwrap dictionary types step by step; the double check comes first at
    # every level, as in the recursive formulation.
    while True:
        if isinstance(data_type, DoubleTensorType):
            return DoubleTensorType()
        if not isinstance(data_type, DictionaryType):
            break
        data_type = data_type.value_type

    accepted = (Int64TensorType, Int32TensorType, BooleanTensorType,
                FloatTensorType, StringTensorType, DoubleTensorType)
    if isinstance(data_type, accepted):
        return FloatTensorType()
    raise TypeError("data_type is not a tensor type but '{}'.".format(
        type(data_type)))
Exemple #23
0
def _guess_type_proto(data_type, dims):
    """
    Return the tensor type matching an ONNX ``TensorProto`` element type.

    :param data_type: element type, compared against ``onnx_proto.TensorProto``
        constants
    :param dims: dimensions handed to the tensor type constructor
    :return: a tensor type instance built with *dims*
    :raises NotImplementedError: if *data_type* matches no known element type
    """
    # This could be moved to onnxconverter_common.
    candidates = (
        (onnx_proto.TensorProto.FLOAT, FloatTensorType),
        (onnx_proto.TensorProto.DOUBLE, DoubleTensorType),
        (onnx_proto.TensorProto.STRING, StringTensorType),
        (onnx_proto.TensorProto.INT64, Int64TensorType),
        (onnx_proto.TensorProto.INT32, Int32TensorType),
        (onnx_proto.TensorProto.BOOL, BooleanTensorType),
    )
    for proto_code, tensor_class in candidates:
        if data_type == proto_code:
            return tensor_class(dims)

    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
Exemple #24
0
def guess_tensor_type(data_type):
    """
    Guess the output tensor type from an input type.

    A double tensor type gives ``DoubleTensorType``; a dictionary type is
    resolved through its ``value_type``; a complex tensor type (when
    complex types are available) gives a new instance of its own class.
    Every other accepted tensor type, integers included, gives
    ``FloatTensorType``.  Anything else raises ``TypeError``.
    """
    # Unwrap dictionary types step by step; the double check comes first at
    # every level, as in the recursive formulation.
    while True:
        if isinstance(data_type, DoubleTensorType):
            return DoubleTensorType()
        if not isinstance(data_type, DictionaryType):
            break
        data_type = data_type.value_type

    if Complex64TensorType is not None and isinstance(
            data_type, (Complex64TensorType, Complex128TensorType)):
        return data_type.__class__()

    accepted = (Int64TensorType, Int32TensorType, BooleanTensorType,
                FloatTensorType, StringTensorType, DoubleTensorType,
                Int8TensorType, UInt8TensorType)
    if isinstance(data_type, accepted):
        return FloatTensorType()
    raise TypeError("data_type is not a tensor type but '{}'.".format(
        type(data_type)))
Exemple #25
0
def _guess_numpy_type(data_type, dims):
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type == np.float64:
        return DoubleTensorType(dims)
    if data_type in (np.str, str, object) or str(data_type) in ('<U1', ) or (
            hasattr(data_type, 'type') and data_type.type is np.str_):  # noqa
        return StringTensorType(dims)
    if data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type in (np.int32,
                     np.uint32) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    if data_type == np.bool:
        return BooleanTensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
    def test_onnx_no_test_data_float(self):
        """Convert an ONNX-ML model to the "onnx" backend without passing test data."""
        warnings.filterwarnings("ignore")
        num_classes = 2
        max_depth = 10
        classifier = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200), dtype=np.float32)
        labels = np.random.randint(num_classes, size=100)
        classifier.fit(features, labels)

        # Create ONNX-ML model
        input_signature = [
            ("input", FloatTensorType([features.shape[0], features.shape[1]]))
        ]
        onnx_ml_model = convert_sklearn(
            classifier, initial_types=input_signature, target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
Exemple #27
0
def _guess_numpy_type(data_type, dims):
    """
    Return the tensor type matching a numpy or Python data type.

    The checks are evaluated in order and the first match wins.

    :param data_type: numpy dtype, numpy scalar type or Python type
    :param dims: dimensions handed to the tensor type constructor
    :return: a tensor type instance built with *dims*
    :raises NotImplementedError: if *data_type* is not supported
    """
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type == np.float64:
        return DoubleTensorType(dims)
    # Every string-like input is handled here, so no later string check is
    # needed.
    if data_type in (np.str_, str, object) or str(data_type) in ('<U1', ) or (
            hasattr(data_type, 'type') and data_type.type is np.str_):  # noqa
        return StringTensorType(dims)
    if data_type in (np.int64, ) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type in (np.int32, ) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    if data_type == np.uint8:
        return UInt8TensorType(dims)
    if data_type in (np.bool_, bool):
        return BooleanTensorType(dims)
    if data_type == np.int8:
        return Int8TensorType(dims)
    if data_type == np.int16:
        return Int16TensorType(dims)
    if data_type == np.uint64:
        return UInt64TensorType(dims)
    if data_type == np.uint32:
        return UInt32TensorType(dims)
    if data_type == np.uint16:
        return UInt16TensorType(dims)
    if data_type == np.float16:
        return Float16TensorType(dims)
    # Complex types are only considered when Complex64TensorType is set.
    if Complex64TensorType is not None:
        if data_type == np.complex64:
            return Complex64TensorType(dims)
        if data_type == np.complex128:
            return Complex128TensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type %r (type=%r). You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "" % (data_type, type(data_type)))
    def from_pb(obj):
        """
        Build a ``Variable`` (or a list of them) from a protobuf object.

        An object exposing ``extend`` is treated as a collection and each
        element is converted in turn.  Otherwise the element type and shape
        of ``obj.type.tensor_type`` select the tensor type of the variable.

        Raises ``NotImplementedError`` for an unsupported element type or
        when ``obj.type.tensor_type`` is falsy.
        """
        if hasattr(obj, 'extend'):
            return [Variable.from_pb(item) for item in obj]

        name = obj.name
        if not obj.type.tensor_type:
            raise NotImplementedError("Unsupported type '{}' as "
                                      "a string ({}).".format(type(obj), obj))

        tensor = obj.type.tensor_type
        elem = tensor.elem_type
        shape = [dim.dim_value for dim in tensor.shape.dim]

        supported = (
            (onnx_proto.TensorProto.FLOAT, FloatTensorType),
            (onnx_proto.TensorProto.BOOL, BooleanTensorType),
            (onnx_proto.TensorProto.DOUBLE, DoubleTensorType),
            (onnx_proto.TensorProto.STRING, StringTensorType),
            (onnx_proto.TensorProto.INT64, Int64TensorType),
            (onnx_proto.TensorProto.INT32, Int32TensorType),
        )
        for proto_code, tensor_class in supported:
            if elem == proto_code:
                return Variable(name, name, None, tensor_class(shape))

        raise NotImplementedError(
            "Unsupported type '{}' (elem_type={}).".format(
                type(obj.type.tensor_type), elem))
Exemple #29
0
def _parse_sparkml_simple_model(spark, scope, model, global_inputs,
                                output_dict):
    '''
    Declare the operator and output variables for a non-pipeline model.

    :param spark: object stored on the operator under the 'SparkSession' raw parameter
    :param scope: Scope object used to declare variables and operators
    :param model: A spark-ml Transformer/Evaluator (e.g., OneHotEncoder and LogisticRegression)
    :param global_inputs: A list of variables
    :param output_dict: An accumulated mapping output_original_name->[ref_count, variable];
        updated in place with one ``[0, variable]`` entry per declared output
    :return: nothing in the visible code; the declared variables are appended
        to the operator's outputs and recorded in *output_dict*
    '''
    operator_name = get_sparkml_operator_name(type(model))
    spark_operator = scope.declare_local_operator(operator_name, model)
    spark_operator.raw_params = {'SparkSession': spark}

    resolved_inputs = []
    for input_name in get_input_names(model):
        resolved_inputs.append(
            _get_variable_for_input(scope, input_name, global_inputs,
                                    output_dict))
    spark_operator.inputs = resolved_inputs

    for output_name in get_output_names(model):
        output_variable = scope.declare_local_variable(output_name,
                                                       FloatTensorType())
        spark_operator.outputs.append(output_variable)
        # New outputs start with a reference count of 0.
        output_dict[output_variable.raw_name] = [0, output_variable]
import onnxmltools
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_xgboost

# Train a classifier on the iris data; the fitted model is printed below.
iris = load_iris()
X, y = iris.data, iris.target
# No random_state is passed, so the split is presumably not reproducible
# across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = XGBClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

# A single input named 'float_input' with 4 columns; the first dimension is
# left as None.
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_xgboost(clr, initial_types=initial_type)

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
# Only the first output is requested; the test data is cast to float32 to
# match the declared FloatTensorType input.
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

###############################################
# With DMatrix