Example #1
0
def _guess_type_proto(data_type, dims):
    # This could be moved to onnxconverter_common.
    for d in dims:
        if d == 0:
            raise RuntimeError("Dimension should not be null: {}.".format(
                list(dims)))
    if data_type == onnx_proto.TensorProto.FLOAT:
        return FloatTensorType(dims)
    if data_type == onnx_proto.TensorProto.DOUBLE:
        return DoubleTensorType(dims)
    if data_type == onnx_proto.TensorProto.STRING:
        return StringTensorType(dims)
    if data_type == onnx_proto.TensorProto.INT64:
        return Int64TensorType(dims)
    if data_type == onnx_proto.TensorProto.INT32:
        return Int32TensorType(dims)
    if data_type == onnx_proto.TensorProto.BOOL:
        return BooleanTensorType(dims)
    if data_type == onnx_proto.TensorProto.INT8:
        return Int8TensorType(dims)
    if data_type == onnx_proto.TensorProto.UINT8:
        return UInt8TensorType(dims)
    if Complex64TensorType is not None:
        if data_type == onnx_proto.TensorProto.COMPLEX64:
            return Complex64TensorType(dims)
        if data_type == onnx_proto.TensorProto.COMPLEX128:
            return Complex128TensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
Example #2
0
def _declare_input_variables(topology, raw_model_container, extra_config):
    """
    Declare the input variables of *topology* and register each one on
    *raw_model_container*.

    When test input data is present in ``extra_config`` the variables are
    typed from the data's dtype and shape; otherwise a single untyped
    variable is declared.

    Returns the list of declared input variables.
    Raises NotImplementedError when a test input has an unsupported dtype.
    """
    inputs = []
    n_inputs = extra_config.get(constants.N_INPUTS, 1)
    if constants.INPUT_NAMES in extra_config:
        # Input names, when supplied, must cover every declared input.
        assert n_inputs == len(extra_config[constants.INPUT_NAMES])
    if constants.TEST_INPUT in extra_config:
        from onnxconverter_common.data_types import (
            FloatTensorType,
            DoubleTensorType,
            Int32TensorType,
            Int64TensorType,
            StringTensorType,
        )

        # With a single input, the test data is the array itself rather
        # than a list of arrays; normalize to a list.
        test_input = extra_config[constants.TEST_INPUT] if n_inputs > 1 else [
            extra_config[constants.TEST_INPUT]
        ]
        for i in range(n_inputs):
            # Renamed from `input` to avoid shadowing the builtin.
            test_data = test_input[i]
            input_name = (extra_config[constants.INPUT_NAMES][i]
                          if constants.INPUT_NAMES in extra_config else
                          "input_{}".format(i))
            if test_data.dtype == np.float32:
                input_type = FloatTensorType(test_data.shape)
            elif test_data.dtype == np.float64:
                input_type = DoubleTensorType(test_data.shape)
            elif test_data.dtype == np.int32:
                input_type = Int32TensorType(test_data.shape)
            elif test_data.dtype == np.int64:
                input_type = Int64TensorType(test_data.shape)
            elif test_data.dtype.kind in constants.SUPPORTED_STRING_TYPES:
                input_type = StringTensorType(test_data.shape)
            else:
                raise NotImplementedError(
                    "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                    .format(test_data.dtype))
            inputs.append(
                topology.declare_logical_variable(input_name, type=input_type))
    else:
        # We have no information on the input. Sklearn/Spark-ML always gets as
        # input a single dataframe, therefore by default we start with a
        # single `input` variable.
        # BUG FIX: the guard used to test TEST_INPUT, which is always absent
        # in this branch, so a user-supplied INPUT_NAMES was silently ignored.
        input_name = extra_config[constants.INPUT_NAMES][
            0] if constants.INPUT_NAMES in extra_config else "input"
        var = topology.declare_logical_variable(input_name)
        inputs.append(var)

    # The object raw_model_container is a part of the topology we're going to
    # return. We use it to store the inputs of the Sklearn/Spark-ML's
    # computational graph.
    for variable in inputs:
        raw_model_container.add_input(variable)

    return inputs
Example #3
0
def _guess_numpy_type(data_type, dims):
    # This could be moved to onnxconverter_common.
    """
    Map a numpy dtype (or one of a few aliases) to the matching converter
    tensor type, constructed with shape *dims*.

    Raises NotImplementedError for unsupported types.
    """
    # The first few checks must stay in this exact order: the string and
    # int64/int32 checks also match dtype string spellings ('<U1', '<U6',
    # '<U4') and must run before the generic equality sweep below.
    if data_type == np.float32:
        return FloatTensorType(dims)
    if data_type == np.float64:
        return DoubleTensorType(dims)
    if (data_type in (np.str_, str, object) or str(data_type) in ('<U1', )
            or (hasattr(data_type, 'type') and data_type.type is np.str_)):  # noqa
        return StringTensorType(dims)
    if data_type == np.int64 or str(data_type) == '<U6':
        return Int64TensorType(dims)
    if data_type == np.int32 or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    # Remaining candidates are plain equality matches, tried in order.
    for candidate, ctor in (
            (np.uint8, UInt8TensorType),
            (np.bool_, BooleanTensorType),
            (bool, BooleanTensorType),
            (np.str_, StringTensorType),
            (str, StringTensorType),
            (np.int8, Int8TensorType),
            (np.int16, Int16TensorType),
            (np.uint64, UInt64TensorType),
            (np.uint32, UInt32TensorType),
            (np.uint16, UInt16TensorType),
            (np.float16, Float16TensorType),
    ):
        if data_type == candidate:
            return ctor(dims)
    # Complex tensor types may be unavailable in older converter releases.
    if Complex64TensorType is not None:
        if data_type == np.complex64:
            return Complex64TensorType(dims)
        if data_type == np.complex128:
            return Complex128TensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type %r (type=%r). You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "" % (data_type, type(data_type)))
    def test_onnx_no_test_data_string(self):
        """Converting a string ONNX-ML model to the ONNX backend without
        test input data must raise RuntimeError."""
        # Silence warnings emitted during the conversions below.
        warnings.filterwarnings("ignore")

        encoder = OneHotEncoder()
        data = np.array([["a", "b", "c"]])
        encoder.fit(data)

        # Build the ONNX-ML model from the fitted scikit-learn encoder.
        initial_types = [
            ("input", StringTensorType([data.shape[0], data.shape[1]]))
        ]
        onnx_ml_model = convert_sklearn(
            encoder, initial_types=initial_types, target_opset=11
        )

        # With no test data supplied, the ONNX backend conversion must fail.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
def _guess_type_proto_str(data_type, dims):
    # This could be moved to onnxconverter_common.
    if data_type == "tensor(float)":
        return FloatTensorType(dims)
    if data_type == "tensor(double)":
        return DoubleTensorType(dims)
    if data_type == "tensor(string)":
        return StringTensorType(dims)
    if data_type == "tensor(int64)":
        return Int64TensorType(dims)
    if data_type == "tensor(int32)":
        return Int32TensorType(dims)
    if data_type == "tensor(bool)":
        return BooleanTensorType(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
Example #6
0
def _guess_type_proto(data_type, dims):
    # This could be moved to onnxconverter_common.
    """
    Map an ONNX ``TensorProto`` element type to the matching converter
    tensor type, constructed with shape *dims*.

    Raises NotImplementedError for unsupported element types.
    """
    proto = onnx_proto.TensorProto
    if data_type == proto.FLOAT:
        ty = FloatTensorType
    elif data_type == proto.DOUBLE:
        ty = DoubleTensorType
    elif data_type == proto.STRING:
        ty = StringTensorType
    elif data_type == proto.INT64:
        ty = Int64TensorType
    elif data_type == proto.INT32:
        ty = Int32TensorType
    elif data_type == proto.BOOL:
        ty = BooleanTensorType
    else:
        raise NotImplementedError(
            "Unsupported data_type '{}'. You may raise an issue "
            "at https://github.com/onnx/sklearn-onnx/issues."
            "".format(data_type))
    return ty(dims)
def _guess_numpy_type(data_type, dims):
    # This could be moved to onnxconverter_common.
    if data_type == np.float32:
        return FloatTensorType(dims)
    elif data_type in (np.str, str,
                       object) or str(data_type) in ('<U1', ):  # noqa
        return StringTensorType(dims)
    elif data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    elif data_type in (np.int32,
                       np.uint32) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    elif data_type == np.bool:
        return BooleanTensorType(dims)
    else:
        raise NotImplementedError(
            "Unsupported data_type '{}'. You may raise an issue "
            "at https://github.com/onnx/sklearn-onnx/issues."
            "".format(data_type))
    def from_pb(obj):
        """
        Creates a data type from a protobuf object.
        """
        # Repeated protobuf containers (anything exposing `extend`) are
        # converted element by element.
        if hasattr(obj, 'extend'):
            return [Variable.from_pb(item) for item in obj]

        name = obj.name
        if not obj.type.tensor_type:
            raise NotImplementedError("Unsupported type '{}' as "
                                      "a string ({}).".format(type(obj), obj))

        tensor = obj.type.tensor_type
        elem = tensor.elem_type
        shape = [dim.dim_value for dim in tensor.shape.dim]
        if elem == onnx_proto.TensorProto.FLOAT:
            ty = FloatTensorType(shape)
        elif elem == onnx_proto.TensorProto.BOOL:
            ty = BooleanTensorType(shape)
        elif elem == onnx_proto.TensorProto.DOUBLE:
            ty = DoubleTensorType(shape)
        elif elem == onnx_proto.TensorProto.STRING:
            ty = StringTensorType(shape)
        elif elem == onnx_proto.TensorProto.INT64:
            ty = Int64TensorType(shape)
        elif elem == onnx_proto.TensorProto.INT32:
            ty = Int32TensorType(shape)
        else:
            raise NotImplementedError("Unsupported type '{}' "
                                      "(elem_type={}).".format(
                                          type(obj.type.tensor_type),
                                          elem))

        return Variable(name, name, None, ty)
Example #9
0
from onnxconverter_common.data_types import FloatTensorType, StringTensorType
from onnxmltools.convert import convert_xgboost, convert_lightgbm, \
    convert_tensorflow

env = Env()

# Paths to the fitted transformer / models produced by the training step,
# overridable through environment variables of the same name.
TRANS_PATH = env.str('TRANS_PATH', './outputs/trans')
MODEL_PATH_XGB = env.str('MODEL_PATH_XGB', './outputs/xgb')
MODEL_PATH_LGB = env.str('MODEL_PATH_LGB', './outputs/lgb')
MODEL_PATH_DCN = env.str('MODEL_PATH_DCN', './outputs/dcn')

# Destinations for the converted ONNX artifacts.
# BUG FIX: this line used to read the 'TRANS_PATH' env key, so setting
# TRANS_PATH in the environment clobbered the ONNX output path with the
# input path. The key now matches the variable, like the lines below.
ONNX_TRANS_PATH = env.str('ONNX_TRANS_PATH', './outputs/trans.onnx')
ONNX_MODEl_PATH_XGB = env.str('ONNX_MODEl_PATH_XGB', './outputs/xgb.onnx')
ONNX_MODEl_PATH_LGB = env.str('ONNX_MODEl_PATH_LGB', './outputs/lgb.onnx')
ONNX_MODEl_PATH_DCN = env.str('ONNX_MODEl_PATH_DCN', './outputs/dcn.onnx')

# Input signatures for conversion: the transformer takes 13 numeric and 26
# categorical (string) features; the downstream models take 39 features.
trans_initial_type = [('num_feat', FloatTensorType([None, 13])),
                      ('cat_feat', StringTensorType([None, 26]))]
model_initial_type = [('num_feat', FloatTensorType([None, 39]))]

# Convert the fitted scikit-learn preprocessing transformer to ONNX.
print('convert sklearn transformer')
trans = joblib.load(TRANS_PATH)
onx = convert_sklearn(trans, initial_types=trans_initial_type)
onnx.save(onx, ONNX_TRANS_PATH)

# Convert the XGBoost classifier to ONNX. The model is reloaded from the
# file saved by the training step before conversion.
print('convert XGBoost model')
model = xgb.XGBClassifier()
model.load_model(MODEL_PATH_XGB)
onx = convert_xgboost(model, initial_types=model_initial_type)
onnx.save(onx, ONNX_MODEl_PATH_XGB)

# Convert the LightGBM model to ONNX.
# NOTE(review): this chunk ends here; the convert_lightgbm call that
# presumably follows is outside the visible range.
print('convert LightGBM model')
model = lgb.Booster(model_file=MODEL_PATH_LGB)