def _guess_type_proto(data_type, dims):
    """Map an ONNX ``TensorProto`` element type onto the matching
    tensor type, instantiated with ``dims``.

    This could be moved to onnxconverter_common.

    Raises RuntimeError for a null dimension and NotImplementedError
    for an unsupported element type.
    """
    # A 0 dimension is never valid here: reject it up front.
    for d in dims:
        if d == 0:
            raise RuntimeError("Dimension should not be null: {}.".format(
                list(dims)))
    # elem_type values are plain ints, so a dispatch table is equivalent
    # to the if-chain.
    table = {
        onnx_proto.TensorProto.FLOAT: FloatTensorType,
        onnx_proto.TensorProto.DOUBLE: DoubleTensorType,
        onnx_proto.TensorProto.STRING: StringTensorType,
        onnx_proto.TensorProto.INT64: Int64TensorType,
        onnx_proto.TensorProto.INT32: Int32TensorType,
        onnx_proto.TensorProto.BOOL: BooleanTensorType,
        onnx_proto.TensorProto.INT8: Int8TensorType,
        onnx_proto.TensorProto.UINT8: UInt8TensorType,
    }
    # Complex types are only available in recent onnxconverter_common.
    if Complex64TensorType is not None:
        table[onnx_proto.TensorProto.COMPLEX64] = Complex64TensorType
        table[onnx_proto.TensorProto.COMPLEX128] = Complex128TensorType
    ctor = table.get(data_type)
    if ctor is not None:
        return ctor(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def _guess_type_proto_str(data_type, dims):
    """Map a textual ONNX type such as ``'tensor(float)'`` onto the
    matching tensor type, instantiated with ``dims``.

    This could be moved to onnxconverter_common.
    """
    # String keys make a lookup table an exact replacement for the
    # original if-chain.
    table = {
        "tensor(float)": FloatTensorType,
        "tensor(double)": DoubleTensorType,
        "tensor(string)": StringTensorType,
        "tensor(int64)": Int64TensorType,
        "tensor(int32)": Int32TensorType,
        "tensor(bool)": BooleanTensorType,
        "tensor(int8)": Int8TensorType,
        "tensor(uint8)": UInt8TensorType,
    }
    # Complex types are only available in recent onnxconverter_common.
    if Complex64TensorType is not None:
        table["tensor(complex64)"] = Complex64TensorType
        table["tensor(complex128)"] = Complex128TensorType
    ctor = table.get(data_type)
    if ctor is not None:
        return ctor(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def test_forgotten_backend_string(self):
    """Passing initial types where the backend string belongs must
    raise a ValueError."""
    from sklearn.preprocessing import LabelEncoder

    model = LabelEncoder()
    data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
    model.fit(data)
    # The second positional argument should be the backend name; here it
    # is the initial-types list instead, which convert must reject.
    with self.assertRaises(ValueError):
        hummingbird.ml.convert(model, [("input", Int32TensorType([6, 1]))])
def _declare_input_variables(topology, raw_model_container, extra_config):
    """Declare the input variables of the computational graph.

    Args:
        topology: The topology the logical variables are declared on.
        raw_model_container: Container the declared inputs are registered
            into (it is part of the topology that will be returned).
        extra_config: May carry ``constants.N_INPUTS``,
            ``constants.INPUT_NAMES`` and ``constants.TEST_INPUT``.

    Returns:
        The list of declared input variables.

    Raises:
        NotImplementedError: When a test input has an unsupported dtype.
    """
    # Declare input variables.
    inputs = []
    n_inputs = extra_config[
        constants.N_INPUTS] if constants.N_INPUTS in extra_config else 1
    if constants.INPUT_NAMES in extra_config:
        assert n_inputs == len(extra_config[constants.INPUT_NAMES])
    if constants.TEST_INPUT in extra_config:
        from onnxconverter_common.data_types import (
            FloatTensorType,
            DoubleTensorType,
            Int32TensorType,
            Int64TensorType,
            StringTensorType,
        )

        # With a single input, TEST_INPUT holds the array itself rather
        # than a list of arrays: normalize to a list.
        test_input = extra_config[constants.TEST_INPUT] if n_inputs > 1 else [
            extra_config[constants.TEST_INPUT]
        ]
        for i in range(n_inputs):
            input = test_input[i]
            input_name = (extra_config[constants.INPUT_NAMES][i]
                          if constants.INPUT_NAMES in extra_config else
                          "input_{}".format(i))
            # Infer the logical tensor type from the test input's dtype.
            if input.dtype == np.float32:
                input_type = FloatTensorType(input.shape)
            elif input.dtype == np.float64:
                input_type = DoubleTensorType(input.shape)
            elif input.dtype == np.int32:
                input_type = Int32TensorType(input.shape)
            elif input.dtype == np.int64:
                input_type = Int64TensorType(input.shape)
            elif input.dtype.kind in constants.SUPPORTED_STRING_TYPES:
                input_type = StringTensorType(input.shape)
            else:
                raise NotImplementedError(
                    "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                    .format(input.dtype))
            inputs.append(
                topology.declare_logical_variable(input_name,
                                                  type=input_type))
    else:
        # We have no information on the input. Sklearn/Spark-ML always gets as input a single dataframe,
        # therefore by default we start with a single `input` variable.
        # BUG FIX: the guard previously tested `constants.TEST_INPUT in
        # extra_config`, which is always False in this branch, so a
        # user-provided INPUT_NAMES was silently ignored.
        input_name = (extra_config[constants.INPUT_NAMES][0]
                      if constants.INPUT_NAMES in extra_config else "input")
        var = topology.declare_logical_variable(input_name)
        inputs.append(var)

    # The object raw_model_container is a part of the topology we're going to return.
    # We use it to store the inputs of the Sklearn/Spark-ML's computational graph.
    for variable in inputs:
        raw_model_container.add_input(variable)

    return inputs
def test_onnx_no_test_data_int(self):
    """Converting an ONNX-ML model to 'onnx' must work without test data
    (int32 input case)."""
    warnings.filterwarnings("ignore")
    encoder = OneHotEncoder()
    X = np.array([[1, 2, 3]], dtype=np.int32)
    encoder.fit(X)
    # Create ONNX-ML model
    initial_types = [("input", Int32TensorType([X.shape[0], X.shape[1]]))]
    onnx_ml_model = convert_sklearn(
        encoder, initial_types=initial_types, target_opset=11
    )
    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def _guess_type_proto(data_type, dims):
    """Translate an ONNX ``TensorProto`` element type into the matching
    tensor type, instantiated with ``dims``.

    This could be moved to onnxconverter_common.
    """
    # elem_type values are plain ints, so a lookup table is equivalent
    # to the original elif-chain.
    proto_to_type = {
        onnx_proto.TensorProto.FLOAT: FloatTensorType,
        onnx_proto.TensorProto.DOUBLE: DoubleTensorType,
        onnx_proto.TensorProto.STRING: StringTensorType,
        onnx_proto.TensorProto.INT64: Int64TensorType,
        onnx_proto.TensorProto.INT32: Int32TensorType,
        onnx_proto.TensorProto.BOOL: BooleanTensorType,
    }
    ctor = proto_to_type.get(data_type)
    if ctor is not None:
        return ctor(dims)
    raise NotImplementedError(
        "Unsupported data_type '{}'. You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "".format(data_type))
def _guess_numpy_type(data_type, dims):
    """Map a numpy dtype (or scalar type) onto the matching tensor type,
    instantiated with ``dims``.

    This could be moved to onnxconverter_common.

    Raises:
        NotImplementedError: When the dtype has no tensor-type mapping.
    """
    # BUG FIX: `np.str` and `np.bool` were deprecated aliases of the
    # builtins (NumPy 1.20) and removed in NumPy 1.24, so referencing
    # them raises AttributeError on modern NumPy. Use `np.str_` and
    # `(np.bool_, bool)` instead, matching the newer variant of this
    # helper elsewhere in the project.
    if data_type == np.float32:
        return FloatTensorType(dims)
    elif data_type in (np.str_, str, object) or str(data_type) in ('<U1', ):  # noqa
        return StringTensorType(dims)
    elif data_type in (np.int64, np.uint64) or str(data_type) == '<U6':
        return Int64TensorType(dims)
    elif data_type in (np.int32, np.uint32) or str(data_type) in ('<U4', ):  # noqa
        return Int32TensorType(dims)
    elif data_type in (np.bool_, bool):
        return BooleanTensorType(dims)
    else:
        raise NotImplementedError(
            "Unsupported data_type '{}'. You may raise an issue "
            "at https://github.com/onnx/sklearn-onnx/issues."
            "".format(data_type))
def _guess_numpy_type(data_type, dims):
    """Map a numpy dtype (or scalar type) onto the matching tensor type,
    instantiated with ``dims``.

    This could be moved to onnxconverter_common.
    """
    # NOTE: numpy dtype objects compare equal to scalar types via `==`
    # but do not hash the same way, so the dispatch below keeps the
    # original `==`/`in` comparisons (an ordered predicate list) instead
    # of a plain dict lookup.
    def _is_stringish(dt):
        return (dt in (np.str_, str, object) or str(dt) in ('<U1', )
                or (hasattr(dt, 'type') and dt.type is np.str_))  # noqa

    dispatch = [
        (lambda dt: dt == np.float32, FloatTensorType),
        (lambda dt: dt == np.float64, DoubleTensorType),
        (_is_stringish, StringTensorType),
        (lambda dt: dt in (np.int64, ) or str(dt) == '<U6', Int64TensorType),
        (lambda dt: dt in (np.int32, ) or str(dt) in ('<U4', ),  # noqa
         Int32TensorType),
        (lambda dt: dt == np.uint8, UInt8TensorType),
        (lambda dt: dt in (np.bool_, bool), BooleanTensorType),
        (lambda dt: dt in (np.str_, str), StringTensorType),
        (lambda dt: dt == np.int8, Int8TensorType),
        (lambda dt: dt == np.int16, Int16TensorType),
        (lambda dt: dt == np.uint64, UInt64TensorType),
        (lambda dt: dt == np.uint32, UInt32TensorType),
        (lambda dt: dt == np.uint16, UInt16TensorType),
        (lambda dt: dt == np.float16, Float16TensorType),
    ]
    # Complex types are only available in recent onnxconverter_common.
    if Complex64TensorType is not None:
        dispatch.append((lambda dt: dt == np.complex64, Complex64TensorType))
        dispatch.append(
            (lambda dt: dt == np.complex128, Complex128TensorType))
    for matches, tensor_type in dispatch:
        if matches(data_type):
            return tensor_type(dims)
    raise NotImplementedError(
        "Unsupported data_type %r (type=%r). You may raise an issue "
        "at https://github.com/onnx/sklearn-onnx/issues."
        "" % (data_type, type(data_type)))
def from_pb(obj):
    """
    Creates a data type from a protobuf object.
    """
    def get_shape(tt):
        # Collect the concrete dim_value of every dimension.
        return [tt.shape.dim[i].dim_value
                for i in range(len(tt.shape.dim))]

    # A repeated protobuf field (list-like, exposes `extend`) is
    # converted element by element.
    if hasattr(obj, 'extend'):
        return [Variable.from_pb(o) for o in obj]

    name = obj.name
    if not obj.type.tensor_type:
        # Only tensor-typed values are handled here.
        raise NotImplementedError("Unsupported type '{}' as "
                                  "a string ({}).".format(type(obj), obj))
    tt = obj.type.tensor_type
    elem = tt.elem_type
    shape = get_shape(tt)
    # elem_type is a plain int enum, so a dict lookup matches the
    # original elif-chain exactly.
    elem_to_type = {
        onnx_proto.TensorProto.FLOAT: FloatTensorType,
        onnx_proto.TensorProto.BOOL: BooleanTensorType,
        onnx_proto.TensorProto.DOUBLE: DoubleTensorType,
        onnx_proto.TensorProto.STRING: StringTensorType,
        onnx_proto.TensorProto.INT64: Int64TensorType,
        onnx_proto.TensorProto.INT32: Int32TensorType,
    }
    if elem not in elem_to_type:
        raise NotImplementedError("Unsupported type '{}' "
                                  "(elem_type={}).".format(
                                      type(obj.type.tensor_type), elem))
    ty = elem_to_type[elem](shape)
    return Variable(name, name, None, ty)
def parse_sklearn_api_model(model, extra_config=None):
    """
    Puts *scikit-learn* object into an abstract representation so that our framework
    can work seamlessly on models created with different machine learning tools.

    Args:
        model: A model object in scikit-learn format
        extra_config: Optional configuration dict. Recognized keys include
            ``constants.N_INPUTS``, ``constants.INPUT_NAMES``,
            ``constants.TEST_INPUT`` and ``constants.OUTPUT_NAMES``.

    Returns:
        A `onnxconverter_common.topology.Topology` object representing the input model
    """
    assert model is not None, "Cannot convert a mode of type None."

    # Avoid the shared mutable-default pitfall; passing {} explicitly
    # behaves exactly as before.
    if extra_config is None:
        extra_config = {}

    raw_model_container = CommonSklearnModelContainer(model)

    # Declare a computational graph. It will become a representation of
    # the input scikit-learn model after parsing.
    topology = Topology(raw_model_container)

    # Declare an object to provide variables' and operators' naming mechanism.
    # One global scope is enough for parsing scikit-learn models.
    scope = topology.declare_scope("__root__")

    # Declare input variables.
    inputs = []
    n_inputs = extra_config[
        constants.N_INPUTS] if constants.N_INPUTS in extra_config else 1
    if constants.INPUT_NAMES in extra_config:
        assert n_inputs == len(extra_config[constants.INPUT_NAMES])
    if constants.TEST_INPUT in extra_config:
        from onnxconverter_common.data_types import FloatTensorType, DoubleTensorType, Int32TensorType, Int64TensorType

        # With a single input, TEST_INPUT holds the array itself rather
        # than a list of arrays: normalize to a list.
        test_input = extra_config[constants.TEST_INPUT] if n_inputs > 1 else [
            extra_config[constants.TEST_INPUT]
        ]
        for i in range(n_inputs):
            input = test_input[i]
            input_name = (extra_config[constants.INPUT_NAMES][i]
                          if constants.INPUT_NAMES in extra_config else
                          "input_{}".format(i))
            # Infer the logical tensor type from the test input's dtype.
            if input.dtype == np.float32:
                input_type = FloatTensorType(input.shape)
            elif input.dtype == np.float64:
                input_type = DoubleTensorType(input.shape)
            elif input.dtype == np.int32:
                input_type = Int32TensorType(input.shape)
            elif input.dtype == np.int64:
                input_type = Int64TensorType(input.shape)
            else:
                raise RuntimeError(
                    "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                    .format(type(input.dtype)))
            inputs.append(
                scope.declare_local_variable(input_name, type=input_type))
    else:
        # We have no information on the input. Sklearn always gets as input a single dataframe,
        # therefore by default we start with a single `input` variable.
        # BUG FIX: the guard previously tested `constants.TEST_INPUT in
        # extra_config`, which is always False in this branch, so a
        # user-provided INPUT_NAMES was silently ignored.
        input_name = (extra_config[constants.INPUT_NAMES][0]
                      if constants.INPUT_NAMES in extra_config else "input")
        inputs.append(scope.declare_local_variable(input_name))

    # The object raw_model_container is a part of the topology we're going to return.
    # We use it to store the inputs of the scikit-learn's computational graph.
    for variable in inputs:
        raw_model_container.add_input(variable)

    # Parse the input scikit-learn model into its scope with the topology.
    # Get the outputs of the model.
    outputs = _parse_sklearn_api(scope, model, inputs)

    # Use the output names specified by the user, if any.
    if constants.OUTPUT_NAMES in extra_config:
        assert len(extra_config[constants.OUTPUT_NAMES]) == len(outputs)
        for i in range(len(outputs)):
            outputs[i].raw_name = extra_config[constants.OUTPUT_NAMES][i]

    # The object raw_model_container is a part of the topology we're going to return.
    # We use it to store the outputs of the scikit-learn's computational graph.
    for variable in outputs:
        raw_model_container.add_output(variable)

    return topology
def _convert_onnxml(model, backend, test_input, device, extra_config={}):
    """
    This function converts the specified [ONNX-ML] model into its *backend* counterpart.
    The supported operators can be found at `hummingbird.ml.supported`.

    Args:
        model: An ONNX-ML model (protobuf ``ModelProto``) to convert.
        backend: The target backend name (e.g. "onnx", "torch.jit", tvm).
        test_input: Optional test data used for tracing; generated from the
            model's input schema when None.
        device: The device the converted model is placed on.
        extra_config: Configuration dictionary; it is MUTATED in place
            (N_INPUTS, N_FEATURES, TEST_INPUT, ONNX_INITIALIZERS are set).
            NOTE(review): the mutable default `{}` is shared across calls
            that omit this argument, so those mutations leak between such
            calls — confirm whether callers always pass it explicitly.

    Returns:
        The converted *backend* model produced by `topology_converter`.
    """
    assert model is not None
    assert torch_installed(), "To use Hummingbird you need to install torch."

    import onnx

    # The conversion requires some test input for tracing.
    # Test inputs can be either provided or generate from the input schema of the model.
    # Generate some test input if necessary.
    if test_input is None:
        import torch
        from onnxconverter_common.data_types import FloatTensorType, DoubleTensorType, Int32TensorType, Int64TensorType

        tvm_backend = None
        if tvm_installed():
            import tvm
            tvm_backend = tvm.__name__

        # Get the input information from the ONNX schema.
        initial_types = []
        for input in model.graph.input:
            name = input.name if hasattr(input, "name") else None
            # elem_type is only reachable through type.tensor_type; any
            # missing link means we cannot infer the input's data type.
            data_type = (
                input.type.tensor_type.elem_type
                if hasattr(input, "type") and hasattr(input.type, "tensor_type") and hasattr(input.type.tensor_type, "elem_type")
                else None)
            if name is None:
                raise RuntimeError(
                    "Cannot fetch input name or data_type from the ONNX schema. Please provide some test input."
                )
            if data_type is None:
                raise RuntimeError(
                    "Cannot fetch input data_type from the ONNX schema, or data type is not tensor_type. Please provide some test input."
                )
            if not hasattr(input.type.tensor_type, "shape"):
                raise RuntimeError(
                    "Cannot fetch input shape from ONNX schema. Please provide some test input."
                )
            shape = [dim.dim_value for dim in input.type.tensor_type.shape.dim]
            # 1-D inputs are promoted to a single-row 2-D shape; only 2-D
            # inputs are supported past this point.
            if len(shape) == 1:
                shape = [1, shape[0]]
            assert len(shape) == 2
            # In ONNX dynamic dimensions will have a shape of 0. Fix the 0-shape in the batch dimension if they exist.
            if shape[0] == 0:
                shape[0] = 1
            # Map the TensorProto elem_type codes onto logical tensor types
            # (1=FLOAT, 11=DOUBLE, 6=INT32, 7=INT64).
            if data_type == 1:
                initial_types.append((name, FloatTensorType(shape)))
            elif data_type == 11:
                initial_types.append((name, DoubleTensorType(shape)))
            elif data_type == 6:
                initial_types.append((name, Int32TensorType(shape)))
            elif data_type == 7:
                initial_types.append((name, Int64TensorType(shape)))
            else:
                raise RuntimeError(
                    "Input data type {} not supported. Please fill an issue at https://github.com/microsoft/hummingbird/, or pass some test_input"
                    .format(data_type))

        # All inputs must share one shape; N_FEATURES is derived from it.
        first_shape = initial_types[0][1].shape
        assert all(
            map(lambda x: x[1].shape == first_shape, initial_types)
        ), "Hummingbird currently supports only inputs with same shape."
        extra_config[constants.N_INPUTS] = len(initial_types)
        extra_config[constants.N_FEATURES] = extra_config[
            constants.N_INPUTS] * first_shape[1]

        # Generate some random input data if necessary for the model conversion.
        # Only these backends actually need concrete data for tracing.
        if backend == onnx.__name__ or backend == tvm_backend or backend == torch.jit.__name__:
            test_input = []
            for i, it in enumerate(initial_types):
                if type(it[1]) is FloatTensorType:
                    test_input.append(
                        np.array(np.random.rand(first_shape[0], first_shape[1]),
                                 dtype=np.float32))
                elif type(it[1]) is DoubleTensorType:
                    test_input.append(
                        np.random.rand(first_shape[0], first_shape[1]))
                elif type(it[1]) is Int32TensorType:
                    test_input.append(
                        np.array(np.random.randint(100, size=first_shape),
                                 dtype=np.int32))
                elif type(it[1]) is Int64TensorType:
                    test_input.append(np.random.randint(100, size=first_shape))
                else:
                    raise RuntimeError(
                        "Type {} not supported. Please fill an issue on https://github.com/microsoft/hummingbird/."
                        .format(type(it[1])))
            # Single input: unwrap the list; multiple inputs: use a tuple.
            if extra_config[constants.N_INPUTS] == 1:
                test_input = test_input[0]
            else:
                test_input = tuple(test_input)
            extra_config[constants.TEST_INPUT] = test_input
    # Set the number of features. Some converter requires to know in advance the number of features.
    if constants.N_FEATURES not in extra_config and test_input is not None:
        if len(test_input.shape) < 2:
            extra_config[constants.N_FEATURES] = 1
        else:
            extra_config[constants.N_FEATURES] = test_input.shape[1]

    # Set the initializers. Some converter requires the access to initializers.
    initializers = {} if model.graph.initializer is None else {
        in_.name: in_
        for in_ in model.graph.initializer
    }
    extra_config[constants.ONNX_INITIALIZERS] = initializers

    # Parse ONNX model as our internal data structure (i.e., Topology).
    topology = parse_onnx_api_model(model)

    # Convert the Topology object into a PyTorch model.
    hb_model = topology_converter(topology,
                                  backend,
                                  test_input,
                                  device,
                                  extra_config=extra_config)
    return hb_model