コード例 #1
0
 def conv(scope, operator, container):
     """
     Test converter: emits one ``Sub`` node (input minus the model's
     weights ``W``) and checks what the algebra object reports.

     :param scope: scope the node is added to
     :param operator: operator wrapping the model; ``raw_operator.W``
         holds the array subtracted from the first input
     :param container: container receiving the node
     :raises AssertionError: if the node is not named ``Su_Sub`` or if
         the enumerated inputs, nodes or variables are not the expected
         ones
     """
     first_input = operator.inputs[0]
     np_dtype = guess_numpy_type(first_input.type)
     weights = operator.raw_operator.W.astype(np_dtype)
     sub_node = OnnxSub(
         first_input, weights, output_names=operator.outputs,
         op_version=TARGET_OPSET)
     sub_node.add_to(scope, container)

     # The container dump must show the automatically generated name.
     dumped = str(container)
     if 'name:"Su_Sub"' not in dumped:
         raise AssertionError(
             "Unnamed operator: '{}'".format(dumped))

     initial_types = list(sub_node.enumerate_initial_types())
     nodes = list(sub_node.enumerate_nodes())
     variables = list(sub_node.enumerate_variables())

     # Exactly one graph input called 'input' with shape [None, 2].
     assert len(initial_types) == 1
     assert initial_types[0][0] == 'input'
     assert initial_types[0][1].shape == [None, 2]
     # Exactly one node producing 'variable'.
     assert len(nodes) == 1
     assert nodes[0].output_names == ['variable']
     # Exactly one variable, reported as a tuple whose second item is 0.
     assert len(variables) == 1
     assert isinstance(variables[0], tuple)
     assert variables[0][1] == 0
コード例 #2
0
        def conv(scope, operator, container):
            """
            Converts an estimator exposing ``cluster_centers_`` into ONNX.

            The graph computes the squared distance of every row of the
            input to every center as ``|x|^2 - 2 x.C' + |c|^2``. The first
            output is the index of the closest center (``ArgMin``), the
            second one is the square root of these squared distances.

            :param scope: scope the nodes are added to
            :param operator: operator wrapping the fitted estimator,
                one input, two outputs
            :param container: container receiving the nodes, its
                ``target_opset`` is used for every node
            """
            X = operator.inputs[0]
            out = operator.outputs
            op = operator.raw_operator
            dtype = guess_numpy_type(X.type)

            C = op.cluster_centers_
            C2 = row_norms(C, squared=True).astype(dtype)
            C = C.astype(dtype)

            # |x|^2 for every row, shape (N, 1) because keepdims=1.
            rs = OnnxReduceSumSquare(
                X, axes=[1], keepdims=1,
                op_version=container.target_opset)

            # Gemm needs a third operand: a zero bias with the same element
            # type as X and a shape broadcastable to (N, n_clusters).
            N = X.type.shape[0]
            if isinstance(N, int):
                # Column of zeros: shape (N, 1) broadcasts over clusters
                # whereas (N, ) would be aligned with the cluster axis.
                zeros = np.zeros((N, 1), dtype=dtype)
            else:
                # Unknown batch size: derive the zero column from rs,
                # the constant follows the input type.
                zeros = OnnxMul(
                    rs, np.array([0], dtype=dtype),
                    op_version=container.target_opset)

            # |x|^2 - 2 x.C'
            z = OnnxAdd(
                rs,
                OnnxGemm(
                    X, C, zeros, alpha=-2., transB=1,
                    op_version=container.target_opset),
                op_version=container.target_opset)
            # ... + |c|^2
            y2 = OnnxAdd(C2, z, op_version=container.target_opset)
            lo = OnnxArgMin(
                y2, axis=1, keepdims=0, output_names=out[:1],
                op_version=container.target_opset)
            y2s = OnnxSqrt(
                y2, output_names=out[1:],
                op_version=container.target_opset)

            lo.add_to(scope, container)
            y2s.add_to(scope, container)
コード例 #3
0
def decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer, ``(X - mean_) @ coef_``.

    The converter reads option ``use_gemm`` (default ``False``). When
    set, the transformation is emitted as a single ``Gemm`` node
    ``X @ coef_ + (-mean_ @ coef_)``, otherwise as ``Sub`` followed by
    ``MatMul``. The chosen value is printed.

    :param scope: scope the nodes are added to
    :param operator: operator wrapping the fitted transformer
    :param container: container receiving the nodes, it also gives
        the target opset and the converter options
    """
    model = operator.raw_operator
    opset = container.target_opset
    first_output = operator.outputs[:1]
    data = operator.inputs[0]
    np_dtype = guess_numpy_type(data.type)

    use_gemm = container.get_options(
        model, dict(use_gemm=False))['use_gemm']
    print('conversion: use_gemm=', use_gemm)

    if not use_gemm:
        centered = OnnxSub(
            data, model.mean_.astype(np_dtype), op_version=opset)
        result = OnnxMatMul(
            centered, model.coef_.astype(np_dtype),
            op_version=opset, output_names=first_output)
    else:
        # The centering is folded into the bias of Gemm.
        bias = (- model.mean_ @ model.coef_).astype(np_dtype)
        result = OnnxGemm(
            data, model.coef_.astype(np_dtype), bias,
            op_version=opset, alpha=1., beta=1.,
            output_names=first_output)
    result.add_to(scope, container)
コード例 #4
0
def decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer with two outputs.

    Both outputs hold ``(X - mean_) @ coef_``: the first one is
    computed with ``Sub`` + ``MatMul``, the second one with a single
    ``Gemm`` node whose bias is ``-mean_ @ coef_``.

    :param scope: scope the nodes are added to
    :param operator: operator wrapping the fitted transformer
    :param container: container receiving the nodes, it also gives
        the target opset
    """
    model = operator.raw_operator
    opset = container.target_opset
    outputs = operator.outputs
    data = operator.inputs[0]
    np_dtype = guess_numpy_type(data.type)

    # First output: explicit centering followed by a matrix product.
    centered = OnnxSub(data, model.mean_.astype(np_dtype), op_version=opset)
    with_matmul = OnnxMatMul(
        centered, model.coef_.astype(np_dtype),
        op_version=opset, output_names=outputs[:1])

    # Second output: same result, the centering becomes the Gemm bias.
    bias = (-model.mean_ @ model.coef_).astype(np_dtype)
    with_gemm = OnnxGemm(
        data, model.coef_.astype(np_dtype), bias,
        op_version=opset, alpha=1., beta=1.,
        output_names=outputs[1:2])

    with_matmul.add_to(scope, container)
    with_gemm.add_to(scope, container)
コード例 #5
0
ファイル: conv_xgboost.py プロジェクト: xadupre/mlprodict
    def convert(scope, operator, container):
        """
        Converts a XGBoost regressor into a single tree ensemble node.

        A double input produces ``TreeEnsembleRegressorDouble`` in domain
        ``mlprodict``, any other input type produces
        ``TreeEnsembleRegressor`` in domain ``ai.onnx.ml``.

        :param scope: scope used to get a unique node name
        :param operator: operator wrapping the fitted model
        :param container: container receiving the node
        :raises RuntimeError: if the objective is ``reg:gamma`` or
            ``reg:tweedie``, or if the model has no booster
        """
        use_double = (
            guess_numpy_type(operator.inputs[0].type) == numpy.float64)
        xgb_node = operator.raw_operator
        objective, base_score, js_trees = XGBConverter.common_members(
            xgb_node, operator.inputs)

        if objective in ["reg:gamma", "reg:tweedie"]:
            raise RuntimeError(
                "Objective '{}' not supported.".format(objective))

        if xgb_node.get_booster() is None:
            raise RuntimeError("The model was probably not trained.")

        attr_pairs = XGBRegressorConverter._get_default_tree_attribute_pairs()
        attr_pairs['base_values'] = [base_score]
        XGBConverter.fill_tree_attributes(
            js_trees, attr_pairs, [1 for _ in js_trees], False)

        # Only the operator type and its domain depend on the input type.
        if use_double:
            op_type, domain = 'TreeEnsembleRegressorDouble', 'mlprodict'
        else:
            op_type, domain = 'TreeEnsembleRegressor', 'ai.onnx.ml'
        container.add_node(op_type, operator.input_full_names,
                           operator.output_full_names,
                           name=scope.get_unique_operator_name(op_type),
                           op_domain=domain, **attr_pairs)
コード例 #6
0
ファイル: conv_lightgbm.py プロジェクト: sdpython/mlprodict
def convert_lightgbm(scope, operator, container):  # pylint: disable=R0914
    """
    This converter reuses the code from
    `LightGbm.py <https://github.com/onnx/onnxmltools/blob/master/onnxmltools/convert/
    lightgbm/operator_converters/LightGbm.py>`_ and makes
    some modifications. It implements converters
    for models in :epkg:`lightgbm`.

    The objective stored in the dumped model selects the output:
    *binary* and *multiclass* produce a tree ensemble classifier
    (label and probabilities), *regression*, *poisson* and *gamma*
    produce a tree ensemble regressor, followed by an ``Exp`` node
    for the last two. A double input switches to the ``*Double``
    operators from domain ``mlprodict``. Regressors read option
    ``split`` (default ``-1``): any other value splits the ensemble
    into several nodes whose outputs are summed.

    :param scope: scope used to get unique variable and node names
    :param operator: operator wrapping the fitted model
    :param container: container receiving nodes and initializers,
        its optional attribute ``verbose`` enables progress messages
    :raises RuntimeError: if the objective is not one of those above
    :raises ValueError: if class labels are neither all numbers
        nor all strings
    """
    verbose = getattr(container, 'verbose', 0)
    gbm_model = operator.raw_operator
    # A previously dumped model is reused when the attribute exists.
    if hasattr(gbm_model, '_model_dict_info'):
        gbm_text, info = gbm_model._model_dict_info
    else:
        if verbose >= 2:
            print("[convert_lightgbm] dump_model")  # pragma: no cover
        gbm_text, info = dump_lgbm_booster(gbm_model.booster_, verbose=verbose)
    if verbose >= 2:
        print(  # pragma: no cover
            "[convert_lightgbm] modify_tree_for_rule_in_set")
    modify_tree_for_rule_in_set(gbm_text, use_float=True, verbose=verbose,
                                info=info)

    attrs = get_default_tree_classifier_attribute_pairs()
    attrs['name'] = operator.full_name

    # Create different attributes for classifier and
    # regressor, respectively
    post_transform = None
    if gbm_text['objective'].startswith('binary'):
        n_classes = 1
        attrs['post_transform'] = 'LOGISTIC'
    elif gbm_text['objective'].startswith('multiclass'):
        n_classes = gbm_text['num_class']
        attrs['post_transform'] = 'SOFTMAX'
    elif gbm_text['objective'].startswith('regression'):
        n_classes = 1  # Regressor has only one output variable
        attrs['post_transform'] = 'NONE'
        attrs['n_targets'] = n_classes
    elif gbm_text['objective'].startswith(('poisson', 'gamma')):
        n_classes = 1  # Regressor has only one output variable
        attrs['n_targets'] = n_classes
        # 'Exp' is not a supported post_transform value in the ONNX spec yet,
        # so we need to add an 'Exp' post transform node to the model
        attrs['post_transform'] = 'NONE'
        post_transform = "Exp"
    else:
        raise RuntimeError(  # pragma: no cover
            "LightGBM objective should be cleaned already not '{}'.".format(
                gbm_text['objective']))

    # Use the same algorithm to parse the tree
    if verbose >= 2:  # pragma: no cover
        from tqdm import tqdm
        loop = tqdm(gbm_text['tree_info'])
        loop.set_description("parse")
    else:
        loop = gbm_text['tree_info']
    for i, tree in enumerate(loop):
        tree_id = i
        # Trees are assigned to classes in a round-robin way.
        class_id = tree_id % n_classes
        # tree['shrinkage'] --> LightGbm provides figures with it already.
        learning_rate = 1.
        _parse_tree_structure(
            tree_id, class_id, learning_rate, tree['tree_structure'], attrs)

    if verbose >= 2:
        print("[convert_lightgbm] onnx")  # pragma: no cover
    # Sort nodes_* attributes. For one tree, its node indexes
    # should appear in an ascent order in nodes_nodeids. Nodes
    # from a tree with a smaller tree index should appear
    # before trees with larger indexes in nodes_nodeids.
    node_numbers_per_tree = Counter(attrs['nodes_treeids'])
    tree_number = len(node_numbers_per_tree.keys())
    # accumulated_node_numbers[t] = number of nodes in trees 0..t-1.
    accumulated_node_numbers = [0] * tree_number
    for i in range(1, tree_number):
        accumulated_node_numbers[i] = (
            accumulated_node_numbers[i - 1] + node_numbers_per_tree[i - 1])
    # Position of every node once all trees are laid out one after another.
    global_node_indexes = []
    for i in range(len(attrs['nodes_nodeids'])):
        tree_id = attrs['nodes_treeids'][i]
        node_id = attrs['nodes_nodeids'][i]
        global_node_indexes.append(
            accumulated_node_numbers[tree_id] + node_id)
    # Every nodes_* list is reordered with the same global index.
    for k, v in attrs.items():
        if k.startswith('nodes_'):
            merged_indexes = zip(
                copy.deepcopy(global_node_indexes), v)
            sorted_list = [pair[1]
                           for pair in sorted(merged_indexes,
                                              key=lambda x: x[0])]
            attrs[k] = sorted_list

    # Anything which is not double is handled as float.
    dtype = guess_numpy_type(operator.inputs[0].type)
    if dtype != numpy.float64:
        dtype = numpy.float32

    # Create ONNX object
    if (gbm_text['objective'].startswith('binary') or
            gbm_text['objective'].startswith('multiclass')):
        # Prepare label information for both of TreeEnsembleClassifier
        # and ZipMap
        # NOTE(review): zipmap_attrs is filled below but never given
        # to any node in this function.
        class_type = onnx_proto.TensorProto.STRING  # pylint: disable=E1101
        zipmap_attrs = {'name': scope.get_unique_variable_name('ZipMap')}
        if all(isinstance(i, (numbers.Real, bool, numpy.bool_))
               for i in gbm_model.classes_):
            class_type = onnx_proto.TensorProto.INT64  # pylint: disable=E1101
            class_labels = [int(i) for i in gbm_model.classes_]
            attrs['classlabels_int64s'] = class_labels
            zipmap_attrs['classlabels_int64s'] = class_labels
        elif all(isinstance(i, str) for i in gbm_model.classes_):
            class_labels = [str(i) for i in gbm_model.classes_]
            attrs['classlabels_strings'] = class_labels
            zipmap_attrs['classlabels_strings'] = class_labels
        else:
            raise ValueError(  # pragma: no cover
                'Only string and integer class labels are allowed')

        # Create tree classifier
        probability_tensor_name = scope.get_unique_variable_name(
            'probability_tensor')
        label_tensor_name = scope.get_unique_variable_name('label_tensor')

        if dtype == numpy.float64:
            container.add_node('TreeEnsembleClassifierDouble', operator.input_full_names,
                               [label_tensor_name, probability_tensor_name],
                               op_domain='mlprodict', op_version=1, **attrs)
        else:
            container.add_node('TreeEnsembleClassifier', operator.input_full_names,
                               [label_tensor_name, probability_tensor_name],
                               op_domain='ai.onnx.ml', op_version=1, **attrs)

        prob_tensor = probability_tensor_name

        if gbm_model.boosting_type == 'rf':
            # Random forest: probabilities and label are rebuilt from
            # column 1 of the classifier output divided by 100.
            col_index_name = scope.get_unique_variable_name('col_index')
            first_col_name = scope.get_unique_variable_name('first_col')
            zeroth_col_name = scope.get_unique_variable_name('zeroth_col')
            denominator_name = scope.get_unique_variable_name('denominator')
            modified_first_col_name = scope.get_unique_variable_name(
                'modified_first_col')
            unit_float_tensor_name = scope.get_unique_variable_name(
                'unit_float_tensor')
            merged_prob_name = scope.get_unique_variable_name('merged_prob')
            predicted_label_name = scope.get_unique_variable_name(
                'predicted_label')
            classes_name = scope.get_unique_variable_name('classes')
            final_label_name = scope.get_unique_variable_name('final_label')

            # NOTE(review): these constants are always FLOAT, even when
            # the double classifier is used -- confirm this is intended.
            container.add_initializer(
                col_index_name, onnx_proto.TensorProto.INT64, [], [1])  # pylint: disable=E1101
            container.add_initializer(
                unit_float_tensor_name, onnx_proto.TensorProto.FLOAT, [], [1.0])  # pylint: disable=E1101
            container.add_initializer(
                denominator_name, onnx_proto.TensorProto.FLOAT, [], [100.0])  # pylint: disable=E1101
            container.add_initializer(classes_name, class_type,
                                      [len(class_labels)], class_labels)

            # first_col = probabilities[:, 1]
            container.add_node(
                'ArrayFeatureExtractor',
                [probability_tensor_name, col_index_name],
                first_col_name,
                name=scope.get_unique_operator_name(
                    'ArrayFeatureExtractor'),
                op_domain='ai.onnx.ml')
            # modified_first_col = first_col / 100
            apply_div(scope, [first_col_name, denominator_name],
                      modified_first_col_name, container, broadcast=1)
            # zeroth_col = 1 - modified_first_col
            apply_sub(
                scope, [unit_float_tensor_name, modified_first_col_name],
                zeroth_col_name, container, broadcast=1)
            container.add_node(
                'Concat', [zeroth_col_name, modified_first_col_name],
                merged_prob_name,
                name=scope.get_unique_operator_name('Concat'), axis=1)
            # The label is the class with the highest rebuilt probability.
            container.add_node(
                'ArgMax', merged_prob_name,
                predicted_label_name,
                name=scope.get_unique_operator_name('ArgMax'), axis=1)
            container.add_node(
                'ArrayFeatureExtractor', [classes_name, predicted_label_name],
                final_label_name,
                name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
                op_domain='ai.onnx.ml')
            apply_reshape(scope, final_label_name,
                          operator.outputs[0].full_name,
                          container, desired_shape=[-1, ])
            prob_tensor = merged_prob_name
        else:
            container.add_node('Identity', label_tensor_name,
                               operator.outputs[0].full_name,
                               name=scope.get_unique_operator_name('Identity'))

        # The probability tensor is forwarded unchanged to the second
        # output (Identity node, no ZipMap is added here).
        container.add_node('Identity', prob_tensor,
                           operator.outputs[1].full_name)
    else:
        # Create tree regressor
        output_name = scope.get_unique_variable_name('output')

        keys_to_be_renamed = list(
            k for k in attrs if k.startswith('class_'))

        for k in keys_to_be_renamed:
            # Rename class_* attribute to target_*
            # because TreeEnsembleRegressor
            # and TreeEnsembleClassifier have different ONNX attributes
            attrs['target' + k[5:]] = copy.deepcopy(attrs[k])
            del attrs[k]

        options = container.get_options(gbm_model, dict(split=-1))
        split = options['split']
        if split == -1:
            # One node for the whole ensemble.
            if dtype == numpy.float64:
                container.add_node(
                    'TreeEnsembleRegressorDouble', operator.input_full_names,
                    output_name, op_domain='mlprodict', op_version=1, **attrs)
            else:
                container.add_node(
                    'TreeEnsembleRegressor', operator.input_full_names,
                    output_name, op_domain='ai.onnx.ml', op_version=1, **attrs)
        else:
            # The ensemble is split into several nodes, their outputs
            # are summed in double and cast back to float if needed.
            tree_attrs = _split_tree_ensemble_atts(attrs, split)
            tree_nodes = []
            for i, ats in enumerate(tree_attrs):
                tree_name = scope.get_unique_variable_name('tree%d' % i)
                if dtype == numpy.float64:
                    container.add_node(
                        'TreeEnsembleRegressorDouble', operator.input_full_names,
                        tree_name, op_domain='mlprodict', op_version=1, **ats)
                    tree_nodes.append(tree_name)
                else:
                    container.add_node(
                        'TreeEnsembleRegressor', operator.input_full_names,
                        tree_name, op_domain='ai.onnx.ml', op_version=1, **ats)
                    cast_name = scope.get_unique_variable_name('dtree%d' % i)
                    container.add_node(
                        'Cast', tree_name, cast_name, to=TensorProto.DOUBLE,  # pylint: disable=E1101
                        name=scope.get_unique_operator_name("dtree%d" % i))
                    tree_nodes.append(cast_name)
            if dtype == numpy.float64:
                container.add_node(
                    'Sum', tree_nodes, output_name,
                    name=scope.get_unique_operator_name("sumtree%d" % len(tree_nodes)))
            else:
                cast_name = scope.get_unique_variable_name('ftrees')
                container.add_node(
                    'Sum', tree_nodes, cast_name,
                    name=scope.get_unique_operator_name("sumtree%d" % len(tree_nodes)))
                # NOTE(review): `i` is the last index left by the loop
                # above, it only serves as a name hint here.
                container.add_node(
                    'Cast', cast_name, output_name, to=TensorProto.FLOAT,  # pylint: disable=E1101
                    name=scope.get_unique_operator_name("dtree%d" % i))

        if gbm_model.boosting_type == 'rf':
            # Random forest: the raw output is divided by 100.
            denominator_name = scope.get_unique_variable_name('denominator')

            container.add_initializer(
                denominator_name, onnx_proto.TensorProto.FLOAT,  # pylint: disable=E1101
                [], [100.0])

            apply_div(scope, [output_name, denominator_name],
                      operator.output_full_names, container, broadcast=1)
        elif post_transform:
            # Post transform not available as an attribute ('Exp').
            container.add_node(
                post_transform, output_name,
                operator.output_full_names,
                name=scope.get_unique_operator_name(
                    post_transform))
        else:
            container.add_node('Identity', output_name,
                               operator.output_full_names,
                               name=scope.get_unique_operator_name('Identity'))

    if verbose >= 2:
        print("[convert_lightgbm] end")  # pragma: no cover
コード例 #7
0
def live_decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer whose coefficients are
    computed inside the ONNX graph from the input itself.

    The graph centers the input, builds its covariance matrix,
    adds ``alpha`` on the diagonal, takes the eigen decomposition
    and multiplies the centered input by ``P @ diag(L ** -0.5) @ P.T``.
    Only ``alpha`` and ``nf_`` are read from the fitted transformer.

    :param scope: scope the nodes are added to
    :param operator: operator wrapping the transformer, one input
    :param container: container receiving the nodes, it also gives
        the target opset
    """
    # shortcuts
    op = operator.raw_operator
    opv = container.target_opset
    out = operator.outputs

    # We retrieve the unique input.
    X = operator.inputs[0]

    # We guess its type. If the operator ingests float (or double),
    # it outputs float (or double).
    proto_dtype = guess_proto_type(X.type)
    dtype = guess_numpy_type(X.type)

    # Lines in comment specify the numpy computation
    # the ONNX code implements.
    # mean_ = numpy.mean(X, axis=0, keepdims=True)
    mean = OnnxReduceMean(X, axes=[0], keepdims=1, op_version=opv)

    # This is a trick I often use. The converter automatically
    # chooses a name for every output. In a big graph,
    # it is difficult to know which operator is producing which output.
    # This line just tells every node must prefix its outputs with this
    # string. It also applies to all input nodes unless this method
    # was called for one of these nodes.
    mean.set_onnx_name_prefix('mean')

    # X2 = X - mean_
    X2 = OnnxSub(X, mean, op_version=opv)

    # V = X2.T @ X2 / X2.shape[0]
    N = OnnxGatherElements(OnnxShape(X, op_version=opv),
                           numpy.array([0], dtype=numpy.int64),
                           op_version=opv)
    Nf = OnnxCast(N, to=proto_dtype, op_version=opv)

    # Every output involved in N and Nf is prefixed by 'N'.
    Nf.set_onnx_name_prefix('N')

    V = OnnxDiv(OnnxMatMul(OnnxTranspose(X2, op_version=opv),
                           X2,
                           op_version=opv),
                Nf,
                op_version=opv)
    V.set_onnx_name_prefix('V1')

    # V += numpy.identity(V.shape[0]) * self.alpha
    V = OnnxAdd(V,
                op.alpha * numpy.identity(op.nf_, dtype=dtype),
                op_version=opv)
    V.set_onnx_name_prefix('V2')

    # L, P = numpy.linalg.eig(V)
    LP = OnnxEig(V, eigv=True, op_version=opv)
    LP.set_onnx_name_prefix('LP')

    # Linv = L ** (-0.5)
    # Notation LP[0] means OnnxPow is taking the first output
    # of operator OnnxEig, LP[1] would mean the second one.
    # LP alone is not allowed as it is ambiguous.
    Linv = OnnxPow(LP[0], numpy.array([-0.5], dtype=dtype), op_version=opv)
    Linv.set_onnx_name_prefix('Linv')

    # diag = numpy.diag(Linv)
    # EyeLike only uses the shape (nf_, nf_) of its input to build
    # the identity matrix, which is then multiplied by Linv.
    diag = OnnxMul(OnnxEyeLike(numpy.zeros((op.nf_, op.nf_),
                                           dtype=numpy.int64),
                               k=0,
                               op_version=opv),
                   Linv,
                   op_version=opv)
    diag.set_onnx_name_prefix('diag')

    # root = P @ diag @ P.transpose()
    trv = OnnxTranspose(LP[1], op_version=opv)
    coef_left = OnnxMatMul(LP[1], diag, op_version=opv)
    coef_left.set_onnx_name_prefix('coef_left')
    coef = OnnxMatMul(coef_left, trv, op_version=opv)
    coef.set_onnx_name_prefix('coef')

    # Same part as before.
    Y = OnnxMatMul(X2, coef, op_version=opv, output_names=out[:1])
    Y.set_onnx_name_prefix('Y')

    # The last line specifies the final output.
    # Every node involved in the computation is added to the ONNX
    # graph at this stage.
    Y.add_to(scope, container)
コード例 #8
0
ファイル: conv_xgboost.py プロジェクト: xadupre/mlprodict
    def convert(scope, operator, container):
        """
        Converts a XGBoost classifier into a single tree ensemble node.

        The number of classes is deduced from the tree ids and
        ``n_estimators``: one class or less means a binary model
        (``LOGISTIC``), otherwise ``SOFTMAX`` is used. A double input
        selects ``TreeEnsembleClassifierDouble``, any other input type
        selects ``TreeEnsembleClassifier``.

        :param scope: scope used to get a unique node name
        :param operator: operator wrapping the fitted model
        :param container: container receiving the node
        :raises RuntimeError: if ``base_score`` is None, if the model
            has no tree, if ``n_estimators`` is missing from the
            parameters or if the objective is not supported
        """
        use_double = (
            guess_numpy_type(operator.inputs[0].type) == numpy.float64)
        xgb_node = operator.raw_operator

        objective, base_score, js_trees = XGBConverter.common_members(
            xgb_node, operator.inputs)
        if base_score is None:
            raise RuntimeError("base_score cannot be None")
        params = XGBConverter.get_xgb_params(xgb_node)

        attr_pairs = XGBClassifierConverter._get_default_tree_attribute_pairs()
        XGBConverter.fill_tree_attributes(
            js_trees, attr_pairs, [1 for _ in js_trees], True)

        tree_ids = attr_pairs['class_treeids']
        if len(tree_ids) == 0:
            raise RuntimeError("XGBoost model is empty.")
        if 'n_estimators' not in params:
            raise RuntimeError(
                "Parameters not found, existing:\n{}".format(
                    pformat(params)))

        n_classes = (max(tree_ids) + 1) // params['n_estimators']
        if n_classes > 1:
            # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L35.
            attr_pairs['post_transform'] = "SOFTMAX"
            attr_pairs['class_ids'] = [tid % n_classes for tid in tree_ids]
        else:
            # Binary case, every tree contributes to class 0.
            # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L23.
            attr_pairs['post_transform'] = "LOGISTIC"
            attr_pairs['class_ids'] = [0 for _ in tree_ids]

        # Numerical labels are stored as integers, others as bytes.
        classes = xgb_node.classes_
        numerical_labels = (
            numpy.issubdtype(classes.dtype, numpy.floating) or
            numpy.issubdtype(classes.dtype, numpy.signedinteger))
        if numerical_labels:
            attr_pairs['classlabels_int64s'] = classes.astype('int')
        else:
            attr_pairs['classlabels_strings'] = numpy.array(
                [s.encode('utf-8') for s in classes])

        op_name = ("TreeEnsembleClassifierDouble" if use_double
                   else "TreeEnsembleClassifier")

        # add nodes, the three supported objectives share the same node
        if objective not in ("binary:logistic", "multi:softprob",
                             "reg:logistic"):
            raise RuntimeError("Unexpected objective: {0}".format(objective))
        # NOTE(review): the double variant is emitted under 'ai.onnx.ml'
        # as well -- confirm this is the intended domain for it.
        container.add_node(op_name, operator.input_full_names,
                           operator.output_full_names,
                           name=scope.get_unique_operator_name(op_name),
                           op_domain='ai.onnx.ml', **attr_pairs)
コード例 #9
0
    def _to_onnx(self, op_version=None, signature=None, version=None):
        """
        Returns the onnx graph produced by function `fct_`.
        The graph is built on the first call, stored in `onnx_`
        and reused afterwards.

        :param op_version: opset given to the function or to the
            conversion of its result
        :param signature: signature forwarded to `_parse_annotation`
        :param version: version forwarded to `_parse_annotation`
        :return: the ONNX graph stored in `onnx_`
        :raises RuntimeError: if no graph could be produced
        """
        if self.onnx_ is None and self.fct_ is not None:
            from skl2onnx.common.data_types import guess_numpy_type
            from .onnx_variable import OnnxVar

            inputs, outputs, kwargs, n_optional, _ = (
                self._parse_annotation(
                    signature=signature, version=version))

            no_variables = signature is None or not signature.n_variables
            if (no_variables and isinstance(version, tuple) and
                    len(inputs) > len(version)):
                raise NotImplementedError(  # pragma: no cover
                    "Mismatch between additional parameters %r "
                    "(n_optional=%r) and version %r for function %r from %r."
                    "" % (kwargs, n_optional, version, self.fct_,
                          getattr(self.fct_, '__module__', None)))

            names_in = [spec[0] for spec in inputs]
            names_out = [spec[0] for spec in outputs]
            names_var = [OnnxVar(spec[0], dtype=guess_numpy_type(spec[1]))
                         for spec in inputs]

            # A function with a variable named op_version receives input
            # names and returns the algebra, otherwise it receives
            # OnnxVar instances and returns an OnnxVar.
            if 'op_version' not in self.fct_.__code__.co_varnames:
                onx_var = self.fct_(*names_var, **kwargs)
                if not hasattr(onx_var, 'to_algebra'):
                    raise TypeError(  # pragma: no cover
                        "The function %r to convert must return an instance of "
                        "OnnxVar but returns type %r." % (self.fct_, type(onx_var)))
                onx_algebra = onx_var.to_algebra(op_version=op_version)
            else:
                onx_var = None
                onx_algebra = self.fct_(
                    *names_in, op_version=op_version, **kwargs)

            hidden_algebras, var_graphs = self._find_hidden_algebras(
                onx_var, onx_algebra)
            if len(hidden_algebras) > 0:
                raise NotImplementedError(
                    "Subgraph only supports constants (operator If, Loop, "
                    "Scan). hidden_algebras=%r var_graphs=%r" % (
                        hidden_algebras, var_graphs))

            if isinstance(onx_algebra, str):
                raise RuntimeError(  # pragma: no cover
                    "Unexpected str type %r." % onx_algebra)
            if isinstance(onx_algebra, tuple):
                raise NotImplementedError(  # pragma: no cover
                    "Not implemented when the function returns multiple results.")
            if hasattr(onx_algebra, 'to_onnx'):
                # skl2onnx algebra, the graph is optimised before
                # being stored
                onx_algebra.output_names = names_out
                self.onnx_ = onnx_optimisations(
                    onx_algebra.to_onnx(inputs=inputs,
                                        target_opset=op_version,
                                        outputs=outputs))

        if self.onnx_ is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to get the ONNX graph (class %r, fct_=%r)" % (
                    type(self), self.fct_))
        return self.onnx_
コード例 #10
0
def live_decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer whose coefficients are
    computed inside the ONNX graph from the input itself.

    The graph centers the input, builds its covariance matrix,
    adds ``alpha`` on the diagonal, takes the eigen decomposition
    and multiplies the centered input by ``P @ diag(L ** -0.5) @ P.T``.
    Only ``alpha`` and ``nf_`` are read from the fitted transformer.

    :param scope: scope the nodes are added to
    :param operator: operator wrapping the transformer, one input
    :param container: container receiving the nodes, it also gives
        the target opset
    """
    op = operator.raw_operator
    opv = container.target_opset
    out = operator.outputs

    # We retrieve the unique input.
    X = operator.inputs[0]
    proto_dtype = guess_proto_type(X.type)

    dtype = guess_numpy_type(X.type)

    # new part

    # mean_ = numpy.mean(X, axis=0, keepdims=True)
    mean = OnnxReduceMean(X, axes=[0], keepdims=1, op_version=opv)
    mean.set_onnx_name_prefix('mean')

    # X2 = X - mean_
    X2 = OnnxSub(X, mean, op_version=opv)

    # V = X2.T @ X2 / X2.shape[0]
    N = OnnxGatherElements(OnnxShape(X, op_version=opv),
                           numpy.array([0], dtype=numpy.int64),
                           op_version=opv)
    Nf = OnnxCast(N, to=proto_dtype, op_version=opv)
    Nf.set_onnx_name_prefix('N')

    V = OnnxDiv(OnnxMatMul(OnnxTranspose(X2, op_version=opv),
                           X2,
                           op_version=opv),
                Nf,
                op_version=opv)
    V.set_onnx_name_prefix('V1')

    # V += numpy.identity(V.shape[0]) * self.alpha
    V = OnnxAdd(V,
                op.alpha * numpy.identity(op.nf_, dtype=dtype),
                op_version=opv)
    V.set_onnx_name_prefix('V2')

    # L, P = numpy.linalg.eig(V)
    LP = OnnxEig(V, eigv=True, op_version=opv)
    LP.set_onnx_name_prefix('LP')

    # Linv = L ** (-0.5)
    Linv = OnnxPow(LP[0], numpy.array([-0.5], dtype=dtype), op_version=opv)
    Linv.set_onnx_name_prefix('Linv')

    # diag = numpy.diag(Linv)
    # EyeLike copies the shape of its input, which must be a 2D tensor:
    # a (nf_, nf_) matrix is given, not a vector holding the two sizes.
    diag = OnnxMul(OnnxEyeLike(numpy.zeros((op.nf_, op.nf_),
                                           dtype=numpy.int64),
                               k=0,
                               op_version=opv),
                   Linv,
                   op_version=opv)
    diag.set_onnx_name_prefix('diag')

    # root = P @ diag @ P.transpose()
    trv = OnnxTranspose(LP[1], op_version=opv)
    coef_left = OnnxMatMul(LP[1], diag, op_version=opv)
    coef_left.set_onnx_name_prefix('coef_left')
    coef = OnnxMatMul(coef_left, trv, op_version=opv)
    coef.set_onnx_name_prefix('coef')

    # Same part as before.
    Y = OnnxMatMul(X2, coef, op_version=opv, output_names=out[:1])
    Y.set_onnx_name_prefix('Y')
    # Every node involved in the computation is added to the graph here.
    Y.add_to(scope, container)
コード例 #11
0
def new_convert_sklearn_function_transformer(scope, operator, container):
    """
    Rewrites the converters implemented in
    :epkg:`sklearn-onnx` to support custom functions
    implemented with :ref:`l-numpy-onnxpy`.

    @param      scope       scope, it produces the unique variable
                            and operator names
    @param      operator    operator to convert, its *raw_operator*
                            holds the function in attribute *func*
    @param      container   container receiving the initializers
                            and the nodes

    Three cases are handled. If *func* has an attribute *compiled*
    (after picking the version indexed by the input type when it has
    an attribute *signed_compiled*), the initializers and the nodes of
    its ONNX graph are copied into the container with renamed
    variables. If *func* is None, the input is copied (one input)
    or the inputs are concatenated (several inputs) into the first
    output. Any other function raises a *TypeError*.
    A *RuntimeError* is raised if the compiled function has
    no attribute *onnx_*.

    Only the first input and the first output of the inlined graph
    are connected to the operator.
    """
    op = operator.raw_operator
    fct = op.func
    if hasattr(fct, 'signed_compiled'):
        # The function is indexed by a version built from the input type.
        dtype = guess_numpy_type(operator.inputs[0].type)
        fct = fct[FctVersion((dtype, ), None)]
    if hasattr(fct, 'compiled'):
        compiled = fct.compiled
        if not hasattr(compiled, 'onnx_'):
            raise RuntimeError(  # pragma: no cover
                "Attribute 'onnx_' is missing, function was not "
                "converted to onnx.")
        graph = compiled.onnx_.graph
        nodes = graph.node

        # renaming all intermediate variables,
        # names are processed in the order they first appear so that
        # the unique names do not depend on the iteration order of a set
        names_mapping = {}
        for node in nodes:
            for name in list(node.input) + list(node.output):
                if name in names_mapping:
                    continue
                if name == '':
                    # ONNX uses an empty name for an omitted optional
                    # input or output, renaming it would make the node
                    # refer to a result nothing produces.
                    names_mapping[name] = ''
                else:
                    names_mapping[name] = scope.get_unique_variable_name(
                        'ft_%s' % name)

        # adding identities
        apply_identity(scope, operator.inputs[0].full_name,
                       names_mapping[graph.input[0].name], container)
        apply_identity(scope, names_mapping[graph.output[0].name],
                       operator.outputs[0].full_name, container)

        # adding initializers,
        # every initializer is copied before being renamed
        # so that the graph of the compiled function is left untouched
        for init in graph.initializer:
            init = copy.deepcopy(init)
            name = names_mapping[init.name]
            init.name = name
            content = init.SerializeToString()
            container.initializers_strings[content] = name
            container.initializers.append(init)

        # adding nodes
        for node in nodes:
            atts = {att.name: _copy_attributes(att)
                    for att in node.attribute}
            container.add_node(
                node.op_type,
                [names_mapping[n] for n in node.input],
                [names_mapping[n] for n in node.output],
                name=scope.get_unique_operator_name('ft_%s' % node.op_type),
                **atts)
        return

    if op.func is not None:
        raise TypeError(  # pragma: no cover
            "FunctionTransformer is not supported unless the "
            "transform function is of type %r or "
            "wrapped with onnxnumpy." % type(op.func))
    if len(operator.inputs) == 1:
        apply_identity(scope, operator.inputs[0].full_name,
                       operator.outputs[0].full_name, container)
    else:
        apply_concat(scope, [i.full_name for i in operator.inputs],
                     operator.outputs[0].full_name, container)
コード例 #12
0
ファイル: convert.py プロジェクト: xadupre/mlprodict
def to_onnx(model,
            X=None,
            name=None,
            initial_types=None,
            target_opset=None,
            options=None,
            rewrite_ops=False,
            white_op=None,
            black_op=None,
            final_types=None):
    """
    Converts a model using :epkg:`sklearn-onnx`.

    @param      model           model to convert or a function
                                wrapped into :epkg:`_PredictScorer` with
                                function :epkg:`make_scorer`
    @param      X               training set (at least one row),
                                can be None, it is used to infer the
                                input types (*initial_types*)
    @param      initial_types   if *X* is None, then *initial_types* must be
                                defined
    @param      name            name of the produced model
    @param      target_opset    to do it with a different target opset
    @param      options         additional parameters for the conversion
    @param      rewrite_ops     rewrites some existing converters,
                                the changes are permanent
    @param      white_op        white list of ONNX nodes allowed
                                while converting a pipeline, if empty,
                                all are allowed
    @param      black_op        black list of ONNX nodes disallowed
                                while converting a pipeline, if empty,
                                none are blacklisted
    @param      final_types     a python list. Works the same way as
                                initial_types but not mandatory, it is used
                                to overwrite the type (if type is not None)
                                and the name of every output.
    @return                     converted model

    The function rewrites function *to_onnx* from :epkg:`sklearn-onnx`
    but may change a few converters if *rewrite_ops* is True.
    For example, :epkg:`ONNX` only supports *TreeEnsembleRegressor*
    for float but not for double. It becomes available
    if ``rewrite_ops=True``.

    .. faqref::
        :title: How to deal with a dataframe as input?

        Each column of the dataframe is considered as a named input.
        The first step is to make sure that every column type is correct.
        :epkg:`pandas` tends to select the least generic type to
        hold the content of one column. :epkg:`ONNX` does not automatically
        cast the data it receives. The data must have the same type when
        the model is converted and when the converted model receives
        the data to predict.

        .. runpython::
            :showcode:

            from io import StringIO
            from textwrap import dedent
            import numpy
            import pandas
            from pyquickhelper.pycode import ExtTestCase
            from sklearn.preprocessing import OneHotEncoder
            from sklearn.pipeline import Pipeline
            from sklearn.compose import ColumnTransformer
            from mlprodict.onnx_conv import to_onnx
            from mlprodict.onnxrt import OnnxInference

            text = dedent('''
                __SCHEMA__
                7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
                7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
                7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
                11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
                ''')
            text = text.replace(
                "__SCHEMA__",
                "fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,"
                "free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,"
                "alcohol,quality,color")

            X_train = pandas.read_csv(StringIO(text))
            for c in X_train.columns:
                if c != 'color':
                    X_train[c] = X_train[c].astype(numpy.float32)
            numeric_features = [c for c in X_train if c != 'color']

            pipe = Pipeline([
                ("prep", ColumnTransformer([
                    ("color", Pipeline([
                        ('one', OneHotEncoder()),
                        ('select', ColumnTransformer(
                            [('sel1', 'passthrough', [0])]))
                    ]), ['color']),
                    ("others", "passthrough", numeric_features)
                ])),
            ])

            pipe.fit(X_train)
            pred = pipe.transform(X_train)
            print(pred)

            model_onnx = to_onnx(pipe, X_train, target_opset=12)
            oinf = OnnxInference(model_onnx)

            # The dataframe is converted into a dictionary,
            # each key is a column name, each value is a numpy array.
            inputs = {c: X_train[c].values for c in X_train.columns}
            inputs = {c: v.reshape((v.shape[0], 1)) for c, v in inputs.items()}

            onxp = oinf.run(inputs)
            print(onxp)
    """
    if isinstance(model, OnnxOperatorMixin):
        # The model knows how to convert itself,
        # the conversion is delegated to its own method.
        if not hasattr(model, 'op_version'):
            raise RuntimeError(  # pragma: no cover
                "Missing attribute 'op_version' for type '{}'.".format(
                    type(model)))
        return model.to_onnx(X=X,
                             name=name,
                             options=options,
                             black_op=black_op,
                             white_op=white_op,
                             final_types=final_types)

    def _guess_type_(X, itype, dtype):
        """
        Returns the initial types guessed from *X* and *itype*,
        *dtype* (guessed if None) and the numpy type associated to it.
        """
        initial_types = guess_initial_types(X, itype)
        if dtype is None:
            if hasattr(X, 'dtypes'):  # DataFrame
                dtype = numpy.float32
            elif hasattr(X, 'dtype'):
                dtype = X.dtype
            elif hasattr(X, 'type'):
                dtype = guess_numpy_type(X.type)
            elif initial_types is not None:
                dtype = guess_numpy_type(initial_types[0][1])
            else:
                raise RuntimeError(  # pragma: no cover
                    "dtype cannot be guessed: {}".format(type(X)))
            # Anything which is not double is handled as float.
            if dtype != numpy.float64:
                dtype = numpy.float32
        if dtype is None:
            raise RuntimeError("dtype cannot be None")  # pragma: no cover
        if isinstance(dtype, FloatTensorType):
            dtype = numpy.float32
        elif isinstance(dtype, DoubleTensorType):
            dtype = numpy.float64
        new_dtype = dtype
        if isinstance(dtype, numpy.ndarray):
            new_dtype = dtype.dtype
        elif isinstance(dtype, DataType):
            new_dtype = numpy.float32
        if new_dtype not in (numpy.float32, numpy.float64, numpy.int64,
                             numpy.int32):
            raise NotImplementedError(  # pragma: no cover
                "dtype should be real not {} ({})".format(new_dtype, dtype))
        return initial_types, dtype, new_dtype

    old_values = register_rewritten_operators() if rewrite_ops else None
    try:
        if rewrite_ops:
            register_converters()

        if isinstance(model, _PredictScorer):
            if X is not None and not isinstance(X, OrderedDict):
                raise ValueError(
                    "For a scorer, parameter X should be a OrderedDict not {}."
                    "".format(type(X)))
            if initial_types is None:
                dts = []
                initial_types = []
                for k, v in X.items():
                    if hasattr(v, 'dtype'):
                        dtype = guess_numpy_type(v.dtype)
                    else:
                        dtype = v
                    if dtype != numpy.float64:
                        dtype = numpy.float32
                    it, _, ndt = _guess_type_(v, None, dtype)
                    # Every guessed type is named after the key in X.
                    for i, item in enumerate(it):
                        it[i] = (k, item[1])
                    initial_types.extend(it)
                    dts.append(ndt)
                ndt = set(dts)
                if len(ndt) != 1:
                    raise RuntimeError(  # pragma: no cover
                        "Multiple dtype is not efficient {}.".format(ndt))
            res = convert_scorer(model,
                                 initial_types,
                                 name=name,
                                 target_opset=target_opset,
                                 options=options,
                                 black_op=black_op,
                                 white_op=white_op,
                                 final_types=final_types)
        else:
            if name is None:
                name = "mlprodict_ONNX(%s)" % model.__class__.__name__

            initial_types, _, _ = _guess_type_(X, initial_types, None)
            res = convert_sklearn(model,
                                  initial_types=initial_types,
                                  name=name,
                                  target_opset=target_opset,
                                  options=options,
                                  black_op=black_op,
                                  white_op=white_op,
                                  final_types=final_types)
    finally:
        # The values returned by register_rewritten_operators are handed
        # back even if the conversion fails, otherwise an exception would
        # leave the registered converters modified for the next calls.
        if old_values is not None:
            register_rewritten_operators(old_values)
    return res
コード例 #13
0
def convert_score_cdist_sum(scope, operator, container):
    """
    Converts function @see fn score_cdist_sum into :epkg:`ONNX`.

    @param      scope       scope, it produces the unique variable
                            and operator names
    @param      operator    operator to convert, it has two inputs
                            and its *raw_operator* wraps the function
    @param      container   container receiving the nodes

    If option *cdist* is ``'single-node'``, the distances are computed
    by a single node *CDist* from domain *mlprodict*, otherwise
    they are built with function *onnx_cdist*. In both cases,
    the distances are summed along axis 1.
    """
    scorer = operator.raw_operator
    if scorer._fct != score_cdist_sum:  # pylint: disable=W0143
        raise RuntimeError(  # pragma: no cover
            "The wrong converter was called {} != {}.".format(
                scorer._fct, score_cdist_sum))

    from skl2onnx.algebra.complex_functions import onnx_cdist
    from skl2onnx.algebra.onnx_ops import OnnxReduceSum  # pylint: disable=E0611
    from skl2onnx.common.data_types import guess_numpy_type

    X, Y = operator.inputs[0], operator.inputs[1]
    opv = container.target_opset
    # Anything which is not double is handled as float.
    dtype = guess_numpy_type(X.type)
    if dtype != numpy.float64:
        dtype = numpy.float32
    output_name = operator.outputs[0].full_name

    options = container.get_options(score_cdist_sum, dict(cdist=None))
    params = scorer.kwargs

    if options.get('cdist') == 'single-node':
        # The parameters of the function become the attributes
        # of node CDist.
        distances = scope.get_unique_variable_name('cdist')
        container.add_node('CDist', [X.full_name, Y.full_name],
                           distances,
                           op_domain='mlprodict',
                           name=scope.get_unique_operator_name('CDist'),
                           **params)
        container.add_node('ReduceSum', [distances],
                           output_name,
                           axes=[1],
                           keepdims=0,
                           name=scope.get_unique_operator_name('ReduceSum'))
        return

    metric = params['metric']
    cdist_args = dict(dtype=dtype, op_version=opv, metric=metric)
    if metric == 'minkowski':
        # Parameter p is only forwarded for this metric.
        cdist_args['p'] = params.get('p', 2)
    dists = onnx_cdist(X, Y, **cdist_args)

    total = OnnxReduceSum(dists,
                          axes=[1],
                          keepdims=0,
                          output_names=[output_name],
                          op_version=opv)
    total.add_to(scope, container)