def conv(scope, operator, container):
    """
    Converter used to check how an ``OnnxSub`` node is registered.

    It subtracts the attribute ``W`` of the wrapped model from the first
    input, adds the node to *container* and then verifies what the
    algebra object reports about itself.

    :param scope: scope the node is added to
    :param operator: operator being converted, its ``raw_operator``
        must expose an array ``W``
    :param container: container receiving the node
    :raises AssertionError: if the dumped container does not contain
        a node named ``Su_Sub`` or if the enumerated initial types,
        nodes or variables differ from the expected ones
    """
    first_input = operator.inputs[0]
    dtype = guess_numpy_type(first_input.type)
    weights = operator.raw_operator.W.astype(dtype)

    sub_node = OnnxSub(first_input, weights,
                       output_names=operator.outputs,
                       op_version=TARGET_OPSET)
    sub_node.add_to(scope, container)

    # The node must have received an explicit name in the container.
    dumped = str(container)
    if 'name:"Su_Sub"' not in dumped:
        raise AssertionError("Unnamed operator: '{}'".format(dumped))

    initial_types = list(sub_node.enumerate_initial_types())
    nodes = list(sub_node.enumerate_nodes())
    variables = list(sub_node.enumerate_variables())

    # Exactly one input named 'input' with shape [None, 2].
    assert len(initial_types) == 1
    first_type = initial_types[0]
    assert first_type[0] == 'input'
    assert first_type[1].shape == [None, 2]

    # Exactly one node producing 'variable'.
    assert len(nodes) == 1
    assert nodes[0].output_names == ['variable']

    # Exactly one variable, reported as a tuple whose second item is 0.
    assert len(variables) == 1
    first_variable = variables[0]
    assert isinstance(first_variable, tuple)
    assert first_variable[1] == 0
def conv(scope, operator, container):
    """
    Converts a fitted KMeans-like model into ONNX nodes.

    The first output is the index of the closest cluster center, the
    second one the euclidean distance to every center. The squared
    distance is expanded as ``|x|^2 - 2 x.C' + |C|^2``.

    :param scope: scope the nodes are added to
    :param operator: operator being converted, its ``raw_operator``
        must expose ``cluster_centers_``
    :param container: container receiving the nodes, it provides
        ``target_opset``
    """
    X = operator.inputs[0]
    out = operator.outputs
    op = operator.raw_operator
    opv = container.target_opset
    dtype = guess_numpy_type(X.type)

    C = op.cluster_centers_
    # |C|^2 for every center, computed on the original centers
    # before they are cast.
    C2 = row_norms(C, squared=True).astype(dtype)
    C = C.astype(dtype)

    # |x|^2 for every row, shape (N, 1).
    rs = OnnxReduceSumSquare(X, axes=[1], keepdims=1, op_version=opv)

    # Gemm needs a third operand C which is only used as a null bias.
    # It must have the element type of X and be broadcastable to
    # (N, n_clusters): every constant is built with *dtype* and the
    # static bias has shape (N, 1), exactly like the dynamic one.
    N = X.type.shape[0]
    if isinstance(N, int):
        zeros = np.zeros((N, 1), dtype=dtype)
    else:
        # Unknown batch size: the zeros are derived from rs.
        zeros = OnnxMul(rs, np.array([0], dtype=dtype), op_version=opv)

    # |x|^2 - 2 x.C'
    z = OnnxAdd(
        rs,
        OnnxGemm(X, C, zeros, alpha=-2., transB=1, op_version=opv),
        op_version=opv)
    # ... + |C|^2
    y2 = OnnxAdd(C2, z, op_version=opv)

    lo = OnnxArgMin(y2, axis=1, keepdims=0, output_names=out[:1],
                    op_version=opv)
    y2s = OnnxSqrt(y2, output_names=out[1:], op_version=opv)

    lo.add_to(scope, container)
    y2s.add_to(scope, container)
def decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer, ``(X - mean_) @ coef_``.

    Option ``use_gemm`` (default ``False``) selects between a single
    *Gemm* node and the couple *Sub* + *MatMul*. The chosen value is
    printed on the standard output.

    :param scope: scope the nodes are added to
    :param operator: operator being converted, its ``raw_operator``
        must expose ``mean_`` and ``coef_``
    :param container: container receiving the nodes, it provides
        ``target_opset`` and the conversion options
    """
    model = operator.raw_operator
    opset = container.target_opset
    first_output = operator.outputs[:1]
    X = operator.inputs[0]
    dtype = guess_numpy_type(X.type)

    use_gemm = container.get_options(model, dict(use_gemm=False))['use_gemm']
    print('conversion: use_gemm=', use_gemm)

    if not use_gemm:
        centered = OnnxSub(X, model.mean_.astype(dtype), op_version=opset)
        result = OnnxMatMul(centered, model.coef_.astype(dtype),
                            op_version=opset, output_names=first_output)
    else:
        # X @ coef_ + (-mean_ @ coef_)
        weights = model.coef_.astype(dtype)
        bias = (- model.mean_ @ model.coef_).astype(dtype)
        result = OnnxGemm(X, weights, bias, op_version=opset,
                          alpha=1., beta=1., output_names=first_output)

    result.add_to(scope, container)
def decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a decorrelating transformer with two implementations.

    The first output is computed with *Sub* + *MatMul*,
    ``(X - mean_) @ coef_``, the second one with a single *Gemm*,
    ``X @ coef_ + (-mean_ @ coef_)``.

    :param scope: scope the nodes are added to
    :param operator: operator being converted, its ``raw_operator``
        must expose ``mean_`` and ``coef_``
    :param container: container receiving the nodes, it provides
        ``target_opset``
    """
    model = operator.raw_operator
    opset = container.target_opset
    outputs = operator.outputs
    X = operator.inputs[0]
    dtype = guess_numpy_type(X.type)

    # First output: explicit centering followed by a matrix product.
    centered = OnnxSub(X, model.mean_.astype(dtype), op_version=opset)
    with_matmul = OnnxMatMul(centered, model.coef_.astype(dtype),
                             op_version=opset, output_names=outputs[:1])

    # Second output: the centering is folded into the bias of Gemm.
    weights = model.coef_.astype(dtype)
    bias = (-model.mean_ @ model.coef_).astype(dtype)
    with_gemm = OnnxGemm(X, weights, bias, op_version=opset,
                         alpha=1., beta=1., output_names=outputs[1:2])

    with_matmul.add_to(scope, container)
    with_gemm.add_to(scope, container)
def convert(scope, operator, container):
    """
    Converts a XGBoost regressor into a tree ensemble node.

    Double inputs produce node *TreeEnsembleRegressorDouble*
    (domain ``mlprodict``), any other input type produces
    *TreeEnsembleRegressor* (domain ``ai.onnx.ml``).

    :param scope: scope used to get a unique operator name
    :param operator: operator being converted
    :param container: container receiving the node
    :raises RuntimeError: if the objective is ``reg:gamma`` or
        ``reg:tweedie`` or if the model has no booster
    """
    dtype = guess_numpy_type(operator.inputs[0].type)
    if dtype != numpy.float64:
        dtype = numpy.float32

    xgb_node = operator.raw_operator
    objective, base_score, js_trees = XGBConverter.common_members(
        xgb_node, operator.inputs)

    if objective in ["reg:gamma", "reg:tweedie"]:
        raise RuntimeError(
            "Objective '{}' not supported.".format(objective))

    if xgb_node.get_booster() is None:
        raise RuntimeError("The model was probably not trained.")

    attr_pairs = XGBRegressorConverter._get_default_tree_attribute_pairs()
    attr_pairs['base_values'] = [base_score]
    tree_weights = [1 for _ in js_trees]
    XGBConverter.fill_tree_attributes(
        js_trees, attr_pairs, tree_weights, False)

    # Picks the node type and its domain, then adds the node.
    if dtype == numpy.float64:
        op_type, op_domain = 'TreeEnsembleRegressorDouble', 'mlprodict'
    else:
        op_type, op_domain = 'TreeEnsembleRegressor', 'ai.onnx.ml'

    container.add_node(op_type, operator.input_full_names,
                       operator.output_full_names,
                       name=scope.get_unique_operator_name(op_type),
                       op_domain=op_domain, **attr_pairs)
def convert_lightgbm(scope, operator, container):  # pylint: disable=R0914
    """
    This converter reuses the code from
    `LightGbm.py <https://github.com/onnx/onnxmltools/blob/master/onnxmltools/convert/
    lightgbm/operator_converters/LightGbm.py>`_ and makes
    some modifications. It implements converters
    for models in :epkg:`lightgbm`.

    The objective stored in the dumped model selects the output:
    objectives starting with ``binary`` or ``multiclass`` produce a tree
    ensemble classifier (label + probabilities), objectives starting
    with ``regression``, ``poisson`` or ``gamma`` produce a tree ensemble
    regressor. Double inputs use the ``*Double`` nodes from domain
    ``mlprodict``, other inputs the nodes from domain ``ai.onnx.ml``.

    :param scope: scope used to create unique variable and operator names
    :param operator: operator being converted, ``raw_operator`` is the
        fitted lightgbm model
    :param container: container receiving nodes and initializers;
        attribute ``verbose`` is read if present, option ``split``
        (default ``-1``) is read for regressors
    :raises RuntimeError: if the objective is not one of the supported ones
    :raises ValueError: if class labels are neither all numbers
        nor all strings
    """
    verbose = getattr(container, 'verbose', 0)
    gbm_model = operator.raw_operator
    # A model may carry an already dumped booster in `_model_dict_info`,
    # otherwise the booster is dumped now.
    if hasattr(gbm_model, '_model_dict_info'):
        gbm_text, info = gbm_model._model_dict_info
    else:
        if verbose >= 2:
            print("[convert_lightgbm] dump_model")  # pragma: no cover
        gbm_text, info = dump_lgbm_booster(gbm_model.booster_, verbose=verbose)
    if verbose >= 2:
        print(  # pragma: no cover
            "[convert_lightgbm] modify_tree_for_rule_in_set")
    modify_tree_for_rule_in_set(gbm_text, use_float=True, verbose=verbose, info=info)

    attrs = get_default_tree_classifier_attribute_pairs()
    attrs['name'] = operator.full_name

    # Create different attributes for classifier and
    # regressor, respectively
    post_transform = None
    if gbm_text['objective'].startswith('binary'):
        n_classes = 1
        attrs['post_transform'] = 'LOGISTIC'
    elif gbm_text['objective'].startswith('multiclass'):
        n_classes = gbm_text['num_class']
        attrs['post_transform'] = 'SOFTMAX'
    elif gbm_text['objective'].startswith('regression'):
        n_classes = 1  # Regressor has only one output variable
        attrs['post_transform'] = 'NONE'
        attrs['n_targets'] = n_classes
    elif gbm_text['objective'].startswith(('poisson', 'gamma')):
        n_classes = 1  # Regressor has only one output variable
        attrs['n_targets'] = n_classes
        # 'Exp' is not a supported post_transform value in the ONNX spec yet,
        # so we need to add an 'Exp' post transform node to the model
        attrs['post_transform'] = 'NONE'
        post_transform = "Exp"
    else:
        raise RuntimeError(  # pragma: no cover
            "LightGBM objective should be cleaned already not '{}'.".format(
                gbm_text['objective']))

    # Use the same algorithm to parse the tree
    if verbose >= 2:  # pragma: no cover
        from tqdm import tqdm
        loop = tqdm(gbm_text['tree_info'])
        loop.set_description("parse")
    else:
        loop = gbm_text['tree_info']
    for i, tree in enumerate(loop):
        tree_id = i
        # Trees are assigned to classes in a round-robin fashion.
        class_id = tree_id % n_classes
        # tree['shrinkage'] --> LightGbm provides figures with it already.
        learning_rate = 1.
        _parse_tree_structure(
            tree_id, class_id, learning_rate, tree['tree_structure'], attrs)

    if verbose >= 2:
        print("[convert_lightgbm] onnx")  # pragma: no cover
    # Sort nodes_* attributes. For one tree, its node indexes
    # should appear in an ascent order in nodes_nodeids. Nodes
    # from a tree with a smaller tree index should appear
    # before trees with larger indexes in nodes_nodeids.
    node_numbers_per_tree = Counter(attrs['nodes_treeids'])
    tree_number = len(node_numbers_per_tree.keys())
    # accumulated_node_numbers[t] is the number of nodes
    # in all trees before tree t.
    accumulated_node_numbers = [0] * tree_number
    for i in range(1, tree_number):
        accumulated_node_numbers[i] = (
            accumulated_node_numbers[i - 1] + node_numbers_per_tree[i - 1])
    # Global position of every node = offset of its tree + its node id.
    global_node_indexes = []
    for i in range(len(attrs['nodes_nodeids'])):
        tree_id = attrs['nodes_treeids'][i]
        node_id = attrs['nodes_nodeids'][i]
        global_node_indexes.append(
            accumulated_node_numbers[tree_id] + node_id)
    # Every nodes_* attribute is reordered by global position.
    for k, v in attrs.items():
        if k.startswith('nodes_'):
            merged_indexes = zip(
                copy.deepcopy(global_node_indexes), v)
            sorted_list = [pair[1]
                           for pair in sorted(merged_indexes,
                                              key=lambda x: x[0])]
            attrs[k] = sorted_list

    # Anything which is not double is processed as float.
    dtype = guess_numpy_type(operator.inputs[0].type)
    if dtype != numpy.float64:
        dtype = numpy.float32

    # Create ONNX object
    if (gbm_text['objective'].startswith('binary') or
            gbm_text['objective'].startswith('multiclass')):
        # Prepare label information for both of TreeEnsembleClassifier
        # and ZipMap
        class_type = onnx_proto.TensorProto.STRING  # pylint: disable=E1101
        # NOTE(review): zipmap_attrs is filled below but never given
        # to any node in this function.
        zipmap_attrs = {'name': scope.get_unique_variable_name('ZipMap')}
        if all(isinstance(i, (numbers.Real, bool, numpy.bool_))
               for i in gbm_model.classes_):
            class_type = onnx_proto.TensorProto.INT64  # pylint: disable=E1101
            class_labels = [int(i) for i in gbm_model.classes_]
            attrs['classlabels_int64s'] = class_labels
            zipmap_attrs['classlabels_int64s'] = class_labels
        elif all(isinstance(i, str) for i in gbm_model.classes_):
            class_labels = [str(i) for i in gbm_model.classes_]
            attrs['classlabels_strings'] = class_labels
            zipmap_attrs['classlabels_strings'] = class_labels
        else:
            raise ValueError(  # pragma: no cover
                'Only string and integer class labels are allowed')

        # Create tree classifier
        probability_tensor_name = scope.get_unique_variable_name(
            'probability_tensor')
        label_tensor_name = scope.get_unique_variable_name('label_tensor')

        if dtype == numpy.float64:
            container.add_node('TreeEnsembleClassifierDouble',
                               operator.input_full_names,
                               [label_tensor_name, probability_tensor_name],
                               op_domain='mlprodict', op_version=1, **attrs)
        else:
            container.add_node('TreeEnsembleClassifier',
                               operator.input_full_names,
                               [label_tensor_name, probability_tensor_name],
                               op_domain='ai.onnx.ml', op_version=1, **attrs)

        prob_tensor = probability_tensor_name

        if gbm_model.boosting_type == 'rf':
            # Random forest: column 1 of the probabilities is extracted,
            # divided by a constant, the complement to 1 becomes column 0
            # and the label is recomputed from the merged probabilities.
            col_index_name = scope.get_unique_variable_name('col_index')
            first_col_name = scope.get_unique_variable_name('first_col')
            zeroth_col_name = scope.get_unique_variable_name('zeroth_col')
            denominator_name = scope.get_unique_variable_name('denominator')
            modified_first_col_name = scope.get_unique_variable_name(
                'modified_first_col')
            unit_float_tensor_name = scope.get_unique_variable_name(
                'unit_float_tensor')
            merged_prob_name = scope.get_unique_variable_name('merged_prob')
            predicted_label_name = scope.get_unique_variable_name(
                'predicted_label')
            classes_name = scope.get_unique_variable_name('classes')
            final_label_name = scope.get_unique_variable_name('final_label')

            container.add_initializer(
                col_index_name, onnx_proto.TensorProto.INT64, [], [1])  # pylint: disable=E1101
            container.add_initializer(
                unit_float_tensor_name, onnx_proto.TensorProto.FLOAT, [], [1.0])  # pylint: disable=E1101
            # NOTE(review): the constant 100.0 is hard-coded, presumably
            # the number of averaged trees -- confirm.
            container.add_initializer(
                denominator_name, onnx_proto.TensorProto.FLOAT, [], [100.0])  # pylint: disable=E1101
            container.add_initializer(classes_name, class_type,
                                      [len(class_labels)], class_labels)

            container.add_node(
                'ArrayFeatureExtractor',
                [probability_tensor_name, col_index_name],
                first_col_name,
                name=scope.get_unique_operator_name(
                    'ArrayFeatureExtractor'),
                op_domain='ai.onnx.ml')
            apply_div(scope, [first_col_name, denominator_name],
                      modified_first_col_name, container, broadcast=1)
            apply_sub(
                scope, [unit_float_tensor_name, modified_first_col_name],
                zeroth_col_name, container, broadcast=1)
            container.add_node(
                'Concat', [zeroth_col_name, modified_first_col_name],
                merged_prob_name,
                name=scope.get_unique_operator_name('Concat'), axis=1)
            container.add_node(
                'ArgMax', merged_prob_name,
                predicted_label_name,
                name=scope.get_unique_operator_name('ArgMax'), axis=1)
            container.add_node(
                'ArrayFeatureExtractor', [classes_name, predicted_label_name],
                final_label_name,
                name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
                op_domain='ai.onnx.ml')
            apply_reshape(scope, final_label_name,
                          operator.outputs[0].full_name,
                          container, desired_shape=[-1, ])
            prob_tensor = merged_prob_name
        else:
            # The label computed by the tree ensemble is the first output.
            container.add_node('Identity', label_tensor_name,
                               operator.outputs[0].full_name,
                               name=scope.get_unique_operator_name('Identity'))

        # The probability tensor is copied as is into the second output.
        container.add_node('Identity', prob_tensor,
                           operator.outputs[1].full_name)
    else:
        # Create tree regressor
        output_name = scope.get_unique_variable_name('output')

        keys_to_be_renamed = list(
            k for k in attrs if k.startswith('class_'))

        for k in keys_to_be_renamed:
            # Rename class_* attribute to target_*
            # because TreeEnsembleRegressor
            # and TreeEnsembleClassifier have different ONNX attributes
            attrs['target' + k[5:]] = copy.deepcopy(attrs[k])
            del attrs[k]

        # Option split=-1 keeps a single node, any other value is given
        # to _split_tree_ensemble_atts which returns one set of
        # attributes per node.
        options = container.get_options(gbm_model, dict(split=-1))
        split = options['split']
        if split == -1:
            if dtype == numpy.float64:
                container.add_node(
                    'TreeEnsembleRegressorDouble', operator.input_full_names,
                    output_name, op_domain='mlprodict', op_version=1, **attrs)
            else:
                container.add_node(
                    'TreeEnsembleRegressor', operator.input_full_names,
                    output_name, op_domain='ai.onnx.ml', op_version=1, **attrs)
        else:
            tree_attrs = _split_tree_ensemble_atts(attrs, split)
            tree_nodes = []
            for i, ats in enumerate(tree_attrs):
                tree_name = scope.get_unique_variable_name('tree%d' % i)
                if dtype == numpy.float64:
                    container.add_node(
                        'TreeEnsembleRegressorDouble', operator.input_full_names,
                        tree_name, op_domain='mlprodict', op_version=1, **ats)
                    tree_nodes.append(tree_name)
                else:
                    # Float outputs are cast to double before being summed.
                    container.add_node(
                        'TreeEnsembleRegressor', operator.input_full_names,
                        tree_name, op_domain='ai.onnx.ml', op_version=1, **ats)
                    cast_name = scope.get_unique_variable_name('dtree%d' % i)
                    container.add_node(
                        'Cast', tree_name, cast_name,
                        to=TensorProto.DOUBLE,  # pylint: disable=E1101
                        name=scope.get_unique_operator_name("dtree%d" % i))
                    tree_nodes.append(cast_name)
            if dtype == numpy.float64:
                container.add_node(
                    'Sum', tree_nodes, output_name,
                    name=scope.get_unique_operator_name("sumtree%d" % len(tree_nodes)))
            else:
                # The double sum is cast back to float; `i` still holds
                # the index of the last sub-ensemble and only serves as
                # a seed for the operator name.
                cast_name = scope.get_unique_variable_name('ftrees')
                container.add_node(
                    'Sum', tree_nodes, cast_name,
                    name=scope.get_unique_operator_name("sumtree%d" % len(tree_nodes)))
                container.add_node(
                    'Cast', cast_name, output_name,
                    to=TensorProto.FLOAT,  # pylint: disable=E1101
                    name=scope.get_unique_operator_name("dtree%d" % i))

        if gbm_model.boosting_type == 'rf':
            # NOTE(review): same hard-coded constant 100.0 as for
            # classifiers -- confirm it matches the number of trees.
            denominator_name = scope.get_unique_variable_name('denominator')

            container.add_initializer(
                denominator_name, onnx_proto.TensorProto.FLOAT,  # pylint: disable=E1101
                [], [100.0])

            apply_div(scope, [output_name, denominator_name],
                      operator.output_full_names, container, broadcast=1)
        elif post_transform:
            # Post transform the tree ensemble cannot do itself ('Exp').
            container.add_node(
                post_transform, output_name,
                operator.output_full_names,
                name=scope.get_unique_operator_name(
                    post_transform))
        else:
            container.add_node('Identity', output_name,
                               operator.output_full_names,
                               name=scope.get_unique_operator_name('Identity'))
    if verbose >= 2:
        print("[convert_lightgbm] end")  # pragma: no cover
def live_decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a transformer which decorrelates the data it receives.

    Mean and coefficients are not constants taken from the fitted
    model, they are computed by the ONNX graph itself from the input.
    Comments give the numpy computation each group of nodes implements.

    :param scope: scope the nodes are added to
    :param operator: operator being converted, its ``raw_operator``
        must expose ``alpha`` and ``nf_``
    :param container: container receiving the nodes, it provides
        ``target_opset``
    """
    model = operator.raw_operator
    opset = container.target_opset
    first_output = operator.outputs[:1]

    # Unique input. Its element type drives the type of every
    # constant: float in, float out (same for double).
    X = operator.inputs[0]
    proto_dtype = guess_proto_type(X.type)
    dtype = guess_numpy_type(X.type)

    # mean_ = numpy.mean(X, axis=0, keepdims=True)
    mean = OnnxReduceMean(X, axes=[0], keepdims=1, op_version=opset)
    # A name prefix makes it easier to find which node produced
    # which result in a big graph. It applies to the outputs of the
    # node and to its input nodes unless they received their own prefix.
    mean.set_onnx_name_prefix('mean')

    # X2 = X - mean_
    centered = OnnxSub(X, mean, op_version=opset)

    # V = X2.T @ X2 / X2.shape[0]
    shape = OnnxShape(X, op_version=opset)
    n_rows = OnnxGatherElements(
        shape, numpy.array([0], dtype=numpy.int64), op_version=opset)
    n_rows_float = OnnxCast(n_rows, to=proto_dtype, op_version=opset)
    # Every result involved in the number of rows is prefixed by 'N'.
    n_rows_float.set_onnx_name_prefix('N')

    gram = OnnxMatMul(OnnxTranspose(centered, op_version=opset),
                      centered, op_version=opset)
    cov = OnnxDiv(gram, n_rows_float, op_version=opset)
    cov.set_onnx_name_prefix('V1')

    # V += numpy.identity(V.shape[0]) * self.alpha
    ridge = model.alpha * numpy.identity(model.nf_, dtype=dtype)
    cov = OnnxAdd(cov, ridge, op_version=opset)
    cov.set_onnx_name_prefix('V2')

    # L, P = numpy.linalg.eig(V)
    eig = OnnxEig(cov, eigv=True, op_version=opset)
    eig.set_onnx_name_prefix('LP')

    # Linv = L ** (-0.5)
    # eig[0] is the first output of node Eig, eig[1] the second one,
    # eig alone would be ambiguous.
    exponent = numpy.array([-0.5], dtype=dtype)
    inv_sqrt = OnnxPow(eig[0], exponent, op_version=opset)
    inv_sqrt.set_onnx_name_prefix('Linv')

    # diag = numpy.diag(Linv)
    eye = OnnxEyeLike(
        numpy.zeros((model.nf_, model.nf_), dtype=numpy.int64),
        k=0, op_version=opset)
    diag = OnnxMul(eye, inv_sqrt, op_version=opset)
    diag.set_onnx_name_prefix('diag')

    # root = P @ diag @ P.transpose()
    eigvec_t = OnnxTranspose(eig[1], op_version=opset)
    left = OnnxMatMul(eig[1], diag, op_version=opset)
    left.set_onnx_name_prefix('coef_left')
    root = OnnxMatMul(left, eigvec_t, op_version=opset)
    root.set_onnx_name_prefix('coef')

    # Y = X2 @ root
    result = OnnxMatMul(centered, root, op_version=opset,
                        output_names=first_output)
    result.set_onnx_name_prefix('Y')

    # The final output drags every node it depends on into the graph.
    result.add_to(scope, container)
def convert(scope, operator, container):
    """
    Converts a XGBoost classifier into a tree ensemble node.

    Double inputs produce node *TreeEnsembleClassifierDouble*
    (domain ``mlprodict``, the domain used by the other converters
    for ``*Double`` nodes as that node does not belong to
    ``ai.onnx.ml``), any other input type produces
    *TreeEnsembleClassifier* (domain ``ai.onnx.ml``).

    :param scope: scope used to get a unique operator name
    :param operator: operator being converted
    :param container: container receiving the node
    :raises RuntimeError: if ``base_score`` is None, if the model has
        no tree, if parameter ``n_estimators`` is missing or if the
        objective is not one of ``binary:logistic``,
        ``multi:softprob``, ``reg:logistic``
    """
    dtype = guess_numpy_type(operator.inputs[0].type)
    if dtype != numpy.float64:
        dtype = numpy.float32
    xgb_node = operator.raw_operator
    inputs = operator.inputs

    objective, base_score, js_trees = XGBConverter.common_members(
        xgb_node, inputs)
    if base_score is None:
        raise RuntimeError("base_score cannot be None")
    params = XGBConverter.get_xgb_params(xgb_node)

    attr_pairs = XGBClassifierConverter._get_default_tree_attribute_pairs()
    XGBConverter.fill_tree_attributes(
        js_trees, attr_pairs, [1 for _ in js_trees], True)

    if len(attr_pairs['class_treeids']) == 0:
        raise RuntimeError("XGBoost model is empty.")
    if 'n_estimators' not in params:
        raise RuntimeError(
            "Parameters not found, existing:\n{}".format(
                pformat(params)))

    # Number of trees per boosting round = number of classes
    # (a single tree per round means a binary classifier).
    ncl = (max(attr_pairs['class_treeids']) + 1) // params['n_estimators']
    if ncl <= 1:
        # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L23.
        attr_pairs['post_transform'] = "LOGISTIC"
        attr_pairs['class_ids'] = [0 for v in attr_pairs['class_treeids']]
    else:
        # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L35.
        attr_pairs['post_transform'] = "SOFTMAX"
        # attr_pairs['base_values'] = [base_score for n in range(ncl)]
        attr_pairs['class_ids'] = [v % ncl
                                   for v in attr_pairs['class_treeids']]

    classes = xgb_node.classes_
    if (numpy.issubdtype(classes.dtype, numpy.floating) or
            numpy.issubdtype(classes.dtype, numpy.signedinteger)):
        attr_pairs['classlabels_int64s'] = classes.astype('int')
    else:
        classes = numpy.array([s.encode('utf-8') for s in classes])
        attr_pairs['classlabels_strings'] = classes

    if dtype == numpy.float64:
        op_name, op_domain = "TreeEnsembleClassifierDouble", 'mlprodict'
    else:
        op_name, op_domain = "TreeEnsembleClassifier", 'ai.onnx.ml'

    # add nodes: every supported objective maps to the same node,
    # only the attributes computed above differ.
    if objective not in ("binary:logistic", "multi:softprob",
                         "reg:logistic"):
        raise RuntimeError("Unexpected objective: {0}".format(objective))
    container.add_node(op_name, operator.input_full_names,
                       operator.output_full_names,
                       name=scope.get_unique_operator_name(op_name),
                       op_domain=op_domain, **attr_pairs)
def _to_onnx(self, op_version=None, signature=None, version=None):
    """
    Returns the onnx graph produced by function `fct_`.

    The graph is built only if attribute `onnx_` is None and `fct_`
    is not None, it is then stored into `onnx_`.

    :param op_version: opset given to the function or used
        to convert the result into an algebra
    :param signature: forwarded to `_parse_annotation`
    :param version: forwarded to `_parse_annotation`
    :return: content of attribute `onnx_`
    :raises RuntimeError: if `onnx_` is still None at the end
        or if the function returned a string
    :raises TypeError: if the function does not return an object
        with method `to_algebra`
    :raises NotImplementedError: if the function returns several
        results, needs hidden algebras or if *version* is shorter
        than the number of inputs
    """
    must_build = self.onnx_ is None and self.fct_ is not None
    if must_build:
        from skl2onnx.common.data_types import guess_numpy_type
        from .onnx_variable import OnnxVar

        parsed = self._parse_annotation(
            signature=signature, version=version)
        inputs, outputs, kwargs, n_optional, n_variables = parsed  # pylint: disable=W0612

        no_variable_signature = (
            signature is None or not signature.n_variables)
        if (no_variable_signature and isinstance(version, tuple) and
                len(inputs) > len(version)):
            raise NotImplementedError(  # pragma: no cover
                "Mismatch between additional parameters %r "
                "(n_optional=%r) and version %r for function %r from %r."
                "" % (kwargs, n_optional, version, self.fct_,
                      getattr(self.fct_, '__module__', None)))

        names_in = [item[0] for item in inputs]
        names_out = [item[0] for item in outputs]
        names_var = [OnnxVar(name, dtype=guess_numpy_type(typed[1]))
                     for name, typed in zip(names_in, inputs)]

        takes_opset = 'op_version' in self.fct_.__code__.co_varnames
        if takes_opset:
            # The function directly builds the algebra from input names.
            onx_var = None
            onx_algebra = self.fct_(
                *names_in, op_version=op_version, **kwargs)
        else:
            # The function handles variables, the result is converted.
            onx_var = self.fct_(*names_var, **kwargs)
            if not hasattr(onx_var, 'to_algebra'):
                raise TypeError(  # pragma: no cover
                    "The function %r to convert must return an instance of "
                    "OnnxVar but returns type %r." % (self.fct_, type(onx_var)))
            onx_algebra = onx_var.to_algebra(op_version=op_version)

        hidden_algebras, var_graphs = self._find_hidden_algebras(
            onx_var, onx_algebra)
        if len(hidden_algebras) > 0:
            raise NotImplementedError(
                "Subgraph only supports constants (operator If, Loop, "
                "Scan). hidden_algebras=%r var_graphs=%r" % (
                    hidden_algebras, var_graphs))

        if isinstance(onx_algebra, str):
            raise RuntimeError(  # pragma: no cover
                "Unexpected str type %r." % onx_algebra)
        if isinstance(onx_algebra, tuple):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented when the function returns multiple results.")

        if hasattr(onx_algebra, 'to_onnx'):
            # skl2onnx algebra: converted, optimised, then cached.
            onx_algebra.output_names = names_out
            converted = onx_algebra.to_onnx(
                inputs=inputs, target_opset=op_version, outputs=outputs)
            self.onnx_ = onnx_optimisations(converted)

    if self.onnx_ is None:
        raise RuntimeError(  # pragma: no cover
            "Unable to get the ONNX graph (class %r, fct_=%r)" % (
                type(self), self.fct_))
    return self.onnx_
def live_decorrelate_transformer_converter(scope, operator, container):
    """
    Converts a transformer which decorrelates the data it receives.

    Mean and coefficients are computed by the ONNX graph itself
    from the input. Comments give the numpy computation each group
    of nodes implements.

    :param scope: scope the nodes are added to
    :param operator: operator being converted, its ``raw_operator``
        must expose ``alpha`` and ``nf_``
    :param container: container receiving the nodes, it provides
        ``target_opset``
    """
    op = operator.raw_operator
    opv = container.target_opset
    out = operator.outputs

    # We retrieve the unique input.
    X = operator.inputs[0]
    proto_dtype = guess_proto_type(X.type)
    dtype = guess_numpy_type(X.type)

    # new part

    # mean_ = numpy.mean(X, axis=0, keepdims=True)
    mean = OnnxReduceMean(X, axes=[0], keepdims=1, op_version=opv)
    mean.set_onnx_name_prefix('mean')

    # X2 = X - mean_
    X2 = OnnxSub(X, mean, op_version=opv)

    # V = X2.T @ X2 / X2.shape[0]
    N = OnnxGatherElements(
        OnnxShape(X, op_version=opv),
        numpy.array([0], dtype=numpy.int64),
        op_version=opv)
    Nf = OnnxCast(N, to=proto_dtype, op_version=opv)
    Nf.set_onnx_name_prefix('N')

    V = OnnxDiv(
        OnnxMatMul(OnnxTranspose(X2, op_version=opv), X2, op_version=opv),
        Nf, op_version=opv)
    V.set_onnx_name_prefix('V1')

    # V += numpy.identity(V.shape[0]) * self.alpha
    V = OnnxAdd(V, op.alpha * numpy.identity(op.nf_, dtype=dtype),
                op_version=opv)
    V.set_onnx_name_prefix('V2')

    # L, P = numpy.linalg.eig(V)
    LP = OnnxEig(V, eigv=True, op_version=opv)
    LP.set_onnx_name_prefix('LP')

    # Linv = L ** (-0.5)
    Linv = OnnxPow(LP[0], numpy.array([-0.5], dtype=dtype),
                   op_version=opv)
    Linv.set_onnx_name_prefix('Linv')

    # diag = numpy.diag(Linv)
    # EyeLike returns a tensor with the shape of its 2D input:
    # it must receive a (nf_, nf_) matrix, not a vector holding the shape.
    diag = OnnxMul(
        OnnxEyeLike(
            numpy.zeros((op.nf_, op.nf_), dtype=numpy.int64),
            k=0, op_version=opv),
        Linv, op_version=opv)
    diag.set_onnx_name_prefix('diag')

    # root = P @ diag @ P.transpose()
    trv = OnnxTranspose(LP[1], op_version=opv)
    coef_left = OnnxMatMul(LP[1], diag, op_version=opv)
    coef_left.set_onnx_name_prefix('coef_left')
    coef = OnnxMatMul(coef_left, trv, op_version=opv)
    coef.set_onnx_name_prefix('coef')

    # Same part as before.
    Y = OnnxMatMul(X2, coef, op_version=opv, output_names=out[:1])
    Y.set_onnx_name_prefix('Y')

    # Adding the final output adds every node it depends on.
    Y.add_to(scope, container)
def new_convert_sklearn_function_transformer(scope, operator, container):
    """
    Rewrites the converters implemented in
    :epkg:`sklearn-onnx` to support custom functions
    implemented with :ref:`l-numpy-onnxpy`.

    If the function exposes a compiled ONNX graph (attribute
    ``compiled``), its nodes and initializers are copied into
    *container* with renamed results. If the transformer has no
    function, inputs are copied (one input) or concatenated (many).

    :param scope: scope used to create unique names
    :param operator: operator being converted, ``raw_operator.func``
        is the function to convert
    :param container: container receiving nodes and initializers
    :raises RuntimeError: if the compiled function has no attribute
        ``onnx_``
    :raises TypeError: if the function is not None and was not compiled
    """
    transformer = operator.raw_operator
    fct = transformer.func

    if hasattr(fct, 'signed_compiled'):
        # One compiled version per input type: picks the one
        # matching the first input.
        dtype = guess_numpy_type(operator.inputs[0].type)
        fct = fct[FctVersion((dtype, ), None)]

    if not hasattr(fct, 'compiled'):
        if transformer.func is not None:
            raise TypeError(  # pragma: no cover
                "FunctionTransformer is not supported unless the "
                "transform function is of type %r or "
                "wrapped with onnxnumpy." % type(transformer.func))
        # No function: the transformer leaves the data unchanged.
        if len(operator.inputs) == 1:
            apply_identity(scope, operator.inputs[0].full_name,
                           operator.outputs[0].full_name, container)
        else:
            apply_concat(scope, [i.full_name for i in operator.inputs],
                         operator.outputs[0].full_name, container)
        return

    compiled = fct.compiled
    if not hasattr(compiled, 'onnx_'):
        raise RuntimeError(  # pragma: no cover
            "Attribute 'onnx_' is missing, function was not "
            "converted to onnx.")
    graph = compiled.onnx_.graph
    nodes = graph.node

    # Every result consumed or produced by a node gets a unique name.
    known_names = set()
    for node in nodes:
        known_names.update(node.input)
        known_names.update(node.output)
    renamed = {
        old: scope.get_unique_variable_name('ft_%s' % old)
        for old in known_names}

    # Identities connect the operator input and output to the
    # renamed input and output of the copied graph.
    apply_identity(scope, operator.inputs[0].full_name,
                   renamed[graph.input[0].name], container)
    apply_identity(scope, renamed[graph.output[0].name],
                   operator.outputs[0].full_name, container)

    # Initializers are copied under their new name.
    for original in graph.initializer:
        clone = copy.deepcopy(original)
        new_name = renamed[clone.name]
        clone.name = new_name
        serialized = clone.SerializeToString()
        container.initializers_strings[serialized] = new_name
        container.initializers.append(clone)

    # Nodes are copied with their attributes and renamed results.
    for node in nodes:
        attributes = {
            att.name: _copy_attributes(att) for att in node.attribute}
        container.add_node(
            node.op_type,
            [renamed[n] for n in node.input],
            [renamed[n] for n in node.output],
            name=scope.get_unique_operator_name('ft_%s' % node.op_type),
            **attributes)
def to_onnx(model, X=None, name=None, initial_types=None,
            target_opset=None, options=None, rewrite_ops=False,
            white_op=None, black_op=None, final_types=None):
    """
    Converts a model using :epkg:`sklearn-onnx`.

    @param      model           model to convert or a function
                                wrapped into :epkg:`_PredictScorer` with
                                function :epkg:`make_scorer`
    @param      X               training set (at least one row),
                                can be None, it is used to infer the
                                input types (*initial_types*), for a scorer,
                                it must be an *OrderedDict*
    @param      initial_types   if *X* is None, then *initial_types*
                                must be defined
    @param      name            name of the produced model
    @param      target_opset    to do it with a different target opset
    @param      options         additional parameters for the conversion
    @param      rewrite_ops     rewrites some existing converters,
                                the previous ones are restored once
                                the conversion is done, even if it fails
    @param      white_op        white list of ONNX nodes allowed
                                while converting a pipeline, if empty,
                                all are allowed
    @param      black_op        black list of ONNX nodes disallowed
                                while converting a pipeline, if empty,
                                none are blacklisted
    @param      final_types     a python list. Works the same way as
                                initial_types but not mandatory, it is used
                                to overwrite the type (if type is not None)
                                and the name of every output.
    @return                     converted model

    The function rewrites function *to_onnx* from :epkg:`sklearn-onnx`
    but may change a few converters if *rewrite_ops* is True.
    For example, :epkg:`ONNX` only supports *TreeEnsembleRegressor*
    for float but not for double. It becomes available
    if ``rewrite_ops=True``.

    .. faqref::
        :title: How to deal with a dataframe as input?

        Each column of the dataframe is considered as a named input.
        The first step is to make sure that every column type is correct.
        :epkg:`pandas` tends to select the least generic type to
        hold the content of one column. :epkg:`ONNX` does not automatically
        cast the data it receives. The data must have the same type
        when the model is converted and when the converted model receives
        the data to predict.

        .. runpython::
            :showcode:

            from io import StringIO
            from textwrap import dedent
            import numpy
            import pandas
            from pyquickhelper.pycode import ExtTestCase
            from sklearn.preprocessing import OneHotEncoder
            from sklearn.pipeline import Pipeline
            from sklearn.compose import ColumnTransformer
            from mlprodict.onnx_conv import to_onnx
            from mlprodict.onnxrt import OnnxInference

            text = dedent('''
                __SCHEMA__
                7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
                7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
                7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
                11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
                ''')
            text = text.replace(
                "__SCHEMA__",
                "fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,"
                "free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,"
                "alcohol,quality,color")

            X_train = pandas.read_csv(StringIO(text))
            for c in X_train.columns:
                if c != 'color':
                    X_train[c] = X_train[c].astype(numpy.float32)
            numeric_features = [c for c in X_train if c != 'color']

            pipe = Pipeline([
                ("prep", ColumnTransformer([
                    ("color", Pipeline([
                        ('one', OneHotEncoder()),
                        ('select', ColumnTransformer(
                            [('sel1', 'passthrough', [0])]))
                    ]), ['color']),
                    ("others", "passthrough", numeric_features)
                ])),
            ])

            pipe.fit(X_train)
            pred = pipe.transform(X_train)
            print(pred)

            model_onnx = to_onnx(pipe, X_train, target_opset=12)
            oinf = OnnxInference(model_onnx)

            # The dataframe is converted into a dictionary,
            # each key is a column name, each value is a numpy array.
            inputs = {c: X_train[c].values for c in X_train.columns}
            inputs = {c: v.reshape((v.shape[0], 1)) for c, v in inputs.items()}

            onxp = oinf.run(inputs)
            print(onxp)
    """
    if isinstance(model, OnnxOperatorMixin):
        # The model knows how to convert itself.
        if not hasattr(model, 'op_version'):
            raise RuntimeError(  # pragma: no cover
                "Missing attribute 'op_version' for type '{}'.".format(
                    type(model)))
        return model.to_onnx(
            X=X, name=name, options=options, black_op=black_op,
            white_op=white_op, final_types=final_types)

    def _guess_type_(X, itype, dtype):
        # Returns the initial types, the dtype (guessed if None)
        # and the numpy dtype it corresponds to.
        initial_types = guess_initial_types(X, itype)
        if dtype is None:
            if hasattr(X, 'dtypes'):  # DataFrame
                dtype = numpy.float32
            elif hasattr(X, 'dtype'):
                dtype = X.dtype
            elif hasattr(X, 'type'):
                dtype = guess_numpy_type(X.type)
            elif initial_types is not None:
                dtype = guess_numpy_type(initial_types[0][1])
            else:
                raise RuntimeError(  # pragma: no cover
                    "dtype cannot be guessed: {}".format(
                        type(X)))
            # A guessed type which is not double is handled as float.
            if dtype != numpy.float64:
                dtype = numpy.float32
        if dtype is None:
            raise RuntimeError("dtype cannot be None")  # pragma: no cover
        if isinstance(dtype, FloatTensorType):
            dtype = numpy.float32
        elif isinstance(dtype, DoubleTensorType):
            dtype = numpy.float64
        new_dtype = dtype
        if isinstance(dtype, numpy.ndarray):
            new_dtype = dtype.dtype
        elif isinstance(dtype, DataType):
            new_dtype = numpy.float32
        if new_dtype not in (numpy.float32, numpy.float64, numpy.int64,
                             numpy.int32):
            raise NotImplementedError(  # pragma: no cover
                "dtype should be real not {} ({})".format(new_dtype, dtype))
        return initial_types, dtype, new_dtype

    old_values = None
    try:
        if rewrite_ops:
            old_values = register_rewritten_operators()
            register_converters()

        if isinstance(model, _PredictScorer):
            if X is not None and not isinstance(X, OrderedDict):
                raise ValueError(
                    "For a scorer, parameter X should be a OrderedDict not {}."
                    "".format(type(X)))
            if initial_types is None:
                # One input per item of X, named after its key.
                dts = []
                initial_types = []
                for k, v in X.items():
                    if hasattr(v, 'dtype'):
                        dtype = guess_numpy_type(v.dtype)
                    else:
                        dtype = v
                    if dtype != numpy.float64:
                        dtype = numpy.float32
                    it, _, ndt = _guess_type_(v, None, dtype)
                    for i in range(len(it)):  # pylint: disable=C0200
                        it[i] = (k, it[i][1])  # pylint: disable=C0200
                    initial_types.extend(it)
                    dts.append(ndt)

                ndt = set(dts)
                if len(ndt) != 1:
                    raise RuntimeError(  # pragma: no cover
                        "Multiple dtype is not efficient {}.".format(ndt))
            res = convert_scorer(model, initial_types, name=name,
                                 target_opset=target_opset, options=options,
                                 black_op=black_op, white_op=white_op,
                                 final_types=final_types)
        else:
            if name is None:
                name = "mlprodict_ONNX(%s)" % model.__class__.__name__

            initial_types, _, _ = _guess_type_(X, initial_types, None)
            res = convert_sklearn(model, initial_types=initial_types,
                                  name=name, target_opset=target_opset,
                                  options=options, black_op=black_op,
                                  white_op=white_op, final_types=final_types)
    finally:
        # The original converters are restored whatever happens,
        # a failed conversion must not leave the rewritten ones registered.
        if old_values is not None:
            register_rewritten_operators(old_values)
    return res
def convert_score_cdist_sum(scope, operator, container):
    """
    Converts function @see fn score_cdist_sum into :epkg:`ONNX`.

    Option ``cdist='single-node'`` produces one node *CDist*
    (domain ``mlprodict``) followed by *ReduceSum*, otherwise the
    distances are built with function *onnx_cdist*.

    :param scope: scope used to create unique names
    :param operator: operator being converted, it has two inputs,
        ``raw_operator`` exposes ``_fct`` and ``kwargs``
    :param container: container receiving the nodes
    :raises RuntimeError: if the scorer does not wrap
        function *score_cdist_sum*
    """
    scorer = operator.raw_operator
    if scorer._fct != score_cdist_sum:  # pylint: disable=W0143
        raise RuntimeError(  # pragma: no cover
            "The wrong converter was called {} != {}.".format(
                scorer._fct, score_cdist_sum))

    from skl2onnx.algebra.complex_functions import onnx_cdist
    from skl2onnx.algebra.onnx_ops import OnnxReduceSum  # pylint: disable=E0611
    from skl2onnx.common.data_types import guess_numpy_type

    X, Y = operator.inputs[0], operator.inputs[1]
    opset = container.target_opset
    dtype = guess_numpy_type(operator.inputs[0].type)
    if dtype != numpy.float64:
        dtype = numpy.float32
    result_name = operator.outputs[0].full_name

    options = container.get_options(score_cdist_sum, dict(cdist=None))
    kwargs = scorer.kwargs

    if options.get('cdist', None) == 'single-node':
        # Every parameter of the scorer becomes an attribute of CDist.
        cdist_name = scope.get_unique_variable_name('cdist')
        container.add_node('CDist', [X.full_name, Y.full_name], cdist_name,
                           op_domain='mlprodict',
                           name=scope.get_unique_operator_name('CDist'),
                           **kwargs)
        container.add_node('ReduceSum', [cdist_name], result_name,
                           axes=[1], keepdims=0,
                           name=scope.get_unique_operator_name('ReduceSum'))
        return

    metric = kwargs['metric']
    cdist_params = dict(dtype=dtype, op_version=opset, metric=metric)
    if metric == 'minkowski':
        # Only this metric receives the power parameter.
        cdist_params['p'] = kwargs.get('p', 2)
    dists = onnx_cdist(X, Y, **cdist_params)

    total = OnnxReduceSum(dists, axes=[1], keepdims=0,
                          output_names=[result_name],
                          op_version=opset)
    total.add_to(scope, container)