def lightgbm_classifier_shape_extractor(operator): N = operator.inputs[0].type.shape[0] class_labels = operator.raw_operator.classes_ if all(isinstance(i, numpy.ndarray) for i in class_labels): class_labels = numpy.concatenate(class_labels) if all(isinstance(i, str) for i in class_labels): operator.outputs[0].type = StringTensorType(shape=[N]) operator.outputs[1].type = SequenceType(DictionaryType(StringTensorType([]), FloatTensorType([])), N) elif all(isinstance(i, (numbers.Real, bool, numpy.bool_)) for i in class_labels): operator.outputs[0].type = Int64TensorType(shape=[N]) operator.outputs[1].type = SequenceType(DictionaryType(Int64TensorType([]), FloatTensorType([])), N) else: raise ValueError('Unsupported or mixed label types')
def custom_parser(scope, model, inputs, custom_parsers=None): if custom_parsers is not None and model in custom_parsers: return custom_parsers[model](scope, model, inputs, custom_parsers=custom_parsers) if all( isinstance(i, (numbers.Real, bool, np.bool_)) for i in model.classes_): label_type = Int64TensorType() else: label_type = StringTensorType() output_label = scope.declare_local_variable( 'output_label', label_type) this_operator = scope.declare_local_operator( 'LgbmClassifier', model) this_operator.inputs = inputs probability_map_variable = scope.declare_local_variable( 'output_probability', SequenceType(DictionaryType(label_type, scope.tensor_type()))) this_operator.outputs.append(output_label) this_operator.outputs.append(probability_map_variable) return this_operator.outputs
def get_defined_outputs(outputs, onnx_node, typed_inputs=None, variables=None, dtype=None, schema=None, schema_inputs=None): """ Gets types of predefined outputs when they cannot be inferred. Some part of it should be automated based on type constraints. :param outputs: requested outputs :param onnx_node: :epkg:`ONNX` node definition :param typed_inputs: known typed inputs of the node as `tuple(name, type)` :param variables: registered variables created by previous operators :param dtype: float computational type :param schema: defined outputs by schema (*expected_outputs*) :param schema_inputs: defined inputs by schema (*expected_inputs*) :return: typed outputs as ``tuple(name, type)`` """ if schema is None: ft = DoubleTensorType if dtype == numpy.float64 else FloatTensorType elif len(schema) != 1: raise ValueError( # pragma: no cover "schema should only contain one output not {}.".format(schema)) else: if isinstance(schema, DataType): ft = schema[0].__class__ else: ft = schema[0][1].__class__ if onnx_node.op_type in { 'ZipMap', 'ArgMin', 'ArgMax', 'Shape', 'Greater', 'Less', 'Equal', 'TopK', 'Cast', 'ArrayFeatureExtractor', 'Reshape', 'Transpose', 'Scan', 'ConstantOfShape' }: if onnx_node.op_type == "ZipMap": # ZipMap otype = SequenceType(DictionaryType(Int64Type(), ft())) outputs = [(name, otype) for name in outputs] elif (onnx_node.op_type in ("ArgMin", "ArgMax", 'Shape') and len(outputs) == 1): # ArgMin, ArgMax, Shape outputs = [(outputs[0], Int64TensorType())] elif (onnx_node.op_type in ("Greater", "Less", 'Equal') and len(outputs) == 1): # Greater, Less, Equal outputs = [(outputs[0], BooleanTensorType())] elif onnx_node.op_type == "TopK" and len(outputs) == 2: # TopK if len(typed_inputs) != 2: raise RuntimeError( # pragma: no cover "Wrong typed_inputs, got {}.".format(typed_inputs)) outputs = [(outputs[0], typed_inputs[0][1]), (outputs[1], Int64TensorType())] elif onnx_node.op_type == "Cast" and len(outputs) == 1: # Cast ttyp = _guess_type_proto(onnx_node.attribute[0].i, dims=None) outputs = [(outputs[0], ttyp)] elif onnx_node.op_type == "ArrayFeatureExtractor": # ArrayFeatureExtractor if len(typed_inputs) != 2: raise RuntimeError( # pragma: no cover "Wrong typed_inputs, got {}.".format(typed_inputs)) outputs = [(outputs[0], typed_inputs[0][1])] elif onnx_node.op_type in ('Reshape', 'Transpose'): # Reshape outputs = [(outputs[0], typed_inputs[0][1].__class__())] elif onnx_node.op_type == 'Scan': # Scan if len(outputs) != len(typed_inputs): raise RuntimeError( # pragma: no cover "Dimension mismatch, operator Scan should have " "the same number of inputs and outputs {} != {}" ".".format(len(outputs), len(typed_inputs))) outputs = [(o, t[1].__class__()) for o, t in zip(outputs, typed_inputs)] elif onnx_node.op_type == "ConstantOfShape": # ConstantOfShape outputs = [(outputs[0], ft())] elif 'Classifier' in onnx_node.op_type: # Good chance that's a classifier. outputs = [(outputs[0], Int64TensorType()), (outputs[1], ft())] else: if schema_inputs is not None and schema is not None: dt = {} for got, exp in zip(typed_inputs, schema_inputs): if isinstance(exp[1], str): dt[exp[1]] = got out = [] for i in range(len(outputs)): # pylint: disable=C0200 o = outputs[i] if isinstance(o, str): exp = schema[i] if exp[1] in dt: out.append((o, dt[exp[1]][1].__class__())) else: nt = _guess_type_proto_str(exp[1], None) out.append((o, nt)) elif (isinstance(o, tuple) and (isinstance(o[1], str) or o[1] is None)): exp = schema[i] if exp[1] in dt: out.append((o[0], dt[exp[1]][1].__class__())) else: nt = _guess_type_proto_str(exp[1], None) out.append((o[0], nt)) else: out.append(o) outputs = out elif len(typed_inputs) == 1 and len(outputs) == 1: # Default case # Assuming the only output is the same as the only input. outputs = [(outputs[0], typed_inputs[0][1])] else: # Default outputs = [(name, ft()) for name in outputs] for name, typ in outputs: if typ in ('T', None, '', 'I'): raise NotImplementedError( # pragma: no cover "Undefined output type: %r (outputs=%r, typed_inputs=%r, " "dtype=%r, schema=%r, schema_inputs=%r, onnx_node=%r, " "variables=%r)." % (typ, outputs, typed_inputs, dtype, schema, schema_inputs, onnx_node, variables)) if not isinstance(name, str): raise NotImplementedError( # pragma: no cover "Undefined output type: %r (outputs=%r, typed_inputs=%r, " "dtype=%r, schema=%r, schema_inputs=%r, onnx_node=%r, " "variables=%r)." % (typ, outputs, typed_inputs, dtype, schema, schema_inputs, onnx_node, variables)) return outputs
def proto2vars(values): """ Converts proto values to Variables. """ def ptype2vttype(it, shape): if it == TensorProto.FLOAT: # pylint: disable=E1101 return FloatTensorType(shape) if it == TensorProto.DOUBLE: # pylint: disable=E1101 return DoubleTensorType(shape) if it == TensorProto.INT64: # pylint: disable=E1101 return Int64TensorType(shape) if it == TensorProto.INT32: # pylint: disable=E1101 return Int32TensorType(shape) if it == TensorProto.BOOL: # pylint: disable=E1101 return BooleanTensorType(shape) if it == TensorProto.STRING: # pylint: disable=E1101 return StringTensorType(shape) if Float16TensorType is None: if it == TensorProto.FLOAT16: # pylint: disable=E1101 return Float16TensorType(shape) raise NotImplementedError( # pragma: no cover "Unrecognized proto type {} with shape {}".format(it, shape)) def ptype2vtype(it): if it == TensorProto.FLOAT: # pylint: disable=E1101 return FloatType() if it == TensorProto.INT64: # pylint: disable=E1101 return Int64Type() raise NotImplementedError( # pragma: no cover "Unrecognized proto type {}".format(it)) res = [] for v_ in values: v = v_ name = v.name if hasattr(v, 'name') else None if hasattr(v, 'type') and str(v.type) != '': t = v.type v = proto2vars([t])[0][1] elif hasattr(v, 'sequence_type') and str(v.sequence_type) != '': subtype = proto2vars([v.sequence_type.elem_type])[0][1] v = SequenceType(subtype) elif hasattr(v, 'tensor_type') and str(v.tensor_type) != '': tt = v.tensor_type el = tt.elem_type shape = tt.shape dim = shape.dim if len(dim) == 0: shape = [] else: shape = [dim[i].dim_value for i in range(len(dim))] v = ptype2vttype(el, shape) elif hasattr(v, 'map_type') and str(v.map_type) != '': mt = v.map_type keyt = ptype2vtype(mt.key_type) valt = proto2vars([mt.value_type])[0][1] v = DictionaryType(keyt, valt) else: raise RuntimeError( # pragma: no cover "Unable to build a variable from {}.".format(v)) if v.shape is not None and 0 in v.shape: # Replaces 0 by None new_shape = tuple(None if d == 0 else d for d in v.shape) if new_shape in ((None, ), None): v = v.__class__() else: v = v.__class__(new_shape) if v.shape is not None and 0 in v.shape: raise RuntimeError( # pragma: no cover "Shape cannot be empty: '{}': {}.".format(name, v_)) res.append((name, v)) return res
def get_defined_outputs(outputs, onnx_node, typed_inputs=None, variables=None, dtype=None): """ Gets types of predefined outputs when they cannot be inferred. Some part of it should be automated based on type constraints. @param outputs requested outputs @param onnx_node :epkg:`ONNX` node definition @param typed_inputs known typed inputs of the node as ``tuple(name, type)`` @param variables registered variables created by previous operators @param dtype float computational type @return typed outputs as ``tuple(name, type)`` """ ft = DoubleTensorType if dtype == numpy.float64 else FloatTensorType # ZipMap if onnx_node.op_type == "ZipMap": otype = SequenceType(DictionaryType( Int64Type(), ft())) outputs = [(name, otype) for name in outputs] # ArgMin, ArgMax, Shape elif onnx_node.op_type in ("ArgMin", "ArgMax", 'Shape') and len(outputs) == 1: outputs = [(outputs[0], Int64TensorType())] # Greater, Less, Equal elif onnx_node.op_type in ("Greater", "Less", 'Equal') and len(outputs) == 1: outputs = [(outputs[0], BooleanTensorType())] # TopK elif onnx_node.op_type == "TopK" and len(outputs) == 2: if len(typed_inputs) != 2: raise RuntimeError( "Wrong typed_inputs, got {}.".format(typed_inputs)) outputs = [(outputs[0], typed_inputs[0][1]), (outputs[1], Int64TensorType())] # Cast elif onnx_node.op_type == "Cast" and len(outputs) == 1: ttyp = _guess_type_proto(onnx_node.attribute[0].i, dims=None) outputs = [(outputs[0], ttyp)] # ArrayFeatureExtractor elif onnx_node.op_type == "ArrayFeatureExtractor": if len(typed_inputs) != 2: raise RuntimeError( "Wrong typed_inputs, got {}.".format(typed_inputs)) outputs = [(outputs[0], typed_inputs[0][1])] elif 'Classifier' in onnx_node.op_type: # Good chance that's a classifier. outputs = [(outputs[0], Int64TensorType()), (outputs[1], ft())] # Reshape elif onnx_node.op_type in ('Reshape', 'Transpose'): outputs = [(outputs[0], typed_inputs[0][1].__class__())] # Scan elif onnx_node.op_type == 'Scan': if len(outputs) != len(typed_inputs): raise RuntimeError("Dimension mismatch, operator Scan should have " "the same number of inputs and outputs {} != {}" ".".format(len(outputs), len(typed_inputs))) outputs = [(o, t[1].__class__()) for o, t in zip(outputs, typed_inputs)] # ConstantOfShape elif onnx_node.op_type == "ConstantOfShape": outputs = [(outputs[0], ft())] # Default case # Assuming the only output is the same as the only input. elif len(typed_inputs) == 1 and len(outputs) == 1: outputs = [(outputs[0], typed_inputs[0][1])] # Default else: outputs = [(name, ft()) for name in outputs] return outputs