def test_onnx_example_cdist_in_custom_ops(self):
    """Check that the custom CDist operator matches scipy's cdist
    (sqeuclidean) through onnxruntime, on two different matrices."""
    left = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
    right = np.array(
        [1.1, 2.1, 4.01, 5.01,
         5.001, 4.001, 0, 0]).astype(np.float32).reshape((4, 2))
    opv = _TARGET_OPSET_

    # Graph: CDist(input + input, right) -> 'cdist'
    doubled = OnnxAdd('input', 'input', op_version=opv)
    graph = OnnxIdentity(
        OnnxCDist(doubled, right, op_version=opv),
        output_names=['cdist'], op_version=opv)
    model_def = graph.to_onnx(
        inputs=[('input', FloatTensorType([None, None]))],
        outputs=[('cdist', FloatTensorType())])
    try:
        sess = InferenceSession(model_def.SerializeToString())
    except RuntimeError as e:
        # Skip silently when the custom op is not available in this runtime.
        if "CDist is not a registered" in str(e):
            return
    got = sess.run(None, {'input': left})
    expected = scipy_cdist(left * 2, right, metric="sqeuclidean")
    assert_almost_equal(expected, got[0], decimal=5)

    # Second check with a bigger, less regular matrix compared to itself.
    data = np.array(
        [[6.1, 2.8, 4.7, 1.2],
         [5.7, 3.8, 1.7, 0.3],
         [7.7, 2.6, 6.9, 2.3],
         [6.0, 2.9, 4.5, 1.5],
         [6.8, 2.8, 4.8, 1.4],
         [5.4, 3.4, 1.5, 0.4],
         [5.6, 2.9, 3.6, 1.3],
         [6.9, 3.1, 5.1, 2.3]], dtype=np.float32)
    doubled = OnnxAdd('input', 'input', op_version=opv)
    graph = OnnxIdentity(
        OnnxCDist(doubled, data, op_version=opv),
        output_names=['cdist'], op_version=opv)
    model_def = graph.to_onnx(
        inputs=[('input', FloatTensorType([None, None]))],
        outputs=[('cdist', FloatTensorType())])
    sess = InferenceSession(model_def.SerializeToString())
    got = sess.run(None, {'input': data})
    expected = scipy_cdist(data * 2, data, metric="sqeuclidean")
    assert_almost_equal(expected, got[0], decimal=4)
def onnx_nearest_neighbors_indices(X, Y, k, metric='euclidean', dtype=None,
                                   op_version=None, keep_distances=False,
                                   optim=None, **kwargs):
    """
    Retrieves the nearest neigbours *ONNX*.
    :param X: features or *OnnxOperatorMixin*
    :param Y: neighbours or *OnnxOperatorMixin*
    :param k: number of neighbours to retrieve
    :param metric: requires metric
    :param dtype: numerical type
    :param op_version: opset version
    :param keep_distances: returns the distances as well (second position)
    :param optim: implements specific optimisations,
        ``'cdist'`` replaces *Scan* operator by operator *CDist*
    :param kwargs: additional parameters for function @see fn onnx_cdist
    :return: top indices
    """
    if optim == 'cdist':
        from skl2onnx.algebra.custom_ops import OnnxCDist
        dist = OnnxCDist(X, Y, metric=metric, op_version=op_version,
                         **kwargs)
    elif optim is None:
        dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
        dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
        dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
                          op_version=op_version,
                          dim_in=dim_in, dim_out=dim_out, **kwargs)
    else:
        raise ValueError("Unknown optimisation '{}'.".format(optim))
    if op_version < 10:
        # TopK-1 only returns the largest values: negate the distances so
        # the k smallest distances come out first.
        neg_dist = OnnxMul(dist, np.array(
            [-1], dtype=dtype), op_version=op_version)
        node = OnnxTopK_1(neg_dist, k=k, op_version=1, **kwargs)
        if keep_distances:
            # Undo the negation before returning the distances.
            return (node[1], OnnxMul(node[0], np.array(
                [-1], dtype=dtype), op_version=op_version))
    elif op_version < 11:
        # TopK-10 takes k as a tensor but still only returns the largest
        # values: same negation trick as above.
        neg_dist = OnnxMul(dist, np.array(
            [-1], dtype=dtype), op_version=op_version)
        node = OnnxTopK_10(neg_dist, np.array([k], dtype=np.int64),
                           op_version=10, **kwargs)
        if keep_distances:
            return (node[1], OnnxMul(node[0], np.array(
                [-1], dtype=dtype), op_version=op_version))
    else:
        # TopK-11 supports largest=0 directly: distances are already
        # positive, no negation needed.
        node = OnnxTopK_11(dist, np.array([k], dtype=np.int64),
                           largest=0, sorted=1,
                           op_version=11, **kwargs)
        if keep_distances:
            return (node[1], node[0])
    return node[1]
import onnx
import onnxruntime as rt
from onnxruntime import InferenceSession
import skl2onnx
from skl2onnx.algebra.custom_ops import OnnxCDist
from skl2onnx.common.data_types import FloatTensorType

# Reference result with scipy: pairwise euclidean distances between
# a matrix of ones and a matrix of twos.
mat_x = np.ones((2, 4), dtype=np.float32)
mat_y = np.ones((3, 4), dtype=np.float32)
mat_y *= 2
print(cdist(mat_x, mat_y, metric='euclidean'))

####################################
# ONNX

# Same computation expressed as an ONNX graph with the custom CDist op.
op = OnnxCDist('X', 'Y', op_version=12, output_names=['Z'],
               metric='euclidean')
onx = op.to_onnx({'X': mat_x, 'Y': mat_y},
                 outputs=[('Z', FloatTensorType())])
print(onx)

########################################
# CDist and onnxruntime
# +++++++++++++++++++++
#
# We compute the output of CDist operator
# with onnxruntime.
sess = InferenceSession(onx.SerializeToString())
res = sess.run(None, {'X': mat_x, 'Y': mat_y})
print(res)

#####################################
def _nan_euclidean_distance(container, model, input_name, op_version, optim):
    """Build the ONNX sub-graph computing NaN-aware squared euclidean
    distances between *input_name* and the fitted data ``model._fit_X``.

    Mirrors scikit-learn's nan-euclidean distance: NaN coordinates are
    masked to zero on both sides, their contributions are subtracted, and
    the result is rescaled by ``n_features / present_count``.

    :param container: conversion container (provides ``dtype``,
        ``proto_dtype`` and ``target_opset``)
    :param model: fitted estimator exposing ``_fit_X`` (training data)
    :param input_name: name of the graph input
    :param op_version: opset version used for the ONNX operators
    :param optim: ``None`` (Scan-based cdist) or ``'cdist'``
        (custom *CDist* operator)
    :return: tuple ``(distance node, IsNaN-mask node of the input)``
    :raises RuntimeError: if *optim* is neither ``None`` nor ``'cdist'``
    """
    training_data = model._fit_X.astype(container.dtype)
    shape = OnnxShape(input_name, op_version=op_version)
    # Constant tensor of zeros with the same shape as the input.
    zero = OnnxConstantOfShape(
        shape, value=make_tensor("value", container.proto_dtype,
                                 (1, ), [0]),
        op_version=op_version)
    # Boolean mask of missing (NaN) entries in the input.
    missing_input_name = OnnxIsNaN(input_name, op_version=op_version)
    # Input with NaNs replaced by 0.
    masked_input_name = OnnxWhere(missing_input_name, zero, input_name,
                                  op_version=op_version)
    # Same masking, done eagerly on the (numpy) training data.
    missing_y = np.isnan(training_data)
    training_data[missing_y] = 0
    d_in = training_data.shape[1] if hasattr(training_data, 'shape') else None
    d_out = training_data.shape[0] if hasattr(training_data, 'shape') else None

    # Squared euclidean distance between the two masked matrices.
    if optim is None:
        dist = _onnx_cdist_sqeuclidean(
            masked_input_name, training_data, dtype=container.dtype,
            op_version=container.target_opset, dim_in=d_in, dim_out=d_out)
    elif optim == 'cdist':
        from skl2onnx.algebra.custom_ops import OnnxCDist
        dist = OnnxCDist(masked_input_name, training_data,
                         metric='sqeuclidean',
                         op_version=container.target_opset)
    else:
        raise RuntimeError(
            "Unexpected optimization '{}'.".format(optim))
    # dist1: x^2 summed over coordinates missing in the training row;
    # dist2: y^2 summed over coordinates missing in the input row.
    # Both are spurious contributions to subtract from `dist`.
    dist1 = OnnxMatMul(
        OnnxMul(masked_input_name, masked_input_name,
                op_version=op_version),
        missing_y.T.astype(container.dtype), op_version=op_version)
    dist2 = OnnxMatMul(
        OnnxCast(missing_input_name, to=container.proto_dtype,
                 op_version=op_version),
        (training_data * training_data).T.astype(
            container.dtype), op_version=op_version)
    distances = OnnxSub(dist, OnnxAdd(dist1, dist2,
                                      op_version=op_version),
                        op_version=op_version)
    # present_x / present_y: indicators of non-missing coordinates;
    # their product counts coordinates present on both sides.
    present_x = OnnxSub(
        np.array([1], dtype=container.dtype),
        OnnxCast(missing_input_name, to=container.proto_dtype,
                 op_version=op_version),
        op_version=op_version)
    present_y = (1. - missing_y).astype(container.dtype)
    present_count = OnnxMatMul(
        present_x, present_y.T.astype(container.dtype),
        op_version=op_version)
    # Clip at 1 to avoid dividing by zero when no coordinate is shared.
    present_count = OnnxMax(
        np.array([1], dtype=container.dtype), present_count,
        op_version=op_version)
    dist = OnnxDiv(distances, present_count, op_version=op_version)
    # Rescale by the total number of features, as scikit-learn does.
    return OnnxMul(dist, np.array([d_in], dtype=container.dtype),
                   op_version=op_version), missing_input_name
def onnx_nearest_neighbors_indices_radius(X, Y, radius, metric='euclidean',
                                          dtype=None, op_version=None,
                                          keep_distances=False, optim=None,
                                          proto_dtype=None, **kwargs):
    """
    Retrieves the nearest neigbours *ONNX*.
    :param X: features or *OnnxOperatorMixin*
    :param Y: neighbours or *OnnxOperatorMixin*
    :param radius: radius
    :param metric: requires metric
    :param dtype: numerical type
    :param op_version: opset version
    :param keep_distances: returns the distances as well (second position)
        (NOTE(review): currently unused — all three matrices are always
        returned)
    :param optim: implements specific optimisations,
        ``'cdist'`` replaces *Scan* operator by operator *CDist*
    :param proto_dtype: ONNX tensor proto type used for casts
    :param kwargs: additional parameters for function @see fn onnx_cdist
    :return: 3 squares matrices, indices or -1, distance or 0,
        based on the fact that the distance is below the radius,
        binary weights
    """
    opv = op_version
    # Pairwise distance matrix between X and Y.
    if optim == 'cdist':
        from skl2onnx.algebra.custom_ops import OnnxCDist
        dist = OnnxCDist(X, Y, metric=metric, op_version=op_version,
                         **kwargs)
    elif optim is None:
        dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
        dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
        dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
                          op_version=op_version, dim_in=dim_in,
                          dim_out=dim_out, **kwargs)
    else:
        raise ValueError("Unknown optimisation '{}'.".format(optim))
    # Boolean mask: distance strictly below the radius.
    less = OnnxLess(dist, np.array([radius], dtype=dtype),
                    op_version=opv)
    less.set_onnx_name_prefix('cond')
    shape = OnnxShape(dist, op_version=opv)
    # Zeros with the shape of the distance matrix.
    zero = OnnxCast(OnnxConstantOfShape(shape, op_version=opv),
                    op_version=opv, to=proto_dtype)
    tensor_value = py_make_float_array(-1, dtype=np.float32,
                                       as_tensor=True)
    # Matrix filled with -1 (int64), used both as the "no neighbour"
    # marker and to build an arange below.
    minus = OnnxCast(OnnxConstantOfShape(shape, op_version=opv,
                                         value=tensor_value),
                     op_version=opv,
                     to=onnx_proto.TensorProto.INT64)
    # cumsum of -1 along axis 1 gives -1,-2,...; negated then shifted by
    # -1, this yields 0,1,2,... per row — i.e. the candidate indices.
    minus_range = OnnxAdd(OnnxNeg(OnnxCumSum(minus,
                                             np.array([1],
                                                      dtype=np.int64),
                                             op_version=opv),
                                  op_version=opv),
                          minus, op_version=opv)
    minus_range.set_onnx_name_prefix('arange')
    # Distances inside the radius, 0 elsewhere.
    dist_only = OnnxWhere(less, dist, zero, op_version=opv)
    dist_only.set_onnx_name_prefix('nndist')
    # Column index inside the radius, -1 elsewhere.
    indices = OnnxWhere(less, minus_range, minus, op_version=opv)
    indices.set_onnx_name_prefix('nnind')
    # Binary weights: 1 inside the radius, 0 elsewhere.
    binary = OnnxCast(less, to=proto_dtype, op_version=opv)
    binary.set_onnx_name_prefix('nnbin')
    return indices, dist_only, binary
def kernel_pca_converter(scope: Scope, operator: Operator,
                         container: ModelComponentContainer):
    """Convert a fitted scikit-learn *KernelPCA* into ONNX operators.

    Builds the kernel matrix K(X, X_fit_) for the estimator's kernel
    ('linear', 'cosine', 'poly', 'sigmoid' or 'rbf'), centers it with the
    fitted centerer, and multiplies by the scaled eigenvectors to produce
    the transformed output.

    :param scope: conversion scope
    :param operator: operator wrapping the raw KernelPCA model
    :param container: model container (provides ``target_opset`` and
        converter options)
    :raises RuntimeError: for callable or 'precomputed' kernels
    :raises ValueError: for an unknown kernel or optimisation option
    """
    op = operator.raw_operator
    op_version = container.target_opset
    X = operator.inputs[0]
    dtype = guess_numpy_type(X.type)
    options = container.get_options(op, dict(optim=None))
    optim = options['optim']

    # def _get_kernel(self, X, Y=None):
    #     return pairwise_kernels(
    #         X, Y, metric=self.kernel, filter_params=True, **params)
    if callable(op.kernel):
        raise RuntimeError(
            "Unable to converter KernelPCA with a custom kernel %r."
            "" % op.kernel)
    if op.kernel == 'precomputed':
        raise RuntimeError(
            "The converter is not implemented when kernel=%r for "
            "type=%r." % (op.kernel, type(op)))
    kernel = op.kernel
    params = {"gamma": op.gamma, "degree": op.degree,
              "coef0": op.coef0}
    if kernel == 'linear':
        # K = X @ X_fit_.T
        Y = op.X_fit_.astype(dtype)
        dist = OnnxMatMul(X, OnnxTranspose(Y, perm=[1, 0],
                                           op_version=op_version),
                          op_version=op_version)
    elif kernel == 'cosine':
        # K = normalize(X) @ normalize(X_fit_).T; X_fit_ is normalized
        # eagerly, X is normalized in the graph.
        yn = normalize(op.X_fit_, copy=True)
        ynt = yn.astype(dtype)
        norm = OnnxSqrt(
            OnnxReduceSumApi11(
                OnnxPow(X, np.array([2], dtype=np.int64),
                        op_version=op_version),
                axes=[1], op_version=op_version, keepdims=1),
            op_version=op_version)
        dist = OnnxMatMul(
            OnnxDiv(X, norm, op_version=op_version),
            OnnxTranspose(ynt, perm=[1, 0], op_version=op_version),
            op_version=op_version)
    elif kernel in ('poly', 'sigmoid'):
        # K = (gamma * X @ X_fit_.T + coef0) ** degree   (poly)
        # K = tanh(gamma * X @ X_fit_.T + coef0)         (sigmoid)
        Y = op.X_fit_.astype(dtype)
        dot = OnnxMatMul(X, OnnxTranspose(Y, perm=[1, 0],
                                          op_version=op_version),
                         op_version=op_version)
        if params['gamma'] is None:
            # scikit-learn default: gamma = 1 / n_features
            gamma = np.array([1. / Y.shape[1]], dtype=dtype)
        else:
            gamma = np.array([params['gamma']], dtype=dtype)
        dot_g = OnnxMul(dot, gamma, op_version=op_version)
        dot_c = OnnxAdd(dot_g, np.array([params['coef0']],
                                        dtype=dtype),
                        op_version=op_version)
        if kernel == 'poly':
            dist = OnnxPow(dot_c, np.array([params['degree']],
                                           dtype=np.int64),
                           op_version=op_version)
        else:
            dist = OnnxTanh(dot_c, op_version=op_version)
    elif kernel == 'rbf':
        # K = exp(-gamma * ||x - y||^2)
        if optim == 'cdist':
            from skl2onnx.algebra.custom_ops import OnnxCDist
            Y = op.X_fit_.astype(dtype)
            pair = OnnxCDist(X, Y, metric='sqeuclidean',
                             op_version=op_version)
        elif optim is None:
            Y = op.X_fit_.astype(dtype)
            dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
            dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
            pair = onnx_cdist(X, Y, metric='sqeuclidean', dtype=dtype,
                              op_version=op_version, dim_in=dim_in,
                              dim_out=dim_out)
        else:
            raise ValueError(
                "Unknown optimisation '{}'.".format(optim))
        if params['gamma'] is None:
            gamma = np.array([-1. / Y.shape[1]], dtype=dtype)
        else:
            # Negated here so a single Mul + Exp implements the kernel.
            gamma = np.array([-params['gamma']], dtype=dtype)
        pair_g = OnnxMul(pair, gamma, op_version=op_version)
        dist = OnnxExp(pair_g, op_version=op_version)
    else:
        raise ValueError("Unknown kernel '{}'.".format(kernel))

    # K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
    K = OnnxSubEstimator(op._centerer, dist, op_version=op_version)
    # Scale eigenvectors by 1/sqrt(eigenvalue) on non-zero eigenvalues;
    # attribute names changed in scikit-learn 1.0.
    if hasattr(op, 'eigenvalues_'):
        # scikit-learn>=1.0
        non_zeros = np.flatnonzero(op.eigenvalues_)
        scaled_alphas = np.zeros_like(op.eigenvectors_)
        scaled_alphas[:, non_zeros] = (
            op.eigenvectors_[:, non_zeros] /
            np.sqrt(op.eigenvalues_[non_zeros]))
    else:
        # scikit-learn<1.0
        non_zeros = np.flatnonzero(op.lambdas_)
        scaled_alphas = np.zeros_like(op.alphas_)
        scaled_alphas[:, non_zeros] = (
            op.alphas_[:, non_zeros] /
            np.sqrt(op.lambdas_[non_zeros]))
    # np.dot(K, scaled_alphas)
    output = OnnxMatMul(K, scaled_alphas.astype(dtype),
                        op_version=op_version,
                        output_names=operator.outputs[:1])
    # register the output
    output.add_to(scope, container)