예제 #1
0
    def test_onnx_example_cdist_in_custom_ops(self):
        """
        Checks the custom operator *CDist* against ``scipy_cdist``.

        Two graphs ``Identity(CDist(input + input, constant))`` are
        converted and executed with ``InferenceSession``; the outputs
        are compared with
        ``scipy_cdist(x * 2, constant, metric="sqeuclidean")``.
        The test returns early, without checking anything, when the
        runtime reports that *CDist* is not a registered operator.
        Any other ``RuntimeError`` is propagated.
        """
        x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
        x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0,
                       0]).astype(np.float32).reshape((4, 2))
        opv = _TARGET_OPSET_
        cop = OnnxAdd('input', 'input', op_version=opv)
        cop2 = OnnxIdentity(OnnxCDist(cop, x2, op_version=opv),
                            output_names=['cdist'],
                            op_version=opv)

        model_def = cop2.to_onnx(inputs=[('input',
                                          FloatTensorType([None, None]))],
                                 outputs=[('cdist', FloatTensorType())])

        try:
            sess = InferenceSession(model_def.SerializeToString())
        except RuntimeError as e:
            if "CDist is not a registered" in str(e):
                # The installed runtime does not implement CDist,
                # there is nothing to compare.
                return
            # Any other failure is a real error: without re-raising,
            # the test would fail later on an undefined ``sess``.
            raise
        res = sess.run(None, {'input': x})
        exp = scipy_cdist(x * 2, x2, metric="sqeuclidean")
        assert_almost_equal(exp, res[0], decimal=5)

        # Second case: the input and the constant are the same
        # 8x4 matrix, compared with a looser tolerance.
        x = np.array(
            [[6.1, 2.8, 4.7, 1.2], [5.7, 3.8, 1.7, 0.3], [7.7, 2.6, 6.9, 2.3],
             [6.0, 2.9, 4.5, 1.5], [6.8, 2.8, 4.8, 1.4], [5.4, 3.4, 1.5, 0.4],
             [5.6, 2.9, 3.6, 1.3], [6.9, 3.1, 5.1, 2.3]],
            dtype=np.float32)
        cop = OnnxAdd('input', 'input', op_version=opv)
        cop2 = OnnxIdentity(OnnxCDist(cop, x, op_version=opv),
                            output_names=['cdist'],
                            op_version=opv)

        model_def = cop2.to_onnx(inputs=[('input',
                                          FloatTensorType([None, None]))],
                                 outputs=[('cdist', FloatTensorType())])

        sess = InferenceSession(model_def.SerializeToString())
        res = sess.run(None, {'input': x})
        exp = scipy_cdist(x * 2, x, metric="sqeuclidean")
        assert_almost_equal(exp, res[0], decimal=4)
예제 #2
0
def onnx_nearest_neighbors_indices(X, Y, k, metric='euclidean', dtype=None,
                                   op_version=None, keep_distances=False,
                                   optim=None, **kwargs):
    """
    Retrieves the nearest neighbours with *ONNX* operators.

    :param X: features or *OnnxOperatorMixin*
    :param Y: neighbours or *OnnxOperatorMixin*
    :param k: number of neighbours to retrieve
    :param metric: requires metric
    :param dtype: numerical type
    :param op_version: opset version, it must be an integer as it
        selects the version of operator *TopK* (1, 10 or 11)
    :param keep_distances: returns the distances as well
        (second position)
    :param optim: implements specific optimisations,
        ``'cdist'`` replaces *Scan* operator by operator *CDist*
    :param kwargs: additional parameters for function @see fn onnx_cdist,
        they are also forwarded to operator *TopK*
    :return: top indices, or a tuple *(top indices, distances)*
        if *keep_distances* is True; in every branch the returned
        distances are the opposite of the distances (``-dist``)
    :raises ValueError: if *optim* is neither None nor ``'cdist'``
    :raises TypeError: if *op_version* is None
    """
    if optim == 'cdist':
        from skl2onnx.algebra.custom_ops import OnnxCDist
        dist = OnnxCDist(X, Y, metric=metric, op_version=op_version,
                         **kwargs)
    elif optim is None:
        dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
        dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
        dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
                          op_version=op_version,
                          dim_in=dim_in, dim_out=dim_out,
                          **kwargs)
    else:
        raise ValueError("Unknown optimisation '{}'.".format(optim))

    if op_version is None:
        # The comparisons below would fail with an obscure message
        # ("'<' not supported between 'NoneType' and 'int'").
        raise TypeError(
            "op_version must be an integer to select the version of "
            "operator TopK, not None.")

    def _negate(node):
        # Multiplies a node by -1 with the requested numerical type.
        return OnnxMul(node, np.array([-1], dtype=dtype),
                       op_version=op_version)

    if op_version < 11:
        # These branches look for the largest values of -dist,
        # the values returned by TopK are therefore already negated.
        neg_dist = _negate(dist)
        if op_version < 10:
            node = OnnxTopK_1(neg_dist, k=k, op_version=1, **kwargs)
        else:
            node = OnnxTopK_10(neg_dist, np.array([k], dtype=np.int64),
                               op_version=10, **kwargs)
        if keep_distances:
            return (node[1], node[0])
        return node[1]

    # largest=0 asks TopK for the smallest distances directly.
    node = OnnxTopK_11(dist, np.array([k], dtype=np.int64),
                       largest=0, sorted=1,
                       op_version=11, **kwargs)
    if keep_distances:
        # Negated to return the same sign as the other branches.
        return (node[1], _negate(node[0]))
    return node[1]
예제 #3
0
import onnx
import onnxruntime as rt
from onnxruntime import InferenceSession
import skl2onnx
from skl2onnx.algebra.custom_ops import OnnxCDist
from skl2onnx.common.data_types import FloatTensorType

# Two float32 matrices with the same number of columns:
# X has shape (2, 4) and is filled with ones,
# Y has shape (3, 4) and is filled with twos after the multiplication.
# NOTE(review): ``np`` and ``cdist`` are not imported in this excerpt,
# presumably numpy and scipy.spatial.distance.cdist -- confirm.
X = np.ones((2, 4), dtype=np.float32)
Y = np.ones((3, 4), dtype=np.float32)
Y *= 2
# Reference result the ONNX graph below can be compared with.
print(cdist(X, Y, metric='euclidean'))

####################################
# ONNX

# A single CDist node reading inputs 'X' and 'Y' and writing output 'Z'.
# The arrays given to ``to_onnx`` are keyed by input name; the output
# is declared as a float tensor without any shape.
op = OnnxCDist('X', 'Y', op_version=12, output_names=['Z'], metric='euclidean')
onx = op.to_onnx({'X': X, 'Y': Y}, outputs=[('Z', FloatTensorType())])
print(onx)

########################################
# CDist and onnxruntime
# +++++++++++++++++++++
#
# We compute the output of CDist operator
# with onnxruntime.

# The session is built from the serialized model. ``None`` as the first
# argument of ``run`` presumably requests every output -- see the
# onnxruntime documentation.
sess = InferenceSession(onx.SerializeToString())
res = sess.run(None, {'X': X, 'Y': Y})
print(res)

#####################################
예제 #4
0
def _nan_euclidean_distance(container, model, input_name, op_version, optim):
    """
    Builds the ONNX nodes computing a squared euclidean distance
    ignoring missing values (NaN) between the graph input and
    the training data ``model._fit_X``.

    NaN are replaced by zero on both sides, the squared distance
    is computed on the filled matrices, the terms coming from
    a coordinate missing on one side only are subtracted and
    the result is multiplied by the number of features divided by
    the number of coordinates present on both sides (at least 1).

    :param container: gives ``dtype``, ``proto_dtype`` and
        ``target_opset``
    :param model: fitted model, only ``_fit_X`` is used
    :param input_name: input of the graph
    :param op_version: opset version used for every node except
        the pairwise distance which uses ``container.target_opset``
    :param optim: None or ``'cdist'`` to use operator *CDist*
    :return: tuple *(distance node, IsNaN node applied on the input)*
    :raises RuntimeError: if *optim* is neither None nor ``'cdist'``
    """
    dtype = container.dtype
    proto_dtype = container.proto_dtype

    # astype returns a copy, the model is not modified below.
    fit_X = model._fit_X.astype(dtype)

    # Input with NaN replaced by zero.
    input_shape = OnnxShape(input_name, op_version=op_version)
    zero_value = make_tensor("value", proto_dtype, (1, ), [0])
    zeros = OnnxConstantOfShape(input_shape, value=zero_value,
                                op_version=op_version)
    x_is_nan = OnnxIsNaN(input_name, op_version=op_version)
    x_filled = OnnxWhere(x_is_nan, zeros, input_name,
                         op_version=op_version)

    # Training data with NaN replaced by zero.
    y_is_nan = np.isnan(fit_X)
    fit_X[y_is_nan] = 0
    if hasattr(fit_X, 'shape'):
        n_features, n_samples = fit_X.shape[1], fit_X.shape[0]
    else:
        n_features = n_samples = None

    # Squared distance between the filled matrices.
    if optim is not None and optim != 'cdist':
        raise RuntimeError("Unexpected optimization '{}'.".format(optim))
    if optim is None:
        sq_dist = _onnx_cdist_sqeuclidean(x_filled, fit_X,
                                          dtype=dtype,
                                          op_version=container.target_opset,
                                          dim_in=n_features,
                                          dim_out=n_samples)
    else:
        from skl2onnx.algebra.custom_ops import OnnxCDist
        sq_dist = OnnxCDist(x_filled, fit_X, metric='sqeuclidean',
                            op_version=container.target_opset)

    # x ** 2 summed over the coordinates missing in the training data.
    x_squared = OnnxMul(x_filled, x_filled, op_version=op_version)
    from_y_nan = OnnxMatMul(x_squared, y_is_nan.T.astype(dtype),
                            op_version=op_version)

    # y ** 2 summed over the coordinates missing in the input.
    x_nan_number = OnnxCast(x_is_nan, to=proto_dtype,
                            op_version=op_version)
    from_x_nan = OnnxMatMul(x_nan_number,
                            (fit_X * fit_X).T.astype(dtype),
                            op_version=op_version)

    correction = OnnxAdd(from_y_nan, from_x_nan, op_version=op_version)
    corrected = OnnxSub(sq_dist, correction, op_version=op_version)

    # Number of coordinates present on both sides, at least 1.
    # A second Cast node is created on purpose to keep the same graph.
    one = np.array([1], dtype=dtype)
    x_nan_number_bis = OnnxCast(x_is_nan, to=proto_dtype,
                                op_version=op_version)
    x_present = OnnxSub(one, x_nan_number_bis, op_version=op_version)
    y_present = (1. - y_is_nan).astype(dtype)
    n_present = OnnxMatMul(x_present, y_present.T.astype(dtype),
                           op_version=op_version)
    n_present = OnnxMax(np.array([1], dtype=dtype), n_present,
                        op_version=op_version)

    averaged = OnnxDiv(corrected, n_present, op_version=op_version)
    rescaled = OnnxMul(averaged, np.array([n_features], dtype=dtype),
                       op_version=op_version)
    return rescaled, x_is_nan
예제 #5
0
def onnx_nearest_neighbors_indices_radius(X, Y, radius, metric='euclidean',
                                          dtype=None, op_version=None,
                                          keep_distances=False, optim=None,
                                          proto_dtype=None, **kwargs):
    """
    Retrieves the neighbours within a radius with *ONNX* operators.

    :param X: features or *OnnxOperatorMixin*
    :param Y: neighbours or *OnnxOperatorMixin*
    :param radius: radius, a neighbour is kept if its distance
        is strictly below this value
    :param metric: requires metric
    :param dtype: numerical type
    :param op_version: opset version
    :param keep_distances: accepted but not used by this function,
        the distances are always returned
    :param optim: implements specific optimisations,
        ``'cdist'`` replaces *Scan* operator by operator *CDist*
    :param proto_dtype: ONNX type the distances and the binary
        weights are cast to
    :param kwargs: additional parameters for function @see fn onnx_cdist
    :return: 3 matrices with the shape of the pairwise distances,
        indices (position on the second axis or -1),
        distances (distance or 0), binary weights,
        based on the fact that the distance is below the radius
    :raises ValueError: if *optim* is neither None nor ``'cdist'``
    """
    if optim is None:
        if hasattr(Y, 'shape'):
            dim_in, dim_out = Y.shape[1], Y.shape[0]
        else:
            dim_in = dim_out = None
        dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
                          op_version=op_version,
                          dim_in=dim_in, dim_out=dim_out, **kwargs)
    elif optim == 'cdist':
        from skl2onnx.algebra.custom_ops import OnnxCDist
        dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, **kwargs)
    else:
        raise ValueError("Unknown optimisation '{}'.".format(optim))

    # Boolean mask of the pairs closer than the radius.
    within = OnnxLess(dist, np.array([radius], dtype=dtype),
                      op_version=op_version)
    within.set_onnx_name_prefix('cond')

    # Constant matrices sharing the shape of the distances.
    dist_shape = OnnxShape(dist, op_version=op_version)
    zeros = OnnxCast(OnnxConstantOfShape(dist_shape, op_version=op_version),
                     op_version=op_version, to=proto_dtype)
    minus_one_value = py_make_float_array(-1, dtype=np.float32,
                                          as_tensor=True)
    minus_ones = OnnxCast(
        OnnxConstantOfShape(dist_shape, op_version=op_version,
                            value=minus_one_value),
        op_version=op_version, to=onnx_proto.TensorProto.INT64)

    # -cumsum(-1) - 1 along axis 1 gives 0, 1, 2, ... on every row.
    cumulated = OnnxCumSum(minus_ones, np.array([1], dtype=np.int64),
                           op_version=op_version)
    positions = OnnxAdd(OnnxNeg(cumulated, op_version=op_version),
                        minus_ones, op_version=op_version)
    positions.set_onnx_name_prefix('arange')

    dist_only = OnnxWhere(within, dist, zeros, op_version=op_version)
    dist_only.set_onnx_name_prefix('nndist')
    indices = OnnxWhere(within, positions, minus_ones,
                        op_version=op_version)
    indices.set_onnx_name_prefix('nnind')
    binary = OnnxCast(within, to=proto_dtype, op_version=op_version)
    binary.set_onnx_name_prefix('nnbin')
    return indices, dist_only, binary
예제 #6
0
def kernel_pca_converter(scope: Scope, operator: Operator,
                         container: ModelComponentContainer):
    """
    Converts a fitted *KernelPCA* into ONNX nodes.

    The graph computes the kernel between the input and ``X_fit_``,
    centers it with the sub-estimator ``_centerer`` and multiplies
    the result by the eigenvectors scaled by the square root of
    the non null eigenvalues.

    :param scope: scope the nodes are registered into
    :param operator: operator to convert, it holds the model
        (``raw_operator``), the inputs and the outputs
    :param container: container receiving the nodes, it gives
        the target opset and the conversion options
    :raises RuntimeError: if the kernel is a callable or
        ``'precomputed'``
    :raises ValueError: if the kernel is unknown or, for kernel
        ``'rbf'``, if option *optim* is neither None nor ``'cdist'``
    """
    model = operator.raw_operator
    opv = container.target_opset
    X = operator.inputs[0]
    dtype = guess_numpy_type(X.type)
    optim = container.get_options(model, dict(optim=None))['optim']

    # The kernel follows what KernelPCA._get_kernel does:
    # pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, ...)
    kernel = model.kernel
    if callable(kernel):
        raise RuntimeError(
            "Unable to converter KernelPCA with a custom kernel %r."
            % kernel)
    if kernel == 'precomputed':
        raise RuntimeError(
            "The converter is not implemented when kernel=%r for "
            "type=%r." % (kernel, type(model)))

    gamma, degree, coef0 = model.gamma, model.degree, model.coef0
    transposed = [1, 0]

    if kernel == 'linear':
        Y = model.X_fit_.astype(dtype)
        fit_t = OnnxTranspose(Y, perm=transposed, op_version=opv)
        dist = OnnxMatMul(X, fit_t, op_version=opv)
    elif kernel == 'cosine':
        # The training data is normalized at conversion time,
        # the input is normalized in the graph.
        fit_normed = normalize(model.X_fit_, copy=True).astype(dtype)
        x_squared = OnnxPow(X, np.array([2], dtype=np.int64),
                            op_version=opv)
        x_norm = OnnxSqrt(
            OnnxReduceSumApi11(x_squared, axes=[1], op_version=opv,
                               keepdims=1),
            op_version=opv)
        x_normed = OnnxDiv(X, x_norm, op_version=opv)
        fit_t = OnnxTranspose(fit_normed, perm=transposed, op_version=opv)
        dist = OnnxMatMul(x_normed, fit_t, op_version=opv)
    elif kernel in ('poly', 'sigmoid'):
        # Both kernels start from gamma * <x, y> + coef0.
        Y = model.X_fit_.astype(dtype)
        fit_t = OnnxTranspose(Y, perm=transposed, op_version=opv)
        dot = OnnxMatMul(X, fit_t, op_version=opv)
        gamma_value = 1. / Y.shape[1] if gamma is None else gamma
        scaled = OnnxMul(dot, np.array([gamma_value], dtype=dtype),
                         op_version=opv)
        shifted = OnnxAdd(scaled, np.array([coef0], dtype=dtype),
                          op_version=opv)
        if kernel == 'poly':
            dist = OnnxPow(shifted, np.array([degree], dtype=np.int64),
                           op_version=opv)
        else:
            dist = OnnxTanh(shifted, op_version=opv)
    elif kernel == 'rbf':
        # exp(-gamma * ||x - y||^2)
        if optim is not None and optim != 'cdist':
            raise ValueError("Unknown optimisation '{}'.".format(optim))
        Y = model.X_fit_.astype(dtype)
        if optim == 'cdist':
            from skl2onnx.algebra.custom_ops import OnnxCDist
            pair = OnnxCDist(X, Y, metric='sqeuclidean', op_version=opv)
        else:
            if hasattr(Y, 'shape'):
                dim_in, dim_out = Y.shape[1], Y.shape[0]
            else:
                dim_in = dim_out = None
            pair = onnx_cdist(X, Y, metric='sqeuclidean', dtype=dtype,
                              op_version=opv,
                              dim_in=dim_in, dim_out=dim_out)
        minus_gamma = -1. / Y.shape[1] if gamma is None else -gamma
        weighted = OnnxMul(pair, np.array([minus_gamma], dtype=dtype),
                           op_version=opv)
        dist = OnnxExp(weighted, op_version=opv)
    else:
        raise ValueError("Unknown kernel '{}'.".format(kernel))

    # K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
    K = OnnxSubEstimator(model._centerer, dist, op_version=opv)

    # The fitted attributes were renamed in scikit-learn 1.0:
    # lambdas_ / alphas_ became eigenvalues_ / eigenvectors_.
    if hasattr(model, 'eigenvalues_'):
        eigenvalues, eigenvectors = model.eigenvalues_, model.eigenvectors_
    else:
        eigenvalues, eigenvectors = model.lambdas_, model.alphas_

    # Columns associated with a null eigenvalue stay null.
    kept = np.flatnonzero(eigenvalues)
    scaled_alphas = np.zeros_like(eigenvectors)
    scaled_alphas[:, kept] = eigenvectors[:, kept] / np.sqrt(
        eigenvalues[kept])

    # np.dot(K, scaled_alphas)
    output = OnnxMatMul(K, scaled_alphas.astype(dtype), op_version=opv,
                        output_names=operator.outputs[:1])

    # register the output
    output.add_to(scope, container)