Example #1
0
def test1(model, X_test, number, repeat, name, **data):
    """
    Measures the statement ``model.predict(X_test)`` with *measure_time*.

    :param model: object bound to the name ``model`` in the statement
    :param X_test: data bound to the name ``X_test`` in the statement
    :param number: parameter *number* given to *measure_time*
    :param repeat: parameter *repeat* given to *measure_time*
    :param name: stored under key ``'name'`` in the result
    :param data: additional key/value pairs merged into the result
    :return: result of *measure_time* completed with keys
        ``'name'``, ``'runtime'``, ``'version'``, ``'batch'`` and *data*
    """
    namespace = dict(X_test=X_test, model=model)
    timing = measure_time('model.predict(X_test)', context=namespace,
                          number=number, repeat=repeat, div_by_number=True)
    # Labels identifying this measure; *data* comes last so it can
    # overwrite any of them.
    timing.update(name=name, runtime='skl', version=skl_version, batch="y")
    timing.update(data)
    return timing
Example #2
0
def test5(oinf, X_test, number, repeat, name, **data):
    """
    Measures the statement ``loop_model2(oinf, X_test)`` with *measure_time*.

    :param oinf: object bound to the name ``oinf`` in the statement
    :param X_test: data bound to the name ``X_test`` in the statement
    :param number: parameter *number* given to *measure_time*
    :param repeat: parameter *repeat* given to *measure_time*
    :param name: stored under key ``'name'`` in the result
    :param data: additional key/value pairs merged into the result
    :return: result of *measure_time* completed with keys
        ``'name'``, ``'runtime'``, ``'version'``, ``'batch'`` and *data*
    """
    namespace = dict(oinf=oinf, X_test=X_test, loop_model2=loop_model2)
    timing = measure_time('loop_model2(oinf, X_test)', context=namespace,
                          number=number, repeat=repeat, div_by_number=True)
    # Labels identifying this measure; *data* comes last so it can
    # overwrite any of them.
    timing.update(name=name, runtime='mlprodict', version=pyrt_version,
                  batch="n")
    timing.update(data)
    return timing
Example #3
0
def test4(oinf, X_test, number, repeat, name, **data):
    """
    Measures the statement ``oinf.run({"X": X_test})`` with *measure_time*.

    :param oinf: object bound to the name ``oinf`` in the statement
    :param X_test: data bound to the name ``X_test`` in the statement
    :param number: parameter *number* given to *measure_time*
    :param repeat: parameter *repeat* given to *measure_time*
    :param name: stored under key ``'name'`` in the result
    :param data: additional key/value pairs merged into the result
    :return: result of *measure_time* completed with keys
        ``'name'``, ``'runtime'``, ``'version'``, ``'batch'`` and *data*
    """
    res = measure_time('oinf.run({"X": X_test})',
                       div_by_number=True,
                       number=number,
                       repeat=repeat,
                       context={
                           'oinf': oinf,
                           'X_test': X_test
                       })
    res['name'] = name
    res['runtime'] = 'mlprodict'
    res['version'] = pyrt_version
    res['batch'] = "y"
    # *data* comes last so it can overwrite the labels above.
    res.update(data)
    return res
Example #4
0
def test3(sess, X_test, number, repeat, name, **data):
    """
    Measures the statement ``loop_model(sess, X_test)`` with *measure_time*.

    :param sess: object bound to the name ``sess`` in the statement
    :param X_test: data bound to the name ``X_test`` in the statement
    :param number: parameter *number* given to *measure_time*
    :param repeat: parameter *repeat* given to *measure_time*
    :param name: stored under key ``'name'`` in the result
    :param data: additional key/value pairs merged into the result
    :return: result of *measure_time* completed with keys
        ``'name'``, ``'runtime'``, ``'version'``, ``'batch'`` and *data*
    """
    namespace = dict(sess=sess, X_test=X_test, loop_model=loop_model)
    timing = measure_time('loop_model(sess, X_test)', context=namespace,
                          number=number, repeat=repeat, div_by_number=True)
    # Labels identifying this measure; *data* comes last so it can
    # overwrite any of them.
    timing.update(name=name, runtime='onnxruntime', version=ort_version,
                  batch="n")
    timing.update(data)
    return timing
Example #5
0
def test2(sess, X_test, number, repeat, name, **data):
    """
    Measures the statement ``sess.run(None, {"X": X_test})``
    with *measure_time*.

    :param sess: object bound to the name ``sess`` in the statement
    :param X_test: data bound to the name ``X_test`` in the statement
    :param number: parameter *number* given to *measure_time*
    :param repeat: parameter *repeat* given to *measure_time*
    :param name: stored under key ``'name'`` in the result
    :param data: additional key/value pairs merged into the result
    :return: result of *measure_time* completed with keys
        ``'name'``, ``'runtime'``, ``'version'``, ``'batch'`` and *data*
    """
    res = measure_time('sess.run(None, {"X": X_test})',
                       div_by_number=True,
                       number=number,
                       repeat=repeat,
                       context={
                           'sess': sess,
                           'X_test': X_test
                       })
    res['name'] = name
    res['runtime'] = 'onnxruntime'
    res['version'] = ort_version
    res['batch'] = "y"
    # *data* comes last so it can overwrite the labels above.
    res.update(data)
    return res
Example #6
0
def _measure_time(stmt,
                  *x,
                  repeat=5,
                  number=5,
                  div_by_number=True,
                  first_run=True,
                  max_time=None):
    """
    Measures a statement and returns the results as a dictionary.

    :param stmt: string
    :param *x: inputs
    :param repeat: average over *repeat* experiment
    :param number: number of executions in one row
    :param div_by_number: divide by the number of executions
    :param first_run: if True, runs the function once before measuring
    :param max_time: execute the statement until the total goes
        beyond this time (approximatively), *repeat* is ignored,
        *div_by_number* must be set to True
    :return: dictionary

    See `Timer.repeat
    <https://docs.python.org/3/library/timeit.html?timeit.Timer.repeat>`_
    for a better understanding of parameter *repeat* and *number*.
    The function returns a duration corresponding to
    *number* times the execution of the main statement.
    """
    if first_run:
        try:
            stmt(*x)
        except RuntimeError as e:  # pragma: no cover
            raise RuntimeError("{}-{}".format(type(x), x.dtype)) from e

    def fct():
        stmt(*x)

    if first_run:
        fct()

    return measure_time(fct,
                        context={},
                        repeat=repeat,
                        number=number,
                        div_by_number=div_by_number,
                        max_time=max_time)
Example #7
0
def benchmark_op(axes, repeat=5, number=5, name="ReduceSum", shape_fct=None,
                 custom_impl=False):
    """
    Benchmarks a sum reduction along *axes* for several implementations
    (numpy, onnxruntime, optionally a custom implementation, tensorflow,
    torch) and plots the results.

    :param axes: reduced axes
    :param repeat: parameter *repeat* given to *measure_time*
    :param number: parameter *number* given to *measure_time*
    :param name: operator name used in the graph titles
    :param shape_fct: function ``dim -> shape``, by default
        ``lambda dim: (3, dim, 1, 128, 64)``
    :param custom_impl: if True, also measures
        *custom_reducesum_rk_float*, it requires ``axes == (0, )``
    :return: tuple *(df, rs, ax)*, the raw measures, the speedups
        compared to numpy and the matplotlib axes
    :raises RuntimeError: if *custom_impl* is True and
        *axes* is not ``(0, )``
    """
    if custom_impl and axes != (0, ):
        # Fails before running any measure.
        raise RuntimeError(
            "Unexpected axes=%r." % axes)
    if shape_fct is None:
        def shape_fct(dim):
            return (3, dim, 1, 128, 64)
    ort_fct = build_ort_reducesum(axes)
    res = []

    def record(label, ctx, dim, info):
        # Measures the statement with the current content of *ctx*
        # and stores the observation under name *label*.
        obs = measure_time(
            "loop_fct(fct, xs, ys)",
            div_by_number=True, context=ctx, repeat=repeat, number=number)
        obs['dim'] = dim
        obs['fct'] = label
        obs.update(info)
        res.append(obs)

    for dim in tqdm([8, 16, 32, 64, 100, 128, 200,
                     256, 400, 512, 1024]):
        shape = shape_fct(dim)
        n_arrays = 10 if dim < 512 else 4
        xs = [numpy.random.rand(*shape).astype(numpy.float32)
              for _ in range(n_arrays)]
        ys = [numpy.array(axes, dtype=numpy.int64)
              for _ in range(n_arrays)]
        info = dict(axes=axes, shape=shape)

        # numpy, the axes are given as a tuple: unpacking them
        # positionally would send the second axis to parameter *dtype*
        ctx = dict(
            xs=xs, ys=ys,
            fct=lambda x, y: numpy.sum(x, axis=tuple(y)),
            loop_fct=loop_fct)
        record('numpy', ctx, dim, info)

        # onnxruntime
        ctx['fct'] = ort_fct
        record('ort', ctx, dim, info)

        if custom_impl:
            ctx['fct'] = lambda x, y: custom_reducesum_rk_float(x)
            # the custom implementation receives 2D contiguous arrays
            ctx['xs'] = [x.reshape((x.shape[0], -1)).copy() for x in xs]
            record('custom', ctx, dim, info)

        if tf_reduce_sum is not None:
            # tensorflow
            ctx['fct'] = tf_reduce_sum
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = ys
            record('tf', ctx, dim, info)

        if torch_sum is not None:
            def torch_sum1(x, y):
                return torch_sum(x, y[0])

            def torch_sum2(x, y):
                return torch_sum(torch_sum(x, y[1]), y[0])

            # torch
            ctx['fct'] = torch_sum1 if len(axes) == 1 else torch_sum2
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = ys  # [from_numpy(y) for y in ys]
            record('torch', ctx, dim, info)

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    df.columns = [_.replace('dim', 'N') for _ in df.columns]
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='N', columns='fct', values='average')

    # speedups: baseline (numpy) divided by every other implementation
    rs = piv.copy()
    for c in ['ort', 'torch', 'tf', 'tf_copy', 'custom']:
        if c in rs.columns:
            rs[c] = rs['numpy'] / rs[c]
    rs['numpy'] = 1.

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True, logy=True, ax=ax[0],
             title="%s benchmark\n%r - %r"
                   " lower better" % (name, shape_name, axes))
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True, logy=True, ax=ax[1],
            title="%s Speedup, baseline=numpy\n%r - %r"
                  " higher better" % (name, shape_name, axes))
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return df, rs, ax
Example #8
0
def benchmark_op(axes,
                 repeat=2,
                 number=5,
                 name="ReduceMean",
                 shape_fct=None,
                 max_dim=None):
    """
    Benchmarks a mean reduction along *axes* for several implementations
    (numpy, onnxruntime, tensorflow, torch) and plots the results.

    :param axes: reduced axes
    :param repeat: parameter *repeat* given to *measure_time*
    :param number: parameter *number* given to *measure_time*
    :param name: operator name used in the graph titles
    :param shape_fct: function ``dim -> shape``, by default
        ``lambda dim: (3, dim, 1, 128, 64)``
    :param max_dim: if not None, dimensions strictly greater
        than this value are not measured
    :return: tuple *(df, rs, ax)*, the raw measures, the speedups
        compared to numpy and the matplotlib axes
    :raises ValueError: if *max_dim* excludes every dimension
    """
    if shape_fct is None:

        def shape_fct(dim):
            return (3, dim, 1, 128, 64)

    # The dimensions are filtered before the loop so that *dim* and *shape*
    # describe the same, last measured, dimension once the loop is over.
    dims = [4, 8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 1024]
    if max_dim is not None:
        dims = [d for d in dims if d <= max_dim]
    if not dims:
        raise ValueError(
            "max_dim=%r excludes every dimension to measure." % max_dim)

    ort_fct = build_ort_reducemean(axes)
    res = []

    def record(label, run, ctx, dim, info):
        # Measures callable *run* and stores the observation
        # under name *label*.
        obs = measure_time(run,
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        obs['dim'] = dim
        obs['fct'] = label
        obs.update(info)
        res.append(obs)

    def numpy_mean(x, y):
        return numpy.mean(x, axis=tuple(y))

    for dim in tqdm(dims):
        shape = shape_fct(dim)
        n_arrays = 10 if dim < 512 else 4
        xs = [
            numpy.random.rand(*shape).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        ys = [numpy.array(axes, dtype=numpy.int64) for _ in range(n_arrays)]
        info = dict(axes=axes, shape=shape)
        ctx = dict(xs=xs, ys=ys, loop_fct=loop_fct)

        # numpy
        record('numpy', lambda: loop_fct(numpy_mean, xs, ys),
               ctx, dim, info)

        # onnxruntime
        record('ort', lambda: loop_fct(ort_fct, xs, ys), ctx, dim, info)

        if tf_reduce_mean is not None:
            # tensorflow
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = ys
            record('tf',
                   lambda: loop_fct(tf_reduce_mean, ctx['xs'], ctx['ys']),
                   ctx, dim, info)

        if torch_mean is not None:

            def torch_mean1(x, y):
                return torch_mean(x, y[0])

            def torch_mean2(x, y):
                return torch_mean(torch_mean(x, y[1]), y[0])

            # torch
            torch_fct = torch_mean1 if len(axes) == 1 else torch_mean2
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = ys  # [from_numpy(y) for y in ys]
            record('torch',
                   lambda: loop_fct(torch_fct, ctx['xs'], ctx['ys']),
                   ctx, dim, info)

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    df.columns = [_.replace('dim', 'N') for _ in df.columns]
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='N', columns='fct', values='average')

    # speedups: baseline (numpy) divided by every other implementation
    rs = piv.copy()
    for c in ['ort', 'torch', 'tf', 'tf_copy']:
        if c in rs.columns:
            rs[c] = rs['numpy'] / rs[c]
    rs['numpy'] = 1.

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title="%s benchmark\n%r - %r"
             " lower better" % (name, shape_name, axes))
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True,
            logy=True,
            ax=ax[1],
            title="%s Speedup, baseline=numpy\n%r - %r"
            " higher better" % (name, shape_name, axes))
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return df, rs, ax
Example #9
0
def benchmark_op(repeat=10,
                 number=10,
                 name="Slice",
                 shape_slice_fct=None,
                 save=None,
                 opset=14,
                 repeat_profile=1500,
                 verbose=1):
    """
    Benchmarks a slice operator with numpy and onnxruntime
    (CPU, and GPU when *build_ort_op* returns a GPU function),
    profiles the ONNX graph and plots the results.

    :param repeat: parameter *repeat* given to *measure_time*
    :param number: parameter *number* given to *measure_time*
    :param name: operator name used in the graph titles
    :param shape_slice_fct: function ``dim -> (shape, slices)``,
        it is mandatory despite its default value
    :param save: given to *build_ort_op*
    :param opset: given to *build_ort_op* as *op_version*
    :param repeat_profile: number of runs while profiling
    :param verbose: if not null, prints the progress
    :return: tuple *(dfprof, dfprofgpu, df, rs, ax)*, CPU profiling,
        GPU profiling (or None), raw measures, measures and ratios,
        matplotlib axes
    :raises TypeError: if *shape_slice_fct* is None
    """
    if shape_slice_fct is None:
        raise TypeError(
            "shape_slice_fct must be a function returning (shape, slices), "
            "not None.")
    if verbose:
        print("[benchmark_op] start repeat=%d number=%d repeat_profile=%d"
              " opset=%d." % (repeat, number, repeat_profile, opset))
    res = []

    def store(obs, label, dim, shape, slices, info):
        # Completes an observation and stores it.
        obs['dim'] = dim
        obs['fct'] = label
        obs['shape'] = ",".join(map(str, shape))
        obs['slices'] = str(slices)
        # NOTE(review): *info* holds key 'shape' as well, the string
        # set above is replaced by the tuple -- confirm it is intended.
        obs.update(info)
        res.append(obs)

    for dim in tqdm([
            8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 600, 784, 800, 1000,
            1024, 1200
    ]):
        shape, slices = shape_slice_fct(dim)
        onx, ort_fct, npy_fct, ort_fct_gpu = build_ort_op(save=save,
                                                          op_version=opset,
                                                          slices=slices)

        n_arrays = 20
        if dim >= 512:
            n_arrays = 10
        xs = [
            numpy.random.rand(*shape).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        info = dict(shape=shape)

        ctx = dict(xs=xs, loop_fct=loop_fct)

        # numpy
        ctx['fct'] = npy_fct
        obs = measure_time(lambda: loop_fct(npy_fct, xs),
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        store(obs, 'numpy', dim, shape, slices, info)

        # onnxruntime
        ctx['fct'] = ort_fct
        obs = measure_time(lambda: loop_fct(ort_fct, xs),
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        store(obs, 'ort', dim, shape, slices, info)

        if ort_fct_gpu is not None:

            # onnxruntime on GPU, the inputs are moved to the device first
            dev = get_ort_device('cuda:0')
            ctx['xs'] = [C_OrtValue.ortvalue_from_numpy(x, dev) for x in xs]
            ctx['fct'] = ort_fct_gpu
            obs = measure_time(lambda: loop_fct(ort_fct_gpu, ctx['xs']),
                               div_by_number=True,
                               context=ctx,
                               repeat=repeat,
                               number=number)
            store(obs, 'ort_gpu', dim, shape, slices, info)

    # profiling CPU, it uses the model and the inputs of the last dimension
    if verbose:
        print("[benchmark_op] done.")
        print("[benchmark_op] profile CPU.")
    so = SessionOptions()
    so.enable_profiling = True
    sess = InferenceSession(onx.SerializeToString(),
                            so,
                            providers=["CPUExecutionProvider"])
    for _ in range(0, repeat_profile):
        sess.run(
            None,
            {'X': xs[-1]},
        )
    prof = sess.end_profiling()
    with open(prof, "r") as f:
        js = json.load(f)
    dfprof = DataFrame(OnnxWholeSession.process_profiling(js))
    dfprof['shape'] = ",".join(map(str, shape))
    dfprof['slices'] = str(slices)
    if verbose:
        print("[benchmark_op] done.")

    # profiling GPU
    if ort_fct_gpu is not None:
        if verbose:
            print("[benchmark_op] profile GPU.")
        so = SessionOptions()
        so.enable_profiling = True
        sess = InferenceSession(onx.SerializeToString(),
                                so,
                                providers=["CUDAExecutionProvider"])
        io_binding = sess.io_binding()._iobinding
        # NOTE(review): the inputs were created on 'cuda:0' but are bound
        # with a 'cpu' device -- confirm it is intended.
        device = get_ort_device('cpu')

        for _ in range(0, repeat_profile):
            x = ctx['xs'][-1]
            io_binding.bind_input('X', device, numpy.float32, x.shape(),
                                  x.data_ptr())
            io_binding.bind_output('Y', device)
            sess._sess.run_with_iobinding(io_binding, None)

        prof = sess.end_profiling()
        with open(prof, "r") as f:
            js = json.load(f)
        dfprofgpu = DataFrame(OnnxWholeSession.process_profiling(js))
        dfprofgpu['shape'] = ",".join(map(str, shape))
        dfprofgpu['slices'] = str(slices)
        if verbose:
            print("[benchmark_op] profile done.")
    else:
        dfprofgpu = None

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='shape', columns='fct', values='average')

    rs = piv.copy()
    for c in ['numpy', 'ort', 'ort_gpu']:
        if c in rs.columns:
            rs[f"numpy/{c}"] = rs['numpy'] / rs[c]
    # NOTE(review): the ratio columns are named "numpy/<c>", none of them
    # contains "/numpy", this filter keeps every column -- confirm intent.
    rs = rs[[c for c in rs.columns if "/numpy" not in c]].copy()

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title=f"{name} benchmark\n{shape_name!r} lower better")
    ax[0].legend(prop={"size": 9})
    rs.plot(
        logx=True,
        logy=True,
        ax=ax[1],
        title=f"{name} Speedup, baseline=numpy\n{shape_name!r} higher better")
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return dfprof, dfprofgpu, df, rs, ax
Example #10
0
def benchmark_equation(equation):
    """
    Benchmarks several einsum implementations (numpy, opt-einsum,
    onnxruntime, decomposed onnxruntime, custom implementations,
    tensorflow, torch) on one equation and plots the results.
    Both inputs have shape ``(2, dim, 12, 64)``.

    :param equation: einsum equation
    :return: tuple *(df, rs, ax)*, the raw measures, the speedups
        compared to ``numpy.einsum`` and the matplotlib axes
    """
    # equations
    ort_einsum = build_ort_einsum(equation)
    ort_einsum_decomposed = build_ort_decomposed(equation)
    res = []

    def record(label, stmt, ctx, dim):
        # Measures statement *stmt* with the current content of *ctx*
        # and stores the observation under name *label*.
        obs = measure_time(stmt,
                           div_by_number=True,
                           context=ctx,
                           repeat=5,
                           number=1)
        obs['dim'] = dim
        obs['fct'] = label
        res.append(obs)

    # statements, they differ by the signature of the measured function
    with_eq = "loop_einsum_eq(einsum, equation, xs, ys)"
    with_eq_th = "loop_einsum_eq_th(einsum, equation, xs, ys)"
    without_eq = "loop_einsum(einsum, xs, ys)"

    for dim in tqdm([8, 16, 32, 64, 100, 128, 200, 256, 500, 512]):
        xs = [
            numpy.random.rand(2, dim, 12, 64).astype(numpy.float32)
            for _ in range(5)
        ]
        ys = [
            numpy.random.rand(2, dim, 12, 64).astype(numpy.float32)
            for _ in range(5)
        ]

        # numpy
        ctx = dict(equation=equation,
                   xs=xs,
                   ys=ys,
                   einsum=numpy.einsum,
                   loop_einsum=loop_einsum,
                   loop_einsum_eq=loop_einsum_eq,
                   loop_einsum_eq_th=loop_einsum_eq_th)
        record('numpy.einsum', with_eq, ctx, dim)

        # opt-einsum
        ctx['einsum'] = contract
        record('opt-einsum', with_eq, ctx, dim)

        # onnxruntime
        ctx['einsum'] = ort_einsum
        record('ort_einsum', without_eq, ctx, dim)

        # onnxruntime decomposed
        ctx['einsum'] = ort_einsum_decomposed
        record('ort_dec', without_eq, ctx, dim)

        # custom implementation
        ctx['einsum'] = custom_einsum_float
        record('c_einsum', with_eq_th, ctx, dim)

        # transpose + custom implementation
        ctx['einsum'] = custom_einsum_float_tr
        record('c_einsum_tr', with_eq, ctx, dim)

        if tf_einsum is not None:
            # tensorflow
            ctx['einsum'] = tf_einsum
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = [convert_to_tensor(y) for y in ys]
            record('tf_einsum', with_eq, ctx, dim)

        if torch_einsum is not None:
            # torch
            ctx['einsum'] = torch_einsum
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = [from_numpy(y) for y in ys]
            record('torch_einsum', with_eq, ctx, dim)

    # Dataframes
    df = pandas.DataFrame(res)
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='dim', columns='fct', values='average')

    # speedups: baseline (numpy.einsum) divided by every other
    # implementation, tensorflow and torch may be missing
    rs = piv.copy()
    for c in ['c_einsum', 'ort_einsum', 'ort_dec', 'opt-einsum',
              'c_einsum_tr', 'tf_einsum', 'torch_einsum']:
        if c in rs.columns:
            rs[c] = rs['numpy.einsum'] / rs[c]
    rs['numpy.einsum'] = 1.

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(14, 5))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title="Einsum benchmark\n%s -- (2, N, 12, 64)"
             " lower better" % equation)
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True,
            logy=True,
            ax=ax[1],
            title="Einsum Speedup, baseline=numpy\n%s -- (2, N, 12, 64)"
            " higher better" % equation)
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})

    return df, rs, ax
Example #11
0
def benchmark_op(repeat=5, number=2, name="Add", shape_fcts=None):
    """
    Benchmarks three chained additions ``((x + y) + y) + y`` with numpy,
    tensorflow and torch against the function returned by
    *build_ort_add*, and plots the results.

    :param repeat: parameter *repeat* given to *measure_time*
    :param number: parameter *number* given to *measure_time*
    :param name: operator name used in the graph titles
    :param shape_fcts: pair of functions ``dim -> shape``, one for each
        input, by default both are ``lambda dim: (5, dim, dim)``
    :return: tuple *(df, rs, ax)*, the raw measures, the speedups
        compared to numpy and the matplotlib axes
    """
    if shape_fcts is None:

        def shape_fct(dim):
            return (5, dim, dim)

        shape_fcts = (shape_fct, shape_fct)
    ort_fct = build_ort_add()
    res = []

    def record(label, ctx, dim, info):
        # Measures the statement with the current content of *ctx*
        # and stores the observation under name *label*.
        obs = measure_time("loop_fct(fct, xs, ys)",
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        obs['dim'] = dim
        obs['fct'] = label
        obs.update(info)
        res.append(obs)

    for dim in tqdm(
        [8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 1024, 1536, 2048, 2560]):
        shape1 = shape_fcts[0](dim)
        shape2 = shape_fcts[1](dim)
        # fewer arrays for big dimensions and for inputs
        # with more than three dimensions
        n_arrays = 16 if dim < 512 else 4
        if len(shape1) > 3:
            n_arrays //= 4
        xs = [
            numpy.random.rand(*shape1).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        ys = [
            numpy.random.rand(*shape2).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        info = dict(shape1=shape1, shape2=shape2)

        # numpy
        ctx = dict(
            xs=xs,
            ys=ys,
            fct=lambda x, y: numpy.add(numpy.add(numpy.add(x, y), y), y),
            loop_fct=loop_fct)
        record('numpy', ctx, dim, info)

        # onnxruntime
        ctx['fct'] = ort_fct
        record('ort', ctx, dim, info)

        if tf_add is not None:
            # tensorflow
            ctx['fct'] = lambda x, y: tf_add(tf_add(tf_add(x, y), y), y)
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = [convert_to_tensor(y) for y in ys]
            record('tf', ctx, dim, info)

        if torch_add is not None:
            # torch
            ctx['fct'] = lambda x, y: torch_add(torch_add(torch_add(x, y), y),
                                                y)
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = [from_numpy(y) for y in ys]
            record('torch', ctx, dim, info)

    # Dataframes
    shape1_name = str(shape1).replace(str(dim), "N")
    shape2_name = str(shape2).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    df.columns = [_.replace('dim', 'N') for _ in df.columns]
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='N', columns='fct', values='average')

    # speedups: baseline (numpy) divided by every other implementation
    rs = piv.copy()
    for c in ['ort', 'torch', 'tf']:
        if c in rs.columns:
            rs[c] = rs['numpy'] / rs[c]
    rs['numpy'] = 1.

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title="%s benchmark\n%s + %s"
             " lower better" % (name, shape1_name, shape2_name))
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True,
            logy=True,
            ax=ax[1],
            title="%s Speedup, baseline=numpy\n%s + %s"
            " higher better" % (name, shape1_name, shape2_name))
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return df, rs, ax
Example #12
0
print("profiling...")
txt = profile(runlocal, pyinst_format='text')
print(txt[1])

##########################################
# Now let's measure the average computation
# time per observation for batches of 2 to 20
# observations. The runtime implemented in
# :epkg:`mlprodict` parallelizes the computation
# after a given number of observations.

obs = []
for N in tqdm(list(range(2, 21))):
    # ONNX runtime on the first N rows
    m = measure_time("oinf.run({'X': x})",
                     {'oinf': oinf, 'x': X32[:N]},
                     div_by_number=True,
                     number=20)
    m['N'] = N
    m['RT'] = 'ONNX'
    obs.append(m)

    # scikit-learn on the same rows
    with config_context(assume_finite=True):
        m = measure_time("hgb.predict(x)",
                         {'hgb': hgb, 'x': X32[:N]},
                         div_by_number=True,
                         number=15)
    m['N'] = N
    m['RT'] = 'SKL'
    # the measure was built but never stored
    obs.append(m)
Example #13
0
# Runs py_topk on X; the bare expression below has no effect
# when the file is executed as a plain script.
r1 = py_topk.run({'X': X})
r1

###########################
# Same input given to ort_topk.

r2 = ort_topk.run(None, {'X': X})
r2

#################################
# Some figures.

# bs collects one measure per runtime, key 'c' identifies the runtime.
bs = []
bs.append(
    measure_time(lambda: py_topk.run({'X': X}),
                 context=globals(),
                 div_by_number=True))
bs[-1]['c'] = 'py'
bs[-1]

#################################
# Same measure for ort_topk.

bs.append(
    measure_time(lambda: ort_topk.run(None, {'X': X}),
                 context=globals(),
                 div_by_number=True))
bs[-1]['c'] = 'or'
bs[-1]

#####################################
Example #14
0
def benchmark_op(perm, repeat=5, number=5, name="Transpose", shape_fct=None):
    """
    Benchmarks a transposition with permutation *perm* for several
    implementations (numpy, onnxruntime, tensorflow with and without
    copy, torch) and plots the results.

    :param perm: permutation of the axes
    :param repeat: parameter *repeat* given to *measure_time*
    :param number: parameter *number* given to *measure_time*
    :param name: operator name used in the graph titles
    :param shape_fct: function ``dim -> shape``, by default
        ``lambda dim: (3, dim, 1, 512)``
    :return: tuple *(df, rs, ax)*, the raw measures, the speedups
        compared to numpy and the matplotlib axes
    """
    if shape_fct is None:

        def shape_fct(dim):
            return (3, dim, 1, 512)

    ort_fct = build_ort_transpose(perm)
    # the equation only depends on the permutation
    equation = perm2eq(perm)
    res = []

    def record(label, ctx, dim, info):
        # Measures the statement with the current content of *ctx*
        # and stores the observation under name *label*.
        obs = measure_time("loop_fct(fct, xs, ys)",
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        obs['dim'] = dim
        obs['fct'] = label
        obs.update(info)
        res.append(obs)

    for dim in tqdm([8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 1024]):
        shape = shape_fct(dim)
        n_arrays = 10 if dim < 512 else 4
        xs = [
            numpy.random.rand(*shape).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        ys = [perm for _ in range(n_arrays)]
        info = dict(perm=perm, shape=shape)

        # numpy
        ctx = dict(
            xs=xs,
            ys=ys,
            fct=lambda x, y: numpy.ascontiguousarray(numpy.transpose(x, y)),
            loop_fct=loop_fct)
        record('numpy', ctx, dim, info)

        # onnxruntime
        ctx['fct'] = ort_fct
        record('ort', ctx, dim, info)

        if tf_transpose is not None:
            # tensorflow
            ctx['fct'] = tf_transpose
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = [convert_to_tensor(y) for y in ys]
            record('tf', ctx, dim, info)

            # tensorflow with copy, the permutation must be given,
            # otherwise another transposition is measured
            ctx['fct'] = lambda x, y: tf_transpose(
                convert_to_tensor(x), y).numpy()
            ctx['xs'] = xs
            ctx['ys'] = ys
            record('tf_copy', ctx, dim, info)

        if torch_einsum is not None:
            # torch
            ctx['fct'] = lambda x, y: torch_einsum(equation, x).contiguous()
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = ys  # [from_numpy(y) for y in ys]
            record('torch', ctx, dim, info)

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    df.columns = [_.replace('dim', 'N') for _ in df.columns]
    # keywords are mandatory for pivot in recent versions of pandas
    piv = df.pivot(index='N', columns='fct', values='average')

    # speedups: baseline (numpy) divided by every other implementation
    rs = piv.copy()
    for c in ['ort', 'torch', 'tf', 'tf_copy']:
        if c in rs.columns:
            rs[c] = rs['numpy'] / rs[c]
    rs['numpy'] = 1.

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title="%s benchmark\n%r - %r - %s"
             " lower better" % (name, shape_name, perm, equation))
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True,
            logy=True,
            ax=ax[1],
            title="%s Speedup, baseline=numpy\n%r - %r - %s"
            " higher better" % (name, shape_name, perm, equation))
    # horizontal lines at speedups 0.5 and 2
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return df, rs, ax
Example #15
0
def latency(model,
            law='normal',
            size=1,
            number=10,
            repeat=10,
            max_time=0,
            runtime="onnxruntime",
            device='cpu',
            profiling=None):
    """
    Measures the latency of a model (python API).

    :param model: ONNX graph, a string is interpreted as a filename
        which must exist
    :param law: random law used to generate fake inputs,
        only `'normal'` is supported
    :param size: batch size, it replaces the first dimension
        of every input if it is left unknown
    :param number: number of calls to measure
    :param repeat: number of times to repeat the experiment
    :param max_time: if it is > 0, it runs as many times as possible
        during that period of time
    :param runtime: available runtime
    :param device: device, `cpu`, `cuda:0`, an execution provider name
        or a comma separated list of execution provider names
    :param profiling: if set, profiles the execution of every
        node, the results can be aggregated by name or type,
        the value for this parameter should be in `(None, 'name', 'type')`
    :return: dictionary or a tuple (dictionary, dataframe)
        if the profiling is enabled
    :raises FileNotFoundError: if *model* is a filename which does not exist
    :raises ValueError: if *profiling*, *law* or *device* has an
        unexpected value
    :raises NotImplementedError: if the device or the profiling is not
        supported by the requested runtime

    .. cmdref::
        :title: Measures model latency
        :cmd: -m mlprodict latency --help
        :lid: l-cmd-latency

        The command generates random inputs and call many times the
        model on these inputs. It returns the processing time for one
        iteration.

        Example::

            python -m mlprodict latency --model "model.onnx"
    """
    if isinstance(model, str) and not os.path.exists(model):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find model %r." % model)
    if profiling not in (None, '', 'name', 'type'):
        raise ValueError("Unexpected value for profiling: %r." % profiling)

    # The arguments may come from the command line as strings.
    size = int(size)
    number = int(number)
    repeat = int(repeat)
    if max_time in (None, 0, ""):
        max_time = None
    else:
        max_time = float(max_time)
        if max_time <= 0:
            max_time = None

    if law != "normal":
        raise ValueError("Only law='normal' is supported, not %r." % law)

    # onnxruntime names its providers 'CPUExecutionProvider' and
    # 'CUDAExecutionProvider' (singular). The plural spellings are still
    # accepted as aliases to stay compatible with existing callers.
    cpu_aliases = ('cpu', 'CPUExecutionProvider', 'CPUExecutionProviders')
    cuda_aliases = ('cuda:0', 'CUDAExecutionProvider',
                    'CUDAExecutionProviders')
    if device in cpu_aliases:
        providers = ['CPUExecutionProvider']
    elif device in cuda_aliases:
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = ['CUDAExecutionProvider']
    elif ',' in device:
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = device.split(',')
        allp = set(get_all_providers())
        for p in providers:
            if p not in allp:
                raise ValueError(
                    "One device or provider %r is not supported among %r."
                    "" % (p, allp))
    else:
        raise ValueError(  # pragma: no cover
            "Device %r not supported." % device)

    with_profiling = profiling in ('name', 'type')
    if runtime == "onnxruntime":
        # The providers must be given to the session, otherwise the
        # parameter *device* has no effect on where the model runs.
        # NOTE(review): assumes InferenceSession is onnxruntime's class,
        # which accepts the keyword `providers` -- confirm the import.
        if with_profiling:
            so = SessionOptions()
            so.enable_profiling = True
            sess = InferenceSession(
                model, sess_options=so, providers=providers)
        else:
            sess = InferenceSession(model, providers=providers)

        def fct(feeds):
            return sess.run(None, feeds)

        inputs = sess.get_inputs()
    else:
        if with_profiling:
            runtime_options = {"enable_profiling": True}
            if runtime != 'onnxruntime1':
                raise NotImplementedError(  # pragma: no cover
                    "Profiling is not implemented for runtime=%r." % runtime)
        else:
            runtime_options = None
        oinf = OnnxInference(model,
                             runtime=runtime,
                             runtime_options=runtime_options)

        def fct(feeds):
            return oinf.run(feeds)

        inputs = oinf.obj.graph.input

    feeds = random_feed(inputs, size)
    res = measure_time(lambda: fct(feeds),
                       number=number,
                       repeat=repeat,
                       context={},
                       max_time=max_time,
                       div_by_number=True)
    for k, v in feeds.items():
        res["shape(%s)" % k] = "x".join(map(str, v.shape))

    if not with_profiling:
        return res

    if runtime == 'onnxruntime':
        # end_profiling returns the name of the file holding the profile.
        profile_name = sess.end_profiling()
        with open(profile_name, 'r', encoding='utf-8') as f:
            js = json.load(f)
        js = OnnxWholeSession.process_profiling(js)
        df = DataFrame(js)
    else:
        df = oinf.get_profiling(as_df=True)

    # Total duration per node name or per operator type, sorted by
    # increasing duration.
    if profiling == 'name':
        keys = ["args_op_name", "name"]
        gr = df[['dur'] + keys].groupby(keys).sum().sort_values('dur')
    else:
        gr = df[['dur', "args_op_name"
                 ]].groupby("args_op_name").sum().sort_values('dur')
    return res, gr
Example #16
0
def benchmark_equation():
    """
    Benchmarks several implementations of *where* on random square
    matrices ``(dim, dim)`` of increasing size and plots the results.

    The baseline is :func:`numpy.where`. It is compared to
    ``numpy_where_add``, to the functions returned by ``build_ort_where``
    and ``build_ort_where_add``, and to ``tf_where`` and ``torch_where``
    when they are not None.

    :return: tuple ``(df, rs, ax)``, *df* is the dataframe of raw
        measures, *rs* holds the ratio baseline / implementation per
        dimension, *ax* is the array of the two matplotlib axes
        (raw timings, speedup)
    """
    ort_where = build_ort_where()
    ort_where_add = build_ort_where_add()
    repeat = 5
    number = 10
    res = []

    def measure(ctx, dim, label):
        # Times one implementation and stores the labelled observation.
        obs = measure_time(
            "loop_where(where, conds, xs, ys)",
            div_by_number=True, context=ctx, repeat=repeat, number=number)
        obs['dim'] = dim
        obs['fct'] = label
        res.append(obs)

    for dim in tqdm([8, 16, 32, 64, 100, 128, 200,
                     256, 500, 512, 1024, 2048]):
        conds = [(numpy.random.rand(dim, dim) < 0.5).astype(numpy.bool_)
                 for _ in range(repeat)]
        xs = [numpy.random.rand(dim, dim).astype(numpy.float32)
              for _ in range(repeat)]
        ys = [numpy.random.rand(dim, dim).astype(numpy.float32)
              for _ in range(repeat)]

        # numpy
        ctx = dict(conds=conds, xs=xs, ys=ys, where=numpy.where,
                   loop_where=loop_where)
        measure(ctx, dim, 'numpy.where')

        # numpy add
        ctx['where'] = numpy_where_add
        measure(ctx, dim, 'numpy_where_add')

        # onnxruntime
        ctx['where'] = ort_where
        measure(ctx, dim, 'ort_where')

        # onnxruntime - 2, this version receives the conditions as floats
        ctx['where'] = ort_where_add
        ctx['conds'] = [c.astype(numpy.float32) for c in conds]
        measure(ctx, dim, 'ort_where_add')

        if tf_where is not None:
            # tensorflow, the inputs are converted from the original arrays
            ctx['where'] = tf_where
            ctx['conds'] = [convert_to_tensor(c) for c in conds]
            ctx['xs'] = [convert_to_tensor(x) for x in xs]
            ctx['ys'] = [convert_to_tensor(y) for y in ys]
            measure(ctx, dim, 'tf_where')

        if torch_where is not None:
            # torch, the inputs are converted from the original arrays
            ctx['where'] = torch_where
            ctx['conds'] = [from_numpy(c) for c in conds]
            ctx['xs'] = [from_numpy(x) for x in xs]
            ctx['ys'] = [from_numpy(y) for y in ys]
            measure(ctx, dim, 'torch_where')

    # Dataframes
    df = pandas.DataFrame(res)
    # Keyword arguments: pandas >= 2.0 no longer accepts positional ones.
    piv = df.pivot(index='dim', columns='fct', values='average')

    # Speedup against numpy.where, the baseline column is reset last
    # because every ratio needs it.
    rs = piv.copy()
    for c in ['ort_where', 'numpy_where_add', 'ort_where_add',
              'tf_where', 'torch_where']:
        if c in rs.columns:
            rs[c] = rs['numpy.where'] / rs[c]
    rs['numpy.where'] = 1.

    # Graphs.
    _, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True, logy=True, ax=ax[0],
             title="Where benchmark -- (N, N)\nlower better")
    ax[0].legend(prop={"size": 9})
    rs.plot(logx=True, logy=True, ax=ax[1],
            title="Where Speedup, baseline=numpy -- (N, N)\nhigher better")
    # Reference lines at speedups 0.5 and 2.
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})

    return df, rs, ax