Code example #1
File: predictor.py  Project: vishalraj-95/ktrain
    def create_onnx_session(self,
                            onnx_model_path,
                            provider='CPUExecutionProvider'):
        """
        ```
        Creates ONNX inference session from provided onnx_model_path
        ```
        """

        from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
        assert provider in get_all_providers(
        ), f"provider {provider} not found, {get_all_providers()}"

        # A few properties that might have an impact on performance (provided by MS)
        options = SessionOptions()
        options.intra_op_num_threads = 0
        options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        # Load the model as a graph and prepare the CPU backend
        session = InferenceSession(onnx_model_path,
                                   options,
                                   providers=[provider])
        session.disable_fallback()

        #if 'OMP_NUM_THREADS' not in os.environ or 'OMP_WAIT_POLICY' not in os.environ:
        #warnings.warn('''We recommend adding the following at top of script for CPU inference:

        #from psutil import cpu_count
        ##Constants from the performance optimization available in onnxruntime
        ##It needs to be done before importing onnxruntime
        #os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
        #os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'
        #''')
        return session
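
Below is a minimal usage sketch for a session created by a helper like the one above; the model path, tokenizer checkpoint and input names are placeholders and depend on the exported graph.

# Minimal usage sketch (placeholder model path and assumed checkpoint).
import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer

session = InferenceSession('model.onnx', providers=['CPUExecutionProvider'])   # placeholder path
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')           # assumed checkpoint
tokens = tokenizer('an example sentence', return_tensors='np')
# Keep only the inputs that the exported graph actually declares.
ort_inputs = {k: v for k, v in tokens.items() if k in {i.name for i in session.get_inputs()}}
logits = session.run(None, ort_inputs)[0]
print(np.argmax(logits, axis=-1))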
Code example #2
import numpy as np
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions


def onnx_runtime_inference(onnx_model_path,
                           tokenizer,
                           sentence1_list,
                           sentence2_list,
                           batch_size=None):
    # load onnx_model
    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    # options.intra_op_num_threads = 1
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # Load the model as a graph and prepare the CUDA backend
    ort_session = InferenceSession(
        onnx_model_path,
        options,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    tokens = tokenizer(sentence1_list,
                       sentence2_list,
                       padding=True,
                       truncation="longest_first",
                       max_length=64,
                       return_tensors='pt')

    if batch_size is None:
        ort_inputs = {k: to_numpy(v) for k, v in tokens.items()}
        ort_outs = ort_session.run(None, ort_inputs)[0]
    else:
        batches = onnx_batch_generator(tokens, batch_size=batch_size)
        ort_outs = np.vstack([ort_session.run(None, b)[0] for b in batches])
    return ort_outs
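
Example #2 relies on two helpers that are not shown (to_numpy and onnx_batch_generator); a plausible sketch of both, assuming PyTorch tensors and an 'input_ids' key in the tokenizer output, is below. The project's actual implementations may differ.

def to_numpy(tensor):
    # Convert a (possibly GPU-resident) PyTorch tensor to a NumPy array.
    return tensor.detach().cpu().numpy()


def onnx_batch_generator(tokens, batch_size):
    # Yield ONNX Runtime input dicts sliced along the batch dimension;
    # assumes the tokenizer output contains 'input_ids' (assumption).
    n = tokens['input_ids'].shape[0]
    for start in range(0, n, batch_size):
        yield {k: to_numpy(v[start:start + batch_size]) for k, v in tokens.items()}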
Code example #3
def create_ort_session(onnx_model_path, use_gpu=True):
    from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel, __version__ as onnxruntime_version
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    sess_options.intra_op_num_threads = 2
    sess_options.log_severity_level = 2
    execution_providers = ['CPUExecutionProvider'] if not use_gpu else ['CUDAExecutionProvider', 'CPUExecutionProvider']
    return InferenceSession(onnx_model_path, sess_options, providers=execution_providers)
Code example #4
def create_onnxruntime_session(onnx_model_path,
                               use_gpu,
                               enable_all_optimization=True,
                               num_threads=-1,
                               enable_profiling=False,
                               verbose=False,
                               use_dml=False):
    session = None
    try:
        from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel, __version__ as onnxruntime_version
        sess_options = SessionOptions()

        if enable_all_optimization:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
        else:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC

        if enable_profiling:
            sess_options.enable_profiling = True

        if num_threads > 0:
            sess_options.intra_op_num_threads = num_threads
            logger.debug(f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}")

        if verbose:
            sess_options.log_severity_level = 0
        else:
            sess_options.log_severity_level = 4

        logger.debug(f"Create session for onnx model: {onnx_model_path}")
        if use_gpu:
            if use_dml:
                execution_providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
            else:
                execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
            execution_providers = ['CPUExecutionProvider']
        session = InferenceSession(onnx_model_path, sess_options, providers=execution_providers)
    except Exception:
        logger.error("Exception", exc_info=True)

    return session
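
A call selecting the DirectML path above might look like the sketch below (Windows only; DmlExecutionProvider ships with the onnxruntime-directml package, and the model path is a placeholder).

session = create_onnxruntime_session('model.onnx', use_gpu=True, use_dml=True)   # placeholder path
if session is not None:
    print(session.get_providers())   # e.g. ['DmlExecutionProvider', 'CPUExecutionProvider']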
Code example #5
def create_ort_session(model_path, use_gpu):
    from onnxruntime import SessionOptions, InferenceSession, __version__ as ort_version, GraphOptimizationLevel
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = (['CUDAExecutionProvider', 'CPUExecutionProvider']
                           if use_gpu else ['CPUExecutionProvider'])

    ort_session = InferenceSession(model_path,
                                   sess_options,
                                   providers=execution_providers)
    return ort_session
Code example #6
from pathlib import Path

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions


def optimize_model(model_path: Path, opt_model_path: Path):
    """
    Generate a model with graph optimizations applied (constant folding, etc.).

    :param model_path: path to the original onnx model
    :param opt_model_path: path where the optimized onnx model is written
    """
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    _ = InferenceSession(model_path.as_posix(), sess_option, providers=["CPUExecutionProvider"])
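
A usage sketch for the helper above, with placeholder file names; merely constructing the InferenceSession writes the optimized graph to optimized_model_filepath as a side effect.

from pathlib import Path

optimize_model(Path('model.onnx'), Path('model-opt.onnx'))   # placeholder paths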
Code example #7
File: mbti_app.py  Project: hjlee9295/MBTI_twitter
def create_onnx_session(onnx_model_path):
    provider = 'CPUExecutionProvider'
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
    assert provider in get_all_providers(
    ), f"provider {provider} not found, {get_all_providers()}"
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    return session
Code example #8
def create_onnxruntime_session(onnx_model_path,
                               use_gpu,
                               enable_all_optimization=True,
                               num_threads=-1,
                               verbose=False):
    session = None
    try:
        from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel, __version__ as onnxruntime_version
        sess_options = SessionOptions()

        if enable_all_optimization:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
        else:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC

        if num_threads > 0:
            sess_options.intra_op_num_threads = num_threads
            logger.debug(
                f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}"
            )
        elif (not use_gpu) and (version.parse(onnxruntime_version) <
                                version.parse('1.3.0')):
            # Set intra_op_num_threads = 1 to enable OpenMP for onnxruntime 1.2.0 (cpu)
            # onnxruntime-gpu is not built with openmp so it is better to use default (0) or cpu_count instead.
            sess_options.intra_op_num_threads = 1

        if verbose:
            sess_options.log_severity_level = 0

        logger.debug(f"Create session for onnx model: {onnx_model_path}")
        execution_providers = ['CPUExecutionProvider'] if not use_gpu else [
            'CUDAExecutionProvider', 'CPUExecutionProvider'
        ]
        session = InferenceSession(onnx_model_path,
                                   sess_options,
                                   providers=execution_providers)
    except Exception:
        logger.error("Exception", exc_info=True)

    return session
Code example #9
File: infer.py  Project: qhduan/CPM-LM-TF2
import os

from onnxruntime import (GraphOptimizationLevel, InferenceSession,
                         SessionOptions, get_all_providers)


def create_model_for_provider(
        model_path: str,
        provider: str = 'CPUExecutionProvider') -> InferenceSession:
    assert provider in get_all_providers(
    ), f"provider {provider} not found, {get_all_providers()}"
    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = int(os.environ.get('NUM_THREADS', 4))
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()
    return session
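
The snippet above reads the thread count from a NUM_THREADS environment variable (defaulting to 4); setting it before the call controls intra-op parallelism. The model path below is a placeholder.

import os

os.environ['NUM_THREADS'] = '8'                     # must be set before the session is created
session = create_model_for_provider('model.onnx')   # placeholder path, CPUExecutionProvider by default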
Code example #10
from pathlib import Path

import onnx
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions


def optimize_model(model_path: Path):
    '''
    Generate a model with graph optimizations applied (constant folding, etc.).

    :param model_path: path to the original onnx model
    :return: optimized onnx model
    '''
    opt_model_path = generate_identified_filename(model_path, "-opt")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    _ = InferenceSession(model_path.as_posix(), sess_option)
    optimized_model = onnx.load(opt_model_path.as_posix())
    return optimized_model
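
generate_identified_filename is not shown above; a minimal sketch consistent with how it is called (appending a suffix before the file extension) could look like this, though the project's actual helper may differ.

from pathlib import Path

def generate_identified_filename(filename: Path, identifier: str) -> Path:
    # Append an identifier to the stem, keeping the original suffix:
    # model.onnx -> model-opt.onnx for identifier "-opt".
    return filename.parent / (filename.stem + identifier + filename.suffix)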
Code example #11
def create_ort_session(model_path, use_gpu):
    from onnxruntime import SessionOptions, InferenceSession, __version__ as ort_version, GraphOptimizationLevel, get_available_providers
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if use_gpu else ['CPUExecutionProvider']
    if use_gpu:
        if 'CUDAExecutionProvider' not in get_available_providers():
            raise RuntimeError("CUDAExecutionProvider is not avaiable for --use_gpu!")
        else:
            print("use CUDAExecutionProvider")

    ort_session = InferenceSession(model_path, sess_options, providers=execution_providers)
    return ort_session
Code example #12
    def create_model_for_provider(self):

        assert self.provider in get_all_providers(), f"provider {self.provider} not found, {get_all_providers()}"

        # A few properties that might have an impact on performance (provided by MS)
        options = SessionOptions()
        options.intra_op_num_threads = 1
        options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        # Load the model as a graph and prepare the CPU backend
        session = InferenceSession(self.model_path, options, providers=[self.provider])
        session.disable_fallback()

        return session
Code example #13
 def __init__(self, *args, **kwargs):
     "Overwrites the constructor."
     runtime_options = kwargs.pop('runtime_options', {})
     disable_optimisation = runtime_options.pop('disable_optimisation',
                                                False)
     if disable_optimisation:
         if 'sess_options' in kwargs:
             raise RuntimeError(
                 "Incompatible options, 'disable_optimisation' and 'sess_options' cannot "
                 "be specified at the same time.")
         kwargs['sess_options'] = SessionOptions()
         kwargs['sess_options'].graph_optimization_level = (
             GraphOptimizationLevel.ORT_DISABLE_ALL)
     self.sess, self.outi, self.erri = _capture_output(
         lambda: InferenceSession(*args, **kwargs), 'c')
Code example #14
 def check_outputs(self,
                   model,
                   model_onnx,
                   Xtest,
                   predict_attributes,
                   decimal=5,
                   skip_if_float32=False,
                   disable_optimisation=True):
     if "TransposeScaleMatMul" in str(model_onnx):
         raise RuntimeError("This node must not be added.")
     if predict_attributes is None:
         predict_attributes = {}
     exp = model.predict(Xtest, **predict_attributes)
     if disable_optimisation and GraphOptimizationLevel is not None:
         opts = SessionOptions()
         opts.graph_optimization_level = (
             GraphOptimizationLevel.ORT_DISABLE_ALL)
         sess = InferenceSession(model_onnx.SerializeToString(),
                                 sess_options=opts)
     else:
         sess = InferenceSession(model_onnx.SerializeToString())
     got = sess.run(None, {'X': Xtest})
     if isinstance(exp, tuple):
         if len(exp) != len(got):
             raise AssertionError("Mismatched number of outputs.")
         for i, (e, g) in enumerate(zip(exp, got)):
             if skip_if_float32 and g.dtype == np.float32:
                 continue
             try:
                 assert_almost_equal(self.remove_dim1(e),
                                     self.remove_dim1(g),
                                     decimal=decimal)
             except AssertionError as e:  # noqa
                 raise AssertionError(
                     "Mismatch for output {} and attributes {}"
                     ".".format(i, predict_attributes)) from e
     else:
         if skip_if_float32 and Xtest.dtype == np.float32:
             return
         assert_almost_equal(np.squeeze(exp),
                             np.squeeze(got),
                             decimal=decimal)
Code example #15
def create_model_for_provider(model_path: str, provider: str) -> InferenceSession:
    """
    这里解释一下ExecutionProvider,ONNXRuntime用Provider表示不同的运行设备比如CUDAProvider等。
    目前ONNX Runtime v1.0支持了包括CPU,CUDA,TensorRT,MKL等七种Providers。
    :param model_path:
    :param provider:
    :return:
    """
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"

    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()

    return session
Code example #16
def create_model_for_provider(model_path: str, provider: str,
                              optimization_level: str) -> InferenceSession:

    assert provider in get_all_providers(
    ), f"provider {provider} not found, {get_all_providers()}"

    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    if optimization_level in GRAPH_OPTIMIZATIONS:
        options.graph_optimization_level = GRAPH_OPTIMIZATIONS[
            optimization_level]
    else:
        raise KeyError(
            f"Unknown Optimization Level {optimization_level} (Available optimization level are all/disable_all/basic/extended)"
        )

    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()

    return session
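
Example #16 looks the optimization level up in a GRAPH_OPTIMIZATIONS dict that is not shown; judging by the error message, it presumably maps the four level names to the GraphOptimizationLevel enum, roughly as follows.

from onnxruntime import GraphOptimizationLevel

# Assumed mapping, inferred from the error message in the example above.
GRAPH_OPTIMIZATIONS = {
    'all': GraphOptimizationLevel.ORT_ENABLE_ALL,
    'disable_all': GraphOptimizationLevel.ORT_DISABLE_ALL,
    'basic': GraphOptimizationLevel.ORT_ENABLE_BASIC,
    'extended': GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
}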
Code example #17
File: session.py  Project: xadupre/mlprodict
 def __init__(self, onnx_data, runtime, runtime_options=None):
     """
     @param      onnx_data       :epkg:`ONNX` model or data
     @param      runtime         runtime to be used,
                                 mostly :epkg:`onnxruntime`
     @param      runtime_options runtime options
     """
     if runtime != 'onnxruntime1':
         raise NotImplementedError(  # pragma: no cover
             "runtime '{}' is not implemented.".format(runtime))
     if hasattr(onnx_data, 'SerializeToString'):
         onnx_data = onnx_data.SerializeToString()
     self.runtime = runtime
     sess_options = SessionOptions()
     self.run_options = RunOptions()
     try:
         sess_options.sessions_log_verbosity_level = 0
     except AttributeError:  # pragma: no cover
         # onnxruntime not recent enough.
         pass
     try:
         self.run_options.run_log_verbosity_level = 0
     except AttributeError:  # pragma: no cover
         # onnxruntime not recent enough.
         pass
     if (runtime_options is not None
             and runtime_options.get('disable_optimisation', False)):
          sess_options.graph_optimization_level = (
              GraphOptimizationLevel.ORT_DISABLE_ALL)
     try:
         self.sess = InferenceSession(onnx_data, sess_options=sess_options)
     except (OrtFail, OrtNotImplemented, OrtInvalidGraph,
             OrtInvalidArgument, OrtRuntimeException, RuntimeError) as e:
         raise RuntimeError(
             "Unable to create InferenceSession due to '{}'\n{}.".format(
                 e, display_onnx(onnx.load(BytesIO(onnx_data))))) from e
Code example #18
def get_onnx_runtime_sessions(
    model_paths,
    default: bool = True,
    opt_level: int = 99,
    parallel_exe_mode: bool = True,
    n_threads: int = 4,
    provider=[
        'CPUExecutionProvider',
    ],
):
    '''
    Creates ONNX Runtime InferenceSessions for the encoder, decoder and initial decoder models.

    Args:
        model_paths : tuple of (path_to_encoder, path_to_decoder, path_to_initial_decoder),
                      the paths of the exported onnx encoder/decoder models.
        default (bool) : if True, ort chooses the best settings for your hardware
                         (you can test out different settings for better results).
        opt_level (int) : graph optimization level; 1 uses 'ORT_ENABLE_BASIC',
                          2 'ORT_ENABLE_EXTENDED' and 99 'ORT_ENABLE_ALL' (the default).
        parallel_exe_mode (bool) : sets the execution mode; default is parallel.
        n_threads (int) : number of threads used to parallelize execution within nodes;
                          default is 4.
        provider : execution providers list.

    Returns:
        encoder_session : encoder onnx InferenceSession
        decoder_session : decoder onnx InferenceSession
        decoder_sess_init : initial decoder onnx InferenceSession
    '''
    path_to_encoder, path_to_decoder, path_to_initial_decoder = model_paths

    if default:

        encoder_sess = InferenceSession(str(path_to_encoder))

        decoder_sess = InferenceSession(str(path_to_decoder))

        decoder_sess_init = InferenceSession(str(path_to_initial_decoder))

    else:

        # A few properties that might have an impact on performance
        options = SessionOptions()

        if opt_level == 1:
            options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
        elif opt_level == 2:
            options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_EXTENDED
        else:
            assert opt_level == 99
            options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        # set this true for better performance
        if parallel_exe_mode:
            options.execution_mode = ExecutionMode.ORT_PARALLEL
        else:
            options.execution_mode = ExecutionMode.ORT_SEQUENTIAL

        options.intra_op_num_threads = n_threads
        # options.inter_op_num_threads = 10

        # options.enable_profiling = True

        encoder_sess = InferenceSession(str(path_to_encoder),
                                        options,
                                        providers=provider)

        decoder_sess = InferenceSession(str(path_to_decoder),
                                        options,
                                        providers=provider)

        decoder_sess_init = InferenceSession(str(path_to_initial_decoder),
                                             options,
                                             providers=provider)

    return encoder_sess, decoder_sess, decoder_sess_init
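
A usage sketch for the helper above, with placeholder model paths; note that the non-default branch also needs ExecutionMode (and the other onnxruntime classes used) imported from onnxruntime.

# Hypothetical encoder/decoder export paths.
paths = ('encoder.onnx', 'decoder.onnx', 'decoder-init.onnx')
enc_sess, dec_sess, dec_init_sess = get_onnx_runtime_sessions(
    paths,
    default=False,
    opt_level=99,
    parallel_exe_mode=True,
    n_threads=4,
    provider=['CPUExecutionProvider'],
)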
Code example #19
def create_onnxruntime_session(
        onnx_model_path,
        use_gpu,
        provider=None,
        enable_all_optimization=True,
        num_threads=-1,
        enable_profiling=False,
        verbose=False,
        provider_options={},  # map execution provider name to its option
):
    session = None
    try:
        from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions

        sess_options = SessionOptions()

        if enable_all_optimization:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
        else:
            sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC

        if enable_profiling:
            sess_options.enable_profiling = True

        if num_threads > 0:
            sess_options.intra_op_num_threads = num_threads
            logger.debug(
                f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}"
            )

        if verbose:
            sess_options.log_severity_level = 0
        else:
            sess_options.log_severity_level = 4

        logger.debug(f"Create session for onnx model: {onnx_model_path}")
        if use_gpu:
            if provider == "dml":
                providers = ["DmlExecutionProvider", "CPUExecutionProvider"]
            elif provider == "rocm":
                providers = ["ROCMExecutionProvider", "CPUExecutionProvider"]
            elif provider == "migraphx":
                providers = [
                    "MIGraphXExecutionProvider",
                    "ROCMExecutionProvider",
                    "CPUExecutionProvider",
                ]
            elif provider == "cuda":
                providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
            elif provider == "tensorrt":
                providers = [
                    "TensorrtExecutionProvider",
                    "CUDAExecutionProvider",
                    "CPUExecutionProvider",
                ]
            else:
                providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        else:
            providers = ["CPUExecutionProvider"]

        if provider_options:
            providers = [
                (name,
                 provider_options[name]) if name in provider_options else name
                for name in providers
            ]

        session = InferenceSession(onnx_model_path,
                                   sess_options,
                                   providers=providers)
    except Exception:
        logger.error("Exception", exc_info=True)

    return session
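
provider_options maps an execution provider name to its options; for example, CUDAExecutionProvider accepts a device_id setting, so pinning the session to GPU 1 might look like the sketch below (the model path is a placeholder).

session = create_onnxruntime_session(
    'model.onnx',                                                   # placeholder path
    use_gpu=True,
    provider='cuda',
    provider_options={'CUDAExecutionProvider': {'device_id': 1}},
)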
Code example #20
File: _op.py  Project: xadupre/mlprodict
    def _init(self, variables=None):
        """
        Initializes the node.

        @param      variables               registered variables created by previous operators

        The current implementation for operator *Scan*
        only works for matrices.
        """
        try:
            self.alg_class = getattr(alg2, 'Onnx' + self.onnx_node.op_type)
        except AttributeError:
            self.alg_class = getattr(alg, 'Onnx' + self.onnx_node.op_type)
        inputs = list(self.onnx_node.input)
        self.mapping, self.inputs = self._name_mapping(inputs)
        self.outputs = list(self.onnx_node.output)

        options = self.options.copy()
        target_opset = options.pop('target_opset', None)
        domain = options.pop('domain', None)
        disable_optimisation = options.pop('disable_optimisation', False)
        ir_version = options.pop('ir_version', None)

        if domain == '' and target_opset < 9:
            # target_opset should be >= 9 not {} for main domain.
            # We assume it was the case when the graph was created.
            pass

        if self.onnx_node.op_type == 'ConstantOfShape':
            for k in options:
                v = options[k]
                if isinstance(v, numpy.ndarray):
                    options[k] = make_tensor(k,
                                             self._guess_proto_type(v.dtype),
                                             v.shape, v.tolist())

            self.inst_ = self.alg_class(*self.inputs,
                                        output_names=self.outputs,
                                        op_version=target_opset,
                                        **options)
            inputs = get_defined_inputs(self.inputs,
                                        variables,
                                        dtype=self.dtype)
            try:
                self.onnx_ = self.inst_.to_onnx(inputs,
                                                target_opset=target_opset,
                                                domain=domain)
                if "dim_value: 0" in str(self.onnx_):
                    raise RuntimeError(  # pragma: no cover
                        "Probable issue as one dimension is null.\n--\n{}".
                        format(self.onnx_))
            except AttributeError as e:  # pragma: no cover
                # older version of skl2onnx
                self.onnx_ = self.inst_.to_onnx(inputs)
                if "dim_value: 0" in str(self.onnx_):
                    raise RuntimeError(
                        "Probable issue as one dimension is null.\n--\n{}".
                        format(self.onnx_)) from e
            forced = False
        elif self.onnx_node.op_type == 'Scan':
            self.inst_ = self.alg_class(*self.inputs,
                                        output_names=self.outputs,
                                        op_version=target_opset,
                                        **options)
            inputs = get_defined_inputs(self.inputs,
                                        variables,
                                        dtype=self.dtype)
            outputs = get_defined_outputs(self.outputs,
                                          self.onnx_node,
                                          inputs,
                                          variables,
                                          dtype=self.dtype)
            inputs = [(name, cl.__class__([None, None]))
                      for (name, cl) in inputs]
            outputs = [(name, cl.__class__([None, None]))
                       for (name, cl) in outputs]
            self.onnx_ = self.inst_.to_onnx(inputs,
                                            outputs=outputs,
                                            target_opset=target_opset,
                                            domain=domain)
            if "dim_value: 0" in str(self.onnx_):
                raise RuntimeError(  # pragma: no cover
                    "Probable issue as one dimension is null.\n--\n{}".format(
                        self.onnx_))
            forced = True
        else:
            self.inst_ = self.alg_class(*self.inputs,
                                        output_names=self.outputs,
                                        op_version=target_opset,
                                        domain=domain,
                                        **options)
            inputs = get_defined_inputs(self.inputs,
                                        variables,
                                        dtype=self.dtype)

            try:
                self.onnx_ = self.inst_.to_onnx(inputs,
                                                target_opset=target_opset,
                                                domain=domain)
                if "dim_value: 0" in str(self.onnx_):
                    raise RuntimeError(  # pragma: no cover
                        "Probable issue as one dimension is null.\n--\n{}\n---\n{}"
                        .format(self.onnx_, inputs))
                forced = False
            except (RuntimeError, ValueError) as e:
                # Let's try again by forcing output types.
                forced = True
                outputs = get_defined_outputs(self.outputs,
                                              self.onnx_node,
                                              inputs,
                                              variables,
                                              dtype=self.dtype)
                self.onnx_ = self.inst_.to_onnx(inputs,
                                                outputs=outputs,
                                                target_opset=target_opset,
                                                domain=domain)
                if "dim_value: 0" in str(self.onnx_):
                    raise RuntimeError(  # pragma: no cover
                        "Probable issue as one dimension is null.\n--\n{}".
                        format(self.onnx_)) from e

        if len(self.onnx_.graph.output) != len(self.outputs):
            # Something is wrong, falls back to default plan.
            forced = True
            outputs = get_defined_outputs(self.outputs,
                                          self.onnx_node,
                                          inputs,
                                          variables,
                                          dtype=self.dtype)
            self.onnx_ = self.inst_.to_onnx(inputs,
                                            outputs=outputs,
                                            target_opset=target_opset,
                                            domain=domain)
            if "dim_value: 0" in str(self.onnx_):
                raise RuntimeError(
                    "Probable issue as one dimension is null.\n--\n{}".format(
                        self.onnx_))
        else:
            lo = list(self.onnx_.graph.output)
            outputs = proto2vars(lo)

        sess_options = SessionOptions()
        self.run_options = RunOptions()

        try:
            sess_options.session_log_severity_level = 3
            # sess_options.sessions_log_verbosity_level = 0
        except AttributeError:
            # onnxruntime not recent enough.
            pass
        try:
            self.run_options.run_log_severity_level = 3
            # self.run_options.run_log_verbosity_level = 0
        except AttributeError:
            # onnxruntime not recent enough.
            pass
        if ir_version is not None:
            self.onnx_.ir_version = ir_version
        if disable_optimisation:
            sess_options.graph_optimization_level = (
                GraphOptimizationLevel.ORT_DISABLE_ALL)
        try:
            self.sess_ = InferenceSession(self.onnx_.SerializeToString(),
                                          sess_options=sess_options)
        except (RuntimeError, OrtNotImplemented, OrtInvalidGraph,
                OrtFail) as e:
            raise RuntimeError(
                "Unable to load node '{}' (output type was {})\n{}".format(
                    self.onnx_node.op_type,
                    "guessed" if forced else "inferred", self.onnx_)) from e
        self.typed_outputs_ = outputs
Code example #21
###############################
# Creation of the session.
data = []
files = []
legend = []

for graph_opt, name_opt in tqdm([
    (GraphOptimizationLevel.ORT_DISABLE_ALL, "ORT_DISABLE_ALL"),
    (GraphOptimizationLevel.ORT_ENABLE_BASIC, "ORT_ENABLE_BASIC"),
    (GraphOptimizationLevel.ORT_ENABLE_EXTENDED, "ORT_ENABLE_EXTENDED"),
    (GraphOptimizationLevel.ORT_ENABLE_ALL, "ORT_ENABLE_ALL")
]):

    so = SessionOptions()
    so.graph_optimization_level = graph_opt
    so.optimized_model_filepath = (os.path.split(filename)[-1] +
                                   f".optimized.{name_opt}.onnx")
    files.append(so.optimized_model_filepath)
    legend.append(name_opt)
    sess = InferenceSession(onx.SerializeToString(), so, providers=[provider])
    bind = SessionIOBinding(sess._sess)

    #####################################
    # Creates random data
    feed = random_feed(sess, batch)

    #####################################
    # moving the data on CPU or GPU
    feed_ort_value = OrderedDict(
        (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype))