def build_onnx_function(self, opset, device, n_tensors):
    so = SessionOptions()
    so.log_severity_level = 4
    # loss_grad
    self.penalty_onnx_ = function_onnx_graph(
        "n_penalty_elastic_error", target_opset=opset, n_tensors=n_tensors,
        loss_shape=None, l1_weight=self.l1, l2_weight=self.l2)
    self.penalty_sess_ = InferenceSession(
        self.penalty_onnx_.SerializeToString(), so,
        providers=device_to_providers(device))
    self.penalty_sess_bind_ = (
        self.penalty_sess_.io_binding()._iobinding)
    self.names_ = [i.name for i in self.penalty_onnx_.graph.input]
    # weight updates
    self.penalty_grad_onnx_ = function_onnx_graph(
        "update_penalty_elastic_error", target_opset=opset,
        l1=self.l1, l2=self.l2)
    self.penalty_grad_sess_ = InferenceSession(
        self.penalty_grad_onnx_.SerializeToString(), so,
        providers=device_to_providers(device))
    self.penalty_grad_sess_binds_ = [
        self.penalty_grad_sess_.io_binding()._iobinding
        for _ in range(n_tensors)]

def create_onnxruntime_session(onnx_model_path, use_gpu, verbose):
    session = None
    try:
        from onnxruntime import SessionOptions, InferenceSession
        sess_options = SessionOptions()
        if not use_gpu:
            # Use one thread per logical core for CPU inference.
            sess_options.intra_op_num_threads = psutil.cpu_count(logical=True)
            logger.debug(
                f"Session option: intra_op_num_threads="
                f"{sess_options.intra_op_num_threads}")
        if verbose:
            sess_options.log_severity_level = 0
        logger.debug(f"Create session for onnx model: {onnx_model_path}")
        execution_providers = ['CPUExecutionProvider'] if not use_gpu else [
            'CUDAExecutionProvider', 'CPUExecutionProvider']
        session = InferenceSession(onnx_model_path, sess_options,
                                   providers=execution_providers)
    except Exception:
        logger.error("Exception while creating the session", exc_info=True)
    return session

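# Usage sketch (assumed, not from the original code): the model path and
# the dummy input shape are placeholders to adapt to the actual model.
import numpy as np

session = create_onnxruntime_session("model.onnx", use_gpu=False, verbose=True)
if session is not None:  # the helper returns None when session creation fails
    input_name = session.get_inputs()[0].name
    dummy = np.zeros((1, 3, 224, 224), dtype=np.float32)
    outputs = session.run(None, {input_name: dummy})
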
def __init__(self, model_path, tokenizer_path, tag_predict_model):
    options = SessionOptions()
    options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
    self.model = ort.InferenceSession(model_path, options)
    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    self.tag_model = tag_predict_model

def onnx_runtime_inference(onnx_model_path, tokenizer, sentence1_list,
                           sentence2_list, batch_size=None):
    # load onnx_model
    # Few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    # options.intra_op_num_threads = 1
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CUDA backend
    ort_session = InferenceSession(
        onnx_model_path, options,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    tokens = tokenizer(sentence1_list, sentence2_list, padding=True,
                       truncation="longest_first", max_length=64,
                       return_tensors='pt')
    if batch_size is None:
        ort_inputs = {k: to_numpy(v) for k, v in tokens.items()}
        ort_outs = ort_session.run(None, ort_inputs)[0]
    else:
        batches = onnx_batch_generator(tokens, batch_size=batch_size)
        ort_outs = np.vstack([ort_session.run(None, b)[0] for b in batches])
    return ort_outs

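# `to_numpy` and `onnx_batch_generator` are used above but not defined in
# this snippet. A minimal sketch of plausible implementations (assumptions,
# not the originals):
import numpy as np

def to_numpy(tensor):
    # Move a torch tensor to host memory as a numpy array.
    return tensor.detach().cpu().numpy()

def onnx_batch_generator(tokens, batch_size):
    # Yield feed dicts of numpy arrays, batch_size rows at a time.
    n_rows = tokens["input_ids"].shape[0]
    for start in range(0, n_rows, batch_size):
        yield {k: to_numpy(v[start:start + batch_size])
               for k, v in tokens.items()}
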
def build_onnx_function(self):
    """
    Creates ONNX graph and *InferenceSession* related to
    any operations applying on *OrtValue*.
    """
    opset = get_onnx_opset(self.model_onnx)
    so = SessionOptions()
    so.log_severity_level = 4
    n = len(self.weights_to_train)
    # loss_grad
    self.learning_loss.build_onnx_function(opset, self.device, self.weight_name)
    # weight update
    self.learning_rate.build_onnx_function(opset, self.device, n)
    # regularization
    self.learning_penalty.build_onnx_function(opset, self.device, n)
    # zero
    self.zero_onnx_ = function_onnx_graph("zero")
    self.zero_sess_ = InferenceSession(
        self.zero_onnx_.SerializeToString(), so,
        providers=device_to_providers(self.device))
    # logging
    if self.enable_logging:
        self._logger = logging.getLogger("onnxcustom")
    else:
        self._logger = None

def test_penalty_update(self):
    x = numpy.random.randn(10, 1).astype(numpy.float32)

    def fct(x):
        # Gradient of the elastic penalty l1*|x| + l2*x**2 with
        # l1=0.1 and l2=0.9: l1*sign(x) + 2*l2*x.
        return numpy.sign(x) * 0.1 + (x * 0.9 * 2)

    exp_loss = x - fct(x)
    onx = function_onnx_graph('update_penalty_elastic_error',
                              target_opset=get_max_opset(),
                              dtype=numpy.float32, l1=0.1, l2=0.9)
    oinf = OnnxInference(onx)
    got = oinf.run({'X': x})
    self.assertEqualArray(exp_loss, got['Y'], decimal=5)

    providers = device_to_providers('cpu')
    so = SessionOptions()
    so.log_severity_level = 4
    sess = InferenceSession(onx.SerializeToString(), so, providers=providers)
    got = sess.run(None, {'X': x})
    self.assertEqualArray(exp_loss, got[0], decimal=5)

def create_onnx_session(self, onnx_model_path, provider='CPUExecutionProvider'):
    """
    Creates an ONNX inference session from the provided onnx_model_path.
    """
    from onnxruntime import (GraphOptimizationLevel, InferenceSession,
                             SessionOptions, get_all_providers)
    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"
    # Few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    # if 'OMP_NUM_THREADS' not in os.environ or 'OMP_WAIT_POLICY' not in os.environ:
    #     warnings.warn('''We recommend adding the following at top of script for CPU inference:
    #     from psutil import cpu_count
    #     # Constants from the performance optimization available in onnxruntime
    #     # It needs to be done before importing onnxruntime
    #     os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
    #     os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'
    #     ''')
    return session

def __setstate__(self, state):
    """
    Restores the attributes serialized by `__getstate__`
    and rebuilds every InferenceSession.
    """
    for k, v in state.items():
        if k == 'ro_':
            self.ro_ = RunOptions()
        elif not k.endswith('_onnx_') and not k.endswith('_sess_'):
            setattr(self, k, v)
    so = SessionOptions()
    so.log_severity_level = 4
    for k, v in state.items():
        if k.endswith('_onnx_'):
            setattr(self, k, onnx.load(BytesIO(v)))
            k2 = k.replace("onnx", "sess")
            prov = state[k2]
            setattr(self, k2,
                    InferenceSession(getattr(self, k).SerializeToString(),
                                     so, providers=prov))
    for k, v in state.items():
        if k.endswith('_bind_'):
            k2 = k[:-5]
            setattr(self, k, getattr(self, k2).io_binding()._iobinding)
        elif k.endswith('_binds_'):
            k2 = k[:-6]
            n = v
            setattr(self, k, [
                getattr(self, k2).io_binding()._iobinding
                for i in range(n)])
    self.cache_in_ = {}
    self.cache_out_ = {}
    return self

def __init__(self, onnx_data, runtime):
    """
    @param      onnx_data       :epkg:`ONNX` model or data
    @param      runtime         runtime to be used, mostly :epkg:`onnxruntime`
    """
    if runtime != 'onnxruntime1':
        raise NotImplementedError(
            "runtime '{}' is not implemented.".format(runtime))
    if hasattr(onnx_data, 'SerializeToString'):
        onnx_data = onnx_data.SerializeToString()
    self.runtime = runtime
    sess_options = SessionOptions()
    self.run_options = RunOptions()
    try:
        sess_options.session_log_severity_level = 3
        # sess_options.sessions_log_verbosity_level = 0
    except AttributeError:  # pragma: no cover
        # onnxruntime not recent enough.
        pass
    try:
        self.run_options.run_log_severity_level = 3
        # self.run_options.run_log_verbosity_level = 0
    except AttributeError:  # pragma: no cover
        # onnxruntime not recent enough.
        pass
    self.sess = InferenceSession(onnx_data, sess_options=sess_options)

def test_penalty_3w(self):
    loss = numpy.random.randn(1, 1).astype(numpy.float32)
    w1 = numpy.random.randn(10, 1).astype(numpy.float32)
    w2 = numpy.random.randn(5, 1).astype(numpy.float32)

    def fct(x):
        # Elastic penalty l1*|x|.sum() + l2*(x**2).sum()
        # with l1=0.1 and l2=0.9.
        return numpy.abs(x).sum() * 0.1 + (x ** 2).sum() * 0.9

    exp_loss = loss + fct(w1) + fct(w2)
    onx = function_onnx_graph('n_penalty_elastic_error',
                              target_opset=get_max_opset(),
                              dtype=numpy.float32, n_tensors=2,
                              l1_weight=0.1, l2_weight=0.9,
                              weight_name='weight')
    oinf = OnnxInference(onx)
    got = oinf.run({'loss': loss, 'W0': w1, 'W1': w2})
    self.assertEqualArray(exp_loss.reshape((-1, )), got['Y'], decimal=5)

    providers = device_to_providers('cpu')
    so = SessionOptions()
    so.log_severity_level = 4
    sess = InferenceSession(onx.SerializeToString(), so, providers=providers)
    got = sess.run(None, {'loss': loss, 'W0': w1, 'W1': w2})
    self.assertEqualArray(exp_loss.reshape((-1, )), got[0], decimal=5)

def create_ort_session(onnx_model_path, use_gpu=True):
    from onnxruntime import (SessionOptions, InferenceSession,
                             GraphOptimizationLevel,
                             __version__ as onnxruntime_version)
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    sess_options.intra_op_num_threads = 2
    sess_options.log_severity_level = 2
    execution_providers = ['CPUExecutionProvider'] if not use_gpu else [
        'CUDAExecutionProvider', 'CPUExecutionProvider']
    return InferenceSession(onnx_model_path, sess_options,
                            providers=execution_providers)

def __setstate__(self, state):
    if get_library_path is None:
        raise ImportError("onnxruntime_extensions is not installed.")
    state['onnx_'] = load(BytesIO(state['onnx_']))
    BaseEstimator.__setstate__(self, state)
    so = SessionOptions()
    # Register the custom operators implemented by onnxruntime_extensions.
    so.register_custom_ops_library(get_library_path())
    self.sess_ = InferenceSession(self.onnx_.SerializeToString(), so)
    return self

def create_training_session(training_onnx, weights_to_train,
                            loss_output_name='loss',
                            training_optimizer_name='SGDOptimizer',
                            device='cpu'):
    """
    Creates an instance of class `TrainingSession`.

    :param training_onnx: ONNX graph used to train
    :param weights_to_train: names of initializers to be optimized
    :param loss_output_name: name of the loss output
    :param training_optimizer_name: optimizer name
    :param device: `'cpu'` or `'cuda'`
    :return: instance of `TrainingSession`
    """
    ort_parameters = TrainingParameters()
    ort_parameters.loss_output_name = loss_output_name

    output_types = {}
    for output in training_onnx.graph.output:
        output_types[output.name] = output.type.tensor_type

    ort_parameters.weights_to_train = set(weights_to_train)
    ort_parameters.training_optimizer_name = training_optimizer_name
    ort_parameters.optimizer_attributes_map = {
        name: {} for name in weights_to_train}
    ort_parameters.optimizer_int_attributes_map = {
        name: {} for name in weights_to_train}

    session_options = SessionOptions()
    session_options.use_deterministic_compute = True

    if hasattr(device, 'device_type'):
        if device.device_type() == device.cpu():
            provider = ['CPUExecutionProvider']
        elif device.device_type() == device.cuda():
            provider = ['CUDAExecutionProvider']
        else:
            raise ValueError(f"Unexpected device {device!r}.")
    else:
        if device == 'cpu':
            provider = ['CPUExecutionProvider']
        elif device.startswith("cuda"):
            provider = ['CUDAExecutionProvider']
        else:
            raise ValueError(f"Unexpected device {device!r}.")

    session = TrainingSession(training_onnx.SerializeToString(),
                              ort_parameters, session_options,
                              providers=provider)
    return session

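# Usage sketch (assumed): wraps the helper above; the initializer names
# 'coef' and 'intercept' and the variable `training_onnx` are placeholders.
def example_training_session(training_onnx):
    # `training_onnx` must be an ONNX training graph with an output named
    # 'loss' and trainable initializers matching `weights_to_train`.
    return create_training_session(
        training_onnx, weights_to_train=['coef', 'intercept'],
        loss_output_name='loss', training_optimizer_name='SGDOptimizer',
        device='cpu')
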
def optimize_model(model_path: Path, opt_model_path: Path):
    """
    Generates a model that applies graph optimization (constant folding, etc.)
    and writes it to `opt_model_path`.

    :param model_path: path to the original onnx model
    :param opt_model_path: path to the optimized onnx model
    """
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    # Creating the session runs the optimization pass and serializes the
    # optimized graph; the session itself is not used afterwards.
    _ = InferenceSession(model_path.as_posix(), sess_option,
                         providers=["CPUExecutionProvider"])

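# Usage sketch; both paths are placeholders. The optimized graph is written
# to disk as a side effect of constructing the InferenceSession with
# `optimized_model_filepath` set, which is why no session object is kept.
from pathlib import Path

optimize_model(Path("model.onnx"), Path("model-opt.onnx"))
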
def create_onnx_session(onnx_model_path):
    provider = 'CPUExecutionProvider'
    from onnxruntime import (GraphOptimizationLevel, InferenceSession,
                             SessionOptions, get_all_providers)
    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    return session

def create_ort_session(model_path, use_gpu):
    from onnxruntime import (SessionOptions, InferenceSession,
                             GraphOptimizationLevel,
                             __version__ as ort_version)
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = (['CUDAExecutionProvider', 'CPUExecutionProvider']
                           if use_gpu else ['CPUExecutionProvider'])
    ort_session = InferenceSession(model_path, sess_options,
                                   providers=execution_providers)
    return ort_session

def create_ort_session(model_path, use_gpu):
    from onnxruntime import (SessionOptions, InferenceSession,
                             GraphOptimizationLevel, get_available_providers,
                             __version__ as ort_version)
    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = (['CUDAExecutionProvider', 'CPUExecutionProvider']
                           if use_gpu else ['CPUExecutionProvider'])
    if use_gpu:
        if 'CUDAExecutionProvider' not in get_available_providers():
            raise RuntimeError(
                "CUDAExecutionProvider is not available for --use_gpu!")
        else:
            print("use CUDAExecutionProvider")
    ort_session = InferenceSession(model_path, sess_options,
                                   providers=execution_providers)
    return ort_session

def create_model_for_provider(
        model_path: str,
        provider: str = 'CPUExecutionProvider') -> InferenceSession:
    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"
    # Few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = int(os.environ.get('NUM_THREADS', 4))
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()
    return session

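# Usage sketch (hypothetical path): NUM_THREADS is read by the helper above
# to size the intra-op thread pool, with 4 threads as the fallback.
import os

os.environ["NUM_THREADS"] = "2"
cpu_session = create_model_for_provider("model.onnx", "CPUExecutionProvider")
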
def optimize_model(model_path: Path):
    """
    Generates a model that applies graph optimization (constant folding, etc.).

    :param model_path: path to the original onnx model
    :return: optimized onnx model
    """
    opt_model_path = generate_identified_filename(model_path, "-opt")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    _ = InferenceSession(model_path.as_posix(), sess_option)
    optimized_model = onnx.load(opt_model_path.as_posix())
    return optimized_model

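# `generate_identified_filename` is not defined in this snippet. A minimal
# sketch of the assumed behavior, inserting an identifier before the suffix
# (model.onnx -> model-opt.onnx):
from pathlib import Path

def generate_identified_filename(filename: Path, identifier: str) -> Path:
    return filename.parent.joinpath(
        filename.stem + identifier).with_suffix(filename.suffix)
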
def build_onnx_function(self, opset, device, n_tensors):
    so = SessionOptions()
    so.log_severity_level = 4
    self.axpy_onnx_ = function_onnx_graph("axpy")
    self.axpy_sess_ = InferenceSession(
        self.axpy_onnx_.SerializeToString(), so,
        providers=device_to_providers(device))
    self.axpy_sess_binds_ = [
        self.axpy_sess_.io_binding()._iobinding
        for i in range(n_tensors)]
    self.alpha_ = numpy.array(
        [0], dtype=TENSOR_TYPE_TO_NP_TYPE[
            self.axpy_onnx_.graph.input[0].type.tensor_type.elem_type])

def create_model_for_provider(self):
    assert self.provider in get_all_providers(), \
        f"provider {self.provider} not found, {get_all_providers()}"
    # Few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(self.model_path, options,
                               providers=[self.provider])
    session.disable_fallback()
    return session

def fit(self, X, y=None, sample_weight=None):
    """
    The model is not trained; this method is still needed to set
    the instance up and make it ready to transform.

    :param X: array of strings
    :param y: unused
    :param sample_weight: unused
    :return: self
    """
    self.onnx_ = self._create_model(self.model_b64, opset=self.opset)
    so = SessionOptions()
    so.register_custom_ops_library(get_library_path())
    self.sess_ = InferenceSession(self.onnx_.SerializeToString(), so)
    return self

def __init__(self, *args, **kwargs):
    "Overwrites the constructor."
    runtime_options = kwargs.pop('runtime_options', {})
    disable_optimisation = runtime_options.pop('disable_optimisation', False)
    if disable_optimisation:
        if 'sess_options' in kwargs:
            raise RuntimeError(
                "Incompatible options, 'disable_optimisation' and "
                "'sess_options' cannot be specified at the same time.")
        kwargs['sess_options'] = SessionOptions()
        kwargs['sess_options'].graph_optimization_level = (
            GraphOptimizationLevel.ORT_DISABLE_ALL)
    self.sess, self.outi, self.erri = _capture_output(
        lambda: InferenceSession(*args, **kwargs), 'c')

def __init__(self, onnx_bytes, sess_options=None, log_severity_level=4,
             device=None):
    if InferenceSession is None:
        raise ImportError(  # pragma: no cover
            "onnxruntime is not available.")
    self.log_severity_level = log_severity_level
    if device is None:
        self.device = get_ort_device('cpu')
    else:
        self.device = get_ort_device(device)
    self.providers = device_to_providers(self.device)
    set_default_logger_severity(3)
    if sess_options is None:
        self.so = SessionOptions()
        self.so.log_severity_level = log_severity_level
        self.sess = OrtInferenceSession(onnx_bytes, sess_options=self.so,
                                        providers=self.providers)
    else:
        self.so = sess_options
        self.sess = OrtInferenceSession(onnx_bytes, sess_options=sess_options,
                                        providers=self.providers)
    self.ro = RunOptions()
    self.ro.log_severity_level = log_severity_level
    self.ro.log_verbosity_level = log_severity_level
    self.output_names = [o.name for o in self.get_outputs()]

@classmethod
def prepare(cls, model, device=None, **kwargs):
    """
    Loads the model and creates a :class:`onnxruntime.InferenceSession`
    ready to be used as a backend.

    :param model: ModelProto (returned by `onnx.load`),
        string for a filename or bytes for a serialized model
    :param device: requested device for the computation,
        None means the default one which depends on
        the compilation settings
    :param kwargs: see :class:`onnxruntime.SessionOptions`
    :return: :class:`onnxruntime.InferenceSession`
    """
    if isinstance(model, OnnxRuntimeBackendRep):
        return model
    elif isinstance(model, InferenceSession):
        return OnnxRuntimeBackendRep(model)
    elif isinstance(model, (str, bytes)):
        options = SessionOptions()
        for k, v in kwargs.items():
            if hasattr(options, k):
                setattr(options, k, v)
        inf = InferenceSession(model, options)
        # backend API is primarily used for ONNX test/validation.
        # As such, we should disable session.run() fallback
        # which may hide test failures.
        inf.disable_fallback()
        if device is not None and not cls.supports_device(device):
            raise RuntimeError(
                "Incompatible device expected '{0}', got '{1}'".format(
                    device, get_device()))
        return cls.prepare(inf, device, **kwargs)
    else:  # type: ModelProto
        check_model(model)
        serialized = model.SerializeToString()
        return cls.prepare(serialized, device, **kwargs)

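# Usage sketch following the ONNX backend API this classmethod implements;
# "model.onnx" is a placeholder, and the call is assumed to go through
# onnxruntime.backend, which exposes a matching `prepare` entry point.
import onnx
import onnxruntime.backend as backend

model = onnx.load("model.onnx")
rep = backend.prepare(model, 'CPU')
# outputs = rep.run(list_of_numpy_inputs)
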
def convert_to_onnx(self, onnx_output_dir=None, set_onnx_arg=True):
    """Convert the model to ONNX format and save it to onnx_output_dir.

    Args:
        onnx_output_dir (str, optional): If specified, the ONNX model is saved
            to onnx_output_dir (else args.output_dir is used). Defaults to None.
        set_onnx_arg (bool, optional): Updates the model args to set onnx=True.
            Defaults to True.
    """  # noqa
    if not onnx_output_dir:
        onnx_output_dir = os.path.join(self.options.output_dir,
                                       self.options.model_type,
                                       self.options.model_name, "onnx")
    os.makedirs(onnx_output_dir, exist_ok=True)

    if not os.listdir(onnx_output_dir):
        onnx_model_name = os.path.join(onnx_output_dir, "onnx_model.onnx")
        with tempfile.TemporaryDirectory() as temp_dir:
            basedir = os.path.basename(temp_dir)
            temp_dir = os.path.join(self.options.output_dir, basedir)
            self.save_model(output_dir=temp_dir, model=self.model)
            convert(
                framework="pt",
                model=temp_dir,
                tokenizer=self.tokenizer,
                output=Path(onnx_model_name),
                pipeline_name="ner",
                opset=11,
            )
        self.tokenizer.save_pretrained(onnx_output_dir)
        self.config.save_pretrained(onnx_output_dir)

    onnx_options = SessionOptions()
    use_cuda = self._device.type != 'cpu'
    onnx_execution_provider = ("CUDAExecutionProvider" if use_cuda
                               else "CPUExecutionProvider")
    onnx_options.intra_op_num_threads = 1
    onnx_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL

    onnx_model_path = os.path.join(onnx_output_dir, "onnx_model.onnx")
    if self.options.dynamic_quantize:
        # Append "-quantized" at the end of the model's name
        quantized_model_path = generate_identified_filename(
            Path(onnx_model_path), "-quantized")
        quantize_dynamic(Path(onnx_model_path), quantized_model_path)
        onnx_model_path = quantized_model_path.as_posix()

    return InferenceSession(onnx_model_path, onnx_options,
                            providers=[onnx_execution_provider])