Example #1
    def run_onnxruntime(self,
                        model_path,
                        inputs,
                        output_names,
                        use_custom_ops=False):
        """Run test against onnxruntime backend."""
        import onnxruntime as rt
        providers = ['CPUExecutionProvider']
        if rt.get_device() == "GPU":
            gpus = os.environ.get("CUDA_VISIBLE_DEVICES")
            if gpus is None or len(gpus) > 1:
                providers = ['CUDAExecutionProvider']
        opt = rt.SessionOptions()
        if use_custom_ops:
            from onnxruntime_extensions import get_library_path
            opt.register_custom_ops_library(get_library_path())

        # in case of issues with the runtime, one can enable more logging
        # opt.log_severity_level = 0
        # opt.log_verbosity_level = 255
        # opt.enable_profiling = True

        m = rt.InferenceSession(model_path, opt, providers=providers)
        results = m.run(output_names, inputs)
        return results
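The GPU branch above only switches providers based on CUDA_VISIBLE_DEVICES; whether the installed onnxruntime build actually offers CUDA can be checked up front. A minimal standalone sketch (not part of the test class) using get_available_providers():

import onnxruntime as rt

# Ask the installed onnxruntime build which execution providers it offers.
available = rt.get_available_providers()
print(available)  # e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] on a GPU build

# Prefer CUDA when present, otherwise fall back to CPU.
providers = (['CUDAExecutionProvider']
             if 'CUDAExecutionProvider' in available
             else ['CPUExecutionProvider'])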
Example #2
 def __setstate__(self, state):
     if get_library_path is None:
         raise ImportError("onnxruntime_extensions is not installed.")
     state['onnx_'] = load(BytesIO(state['onnx_']))
     BaseEstimator.__setstate__(self, state)
     so = SessionOptions()
     so.register_custom_ops_library(get_library_path())
     self.sess_ = InferenceSession(self.onnx_.SerializeToString(), so)
     return self
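Example #2 only shows the unpickling side. A matching __getstate__ is presumably what produced the bytes in state['onnx_']; a hypothetical sketch of that counterpart, assuming the estimator keeps the proto in self.onnx_ and a non-picklable session in self.sess_:

def __getstate__(self):
    # Hypothetical counterpart: serialize the ONNX proto to bytes and
    # drop the non-picklable InferenceSession before pickling.
    state = self.__dict__.copy()
    state['onnx_'] = state['onnx_'].SerializeToString()
    state.pop('sess_', None)
    return state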
Example #3
    def fit(self, X, y=None, sample_weight=None):
        """
        The model is not trains this method is still needed to
        set the instance up and ready to transform.

        :param X: array of strings
        :param y: unused
        :param sample_weight: unused
        :return: self
        """
        self.onnx_ = self._create_model(self.model_b64, opset=self.opset)
        so = SessionOptions()
        so.register_custom_ops_library(get_library_path())
        self.sess_ = InferenceSession(self.onnx_.SerializeToString(), so)
        return self
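fit only builds the session; the actual inference would happen in a transform method, which is not shown here. A hedged sketch of what such a method could look like, assuming numpy is imported as np in that module and reading the input name from the session rather than hard-coding it:

    def transform(self, X):
        # Sketch only: feed the string array to the single model input
        # and return the first output.
        input_name = self.sess_.get_inputs()[0].name
        return self.sess_.run(None, {input_name: np.asarray(X)})[0]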
Example #4
    def run_onnxruntime(self,
                        name,
                        model_proto,
                        inputs,
                        outputs,
                        external_tensor_storage=None):
        """Run test against onnxruntime backend."""
        import onnxruntime as rt
        model_path = utils.save_onnx_model(
            TEMP_DIR,
            name,
            inputs,
            model_proto,
            include_test_data=True,
            as_text=utils.is_debug_mode(),
            external_tensor_storage=external_tensor_storage)
        logger.info("Model saved to %s", model_path)
        providers = ['CPUExecutionProvider']
        if rt.get_device() == "GPU":
            gpus = os.environ.get("CUDA_VISIBLE_DEVICES")
            if gpus is None or len(gpus) > 1:
                providers = ['CUDAExecutionProvider']

        opt = rt.SessionOptions()
        if self.use_custom_ops:
            from onnxruntime_extensions import get_library_path
            opt.register_custom_ops_library(get_library_path())
        if self.ort_profile is not None:
            opt.enable_profiling = True
        m = rt.InferenceSession(model_path,
                                sess_options=opt,
                                providers=providers)
        results = m.run(outputs, inputs)
        if self.perf:
            n = 0
            start = time.time()
            stop = start + PERF_TIME
            while time.time() < stop:
                for _ in range(PERF_STEP):
                    _ = m.run(outputs, inputs)
                n += PERF_STEP
            self.onnx_runtime = 1000 * (time.time() - start) / n
            logger.info("ORT perf {:.2f}ms/inference, n={}".format(
                self.onnx_runtime, n))
        if self.ort_profile is not None:
            tmp_path = m.end_profiling()
            shutil.move(tmp_path, self.ort_profile)
        return results
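end_profiling() returns the path to a JSON trace, which the code above moves to self.ort_profile. The trace can be inspected offline; a small sketch, assuming the chrome-trace style event list that recent onnxruntime versions emit (per-node events have cat "Node" and "dur" in microseconds):

import json
from collections import Counter

# Sketch: aggregate per-node durations from an onnxruntime profiling trace.
with open("profile.json") as f:  # i.e. the file moved to self.ort_profile
    events = json.load(f)

per_op = Counter()
for ev in events:
    if ev.get("cat") == "Node":
        per_op[ev.get("name", "?")] += ev.get("dur", 0)

for name, dur in per_op.most_common(10):
    print("{}: {:.3f} ms".format(name, dur / 1000.0))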
Example #5
import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

input_list = np.array([
    [
        "23314 454 7560 85 5 3958 32 188131 454 11627 1369",
        "153 115 13761 3245 30128 21393 6 3958 6 33957 2011 126707 13820 18 75813 121046 6957 1284 18 46667 225006 153 24 33416 6 78175 111202 20179 95 39884 13639 425 16684 23314 194602 78403 2011 124999 153 196423 31 9607 363 36398 96335 68828 9351 45 10763 6635 7026 8834 73395 1230 82678 74",
        "106 25037 92 6 2566 3114 64 9271 41793 92",
        "48498 100 71 77463 26249 36049 141496 159201 41 1294 22970 144",
        "fr-fr", ""
    ],
    [
        "11493 5 337 67847",
        "305 13312 6650 20 351 1507 1202 337 67847 337 67847 11493 123 3177",
        "78600 30535 113543 81384 64 10248 64 864 910 2507 169 3742 6 7693",
        "337 67847 11493 123 3177 20 337 67847 35399", "en-id", ""
    ],
    [
        "6 8709 71684 1128 56963 9594",
        "378 122369 268 6 8709 71684 1128 4035 9056 11541 64632 37106 46879 2490 9839 5873 5 1210 37151 153 28292 194546 56963 18617 143964 9594 15 6 192141 10134 2846 1388 6 167039 8709 71684 1128 106000 194546 240762 6995 1173 35645 684 109052 5873 15 6 20212 10134 2846 1388 6 71729 38",
        "82414 496 9365 65451",
        "6 8709 71684 1128 14455 9065 9 12865 68818 1764", "zh-tw", ""
    ]
])

so = ort.SessionOptions()
so.register_custom_ops_library(get_library_path())
sess = ort.InferenceSession("RankLMToken.onnx", so)
res = sess.run(None, {"input": input_list})
print(res)