Example #1
    def _run_pytorch(self, config: BenchmarkConfig) -> Benchmark:
        """
        :return:
        """
        LOGGER.info("Running PyTorch Eager benchmark")
        benchmark = Benchmark()

        dummy_inputs = self._get_dummy_inputs(
            batch_size=config.batch_size,
            seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False))
        )

        inputs = self.tokenizer(
            dummy_inputs,
            is_split_into_words=True,
            return_tensors=TensorType.PYTORCH,
        )

        inputs = inputs.to(config.device)
        self.model = self.model.to(config.device)

        # Warmup
        for _ in trange(config.warmup_runs, desc="Warming up"):
            self.model(**inputs)

        # Run benchmark
        benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
        while sum(benchmark.latencies) < benchmark_duration_ns:
            with benchmark.track():
                self.model(**inputs)

        benchmark.finalize(benchmark_duration_ns)

        return benchmark
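
All three snippets drive the same `Benchmark` helper, which is not shown on this page. From the way it is used (a `track()` context manager feeding `latencies` in nanoseconds and a `finalize()` call at the end), a minimal sketch could look like the following; everything beyond the attribute and method names visible above is an assumption:

from contextlib import contextmanager
from time import monotonic_ns

SEC_TO_NS_SCALE = 1_000_000_000  # the snippets use this to convert seconds to nanoseconds

class Benchmark:
    def __init__(self):
        self.latencies = []           # per-inference latency in nanoseconds
        self.throughput = float("-inf")

    @contextmanager
    def track(self):
        # Measure wall-clock time of the wrapped inference call and record it.
        start = monotonic_ns()
        yield
        self.latencies.append(monotonic_ns() - start)

    def finalize(self, duration_ns: int):
        # Assumption: derive throughput as completed inferences per benchmark second.
        self.throughput = len(self.latencies) / (duration_ns / SEC_TO_NS_SCALE)
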
Example #2
    def execute(self, config: 'BenchmarkConfig') -> Benchmark:
        benchmark = Benchmark()

        try:
            model_opt_path = Path(self.onnx_path)
            opt_onnx_path = model_opt_path.with_suffix(".opt" +
                                                       model_opt_path.suffix)

            model_opt = optimize_model(
                self.onnx_path,
                model_type="bert",
                opt_level=int(self.session_opts.graph_optimization_level))
            model_opt.save_model_to_file(opt_onnx_path.absolute().as_posix())
            self.optimized_onnx_graph = opt_onnx_path.absolute().as_posix()
        except Exception as e:
            LOGGER.error(f"Unable to optimize ONNX BERT model: {e}")

        session = InferenceSession(self.optimized_onnx_graph or self.onnx_path,
                                   self.session_opts)

        dummy_inputs = self._get_dummy_inputs(
            batch_size=config.batch_size,
            seq_len=(config.sequence_length -
                     self.tokenizer.num_special_tokens_to_add(pair=False)))

        inputs = self.tokenizer(
            dummy_inputs,
            is_split_into_words=True,
            return_tensors=TensorType.NUMPY,
        )
        # Cast token ids and masks to int64 ("i8"), the integer type the exported ONNX graph expects
        inputs = {k: v.astype("i8") for k, v in inputs.items()}

        # Warmup
        for _ in trange(config.warmup_runs, desc="Warming up"):
            session.run(None, inputs)

        # Run benchmark
        benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
        while sum(benchmark.latencies) < benchmark_duration_ns:
            with benchmark.track():
                session.run(None, inputs)

        benchmark.finalize(benchmark_duration_ns)
        return benchmark
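
Example #2 relies on `self.onnx_path` and `self.session_opts` being prepared elsewhere in the class. The sketch below shows one plausible way to build such session options with ONNX Runtime's standard `SessionOptions`/`GraphOptimizationLevel` API; the helper name and the thread setting are assumptions, not part of the original code:

from onnxruntime import GraphOptimizationLevel, SessionOptions

def make_session_options(num_threads: int = 1) -> SessionOptions:
    # Enable all graph optimizations; Example #2 forwards this level to optimize_model.
    opts = SessionOptions()
    opts.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Assumption: pin intra-op threads for a more reproducible latency measurement.
    opts.intra_op_num_threads = num_threads
    return opts
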
Example #3
    def _run_torchscript(self, config: BenchmarkConfig) -> Benchmark:
        """
        :return:
        """
        LOGGER.info("Running TorchScript benchmark")
        benchmark = Benchmark()

        dummy_inputs = self._get_dummy_inputs(
            batch_size=config.batch_size,
            seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False))
        )

        inputs = self.tokenizer(
            dummy_inputs,
            is_split_into_words=True,
            return_tensors=TensorType.PYTORCH,
        )

        inputs = inputs.to(config.device)
        self.model = self.model.to(config.device)

        # Make sure the inputs are passed to the traced model in the order its forward signature expects
        ordered_inputs = OrderedDict({
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            "token_type_ids": inputs.token_type_ids,
        })

        LOGGER.debug("Calling torch JIT on model (optimize=True)")
        model_scripted = torch.jit.trace(self.model, tuple(ordered_inputs.values()))

        with torch.jit.optimized_execution(True):
            for _ in trange(config.warmup_runs, desc="Warming up"):
                model_scripted(*ordered_inputs.values())

            benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
            while sum(benchmark.latencies) < benchmark_duration_ns:
                with benchmark.track():
                    model_scripted(*ordered_inputs.values())

            benchmark.finalize(benchmark_duration_ns)
        return benchmark
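
All three examples also call `self._get_dummy_inputs`, which this page does not show. Since its result is handed to the tokenizer with `is_split_into_words=True`, it presumably returns `batch_size` pre-split sequences of `seq_len` tokens; a minimal sketch under that assumption:

    def _get_dummy_inputs(self, batch_size: int, seq_len: int):
        # Assumption: one pre-tokenized dummy sentence per batch element;
        # the filler token itself is arbitrary for a latency benchmark.
        return [["hello"] * seq_len for _ in range(batch_size)]
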