Example #1
    def run(self, **kwargs):
        # Make the onnx-mlir PyRuntime module importable, then return the
        # compiled model's input signature (a JSON string).
        sys.path.append(RUNTIME_DIR)
        from PyRuntime import ExecutionSession

        session = ExecutionSession(self.exec_name)
        output = session.input_signature()
        return output
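The value returned by this run() is a JSON string describing the model inputs; a minimal sketch of decoding it on the caller's side, where `wrapper` stands for a hypothetical instance of the class above and the exact layout of the decoded value is an assumption:

import json

sig_json = wrapper.run()          # 'wrapper' is a hypothetical instance of the class above
signature = json.loads(sig_json)  # decode the JSON signature string
for entry in signature:           # typically one entry per model input
    print(entry)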
Example #2
    def run(self, inputs, **kwargs):
        # Run the compiled model on the given inputs and return whatever it
        # wrote to the C-level stdout, decoded as a string.
        sys.path.append(RUNTIME_DIR)
        from PyRuntime import ExecutionSession

        session = ExecutionSession(self.exec_name)
        f = io.BytesIO()
        with redirect_c_stdout(f):
            try:
                session.run(inputs)
            except RuntimeError:
                # Ignore runtime failures; the captured stdout is what we return.
                pass
        output = f.getvalue().decode('utf-8')
        return output
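The helper redirect_c_stdout used above is not shown in this example; a minimal sketch of what such a context manager might look like, assuming a POSIX system where C-level stdout can be swapped with os.dup/os.dup2 (this illustrates the technique, it is not the helper's actual implementation):

import contextlib
import ctypes
import io
import os
import sys
import tempfile

@contextlib.contextmanager
def redirect_c_stdout(stream: io.BytesIO):
    # Swap the process-level stdout file descriptor for a temporary file so
    # that output written by C/C++ code (not just Python) is captured, then
    # copy the captured bytes into the given BytesIO stream.
    libc = ctypes.CDLL(None)           # assumption: POSIX, gives access to fflush
    original_fd = sys.stdout.fileno()  # normally fd 1
    saved_fd = os.dup(original_fd)
    with tempfile.TemporaryFile(mode="w+b") as tmp:
        try:
            sys.stdout.flush()
            libc.fflush(None)
            os.dup2(tmp.fileno(), original_fd)
            yield
        finally:
            sys.stdout.flush()
            libc.fflush(None)
            os.dup2(saved_fd, original_fd)
            os.close(saved_fd)
            tmp.seek(0)
            stream.write(tmp.read())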
Example #3
def main(model_path):
    model = onnx.load(model_path)
    intermediate_outputs = sum(
        [list(node.output) for node in model.graph.node], [])
    intermediate_outputs = list(OrderedDict.fromkeys(intermediate_outputs))
    model = extend_model_output(model, intermediate_outputs)

    with tempfile.TemporaryDirectory() as temp_dir:
        print("Temporary directory has been created at {}".format(temp_dir))

        # Save modified model & invoke onnx-mlir to compile it.
        temp_model_path = os.path.join(temp_dir, "model.onnx")
        onnx.save(model, temp_model_path)
        execute_commands([ONNX_MLIR, temp_model_path])

        # Use the generated shared library to create an execution session.
        temp_shared_lib_path = os.path.join(temp_dir, "model.so")
        sess = ExecutionSession(temp_shared_lib_path, "run_main_graph")

        # Generate random data as input.
        inputs = []
        input_names = []
        initializers = list(map(lambda x: x.name, model.graph.initializer))
        np.random.seed(42)
        for input_proto in model.graph.input:
            if input_proto.name not in initializers:
                input_names.append(input_proto.name)
                shape_proto = input_proto.type.tensor_type.shape
                explicit_shape = []
                for dim in shape_proto.dim:
                    assert dim.dim_value, "Can only debug models with inputs that have explicit shapes."
                    explicit_shape.append(dim.dim_value)
                inputs.append(
                    np.random.uniform(-1.0, 1.0,
                                      explicit_shape).astype(np.float32))

        # Run the compiled inference function on the randomly generated data.
        outs = sess.run(inputs)

        # Run the model with reference backend and get results.
        ref_session = prepare(temp_model_path)
        output_names = list(map(lambda x: x.name, model.graph.output))
        input_feed = dict(zip(input_names, inputs))
        ref_outs = ref_session.run(output_names, input_feed)

        # For each intermediate output tensor, compare results.
        for i, name in enumerate(intermediate_outputs):
            print("Verifying value of {}".format(name))
            np.testing.assert_array_almost_equal(ref_outs[i],
                                                 outs[i],
                                                 decimal=5)
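The helper extend_model_output used above (and again in Example #7) is not shown; a minimal sketch of what such a helper might do, assuming that name-only ValueInfoProto entries are enough to expose the intermediate tensors as graph outputs:

import copy
import onnx

def extend_model_output(model, intermediate_outputs):
    # Work on a copy; replace the graph outputs with one entry per requested
    # tensor name so the runtime will also return those intermediate values.
    extended_model = copy.deepcopy(model)
    del extended_model.graph.output[:]
    for name in intermediate_outputs:
        value_info = onnx.ValueInfoProto()
        value_info.name = name
        extended_model.graph.output.extend([value_info])
    return extended_model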
Example #4
    def _get_onnxmlir_inference_session(self):
        try:
            # this has to be able to find the arch and OS specific PyRuntime .so file
            from PyRuntime import ExecutionSession
        except ImportError:
            raise MissingDependencyException(
                "PyRuntime package library must be in python path")
        return ExecutionSession(self._model_so_path, "run_main_graph")
Example #5
def load(
    tag: t.Union[str, Tag],
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
) -> "ExecutionSession":
    """
    Load a model from the BentoML local modelstore with the given tag.

    onnx-mlir is a compiler technology that can take an ONNX model and lower it
    (using LLVM) to an inference library that is optimized and has few external
    dependencies.

    The PyRuntime interface is created during the build of onnx-mlir using pybind.
    See the onnx-mlir supporting documentation for details.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.

    Returns:
        :obj:`ExecutionSession`: an instance of the ONNX-MLIR compiled model from the BentoML modelstore.

    Examples:

    .. code-block:: python

        import bentoml

        session = bentoml.onnxmlir.load(tag)
        session.run(data)

    """
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )
    compiled_path = model.path_of(model.info.options["compiled_path"])
    return ExecutionSession(compiled_path, "run_main_graph")  # type: ignore
Example #6
import numpy as np
from PyRuntime import ExecutionSession

# Load the model mnist.so compiled with onnx-mlir.
model = './mnist.so'
session = ExecutionSession(model)
# Print the model's input/output signatures, for information.
# The signature functions are informational only; comment them out if they cause problems.
print("input signature in json", session.input_signature())
print("output signature in json", session.output_signature())
# Create an input arbitrarily filled of 1.0 values.
input = np.array([
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
    -0.4242129623889923, -0.4242129623889923, -0.4242129623889923,
Example #7
def main():
    # Get shape information if given.
    # args.shape_info in the form of 'input_index:d1xd2, input_index:d1xd2'
    input_shapes = {}
    if args.shape_info:
        for input_shape in args.shape_info.strip().split(","):
            input_index_shape = input_shape.split(":")
            input_index = input_index_shape[0]
            assert not (input_index in input_shapes), "Duplicate input indices"
            dims = [int(d) for d in input_index_shape[1].split("x")]
            input_shapes[int(input_index)] = dims

    # Load the onnx model.
    model = onnx.load(args.model_path)

    # Get the output names that we want to verify.
    # If using onnxruntime for verification, we can verify every operation output.
    output_names = [o.name for o in model.graph.output]
    output_names = list(OrderedDict.fromkeys(output_names))
    if (args.verify and args.verify == "onnxruntime"):
        output_names = sum([[n for n in node.output if n != '']
                            for node in model.graph.node], [])
        output_names = list(OrderedDict.fromkeys(output_names))
        model = extend_model_output(model, output_names)

    # Compile, run, and verify.
    with tempfile.TemporaryDirectory() as temp_dir:
        print("Temporary directory has been created at {}".format(temp_dir))

        # Prepare input data.
        inputs = []
        input_names = []
        if args.data_folder:
            assert args.data_folder, "No data folder given"
            inputs, input_names = read_input_from_refs(model, args.data_folder)
        else:
            inputs, input_names = generate_random_input(model, input_shapes)
        # Print the input if required.
        if (args.print_input):
            for i, inp in enumerate(inputs):
                print("The {} input {}:[{}x{}] is: \n {} \n".format(
                    ordinal(i + 1), input_names[i],
                    'x'.join([str(i) for i in inp.shape]), inp.dtype, inp))

        shared_lib_path = ""
        # If a shared library is given, use it without compiling the ONNX model.
        # Otherwise, compile the ONNX model.
        if (args.load_so):
            shared_lib_path = args.load_so
        else:
            print("Compiling the model ...")
            # Save modified model & invoke onnx-mlir to compile it.
            temp_model_path = os.path.join(temp_dir, "model.onnx")
            shared_lib_path = os.path.join(temp_dir, "model.so")
            onnx.save(model, temp_model_path)

            # Prepare compiler arguments.
            command_str = ONNX_MLIR
            if args.compile_args:
                command_str += " " + args.compile_args
            if args.compile_using_input_shape:
                # Use shapes of the reference inputs to compile the model.
                assert args.data_folder, "No data folder given"
                assert "shapeInformation" not in command_str, "shape info was set"
                shape_info = "--shapeInformation="
                for i in range(len(inputs)):
                    shape_info += str(i) + ":" + 'x'.join(
                        [str(d) for d in inputs[i].shape]) + ","
                shape_info = shape_info[:-1]
                command_str += " " + shape_info
                warning("the shapes of the model's inputs will be " \
                    "changed to the shapes of the inputs in the data folder")
            command_str += " " + temp_model_path

            start = time.perf_counter()
            execute_commands(command_str)
            end = time.perf_counter()
            print("  took ", end - start, " seconds.\n")

            # Save the generated .so file of the model if required.
            if (args.save_so):
                print("Saving the shared library to", args.save_so, "\n")
                execute_commands('rsync -ar {} {}'.format(
                    shared_lib_path, args.save_so))

        print("Running inference ...")
        start = time.perf_counter()
        # Use the generated shared library to create an execution session.
        sess = ExecutionSession(shared_lib_path, "run_main_graph")
        outs = sess.run(inputs)
        end = time.perf_counter()
        print("  took ", end - start, " seconds.\n")

        # Print the output if required.
        if (args.print_output):
            for i, out in enumerate(outs):
                print("The {} output {}:[{}x{}] is: \n {} \n".format(
                    ordinal(i + 1), output_names[i],
                    'x'.join([str(i) for i in out.shape]), out.dtype, out))

        # Store the input and output if required.
        if args.save_data:
            data_folder = args.save_data
            if not os.path.exists(data_folder):
                os.mkdir(data_folder)
            for i in range(len(inputs)):
                tensor = numpy_helper.from_array(inputs[i])
                tensor_path = os.path.join(data_folder,
                                           'input_{}.pb'.format(i))
                with open(tensor_path, 'wb') as f:
                    f.write(tensor.SerializeToString())
            for i in range(len(outs)):
                tensor = numpy_helper.from_array(outs[i])
                tensor_path = os.path.join(data_folder,
                                           'output_{}.pb'.format(i))
                with open(tensor_path, 'wb') as f:
                    f.write(tensor.SerializeToString())

        # Run the model with reference backend and get results.
        if (args.verify):
            ref_outs = []
            if (args.verify.lower() == "onnxruntime"):
                # Reference backend by using onnxruntime.
                import onnxruntime
                output_names = list(map(lambda x: x.name, model.graph.output))
                input_feed = dict(zip(input_names, inputs))
                print("Running inference using onnxruntime ...")
                start = time.perf_counter()
                ref_session = onnxruntime.InferenceSession(temp_model_path)
                ref_outs = ref_session.run(output_names, input_feed)
                end = time.perf_counter()
                print("  took ", end - start, " seconds.\n")
            elif (args.verify.lower() == "ref"):
                ref_outs = read_output_from_refs(model, args.data_folder)
            else:
                print("Invalid verify option")
                exit()

            # For each output tensor, compare results.
            for i, name in enumerate(output_names):
                print(
                    "Verifying value of {}:{}".format(name,
                                                      list(outs[i].shape)),
                    "using atol={}, rtol={} ...".format(args.atol, args.rtol))
                total_elements = 0
                mismatched_elements = 0
                for index, actual_val in np.ndenumerate(outs[i]):
                    total_elements += 1
                    ref_val = ref_outs[i][index]
                    # Use equation atol + rtol * abs(desired), that is used in assert_allclose.
                    diff = float(args.atol) + float(args.rtol) * abs(ref_val)
                    if (abs(actual_val - ref_val) <= diff):
                        continue
                    mismatched_elements += 1
                    print("  at {}".format(index),
                          "mismatch {} (actual)".format(actual_val),
                          "vs {} (reference)".format(ref_val))
                if mismatched_elements == 0:
                    print("  correct.\n".format(args.atol, args.rtol))
                else:
                    print("  mismatched elements {}/{}.\n".format(
                        mismatched_elements, total_elements))
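The helper execute_commands used in Examples #3 and #7 is not shown; a minimal sketch of such a helper, assuming it must accept either a command string or a list of arguments (both call styles appear above) and should fail loudly on errors:

import subprocess

def execute_commands(cmds):
    # Accept a single command string or a list of arguments, run it as a
    # subprocess, and raise if the command exits with a non-zero status.
    if isinstance(cmds, str):
        cmds = cmds.split()
    subprocess.run(cmds, check=True)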
Example #8
def predict_df(inference_sess: ExecutionSession, df: pd.DataFrame):
    # Convert the DataFrame to a float64 array and run it through the compiled model.
    input_data = df.to_numpy().astype(np.float64)
    return inference_sess.run(input_data)
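A brief, illustrative usage sketch for predict_df. The model path and feature count are hypothetical; note that predict_df forwards a single 2-D array to run() exactly as written above, so the compiled model must accept that layout:

import numpy as np
import pandas as pd
from PyRuntime import ExecutionSession

# Hypothetical compiled model expecting a float64 feature matrix.
session = ExecutionSession("./model.so", "run_main_graph")
df = pd.DataFrame(np.random.rand(4, 10))  # 4 rows x 10 features, illustrative only
print(predict_df(session, df))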