Example #1
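
All of the examples below are excerpted from the same module and rely on its shared imports and private helpers (_convert_dtype, _convert_pytorch_dtype, _add_model_param). A minimal sketch of the assumed module-level context follows; the exact import paths are assumptions based on where these names usually come from:

import json
import os
import warnings
from shutil import copyfile

from google.protobuf import text_format
# Triton's generated ModelConfig protobuf bindings, shipped with the tritonclient package
import tritonclient.grpc.model_config_pb2 as model_config
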
def _generate_pytorch_config(name,
                             output_path,
                             model_info,
                             max_batch_size=None):
    """given a workflow generates the trton modelconfig proto object describing the inputs
    and outputs to that workflow"""
    config = model_config.ModelConfig(name=name,
                                      platform="onnxruntime_onnx",
                                      max_batch_size=max_batch_size)

    for col, val in model_info["input"].items():
        config.input.append(
            model_config.ModelInput(name=col,
                                    data_type=_convert_dtype(val["dtype"]),
                                    dims=[-1, len(val["columns"])]))

    for col, val in model_info["output"].items():
        if len(val["columns"]) == 1:
            dims = [-1]
        else:
            dims = [-1, len(val["columns"])]
        config.output.append(
            model_config.ModelOutput(name=col,
                                     data_type=_convert_dtype(val["dtype"]),
                                     dims=dims))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
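
A sketch of the model_info mapping this function expects, inferred from how it is read above; the tensor names, dtypes, and paths are illustrative assumptions:

import numpy as np

model_info = {
    "input": {
        "input__0": {"dtype": np.dtype("int64"), "columns": ["user_id", "item_id"]},
    },
    "output": {
        "output__0": {"dtype": np.dtype("float32"), "columns": ["click"]},
    },
}
_generate_pytorch_config("demo_pt", "/models/demo_pt", model_info, max_batch_size=64)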
Example #2
def _generate_ensemble_config(name,
                              output_path,
                              nvt_config,
                              nn_config,
                              name_ext=""):
    config = model_config.ModelConfig(name=name + name_ext,
                                      platform="ensemble",
                                      max_batch_size=nvt_config.max_batch_size)
    config.input.extend(nvt_config.input)
    config.output.extend(nn_config.output)

    nvt_step = model_config.ModelEnsembling.Step(model_name=nvt_config.name,
                                                 model_version=-1)
    for input_col in nvt_config.input:
        nvt_step.input_map[input_col.name] = input_col.name
    for output_col in nvt_config.output:
        nvt_step.output_map[output_col.name] = output_col.name + "_nvt"

    tf_step = model_config.ModelEnsembling.Step(model_name=nn_config.name,
                                                model_version=-1)
    for input_col in nn_config.input:
        tf_step.input_map[input_col.name] = input_col.name + "_nvt"
    for output_col in nn_config.output:
        tf_step.output_map[output_col.name] = output_col.name

    config.ensemble_scheduling.step.append(nvt_step)
    config.ensemble_scheduling.step.append(tf_step)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
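
Usage sketch: generate the NVTabular preprocessing config and the model config first, then chain them; all names and paths here are hypothetical. The "_nvt" suffix is what wires each NVTabular output tensor to the matching model input inside the ensemble.

nvt_config = _generate_nvtabular_config(workflow, "demo_nvt", "/models/demo_nvt",
                                        max_batch_size=64)
nn_config = _generate_tensorflow_config(model, "demo_tf", "/models/demo_tf")
_generate_ensemble_config("demo", "/models/demo_ens", nvt_config, nn_config)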
Example #3
def export_tensorflow_model(model, name, output_path, version=1):
    """Exports a TensorFlow model for serving with Triton

    Parameters
    ----------
    model:
        The tensorflow model that should be served
    name:
        The name of the triton model to export
    output_path:
        The path to write the exported model to
    version:
        The version of the model (defaults to 1)
    """
    tf_model_path = os.path.join(output_path, str(version), "model.savedmodel")
    model.save(tf_model_path)
    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    for col in model.inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, 1]))

    for col in model.outputs:
        config.output.append(
            model_config.ModelOutput(name=col.name.split("/")[0],
                                     data_type=_convert_dtype(col.dtype),
                                     dims=[-1, 1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
Example #4
def export_tensorflow_model(model, name, output_path, version=1):
    """Exports a TensorFlow model for serving with Triton

    Parameters
    ----------
    model:
        The tensorflow model that should be served
    name:
        The name of the triton model to export
    output_path:
        The path to write the exported model to
    version:
        The version of the model (defaults to 1)
    """
    tf_model_path = os.path.join(output_path, str(version), "model.savedmodel")
    model.save(tf_model_path, include_optimizer=False)
    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    inputs, outputs = model.inputs, model.outputs

    if not inputs or not outputs:
        signatures = getattr(model, "signatures", {}) or {}
        default_signature = signatures.get("serving_default")
        if not default_signature:
            # round-trip the SavedModel through disk to generate a serving
            # signature if one doesn't exist
            import tensorflow as tf

            reloaded = tf.keras.models.load_model(tf_model_path)
            default_signature = reloaded.signatures["serving_default"]

        inputs = list(default_signature.structured_input_signature[1].values())
        outputs = list(default_signature.structured_outputs.values())

    for col in inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, col.shape[1]]))

    for col in outputs:
        # this assumes the list columns are 1D tensors both for cats and conts
        config.output.append(
            model_config.ModelOutput(
                name=col.name.split("/")[0],
                data_type=_convert_dtype(col.dtype),
                dims=[-1, col.shape[1]],
            ))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
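
Usage sketch (the model and paths are assumptions): this writes the SavedModel under <output_path>/<version>/model.savedmodel and a config.pbtxt next to it.

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation="relu", input_shape=(4,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
export_tensorflow_model(model, "demo_tf", "/models/demo_tf", version=1)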
Example #5
def _generate_nvtabular_config(workflow,
                               name,
                               output_path,
                               output_model=None,
                               max_batch_size=None,
                               cats=None,
                               conts=None):
    """given a workflow generates the trton modelconfig proto object describing the inputs
    and outputs to that workflow"""

    config = model_config.ModelConfig(name=name,
                                      backend="python",
                                      max_batch_size=max_batch_size)

    if output_model == "hugectr":
        for column in workflow.column_group.input_column_names:
            dtype = workflow.input_dtypes[column]
            config.input.append(
                model_config.ModelInput(name=column,
                                        data_type=_convert_dtype(dtype),
                                        dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="DES",
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="CATCOLUMN",
                                     data_type=model_config.TYPE_INT64,
                                     dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="ROWINDEX",
                                     data_type=model_config.TYPE_INT32,
                                     dims=[-1]))
    else:
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput,
                             config.input)

        for column, dtype in workflow.output_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelOutput,
                             config.output)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
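
Several examples call a private helper, _add_model_param, that is not shown here. A plausible sketch, assuming list (multi-hot) columns are split into the "__values"/"__nnzs" tensor-pair convention; the _is_list_dtype helper and the exact element-dtype handling are assumptions:

def _add_model_param(column, dtype, paramclass, params, dims=None):
    dims = dims if dims is not None else [-1, 1]
    if _is_list_dtype(dtype):  # hypothetical check for list dtypes
        # list columns become a values tensor plus a row-length (nnz) tensor
        params.append(paramclass(name=column + "__values",
                                 data_type=_convert_dtype(dtype),
                                 dims=dims))
        params.append(paramclass(name=column + "__nnzs",
                                 data_type=model_config.TYPE_INT64,
                                 dims=dims))
    else:
        params.append(paramclass(name=column,
                                 data_type=_convert_dtype(dtype),
                                 dims=dims))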
Example #6
def _generate_model_config(workflow, name, output_path):
    """given a workflow generates the trton modelconfig proto object describing the inputs
    and outputs to that workflow"""
    config = model_config.ModelConfig(name=name, backend="python")

    for column in workflow.column_group.input_column_names:
        dtype = workflow.input_dtypes[column]
        config.input.append(
            model_config.ModelInput(name=column,
                                    data_type=_convert_dtype(dtype),
                                    dims=[-1]))

    for column, dtype in workflow.output_dtypes.items():
        config.output.append(
            model_config.ModelOutput(name=column,
                                     data_type=_convert_dtype(dtype),
                                     dims=[-1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
Example #7
def _generate_tensorflow_config(model, name, output_path):
    """given a workflow generates the trton modelconfig proto object describing the inputs
    and outputs to that workflow"""
    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    for col in model.inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, 1]))

    for col in model.outputs:
        config.output.append(
            model_config.ModelOutput(name=col.name.split("/")[0],
                                     data_type=_convert_dtype(col.dtype),
                                     dims=[-1, 1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
Example #8
def _generate_ensemble_config(name,
                              output_path,
                              nvt_config,
                              nn_config,
                              name_ext=""):
    config = model_config.ModelConfig(name=name + name_ext,
                                      platform="ensemble",
                                      max_batch_size=nvt_config.max_batch_size)
    config.input.extend(nvt_config.input)
    config.output.extend(nn_config.output)

    nn_input_cols = set(col.name for col in nn_config.input)

    nvt_step = model_config.ModelEnsembling.Step(model_name=nvt_config.name,
                                                 model_version=-1)
    for input_col in nvt_config.input:
        nvt_step.input_map[input_col.name] = input_col.name
    for output_col in nvt_config.output:
        if output_col.name not in nn_input_cols:
            warnings.warn(
                f"Column {output_col.name} is being generated by NVTabular workflow "
                f" but is unused in {nn_config.name} model")
            continue
        nvt_step.output_map[output_col.name] = output_col.name + "_nvt"

    tf_step = model_config.ModelEnsembling.Step(model_name=nn_config.name,
                                                model_version=-1)
    for input_col in nn_config.input:
        tf_step.input_map[input_col.name] = input_col.name + "_nvt"
    for output_col in nn_config.output:
        tf_step.output_map[output_col.name] = output_col.name

    config.ensemble_scheduling.step.append(nvt_step)
    config.ensemble_scheduling.step.append(tf_step)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
Example #9
def _generate_hugectr_config(name,
                             output_path,
                             hugectr_params,
                             max_batch_size=None):
    config = model_config.ModelConfig(name=name,
                                      backend="hugectr",
                                      max_batch_size=max_batch_size)

    config.input.append(
        model_config.ModelInput(name="DES",
                                data_type=model_config.TYPE_FP32,
                                dims=[-1]))

    config.input.append(
        model_config.ModelInput(name="CATCOLUMN",
                                data_type=model_config.TYPE_INT64,
                                dims=[-1]))

    config.input.append(
        model_config.ModelInput(name="ROWINDEX",
                                data_type=model_config.TYPE_INT32,
                                dims=[-1]))

    for i in range(hugectr_params["n_outputs"]):
        config.output.append(
            model_config.ModelOutput(name="OUTPUT" + str(i),
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))

    # one instance on GPU 0; kind=1 is KIND_GPU in Triton's ModelInstanceGroup proto
    config.instance_group.append(
        model_config.ModelInstanceGroup(gpus=[0], count=1, kind=1))

    config_hugectr = model_config.ModelParameter(
        string_value=hugectr_params["config"])
    config.parameters["config"].CopyFrom(config_hugectr)

    gpucache_val = hugectr_params.get("gpucache", "true")

    gpucache = model_config.ModelParameter(string_value=gpucache_val)
    config.parameters["gpucache"].CopyFrom(gpucache)

    # note: look up "gpucacheper" to match the parameter name written below
    gpucacheper_val = str(hugectr_params.get("gpucacheper", "0.5"))

    gpucacheper = model_config.ModelParameter(string_value=gpucacheper_val)
    config.parameters["gpucacheper"].CopyFrom(gpucacheper)

    label_dim = model_config.ModelParameter(
        string_value=str(hugectr_params["label_dim"]))
    config.parameters["label_dim"].CopyFrom(label_dim)

    slots = model_config.ModelParameter(
        string_value=str(hugectr_params["slots"]))
    config.parameters["slots"].CopyFrom(slots)

    des_feature_num = model_config.ModelParameter(
        string_value=str(hugectr_params["des_feature_num"]))
    config.parameters["des_feature_num"].CopyFrom(des_feature_num)

    cat_feature_num = model_config.ModelParameter(
        string_value=str(hugectr_params["cat_feature_num"]))
    config.parameters["cat_feature_num"].CopyFrom(cat_feature_num)

    max_nnz = model_config.ModelParameter(
        string_value=str(hugectr_params["max_nnz"]))
    config.parameters["max_nnz"].CopyFrom(max_nnz)

    embedding_vector_size = model_config.ModelParameter(
        string_value=str(hugectr_params["embedding_vector_size"]))
    config.parameters["embedding_vector_size"].CopyFrom(embedding_vector_size)

    embeddingkey_long_type_val = hugectr_params.get("embeddingkey_long_type",
                                                    "true")

    embeddingkey_long_type = model_config.ModelParameter(
        string_value=embeddingkey_long_type_val)
    config.parameters["embeddingkey_long_type"].CopyFrom(
        embeddingkey_long_type)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
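
A sketch of the hugectr_params dict the function reads; every value below is an illustrative assumption, not a HugeCTR default:

hugectr_params = {
    "config": "/models/demo_hugectr/1/demo.json",  # HugeCTR network JSON
    "n_outputs": 1,
    "label_dim": 1,
    "slots": 26,
    "des_feature_num": 13,
    "cat_feature_num": 26,
    "max_nnz": 2,
    "embedding_vector_size": 128,
    # optional keys: "gpucache", "gpucacheper", "embeddingkey_long_type"
}
_generate_hugectr_config("demo_hugectr", "/models/demo_hugectr", hugectr_params,
                         max_batch_size=64)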
Example #10
def _generate_nvtabular_config(
    workflow,
    name,
    output_path,
    output_model=None,
    max_batch_size=None,
    cats=None,
    conts=None,
    output_info=None,
    backend="python",
):
    """given a workflow generates the trton modelconfig proto object describing the inputs
    and outputs to that workflow"""
    config = model_config.ModelConfig(name=name,
                                      backend=backend,
                                      max_batch_size=max_batch_size)

    config.parameters[
        "python_module"].string_value = "nvtabular.inference.triton.model"
    config.parameters[
        "output_model"].string_value = output_model if output_model else ""

    if output_model == "hugectr":
        # kind=2 is KIND_CPU in Triton's ModelInstanceGroup proto
        config.instance_group.append(model_config.ModelInstanceGroup(kind=2))

        for column in workflow.column_group.input_column_names:
            dtype = workflow.input_dtypes[column]
            config.input.append(
                model_config.ModelInput(name=column,
                                        data_type=_convert_dtype(dtype),
                                        dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="DES",
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="CATCOLUMN",
                                     data_type=model_config.TYPE_INT64,
                                     dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="ROWINDEX",
                                     data_type=model_config.TYPE_INT32,
                                     dims=[-1]))
    elif output_model == "pytorch":
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput,
                             config.input)

        for col, val in output_info.items():
            _add_model_param(
                col,
                val["dtype"],
                model_config.ModelOutput,
                config.output,
                [-1, len(val["columns"])],
            )
    else:
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput,
                             config.input)

        for column, dtype in workflow.output_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelOutput,
                             config.output)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
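
For the "pytorch" branch, output_info maps each output tensor to its dtype and the columns it packs together; a hypothetical example:

import numpy as np

output_info = {
    "output__0": {"dtype": np.dtype("float32"), "columns": ["click"]},
}
_generate_nvtabular_config(workflow, "demo_nvt", "/models/demo_nvt",
                           output_model="pytorch", output_info=output_info)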
Example #11
def export_pytorch_model(model,
                         workflow,
                         sparse_max,
                         name,
                         output_path,
                         use_fix_dtypes=True,
                         version=1,
                         backend="python"):
    """Exports a PyTorch model for serving with Triton

    Parameters
    ----------
    model:
        The PyTorch model that should be served
    workflow:
        The nvtabular workflow used in preprocessing
    sparse_max:
        Max length of each row when the sparse data is converted to dense
    name:
        The name of the triton model to export
    output_path:
        The path to write the exported model to
    use_fix_dtypes:
        Whether to use the fixed dtypes that Transformers4Rec uses at inference time
    version:
        Version of the model
    backend: "python" or "nvtabular"
        The backend that will be used for inference in Triton.
    """
    import cloudpickle
    import torch

    os.makedirs(os.path.join(output_path, str(version)), exist_ok=True)

    pt_model_path = os.path.join(output_path, str(version), "model.pth")
    torch.save(model.state_dict(), pt_model_path)

    pt_model_path = os.path.join(output_path, str(version), "model.pkl")
    with open(pt_model_path, "wb") as o:
        cloudpickle.dump(model, o)

    copyfile(
        os.path.join(os.path.dirname(__file__), "model", "model_pt.py"),
        os.path.join(output_path, str(version), "model.py"),
    )

    config = model_config.ModelConfig(name=name, backend=backend)

    # the model's inputs are the workflow's outputs: preprocessing feeds the model
    for column, dtype in workflow.output_dtypes.items():
        _add_model_param(column, dtype, model_config.ModelInput, config.input)

    # infer the output width and dtype from the model's final parameter tensor
    *_, last_layer = model.parameters()
    dims = last_layer.shape[0]
    dtype = last_layer.dtype
    config.output.append(
        model_config.ModelOutput(name="output",
                                 data_type=_convert_pytorch_dtype(dtype),
                                 dims=[-1, dims]))

    if sparse_max:
        with open(os.path.join(output_path, str(version), "model_info.json"),
                  "w") as o:
            model_info = dict()
            model_info["sparse_max"] = sparse_max
            model_info["use_fix_dtypes"] = use_fix_dtypes
            json.dump(model_info, o)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
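
Usage sketch (names, paths, and the sparse_max mapping are assumptions): this writes model.pth, model.pkl, and model.py under the version directory, plus model_info.json when sparse_max is given and a config.pbtxt at the top level.

config = export_pytorch_model(
    model,
    workflow,
    sparse_max={"item_id-list": 20},  # max dense length per list column
    name="demo_pt",
    output_path="/models/demo_pt",
)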