Code example #1
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    mms_inference_cmd = test_utils.get_mms_run_command(model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                      f" -p 80:8080 -p 8081:8081"
                      f" {image_uri} {mms_inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
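A minimal usage sketch (not part of the original source): helpers like the one above are typically driven from pytest tests, so a caller might look roughly like this. The fixture and marker names (pytorch_inference, ec2_connection, region, cpu_only) are assumptions for illustration only.

import pytest


@pytest.mark.model("densenet")  # assumed marker name, for illustration
def test_ec2_pytorch_inference_cpu(pytorch_inference, ec2_connection, region, cpu_only):
    # Assumed fixtures: pytorch_inference resolves to the ECR image URI under test;
    # ec2_connection is a Fabric connection to the launched EC2 instance.
    ec2_pytorch_inference(pytorch_inference, "cpu", ec2_connection, region)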
Code example #2
def host_setup_for_tensorflow_inference(serving_folder_path, framework_version,
                                        ec2_connection, is_neuron, model_name):
    # TensorFlow 1.x has no 1.15.2 package on PyPI, so pin to 1.15 only
    ec2_connection.run(
        (
            f"pip3 install --user -qq -U 'tensorflow<={framework_version}' "
            f" 'tensorflow-serving-api<={framework_version}' "
        ), hide=True
    )
    if os.path.exists(f"{serving_folder_path}"):
        ec2_connection.run(f"rm -rf {serving_folder_path}")
    if str(framework_version).startswith(TENSORFLOW1_VERSION):
        run_out = ec2_connection.run(
            f"git clone https://github.com/tensorflow/serving.git {serving_folder_path}"
        )
        git_branch_version = re.findall(r"[1-2]\.[0-9]\d", framework_version)[0]
        ec2_connection.run(
            f"cd {serving_folder_path} && git checkout r{git_branch_version}"
        )
        LOGGER.info(f"Clone TF serving repository status {run_out.return_code == 0}")
        if is_neuron:
            container_test_local_file = os.path.join("$HOME", "container_tests/bin/neuron_tests/mnist_client.py")
            ec2_connection.run(f"cp -f {container_test_local_file} {serving_folder_path}/tensorflow_serving/example")
            neuron_model_file_path = os.path.join(serving_folder_path, f"models/{model_name}/1")
            neuron_model_file = os.path.join(neuron_model_file_path, "saved_model.pb")
            LOGGER.info(f"Host Model path {neuron_model_file_path}")
            ec2_connection.run(f"mkdir -p {neuron_model_file_path}")
            model_file_path = f"https://aws-dlc-sample-models.s3.amazonaws.com/{model_name}_neuron/1/saved_model.pb"
            model_download = (
                f"wget -O {neuron_model_file} {model_file_path} "
            )
            ec2_connection.run(model_download)
    else:
        local_scripts_path = os.path.join("container_tests", "bin", "tensorflow_serving")
        ec2_connection.run(f"mkdir -p {serving_folder_path}")
        ec2_connection.run(f"cp -r {local_scripts_path} {serving_folder_path}")
Code example #3
def run_ec2_mxnet_inference(image_uri, model_name, container_tag, ec2_connection,
                            processor, region, target_port, target_management_port):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2-{container_tag}"
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    mms_inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p {target_port}:8080 -p {target_management_port}:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})", hide=True
        )
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        # Default to a falsy result so the assertion below fails cleanly if an
        # unexpected model name is passed
        inference_result = None
        if model_name == SQUEEZENET_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model="squeezenet"
            )
        elif model_name == BERT_MODEL:
            inference_result = test_utils.request_mxnet_inference_gluonnlp(
                port=target_port, connection=ec2_connection
            )
        elif model_name == RESNET_EIA_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model=model_name
            )
        assert (
            inference_result
        ), f"Failed to perform mxnet {model_name} inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
Code example #4
def run_ec2_tensorflow_inference(image_uri, ec2_connection, grpc_port, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    framework_version = get_tensorflow_framework_version(image_uri)
    home_dir = ec2_connection.run("echo $HOME").stdout.strip('\n')
    serving_folder_path = os.path.join(home_dir, "serving")
    model_path = os.path.join(serving_folder_path, "models", "mnist")
    mnist_client_path = os.path.join(serving_folder_path, "tensorflow_serving",
                                     "example", "mnist_client.py")
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    docker_run_cmd = (
        f"{docker_cmd} run -id --name {container_name} -p {grpc_port}:8500 "
        f"--mount type=bind,source={model_path},target=/models/mnist -e MODEL_NAME=mnist"
        f" {image_uri}")
    try:
        host_setup_for_tensorflow_inference(serving_folder_path,
                                            framework_version, ec2_connection)
        sleep(2)
        train_mnist_model(serving_folder_path, ec2_connection)
        sleep(10)
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        sleep(20)
        test_utils.request_tensorflow_inference_grpc(
            script_file_path=mnist_client_path,
            port=grpc_port,
            connection=ec2_connection)
    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
Code example #5
def run_ec2_tensorflow_inference(image_uri,
                                 ec2_connection,
                                 ec2_instance_ami,
                                 grpc_port,
                                 region,
                                 telemetry_mode=False):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    framework_version = get_tensorflow_framework_version(image_uri)
    home_dir = ec2_connection.run("echo $HOME").stdout.strip('\n')
    serving_folder_path = os.path.join(home_dir, "serving")
    model_path = os.path.join(serving_folder_path, "models", "mnist")
    python_invoker = test_utils.get_python_invoker(ec2_instance_ami)
    mnist_client_path = os.path.join(serving_folder_path, "tensorflow_serving",
                                     "example", "mnist_client.py")

    is_neuron = "neuron" in image_uri

    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    if is_neuron:
        docker_run_cmd = (
            f"{docker_cmd} run -id --name {container_name} -p {grpc_port}:8500 "
            f"--device=/dev/neuron0 --net=host  --cap-add IPC_LOCK "
            f"-e NEURON_MONITOR_CW_REGION=us-east-1 -e NEURON_MONITOR_CW_NAMESPACE=tf1 "
            f"--mount type=bind,source={model_path},target=/models/mnist -e TEST_MODE=1 -e MODEL_NAME=mnist"
            f" {image_uri}")
    else:
        docker_run_cmd = (
            f"{docker_cmd} run -id --name {container_name} -p {grpc_port}:8500 "
            f"--mount type=bind,source={model_path},target=/models/mnist -e TEST_MODE=1 -e MODEL_NAME=mnist"
            f" {image_uri}")
    try:
        host_setup_for_tensorflow_inference(serving_folder_path,
                                            framework_version, ec2_connection,
                                            is_neuron, 'mnist', python_invoker)
        sleep(2)
        if not is_neuron:
            train_mnist_model(serving_folder_path, ec2_connection,
                              python_invoker)
            sleep(10)
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        sleep(20)
        test_utils.request_tensorflow_inference_grpc(
            script_file_path=mnist_client_path,
            port=grpc_port,
            connection=ec2_connection,
            ec2_instance_ami=ec2_instance_ami)
        if telemetry_mode:
            check_telemetry(ec2_connection, container_name)
    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
Code example #6
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    if processor == "eia":
        image_framework, image_framework_version = get_framework_and_version_from_tag(
            image_uri)
        if image_framework_version == "1.3.1":
            model_name = "pytorch-densenet-v1-3-1"
    if processor == "neuron":
        model_name = "pytorch-resnet-neuron"

    inference_cmd = test_utils.get_inference_run_command(
        image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    if processor == "neuron":
        ec2_connection.run("sudo systemctl stop neuron-rtd"
                           )  # Stop neuron-rtd in host env for DLC to start it
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" --device=/dev/neuron0 --cap-add IPC_LOCK"
                          f" --env NEURON_MONITOR_CW_REGION={region}"
                          f" {image_uri} {inference_cmd}")
    else:
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" {image_uri} {inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        server_type = get_inference_server_type(image_uri)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection,
            model_name=model_name,
            server_type=server_type)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
Code example #7
def host_setup_for_tensorflow_inference(serving_folder_path, framework_version,
                                        ec2_connection):
    # TensorFlow 1.x has no 1.15.2 package on PyPI, so pin to 1.15 only
    ec2_connection.run(
        (f"pip install --user -qq -U 'tensorflow<={framework_version}' "
         f" 'tensorflow-serving-api<={framework_version}'"),
        hide=True)
    if os.path.exists(f"{serving_folder_path}"):
        ec2_connection.run(f"rm -rf {serving_folder_path}")
    if str(framework_version).startswith(TENSORFLOW1_VERSION):
        run_out = ec2_connection.run(
            f"git clone https://github.com/tensorflow/serving.git {serving_folder_path}"
        )
        git_branch_version = re.findall(r"[1-2]\.[0-9]\d",
                                        framework_version)[0]
        ec2_connection.run(
            f"cd {serving_folder_path} && git checkout r{git_branch_version}")
        LOGGER.info(
            f"Clone TF serving repository status {run_out.return_code == 0}")
    else:
        local_scripts_path = os.path.join("container_tests", "bin",
                                          "tensorflow_serving")
        ec2_connection.run(f"mkdir -p {serving_folder_path}")
        ec2_connection.run(f"cp -r {local_scripts_path} {serving_folder_path}")
Code example #8
def host_setup_for_tensorflow_inference(serving_folder_path, framework_version,
                                        ec2_connection, is_neuron, is_graviton,
                                        model_name, python_invoker):
    # TensorFlow 1.x has no 1.15.2 package on PyPI, so pin to 1.15 only
    if is_graviton:
        # Install a TensorFlow binary built for Graviton (aarch64) instances instead of the PyPI package
        TF_URL = "https://aws-dlc-graviton-training-binaries.s3.us-west-2.amazonaws.com/tensorflow/2.6.0/tensorflow-2.6.0-cp38-cp38-linux_aarch64.whl"
        ec2_connection.run(
            (f"{python_invoker} -m pip install --no-cache-dir -U {TF_URL}"),
            hide=True)
        ec2_connection.run((
            f"{python_invoker} -m pip install --no-dependencies --no-cache-dir tensorflow-serving-api=={framework_version}"
        ),
                           hide=True)
    else:
        ec2_connection.run((
            f"{python_invoker} -m pip install --user -qq -U 'tensorflow<={framework_version}' "
            f" 'tensorflow-serving-api<={framework_version}' "),
                           hide=True)
    if os.path.exists(f"{serving_folder_path}"):
        ec2_connection.run(f"rm -rf {serving_folder_path}")
    if str(framework_version).startswith(TENSORFLOW1_VERSION):
        run_out = ec2_connection.run(
            f"git clone https://github.com/tensorflow/serving.git {serving_folder_path}"
        )
        git_branch_version = re.findall(r"[1-2]\.[0-9]\d",
                                        framework_version)[0]
        ec2_connection.run(
            f"cd {serving_folder_path} && git checkout r{git_branch_version}")
        LOGGER.info(
            f"Clone TF serving repository status {run_out.return_code == 0}")
        if is_neuron:
            container_test_local_file = os.path.join(
                "$HOME", "container_tests/bin/neuron_tests/mnist_client.py")
            ec2_connection.run(
                f"cp -f {container_test_local_file} {serving_folder_path}/tensorflow_serving/example"
            )
            neuron_model_file_path = os.path.join(serving_folder_path,
                                                  f"models/{model_name}/1")
            neuron_model_file = os.path.join(neuron_model_file_path,
                                             "saved_model.pb")
            LOGGER.info(f"Host Model path {neuron_model_file_path}")
            ec2_connection.run(f"mkdir -p {neuron_model_file_path}")
            model_file_path = f"https://aws-dlc-sample-models.s3.amazonaws.com/{model_name}_neuron/1/saved_model.pb"
            model_download = (
                f"wget -O {neuron_model_file} {model_file_path} ")
            ec2_connection.run(model_download)
    else:
        local_scripts_path = os.path.join("container_tests", "bin",
                                          "tensorflow_serving")
        ec2_connection.run(f"mkdir -p {serving_folder_path}")
        ec2_connection.run(f"cp -r {local_scripts_path} {serving_folder_path}")
        if is_neuron:
            neuron_local_model = os.path.join("$HOME", "container_tests",
                                              "bin", "neuron_tests", "simple")
            neuron_model_dir = os.path.join(serving_folder_path, "models")
            neuron_model_file_path = os.path.join(serving_folder_path,
                                                  "models", "model_name", "1")
            LOGGER.info(f"Host Model path {neuron_model_file_path}")
            LOGGER.info(f"Host Model Dir {neuron_model_dir}")
            ec2_connection.run(f"mkdir -p {neuron_model_file_path}")
            ec2_connection.run(
                f"cp -r {neuron_local_model} {neuron_model_dir}")