Code Example #1
def terminate_ec2_instance():
    # Terminate the instance, then clean up the SSH key pair: destroy it
    # immediately on PR builds, otherwise record it for a later cleanup job.
    ec2_client.terminate_instances(InstanceIds=[instance_id])
    if test_utils.is_pr_context():
        test_utils.destroy_ssh_keypair(ec2_client, key_filename)
    else:
        with open(KEYS_TO_DESTROY_FILE, "a") as destroy_keys:
            destroy_keys.write(f"{key_filename}\n")
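Note: the test_utils helpers used here are not shown. A minimal sketch of what they might look like, assuming the CI system signals pull-request builds through a BUILD_CONTEXT environment variable and that the key pair name matches the key file's basename (both assumptions, not confirmed by the source):

import os

def is_pr_context():
    # Assumption: CI exports BUILD_CONTEXT="PR" on pull-request builds.
    return os.getenv("BUILD_CONTEXT") == "PR"

def destroy_ssh_keypair(ec2_client, key_filename):
    # Assumption: the EC2 key pair name is the key file's basename without extension.
    key_name = os.path.splitext(os.path.basename(key_filename))[0]
    ec2_client.delete_key_pair(KeyName=key_name)  # boto3 EC2 client call
    os.remove(key_filename)                       # delete the local private key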
Code Example #2
def ec2_performance_tensorflow_inference(image_uri, processor, ec2_connection,
                                         ec2_instance_ami, region, threshold):
    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"
    container_test_local_dir = os.path.join("$HOME", "container_tests")
    tf_version = "1" if is_tf_version("1", image_uri) else "2"
    _, tf_api_version = get_framework_and_version_from_tag(image_uri)

    num_iterations = 500 if is_pr_context() else 1000
    # Make sure we are logged into ECR so we can pull the image
    ec2_connection.run(
        f"$(aws ecr get-login --no-include-email --region {region})",
        hide=True)

    ec2_connection.run(f"{docker_cmd} pull -q {image_uri} ")

    # Run performance inference command, display benchmark results to console

    if "graviton" in image_uri:
        # TF training binary is used that is compatible for graviton instance type

        ec2_connection.run((f"sudo apt install python3-pip"), hide=True)
        ec2_connection.run((
            f"pip3 install --user --upgrade awscli boto3 && pip3 install --user grpcio"
        ),
                           hide=True)
        ec2_connection.run((
            f"pip3 install --no-dependencies --user tensorflow-serving-api=={tf_api_version}"
        ),
                           hide=True)
    else:
        ec2_connection.run(f"pip3 install -U pip")
        ec2_connection.run(
            f"pip3 install boto3 grpcio 'tensorflow-serving-api<={tf_api_version}' --user --no-warn-script-location"
        )
    time_str = time.strftime("%Y-%m-%d-%H-%M-%S")
    commit_info = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION")
    log_file = f"synthetic_{commit_info}_{time_str}.log"
    python_invoker = get_python_invoker(ec2_instance_ami)
    ec2_connection.run(
        f"{python_invoker} {container_test_local_dir}/bin/benchmark/tf{tf_version}_serving_perf.py "
        f"--processor {processor} --docker_image_name {image_uri} "
        f"--run_all_s3 --binary /usr/bin/tensorflow_model_server --get_perf --iterations {num_iterations} "
        f"2>&1 | tee {log_file}")
    ec2_performance_upload_result_to_s3_and_validate(
        ec2_connection,
        image_uri,
        log_file,
        "synthetic",
        threshold,
        post_process_inference,
        log_file,
    )
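The helpers is_tf_version and get_framework_and_version_from_tag are not shown above. A rough sketch of how they could parse the version out of an ECR image tag, assuming tags of the form "tensorflow-inference:2.8.0-gpu-..." (the exact tag layout is an assumption):

import re

def get_framework_and_version_from_tag(image_uri):
    # Assumption: the repository name contains the framework and the tag begins
    # with its version, e.g. ".../tensorflow-inference:2.8.0-gpu-py39".
    framework = "tensorflow" if "tensorflow" in image_uri else "unknown"
    match = re.search(r":(\d+(?:\.\d+)+)", image_uri)
    return framework, match.group(1) if match else None

def is_tf_version(major_version, image_uri):
    # Compare only the major component of the parsed version string.
    _, version = get_framework_and_version_from_tag(image_uri)
    return version is not None and version.split(".")[0] == major_version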
Code Example #3
def pytest_runtest_setup(item):
    """
    Handle custom markers and options
    """
    # Handle quick check tests
    quick_checks_opts = [
        mark for mark in item.iter_markers(name="quick_checks")
    ]
    # On PR, skip quick check tests unless we are on quick_checks job
    test_type = os.getenv("TEST_TYPE", "UNDEFINED")
    quick_checks_test_type = "quick_checks"
    if test_type != quick_checks_test_type and test_utils.is_pr_context():
        if quick_checks_opts:
            pytest.skip(
                f"Skipping quick check tests on PR, since test type is {test_type}, and not {quick_checks_test_type}"
            )

    # If we have enabled the quick_checks flag, we expect to only run tests marked as quick_check
    if item.config.getoption("--quick_checks"):
        if not quick_checks_opts:
            pytest.skip("Skipping non-quick-check tests")

    # Handle canary test conditional skipping
    if item.config.getoption("--canary"):
        canary_opts = [mark for mark in item.iter_markers(name="canary")]
        if not canary_opts:
            pytest.skip("Skipping non-canary tests")

    # Handle multinode conditional skipping
    if item.config.getoption("--multinode"):
        multinode_opts = [mark for mark in item.iter_markers(name="multinode")]
        if not multinode_opts:
            pytest.skip("Skipping non-multinode tests")

    # Handle efa conditional skipping
    if item.config.getoption("--efa"):
        efa_tests = [mark for mark in item.iter_markers(name="efa")]
        if not efa_tests:
            pytest.skip("Skipping non-efa tests")
Code Example #4
import random

import pytest

from invoke import run
from invoke.context import Context
from retrying import retry

import test.test_utils.eks as eks_utils
from test.test_utils import is_pr_context, SKIP_PR_REASON, is_below_framework_version
from test.test_utils import get_framework_and_version_from_tag, get_cuda_version_from_tag
from packaging.version import Version

LOGGER = eks_utils.LOGGER


@pytest.mark.skipif(
    not is_pr_context(),
    reason="Skip this test. It is already tested under PR context and we do not "
    "have enough resources to test it again on mainline pipeline",
)
@pytest.mark.model("mnist")
def test_eks_pytorch_single_node_training(pytorch_training):
    """
    Function to create a pod using kubectl and given container image, and run MXNet training
    Args:
        :param setup_utils: environment in which EKS tools are setup
        :param pytorch_training: the ECR URI
    """

    training_result = False

    rand_int = random.randint(4001, 6000)
Code Example #5
import json
import pytest

from invoke.context import Context

from test.test_utils import is_pr_context, PR_ONLY_REASON


@pytest.mark.usefixtures("sagemaker")
@pytest.mark.skipif(not is_pr_context(), reason=PR_ONLY_REASON)
@pytest.mark.model("N/A")
def test_binary_visibility(image: str):
    """
    Test to check if the binary built with image is public/private. Assumes that URIs beginning with 's3://' are private.
    This will mandate specifying all public links as ones beginning with 'https://'. While s3 objects beginning with
    'https://' may still be private, codebuild 'build' job uses 'curl' i.e. unsigned request to fetch them and hence should
    fail if an 'https://' link is still private
    """
    ctx = Context()
    labels = json.loads(ctx.run("docker inspect --format='{{json .Config.Labels}}' " + image).stdout.strip())

    for label_name, label_value in labels.items():
        if "uri" in label_name.lower():
            assert label_value.startswith("https://"), (
                f"Label {label_name} must point to a public 'https://' URI, got {label_value}"
            )
Code Example #6
def delete_ssh_keypair():
    # Destroy the key pair immediately on PR builds; otherwise record it so a
    # later cleanup job can destroy it.
    if test_utils.is_pr_context():
        test_utils.destroy_ssh_keypair(ec2_client, key_filename)
    else:
        with open(KEYS_TO_DESTROY_FILE, "a") as destroy_keys:
            destroy_keys.write(f"{key_filename}\n")
Code Example #7
import os
import random

import pytest

from invoke import run
from invoke.context import Context
from retrying import retry

import test.test_utils.eks as eks_utils
from test.test_utils import is_pr_context, SKIP_PR_REASON, is_below_framework_version
from test.test_utils import get_framework_and_version_from_tag, get_cuda_version_from_tag
from packaging.version import Version


LOGGER = eks_utils.LOGGER


@pytest.mark.skipif(not is_pr_context(), reason="Skip this test. It is already tested under PR context and we do not have enough resources to test it again on mainline pipeline")
@pytest.mark.model("mnist")
def test_eks_pytorch_single_node_training(pytorch_training):
    """
    Function to create a pod using kubectl and given container image, and run MXNet training
    Args:
        :param setup_utils: environment in which EKS tools are setup
        :param pytorch_training: the ECR URI
    """

    training_result = False

    rand_int = random.randint(4001, 6000)

    yaml_path = os.path.join(os.sep, "tmp", f"pytorch_single_node_training_{rand_int}.yaml")
    pod_name = f"pytorch-single-node-training-{rand_int}"
Code Example #8
                vulnerability_severity[severity] = [vulnerability]

        # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
        if not (vulnerability_severity.get("CRITICAL")
                or vulnerability_severity.get("HIGH")):
            return

        raise DependencyCheckFailure(
            f"Unrecognized CVEs have been reported: {vulnerability_severity}. "
            f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
            f"{dependency_check_report} for more details.")


@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", ["c5.4xlarge"], indirect=True)
@pytest.mark.skipif(is_pr_context(),
                    reason="Do not run dependency check on PR tests")
def test_dependency_check_cpu(cpu, ec2_connection):
    _run_dependency_check_test(cpu, ec2_connection, "cpu")


@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", ["p3.2xlarge"], indirect=True)
@pytest.mark.skipif(is_pr_context(),
                    reason="Do not run dependency check on PR tests")
def test_dependency_check_gpu(gpu, ec2_connection):
    _run_dependency_check_test(gpu, ec2_connection, "gpu")


@pytest.mark.model("N/A")
@pytest.mark.canary("Run pip check test regularly on production images")
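This snippet begins mid-function, so the construction of vulnerability_severity is not shown. A plausible sketch, assuming each report entry carries a "severity" field with values like "CRITICAL" or "HIGH" (the field name and values are assumptions):

from collections import defaultdict

def group_vulnerabilities_by_severity(vulnerabilities):
    # Bucket each reported vulnerability under its severity level, producing the
    # vulnerability_severity dict that the snippet above inspects.
    vulnerability_severity = defaultdict(list)
    for vulnerability in vulnerabilities:
        vulnerability_severity[vulnerability["severity"].upper()].append(vulnerability)
    return dict(vulnerability_severity)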
Code Example #9
import os

import pytest

from test.test_utils import CONTAINER_TESTS_PREFIX, is_pr_context
from test.test_utils.ec2 import execute_ec2_training_test

PT_STANDALONE_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests",
                                 "testPyTorchStandalone")
PT_MNIST_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests",
                            "testPyTorch")
PT_REGRESSION_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests",
                                 "testPyTorchRegression")
PT_DGL_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "dgl_tests",
                          "testPyTorchDGL")
PT_APEX_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests",
                           "testNVApex")

if is_pr_context():
    PT_EC2_GPU_INSTANCE_TYPE = ["p3.2xlarge"]
    PT_EC2_CPU_INSTANCE_TYPE = ["c5.9xlarge"]
else:
    # TODO: Add p3dn if releasing
    PT_EC2_GPU_INSTANCE_TYPE = ["g3.4xlarge", "p2.8xlarge", "p3.16xlarge"]
    PT_EC2_CPU_INSTANCE_TYPE = [
        "c4.8xlarge", "c5.18xlarge", "m4.16xlarge", "t2.2xlarge"
    ]


@pytest.mark.parametrize("ec2_instance_type",
                         PT_EC2_GPU_INSTANCE_TYPE,
                         indirect=True)
def test_pytorch_standalone_gpu(pytorch_training, ec2_connection, gpu_only):
    execute_ec2_training_test(ec2_connection, pytorch_training, PT_STANDALONE_CMD)
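With indirect=True, the parametrized instance type string is delivered to an ec2_instance_type fixture rather than to the test directly. A minimal sketch of such a fixture (the body is an assumption; the real conftest.py likely does more, such as launching the instance):

import pytest

@pytest.fixture(scope="function")
def ec2_instance_type(request):
    # pytest hands each value from PT_EC2_GPU_INSTANCE_TYPE /
    # PT_EC2_CPU_INSTANCE_TYPE to this fixture as request.param.
    return request.param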