def terminate_ec2_instance():
    ec2_client.terminate_instances(InstanceIds=[instance_id])
    if test_utils.is_pr_context():
        # In PR context, keypairs are short-lived; destroy immediately
        test_utils.destroy_ssh_keypair(ec2_client, key_filename)
    else:
        # Outside PR context, record the key so it can be destroyed in a later cleanup pass
        with open(KEYS_TO_DESTROY_FILE, "a") as destroy_keys:
            destroy_keys.write(f"{key_filename}\n")
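# For context, a minimal sketch of how a cleanup function like terminate_ec2_instance
# is typically wired up: it is defined inside (or alongside) an EC2 fixture, closing
# over ec2_client, instance_id, and key_filename, and registered via
# request.addfinalizer so it runs even when the test body fails. The fixture name and
# the launch_instance_with_keypair helper below are assumptions for illustration;
# only the pytest finalizer mechanics are standard API.
import pytest


@pytest.fixture
def ec2_instance(request, ec2_client, ec2_instance_type):
    # Hypothetical helper: creates an SSH keypair and launches the instance
    instance_id, key_filename = launch_instance_with_keypair(ec2_client, ec2_instance_type)
    # Register cleanup so the instance and keypair are always released
    request.addfinalizer(terminate_ec2_instance)
    return instance_id, key_filename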
def ec2_performance_tensorflow_inference(image_uri, processor, ec2_connection, ec2_instance_ami, region, threshold):
    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"
    container_test_local_dir = os.path.join("$HOME", "container_tests")
    tf_version = "1" if is_tf_version("1", image_uri) else "2"
    _, tf_api_version = get_framework_and_version_from_tag(image_uri)

    num_iterations = 500 if is_pr_context() else 1000
    # Make sure we are logged into ECR so we can pull the image
    ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)

    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    # Run performance inference command, display benchmark results to console
    if "graviton" in image_uri:
        # Install a tensorflow-serving-api version compatible with the Graviton instance type
        ec2_connection.run("sudo apt install python3-pip", hide=True)
        ec2_connection.run("pip3 install --user --upgrade awscli boto3 && pip3 install --user grpcio", hide=True)
        ec2_connection.run(f"pip3 install --no-dependencies --user tensorflow-serving-api=={tf_api_version}", hide=True)
    else:
        ec2_connection.run("pip3 install -U pip")
        ec2_connection.run(
            f"pip3 install boto3 grpcio 'tensorflow-serving-api<={tf_api_version}' --user --no-warn-script-location"
        )

    time_str = time.strftime("%Y-%m-%d-%H-%M-%S")
    commit_info = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION")
    log_file = f"synthetic_{commit_info}_{time_str}.log"
    python_invoker = get_python_invoker(ec2_instance_ami)

    ec2_connection.run(
        f"{python_invoker} {container_test_local_dir}/bin/benchmark/tf{tf_version}_serving_perf.py "
        f"--processor {processor} --docker_image_name {image_uri} "
        f"--run_all_s3 --binary /usr/bin/tensorflow_model_server --get_perf --iterations {num_iterations} "
        f"2>&1 | tee {log_file}"
    )

    ec2_performance_upload_result_to_s3_and_validate(
        ec2_connection,
        image_uri,
        log_file,
        "synthetic",
        threshold,
        post_process_inference,
        log_file,
    )
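# A hedged sketch of the version probe used above. is_tf_version is assumed to
# parse the framework version out of the image tag (reusing the
# get_framework_and_version_from_tag helper imported above) and match it against
# a major-version specifier; the SpecifierSet usage is an assumption, not the
# repository's actual implementation.
from packaging.specifiers import SpecifierSet
from packaging.version import Version


def is_tf_version(required_version, image_uri):
    _, image_framework_version = get_framework_and_version_from_tag(image_uri)
    return Version(image_framework_version) in SpecifierSet(f"=={required_version}.*")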
def pytest_runtest_setup(item):
    """
    Handle custom markers and options
    """
    # Handle quick check tests
    quick_checks_opts = [mark for mark in item.iter_markers(name="quick_checks")]

    # On PR, skip quick check tests unless we are on the quick_checks job
    test_type = os.getenv("TEST_TYPE", "UNDEFINED")
    quick_checks_test_type = "quick_checks"
    if test_type != quick_checks_test_type and test_utils.is_pr_context():
        if quick_checks_opts:
            pytest.skip(
                f"Skipping quick check tests on PR, since test type is {test_type}, and not {quick_checks_test_type}"
            )

    # If we have enabled the quick_checks flag, we expect to only run tests marked as quick_checks
    if item.config.getoption("--quick_checks"):
        if not quick_checks_opts:
            pytest.skip("Skipping non-quick-check tests")

    # Handle canary test conditional skipping
    if item.config.getoption("--canary"):
        canary_opts = [mark for mark in item.iter_markers(name="canary")]
        if not canary_opts:
            pytest.skip("Skipping non-canary tests")

    # Handle multinode conditional skipping
    if item.config.getoption("--multinode"):
        multinode_opts = [mark for mark in item.iter_markers(name="multinode")]
        if not multinode_opts:
            pytest.skip("Skipping non-multinode tests")

    # Handle efa conditional skipping
    if item.config.getoption("--efa"):
        efa_tests = [mark for mark in item.iter_markers(name="efa")]
        if not efa_tests:
            pytest.skip("Skipping non-efa tests")
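# The --quick_checks, --canary, --multinode, and --efa flags consumed above must
# be registered through pytest's addoption hook; a minimal sketch of that
# registration in conftest.py (help strings are assumptions):
def pytest_addoption(parser):
    for flag in ("--quick_checks", "--canary", "--multinode", "--efa"):
        parser.addoption(flag, action="store_true", default=False,
                         help=f"Run only tests selected by {flag}")


# Individual tests opt in to one of these categories via the matching marker:
@pytest.mark.quick_checks
def test_example_quick_check():
    assert True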
import json

import pytest
from invoke.context import Context

from test.test_utils import is_pr_context, PR_ONLY_REASON


@pytest.mark.usefixtures("sagemaker")
@pytest.mark.skipif(not is_pr_context(), reason=PR_ONLY_REASON)
@pytest.mark.model("N/A")
def test_binary_visibility(image: str):
    """
    Test whether the binaries built into the image are public or private.

    Assumes that URIs beginning with "s3://" are private, which mandates
    specifying all public links as ones beginning with "https://". An s3
    object addressed via "https://" may still be private, but the codebuild
    "build" job fetches these links with curl (i.e. an unsigned request),
    so it should fail if an "https://" link is still private.
    """
    ctx = Context()
    labels = json.loads(
        ctx.run("docker inspect --format='{{json .Config.Labels}}' " + image).stdout.strip()
    )

    for label_name, label_value in labels.items():
        if "uri" in label_name.lower():
            assert label_value.startswith("https://")
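# For reference, the check above inspects image labels of the following
# (hypothetical) shape, set in the Dockerfile at build time; any label whose
# name contains "uri" must point at a public https:// location:
#
#   LABEL dlc.install.source.uri="https://example.com/artifacts/binary.whl"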
def delete_ssh_keypair():
    if test_utils.is_pr_context():
        test_utils.destroy_ssh_keypair(ec2_client, key_filename)
    else:
        with open(KEYS_TO_DESTROY_FILE, "a") as destroy_keys:
            destroy_keys.write(f"{key_filename}\n")
import os
import random

import pytest
from invoke import run
from invoke.context import Context
from retrying import retry
from packaging.version import Version

import test.test_utils.eks as eks_utils
from test.test_utils import is_pr_context, SKIP_PR_REASON, is_below_framework_version
from test.test_utils import get_framework_and_version_from_tag, get_cuda_version_from_tag

LOGGER = eks_utils.LOGGER


@pytest.mark.skipif(
    not is_pr_context(),
    reason="Skip this test. It is already tested under PR context and we do not have enough resources to test it again on mainline pipeline",
)
@pytest.mark.model("mnist")
def test_eks_pytorch_single_node_training(pytorch_training):
    """
    Function to create a pod using kubectl and a given container image, and run PyTorch training

    Args:
    :param setup_utils: environment in which EKS tools are set up
    :param pytorch_training: the ECR URI
    """
    training_result = False

    rand_int = random.randint(4001, 6000)
    yaml_path = os.path.join(os.sep, "tmp", f"pytorch_single_node_training_{rand_int}.yaml")
    pod_name = f"pytorch-single-node-training-{rand_int}"
        vulnerability_severity[severity] = [vulnerability]

    # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
    if not (vulnerability_severity.get("CRITICAL") or vulnerability_severity.get("HIGH")):
        return

    raise DependencyCheckFailure(
        f"Unrecognized CVEs have been reported: {vulnerability_severity}. "
        f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
        f"{dependency_check_report} for more details."
    )


@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", ["c5.4xlarge"], indirect=True)
@pytest.mark.skipif(is_pr_context(), reason="Do not run dependency check on PR tests")
def test_dependency_check_cpu(cpu, ec2_connection):
    _run_dependency_check_test(cpu, ec2_connection, "cpu")


@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", ["p3.2xlarge"], indirect=True)
@pytest.mark.skipif(is_pr_context(), reason="Do not run dependency check on PR tests")
def test_dependency_check_gpu(gpu, ec2_connection):
    _run_dependency_check_test(gpu, ec2_connection, "gpu")


@pytest.mark.model("N/A")
@pytest.mark.canary("Run pip check test regularly on production images")
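# DependencyCheckFailure is raised above but not defined in this excerpt; a
# minimal sketch of such an exception type (docstring is an assumption):
class DependencyCheckFailure(Exception):
    """Raised when the dependency check report contains unapproved CVEs."""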
import os

import pytest

from test.test_utils import CONTAINER_TESTS_PREFIX, is_pr_context
from test.test_utils.ec2 import execute_ec2_training_test

PT_STANDALONE_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testPyTorchStandalone")
PT_MNIST_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testPyTorch")
PT_REGRESSION_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testPyTorchRegression")
PT_DGL_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "dgl_tests", "testPyTorchDGL")
PT_APEX_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testNVApex")

if is_pr_context():
    # Keep the PR instance matrix small: one GPU and one CPU instance type
    PT_EC2_GPU_INSTANCE_TYPE = ["p3.2xlarge"]
    PT_EC2_CPU_INSTANCE_TYPE = ["c5.9xlarge"]
else:
    # TODO: Add p3dn if releasing
    PT_EC2_GPU_INSTANCE_TYPE = ["g3.4xlarge", "p2.8xlarge", "p3.16xlarge"]
    PT_EC2_CPU_INSTANCE_TYPE = ["c4.8xlarge", "c5.18xlarge", "m4.16xlarge", "t2.2xlarge"]


@pytest.mark.parametrize("ec2_instance_type", PT_EC2_GPU_INSTANCE_TYPE, indirect=True)
def test_pytorch_standalone_gpu(pytorch_training, ec2_connection, gpu_only):
    execute_ec2_training_test(ec2_connection, pytorch_training, PT_STANDALONE_CMD)