def test_smdebug_gpu(training, ec2_connection, region, ec2_instance_type,
                     gpu_only, py3_only):
    if test_utils.is_image_incompatible_with_instance_type(
            training, ec2_instance_type):
        pytest.skip(
            f"Image {training} is incompatible with instance type {ec2_instance_type}"
        )
    smdebug_test_timeout = 2400
    if is_tf_version("1", training):
        if is_nightly_context():
            smdebug_test_timeout = 7200
        else:
        pytest.skip(
            "TF1 GPU smdebug tests can take up to 2 hours, so they are only run in the nightly context"
        )

    run_smdebug_test(
        training,
        ec2_connection,
        region,
        ec2_instance_type,
        docker_executable="nvidia-docker",
        container_name="smdebug-gpu",
        timeout=smdebug_test_timeout,
    )
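

# For reference, is_tf_version() above gates on the framework major version.
# A minimal sketch of such a check, assuming the version can be parsed out of
# the image URI tag (the regex and function name are illustrative, not the
# repo's actual helper):
import re


def _is_tf_version_sketch(required_major_version, image_uri):
    """Return True if image_uri looks like a TF image of that major version."""
    if "tensorflow" not in image_uri:
        return False
    # DLC tags typically look like ":1.15.5-gpu-py37-..."; grab the major digits.
    match = re.search(r":(\d+)\.", image_uri)
    return match is not None and match.group(1) == str(required_major_version)
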
@pytest.mark.skipif(not is_nightly_context(),
                    reason="Running additional model in nightly context only")
@pytest.mark.model("albert")
@pytest.mark.parametrize("ecs_instance_type", ["p3.2xlarge"], indirect=True)
@pytest.mark.parametrize("ecs_ami", [ECS_AML2_GPU_USWEST2], indirect=True)
def test_ecs_tensorflow_inference_gpu_nlp(tensorflow_inference,
                                          ecs_container_instance, region,
                                          gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id,
                                               region=region)

    model_name = "albert"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        model_name = get_tensorflow_model_name("gpu", model_name)
        inference_result = request_tensorflow_inference(
            model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)
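
# request_tensorflow_inference() above hits the model server over HTTP. A
# self-contained sketch of such a call against TF Serving's standard REST
# predict endpoint; the port, payload shape, and function name are
# assumptions rather than the repo's actual implementation:
import requests


def _request_tf_inference_sketch(model_name, ip_address="127.0.0.1", port=8501):
    url = f"http://{ip_address}:{port}/v1/models/{model_name}:predict"
    payload = {"instances": [[1.0, 2.0, 5.0]]}  # dummy input
    try:
        response = requests.post(url, json=payload, timeout=60)
    except requests.exceptions.ConnectionError:
        return False
    return response.status_code == 200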


@pytest.mark.skipif(not is_nightly_context(),
                    reason="Running additional model in nightly context only")
@pytest.mark.model("albert")
@pytest.mark.parametrize("ecs_instance_type", ["c5.4xlarge"], indirect=True)
@pytest.mark.parametrize("ecs_ami", [ECS_AML2_CPU_USWEST2], indirect=True)
def test_ecs_tensorflow_inference_cpu_nlp(tensorflow_inference,
                                          ecs_container_instance, region,
                                          cpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)

    model_name = "albert"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            region=region,
        )
        model_name = get_tensorflow_model_name("cpu", model_name)
        inference_result = request_tensorflow_inference(
            model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)

        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8500")

        assert test_utils.request_tensorflow_inference(model_name=model_name, port=port_to_forward)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


@pytest.mark.skipif(not test_utils.is_nightly_context(), reason="Running additional model in nightly context only")
@pytest.mark.skip(
    "Skipping test due to S3 permission issues. https://github.com/aws/deep-learning-containers/issues/818"
)
@pytest.mark.model("albert")
def test_eks_tensorflow_albert(tensorflow_inference):
    if "eia" in tensorflow_inference or "neuron" in tensorflow_inference:
        pytest.skip("Skipping EKS Test for EIA and neuron Images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in tensorflow_inference else "cpu"

    model_name = f"albert"
    yaml_path = os.path.join(os.sep, "tmp", f"tensorflow_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"albert-{processor}-{rand_int}"

    search_replace_dict = {
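
# The search/replace mapping begun above feeds a plain-text templating step
# that produces the yaml applied with "kubectl apply" earlier. A minimal
# sketch of such a step; the helper name and placeholder convention (e.g.
# "<MODEL_NAME>") are assumptions, not the repo's actual API:
def _write_yaml_from_template_sketch(template_path, out_path, replacements):
    with open(template_path) as template_file:
        content = template_file.read()
    for placeholder, value in replacements.items():  # e.g. {"<MODEL_NAME>": model_name}
        content = content.replace(placeholder, str(value))
    with open(out_path, "w") as out_file:
        out_file.write(content)
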
        # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
        if not (vulnerability_severity.get("CRITICAL")
                or vulnerability_severity.get("HIGH")):
            return

        raise DependencyCheckFailure(
            f"Unrecognized CVEs have been reported : {vulnerability_severity}. "
            f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
            f"{dependency_check_report} for more details.")


@pytest.mark.model("N/A")
@pytest.mark.canary("Run dependency tests regularly on production images")
@pytest.mark.parametrize("ec2_instance_type", ["c5.4xlarge"], indirect=True)
@pytest.mark.skipif(
    not (is_nightly_context() or is_mainline_context() or
         (is_canary_context() and is_time_for_canary_safety_scan())),
    reason="Do not run dependency check on PR tests. "
    "Executing test in canaries pipeline during only a limited period of time."
)
def test_dependency_check_cpu(cpu, ec2_connection):
    _run_dependency_check_test(cpu, ec2_connection, "cpu")


@pytest.mark.model("N/A")
@pytest.mark.canary("Run dependency tests regularly on production images")
@pytest.mark.parametrize("ec2_instance_type", ["p3.2xlarge"], indirect=True)
@pytest.mark.skipif(
    not (is_nightly_context() or is_mainline_context() or
         (is_canary_context() and is_time_for_canary_safety_scan())),
    reason="Do not run dependency check on PR tests. "


Example #7
def pytest_generate_tests(metafunc):
    images = metafunc.config.getoption("--images")

    # Don't parametrize if there are no images to parametrize
    if not images:
        return

    # Parametrize framework specific tests
    for fixture in FRAMEWORK_FIXTURES:
        if fixture in metafunc.fixturenames:
            lookup = fixture.replace("_", "-")
            images_to_parametrize = []
            for image in images:
                if lookup_condition(lookup, image):
                    # Classify the image so each image type only runs under
                    # its matching fixture filter.
                    is_example_lookup = "example_only" in metafunc.fixturenames and "example" in image
                    is_huggingface_lookup = (
                        ("huggingface_only" in metafunc.fixturenames
                         or "huggingface" in metafunc.fixturenames)
                        and "huggingface" in image)
                    is_trcomp_lookup = ("trcomp" in image and "example_only"
                                        not in metafunc.fixturenames)
                    is_standard_lookup = all(
                        fixture_name not in metafunc.fixturenames
                        for fixture_name in
                        ["example_only", "huggingface_only"]) and all(
                            keyword not in image
                            for keyword in ["example", "huggingface"])
                    if "sagemaker_only" in metafunc.fixturenames and is_ec2_image(
                            image):
                        LOGGER.info(
                            f"Not running EC2 image {image} on sagemaker_only test"
                        )
                        continue
                    if is_sagemaker_image(image):
                        if "sagemaker_only" not in metafunc.fixturenames and "sagemaker" not in metafunc.fixturenames:
                            LOGGER.info(
                                "Skipping test, as this function is not marked as 'sagemaker_only' or 'sagemaker'"
                            )
                            continue
                    if not framework_version_within_limit(metafunc, image):
                        continue
                    if "non_huggingface_only" in metafunc.fixturenames and "huggingface" in image:
                        continue
                    if "non_autogluon_only" in metafunc.fixturenames and "autogluon" in image:
                        continue
                    if "x86_compatible_only" in metafunc.fixturenames and "graviton" in image:
                        continue
                    if "training_compiler_only" in metafunc.fixturenames and not (
                            "trcomp" in image):
                        continue
                    if is_example_lookup or is_huggingface_lookup or is_standard_lookup or is_trcomp_lookup:
                        if "cpu_only" in metafunc.fixturenames and "cpu" in image and "eia" not in image:
                            images_to_parametrize.append(image)
                        elif "gpu_only" in metafunc.fixturenames and "gpu" in image:
                            images_to_parametrize.append(image)
                        elif "graviton_compatible_only" in metafunc.fixturenames and "graviton" in image:
                            images_to_parametrize.append(image)
                        elif ("cpu_only" not in metafunc.fixturenames
                              and "gpu_only" not in metafunc.fixturenames
                              and "graviton_compatible_only"
                              not in metafunc.fixturenames):
                            images_to_parametrize.append(image)

            # Remove all images tagged as "py2" if py3_only is a fixture
            if images_to_parametrize and "py3_only" in metafunc.fixturenames:
                images_to_parametrize = [
                    py3_image for py3_image in images_to_parametrize
                    if "py2" not in py3_image
                ]

            if is_nightly_context():
                nightly_images_to_parametrize = []
                # filter the nightly fixtures in the current functional context
                func_nightly_fixtures = {
                    key: value
                    for (key, value) in NIGHTLY_FIXTURES.items()
                    if key in metafunc.fixturenames
                }
                # iterate through image candidates and select images with labels that match all nightly fixture labels
                for image_candidate in images_to_parametrize:
                    if all(
                            are_valid_fixture_labels_present(
                                image_candidate, nightly_labels)
                            for nightly_labels in
                            func_nightly_fixtures.values()):
                        nightly_images_to_parametrize.append(image_candidate)
                images_to_parametrize = nightly_images_to_parametrize

            # Parametrize tests that spin up an ecs cluster or tests that spin up an EC2 instance with a unique name
            values_to_generate_for_fixture = {
                "ecs_container_instance": "ecs_cluster_name",
                "ec2_connection": "ec2_key_name",
            }

            fixtures_parametrized = generate_unique_values_for_fixtures(
                metafunc, images_to_parametrize,
                values_to_generate_for_fixture)
            if fixtures_parametrized:
                for new_fixture_name, test_parametrization in fixtures_parametrized.items():
                    metafunc.parametrize(f"{fixture},{new_fixture_name}",
                                         test_parametrization)
            else:
                metafunc.parametrize(fixture, images_to_parametrize)

    # Parametrize for framework agnostic tests, i.e. sanity
    if "image" in metafunc.fixturenames:
        metafunc.parametrize("image", images)