Example #1
def _run_instance_role_disabled(image_uri, ec2_client, ec2_instance,
                                ec2_connection):
    expected_tag_key = "aws-dlc-autogenerated-tag-do-not-delete"

    ec2_instance_id, _ = ec2_instance
    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, image_tag = test_utils.get_repository_and_tag_from_image_uri(
        image_uri)
    framework, _ = test_utils.get_framework_and_version_from_tag(image_uri)
    job_type = test_utils.get_job_type_from_image(image_uri)
    processor = test_utils.get_processor_from_image_uri(image_uri)

    container_name = f"{repo_name}-telemetry_bad_instance_role-ec2"

    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"

    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)
    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    preexisting_ec2_instance_tags = ec2_utils.get_ec2_instance_tags(
        ec2_instance_id, ec2_client=ec2_client)
    if expected_tag_key in preexisting_ec2_instance_tags:
        ec2_client.remove_tags(Resources=[ec2_instance_id],
                               Tags=[{
                                   "Key": expected_tag_key
                               }])

    # Disable access to EC2 instance metadata
    ec2_connection.run(f"sudo route add -host 169.254.169.254 reject")

    if "tensorflow" in framework and job_type == "inference":
        model_name = "saved_model_half_plus_two"
        model_base_path = test_utils.get_tensorflow_model_base_path(image_uri)
        env_vars_list = test_utils.get_tensorflow_inference_environment_variables(
            model_name, model_base_path)
        env_vars = " ".join([
            f"-e {entry['name']}={entry['value']}" for entry in env_vars_list
        ])
        inference_command = get_tensorflow_inference_command_tf27_above(
            image_uri, model_name)
        ec2_connection.run(
            f"{docker_cmd} run {env_vars} --name {container_name} -id {image_uri} {inference_command}"
        )
        time.sleep(5)
    else:
        framework_to_import = framework.replace("huggingface_", "")
        framework_to_import = "torch" if framework_to_import == "pytorch" else framework_to_import
        ec2_connection.run(
            f"{docker_cmd} run --name {container_name} -id {image_uri} bash")
        output = ec2_connection.run(
            f"{docker_cmd} exec -i {container_name} python -c 'import {framework_to_import}; import time; time.sleep(5)'",
            warn=True)
        assert output.ok, f"'import {framework_to_import}' fails when credentials not configured"

    ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id,
                                                        ec2_client=ec2_client)
    assert expected_tag_key not in ec2_instance_tags, (
        f"{expected_tag_key} was applied as an instance tag. "
        "EC2 create_tags went through even though it should not have")
Example #2
def test_dlc_standard_labels(image, region):
    customer_type_label_prefix = "ec2" if test_utils.is_ec2_image(
        image) else "sagemaker"

    framework, fw_version = test_utils.get_framework_and_version_from_tag(
        image)
    framework = framework.replace('_', '-')
    fw_version = fw_version.replace('.', '-')
    device_type = test_utils.get_processor_from_image_uri(image)
    if device_type == "gpu":
        cuda_version = test_utils.get_cuda_version_from_tag(image)
        device_type = f"{device_type}.{cuda_version}"
    python_version = test_utils.get_python_version_from_image_uri(image)
    job_type = test_utils.get_job_type_from_image(image)
    transformers_version = test_utils.get_transformers_version_from_image_uri(
        image).replace('.', '-')
    os_version = test_utils.get_os_version_from_image_uri(image).replace(
        '.', '-')

    # TODO: Add x86 env variable to check explicitly for x86, instead of assuming that everything not graviton is x86
    arch_type = "graviton" if test_utils.is_graviton_architecture() else "x86"

    contributor = test_utils.get_contributor_from_image_uri(image)

    expected_labels = [
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.framework.{framework}.{fw_version}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.device.{device_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.python.{python_version}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.job.{job_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.arch.{arch_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.os.{os_version}",
    ]

    if contributor:
        expected_labels.append(
            f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.contributor.{contributor}"
        )
    if transformers_version:
        expected_labels.append(
            f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.lib.transformers.{transformers_version}"
        )

    actual_labels = test_utils.get_labels_from_ecr_image(image, region)

    missing_labels = []

    for label in expected_labels:
        if label not in actual_labels:
            missing_labels.append(label)

    # TODO: Remove this when ec2 labels are added. For now, ensure they are not added.
    if customer_type_label_prefix == "ec2":
        assert set(missing_labels) == set(expected_labels), \
            f"EC2 labels are not supported yet, and should not be added to containers. " \
            f"{set(expected_labels) - set(missing_labels)} should not be present."
    else:
        assert not missing_labels, \
            f"Labels {missing_labels} are expected in image {image}, but cannot be found. " \
            f"All labels on image: {actual_labels}"
Example #3
def generate_unique_values_for_fixtures(metafunc_obj, images_to_parametrize,
                                        values_to_generate_for_fixture):
    """
    Take a dictionary (values_to_generate_for_fixture), that maps a fixture name used in a test function to another
    fixture that needs to be parametrized, and parametrize to create unique resources for a test.

    :param metafunc_obj: pytest metafunc object
    :param images_to_parametrize: <list> list of image URIs which are used in a test
    :param values_to_generate_for_fixture: <dict> Mapping of "Fixture used" -> "Fixture to be parametrized"
    :return: <dict> Mapping of "Fixture to be parametrized" -> "Unique values for fixture to be parametrized"
    """
    job_type_map = {"training": "tr", "inference": "inf"}
    framework_name_map = {
        "tensorflow": "tf",
        "mxnet": "mx",
        "pytorch": "pt",
        "huggingface_pytorch": "hf-pt",
        "huggingface_tensorflow": "hf-tf",
        "huggingface_pytorch_trcomp": "hf-pt-trc",
        "huggingface_tensorflow_trcomp": "hf-tf-trc",
        "autogluon": "ag",
    }
    fixtures_parametrized = {}

    if images_to_parametrize:
        for key, new_fixture_name in values_to_generate_for_fixture.items():
            if key in metafunc_obj.fixturenames:
                fixtures_parametrized[new_fixture_name] = []
                for index, image in enumerate(images_to_parametrize):

                    # Tag fixtures with EC2 instance types if env variable is present
                    allowed_processors = ("gpu", "cpu", "eia", "neuron", "hpu")
                    instance_tag = ""
                    for processor in allowed_processors:
                        if processor in image:
                            if "graviton" in image:
                                instance_type_env = f"EC2_{processor.upper()}_GRAVITON_INSTANCE_TYPE"
                            else:
                                instance_type_env = f"EC2_{processor.upper()}_INSTANCE_TYPE"
                            instance_type = os.getenv(instance_type_env)
                            if instance_type:
                                instance_tag = f"-{instance_type.replace('.', '-')}"
                                break

                    image_tag = image.split(":")[-1].replace(".", "-")

                    framework, _ = get_framework_and_version_from_tag(image)

                    job_type = get_job_type_from_image(image)

                    fixtures_parametrized[new_fixture_name].append((
                        image,
                        f"{metafunc_obj.function.__name__}-{framework_name_map.get(framework)}-"
                        f"{job_type_map.get(job_type)}-{image_tag}-"
                        f"{os.getenv('CODEBUILD_RESOLVED_SOURCE_VERSION')}-{index}{instance_tag}",
                    ))
    return fixtures_parametrized
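

# Hedged usage sketch for generate_unique_values_for_fixtures: one way it could be wired into
# a pytest_generate_tests hook. The image URI, the "ec2_connection" -> "ec2_key_name" fixture
# mapping, and the hard-coded images list are illustrative assumptions, not taken from the
# original conftest.
def pytest_generate_tests(metafunc):
    images = [
        "123456789012.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.10.2-gpu-py38"
    ]
    fixtures_parametrized = generate_unique_values_for_fixtures(
        metafunc, images, {"ec2_connection": "ec2_key_name"})
    for fixture_name, image_and_value_pairs in fixtures_parametrized.items():
        # Each entry is (image_uri, unique_value), so parametrizing both argnames together
        # keeps the generated value paired with the image it was derived from.
        metafunc.parametrize(f"image,{fixture_name}", image_and_value_pairs)
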
def _run_tag_success(image_uri, ec2_client, ec2_instance, ec2_connection):
    expected_tag_key = "aws-dlc-autogenerated-tag-do-not-delete"

    ec2_instance_id, _ = ec2_instance
    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, image_tag = test_utils.get_repository_and_tag_from_image_uri(
        image_uri)
    framework, _ = test_utils.get_framework_and_version_from_tag(image_uri)
    job_type = test_utils.get_job_type_from_image(image_uri)
    processor = test_utils.get_processor_from_image_uri(image_uri)

    container_name = f"{repo_name}-telemetry_tag_instance_success-ec2"

    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"

    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)
    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    preexisting_ec2_instance_tags = ec2_utils.get_ec2_instance_tags(
        ec2_instance_id, ec2_client=ec2_client)
    if expected_tag_key in preexisting_ec2_instance_tags:
        ec2_client.remove_tags(Resources=[ec2_instance_id],
                               Tags=[{
                                   "Key": expected_tag_key
                               }])

    if framework == "tensorflow" and job_type == "inference":
        env_vars_list = ecs_utils.get_ecs_tensorflow_environment_variables(
            processor, "saved_model_half_plus_two")
        env_vars = " ".join([
            f"-e {entry['name']}={entry['value']}" for entry in env_vars_list
        ])
        ec2_connection.run(
            f"{docker_cmd} run {env_vars} --name {container_name} -id {image_uri}"
        )
        time.sleep(5)
    else:
        framework_to_import = framework.replace("huggingface_", "")
        framework_to_import = "torch" if framework_to_import == "pytorch" else framework_to_import
        ec2_connection.run(
            f"{docker_cmd} run --name {container_name} -id {image_uri} bash")
        output = ec2_connection.run(
            f"{docker_cmd} exec -i {container_name} python -c 'import {framework_to_import}; import time; time.sleep(5)'",
            warn=True)

    ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id,
                                                        ec2_client=ec2_client)
    assert expected_tag_key in ec2_instance_tags, f"{expected_tag_key} was not applied as an instance tag"
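

# Hedged sketch of what a tag lookup like ec2_utils.get_ec2_instance_tags could boil down to
# with plain boto3; the helper's actual implementation is not shown here, so treat this as an
# assumption about its behavior rather than a copy of it. The default region is arbitrary.
import boto3


def get_instance_tag_keys(instance_id, region_name="us-west-2"):
    """Return a {tag_key: tag_value} mapping for the given EC2 instance."""
    ec2 = boto3.client("ec2", region_name=region_name)
    response = ec2.describe_tags(
        Filters=[{"Name": "resource-id", "Values": [instance_id]}])
    return {tag["Key"]: tag["Value"] for tag in response["Tags"]}
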
def test_dlc_major_version_dockerfiles(image):
    """
    Test to make sure semantic versioning scheme in Dockerfiles is correct

    :param image: <str> ECR image URI
    """
    dlc_dir = os.getcwd().split(f"{os.sep}test{os.sep}")[0]
    job_type = test_utils.get_job_type_from_image(image)
    framework, fw_version = test_utils.get_framework_and_version_from_tag(
        image)
    processor = test_utils.get_processor_from_image_uri(image)

    # Assign a string of numbers associated with python version in tag. Python major version is not sufficient to
    # define DLC major version
    python_major_minor_version = re.search(r"-py(\d{2,})", image).group(1)

    root_dir = os.path.join(dlc_dir, framework, job_type, "docker")

    # Skip older FW versions that did not use this versioning scheme
    references = {
        "tensorflow2": "2.2.0",
        "tensorflow1": "1.16.0",
        "mxnet": "1.7.0",
        "pytorch": "1.5.0"
    }
    if test_utils.is_tf_version("1", image):
        reference_fw = "tensorflow1"
    elif test_utils.is_tf_version("2", image):
        reference_fw = "tensorflow2"
    else:
        reference_fw = framework
    if processor != "eia" and (
            reference_fw in references
            and Version(fw_version) < Version(references[reference_fw])):
        pytest.skip(
            f"Not enforcing new versioning scheme on old image {image}. "
            f"Started enforcing version scheme on the following: {references}")

    # Find all Dockerfile.<processor> for this framework/job_type's Major.Minor version
    dockerfiles = []
    fw_version_major_minor = re.match(r"(\d+\.\d+)", fw_version).group(1)
    for root, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename == f"Dockerfile.{processor}":
                dockerfile_path = os.path.join(root, filename)
                if "example" not in dockerfile_path and f"{os.sep}{fw_version_major_minor}" in dockerfile_path:
                    dockerfiles.append(dockerfile_path)

    # For the collected dockerfiles above, note the DLC major versions in each Dockerfile if python version matches
    # the current image under test
    versions = {}
    dlc_label_regex = re.compile(r'LABEL dlc_major_version="(\d+)"')
    python_version_regex = re.compile(r"ARG PYTHON_VERSION=(\d+\.\d+)")
    for dockerfile in dockerfiles:
        with open(dockerfile, "r") as df:
            dlc_version = None
            python_version = None
            for line in df:
                major_version_match = dlc_label_regex.match(line)
                python_version_match = python_version_regex.match(line)
                if major_version_match:
                    dlc_version = int(major_version_match.group(1))
                elif python_version_match:
                    python_version = python_version_match.group(1).replace(
                        ".", "")

            # Raise errors if dlc major version label and python version arg are not found in Dockerfile
            if not dlc_version:
                raise DLCMajorVersionLabelNotFound(
                    f"Cannot find dlc_major_version label in {dockerfile}")
            if not python_version:
                raise DLCPythonVersionNotFound(
                    f"Cannot find PYTHON_VERSION arg in {dockerfile}")
            if python_version == python_major_minor_version:
                versions[dockerfile] = dlc_version

    expected_versions = list(range(1, len(dockerfiles) + 1))
    actual_versions = sorted(versions.values())

    # Test case explicitly for TF2.3 gpu, since v1.0 is banned
    if (framework, fw_version_major_minor, processor,
            python_major_minor_version, job_type) == (
                "tensorflow",
                "2.3",
                "gpu",
                "37",
                "training",
            ):
        expected_versions = [v + 1 for v in expected_versions]
        assert 1 not in actual_versions, (
            f"DLC v1.0 is deprecated in TF2.3 gpu containers, but found major version 1 "
            f"in one of the Dockerfiles. Please inspect {versions}")

    # Test case explicitly for PyTorch 1.6.0 training gpu, since v2.0 is banned
    if (framework, fw_version_major_minor, processor,
            python_major_minor_version, job_type) == (
                "pytorch",
                "1.6",
                "gpu",
                "36",
                "training",
            ):
        expected_versions = [v + 1 for v in expected_versions]
        expected_versions[0] = 1
        assert 2 not in actual_versions, (
            f"DLC v2.0 is deprecated in PyTorch 1.6.0 gpu containers, but found major version 2 "
            f"in one of the Dockerfiles. Please inspect {versions}")

    # Note: If, for example, we find 3 dockerfiles with the same framework major/minor version, same processor,
    # and same python major/minor version, we will expect DLC major versions 1, 2, and 3. If an exception needs to be
    # made to this rule, please see the above handling of TF2.3 as an example.
    assert actual_versions == expected_versions, (
        f"Found DLC major versions {actual_versions} but expected {expected_versions} for "
        f"{framework} {job_type} {processor}. Full version info: {versions}. Py version: {python_major_minor_version}"
    )
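
For readers unfamiliar with the Dockerfile lines the scan above matches, here is a minimal self-contained illustration of the two regexes; the sample lines are invented for demonstration, not copied from any particular Dockerfile:

import re

dlc_label_regex = re.compile(r'LABEL dlc_major_version="(\d+)"')
python_version_regex = re.compile(r"ARG PYTHON_VERSION=(\d+\.\d+)")

sample_dockerfile_lines = [
    'LABEL dlc_major_version="2"',  # invented example line
    "ARG PYTHON_VERSION=3.7",  # invented example line
]

print(dlc_label_regex.match(sample_dockerfile_lines[0]).group(1))  # -> 2
print(python_version_regex.match(sample_dockerfile_lines[1]).group(1))  # -> 3.7
# The test strips the dot from PYTHON_VERSION ("3.7" -> "37") and compares it against the
# -pyXX digits parsed out of the image tag; a Dockerfile's dlc_major_version only counts
# toward the expected 1..N sequence when the two match.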