def test_dependency_check_gpu(gpu, ec2_connection):
    container_name = "dep_check_gpu"
    report_addon = _get_container_name('depcheck-report', gpu)
    dependency_check_report = f"{report_addon}.html"
    test_script = os.path.join(CONTAINER_TESTS_PREFIX, 'testDependencyCheck')
    ec2.execute_ec2_training_test(ec2_connection,
                                  gpu,
                                  test_script,
                                  container_name=container_name)

    if is_dlc_cicd_context():
        ec2_connection.run(
            f"docker cp {container_name}:/build/dependency-check-report.html ~/{dependency_check_report}"
        )
        ec2_connection.run(
            f"aws s3 cp ~/{dependency_check_report} s3://dlc-dependency-check")
    """
    Get a list of known safety check issue IDs to ignore, if specified in IGNORE_LISTS.
    :param image_uri:
    :return: <list> list of safety check IDs to ignore
    """
    framework = ("mxnet" if "mxnet" in image_uri else
                 "pytorch" if "pytorch" in image_uri else
                 "tensorflow")
    job_type = "training" if "training" in image_uri else "inference-eia" if "eia" in image_uri else "inference"
    python_version = "py2" if "py2" in image_uri else "py3"

    return IGNORE_SAFETY_IDS.get(framework, {}).get(job_type, {}).get(python_version)
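
# For illustration (hypothetical structure): the chained .get() lookup above assumes
# IGNORE_SAFETY_IDS is a nested dict keyed framework -> job type -> python version, e.g.:
#
#     IGNORE_SAFETY_IDS = {
#         "tensorflow": {
#             "training": {"py3": ["38449", "39409"]},   # hypothetical safety issue IDs
#             "inference": {"py3": []},
#         },
#     }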


@pytest.mark.canary("Run safety tests regularly on production images")
@pytest.mark.skipif(not is_dlc_cicd_context(), reason="Skipping test because it is not running in dlc cicd infra")
def test_safety(image):
    """
    Runs safety check on a container, with the ability to ignore safety issues that cannot be fixed, and raises an
    error only if an issue is fixable.
    """
    from dlc.safety_check import SafetyCheck
    safety_check = SafetyCheck()

    repo_name, image_tag = image.split('/')[-1].split(':')
    ignore_ids_list = _get_safety_ignore_list(image)
    sep = " -i "
    ignore_str = "" if not ignore_ids_list else f"{sep}{sep.join(ignore_ids_list)}"
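    # e.g. with hypothetical IDs ignore_ids_list = ["38449", "39409"], ignore_str becomes
    # " -i 38449 -i 39409", matching the repeated -i/--ignore flags of the safety CLI.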

    container_name = f"{repo_name}-{image_tag}-safety"
    docker_exec_cmd = f"docker exec -i {container_name}"


def test_cuda_paths(gpu):
    """
    Test to ensure that:
    a. buildspec contains an entry to create the same image as the image URI
    b. directory structure for GPU Dockerfiles has framework version, python version, and cuda version in it

    :param gpu: gpu image uris
    """
    image = gpu
    if "example" in image:
        pytest.skip(
            "Skipping Example Dockerfiles which are not explicitly tied to a cuda version"
        )

    dlc_path = os.getcwd().split("/test/")[0]
    job_type = "training" if "training" in image else "inference"

    # Ensure that image has a supported framework
    frameworks = ("tensorflow", "pytorch", "mxnet")
    framework = ""
    for fw in frameworks:
        if fw in image:
            framework = fw
            break
    assert framework, f"Cannot find any frameworks {frameworks} in image uri {image}"

    # Get cuda, framework version, python version through regex
    cuda_version = re.search(r"-(cu\d+)-", image).group(1)
    framework_version = re.search(r":(\d+(\.\d+){2})", image).group(1)
    framework_short_version = None
    python_version = re.search(r"(py\d+)", image).group(1)
    short_python_version = None
    image_tag = re.search(
        r":(\d+(\.\d+){2}-(cpu|gpu|neuron)-(py\d+)(-cu\d+)-(ubuntu\d+\.\d+)(-example)?)",
        image).group(1)
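    # For illustration, a hypothetical tag such as "1.15.2-gpu-py36-cu100-ubuntu18.04"
    # satisfies the pattern above.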

    framework_version_path = os.path.join(dlc_path, framework, job_type,
                                          "docker", framework_version)
    if not os.path.exists(framework_version_path):
        framework_short_version = re.match(r"(\d+.\d+)",
                                           framework_version).group(1)
        framework_version_path = os.path.join(dlc_path, framework, job_type,
                                              "docker",
                                              framework_short_version)
    if not os.path.exists(os.path.join(framework_version_path,
                                       python_version)):
        # Use the pyX version as opposed to the pyXY version if pyXY path does not exist
        short_python_version = python_version[:3]

    # Check buildspec for cuda version
    buildspec = "buildspec.yml"
    if is_tf_version("1", image):
        buildspec = "buildspec-tf1.yml"

    cuda_in_buildspec = False
    dockerfile_spec_abs_path = None
    cuda_in_buildspec_ref = f"CUDA_VERSION {cuda_version}"
    buildspec_path = os.path.join(dlc_path, framework, buildspec)
    buildspec_def = Buildspec()
    buildspec_def.load(buildspec_path)

    for name, image_spec in buildspec_def["images"].items():
        if image_spec["device_type"] == "gpu" and image_spec[
                "tag"] == image_tag:
            cuda_in_buildspec = True
            dockerfile_spec_abs_path = os.path.join(
                os.path.dirname(framework_version_path),
                image_spec["docker_file"].lstrip("docker/"))
            break

    try:
        assert cuda_in_buildspec, f"Can't find {cuda_in_buildspec_ref} in {buildspec_path}"
    except AssertionError as e:
        if not is_dlc_cicd_context():
            LOGGER.warn(
                f"{e} - not failing, as this is a(n) {os.getenv('BUILD_CONTEXT', 'empty')} build context."
            )
        else:
            raise

    image_properties_expected_in_dockerfile_path = [
        framework_short_version or framework_version, short_python_version
        or python_version, cuda_version
    ]
    assert all(
        prop in dockerfile_spec_abs_path
        for prop in image_properties_expected_in_dockerfile_path
    ), (f"Dockerfile location {dockerfile_spec_abs_path} does not contain all the image properties in "
        f"{image_properties_expected_in_dockerfile_path}")

    assert os.path.exists(
        dockerfile_spec_abs_path
    ), f"Cannot find dockerfile for {image} in {dockerfile_spec_abs_path}"


def _get_latest_package_version(package):
    """
    Get the latest version of a package that is available on PyPI.

    :param package: str Name of the package whose latest version must be retrieved
    :return: str Latest version of the package published on PyPI
    """
    pypi_package_info = requests.get(f"https://pypi.org/pypi/{package}/json")
    data = json.loads(pypi_package_info.text)
    versions = data["releases"].keys()
    return str(max(Version(v) for v in versions))
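
# Minimal usage sketch (hypothetical package name; assumes `requests`, `json`, and
# `packaging.version.Version` are imported elsewhere in this module):
#     _get_latest_package_version("numpy")  # -> newest release string published on PyPI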


@pytest.mark.usefixtures("sagemaker")
@pytest.mark.model("N/A")
@pytest.mark.canary("Run safety tests regularly on production images")
@pytest.mark.skipif(
    not is_dlc_cicd_context(),
    reason="Skipping test because it is not running in dlc cicd infra")
@pytest.mark.skipif(
    not (is_safety_test_context()),
    reason=
    ("Skipping the test to decrease the number of calls to the Safety Check DB. "
     "Test will be executed in the 'mainline' pipeline and canaries pipeline."
     ),
)
def test_safety(image):
    """
    Runs safety check on a container, with the ability to ignore safety issues that cannot be fixed, and raises an
    error only if an issue is fixable.
    """
    from dlc.safety_check import SafetyCheck


def test_cuda_paths(gpu):
    """
    Test to ensure that:
    a. buildspec contains an entry to create the same image as the image URI
    b. directory structure for GPU Dockerfiles has framework version, python version, and cuda version in it

    :param gpu: gpu image uris
    """
    image = gpu
    if "example" in image:
        pytest.skip(
            "Skipping Example Dockerfiles which are not explicitly tied to a cuda version"
        )

    dlc_path = os.getcwd().split("/test/")[0]
    job_type = "training" if "training" in image else "inference"

    # Ensure that image has a supported framework
    framework, framework_version = get_framework_and_version_from_tag(image)

    # Get cuda, framework version, python version through regex
    cuda_version = re.search(r"-(cu\d+)-", image).group(1)
    framework_short_version = None
    python_version = re.search(r"(py\d+)", image).group(1)
    short_python_version = None
    image_tag = re.search(
        r":(\d+(\.\d+){2}(-transformers\d+(\.\d+){2})?-(gpu)-(py\d+)(-cu\d+)-(ubuntu\d+\.\d+)((-e3)?-example|-e3|-sagemaker)?)",
        image,
    ).group(1)
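    # For illustration, a hypothetical tag such as
    # "1.8.1-transformers4.6.1-gpu-py36-cu111-ubuntu18.04" satisfies the pattern above.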

    # replacing '_' by '/' to handle huggingface_<framework> case
    framework_path = framework.replace("_", "/")
    framework_version_path = os.path.join(dlc_path, framework_path, job_type,
                                          "docker", framework_version)
    if not os.path.exists(framework_version_path):
        framework_short_version = re.match(r"(\d+.\d+)",
                                           framework_version).group(1)
        framework_version_path = os.path.join(dlc_path, framework_path,
                                              job_type, "docker",
                                              framework_short_version)
    if not os.path.exists(os.path.join(framework_version_path,
                                       python_version)):
        # Use the pyX version as opposed to the pyXY version if pyXY path does not exist
        short_python_version = python_version[:3]

    # Check buildspec for cuda version
    buildspec = "buildspec.yml"
    if is_tf_version("1", image):
        buildspec = "buildspec-tf1.yml"

    image_tag_in_buildspec = False
    dockerfile_spec_abs_path = None
    buildspec_path = os.path.join(dlc_path, framework_path, buildspec)
    buildspec_def = Buildspec()
    buildspec_def.load(buildspec_path)

    for name, image_spec in buildspec_def["images"].items():
        if image_spec["device_type"] == "gpu" and image_spec[
                "tag"] == image_tag:
            image_tag_in_buildspec = True
            dockerfile_spec_abs_path = os.path.join(
                os.path.dirname(framework_version_path),
                image_spec["docker_file"].lstrip("docker/"))
            break
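    # For illustration, a hypothetical buildspec entry "docker_file: docker/1.8/py3/cu111/Dockerfile.gpu"
    # would resolve dockerfile_spec_abs_path to
    # "<dlc_path>/huggingface/pytorch/training/docker/1.8/py3/cu111/Dockerfile.gpu".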
    try:
        assert image_tag_in_buildspec, f"Image tag {image_tag} not found in {buildspec_path}"
    except AssertionError as e:
        if not is_dlc_cicd_context():
            LOGGER.warn(
                f"{e} - not failing, as this is a(n) {os.getenv('BUILD_CONTEXT', 'empty')} build context."
            )
        else:
            raise

    image_properties_expected_in_dockerfile_path = [
        framework_short_version or framework_version,
        short_python_version or python_version,
        cuda_version,
    ]
    assert all(
        prop in dockerfile_spec_abs_path
        for prop in image_properties_expected_in_dockerfile_path
    ), (f"Dockerfile location {dockerfile_spec_abs_path} does not contain all the image properties in "
        f"{image_properties_expected_in_dockerfile_path}")

    assert os.path.exists(
        dockerfile_spec_abs_path
    ), f"Cannot find dockerfile for {image} in {dockerfile_spec_abs_path}"


def _assert_artifact_free(output, stray_artifacts):
    """
    Loop through assertions to confirm that the inspected directories don't contain known stray files.

    :param output: Invoke result object
    :param stray_artifacts: List of things that should not be present in these directories
    """
    for artifact in stray_artifacts:
        assert not re.search(
            artifact, output.stdout
        ), f"Matched {artifact} in {output.stdout} while running {output.command}"


@pytest.mark.integration("oss_compliance")
@pytest.mark.model("N/A")
@pytest.mark.skipif(not is_dlc_cicd_context(), reason="We need to test OSS compliance only on PRs and pipelines")
def test_oss_compliance(image):
    """
    Run the OSS compliance check on a container to verify that license attribution files exist,
    and upload the source of third-party packages to an S3 bucket.
    """
    THIRD_PARTY_SOURCE_CODE_BUCKET = "aws-dlinfra-licenses"
    THIRD_PARTY_SOURCE_CODE_BUCKET_PATH = "third_party_source_code"
    file = "THIRD_PARTY_SOURCE_CODE_URLS"
    container_name = get_container_name("oss_compliance", image)
    context = Context()
    local_repo_path = get_repository_local_path()
    start_container(container_name, image, context)

    # Run the compliance test to make sure license attribution files exist. testOSSCompliance is copied in as part of the Dockerfile.
    run_cmd_on_container(container_name, context, "/usr/local/bin/testOSSCompliance /root")


def test_cuda_paths(gpu):
    """
    Test to ensure directory structure for GPU Dockerfiles has cuda version in it

    :param gpu: gpu image uris
    """
    image = gpu
    if "example" in image:
        pytest.skip(
            "Skipping Example Dockerfiles which are not explicitly tied to a cuda version"
        )

    dlc_path = os.getcwd().split("/test/")[0]
    job_type = "training" if "training" in image else "inference"

    # Ensure that image has a supported framework
    frameworks = ("tensorflow", "pytorch", "mxnet")
    framework = ""
    for fw in frameworks:
        if fw in image:
            framework = fw
            break
    assert framework, f"Cannot find any frameworks {frameworks} in image uri {image}"

    # Get cuda, framework version, python version through regex
    cuda_version = re.search(r"-(cu\d+)-", image).group(1)
    framework_version = re.search(r":(\d+(.\d+){2})", image).group(1)
    python_version = re.search(r"(py\d+)", image).group(1)

    framework_version_path = os.path.join(dlc_path, framework, job_type,
                                          "docker", framework_version)
    if not os.path.exists(framework_version_path):
        framework_short_version = re.match(r"(\d+.\d+)",
                                           framework_version).group(1)
        framework_version_path = os.path.join(dlc_path, framework, job_type,
                                              "docker",
                                              framework_short_version)
    if not os.path.exists(os.path.join(framework_version_path,
                                       python_version)):
        # Use the pyX version as opposed to the pyXY version if pyXY path does not exist
        python_version = python_version[:3]

    # Check buildspec for cuda version
    buildspec = "buildspec.yml"
    if is_tf_version("1", image):
        buildspec = "buildspec-tf1.yml"

    cuda_in_buildspec = False
    cuda_in_buildspec_ref = f"CUDA_VERSION {cuda_version}"
    buildspec_path = os.path.join(dlc_path, framework, buildspec)
    with open(buildspec_path, "r") as bf:
        for line in bf:
            if cuda_in_buildspec_ref in line:
                cuda_in_buildspec = True
                break

    try:
        assert cuda_in_buildspec, f"Can't find {cuda_in_buildspec_ref} in {buildspec_path}"
    except AssertionError as e:
        if not is_dlc_cicd_context():
            LOGGER.warn(
                f"{e} - not failing, as this is a(n) {os.getenv('BUILD_CONTEXT', 'empty')} build context."
            )
        else:
            raise

    # Check that a Dockerfile exists in the right directory
    dockerfile_path = os.path.join(framework_version_path, python_version,
                                   cuda_version, "Dockerfile.gpu")
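    # For illustration (hypothetical image): a tensorflow training image with short framework
    # version "1.15", python_version "py3", and cuda_version "cu100" would resolve to
    # "<dlc_path>/tensorflow/training/docker/1.15/py3/cu100/Dockerfile.gpu".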

    assert os.path.exists(
        dockerfile_path
    ), f"Cannot find dockerfile for image {image} in {dockerfile_path}"


        assert not remaining_vulnerabilities, (
            f"The following vulnerabilities need to be fixed on {image}:\n"
            f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
        )
        return

    assert not remaining_vulnerabilities.vulnerability_list, (
        f"The following vulnerabilities need to be fixed on {image}:\n"
        f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
    )


@pytest.mark.usefixtures("sagemaker")
@pytest.mark.model("N/A")
@pytest.mark.integration("check OS dependencies")
@pytest.mark.skipif(is_dlc_cicd_context(), reason="Temporarily allow slack in allowlist w.r.t. actual vulnerabilities")
def test_is_ecr_scan_allowlist_outdated(image, ecr_client, sts_client, region):
    """
    Run ECR Scan Tool on an image being tested, and test if the vulnerabilities in the allowlist for the image
    are still valid, or if any vulnerabilities must be removed from the list.

    :param image: str Image URI for image to be tested
    :param ecr_client: boto3 Client for ECR
    :param sts_client: boto3 Client for STS
    :param region: str Name of region where test is executed
    """
    test_account_id = sts_client.get_caller_identity().get("Account")
    image_account_id = get_account_id_from_image_uri(image)
    if image_account_id != test_account_id:
        image_repo_uri, image_tag = image.split(":")
        _, image_repo_name = image_repo_uri.split("/")