# NOTE: these functions are excerpted from a larger build module; repo-local
# names such as constants, utils, JobParameters, Buildspec, DockerImage,
# Context, FORMATTER, and LOGGER are assumed to be defined/imported elsewhere.
import os
from copy import deepcopy


def fetch_dlc_images_for_test_jobs(images, use_latest_additional_tag=False):
    """
    use the JobParamters.run_test_types values to pass on image ecr urls to each test type.
    :param images: list
    :return: dictionary
    """
    DLC_IMAGES = {
        "sagemaker": [],
        "ecs": [],
        "eks": [],
        "ec2": [],
        "sanity": []
    }

    build_disabled = not is_build_enabled()

    for docker_image in images:
        if not docker_image.is_test_promotion_enabled:
            continue
        use_preexisting_images = (
            build_disabled and docker_image.build_status == constants.NOT_BUILT)
        if docker_image.build_status == constants.SUCCESS or use_preexisting_images:
            ecr_url_to_test = docker_image.ecr_url
            if use_latest_additional_tag and docker_image.additional_tags:
                ecr_url_to_test = f"{docker_image.repository}:{docker_image.additional_tags[-1]}"

            # Run sanity tests on all images built
            DLC_IMAGES["sanity"].append(ecr_url_to_test)
            image_job_type = docker_image.info.get("image_type")
            image_device_type = docker_image.info.get("device_type")
            image_python_version = docker_image.info.get("python_version")
            image_tag = f"{image_job_type}_{image_device_type}_{image_python_version}"
            # When image_run_test_types has the key "all", its values can be
            # any of (all, ecs, eks, ec2, sagemaker)
            if constants.ALL in JobParameters.image_run_test_types:
                run_tests = JobParameters.image_run_test_types.get(
                    constants.ALL)
                run_tests = (constants.ALL_TESTS
                             if constants.ALL in run_tests else run_tests)
                for test in run_tests:
                    DLC_IMAGES[test].append(ecr_url_to_test)
            # When the key is training or inference, values can be (ecs, eks, ec2, sagemaker)
            if image_job_type in JobParameters.image_run_test_types:
                run_tests = JobParameters.image_run_test_types.get(
                    image_job_type)
                for test in run_tests:
                    DLC_IMAGES[test].append(ecr_url_to_test)
            # When the key is an image_tag (e.g. training_cpu_py3), values can be (ecs, eks, ec2, sagemaker)
            if image_tag in JobParameters.image_run_test_types:
                run_tests = JobParameters.image_run_test_types.get(image_tag)
                run_tests = (constants.ALL_TESTS
                             if constants.ALL in run_tests else run_tests)
                for test in run_tests:
                    DLC_IMAGES[test].append(ecr_url_to_test)

    # Deduplicate image URLs within each test type
    for test_type, test_images in DLC_IMAGES.items():
        if test_images:
            DLC_IMAGES[test_type] = list(set(test_images))
    return DLC_IMAGES
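

# Illustrative usage sketch, not part of the original module: _StubImage is a
# hypothetical stand-in for the repo's DockerImage objects, exposing only the
# attributes fetch_dlc_images_for_test_jobs actually reads.
def _demo_fetch_dlc_images_for_test_jobs():
    class _StubImage:
        is_test_promotion_enabled = True
        build_status = constants.SUCCESS
        repository = "123456789012.dkr.ecr.us-west-2.amazonaws.com/pytorch-training"
        additional_tags = ["latest"]
        ecr_url = "123456789012.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:2.0-cpu-py3"
        info = {"image_type": "training", "device_type": "cpu", "python_version": "py3"}

    # With JobParameters.image_run_test_types == {"all": ["ec2"]}, the stub
    # lands in both the "sanity" and "ec2" buckets of the returned dict.
    return fetch_dlc_images_for_test_jobs([_StubImage()])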


def build_setup(framework,
                device_types=None,
                image_types=None,
                py_versions=None):
    """
    Set up the appropriate environment variables depending on whether this is a PR build
    or a dev build.

    Parameters:
        framework: str
        device_types: [str]
        image_types: [str]
        py_versions: [str]

    Returns:
        None
    """

    # Set necessary environment variables
    to_build = {
        "device_types": constants.DEVICE_TYPES,
        "image_types": constants.IMAGE_TYPES,
        "py_versions": constants.PYTHON_VERSIONS,
    }
    build_context = os.environ.get("BUILD_CONTEXT")
    enable_build = is_build_enabled()

    if build_context == "PR":
        pr_number = os.getenv("CODEBUILD_SOURCE_VERSION")
        LOGGER.info(f"pr number: {pr_number}")
        if pr_number is not None:
            pr_number = int(pr_number.split("/")[-1])
        device_types, image_types, py_versions = pr_build_setup(
            pr_number, framework)

    if device_types != constants.ALL:
        to_build["device_types"] = constants.DEVICE_TYPES.intersection(
            set(device_types))
    if image_types != constants.ALL:
        to_build["image_types"] = constants.IMAGE_TYPES.intersection(
            set(image_types))
    if py_versions != constants.ALL:
        to_build["py_versions"] = constants.PYTHON_VERSIONS.intersection(
            set(py_versions))
    for device_type in to_build["device_types"]:
        for image_type in to_build["image_types"]:
            for py_version in to_build["py_versions"]:
                env_variable = f"{framework.upper()}_{device_type.upper()}_{image_type.upper()}_{py_version.upper()}"
                if enable_build or build_context != "PR":
                    os.environ[env_variable] = "true"
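

# Illustrative sketch, not part of the original module: a dev (non-PR) build
# of MXNet restricted to CPU training images on py3 would export one
# environment variable per matching combination. The framework name and
# values here are hypothetical.
def _demo_build_setup():
    build_setup("mxnet",
                device_types=["cpu"],
                image_types=["training"],
                py_versions=["py3"])
    # "true" when builds are enabled or the build context is not "PR"
    return os.environ.get("MXNET_CPU_TRAINING_PY3")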


def image_builder(buildspec):

    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    PRE_PUSH_STAGE_IMAGES = []
    COMMON_STAGE_IMAGES = []
    # build_context drives the PR-specific tagging and repository naming below
    build_context = os.getenv("BUILD_CONTEXT")

    if "huggingface" in str(BUILDSPEC["framework"]) or "autogluon" in str(
            BUILDSPEC["framework"]) or "trcomp" in str(BUILDSPEC["framework"]):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(
            BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}
        enable_datetime_tag = parse_dlc_developer_configs(
            "build", "datetime_tag")

        if image_config.get("version") is not None:
            if BUILDSPEC["version"] != image_config.get("version"):
                continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = (tag_image_with_pr_number(image_config["tag"])
                     if build_context == "PR" else image_config["tag"])
        if enable_datetime_tag or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)
        image_repo_uri = (image_config["repository"] if build_context == "PR"
                          else modify_repository_name_for_context(
                              str(image_config["repository"]), build_context))
        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"])
            base_image_uri = base_image_object.ecr_url

        if image_config.get("download_artifacts") is not None:
            for artifact_name, artifact in image_config.get(
                    "download_artifacts").items():
                type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {type}.")

                ARTIFACTS.update({
                    f"{artifact_name}": {
                        "source":
                        f"{os.path.join(os.sep, os.path.abspath(os.getcwd()), file_name)}",
                        "target": file_name,
                    }
                })

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        transformers_version = image_config.get("transformers_version")

        if str(BUILDSPEC["framework"]).startswith("huggingface") or str(
                BUILDSPEC["framework"]).endswith("trcomp"):
            if transformers_version:
                extra_build_args["TRANSFORMERS_VERSION"] = transformers_version
            else:
                raise KeyError(
                    "HuggingFace buildspec.yml must contain a 'transformers_version' field for each image"
                )
            if "datasets_version" in image_config:
                extra_build_args["DATASETS_VERSION"] = image_config.get(
                    "datasets_version")
            elif str(image_config["image_type"]) == "training":
                raise KeyError(
                    "HuggingFace buildspec.yml must contain a 'datasets_version' field for each training image"
                )

        ARTIFACTS.update({
            "dockerfile": {
                "source": image_config["docker_file"],
                "target": "Dockerfile",
            }
        })

        context = Context(ARTIFACTS, f"build/{image_name}.tar.gz",
                          image_config["root"])

        if "labels" in image_config:
            labels.update(image_config.get("labels"))

        cx_type = utils.get_label_prefix_customer_type(image_tag)

        # Define label variables
        label_framework = str(BUILDSPEC['framework']).replace('_', '-')
        if image_config.get("framework_version"):
            label_framework_version = str(
                image_config['framework_version']).replace('.', '-')
        else:
            label_framework_version = str(BUILDSPEC['version']).replace(
                '.', '-')
        label_device_type = str(image_config['device_type'])
        if label_device_type == "gpu":
            label_device_type = f"{label_device_type}.{str(image_config['cuda_version'])}"
        label_arch = str(BUILDSPEC['arch_type'])
        label_python_version = str(image_config['tag_python_version'])
        label_os_version = str(image_config.get('os_version')).replace(
            '.', '-')
        label_contributor = str(BUILDSPEC.get('contributor'))
        label_transformers_version = str(transformers_version).replace(
            '.', '-')

        # job_type will be either inference or training, based on the repo URI
        if "training" in image_repo_uri:
            label_job_type = "training"
        elif "inference" in image_repo_uri:
            label_job_type = "inference"
        else:
            raise RuntimeError(
                f"Cannot find inference or training job type in {image_repo_uri}. "
                f"This is required to set job_type label.")

        if cx_type == "sagemaker":
            # Adding standard labels to all images
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.framework.{label_framework}.{label_framework_version}"] = "true"
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.device.{label_device_type}"] = "true"
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.arch.{label_arch}"] = "true"
            # the python version label will end with "python.py36", for example
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.python.{label_python_version}"] = "true"
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.os.{label_os_version}"] = "true"

            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.job.{label_job_type}"] = "true"

            if label_contributor:
                labels[
                    f"com.amazonaws.ml.engines.{cx_type}.dlc.contributor.{label_contributor}"] = "true"
            if transformers_version:
                labels[
                    f"com.amazonaws.ml.engines.{cx_type}.dlc.lib.transformers.{label_transformers_version}"] = "true"
        """
        Override parameters from parent in child.
        """

        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": str(BUILDSPEC["framework"]),
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "enable_test_promotion": image_config.get("enable_test_promotion", True),
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        # Create pre_push stage docker object
        pre_push_stage_image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=append_tag(image_tag, "pre-push"),
            to_build=image_config["build"],
            stage=constants.PRE_PUSH_STAGE,
            context=context,
            additional_tags=[image_tag],
            target=image_config.get("target"),
        )

        ##### Create Common stage docker object #####
        # If we create a common stage image for a pre_push stage image, we do not push the
        # pre_push stage image to the repository; we push only its common stage image. Therefore,
        # generate_common_stage_image_object marks pre_push_stage_image_object as non-pushable.
        common_stage_image_object = generate_common_stage_image_object(
            pre_push_stage_image_object, image_tag)
        COMMON_STAGE_IMAGES.append(common_stage_image_object)

        PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
        FORMATTER.separator()

    FORMATTER.banner("DLC")

    # Parent images do not inherit from any containers built in this job
    # Child images use one of the parent images as their base image
    parent_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image
    ]
    child_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image
    ]
    ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
    IMAGES_TO_PUSH = [
        image for image in ALL_IMAGES if image.to_push and image.to_build
    ]

    pushed_images = []
    pushed_images += process_images(parent_images, "Parent/Independent")
    pushed_images += process_images(child_images, "Child/Dependent")

    assert all(image in pushed_images
               for image in IMAGES_TO_PUSH), "Some images could not be pushed."

    # After the build, display logs/summary for all the images.
    FORMATTER.banner("Summary")
    show_build_info(ALL_IMAGES)

    FORMATTER.banner("Errors")
    is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(
        ALL_IMAGES)

    # From all images, filter the images that were supposed to be built and upload their metrics
    BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]

    FORMATTER.banner("Upload Metrics")
    upload_metrics(BUILT_IMAGES, BUILDSPEC, is_any_build_failed,
                   is_any_build_failed_size_limit)

    FORMATTER.banner("Test Env")
    # Set environment variables to be consumed by test jobs
    test_trigger_job = get_codebuild_project_name()
    # Tests should only run on images that were pushed to the repository
    if not is_build_enabled():
        # Populate images even when builds are disabled, so that tests can still proceed if needed
        images_to_test = [image for image in ALL_IMAGES if image.to_push]
    else:
        images_to_test = IMAGES_TO_PUSH

    utils.set_test_env(
        images_to_test,
        use_latest_additional_tag=True,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )
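

# Illustrative entry point, not part of the original module: in the real
# pipeline a separate runner invokes image_builder with the framework's
# buildspec path; the path below is hypothetical.
if __name__ == "__main__":
    image_builder("pytorch/buildspec.yml")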