예제 #1
0
def image_builder(buildspec):
    """Build the images described by a buildspec and report on the result.

    For every entry under ``images`` in the loaded buildspec this creates a
    pre-push stage ``DockerImage`` together with its common stage image.
    Parent images are handed to ``process_images`` first, child images
    second. Afterwards the build summary and errors are displayed, metrics
    are uploaded and the environment for the test jobs is written through
    ``utils.set_test_env``.

    The names ``build_context`` and ``FORMATTER`` are read but never assigned
    in this function, so they must be supplied by the enclosing module.

    Args:
        buildspec: Passed unchanged to ``Buildspec.load``.

    Raises:
        KeyError: If a HuggingFace/trcomp image lacks ``transformers_version``,
            or a training image of those frameworks lacks ``datasets_version``.
        RuntimeError: If the repository URI contains neither ``training`` nor
            ``inference``, so the job type label cannot be derived.
        AssertionError: If an image that should have been pushed is missing
            from the images returned by ``process_images``.
    """
    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    PRE_PUSH_STAGE_IMAGES = []
    COMMON_STAGE_IMAGES = []

    framework = str(BUILDSPEC["framework"])

    # These frameworks trigger a docker login against the 763104351884
    # us-west-2 ECR registry before any image is processed.
    if any(name in framework for name in ("huggingface", "autogluon", "trcomp")):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(
            BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}
        enable_datetime_tag = parse_dlc_developer_configs(
            "build", "datetime_tag")

        # An image pinned to a different version than the buildspec is skipped.
        image_version = image_config.get("version")
        if image_version is not None and BUILDSPEC["version"] != image_version:
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = image_config["tag"]
        if build_context == "PR":
            image_tag = tag_image_with_pr_number(image_tag)
        if enable_datetime_tag or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        if build_context == "PR":
            image_repo_uri = image_config["repository"]
        else:
            image_repo_uri = modify_repository_name_for_context(
                str(image_config["repository"]), build_context)

        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"])
            base_image_uri = base_image_object.ecr_url

        download_artifacts = image_config.get("download_artifacts")
        if download_artifacts is not None:
            for artifact_name, artifact in download_artifacts.items():
                artifact_type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {artifact_type}."
                    )
                    # Without a downloaded file there is nothing to register.
                    # Falling through would either hit an unbound
                    # ``file_name`` or silently reuse the previous artifact's
                    # file, so this artifact is skipped.
                    continue

                ARTIFACTS[str(artifact_name)] = {
                    "source": os.path.join(
                        os.sep, os.path.abspath(os.getcwd()), file_name),
                    "target": file_name,
                }

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        transformers_version = image_config.get("transformers_version")

        if framework.startswith("huggingface") or framework.endswith("trcomp"):
            if not transformers_version:
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'transformers_version' field for each image"
                )
            extra_build_args["TRANSFORMERS_VERSION"] = transformers_version
            if "datasets_version" in image_config:
                extra_build_args["DATASETS_VERSION"] = image_config.get(
                    "datasets_version")
            elif str(image_config["image_type"]) == "training":
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'datasets_version' field for each image"
                )

        ARTIFACTS.update({
            "dockerfile": {
                "source": image_config["docker_file"],
                "target": "Dockerfile",
            }
        })

        context = Context(ARTIFACTS, f"build/{image_name}.tar.gz",
                          image_config["root"])

        if "labels" in image_config:
            labels.update(image_config["labels"])

        cx_type = utils.get_label_prefix_customer_type(image_tag)

        # Label fragments. Dots and underscores are replaced by dashes where
        # the original values may contain them.
        label_framework = framework.replace('_', '-')
        if image_config.get("framework_version"):
            label_framework_version = str(
                image_config['framework_version']).replace('.', '-')
        else:
            label_framework_version = str(BUILDSPEC['version']).replace(
                '.', '-')
        label_device_type = str(image_config['device_type'])
        if label_device_type == "gpu":
            label_device_type = f"{label_device_type}.{str(image_config['cuda_version'])}"
        label_arch = str(BUILDSPEC['arch_type'])
        label_python_version = str(image_config['tag_python_version'])
        # NOTE(review): a missing os_version yields the literal label segment
        # "None" - confirm whether that is intended.
        label_os_version = str(image_config.get('os_version')).replace(
            '.', '-')
        # Only stringify a contributor that is actually set: str(None) is the
        # truthy string "None" and would always produce a bogus label.
        contributor = BUILDSPEC.get('contributor')
        label_contributor = str(contributor) if contributor else ""
        label_transformers_version = str(transformers_version).replace(
            '.', '-')

        # job_type will be either inference or training, based on the repo URI
        if "training" in image_repo_uri:
            label_job_type = "training"
        elif "inference" in image_repo_uri:
            label_job_type = "inference"
        else:
            raise RuntimeError(
                f"Cannot find inference or training job type in {image_repo_uri}. "
                f"This is required to set job_type label.")

        if cx_type == "sagemaker":
            # Standard labels added to every sagemaker image
            label_prefix = f"com.amazonaws.ml.engines.{cx_type}.dlc"
            labels[
                f"{label_prefix}.framework.{label_framework}.{label_framework_version}"] = "true"
            labels[f"{label_prefix}.device.{label_device_type}"] = "true"
            labels[f"{label_prefix}.arch.{label_arch}"] = "true"
            # python version label will look like py_version.py36, for example
            labels[f"{label_prefix}.python.{label_python_version}"] = "true"
            labels[f"{label_prefix}.os.{label_os_version}"] = "true"
            labels[f"{label_prefix}.job.{label_job_type}"] = "true"

            if label_contributor:
                labels[
                    f"{label_prefix}.contributor.{label_contributor}"] = "true"
            if transformers_version:
                labels[
                    f"{label_prefix}.lib.transformers.{label_transformers_version}"] = "true"

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": framework,
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "enable_test_promotion":
            image_config.get("enable_test_promotion", True),
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        # Create pre_push stage docker object
        pre_push_stage_image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=append_tag(image_tag, "pre-push"),
            to_build=image_config["build"],
            stage=constants.PRE_PUSH_STAGE,
            context=context,
            additional_tags=[image_tag],
            target=image_config.get("target"),
        )

        ##### Create Common stage docker object #####
        # If for a pre_push stage image we create a common stage image, then we do not push the pre_push stage image
        # to the repository. Instead, we just push its common stage image to the repository. Therefore,
        # inside function get_common_stage_image_object we make pre_push_stage_image_object non pushable.
        common_stage_image_object = generate_common_stage_image_object(
            pre_push_stage_image_object, image_tag)
        COMMON_STAGE_IMAGES.append(common_stage_image_object)

        PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
        FORMATTER.separator()

    FORMATTER.banner("DLC")

    # Parent images do not inherit from any containers built in this job
    # Child images use one of the parent images as their base image
    parent_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image
    ]
    child_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image
    ]
    ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
    IMAGES_TO_PUSH = [
        image for image in ALL_IMAGES if image.to_push and image.to_build
    ]

    pushed_images = []
    pushed_images += process_images(parent_images, "Parent/Independent")
    pushed_images += process_images(child_images, "Child/Dependent")

    # Raised explicitly (instead of via ``assert``) so the check still runs
    # when Python is started with -O; the exception type is unchanged.
    if not all(image in pushed_images for image in IMAGES_TO_PUSH):
        raise AssertionError("Few images could not be pushed.")

    # After the build, display logs/summary for all the images.
    FORMATTER.banner("Summary")
    show_build_info(ALL_IMAGES)

    FORMATTER.banner("Errors")
    is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(
        ALL_IMAGES)

    # From all images, filter the images that were supposed to be built and upload their metrics
    BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]

    FORMATTER.banner("Upload Metrics")
    upload_metrics(BUILT_IMAGES, BUILDSPEC, is_any_build_failed,
                   is_any_build_failed_size_limit)

    FORMATTER.banner("Test Env")
    # Set environment variables to be consumed by test jobs
    test_trigger_job = get_codebuild_project_name()
    # Tests should only run on images that were pushed to the repository
    if not is_build_enabled():
        # Ensure we have images populated if do_build is false, so that tests can proceed if needed
        images_to_test = [image for image in ALL_IMAGES if image.to_push]
    else:
        images_to_test = IMAGES_TO_PUSH

    utils.set_test_env(
        images_to_test,
        use_latest_additional_tag=True,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )
예제 #2
0
def image_builder(buildspec):
    """Build every image described by a buildspec and report on the result.

    A ``DockerImage`` is created for each entry under ``images`` in the
    loaded buildspec. Images whose name does not contain "example" are built
    first on a thread pool, the remaining ("example") images second. Build
    logs are then printed and written to ``logs/<image name>``, a summary and
    the errors are displayed, metrics are pushed and the environment for the
    test jobs is written through ``utils.set_test_env``.

    Args:
        buildspec: Passed unchanged to ``Buildspec.load``.

    Raises:
        Exception: "Build failed" if any image has build status
            ``constants.FAIL``; "Build passed. ..." if pushing the metrics
            of an image fails.
    """
    FORMATTER = OutputFormatter(constants.PADDING)

    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    IMAGES = []

    # The build context does not depend on the image, so read it once.
    build_context = os.getenv("BUILD_CONTEXT")

    for image_name, image_config in BUILDSPEC["images"].items():
        # A buildspec without a shared context starts from an empty artifact
        # set, matching the other image_builder variants.
        ARTIFACTS = deepcopy(
            BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        # An image pinned to a different version than the buildspec is skipped.
        image_version = image_config.get("version")
        if image_version is not None and BUILDSPEC["version"] != image_version:
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = image_config["tag"]
        if build_context == "PR":
            image_tag = tag_image_with_pr_number(image_tag)
        if not build_config.DISABLE_DATETIME_TAG or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        if build_context == "PR":
            image_repo_uri = image_config["repository"]
        else:
            image_repo_uri = modify_repository_name_for_context(
                str(image_config["repository"]), build_context)

        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                IMAGES, image_config["base_image_name"])
            base_image_uri = base_image_object.ecr_url

        ARTIFACTS.update({
            "dockerfile": {
                "source": image_config["docker_file"],
                "target": "Dockerfile",
            }
        })

        context = Context(ARTIFACTS, f"build/{image_name}.tar.gz",
                          image_config["root"])

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": str(BUILDSPEC["framework"]),
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
        }

        image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=image_tag,
            to_build=image_config["build"],
            context=context,
        )

        IMAGES.append(image_object)

    FORMATTER.banner("DLC")
    FORMATTER.title("Status")

    THREADS = {}

    # Standard images must be built before example images
    # Example images will use standard images as base
    standard_images = [
        image for image in IMAGES if "example" not in image.name.lower()
    ]
    example_images = [
        image for image in IMAGES if "example" in image.name.lower()
    ]

    # In the context of the ThreadPoolExecutor each instance of image.build submitted
    # to it is executed concurrently in a separate thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        for image in standard_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

        for image in example_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

    FORMATTER.title("Build Logs")

    os.makedirs("logs", exist_ok=True)

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.info.items())
        FORMATTER.separator()
        FORMATTER.print_lines(image.log)
        with open(f"logs/{image.name}", "w") as fp:
            # One log entry per line (the separator used to be the literal
            # two-character string "/n").
            fp.write("\n".join(image.log))
            image.summary["log"] = f"logs/{image.name}"

    FORMATTER.title("Summary")

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.summary.items())

    FORMATTER.title("Errors")
    ANY_FAIL = False
    for image in IMAGES:
        if image.build_status == constants.FAIL:
            FORMATTER.title(image.name)
            FORMATTER.print_lines(image.log[-10:])
            ANY_FAIL = True
    if ANY_FAIL:
        raise Exception("Build failed")
    FORMATTER.print("No errors")

    FORMATTER.title("Uploading Metrics")
    metrics = Metrics(
        context=constants.BUILD_CONTEXT,
        region=BUILDSPEC["region"],
        namespace=constants.METRICS_NAMESPACE,
    )
    for image in IMAGES:
        try:
            metrics.push_image_metrics(image)
        except Exception as e:
            # A failed build has already raised above, so reaching this
            # handler means the build itself passed.
            raise Exception(f"Build passed. {e}") from e

    FORMATTER.separator()

    # Set environment variables to be consumed by test jobs
    test_trigger_job = utils.get_codebuild_project_name()
    utils.set_test_env(
        IMAGES,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )
def image_builder(buildspec):
    """Build every image described by a buildspec and report on the result.

    A ``DockerImage`` is created for each entry under ``images`` in the
    loaded buildspec, including any artifacts listed under
    ``download_artifacts``. Images whose name does not contain "example" are
    built first on a thread pool, the remaining ("example") images second.
    Build logs are then printed and written to ``logs/<image name>``, a
    summary and the errors are displayed, metrics are pushed and the
    environment for the test jobs is written through ``utils.set_test_env``.

    Args:
        buildspec: Passed unchanged to ``Buildspec.load``.

    Raises:
        Exception: "Build failed" if any image has build status
            ``constants.FAIL``; "Build failed...."/"Build passed. ..." if
            pushing the metrics of an image fails; "Build failed because of
            file limit" if any image has build status
            ``constants.FAIL_IMAGE_SIZE_LIMIT``.
    """
    FORMATTER = OutputFormatter(constants.PADDING)

    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    IMAGES = []

    # HuggingFace builds trigger a docker login against the 763104351884
    # us-west-2 ECR registry before any image is processed.
    if "huggingface" in str(BUILDSPEC["framework"]):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    # The build context does not depend on the image, so read it once.
    build_context = os.getenv("BUILD_CONTEXT")

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(
            BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}

        # An image pinned to a different version than the buildspec is skipped.
        image_version = image_config.get("version")
        if image_version is not None and BUILDSPEC["version"] != image_version:
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = image_config["tag"]
        if build_context == "PR":
            image_tag = tag_image_with_pr_number(image_tag)
        if not build_config.DISABLE_DATETIME_TAG or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        if build_context == "PR":
            image_repo_uri = image_config["repository"]
        else:
            image_repo_uri = modify_repository_name_for_context(
                str(image_config["repository"]), build_context)

        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                IMAGES, image_config["base_image_name"])
            base_image_uri = base_image_object.ecr_url

        download_artifacts = image_config.get("download_artifacts")
        if download_artifacts is not None:
            for artifact_name, artifact in download_artifacts.items():
                artifact_type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {artifact_type}."
                    )
                    # Without a downloaded file there is nothing to register.
                    # Falling through would either hit an unbound
                    # ``file_name`` or silently reuse the previous artifact's
                    # file, so this artifact is skipped.
                    continue

                ARTIFACTS[str(artifact_name)] = {
                    "source": os.path.join(
                        os.sep, os.path.abspath(os.getcwd()), file_name),
                    "target": file_name,
                }

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        ARTIFACTS.update({
            "dockerfile": {
                "source": image_config["docker_file"],
                "target": "Dockerfile",
            }
        })

        context = Context(ARTIFACTS, f"build/{image_name}.tar.gz",
                          image_config["root"])

        if "labels" in image_config:
            labels.update(image_config["labels"])

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": str(BUILDSPEC["framework"]),
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=image_tag,
            to_build=image_config["build"],
            context=context,
        )

        IMAGES.append(image_object)

    FORMATTER.banner("DLC")
    FORMATTER.title("Status")

    THREADS = {}

    # Standard images must be built before example images
    # Example images will use standard images as base
    standard_images = [
        image for image in IMAGES if "example" not in image.name.lower()
    ]
    example_images = [
        image for image in IMAGES if "example" in image.name.lower()
    ]

    # In the context of the ThreadPoolExecutor each instance of image.build submitted
    # to it is executed concurrently in a separate thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        for image in standard_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

        for image in example_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

    FORMATTER.title("Build Logs")

    os.makedirs("logs", exist_ok=True)

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.info.items())
        FORMATTER.separator()
        FORMATTER.print_lines(image.log)
        with open(f"logs/{image.name}", "w") as fp:
            # One log entry per line (the separator used to be the literal
            # two-character string "/n").
            fp.write("\n".join(image.log))
            image.summary["log"] = f"logs/{image.name}"

    FORMATTER.title("Summary")

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.summary.items())

    FORMATTER.title("Errors")
    is_any_build_failed = False
    is_any_build_failed_size_limit = False
    for image in IMAGES:
        if image.build_status == constants.FAIL:
            FORMATTER.title(image.name)
            FORMATTER.print_lines(image.log[-10:])
            is_any_build_failed = True
        elif image.build_status == constants.FAIL_IMAGE_SIZE_LIMIT:
            is_any_build_failed_size_limit = True
    if is_any_build_failed:
        raise Exception("Build failed")
    if is_any_build_failed_size_limit:
        FORMATTER.print("Build failed. Image size limit breached.")
    else:
        FORMATTER.print("No errors")

    FORMATTER.title("Uploading Metrics")
    metrics = Metrics(
        context=constants.BUILD_CONTEXT,
        region=BUILDSPEC["region"],
        namespace=constants.METRICS_NAMESPACE,
    )
    for image in IMAGES:
        try:
            metrics.push_image_metrics(image)
        except Exception as e:
            # A hard build failure has already raised above, so only the
            # size-limit flag can still mark the build as failed here.
            if is_any_build_failed_size_limit:
                raise Exception(f"Build failed.{e}") from e
            raise Exception(f"Build passed. {e}") from e

    # The size-limit failure is raised only after metrics were uploaded.
    if is_any_build_failed_size_limit:
        raise Exception("Build failed because of file limit")

    FORMATTER.separator()

    # Set environment variables to be consumed by test jobs
    test_trigger_job = utils.get_codebuild_project_name()
    utils.set_test_env(
        IMAGES,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )