def image_builder(buildspec):
    """Create, process and report on the Docker images declared in a buildspec.

    For every entry under ``images`` in the loaded buildspec this function
    assembles the build context, build args and labels, creates a pre-push
    stage ``DockerImage`` plus its common stage counterpart, and then hands
    parent images followed by child images to ``process_images``. Afterwards
    it prints the build summary and errors, uploads metrics and exports the
    test environment through ``utils.set_test_env``.

    Args:
        buildspec: Value forwarded to ``Buildspec.load`` (presumably the path
            of the buildspec YAML file -- confirm against callers).

    Raises:
        KeyError: A HuggingFace/trcomp image lacks ``transformers_version``,
            or a HuggingFace/trcomp training image lacks ``datasets_version``.
        RuntimeError: The repository URI contains neither ``training`` nor
            ``inference``, so the job-type label cannot be derived.
        ValueError: Re-raised when ``utils.download_file`` fails for an
            artifact listed under ``download_artifacts``.
        AssertionError: At least one image that should have been pushed is
            missing from the images returned by ``process_images``.
    """
    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    PRE_PUSH_STAGE_IMAGES = []
    COMMON_STAGE_IMAGES = []

    # The framework name is consulted repeatedly below; convert it once.
    framework = str(BUILDSPEC["framework"])

    if any(name in framework for name in ("huggingface", "autogluon", "trcomp")):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}
        enable_datetime_tag = parse_dlc_developer_configs("build", "datetime_tag")

        # Skip images whose explicit version differs from the buildspec version.
        if (
            image_config.get("version") is not None
            and BUILDSPEC["version"] != image_config.get("version")
        ):
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = (
            tag_image_with_pr_number(image_config["tag"])
            if build_context == "PR"
            else image_config["tag"]
        )
        if enable_datetime_tag or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        image_repo_uri = (
            image_config["repository"]
            if build_context == "PR"
            else modify_repository_name_for_context(
                str(image_config["repository"]), build_context
            )
        )

        # A child image refers to a previously declared image as its base.
        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"]
            )
            base_image_uri = base_image_object.ecr_url

        download_artifacts = image_config.get("download_artifacts")
        if download_artifacts is not None:
            for artifact_name, artifact in download_artifacts.items():
                artifact_type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {artifact_type}."
                    )
                    # Without re-raising, ``file_name`` below would be either
                    # unbound or left over from the previous artifact.
                    raise

                ARTIFACTS.update(
                    {
                        f"{artifact_name}": {
                            "source": os.path.join(
                                os.sep, os.path.abspath(os.getcwd()), file_name
                            ),
                            "target": file_name,
                        }
                    }
                )

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        transformers_version = image_config.get("transformers_version")

        if framework.startswith("huggingface") or framework.endswith("trcomp"):
            if transformers_version:
                extra_build_args["TRANSFORMERS_VERSION"] = transformers_version
            else:
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'transformers_version' field for each image"
                )
            if "datasets_version" in image_config:
                extra_build_args["DATASETS_VERSION"] = image_config.get(
                    "datasets_version"
                )
            elif str(image_config["image_type"]) == "training":
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'datasets_version' field for each image"
                )

        ARTIFACTS.update(
            {
                "dockerfile": {
                    "source": image_config["docker_file"],
                    "target": "Dockerfile",
                }
            }
        )

        context = Context(
            ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"]
        )

        if "labels" in image_config:
            labels.update(image_config.get("labels"))

        cx_type = utils.get_label_prefix_customer_type(image_tag)

        # Define label variables
        label_framework = framework.replace("_", "-")
        if image_config.get("framework_version"):
            label_framework_version = str(image_config["framework_version"]).replace(
                ".", "-"
            )
        else:
            label_framework_version = str(BUILDSPEC["version"]).replace(".", "-")
        label_device_type = str(image_config["device_type"])
        if label_device_type == "gpu":
            label_device_type = (
                f"{label_device_type}.{str(image_config['cuda_version'])}"
            )
        label_arch = str(BUILDSPEC["arch_type"])
        label_python_version = str(image_config["tag_python_version"])
        label_os_version = str(image_config.get("os_version")).replace(".", "-")
        # NOTE(review): str() turns a missing contributor into the truthy
        # string "None", so the ``if label_contributor`` check below always
        # passes -- confirm whether a "contributor.None" label is intended.
        label_contributor = str(BUILDSPEC.get("contributor"))
        label_transformers_version = str(transformers_version).replace(".", "-")

        # job_type will be either inference or training, based on the repo URI
        if "training" in image_repo_uri:
            label_job_type = "training"
        elif "inference" in image_repo_uri:
            label_job_type = "inference"
        else:
            raise RuntimeError(
                f"Cannot find inference or training job type in {image_repo_uri}. "
                f"This is required to set job_type label."
            )

        if cx_type == "sagemaker":
            # Adding standard labels to all images
            label_prefix = f"com.amazonaws.ml.engines.{cx_type}.dlc"
            labels[
                f"{label_prefix}.framework.{label_framework}.{label_framework_version}"
            ] = "true"
            labels[f"{label_prefix}.device.{label_device_type}"] = "true"
            labels[f"{label_prefix}.arch.{label_arch}"] = "true"
            # python version label will look like py_version.py36, for example
            labels[f"{label_prefix}.python.{label_python_version}"] = "true"
            labels[f"{label_prefix}.os.{label_os_version}"] = "true"
            labels[f"{label_prefix}.job.{label_job_type}"] = "true"

            if label_contributor:
                labels[f"{label_prefix}.contributor.{label_contributor}"] = "true"
            if transformers_version:
                labels[
                    f"{label_prefix}.lib.transformers.{label_transformers_version}"
                ] = "true"

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": framework,
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "enable_test_promotion": image_config.get("enable_test_promotion", True),
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        # Create pre_push stage docker object
        pre_push_stage_image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=append_tag(image_tag, "pre-push"),
            to_build=image_config["build"],
            stage=constants.PRE_PUSH_STAGE,
            context=context,
            additional_tags=[image_tag],
            target=image_config.get("target"),
        )

        ##### Create Common stage docker object #####
        # If for a pre_push stage image we create a common stage image, then we do not
        # push the pre_push stage image to the repository. Instead, we just push its
        # common stage image to the repository. generate_common_stage_image_object is
        # expected to make pre_push_stage_image_object non pushable (not visible here).
        common_stage_image_object = generate_common_stage_image_object(
            pre_push_stage_image_object, image_tag
        )
        COMMON_STAGE_IMAGES.append(common_stage_image_object)

        PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
        FORMATTER.separator()

    FORMATTER.banner("DLC")

    # Parent images do not inherit from any containers built in this job
    # Child images use one of the parent images as their base image
    parent_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image
    ]
    child_images = [image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image]
    ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
    IMAGES_TO_PUSH = [image for image in ALL_IMAGES if image.to_push and image.to_build]

    pushed_images = []
    pushed_images += process_images(parent_images, "Parent/Independent")
    pushed_images += process_images(child_images, "Child/Dependent")

    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not all(image in pushed_images for image in IMAGES_TO_PUSH):
        raise AssertionError("Few images could not be pushed.")

    # After the build, display logs/summary for all the images.
    FORMATTER.banner("Summary")
    show_build_info(ALL_IMAGES)

    FORMATTER.banner("Errors")
    is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(ALL_IMAGES)

    # From all images, filter the images that were supposed to be built and upload their metrics
    BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]

    FORMATTER.banner("Upload Metrics")
    upload_metrics(
        BUILT_IMAGES, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit
    )

    FORMATTER.banner("Test Env")
    # Set environment variables to be consumed by test jobs
    test_trigger_job = get_codebuild_project_name()
    # Tests should only run on images that were pushed to the repository
    if not is_build_enabled():
        # Ensure we have images populated if do_build is false, so that tests can proceed if needed
        images_to_test = [image for image in ALL_IMAGES if image.to_push]
    else:
        images_to_test = IMAGES_TO_PUSH

    utils.set_test_env(
        images_to_test,
        use_latest_additional_tag=True,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )
def image_builder(buildspec):
    """Build every image declared in a buildspec and report the results.

    Creates one ``DockerImage`` per entry under ``images`` in the loaded
    buildspec, submits the builds to a thread pool (images without
    "example" in their name first, then the "example" images that use them
    as a base), writes each build log to ``logs/<image name>``, prints the
    summary and errors, pushes metrics and finally exports the test
    environment through ``utils.set_test_env``.

    Args:
        buildspec: Value forwarded to ``Buildspec.load`` (presumably the path
            of the buildspec YAML file -- confirm against callers).

    Raises:
        Exception: ``"Build failed"`` when any image has build status
            ``constants.FAIL``; ``"Build passed. <error>"`` when pushing
            metrics fails after a successful build.
    """
    FORMATTER = OutputFormatter(constants.PADDING)

    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    IMAGES = []

    # The environment variable is loop-invariant, so read it once.
    build_context = os.getenv("BUILD_CONTEXT")

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(BUILDSPEC["context"])

        # Skip images whose explicit version differs from the buildspec version.
        if (
            image_config.get("version") is not None
            and BUILDSPEC["version"] != image_config.get("version")
        ):
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = (
            tag_image_with_pr_number(image_config["tag"])
            if build_context == "PR"
            else image_config["tag"]
        )
        if not build_config.DISABLE_DATETIME_TAG or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        image_repo_uri = (
            image_config["repository"]
            if build_context == "PR"
            else modify_repository_name_for_context(
                str(image_config["repository"]), build_context
            )
        )

        # A child image refers to a previously declared image as its base.
        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                IMAGES, image_config["base_image_name"]
            )
            base_image_uri = base_image_object.ecr_url

        ARTIFACTS.update(
            {
                "dockerfile": {
                    "source": image_config["docker_file"],
                    "target": "Dockerfile",
                }
            }
        )

        context = Context(
            ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"]
        )

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": str(BUILDSPEC["framework"]),
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
        }

        image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=image_tag,
            to_build=image_config["build"],
            context=context,
        )

        IMAGES.append(image_object)

    FORMATTER.banner("DLC")
    FORMATTER.title("Status")

    THREADS = {}

    # In the context of the ThreadPoolExecutor each instance of image.build submitted
    # to it is executed concurrently in a separate thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Standard images must be built before example images
        # Example images will use standard images as base
        standard_images = [
            image for image in IMAGES if "example" not in image.name.lower()
        ]
        example_images = [image for image in IMAGES if "example" in image.name.lower()]

        for image in standard_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

        for image in example_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

    FORMATTER.title("Build Logs")

    os.makedirs("logs", exist_ok=True)

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.info.items())
        FORMATTER.separator()
        FORMATTER.print_lines(image.log)
        log_path = f"logs/{image.name}"
        with open(log_path, "w") as fp:
            # One log entry per line; the previous "/n" separator wrote the
            # two literal characters instead of a newline.
            fp.write("\n".join(image.log))
        image.summary["log"] = log_path

    FORMATTER.title("Summary")

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.summary.items())

    FORMATTER.title("Errors")
    ANY_FAIL = False
    for image in IMAGES:
        if image.build_status == constants.FAIL:
            FORMATTER.title(image.name)
            FORMATTER.print_lines(image.log[-10:])
            ANY_FAIL = True
    if ANY_FAIL:
        raise Exception("Build failed")
    else:
        FORMATTER.print("No errors")

    FORMATTER.title("Uploading Metrics")
    metrics = Metrics(
        context=constants.BUILD_CONTEXT,
        region=BUILDSPEC["region"],
        namespace=constants.METRICS_NAMESPACE,
    )
    for image in IMAGES:
        try:
            metrics.push_image_metrics(image)
        except Exception as e:
            # A failed build has already raised above, so only the
            # "build passed" message can apply at this point.
            raise Exception(f"Build passed. {e}") from e

    FORMATTER.separator()

    # Set environment variables to be consumed by test jobs
    test_trigger_job = utils.get_codebuild_project_name()
    utils.set_test_env(
        IMAGES,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )
def image_builder(buildspec):
    """Build every image declared in a buildspec and report the results.

    Creates one ``DockerImage`` per entry under ``images`` in the loaded
    buildspec (including downloaded artifacts, extra build args and labels),
    submits the builds to a thread pool (images without "example" in their
    name first, then the "example" images that use them as a base), writes
    each build log to ``logs/<image name>``, prints the summary and errors,
    pushes metrics and finally exports the test environment through
    ``utils.set_test_env``.

    Args:
        buildspec: Value forwarded to ``Buildspec.load`` (presumably the path
            of the buildspec YAML file -- confirm against callers).

    Raises:
        ValueError: Re-raised when ``utils.download_file`` fails for an
            artifact listed under ``download_artifacts``.
        Exception: ``"Build failed"`` when any image has build status
            ``constants.FAIL``; ``"Build failed.<error>"`` or
            ``"Build passed. <error>"`` when pushing metrics fails;
            ``"Build failed because of file limit"`` when any image has
            build status ``constants.FAIL_IMAGE_SIZE_LIMIT``.
    """
    FORMATTER = OutputFormatter(constants.PADDING)

    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    IMAGES = []

    if "huggingface" in str(BUILDSPEC["framework"]):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    # The environment variable is loop-invariant, so read it once.
    build_context = os.getenv("BUILD_CONTEXT")

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}

        # Skip images whose explicit version differs from the buildspec version.
        if (
            image_config.get("version") is not None
            and BUILDSPEC["version"] != image_config.get("version")
        ):
            continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        image_tag = (
            tag_image_with_pr_number(image_config["tag"])
            if build_context == "PR"
            else image_config["tag"]
        )
        if not build_config.DISABLE_DATETIME_TAG or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)

        image_repo_uri = (
            image_config["repository"]
            if build_context == "PR"
            else modify_repository_name_for_context(
                str(image_config["repository"]), build_context
            )
        )

        # A child image refers to a previously declared image as its base.
        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                IMAGES, image_config["base_image_name"]
            )
            base_image_uri = base_image_object.ecr_url

        download_artifacts = image_config.get("download_artifacts")
        if download_artifacts is not None:
            for artifact_name, artifact in download_artifacts.items():
                artifact_type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {artifact_type}."
                    )
                    # Without re-raising, ``file_name`` below would be either
                    # unbound or left over from the previous artifact.
                    raise

                ARTIFACTS.update(
                    {
                        f"{artifact_name}": {
                            "source": os.path.join(
                                os.sep, os.path.abspath(os.getcwd()), file_name
                            ),
                            "target": file_name,
                        }
                    }
                )

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        ARTIFACTS.update(
            {
                "dockerfile": {
                    "source": image_config["docker_file"],
                    "target": "Dockerfile",
                }
            }
        )

        context = Context(
            ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"]
        )

        if "labels" in image_config:
            labels.update(image_config.get("labels"))

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": str(BUILDSPEC["framework"]),
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=image_tag,
            to_build=image_config["build"],
            context=context,
        )

        IMAGES.append(image_object)

    FORMATTER.banner("DLC")
    FORMATTER.title("Status")

    THREADS = {}

    # In the context of the ThreadPoolExecutor each instance of image.build submitted
    # to it is executed concurrently in a separate thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Standard images must be built before example images
        # Example images will use standard images as base
        standard_images = [
            image for image in IMAGES if "example" not in image.name.lower()
        ]
        example_images = [image for image in IMAGES if "example" in image.name.lower()]

        for image in standard_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

        for image in example_images:
            THREADS[image.name] = executor.submit(image.build)

        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
        FORMATTER.progress(THREADS)

    FORMATTER.title("Build Logs")

    os.makedirs("logs", exist_ok=True)

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.info.items())
        FORMATTER.separator()
        FORMATTER.print_lines(image.log)
        log_path = f"logs/{image.name}"
        with open(log_path, "w") as fp:
            # One log entry per line; the previous "/n" separator wrote the
            # two literal characters instead of a newline.
            fp.write("\n".join(image.log))
        image.summary["log"] = log_path

    FORMATTER.title("Summary")

    for image in IMAGES:
        FORMATTER.title(image.name)
        FORMATTER.table(image.summary.items())

    FORMATTER.title("Errors")
    is_any_build_failed = False
    is_any_build_failed_size_limit = False
    for image in IMAGES:
        if image.build_status == constants.FAIL:
            FORMATTER.title(image.name)
            FORMATTER.print_lines(image.log[-10:])
            is_any_build_failed = True
        elif image.build_status == constants.FAIL_IMAGE_SIZE_LIMIT:
            is_any_build_failed_size_limit = True
    if is_any_build_failed:
        raise Exception("Build failed")
    elif is_any_build_failed_size_limit:
        FORMATTER.print("Build failed. Image size limit breached.")
    else:
        FORMATTER.print("No errors")

    FORMATTER.title("Uploading Metrics")
    metrics = Metrics(
        context=constants.BUILD_CONTEXT,
        region=BUILDSPEC["region"],
        namespace=constants.METRICS_NAMESPACE,
    )
    for image in IMAGES:
        try:
            metrics.push_image_metrics(image)
        except Exception as e:
            # A hard build failure has already raised above, so only the
            # size-limit flag can still mark the build as failed here.
            if is_any_build_failed_size_limit:
                raise Exception(f"Build failed.{e}") from e
            raise Exception(f"Build passed. {e}") from e

    # Size-limit breaches fail the job only after metrics were uploaded.
    if is_any_build_failed_size_limit:
        raise Exception("Build failed because of file limit")

    FORMATTER.separator()

    # Set environment variables to be consumed by test jobs
    test_trigger_job = utils.get_codebuild_project_name()
    utils.set_test_env(
        IMAGES,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )