def populate_ecr_scan_with_web_scraper_results(
    image_uri, ecr_scan_to_be_populated, prepend_url="https://ubuntu.com/security/"
):
    """
    Enrich ECR scan findings with data scraped from CVE web pages.

    For every unique CVE name in the scan results, the page at
    ``prepend_url + cve_name`` is scraped into a JSON file; each finding then
    gets a ``scraped_data`` entry for its package, or a placeholder note when
    the page could not be processed.

    :param image_uri: str, URI of the scanned image (used to name the local data file)
    :param ecr_scan_to_be_populated: list of ECR finding dicts; mutated in place
    :param prepend_url: str, base URL that CVE names are appended to
    :return: the same list, with ``scraped_data`` added to every finding
    """
    # Nothing to scrape: return before paying for the spider imports below.
    if not ecr_scan_to_be_populated:
        return ecr_scan_to_be_populated

    # Imported lazily here to prevent the SM tests from failing on the Quickcheck PRs
    from web_scraper.scraper_runner import run_spider
    from web_scraper.web_scraper.spiders.cve_spiders import CveSpider

    # De-duplicate CVE names so each page is scraped once.
    cve_list = list({cve["name"] for cve in ecr_scan_to_be_populated})
    url_csv_string = ",".join(prepend_url + cve_name for cve_name in cve_list)
    # Flatten the URI into a filesystem-safe file name.
    simplified_image_uri = image_uri.replace(".", "-").replace("/", "-").replace(":", "-")
    storage_file_path = os.path.join(
        os.sep, get_repository_local_path(), "cve-data", f"cve-data-{simplified_image_uri}.json"
    )
    run_spider(CveSpider, storage_file_path=storage_file_path, url_csv_string=url_csv_string)

    # Context manager ensures the scraped-data file is closed (original leaked the handle).
    with open(storage_file_path, "r") as scraped_file:
        scraped_data = json.load(scraped_file)
    scraped_data = process_scraped_data(scraped_data)

    for finding in ecr_scan_to_be_populated:
        cve_id = finding["name"]
        # ECR findings carry the package name as one of their key/value attributes.
        package_name = [
            attribute["value"] for attribute in finding["attributes"] if attribute["key"] == "package_name"
        ][0]
        if cve_id in scraped_data and package_name in scraped_data[cve_id]:
            finding["scraped_data"] = scraped_data[cve_id][package_name]
        else:
            finding["scraped_data"] = [
                {"_comment": "Could Not be Processed. Webpage for this might not be in the required format."}
            ]
    return ecr_scan_to_be_populated
def test_git_secrets():
    """
    Clone awslabs/git-secrets into the repository, register the AWS patterns,
    and assert that a `git secrets --scan` over the repo finds no credentials.
    """
    shell = Context()
    # Prefer the CodeBuild checkout directory; fall back to the local repo path.
    repository_path = os.getenv("CODEBUILD_SRC_DIR") or get_repository_local_path()
    LOGGER.info(f"repository_path = {repository_path}")
    # Replace the regex pattern below with a matching string to run test that makes scan fail:
    SOME_FAKE_CREDENTIALS = "ASIA[A-Z0-9]{16}"
    WHITELISTED_CREDENTIALS = "AKIAIOSFODNN7EXAMPLE"
    # End of Test Section
    with shell.cd(repository_path):
        shell.run("git clone https://github.com/awslabs/git-secrets.git")
        with shell.cd("git-secrets"):
            shell.run("make install")
        shell.run("git secrets --install")
        shell.run("git secrets --register-aws")
        listing = shell.run("git secrets --list")
        LOGGER.info(
            f"\n--COMMAND--\n{listing.command}\n"
            f"--STDOUT--\n{listing.stdout}\n"
            f"--STDERR--\n{listing.stderr}\n"
            f"----------"
        )
        # warn=True so a non-zero exit becomes an assertion failure, not an invoke error.
        scan = shell.run("git secrets --scan", hide=True, warn=True)
        LOGGER.info(
            f"\n--COMMAND--\n{scan.command}\n"
            f"--STDOUT--\n{scan.stdout}\n"
            f"--STDERR--\n{scan.stderr}"
            f"----------"
        )
        assert scan.ok, scan.stderr
def generate_sagemaker_reports(self):
    """
    Append SageMaker data to the report
    """
    shell = Context()
    repo_root = get_repository_local_path()
    for repo in self.SM_REPOS:
        framework, job_type = repo.split(os.sep)
        framework_test_dir = os.path.join(repo_root, "test", "sagemaker_tests", framework, job_type)
        with shell.cd(framework_test_dir):
            # We need to install requirements in order to use the SM pytest frameworks
            venv_dir = os.path.join(framework_test_dir, f".{repo.replace('/', '-')}")
            shell.run(f"virtualenv {venv_dir}")
            with shell.prefix(f"source {os.path.join(venv_dir, 'bin', 'activate')}"):
                shell.run("pip install -r requirements.txt", warn=True)
                # TF inference separates remote/local conftests, and must be handled differently
                if framework == "tensorflow" and job_type == "inference":
                    with shell.cd(os.path.join(framework_test_dir, "test", "integration")):
                        # Handle local tests
                        shell.run(f"{self.COVERAGE_DOC_COMMAND} --framework-version 2 local/", hide=True)
                        # Handle remote integration tests
                        shell.run(f"{self.COVERAGE_DOC_COMMAND} sagemaker/", hide=True)
                else:
                    shell.run(f"{self.COVERAGE_DOC_COMMAND} integration/", hide=True)

    # Handle TF inference remote tests
    tf_inference_dir = os.path.join(repo_root, "test", "sagemaker_tests", "tensorflow", "inference")
    with shell.cd(tf_inference_dir):
        # Install TF inference pip requirements
        shell.run("virtualenv .tf_inference")
        with shell.prefix(f"source {os.path.join(tf_inference_dir, '.tf_inference', 'bin', 'activate')}"):
            shell.run("pip install -r requirements.txt", warn=True)
            with shell.cd(os.path.join(tf_inference_dir, "test", "integration")):
                # Handle local tests
                shell.run(f"{self.COVERAGE_DOC_COMMAND} --framework-version 2 local/")
                # Handle remote integration tests
                shell.run(f"{self.COVERAGE_DOC_COMMAND} sagemaker/")
def test_oss_compliance(image):
    """
    Run oss compliance check on a container to check if license attribution files exist.
    And upload source of third party packages to S3 bucket.

    :param image: str, image URI under test
    :raises: the last clone/tar exception after 3 failed attempts, or a
        botocore ClientError when the S3 existence check / upload fails
    """
    THIRD_PARTY_SOURCE_CODE_BUCKET = "aws-dlinfra-licenses"
    THIRD_PARTY_SOURCE_CODE_BUCKET_PATH = "third_party_source_code"
    file = "THIRD_PARTY_SOURCE_CODE_URLS"
    container_name = get_container_name("oss_compliance", image)
    context = Context()
    local_repo_path = get_repository_local_path()
    start_container(container_name, image, context)

    # run compliance test to make sure license attribution files exists. testOSSCompliance is copied as part of Dockerfile
    run_cmd_on_container(container_name, context, "/usr/local/bin/testOSSCompliance /root")

    # Copy the URL list out of the container; always remove the container afterwards.
    try:
        context.run(f"docker cp {container_name}:/root/{file} {os.path.join(local_repo_path, file)}")
    finally:
        context.run(f"docker rm -f {container_name}", hide=True)

    s3_resource = boto3.resource("s3")

    with open(os.path.join(local_repo_path, file)) as source_code_file:
        for line in source_code_file:
            # Each line is expected to be "<name> <version> <url>".
            name, version, url = line.split(" ")
            file_name = f"{name}_v{version}_source_code"
            s3_object_path = f"{THIRD_PARTY_SOURCE_CODE_BUCKET_PATH}/{file_name}.tar.gz"
            local_file_path = os.path.join(local_repo_path, file_name)

            # Retry clone+tar up to 3 times; re-raise the last error if all attempts fail.
            for i in range(3):
                try:
                    if not os.path.isdir(local_file_path):
                        context.run(f"git clone {url.rstrip()} {local_file_path}")
                    context.run(f"tar -czvf {local_file_path}.tar.gz {local_file_path}")
                except Exception as e:
                    time.sleep(1)
                    if i == 2:
                        LOGGER.error(f"Unable to clone git repo. Error: {e}")
                        raise
                    continue
                else:
                    # Fix: stop retrying once clone+tar succeeded (original re-ran tar 3 times).
                    break

            try:
                if os.path.exists(f"{local_file_path}.tar.gz"):
                    LOGGER.info(f"Uploading package to s3 bucket: {line}")
                    # HEAD the object; raises ClientError 404 when it is not yet uploaded.
                    s3_resource.Object(THIRD_PARTY_SOURCE_CODE_BUCKET, s3_object_path).load()
            except botocore.exceptions.ClientError as e:
                if e.response["Error"]["Code"] == "404":
                    try:
                        # using aws cli as using boto3 expects to upload folder by iterating through each file instead of entire folder.
                        context.run(
                            f"aws s3 cp {local_file_path}.tar.gz s3://{THIRD_PARTY_SOURCE_CODE_BUCKET}/{s3_object_path}"
                        )
                        # Renamed from `object` to avoid shadowing the builtin.
                        s3_object = s3_resource.Bucket(THIRD_PARTY_SOURCE_CODE_BUCKET).Object(s3_object_path)
                        s3_object.Acl().put(ACL="public-read")
                    except ClientError as upload_error:
                        LOGGER.error(
                            f"Unable to upload source code to bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {upload_error}"
                        )
                        raise
                else:
                    LOGGER.error(
                        f"Unable to check if source code is present on bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                    )
                    raise
def test_ecr_scan(image, ecr_client, sts_client, region):
    """
    Run ECR Scan Tool on an image being tested, and raise Error if vulnerabilities found
    1. Start Scan.
    2. For 5 minutes (Run DescribeImages):
       (We run this for 5 minutes because the Scan is expected to complete in about 2 minutes, though no
        analysis has been performed on exactly how long the Scan takes for a DLC image. Therefore we also
        have a 3 minute buffer beyond the expected amount of time taken.)
    3.1. If imageScanStatus == COMPLETE: exit loop
    3.2. If imageScanStatus == IN_PROGRESS or AttributeNotFound(imageScanStatus): continue loop
    3.3. If imageScanStatus == FAILED: raise RuntimeError
    4. If DescribeImages.imageScanStatus != COMPLETE: raise TimeOutError
    5. assert imageScanFindingsSummary.findingSeverityCounts.HIGH/CRITICAL == 0

    :param image: str Image URI for image to be tested
    :param ecr_client: boto3 Client for ECR
    :param sts_client: boto3 Client for STS
    :param region: str Name of region where test is executed
    """
    # NOTE(review): a second `def test_ecr_scan` appears later in this file and
    # shadows this definition at import time — confirm whether this copy is dead code.
    test_account_id = sts_client.get_caller_identity().get("Account")
    image_account_id = get_account_id_from_image_uri(image)
    # Images owned by another account are re-uploaded into this test account's ECR
    # so the scan can be started here.
    if image_account_id != test_account_id:
        image_repo_uri, image_tag = image.split(":")
        _, image_repo_name = image_repo_uri.split("/")
        target_image_repo_name = f"beta-{image_repo_name}"
        image = ecr_utils.reupload_image_to_test_ecr(image, target_image_repo_name, region)
    minimum_sev_threshold = get_minimum_sev_threshold_level(image)
    LOGGER.info(f"Severity threshold level is {minimum_sev_threshold}")
    run_scan(ecr_client, image)
    # Pull findings at/above the threshold and enrich them with web-scraped CVE data.
    scan_results = ecr_utils.get_ecr_image_scan_results(
        ecr_client, image, minimum_vulnerability=minimum_sev_threshold)
    scan_results = ecr_utils.populate_ecr_scan_with_web_scraper_results(
        image, scan_results)
    ecr_image_vulnerability_list = ScanVulnerabilityList(
        minimum_severity=CVESeverity[minimum_sev_threshold])
    ecr_image_vulnerability_list.construct_allowlist_from_ecr_scan_result(
        scan_results)
    remaining_vulnerabilities = ecr_image_vulnerability_list
    # TODO: Once this feature is enabled, remove "if" condition and second assertion statement
    # TODO: Ensure this works on the canary tags before removing feature flag
    if is_image_covered_by_allowlist_feature(image):
        upgraded_image_vulnerability_list, image_scan_allowlist = fetch_other_vulnerability_lists(
            image, ecr_client, minimum_sev_threshold)
        s3_bucket_name = ECR_SCAN_HELPER_BUCKET
        ## In case new vulnerabilities are found conduct failure routine
        newly_found_vulnerabilities = ecr_image_vulnerability_list - image_scan_allowlist
        if newly_found_vulnerabilities:
            # Upload fixable/non-fixable details to S3 so the assert message can point at them.
            failure_routine_summary = conduct_failure_routine(
                image,
                image_scan_allowlist,
                ecr_image_vulnerability_list,
                upgraded_image_vulnerability_list,
                s3_bucket_name,
            )
            (
                s3_filename_for_fixable_list,
                s3_filename_for_non_fixable_list,
            ) = process_failure_routine_summary_and_store_data_in_s3(
                failure_routine_summary, s3_bucket_name)
        # The message names below are only evaluated on failure, i.e. when the
        # `if` branch above has run and defined them.
        assert not newly_found_vulnerabilities, (
            f"""Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilites """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non fixable vulnerabilites. 
"""
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )
        ## In case there is no new vulnerability but the allowlist is outdated conduct failure routine
        vulnerabilities_that_can_be_fixed = image_scan_allowlist - upgraded_image_vulnerability_list
        if vulnerabilities_that_can_be_fixed:
            failure_routine_summary = conduct_failure_routine(
                image,
                image_scan_allowlist,
                ecr_image_vulnerability_list,
                upgraded_image_vulnerability_list,
                s3_bucket_name,
            )
            (
                s3_filename_for_fixable_list,
                s3_filename_for_non_fixable_list,
            ) = process_failure_routine_summary_and_store_data_in_s3(
                failure_routine_summary, s3_bucket_name)
        assert not vulnerabilities_that_can_be_fixed, (
            f"""Allowlist is Outdated!! Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilites """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non fixable vulnerabilites. 
"""
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )
        return
    # Feature flag off: fall back to the repo-wide common allowlist file.
    common_ecr_scan_allowlist = ScanVulnerabilityList(
        minimum_severity=CVESeverity[minimum_sev_threshold])
    common_ecr_scan_allowlist_path = os.path.join(
        os.sep, get_repository_local_path(), "data", "common-ecr-scan-allowlist.json")
    if os.path.exists(common_ecr_scan_allowlist_path):
        common_ecr_scan_allowlist.construct_allowlist_from_file(
            common_ecr_scan_allowlist_path)
    remaining_vulnerabilities = remaining_vulnerabilities - common_ecr_scan_allowlist
    if remaining_vulnerabilities:
        assert not remaining_vulnerabilities.vulnerability_list, (
            f"The following vulnerabilities need to be fixed on {image}:\n"
            f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
        )
def test_ecr_scan(image, ecr_client, sts_client, region):
    """
    Run ECR Scan Tool on an image being tested, and raise Error if vulnerabilities found
    1. Start Scan.
    2. For 5 minutes (Run DescribeImages):
       (We run this for 5 minutes because the Scan is expected to complete in about 2 minutes, though no
        analysis has been performed on exactly how long the Scan takes for a DLC image. Therefore we also
        have a 3 minute buffer beyond the expected amount of time taken.)
    3.1. If imageScanStatus == COMPLETE: exit loop
    3.2. If imageScanStatus == IN_PROGRESS or AttributeNotFound(imageScanStatus): continue loop
    3.3. If imageScanStatus == FAILED: raise RuntimeError
    4. If DescribeImages.imageScanStatus != COMPLETE: raise TimeOutError
    5. assert imageScanFindingsSummary.findingSeverityCounts.HIGH/CRITICAL == 0

    :param image: str Image URI for image to be tested
    :param ecr_client: boto3 Client for ECR
    :param sts_client: boto3 Client for STS
    :param region: str Name of region where test is executed
    """
    test_account_id = sts_client.get_caller_identity().get("Account")
    image_account_id = get_account_id_from_image_uri(image)
    image_region = get_region_from_image_uri(image)
    image_repo_name, original_image_tag = get_repository_and_tag_from_image_uri(image)
    # All tags of this image digest, so every tag can be mirrored below.
    additional_image_tags = get_all_the_tags_of_an_image_from_ecr(ecr_client, image)
    # Make sure the image exists locally before tagging/re-uploading it.
    if not is_image_available_locally(image):
        LOGGER.info(f"Image {image} not available locally!! Pulling the image...")
        login_to_ecr_registry(Context(), image_account_id, image_region)
        run(f"docker pull {image}")
        if not is_image_available_locally(image):
            raise RuntimeError("Image shown as not available even after pulling")
    # Recreate every additional tag locally.
    for additional_tag in additional_image_tags:
        image_uri_with_new_tag = image.replace(original_image_tag, additional_tag)
        run(f"docker tag {image} {image_uri_with_new_tag}", hide=True)
    # Images owned by another account are re-uploaded (all tags) into this test
    # account's "beta-" repository; `image` is re-pointed at the re-uploaded URI.
    if image_account_id != test_account_id:
        original_image = image
        target_image_repo_name = f"beta-{image_repo_name}"
        for additional_tag in additional_image_tags:
            image_uri_with_new_tag = original_image.replace(original_image_tag, additional_tag)
            new_image_uri = ecr_utils.reupload_image_to_test_ecr(image_uri_with_new_tag, target_image_repo_name, region)
            if image_uri_with_new_tag == original_image:
                image = new_image_uri
    minimum_sev_threshold = get_minimum_sev_threshold_level(image)
    LOGGER.info(f"Severity threshold level is {minimum_sev_threshold}")
    run_scan(ecr_client, image)
    # Pull findings at/above the threshold and enrich them with web-scraped CVE data.
    scan_results = ecr_utils.get_ecr_image_scan_results(ecr_client, image, minimum_vulnerability=minimum_sev_threshold)
    scan_results = ecr_utils.populate_ecr_scan_with_web_scraper_results(image, scan_results)
    ecr_image_vulnerability_list = ScanVulnerabilityList(minimum_severity=CVESeverity[minimum_sev_threshold])
    ecr_image_vulnerability_list.construct_allowlist_from_ecr_scan_result(scan_results)
    remaining_vulnerabilities = ecr_image_vulnerability_list
    # Feature flag off: fall back to the repo-wide common allowlist file and return.
    if not is_image_covered_by_allowlist_feature(image):
        if is_canary_context():
            pytest.skip("Skipping the test on the canary.")
        common_ecr_scan_allowlist = ScanVulnerabilityList(minimum_severity=CVESeverity[minimum_sev_threshold])
        common_ecr_scan_allowlist_path = os.path.join(
            os.sep, get_repository_local_path(), "data", "common-ecr-scan-allowlist.json"
        )
        if os.path.exists(common_ecr_scan_allowlist_path):
            common_ecr_scan_allowlist.construct_allowlist_from_file(common_ecr_scan_allowlist_path)
        remaining_vulnerabilities = remaining_vulnerabilities - common_ecr_scan_allowlist
        if remaining_vulnerabilities:
            assert not remaining_vulnerabilities.vulnerability_list, (
                f"The following vulnerabilities need to be fixed on {image}:\n"
                f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
            )
        return
    upgraded_image_vulnerability_list, image_scan_allowlist = fetch_other_vulnerability_lists(
        image, ecr_client, minimum_sev_threshold
    )
    s3_bucket_name = ECR_SCAN_HELPER_BUCKET
    ## In case new vulnerabilities (fixable or non-fixable) are found, then conduct failure routine
    newly_found_vulnerabilities = ecr_image_vulnerability_list - image_scan_allowlist
    # In case there is no new vulnerability but the allowlist is outdated
    vulnerabilities_that_can_be_fixed = image_scan_allowlist - upgraded_image_vulnerability_list
    if newly_found_vulnerabilities or vulnerabilities_that_can_be_fixed:
        # Upload fixable/non-fixable details to S3 so the failure message can point at them.
        failure_routine_summary = conduct_failure_routine(
            image,
            image_scan_allowlist,
            ecr_image_vulnerability_list,
            upgraded_image_vulnerability_list,
            s3_bucket_name,
        )
        (
            s3_filename_for_fixable_list,
            s3_filename_for_non_fixable_list,
        ) = process_failure_routine_summary_and_store_data_in_s3(failure_routine_summary, s3_bucket_name)
        prepend_message = "Found new vulnerabilities in image." if newly_found_vulnerabilities else "Allowlist is outdated."
        display_message = prepend_message + " " + (
            f"""Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilites """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non fixable vulnerabilites. 
"""
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )
        # Canaries log and skip instead of failing the run.
        if is_canary_context():
            LOGGER.error(display_message)
            pytest.skip("Skipping the test failure on the canary.")
        else:
            raise RuntimeError(display_message)