Example #1
def populate_ecr_scan_with_web_scraper_results(
    image_uri, ecr_scan_to_be_populated, prepend_url="https://ubuntu.com/security/"
):
    # Imported locally (rather than at module level) to prevent the SM tests from failing on the Quickcheck PRs
    from web_scraper.scraper_runner import run_spider
    from web_scraper.web_scraper.spiders.cve_spiders import CveSpider

    if not ecr_scan_to_be_populated:
        return ecr_scan_to_be_populated
    cve_list = list({cve["name"] for cve in ecr_scan_to_be_populated})
    url_list = [prepend_url + cve_name for cve_name in cve_list]
    url_csv_string = ",".join(url_list)
    simplified_image_uri = image_uri.replace(".", "-").replace("/", "-").replace(":", "-")
    storage_file_path = os.path.join(
        os.sep, get_repository_local_path(), "cve-data", f"cve-data-{simplified_image_uri}.json"
    )
    run_spider(CveSpider, storage_file_path=storage_file_path, url_csv_string=url_csv_string)
    with open(storage_file_path, "r") as f:
        scraped_data = json.load(f)
    scraped_data = process_scraped_data(scraped_data)
    for finding in ecr_scan_to_be_populated:
        cve_id = finding["name"]
        package_name = [
            attribute["value"] for attribute in finding["attributes"] if attribute["key"] == "package_name"
        ][0]
        if cve_id in scraped_data and package_name in scraped_data[cve_id]:
            finding["scraped_data"] = scraped_data[cve_id][package_name]
        else:
            finding["scraped_data"] = [
                {"_comment": "Could Not be Processed. Webpage for this might not be in the required format."}
            ]
    return ecr_scan_to_be_populated
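
The lookup `scraped_data[cve_id][package_name]` above implies that `process_scraped_data` re-keys the raw spider output into a nested dict. A minimal sketch of that transformation, assuming each scraped item carries a CVE id plus a list of per-package records (the field names "cve_id", "packages", and "name" are assumptions, not the spider's actual schema):

def process_scraped_data(scraped_items):
    """Sketch: re-key raw spider output into {cve_id: {package_name: [records]}}."""
    processed = {}
    for item in scraped_items:
        cve_id = item["cve_id"]  # assumed field name
        for record in item.get("packages", []):  # assumed field name
            package_name = record["name"]  # assumed field name
            processed.setdefault(cve_id, {}).setdefault(package_name, []).append(record)
    return processed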
Example #2
def test_git_secrets():
    ctx = Context()
    repository_path = os.getenv("CODEBUILD_SRC_DIR")
    if not repository_path:
        repository_path = get_repository_local_path()
    LOGGER.info(f"repository_path = {repository_path}")

    # To make the scan fail for testing, replace the regex pattern below with a string that matches it:
    SOME_FAKE_CREDENTIALS = "ASIA[A-Z0-9]{16}"
    # git secrets --register-aws allowlists this AWS documentation example key by default
    WHITELISTED_CREDENTIALS = "AKIAIOSFODNN7EXAMPLE"
    # End of Test Section

    with ctx.cd(repository_path):
        ctx.run("git clone https://github.com/awslabs/git-secrets.git")
        with ctx.cd("git-secrets"):
            ctx.run("make install")
        ctx.run("git secrets --install")
        ctx.run("git secrets --register-aws")
        output = ctx.run("git secrets --list")
        LOGGER.info(f"\n--COMMAND--\n{output.command}\n"
                    f"--STDOUT--\n{output.stdout}\n"
                    f"--STDERR--\n{output.stderr}\n"
                    f"----------")
        scan_results = ctx.run("git secrets --scan", hide=True, warn=True)
        LOGGER.info(f"\n--COMMAND--\n{scan_results.command}\n"
                    f"--STDOUT--\n{scan_results.stdout}\n"
                    f"--STDERR--\n{scan_results.stderr}"
                    f"----------")
    assert scan_results.ok, scan_results.stderr
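
To exercise the failure path the comment in the test describes, one option (a sketch, not part of the test; it assumes the repository has already had `git secrets --install` and `git secrets --register-aws` run in it, as above) is to plant a credential-shaped string in a scratch file and confirm the scan flags it:

def check_scan_fails_on_planted_secret(repository_path):
    """Sketch: verify that `git secrets --scan` exits non-zero on a planted credential."""
    ctx = Context()
    # Built by concatenation so this source file itself never trips the scan
    planted = "AKIA" + "A" * 16  # shaped like an AWS access key id
    scratch_file = os.path.join(repository_path, "planted_secret.txt")
    with open(scratch_file, "w") as f:
        f.write(planted)
    try:
        with ctx.cd(repository_path):
            result = ctx.run("git secrets --scan planted_secret.txt", warn=True, hide=True)
        assert not result.ok, "Scan should flag a credential-shaped string"
    finally:
        os.remove(scratch_file)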
Example #3
    def generate_sagemaker_reports(self):
        """
        Append SageMaker data to the report
        """
        ctx = Context()
        git_repo_path = get_repository_local_path()

        for repo in self.SM_REPOS:
            framework, job_type = repo.split(os.sep)
            pytest_framework_path = os.path.join(git_repo_path, "test",
                                                 "sagemaker_tests", framework,
                                                 job_type)
            with ctx.cd(pytest_framework_path):
                # We need to install requirements in order to use the SM pytest frameworks
                venv = os.path.join(pytest_framework_path,
                                    f".{repo.replace('/', '-')}")
                ctx.run(f"virtualenv {venv}")
                with ctx.prefix(
                        f"source {os.path.join(venv, 'bin', 'activate')}"):
                    ctx.run("pip install -r requirements.txt", warn=True)

                    # TF inference separates remote/local conftests, and must be handled differently
                    if framework == "tensorflow" and job_type == "inference":
                        with ctx.cd(
                                os.path.join(pytest_framework_path, "test",
                                             "integration")):
                            # Handle local tests
                            ctx.run(
                                f"{self.COVERAGE_DOC_COMMAND} --framework-version 2 local/",
                                hide=True)
                            # Handle remote integration tests
                            ctx.run(f"{self.COVERAGE_DOC_COMMAND} sagemaker/",
                                    hide=True)
                    else:
                        ctx.run(f"{self.COVERAGE_DOC_COMMAND} integration/",
                                hide=True)

        # Handle TF inference remote tests
        tf_inf_path = os.path.join(git_repo_path, "test", "sagemaker_tests",
                                   "tensorflow", "inference")

        with ctx.cd(tf_inf_path):
            # Install TF inference pip requirements
            ctx.run(f"virtualenv .tf_inference")
            with ctx.prefix(
                    f"source {os.path.join(tf_inf_path, '.tf_inference', 'bin', 'activate')}"
            ):
                ctx.run("pip install -r requirements.txt", warn=True)
                with ctx.cd(os.path.join(tf_inf_path, "test", "integration")):
                    # Handle local tests
                    ctx.run(
                        f"{self.COVERAGE_DOC_COMMAND} --framework-version 2 local/"
                    )

                    # Handle remote integration tests
                    ctx.run(f"{self.COVERAGE_DOC_COMMAND} sagemaker/")
Example #4
def test_oss_compliance(image):
    """
    Run oss compliance check on a container to check if license attribution files exist.
    And upload source of third party packages to S3 bucket.
    """
    THIRD_PARTY_SOURCE_CODE_BUCKET = "aws-dlinfra-licenses"
    THIRD_PARTY_SOURCE_CODE_BUCKET_PATH = "third_party_source_code"
    file = "THIRD_PARTY_SOURCE_CODE_URLS"
    container_name = get_container_name("oss_compliance", image)
    context = Context()
    local_repo_path = get_repository_local_path()
    start_container(container_name, image, context)

    # Run the compliance test to make sure license attribution files exist. testOSSCompliance is copied into the image as part of the Dockerfile.
    run_cmd_on_container(container_name, context,
                         "/usr/local/bin/testOSSCompliance /root")

    try:
        context.run(
            f"docker cp {container_name}:/root/{file} {os.path.join(local_repo_path, file)}"
        )
    finally:
        context.run(f"docker rm -f {container_name}", hide=True)

    s3_resource = boto3.resource("s3")

    with open(os.path.join(local_repo_path, file)) as source_code_file:
        for line in source_code_file:
            name, version, url = line.split(" ")
            file_name = f"{name}_v{version}_source_code"
            s3_object_path = f"{THIRD_PARTY_SOURCE_CODE_BUCKET_PATH}/{file_name}.tar.gz"
            local_file_path = os.path.join(local_repo_path, file_name)

            for i in range(3):
                try:
                    if not os.path.isdir(local_file_path):
                        context.run(f"git clone {url.rstrip()} {local_file_path}")
                        context.run(f"tar -czvf {local_file_path}.tar.gz {local_file_path}")
                    break
                except Exception as e:
                    # Retry up to 3 times; re-raise on the final attempt
                    time.sleep(1)
                    if i == 2:
                        LOGGER.error(f"Unable to clone git repo. Error: {e}")
                        raise
            try:
                if os.path.exists(f"{local_file_path}.tar.gz"):
                    LOGGER.info(f"Checking S3 for an existing copy of package: {line.strip()}")
                    # load() raises a 404 ClientError if the object does not already exist
                    s3_resource.Object(THIRD_PARTY_SOURCE_CODE_BUCKET,
                                       s3_object_path).load()
            except botocore.exceptions.ClientError as e:
                if e.response["Error"]["Code"] == "404":
                    try:
                        # Use the AWS CLI here: uploading via boto3 would require iterating
                        # through each file instead of copying the archive in one call.
                        context.run(
                            f"aws s3 cp {local_file_path}.tar.gz s3://{THIRD_PARTY_SOURCE_CODE_BUCKET}/{s3_object_path}"
                        )
                        s3_object = s3_resource.Bucket(THIRD_PARTY_SOURCE_CODE_BUCKET).Object(s3_object_path)
                        s3_object.Acl().put(ACL="public-read")
                    except botocore.exceptions.ClientError as e:
                        LOGGER.error(
                            f"Unable to upload source code to bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                        )
                        raise
                else:
                    LOGGER.error(
                        f"Unable to check if source code is present on bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                    )
                    raise
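
The parsing above assumes THIRD_PARTY_SOURCE_CODE_URLS holds one space-separated `name version url` triple per line. A small standalone sketch of a more tolerant reader for that format, which skips blank or malformed lines instead of raising on them:

def parse_source_code_urls(file_path):
    """Sketch: yield (name, version, url) triples from the URLs file."""
    with open(file_path) as f:
        for line in f:
            parts = line.split()  # also strips the trailing newline from the URL
            if len(parts) == 3:
                yield tuple(parts)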
Example #5
def test_ecr_scan(image, ecr_client, sts_client, region):
    """
    Run ECR Scan Tool on an image being tested, and raise Error if vulnerabilities found
    1. Start Scan.
    2. For 5 minutes (Run DescribeImages):
       (We run this for 5 minutes because the Scan is expected to complete in about 2 minutes, though no
        analysis has been performed on exactly how long the Scan takes for a DLC image. Therefore we also
        have a 3 minute buffer beyond the expected amount of time taken.)
    3.1. If imageScanStatus == COMPLETE: exit loop
    3.2. If imageScanStatus == IN_PROGRESS or AttributeNotFound(imageScanStatus): continue loop
    3.3. If imageScanStatus == FAILED: raise RuntimeError
    4. If DescribeImages.imageScanStatus != COMPLETE: raise TimeOutError
    5. assert imageScanFindingsSummary.findingSeverityCounts.HIGH/CRITICAL == 0

    :param image: str Image URI for image to be tested
    :param ecr_client: boto3 Client for ECR
    :param sts_client: boto3 Client for STS
    :param region: str Name of region where test is executed
    """
    test_account_id = sts_client.get_caller_identity().get("Account")
    image_account_id = get_account_id_from_image_uri(image)
    if image_account_id != test_account_id:
        image_repo_uri, image_tag = image.split(":")
        _, image_repo_name = image_repo_uri.split("/")
        target_image_repo_name = f"beta-{image_repo_name}"
        image = ecr_utils.reupload_image_to_test_ecr(image,
                                                     target_image_repo_name,
                                                     region)

    minimum_sev_threshold = get_minimum_sev_threshold_level(image)
    LOGGER.info(f"Severity threshold level is {minimum_sev_threshold}")

    run_scan(ecr_client, image)
    scan_results = ecr_utils.get_ecr_image_scan_results(
        ecr_client, image, minimum_vulnerability=minimum_sev_threshold)
    scan_results = ecr_utils.populate_ecr_scan_with_web_scraper_results(
        image, scan_results)
    ecr_image_vulnerability_list = ScanVulnerabilityList(
        minimum_severity=CVESeverity[minimum_sev_threshold])
    ecr_image_vulnerability_list.construct_allowlist_from_ecr_scan_result(
        scan_results)

    remaining_vulnerabilities = ecr_image_vulnerability_list

    # TODO: Once this feature is enabled, remove "if" condition and second assertion statement
    # TODO: Ensure this works on the canary tags before removing feature flag
    if is_image_covered_by_allowlist_feature(image):
        upgraded_image_vulnerability_list, image_scan_allowlist = fetch_other_vulnerability_lists(
            image, ecr_client, minimum_sev_threshold)
        s3_bucket_name = ECR_SCAN_HELPER_BUCKET

        ## If new vulnerabilities are found, conduct the failure routine
        newly_found_vulnerabilities = ecr_image_vulnerability_list - image_scan_allowlist
        if newly_found_vulnerabilities:
            failure_routine_summary = conduct_failure_routine(
                image,
                image_scan_allowlist,
                ecr_image_vulnerability_list,
                upgraded_image_vulnerability_list,
                s3_bucket_name,
            )
            (
                s3_filename_for_fixable_list,
                s3_filename_for_non_fixable_list,
            ) = process_failure_routine_summary_and_store_data_in_s3(
                failure_routine_summary, s3_bucket_name)
        assert not newly_found_vulnerabilities, (
            f"""Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilites """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non fixable vulnerabilites. """
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )

        ## If there are no new vulnerabilities but the allowlist is outdated, conduct the failure routine
        vulnerabilities_that_can_be_fixed = image_scan_allowlist - upgraded_image_vulnerability_list
        if vulnerabilities_that_can_be_fixed:
            failure_routine_summary = conduct_failure_routine(
                image,
                image_scan_allowlist,
                ecr_image_vulnerability_list,
                upgraded_image_vulnerability_list,
                s3_bucket_name,
            )
            (
                s3_filename_for_fixable_list,
                s3_filename_for_non_fixable_list,
            ) = process_failure_routine_summary_and_store_data_in_s3(
                failure_routine_summary, s3_bucket_name)
        assert not vulnerabilities_that_can_be_fixed, (
            f"""Allowlist is Outdated!! Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilites """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non fixable vulnerabilites. """
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )
        return

    common_ecr_scan_allowlist = ScanVulnerabilityList(
        minimum_severity=CVESeverity[minimum_sev_threshold])
    common_ecr_scan_allowlist_path = os.path.join(
        os.sep, get_repository_local_path(), "data",
        "common-ecr-scan-allowlist.json")
    if os.path.exists(common_ecr_scan_allowlist_path):
        common_ecr_scan_allowlist.construct_allowlist_from_file(
            common_ecr_scan_allowlist_path)

    remaining_vulnerabilities = remaining_vulnerabilities - common_ecr_scan_allowlist

    if remaining_vulnerabilities:
        assert not remaining_vulnerabilities.vulnerability_list, (
            f"The following vulnerabilities need to be fixed on {image}:\n"
            f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
        )
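
`run_scan` itself is not shown in either version of this test. A sketch consistent with the polling loop the docstring describes, using standard boto3 ECR calls (the 5-second poll cadence and the URI parsing are assumptions; `time` and the boto3 client are assumed available as in the surrounding module):

def run_scan(ecr_client, image):
    """Sketch: start an ECR image scan and poll DescribeImages for up to 5 minutes."""
    repo_uri, tag = image.split(":")
    repository_name = repo_uri.split("/", 1)[1]
    image_id = {"imageTag": tag}
    ecr_client.start_image_scan(repositoryName=repository_name, imageId=image_id)
    for _ in range(60):  # 60 polls x 5 seconds = 5 minutes
        details = ecr_client.describe_images(
            repositoryName=repository_name, imageIds=[image_id]
        )["imageDetails"][0]
        status = details.get("imageScanStatus", {}).get("status")
        if status == "COMPLETE":
            return
        if status == "FAILED":
            raise RuntimeError(f"ECR scan failed for {image}")
        time.sleep(5)  # IN_PROGRESS, or imageScanStatus not present yet
    raise TimeoutError(f"ECR scan did not complete within 5 minutes for {image}")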
Example #6
def test_ecr_scan(image, ecr_client, sts_client, region):
    """
    Run ECR Scan Tool on an image being tested, and raise Error if vulnerabilities found
    1. Start Scan.
    2. For 5 minutes (Run DescribeImages):
       (We run this for 5 minutes because the Scan is expected to complete in about 2 minutes, though no
        analysis has been performed on exactly how long the Scan takes for a DLC image. Therefore we also
        have a 3 minute buffer beyond the expected amount of time taken.)
    3.1. If imageScanStatus == COMPLETE: exit loop
    3.2. If imageScanStatus == IN_PROGRESS or AttributeNotFound(imageScanStatus): continue loop
    3.3. If imageScanStatus == FAILED: raise RuntimeError
    4. If DescribeImages.imageScanStatus != COMPLETE: raise TimeOutError
    5. assert imageScanFindingsSummary.findingSeverityCounts.HIGH/CRITICAL == 0

    :param image: str Image URI for image to be tested
    :param ecr_client: boto3 Client for ECR
    :param sts_client: boto3 Client for STS
    :param region: str Name of region where test is executed
    """
    test_account_id = sts_client.get_caller_identity().get("Account")
    image_account_id = get_account_id_from_image_uri(image)
    image_region = get_region_from_image_uri(image)
    image_repo_name, original_image_tag = get_repository_and_tag_from_image_uri(image)
    additional_image_tags = get_all_the_tags_of_an_image_from_ecr(ecr_client, image)
    if not is_image_available_locally(image):
        LOGGER.info(f"Image {image} is not available locally. Pulling the image...")
        login_to_ecr_registry(Context(), image_account_id, image_region)
        run(f"docker pull {image}")
        if not is_image_available_locally(image):
            raise RuntimeError("Image still not available locally even after pulling it")
    for additional_tag in additional_image_tags:
        image_uri_with_new_tag = image.replace(original_image_tag, additional_tag)
        run(f"docker tag {image} {image_uri_with_new_tag}", hide=True)

    if image_account_id != test_account_id:
        original_image = image
        target_image_repo_name = f"beta-{image_repo_name}"
        for additional_tag in additional_image_tags:
            image_uri_with_new_tag = original_image.replace(original_image_tag, additional_tag)
            new_image_uri = ecr_utils.reupload_image_to_test_ecr(image_uri_with_new_tag, target_image_repo_name, region)
            if image_uri_with_new_tag == original_image:
                image = new_image_uri

    minimum_sev_threshold = get_minimum_sev_threshold_level(image)
    LOGGER.info(f"Severity threshold level is {minimum_sev_threshold}")

    run_scan(ecr_client, image)
    scan_results = ecr_utils.get_ecr_image_scan_results(ecr_client, image, minimum_vulnerability=minimum_sev_threshold)
    scan_results = ecr_utils.populate_ecr_scan_with_web_scraper_results(image, scan_results)
    ecr_image_vulnerability_list = ScanVulnerabilityList(minimum_severity=CVESeverity[minimum_sev_threshold])
    ecr_image_vulnerability_list.construct_allowlist_from_ecr_scan_result(scan_results)

    remaining_vulnerabilities = ecr_image_vulnerability_list

    if not is_image_covered_by_allowlist_feature(image):
        if is_canary_context():
            pytest.skip("Skipping the test on the canary.")
        
        common_ecr_scan_allowlist = ScanVulnerabilityList(minimum_severity=CVESeverity[minimum_sev_threshold])
        common_ecr_scan_allowlist_path = os.path.join(
            os.sep, get_repository_local_path(), "data", "common-ecr-scan-allowlist.json"
        )
        if os.path.exists(common_ecr_scan_allowlist_path):
            common_ecr_scan_allowlist.construct_allowlist_from_file(common_ecr_scan_allowlist_path)

        remaining_vulnerabilities = remaining_vulnerabilities - common_ecr_scan_allowlist

        if remaining_vulnerabilities:
            assert not remaining_vulnerabilities.vulnerability_list, (
                f"The following vulnerabilities need to be fixed on {image}:\n"
                f"{json.dumps(remaining_vulnerabilities.vulnerability_list, indent=4)}"
            )
        return

    upgraded_image_vulnerability_list, image_scan_allowlist = fetch_other_vulnerability_lists(
        image, ecr_client, minimum_sev_threshold
    )
    s3_bucket_name = ECR_SCAN_HELPER_BUCKET

    ## If new vulnerabilities (fixable or non-fixable) are found, conduct the failure routine
    newly_found_vulnerabilities = ecr_image_vulnerability_list - image_scan_allowlist
    # If there are no new vulnerabilities but the allowlist is outdated, the same routine applies
    vulnerabilities_that_can_be_fixed = image_scan_allowlist - upgraded_image_vulnerability_list

    if newly_found_vulnerabilities or vulnerabilities_that_can_be_fixed:
        failure_routine_summary = conduct_failure_routine(
            image,
            image_scan_allowlist,
            ecr_image_vulnerability_list,
            upgraded_image_vulnerability_list,
            s3_bucket_name,
        )
        (
            s3_filename_for_fixable_list,
            s3_filename_for_non_fixable_list,
        ) = process_failure_routine_summary_and_store_data_in_s3(failure_routine_summary, s3_bucket_name)
        prepend_message = (
            "Found new vulnerabilities in image." if newly_found_vulnerabilities else "Allowlist is outdated."
        )
        display_message = prepend_message + " " + (
            f"""Found {len(failure_routine_summary["fixable_vulnerabilities"])} fixable vulnerabilities """
            f"""and {len(failure_routine_summary["non_fixable_vulnerabilities"])} non-fixable vulnerabilities. """
            f"""Refer to files s3://{s3_bucket_name}/{s3_filename_for_fixable_list}, s3://{s3_bucket_name}/{s3_filename_for_non_fixable_list}, """
            f"""s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_current_image_ecr_scan_list"]} and s3://{s3_bucket_name}/{failure_routine_summary["s3_filename_for_allowlist"]}."""
        )
        if is_canary_context():
            LOGGER.error(display_message)
            pytest.skip("Skipping the test failure on the canary.")
        else:
            raise RuntimeError(display_message)
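
Both versions of the test lean on the `-` operator of ScanVulnerabilityList to diff scan results against allowlists, and on the object's truthiness to decide whether anything remains. A sketch of the semantics that usage implies (the internal {package: [findings]} layout is an assumption):

class ScanVulnerabilityList:
    """Sketch of the interface the tests above rely on."""

    def __init__(self, minimum_severity):
        self.minimum_severity = minimum_severity
        self.vulnerability_list = {}  # assumed shape: {package_name: [finding, ...]}

    def __sub__(self, other):
        # Keep only findings present in self but absent from other
        result = ScanVulnerabilityList(minimum_severity=self.minimum_severity)
        for package, findings in self.vulnerability_list.items():
            allowed = other.vulnerability_list.get(package, [])
            remaining = [f for f in findings if f not in allowed]
            if remaining:
                result.vulnerability_list[package] = remaining
        return result

    def __bool__(self):
        # `if remaining_vulnerabilities:` in the tests reduces to this check
        return bool(self.vulnerability_list)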