Ejemplo n.º 1
0
from checkov.serverless.runner import Runner as sls_runner
from checkov.terraform.plan_runner import Runner as tf_plan_runner
from checkov.terraform.runner import Runner as tf_graph_runner
from checkov.version import version

# Optional externally-injected RunnerRegistry. When a wrapper sets this before
# calling run(), run() reuses it instead of building a fresh registry.
outer_registry = None

# Initialize the project's logging configuration, then get a module logger.
logging_init()
logger = logging.getLogger(__name__)
# Names of every framework runner checkov knows about; passed to RunnerFilter
# so --framework/--skip-framework values can be validated against this list.
checkov_runners = [
    'cloudformation', 'terraform', 'kubernetes', 'serverless', 'arm',
    'terraform_plan', 'helm', 'dockerfile', 'secrets'
]

# One instance of each runner, in default execution order.
# NOTE(review): these are instantiated eagerly at import time — presumably the
# runner constructors are cheap/side-effect free; confirm if import cost matters.
DEFAULT_RUNNERS = (tf_graph_runner(), cfn_runner(), k8_runner(), sls_runner(),
                   arm_runner(), tf_plan_runner(), helm_runner(),
                   dockerfile_runner(), secrets_runner())


def run(banner=checkov_banner, argv=sys.argv[1:]):
    """CLI entry point: parse arguments/config files and set up the HTTP manager.

    :param banner: banner text (module default ``checkov_banner``).
    :param argv: argument vector to parse. Note the default binds the process
        argv at import time (kept for interface compatibility).
    """
    # FIX: derive default config paths from the ``argv`` parameter rather than
    # the process-level sys.argv, so callers passing a custom argv are honored
    # consistently with the parse_args(argv) call below.
    default_config_paths = get_default_config_paths(argv)
    parser = ExtArgumentParser(
        description='Infrastructure as code static analysis',
        default_config_files=default_config_paths,
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        add_env_var_help=True)
    add_parser_args(parser)
    config = parser.parse_args(argv)
    # bridgecrew uses both the urllib3 and requests libraries, while checkov uses the requests library.
    # Allow the user to specify a CA bundle to be used by both libraries.
    bc_integration.setup_http_manager(config.ca_certificate)
Ejemplo n.º 2
0
def scan_files():
    """Crawl ArtifactHub repositories, scan each helm chart with checkov, and
    write per-repo CSV results plus a per-repo dependency graph image.

    Returns a tuple ``(checks_table, summary_table, all_resources,
    empty_resources_total, all_dataobj)``. Note: only ``empty_resources_total``
    is ever populated here; the other accumulators stay empty because per-repo
    results are written out via ``result_writer.print_csv`` instead.
    """
    # Crawl (mocked) ArtifactHub to get {repoIndex: repoDict} plus totals.
    crawler = artifactHubCrawler.ArtifactHubCrawler()
    crawlDict, totalRepos, totalPackages = crawler.mockCrawl()
    logging.info(
        f"Crawl completed with {totalPackages} charts from {totalRepos} repositories."
    )

    # Accumulators returned to the caller (mostly unused, see docstring) and
    # failure-tracking lists (extract/download failures are appended but not
    # reported from this function).
    checks_table = []
    summary_table = []
    all_resources = []
    empty_resources_total = {}
    all_dataobj = []
    extract_failures = []
    download_failures = []
    parse_deps_failures = []

    # Regex to pull the chart name out of a checkov resource id; group 3 is the
    # chart-name capture. NOTE(review): non-raw string with '\.' escapes relies
    # on Python passing unknown escapes through — consider r'...' in a code fix.
    chartNameFromResultDataExpression = '(.*)\.(RELEASE-NAME-)?(.*)(\.default)?'
    chartNameFromResultDataExpressionGroup = 3

    for repoCount in crawlDict:
        repo = crawlDict[repoCount]
        # One directed dependency graph per repository.
        depGraph = pgv.AGraph(strict=False, directed=True)
        summary_lst = []
        result_lst = []
        helmdeps_lst = []
        empty_resources = {}
        orgRepoFilename = f"{repo['repoName']}"
        for chartPackage in crawlDict[repoCount]['repoPackages']:
            repoChartPathName = f"{repo['repoName']}/{chartPackage['name']}"
            ## DEBUG: Disable specific repo for scanning
            #if orgRepoFilename == "reponame":
            #    continue
            if True:
                logging.info(
                    f"Scanning {repo['repoName']}/{chartPackage['name']}| Download Source "
                )
                # Setup local dir and download
                repoChartPathName = f"{repo['repoName']}/{chartPackage['name']}"
                downloadPath = f'{RESULTS_PATH}/{repoChartPathName}'

                if not os.path.exists(downloadPath):
                    os.makedirs(downloadPath)
                try:
                    # Download the chart tarball, then extract and delete it.
                    wget.download(chartPackage['content_url'], downloadPath)
                    for filename in glob.glob(f"{downloadPath}/**.tgz",
                                              recursive=False):
                        try:
                            extract(filename, downloadPath)
                            logging.info(
                                f"Scanning {repo['repoName']}/{chartPackage['name']}| Extract Source "
                            )
                            os.remove(filename)
                        except:
                            # Best-effort: record the failure and move on to
                            # the next tarball.
                            logging.info(
                                f"Failed to extract {repo['repoName']}/{chartPackage['name']}"
                            )
                            extract_failures.append(
                                [f"{repo['repoName']}/{chartPackage['name']}"])
                            continue
                except:
                    # Download failed: record and skip this chart entirely.
                    logging.info(
                        f"Failed to download {repo['repoName']}/{chartPackage['name']}"
                    )
                    download_failures.append(
                        [f"{repo['repoName']}/{chartPackage['name']}"])
                    continue

                logging.info(
                    f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Chart Deps"
                )
                # Ask the helm CLI for this chart's declared dependencies.
                proc = subprocess.Popen([
                    "helm", 'dependency', 'list',
                    f"{downloadPath}/{chartPackage['name']}"
                ],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                o, e = proc.communicate()
                if e:
                    # helm writes a benign warning for v1-API charts; anything
                    # else on stderr is logged as a processing error.
                    if "Warning: Dependencies" in str(e, 'utf-8'):
                        logging.info(
                            f"V1 API chart without Chart.yaml dependancies. Skipping chart dependancy list for {chartPackage['name']} at dir: {downloadPath}/{chartPackage['name']}. Error details: {str(e, 'utf-8')}"
                        )
                    else:
                        logging.info(
                            f"Error processing helm dependancies for {chartPackage['name']} at source dir: {downloadPath}/{chartPackage['name']}. Error details: {str(e, 'utf-8')}"
                        )
                chart_deps = parse_helm_dependency_output(o)
                logging.info(chart_deps)

                #### GRAPH. INITIAL NODE AND DEPS
                if chart_deps:
                    depGraph.add_node(
                        repoChartPathName,
                        label=repoChartPathName,
                        color='green',
                        shape='circle'
                    )  # adds current repo to graph - ONLY if there are deps.
                    for key in chart_deps:
                        logging.info(f" GRAPHING DEPS FOR {chart_deps[key]}")
                        current_dep = chart_deps[key]
                        # NOTE(review): positional access into dict values —
                        # index 0/1/2/3 presumably map to chart_name/
                        # chart_version/chart_repo/chart_status (see the sample
                        # dict further below); relies on insertion order.
                        depCombinedRepoName = f'{list(current_dep.values())[2]}/{list(current_dep.values())[0]}'  #Consistent repo/chart naming for our dep
                        depRepoNameLabel = f'{list(current_dep.values())[0]}'
                        # Increment global deps tracking
                        globalDepsUsage[
                            depCombinedRepoName] = globalDepsUsage.get(
                                depCombinedRepoName, 0) + 1
                        # NOTE(review): appends without a key check — assumes
                        # globalDepsList is a defaultdict(list); confirm.
                        globalDepsList[depCombinedRepoName].append(
                            repoChartPathName)
                        # Color the dep node: blue = same-repo dep, black =
                        # local file dep (no url), red = remote repo dep.
                        nodecolor = "black"
                        if repo['repoName'] in list(current_dep.values())[2]:
                            logging.info(
                                f"{depCombinedRepoName} (repo name in dep url): is probably a local repo dep for {repoChartPathName} Coloring blue"
                            )
                            nodecolor = "blue"
                        elif "http" not in list(current_dep.values())[2]:
                            logging.info(
                                f"{depCombinedRepoName} (no url, local files): is probably a local repo dep for {repoChartPathName} leaving black"
                            )
                        else:
                            logging.info(
                                f"{depCombinedRepoName} is probably a remote repo dep for {repoChartPathName} Coloring red"
                            )
                            nodecolor = "red"
                        depGraph.add_node(
                            depCombinedRepoName,
                            label=depRepoNameLabel,
                            shape='circle',
                            color=nodecolor)  #create dep repo if non exist.
                        depGraph.add_edge(
                            repoChartPathName, depCombinedRepoName
                        )  # Link our repo to the dep repo, will only link to the same source (new node if helm repo is from a different source)
                    """  dep_item = [
                         repoChartPathName, #Current chart combined repo/path
                         chartPackage['name'], #Current chart chartname
                         repo['repoName'],  #Current chart reponame
                         chartPackage['version'], #Current chart version
                         list(current_dep.values())[0], #dep dict chart_name
                         list(current_dep.values())[1], #dep dict chart_version
                         list(current_dep.values())[2], #dep dict chart_repo
                          list(current_dep.values())[3]  #dep dict chart_status
                     ] """

                # Assign results_scan outside of try objects.
                # NOTE(review): sentinel is the `object` type itself; the later
                # summary/deps try-blocks rely on their bare excepts if the
                # checkov run below never assigned a real report.
                results_scan = object
                try:
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Running Checkov"
                    )
                    runner = helm_runner()
                    results_scan = runner.run(root_folder=downloadPath,
                                              external_checks_dir=None,
                                              files=None)
                    res = results_scan.get_dict()
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Results"
                    )
                    # One result row per passed check.
                    for passed_check in res["results"]["passed_checks"]:
                        chartNameFromResultData = re.search(
                            chartNameFromResultDataExpression,
                            passed_check["resource"]).group(
                                chartNameFromResultDataExpressionGroup)
                        ## NEW. Default items if no key exists for non-critical components
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get(
                                'security_report_created_at',
                                'no data'), chartNameFromResultData,
                            chartPackage.get('is_operator', 'no data'),
                            str(check_category(
                                passed_check["check_id"])).lstrip(
                                    "CheckCategories."),
                            passed_check["check_id"],
                            passed_check["check_name"],
                            passed_check["check_result"]["result"],
                            passed_check["file_path"],
                            passed_check["check_class"],
                            passed_check["resource"].split(".")[0],
                            repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]

                        # check = [
                        #     repoChartPathName,
                        #     repo['repoName'],
                        #     chartPackage['name'],
                        #     chartPackage['version'],
                        #     chartPackage['ts'],
                        #     chartPackage['signed'],
                        #     chartPackage['security_report_created_at'],
                        #     chartNameFromResultData,
                        #     chartPackage['is_operator'],
                        #     str(check_category(passed_check["check_id"])).lstrip("CheckCategories."),
                        #     passed_check["check_id"],
                        #     passed_check["check_name"],
                        #     passed_check["check_result"]["result"],
                        #     passed_check["file_path"],
                        #     passed_check["check_class"],
                        #     passed_check["resource"].split(".")[0],
                        #     repo['repoRaw']['repository_id'],
                        #     repo['repoRaw']['digest'],
                        #     repo['repoRaw']['last_tracking_ts'],
                        #     repo['repoRaw']['verified_publisher'],
                        #     repo['repoRaw']['official'],
                        #     repo['repoRaw']['scanner_disabled']
                        #     ]

                        result_lst.append(check)
                    # One result row per failed check (same shape as above).
                    for failed_check in res["results"]["failed_checks"]:
                        chartNameFromResultData = re.search(
                            chartNameFromResultDataExpression,
                            failed_check["resource"]).group(
                                chartNameFromResultDataExpressionGroup)
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get(
                                'security_report_created_at',
                                'no data'), chartNameFromResultData,
                            chartPackage.get('is_operator', 'no data'),
                            str(check_category(
                                failed_check["check_id"])).lstrip(
                                    "CheckCategories."),
                            failed_check["check_id"],
                            failed_check["check_name"],
                            failed_check["check_result"]["result"],
                            failed_check["file_path"],
                            failed_check["check_class"],
                            failed_check["resource"].split(".")[0],
                            repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]
                        #check.extend(self.add_meta(scan_time))
                        result_lst.append(check)
                    # Placeholder row when the scan produced no results at all.
                    if results_scan.is_empty():
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get('security_report_created_at',
                                             'no data'), "empty scan",
                            chartPackage.get('is_operator', 'no data'),
                            "empty scan", "empty scan", "empty scan",
                            "empty scan", "empty scan", "empty scan",
                            "empty scan", repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]
                        #check.extend(self.add_meta(scan_time))
                        result_lst.append(check)
                        #empty_resources = self.module_resources()
                except Exception:
                    # Any failure in the checkov run becomes an "error in scan"
                    # placeholder row; the traceback is formatted but only the
                    # short message is logged.
                    logging.info('unexpected error in scan')
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    tb = traceback.format_exception(exc_type, exc_value,
                                                    exc_traceback)
                    check = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'no data'),
                        chartPackage.get('security_report_created_at',
                                         'no data'), "error in scan",
                        chartPackage.get('is_operator',
                                         'no data'), "error in scan",
                        "error in scan", "error in scan", "error in scan",
                        "error in scan", "error in scan", "error in scan",
                        repo['repoRaw']['repository_id'], "error in scan",
                        "error in scan", repo['repoRaw']['verified_publisher'],
                        repo['repoRaw']['official'],
                        repo['repoRaw']['scanner_disabled']
                    ]

                    result_lst.append(check)

                # Summary Results
                try:
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Summaries"
                    )
                    res = results_scan.get_dict()
                    summary_lst_item = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'No Data'),
                        chartPackage.get('security_report_created_at',
                                         'No Data'), chartPackage['name'],
                        chartPackage.get('is_operator', 'No Data'), "success",
                        res["summary"]["passed"], res["summary"]["failed"],
                        res["summary"]["parsing_errors"]
                    ]
                except:
                    # Scan never produced a report: zeroed "failed" summary row.
                    summary_lst_item = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'No Data'),
                        chartPackage.get('security_report_created_at',
                                         'No Data'), chartPackage['name'],
                        chartPackage.get('is_operator',
                                         'No Data'), "failed", 0, 0, 0
                    ]
                summary_lst.append(summary_lst_item)

                # Helm Dependancies
                try:
                    res = results_scan.get_dict()
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Helm Dependancies"
                    )
                    #{'common': {'chart_name': 'common', 'chart_version': '0.0.5', 'chart_repo': 'https://charts.adfinis.com', 'chart_status': 'unpacked'}}
                    if chart_deps:
                        for key in chart_deps:
                            logging.info(f" HELMDEP FOUND! {chart_deps[key]}")
                            current_dep = chart_deps[key]

                            dep_item = [
                                repoChartPathName,  #Current chart combined repo/path
                                repo['repoName'],  #Current chart reponame
                                chartPackage['name'],  #Current chart chartname
                                chartPackage[
                                    'version'],  #Current chart version
                                list(current_dep.values())
                                [0],  #dep dict chart_name
                                list(current_dep.values())
                                [1],  #dep dict chart_version
                                list(current_dep.values())
                                [2],  #dep dict chart_repo
                                list(current_dep.values())[
                                    3]  #dep dict chart_status
                            ]

                            helmdeps_lst.append(dep_item)

                    logging.info(f"CURRENT HELMDEPS LIST {helmdeps_lst}")

                except:
                    # Best-effort: dependency reporting is optional.
                    pass

        # Dep graph per repo
        if helmdeps_lst:
            depGraph.draw(f"results/helm-deps-{repo['repoName']}.png",
                          prog='circo')
        logging.info(f"Global deps usage: {globalDepsUsage}")
        logging.info(f"Global deps list {globalDepsList}")

        # Write this repo's results to CSV and fold its empty-resource map
        # into the overall total.
        result_writer.print_csv(summary_lst, result_lst, helmdeps_lst,
                                empty_resources, RESULTS_PATH,
                                repo['repoName'], orgRepoFilename,
                                globalDepsList, globalDepsUsage)
        empty_resources_total.update(empty_resources)
    return checks_table, summary_table, all_resources, empty_resources_total, all_dataobj
Ejemplo n.º 3
0
def run(banner=checkov_banner, argv=sys.argv[1:]):
    """Checkov CLI entry point: parse args, build the runner registry, scan.

    :param banner: text printed when invoked without a scan target.
    :param argv: argument vector to parse (default binds sys.argv at import
        time; kept for interface compatibility).
    :return: None. Reports are printed via the runner registry; repository
        results may also be persisted to the Bridgecrew platform.
    """
    parser = argparse.ArgumentParser(
        description='Infrastructure as code static analysis')
    add_parser_args(parser)
    args = parser.parse_args(argv)
    # Disable runners with missing system dependencies
    args.skip_framework = runnerDependencyHandler.disable_incompatible_runners(
        args.skip_framework)

    runner_filter = RunnerFilter(
        framework=args.framework,
        skip_framework=args.skip_framework,
        checks=args.check,
        skip_checks=args.skip_check,
        download_external_modules=convert_str_to_bool(
            args.download_external_modules),
        external_modules_download_path=args.external_modules_download_path,
        evaluate_variables=convert_str_to_bool(args.evaluate_variables),
        runners=checkov_runners)
    # Reuse an externally-injected registry when present; otherwise build one.
    if outer_registry:
        runner_registry = outer_registry
        runner_registry.runner_filter = runner_filter
    else:
        runner_registry = RunnerRegistry(banner, runner_filter, tf_runner(),
                                         cfn_runner(), k8_runner(),
                                         sls_runner(), arm_runner(),
                                         tf_plan_runner(), helm_runner())
    if args.version:
        print(version)
        return
    if args.bc_api_key:
        if args.repo_id is None:
            parser.error(
                "--repo-id argument is required when using --bc-api-key")
        if len(args.repo_id.split('/')) != 2:
            parser.error(
                "--repo-id argument format should be 'organization/repository_name' E.g "
                "bridgecrewio/checkov")

        source = os.getenv('BC_SOURCE', 'cli')
        source_version = os.getenv('BC_SOURCE_VERSION', version)
        logger.debug(f'BC_SOURCE = {source}, version = {source_version}')
        try:
            bc_integration.setup_bridgecrew_credentials(
                bc_api_key=args.bc_api_key,
                repo_id=args.repo_id,
                skip_fixes=args.skip_fixes,
                skip_suppressions=args.skip_suppressions,
                source=source,
                source_version=source_version)
        # FIX: dropped the unused `as e` binding; exc_info=True already
        # attaches the traceback to the log record.
        except Exception:
            logger.error(
                'An error occurred setting up the Bridgecrew platform integration. Please check your API token and try again.',
                exc_info=True)
            return

    guidelines = {}
    if not args.no_guide:
        guidelines = bc_integration.get_guidelines()
    if args.check and args.skip_check:
        # parser.error() raises SystemExit, so no explicit return is needed
        # (FIX: removed the unreachable `return` that followed this call).
        parser.error(
            "--check and --skip-check can not be applied together. please use only one of them"
        )
    if args.list:
        print_checks(framework=args.framework)
        return
    external_checks_dir = get_external_checks_dir(args)
    url = None

    if args.directory:
        # Scan each requested directory; persist to the platform when the
        # Bridgecrew integration is configured.
        for root_folder in args.directory:
            file = args.file
            scan_reports = runner_registry.run(
                root_folder=root_folder,
                external_checks_dir=external_checks_dir,
                files=file,
                guidelines=guidelines,
                bc_integration=bc_integration)
            if bc_integration.is_integration_configured():
                bc_integration.persist_repository(root_folder)
                bc_integration.persist_scan_results(scan_reports)
                url = bc_integration.commit_repository(args.branch)

            runner_registry.print_reports(scan_reports, args, url)
        return
    elif args.file:
        # Scan individual files; the common path prefix stands in for the repo
        # root when persisting to the platform.
        scan_reports = runner_registry.run(
            external_checks_dir=external_checks_dir,
            files=args.file,
            guidelines=guidelines,
            bc_integration=bc_integration)
        if bc_integration.is_integration_configured():
            files = [os.path.abspath(file) for file in args.file]
            root_folder = os.path.split(os.path.commonprefix(files))[0]
            bc_integration.persist_repository(root_folder)
            bc_integration.persist_scan_results(scan_reports)
            url = bc_integration.commit_repository(args.branch)
        runner_registry.print_reports(scan_reports, args, url)
    else:
        # No target given: show the banner and offer onboarding.
        print(f"{banner}")

        bc_integration.onboarding()
Ejemplo n.º 4
0
def run(banner=checkov_banner, argv=sys.argv[1:]):
    """Checkov CLI entry point: parse args, build the runner registry, scan.

    :param banner: text printed when invoked without a scan target.
    :param argv: argument vector to parse (default binds sys.argv at import
        time; kept for interface compatibility).
    :return: exit code (0/1) for directory or file scans; None otherwise.
    """
    parser = argparse.ArgumentParser(description='Infrastructure as code static analysis')
    add_parser_args(parser)
    args = parser.parse_args(argv)

    # bridgecrew uses both the urllib3 and requests libraries, while checkov uses the requests library.
    # Allow the user to specify a CA bundle to be used by both libraries.
    bc_integration.setup_http_manager(args.ca_certificate)

    # Disable runners with missing system dependencies
    args.skip_framework = runnerDependencyHandler.disable_incompatible_runners(args.skip_framework)

    runner_filter = RunnerFilter(framework=args.framework, skip_framework=args.skip_framework, checks=args.check, skip_checks=args.skip_check,
                                 download_external_modules=convert_str_to_bool(args.download_external_modules),
                                 external_modules_download_path=args.external_modules_download_path,
                                 evaluate_variables=convert_str_to_bool(args.evaluate_variables), runners=checkov_runners)
    # Reuse an externally-injected registry when present; otherwise build one.
    if outer_registry:
        runner_registry = outer_registry
        runner_registry.runner_filter = runner_filter
    else:
        runner_registry = RunnerRegistry(banner, runner_filter, tf_graph_runner(), cfn_runner(), k8_runner(), sls_runner(),
                                         arm_runner(), tf_plan_runner(), helm_runner(), dockerfile_runner())
    if args.version:
        print(version)
        return

    # Distinguish "flag given but blank" (likely a broken secret mapping) from
    # "flag given with a value" and "flag absent".
    if args.bc_api_key == '':
        parser.error('The --bc-api-key flag was specified but the value was blank. If this value was passed as a secret, you may need to double check the mapping.')
    elif args.bc_api_key:
        logger.debug(f'Using API key ending with {args.bc_api_key[-8:]}')

        if args.repo_id is None:
            parser.error("--repo-id argument is required when using --bc-api-key")
        if len(args.repo_id.split('/')) != 2:
            parser.error("--repo-id argument format should be 'organization/repository_name' E.g "
                         "bridgecrewio/checkov")

        source = os.getenv('BC_SOURCE', 'cli')
        source_version = os.getenv('BC_SOURCE_VERSION', version)
        logger.debug(f'BC_SOURCE = {source}, version = {source_version}')
        try:
            bc_integration.setup_bridgecrew_credentials(bc_api_key=args.bc_api_key, repo_id=args.repo_id,
                                                        skip_fixes=args.skip_fixes,
                                                        skip_suppressions=args.skip_suppressions,
                                                        source=source, source_version=source_version, repo_branch=args.branch)
        # FIX: dropped the unused `as e` binding; exc_info=True already
        # attaches the traceback to the log record.
        except Exception:
            logger.error('An error occurred setting up the Bridgecrew platform integration. Please check your API token and try again.', exc_info=True)
            return
    else:
        logger.debug('No API key found. Scanning locally only.')

    guidelines = {}
    if not args.no_guide:
        guidelines = bc_integration.get_guidelines()
    if args.check and args.skip_check:
        # parser.error() raises SystemExit, so no explicit return is needed
        # (FIX: removed the unreachable `return` that followed this call).
        parser.error("--check and --skip-check can not be applied together. please use only one of them")
    if args.list:
        print_checks(framework=args.framework)
        return
    external_checks_dir = get_external_checks_dir(args)
    url = None

    if args.directory:
        # Scan each requested directory; the overall exit code is 1 if any
        # single directory scan reported failures.
        exit_codes = []
        for root_folder in args.directory:
            file = args.file
            scan_reports = runner_registry.run(root_folder=root_folder, external_checks_dir=external_checks_dir,
                                               files=file, guidelines=guidelines, bc_integration=bc_integration)
            if bc_integration.is_integration_configured():
                bc_integration.persist_repository(root_folder)
                bc_integration.persist_scan_results(scan_reports)
                url = bc_integration.commit_repository(args.branch)

            exit_codes.append(runner_registry.print_reports(scan_reports, args, url))

        exit_code = 1 if 1 in exit_codes else 0
        return exit_code
    elif args.file:
        # Scan individual files; the common path prefix stands in for the repo
        # root when persisting to the platform.
        scan_reports = runner_registry.run(external_checks_dir=external_checks_dir, files=args.file,
                                           guidelines=guidelines, bc_integration=bc_integration)
        if bc_integration.is_integration_configured():
            files = [os.path.abspath(file) for file in args.file]
            root_folder = os.path.split(os.path.commonprefix(files))[0]
            bc_integration.persist_repository(root_folder)
            bc_integration.persist_scan_results(scan_reports)
            url = bc_integration.commit_repository(args.branch)
        return runner_registry.print_reports(scan_reports, args, url)
    elif args.docker_image:
        # Image scanning requires the platform integration plus dockerfile
        # context. FIX: removed the unreachable `return`s after parser.error(),
        # which raises SystemExit.
        if args.bc_api_key is None:
            parser.error("--bc-api-key argument is required when using --docker-image")
        if args.dockerfile_path is None:
            parser.error("--dockerfile-path argument is required when using --docker-image")
        if args.branch is None:
            parser.error("--branch argument is required when using --docker-image")
        image_scanner.scan(args.docker_image, args.dockerfile_path)
    else:
        # No target given: show the banner and offer onboarding.
        print(f"{banner}")

        bc_integration.onboarding()