Beispiel #1
0
 def hcl2(self, directory, tf_definitions=None, parsing_errors=None):
     """Recursively parse all ``.tf`` files under *directory*.

     Fixes vs. the previous revision:
       * mutable default arguments (``{}`` shared across calls) replaced
         with ``None`` sentinels;
       * module discovery only runs when parsing actually produced a
         definition (previously ``tf_definition.get`` could run on a falsy
         value and raise inside the ``try``);
       * ``parsing_errors`` is now propagated into recursive module scans
         so failures inside modules are reported to the caller.

     :param directory: root directory to walk for ``.tf`` files.
     :param tf_definitions: dict (created if ``None``) mapping file path ->
         parsed definition; shared across recursive calls.
     :param parsing_errors: dict (created if ``None``) mapping file path ->
         the Exception raised while parsing it.
     """
     if tf_definitions is None:
         tf_definitions = {}
     if parsing_errors is None:
         parsing_errors = {}
     modules_scan = set()
     for root, d_names, f_names in os.walk(directory):
         filter_ignored_directories(d_names)
         self._mark_parsed(os.path.abspath(root))
         for file in f_names:
             if not file.endswith(".tf"):
                 continue
             tf_file = os.path.join(root, file)
             if tf_file in tf_definitions:
                 continue
             try:
                 tf_definition = self._parse_tf_definitions(tf_file)
                 if tf_definition:
                     tf_definitions[tf_file] = tf_definition
                     # Queue referenced module sources for a follow-up
                     # scan; _is_parsed guards against re-walking.
                     for modules in tf_definition.get("module", []):
                         for module in modules.values():
                             relative_path = module['source'][0]
                             abs_path = os.path.abspath(
                                 os.path.join(root, relative_path))
                             if not self._is_parsed(abs_path):
                                 modules_scan.add(abs_path)
             except Exception as e:
                 self.logger.debug(
                     f'failed while parsing file {tf_file}',
                     exc_info=e)
                 parsing_errors[tf_file] = e
     for m in modules_scan:
         if os.path.exists(m):
             self.hcl2(directory=m, tf_definitions=tf_definitions,
                       parsing_errors=parsing_errors)
Beispiel #2
0
def _parse_directory(
        directory: str,
        include_sub_dirs: bool,
        out_definitions: Dict,
        out_evaluations_context: Optional[Dict[str, Dict[
            str, EvaluationContext]]] = None,
        out_parsing_errors: Optional[Dict[str, Exception]] = None,
        env_vars: Optional[Mapping[str, str]] = None,
        module_loader_registry: ModuleLoaderRegistry = default_ml_registry,
        dir_filter: Callable[[str], bool] = lambda _: True):
    """Load and resolve Terraform configuration starting at *directory*.

    Data is loaded per the Terraform code-organization layout
    (https://www.terraform.io/docs/configuration/index.html#code-organization)
    and merged into *out_definitions*. The result follows HCL parsing output
    with these distinctions:

    - data is keyed per source file: ``<file>: <data>``
      - loaded modules get referrer-qualified keys:
        ``<file>[<referring_file>#<index>]: <data>``
    - module blocks gain a ``"__resolved__"`` list naming the file/referrer
      entries under which their data was loaded, e.g. ``["main.tf#0"]``
    - all resolvable variables are resolved

    :param directory: directory whose .tf and .tfvars files are loaded.
    :param include_sub_dirs: when True, subdirectories are walked as well.
    :param out_definitions: receives the resolved, "simple" TF data.
    :param out_evaluations_context: receives definition context; outer key
        is the file, inner key a variable name.
    :param out_parsing_errors: receives parsing errors keyed by file path.
    :param env_vars: values used for environment-variable resolution;
        defaults to the local process environment.
    :param module_loader_registry: registry used for module resolution,
        allowing customized (or test-friendly) resolution behavior.
    :param dir_filter: predicate over absolute directory paths; only
        directories for which it returns True are processed.
    """

    # Build fresh per-call containers for any sentinel defaults.
    out_evaluations_context = {} if out_evaluations_context is None else out_evaluations_context
    out_parsing_errors = {} if out_parsing_errors is None else out_parsing_errors
    env_vars = dict(os.environ) if env_vars is None else env_vars

    if not include_sub_dirs:
        # Single-directory mode: load just the given directory.
        _internal_dir_load(directory, out_definitions, out_evaluations_context,
                           out_parsing_errors, env_vars, None,
                           module_loader_registry, dir_filter)
        return

    for sub_dir, d_names, f_names in os.walk(directory):
        filter_ignored_directories(d_names)
        if not dir_filter(os.path.abspath(sub_dir)):
            continue
        _internal_dir_load(sub_dir, out_definitions,
                           out_evaluations_context, out_parsing_errors,
                           env_vars, None, module_loader_registry,
                           dir_filter)
Beispiel #3
0
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=None):
        """Scan CloudFormation templates and return a populated Report.

        :param root_folder: directory to walk for templates (endings in
            CF_POSSIBLE_ENDINGS); may be None when ``files`` is given.
        :param external_checks_dir: optional directories of extra checks to
            load into ``cfn_registry``.
        :param files: optional explicit template paths to parse.
        :param runner_filter: controls which checks execute; a fresh
            RunnerFilter is created when None. (Previously a mutable
            ``RunnerFilter()`` default was shared across all calls.)
        :return: ``Report`` with one ``Record`` per check result.
        """
        if runner_filter is None:
            runner_filter = RunnerFilter()
        report = Report(self.check_type)
        definitions = {}        # parsed template data keyed by file path
        definitions_raw = {}    # raw file lines keyed by file path
        parsing_errors = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                cfn_registry.load_external_checks(directory)

        if files:
            for file in files:
                (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in CF_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            # Walked files are keyed root-relative so records show stable paths.
            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)

        # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
        definitions = {k: v for k, v in definitions.items() if v and "Resources" in v}
        definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions}

        for cf_file in definitions.keys():
            if isinstance(definitions[cf_file], dict_node) and 'Resources' in definitions[cf_file].keys():
                cf_context_parser = ContextParser(cf_file, definitions[cf_file], definitions_raw[cf_file])
                # Previous form passed an unused `indent=2` kwarg to str.format;
                # dropped — the rendered message is unchanged.
                logging.debug("Template Dump for {}: {}".format(cf_file, definitions[cf_file]))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in definitions[cf_file]['Resources'].items():
                    resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                    # check that the resource can be parsed as a CF resource
                    if resource_id:
                        entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(resource)
                        if entity_lines_range and entity_code_lines:
                            # TODO - Variable Eval Message!
                            variable_evaluations = {}

                            skipped_checks = ContextParser.collect_skip_comments(entity_code_lines)

                            results = cfn_registry.scan(cf_file, {resource_name: resource}, skipped_checks,
                                                        runner_filter)
                            for check, check_result in results.items():
                                record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                                code_block=entity_code_lines, file_path=cf_file,
                                                file_line_range=entity_lines_range,
                                                resource=resource_id, evaluations=variable_evaluations,
                                                check_class=check.__class__.__module__)
                                report.add_record(record=record)
        return report
Beispiel #4
0
    def find_chart_directories(root_folder, files):
        """Collect directories that contain a Helm ``Chart.yaml``.

        :param root_folder: directory tree to walk; every directory whose
            file list includes ``Chart.yaml`` is collected. May be falsy.
        :param files: explicit file paths; each path whose basename is
            ``Chart.yaml`` contributes its parent directory. May be falsy.
        :return: list of chart directory paths (files-derived entries
            first, then walked entries).
        """
        chart_directories = []

        if files:
            logging.info(
                'Running with --file argument; checking for Helm Chart.yaml files'
            )
            chart_directories.extend(
                os.path.dirname(candidate) for candidate in files
                if os.path.basename(candidate) == 'Chart.yaml')

        if root_folder:
            for current_dir, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names)
                if 'Chart.yaml' in f_names:
                    chart_directories.append(current_dir)

        return chart_directories
Beispiel #5
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter()):
        """Scan Serverless Framework files and return a populated Report.

        Per parsed file this runs two passes: (1) an embedded CloudFormation
        "resources" sub-template is scanned with the CFN registry, and
        (2) each entry in the "functions" section is scanned with the
        serverless registry.

        :param root_folder: directory to walk for serverless files; may be
            None when explicit ``files`` are given.
        :param external_checks_dir: optional directories of extra checks to
            load into ``sls_registry``.
        :param files: optional explicit file paths (matched against
            SLS_FILE_MASK by basename).
        :param runner_filter: controls which checks execute.
            NOTE(review): mutable default instance is shared across calls —
            harmless only if RunnerFilter is never mutated; confirm.
        :return: ``Report`` with one ``Record`` per check result.
        """
        report = Report(self.check_type)
        definitions = {}        # parsed data keyed by file path
        definitions_raw = {}    # raw file lines keyed by file path
        parsing_errors = {}     # NOTE(review): never populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                sls_registry.load_external_checks(directory, runner_filter)

        # Explicitly requested files: only basenames in the serverless file
        # mask are parsed; files that fail to parse are skipped silently.
        if files:
            for file in files:
                if os.path.basename(file) in SLS_FILE_MASK:
                    parse_result = parse(file)
                    if parse_result:
                        (definitions[file],
                         definitions_raw[file]) = parse_result

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Don't walk in to "node_modules" directories under the root folder. If –for some reason–
                # scanning one of these is desired, it can be directly specified.
                if "node_modules" in d_names:
                    d_names.remove("node_modules")

                filter_ignored_directories(d_names)
                for file in f_names:
                    if file in SLS_FILE_MASK:
                        full_path = os.path.join(root, file)
                        if "/." not in full_path:
                            # skip temp directories
                            files_list.append(full_path)

            # Walked files are keyed root-relative so records show stable paths.
            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                parse_result = parse(file)
                if parse_result:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse_result

        # Filter out empty files that have not been parsed successfully
        definitions = {k: v for k, v in definitions.items() if v}
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions.keys()
        }

        for sls_file in definitions.keys():
            if not isinstance(definitions[sls_file], dict_node):
                continue

            # Pass 1: embedded CloudFormation sub-template, scanned with the
            # CloudFormation registry/checks.
            if CFN_RESOURCES_TOKEN in definitions[sls_file] and isinstance(
                    definitions[sls_file][CFN_RESOURCES_TOKEN], dict_node):
                cf_sub_template = definitions[sls_file][CFN_RESOURCES_TOKEN]
                cf_context_parser = CfnContextParser(sls_file, cf_sub_template,
                                                     definitions_raw[sls_file])
                logging.debug("Template Dump for {}: {}".format(
                    sls_file, definitions[sls_file], indent=2))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in cf_sub_template[
                        'Resources'].items():
                    if not isinstance(resource, dict_node):
                        continue
                    # NOTE(review): cf_resource_id may be falsy here and is
                    # still passed to Record below (unlike other variants of
                    # this runner which skip such resources) — confirm intent.
                    cf_resource_id = cf_context_parser.extract_cf_resource_id(
                        resource, resource_name)
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                        resource)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(
                            entity_code_lines)
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        results = cfn_registry.scan(sls_file,
                                                    {resource_name: resource},
                                                    skipped_checks,
                                                    runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource=cf_resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__)
                            report.add_record(record=record)
            # Pass 2: serverless "functions" section, scanned with the
            # serverless registry after enriching each function with
            # provider-level settings.
            if FUNCTIONS_TOKEN in definitions[sls_file]:
                template_functions = definitions[sls_file][FUNCTIONS_TOKEN]
                sls_context_parser = SlsContextParser(
                    sls_file, definitions[sls_file], definitions_raw[sls_file])
                for sls_function_name, sls_function in template_functions.items(
                ):
                    if not isinstance(sls_function, dict_node):
                        continue
                    entity_lines_range, entity_code_lines = sls_context_parser.extract_function_code_lines(
                        sls_function)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(
                            entity_code_lines)
                        variable_evaluations = {}
                        sls_context_parser.enrich_function_with_provider(
                            sls_function_name)
                        results = sls_registry.scan(
                            sls_file, {
                                'function': sls_function,
                                'provider_type':
                                sls_context_parser.provider_type
                            }, skipped_checks, runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource=sls_function_name,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__)
                            report.add_record(record=record)

        return report
Beispiel #6
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        """Scan Serverless Framework projects and return a populated Report.

        Per parsed file, work proceeds in phases: (1) scan an embedded
        CloudFormation "resources" sub-template with the CFN registry,
        (2) scan multi-item sections (e.g. "functions") and single-item
        sections with their section registries, (3) run whole-file
        "complete" checks.

        :param root_folder: directory to walk; may be None if ``files`` given.
        :param external_checks_dir: directories of extra checks to load into
            ``function_registry``.
        :param files: explicit file paths (matched against SLS_FILE_MASK).
        :param runner_filter: controls which checks execute.
            NOTE(review): mutable default instance is shared across calls —
            harmless only if RunnerFilter is never mutated; confirm.
        :param collect_skip_comments: accepted but unused in this body —
            presumably honored by callers/other runners; TODO confirm.
        :return: ``Report`` with one ``Record`` per check result.
        """
        report = Report(self.check_type)
        definitions = {}        # parsed data keyed by file path
        definitions_raw = {}    # raw file lines keyed by file path
        parsing_errors = {}     # NOTE(review): never populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                function_registry.load_external_checks(directory)

        # Explicitly requested files: only basenames in the serverless file
        # mask are parsed; files that fail to parse are skipped silently.
        if files:
            for file in files:
                if os.path.basename(file) in SLS_FILE_MASK:
                    parse_result = parse(file)
                    if parse_result:
                        (definitions[file],
                         definitions_raw[file]) = parse_result

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Don't walk in to "node_modules" directories under the root folder. If –for some reason–
                # scanning one of these is desired, it can be directly specified.
                if "node_modules" in d_names:
                    d_names.remove("node_modules")

                filter_ignored_directories(d_names)
                for file in f_names:
                    if file in SLS_FILE_MASK:
                        full_path = os.path.join(root, file)
                        if "/." not in full_path:
                            # skip temp directories
                            files_list.append(full_path)

            # Walked files are keyed root-relative so records show stable paths.
            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                parse_result = parse(file)
                if parse_result:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse_result

        # Filter out empty files that have not been parsed successfully
        definitions = {k: v for k, v in definitions.items() if v}
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions.keys()
        }

        for sls_file, sls_file_data in definitions.items():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which sls_file does not always give).
            if sls_file[0] == '/':
                path_to_convert = (root_folder +
                                   sls_file) if root_folder else sls_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, sls_file)) if root_folder else sls_file

            file_abs_path = os.path.abspath(path_to_convert)

            if not isinstance(sls_file_data, dict_node):
                continue

            # Phase 1: embedded CloudFormation sub-template, scanned with the
            # CloudFormation registry/checks.
            if CFN_RESOURCES_TOKEN in sls_file_data and isinstance(
                    sls_file_data[CFN_RESOURCES_TOKEN], dict_node):
                cf_sub_template = sls_file_data[CFN_RESOURCES_TOKEN]
                cf_context_parser = CfnContextParser(sls_file, cf_sub_template,
                                                     definitions_raw[sls_file])
                logging.debug("Template Dump for {}: {}".format(sls_file,
                                                                sls_file_data,
                                                                indent=2))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in cf_sub_template[
                        'Resources'].items():
                    if not isinstance(resource, dict_node):
                        continue
                    cf_resource_id = cf_context_parser.extract_cf_resource_id(
                        resource, resource_name)
                    if not cf_resource_id:
                        # Not Type attribute for resource
                        continue
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                        resource)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(
                            entity_code_lines)
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        entity = {resource_name: resource}
                        results = cfn_registry.scan(sls_file, entity,
                                                    skipped_checks,
                                                    runner_filter)
                        tags = cfn_utils.get_resource_tags(
                            entity, cfn_registry)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource=cf_resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path,
                                entity_tags=tags)
                            report.add_record(record=record)

            # Shared context parser for the serverless-native phases below.
            sls_context_parser = SlsContextParser(sls_file, sls_file_data,
                                                  definitions_raw[sls_file])

            # Sub-sections that have multiple items under them
            for token, registry in MULTI_ITEM_SECTIONS:
                template_items = sls_file_data.get(token)
                if not template_items or not isinstance(template_items, dict):
                    continue
                for item_name, item_content in template_items.items():
                    if not isinstance(item_content, dict_node):
                        continue
                    entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                        item_content)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(
                            entity_code_lines)
                        variable_evaluations = {}
                        if token == "functions":  #nosec
                            # "Enriching" copies things like "environment" and "stackTags" down into the
                            # function data from the provider block since logically that's what serverless
                            # does. This allows checks to see what the complete data would be.
                            sls_context_parser.enrich_function_with_provider(
                                item_name)
                        entity = EntityDetails(
                            sls_context_parser.provider_type, item_content)
                        results = registry.scan(sls_file, entity,
                                                skipped_checks, runner_filter)
                        tags = cfn_utils.get_resource_tags(entity, registry)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource=item_name,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path,
                                entity_tags=tags)
                            report.add_record(record=record)
            # Sub-sections that are a single item
            for token, registry in SINGLE_ITEM_SECTIONS:
                item_content = sls_file_data.get(token)
                if not item_content:
                    continue
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                    item_content)
                if not entity_lines_range:
                    # Fall back to the whole file's line range when the item
                    # itself yields none.
                    entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                        sls_file_data)

                skipped_checks = CfnContextParser.collect_skip_comments(
                    entity_code_lines)
                variable_evaluations = {}
                entity = EntityDetails(sls_context_parser.provider_type,
                                       item_content)
                results = registry.scan(sls_file, entity, skipped_checks,
                                        runner_filter)
                tags = cfn_utils.get_resource_tags(entity, registry)
                for check, check_result in results.items():
                    record = Record(check_id=check.id,
                                    check_name=check.name,
                                    check_result=check_result,
                                    code_block=entity_code_lines,
                                    file_path=sls_file,
                                    file_line_range=entity_lines_range,
                                    resource=token,
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__,
                                    file_abs_path=file_abs_path,
                                    entity_tags=tags)
                    report.add_record(record=record)

            # "Complete" checks
            # NOTE: Ignore code content, no point in showing (could be long)
            entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                sls_file_data)
            if entity_lines_range:
                skipped_checks = CfnContextParser.collect_skip_comments(
                    entity_code_lines)
                variable_evaluations = {}
                entity = EntityDetails(sls_context_parser.provider_type,
                                       sls_file_data)
                results = complete_registry.scan(sls_file, entity,
                                                 skipped_checks, runner_filter)
                tags = cfn_utils.get_resource_tags(entity, complete_registry)
                for check, check_result in results.items():
                    record = Record(
                        check_id=check.id,
                        check_name=check.name,
                        check_result=check_result,
                        code_block=[],  # Don't show, could be large
                        file_path=sls_file,
                        file_line_range=entity_lines_range,
                        resource="complete",  # Weird, not sure what to put where
                        evaluations=variable_evaluations,
                        check_class=check.__class__.__module__,
                        file_abs_path=file_abs_path,
                        entity_tags=tags)
                    report.add_record(record=record)

        return report
Beispiel #7
0
def _filter_ignored_directories(d_names, excluded_paths):
    """Prune ignored directories from *d_names* in place.

    First applies the shared ``filter_ignored_directories`` filter with the
    caller-supplied *excluded_paths*, then additionally removes the module
    loader registry's external-modules folder so downloaded external modules
    are not walked.

    Args:
        d_names: mutable list of directory names (as yielded by ``os.walk``);
            mutated in place so the walk skips the removed entries.
        excluded_paths: extra path patterns forwarded to
            ``filter_ignored_directories``.
    """
    filter_ignored_directories(d_names, excluded_paths)
    # Plain loop instead of the original side-effecting list comprehension;
    # iterate over a copy because we mutate d_names while scanning it.
    for d in list(d_names):
        if d == default_ml_registry.external_modules_folder_name:
            d_names.remove(d)
Beispiel #8
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        """Scan ARM templates under *root_folder* and/or in *files*.

        Parses every file with an ARM-looking extension, flattens nested
        resources into the top-level resource list, and runs each resource
        through the ARM check registry, collecting one Record per
        (resource, check) result.

        Args:
            root_folder: directory to walk recursively; may be None when
                *files* is provided.
            external_checks_dir: optional iterable of directories whose
                custom checks are loaded into ``arm_registry``.
            files: optional explicit list of template files to parse.
            runner_filter: controls which checks are executed.
                NOTE(review): mutable default argument shared across calls —
                confirm RunnerFilter is never mutated by callees.
            collect_skip_comments: accepted for interface parity; not used
                in this body.

        Returns:
            Report containing all check records.
        """
        report = Report(self.check_type)
        definitions = {}        # file path -> parsed template (dict tree)
        definitions_raw = {}    # file path -> raw parsed lines (for code blocks)
        parsing_errors = {}     # collected but not populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                arm_registry.load_external_checks(directory, runner_filter)

        if files:
            for file in files:
                (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Mutates d_names in place so os.walk skips ignored dirs.
                filter_ignored_directories(d_names)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in ARM_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                # Keyed with a leading '/' so the path handling below can
                # distinguish -d (directory) results from -f inputs.
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                (definitions[relative_file_path],
                 definitions_raw[relative_file_path]) = parse(file)

        # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
        definitions = {
            k: v
            for k, v in definitions.items()
            if v and v.__contains__("resources")
        }
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions.keys()
        }

        for arm_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which arm_file does not always give).
            if arm_file[0] == '/':
                path_to_convert = (root_folder +
                                   arm_file) if root_folder else arm_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, arm_file)) if root_folder else arm_file

            file_abs_path = os.path.abspath(path_to_convert)

            if isinstance(
                    definitions[arm_file],
                    dict_node) and 'resources' in definitions[arm_file].keys():
                arm_context_parser = ContextParser(arm_file,
                                                   definitions[arm_file],
                                                   definitions_raw[arm_file])
                # NOTE(review): indent=2 is an extra keyword silently ignored
                # by str.format — likely meant for json.dumps; confirm intent.
                logging.debug("Template Dump for {}: {}".format(
                    arm_file, definitions[arm_file], indent=2))
                arm_context_parser.evaluate_default_parameters()

                # Split out nested resources from base resource
                for resource in definitions[arm_file]['resources']:
                    if "parent_name" in resource.keys():
                        # Already a lifted nested resource; don't recurse again.
                        continue
                    nested_resources = []
                    nested_resources = arm_context_parser.search_deep_keys(
                        "resources", resource, [])
                    if nested_resources:
                        for nr in nested_resources:
                            nr_element = nr.pop()
                            if nr_element:
                                for element in nr_element:
                                    new_resource = {}
                                    new_resource = element
                                    if isinstance(new_resource, dict):
                                        # Tag with parent info, then append to the
                                        # same list being iterated so nested
                                        # resources are scanned in the next pass.
                                        new_resource["parent_name"] = resource[
                                            "name"]
                                        new_resource["parent_type"] = resource[
                                            "type"]
                                        definitions[arm_file][
                                            'resources'].append(new_resource)

                for resource in definitions[arm_file]['resources']:
                    resource_id = arm_context_parser.extract_arm_resource_id(
                        resource)
                    resource_name = arm_context_parser.extract_arm_resource_name(
                        resource)
                    entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(
                        resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        skipped_checks = ContextParser.collect_skip_comments(
                            resource)

                        results = arm_registry.scan(arm_file,
                                                    {resource_name: resource},
                                                    skipped_checks,
                                                    runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=arm_file,
                                file_line_range=entity_lines_range,
                                resource=resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path)
                            report.add_record(record=record)
        return report
Beispiel #9
0
 def test_filter_ignored_directories_regex(self):
     """Directory names matching the default ignore patterns or the extra
     'tests' pattern are pruned in place; order of survivors is kept."""
     candidates = [
         'bin', 'integration_tests', 'tests', 'docs', '.github',
         'checkov', 'venv', '.git', 'kubernetes', '.idea',
     ]
     survivors = ['bin', 'docs', 'checkov', 'venv', 'kubernetes']
     filter_ignored_directories(candidates, ["tests"])
     self.assertEqual(survivors, candidates)
Beispiel #10
0
    def run(self, root_folder=None, external_checks_dir=None, files=None, runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        """Scan Dockerfiles under *root_folder* and/or in *files*.

        Collects every file whose basename matches ``DOCKER_FILE_MASK``,
        parses it into an instruction map, and runs the Dockerfile check
        registry against each file, producing one Record per check result.

        Args:
            root_folder: directory to walk; may be None when *files* is given.
            external_checks_dir: optional iterable of directories with extra
                checks to load into ``registry``.
            files: optional explicit list of Dockerfile paths.
            runner_filter: controls which checks run.
                NOTE(review): mutable default argument shared across calls.
            collect_skip_comments: accepted for interface parity; unused here.

        Returns:
            Report with all check records.
        """
        report = Report(self.check_type)
        definitions = {}        # file path -> parsed instruction map
        definitions_raw = {}    # file path -> raw lines for code blocks
        parsing_errors = {}     # declared but not populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            for file in files:
                if os.path.basename(file) in DOCKER_FILE_MASK:
                    (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Mutates d_names in place so os.walk skips ignored dirs.
                filter_ignored_directories(d_names)
                for file in f_names:
                    if file in DOCKER_FILE_MASK:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                # Leading '/' marks directory-scan keys; see path handling below.
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                try:
                    (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
                except TypeError:
                    logging.info(f'Dockerfile skipping {file} as it is not a valid dockerfile template')

        for docker_file_path in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
            if docker_file_path[0] == '/':
                path_to_convert = (root_folder + docker_file_path) if root_folder else docker_file_path
            else:
                path_to_convert = (os.path.join(root_folder, docker_file_path)) if root_folder else docker_file_path

            file_abs_path = os.path.abspath(path_to_convert)
            skipped_checks = collect_skipped_checks(definitions[docker_file_path])
            instructions = definitions[docker_file_path]

            results = registry.scan(docker_file_path, instructions, skipped_checks,
                                    runner_filter)
            for check, check_result in results.items():
                result_configuration = check_result['results_configuration']
                # Defaults for checks that carry no result configuration.
                startline = 0
                endline = 0
                result_instruction = ""
                if result_configuration:
                    startline = result_configuration['startline']
                    endline = result_configuration['endline']
                    result_instruction = result_configuration["instruction"]

                codeblock = []
                # Fills codeblock in place from definitions_raw[startline:endline].
                self.calc_record_codeblock(codeblock, definitions_raw, docker_file_path, endline, startline)
                record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                code_block=codeblock,
                                file_path=docker_file_path,
                                file_line_range=[startline,
                                                 endline],
                                # NOTE(review): the format string has two
                                # placeholders but three args — str.format
                                # silently drops `startline`; confirm whether
                                # "{}.{}.{}" was intended for the resource id.
                                resource="{}.{}".format(docker_file_path,
                                                        result_instruction,
                                                        startline),
                                evaluations=None, check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path, entity_tags=None)
                report.add_record(record=record)

        return report
Beispiel #11
0
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=None,
            collect_skip_comments=True) -> Report:
        """Scan files under *root_folder* (plus any explicit *files*) for secrets.

        Walks the tree collecting supported, non-prohibited files, emits
        SKIPPED records for any secret checks the caller asked to skip, then
        scans each file and records FAILED/SKIPPED results per found secret.

        Args:
            root_folder: directory to walk; also used to resolve secret file
                paths for line lookup.
            external_checks_dir: accepted for interface parity; unused here.
            files: optional explicit list of files to scan in addition to the
                walked ones.
            runner_filter: controls skipped/should-run checks; a fresh
                RunnerFilter is created when None (avoids the mutable-default
                pitfall of the previous signature while remaining
                backward-compatible for callers passing nothing).
            collect_skip_comments: accepted for interface parity; unused here.

        Returns:
            Report with one Record per skipped check and per detected secret.
        """
        if runner_filter is None:
            runner_filter = RunnerFilter()
        # Reverse map: check id -> secret type name, for SKIPPED record names.
        inv_secret_map = {v: k for k, v in SECRET_TYPE_TO_ID.items()}
        report = Report(self.check_type)
        initialize_plugin_settings(None)
        # Implement non IaC files (including .terraform dir)
        files_to_scan = files or []
        excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories
        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Mutates d_names in place so os.walk skips ignored dirs.
                filter_ignored_directories(d_names, excluded_paths)
                for file in f_names:
                    if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                        files_to_scan.append(os.path.join(root, file))
        logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')
        for file in files_to_scan:
            logging.info(f'Scanning file {file} for secrets')
            if runner_filter.skip_checks:
                for skipped_check in runner_filter.skip_checks:
                    if skipped_check in inv_secret_map:
                        report.add_record(Record(
                            check_id=skipped_check,
                            check_name=inv_secret_map[skipped_check],
                            check_result={'result': CheckResult.SKIPPED,
                                          "suppress_comment": f"Secret scan {skipped_check} is skipped"},
                            file_path=file,
                            file_abs_path=os.path.abspath(file),
                            check_class="",
                            code_block="",
                            file_line_range=[0, 0],
                            evaluations=None,
                            resource=file
                        ))
            try:
                # Probe a copy of the scanner: an immediately-exhausted
                # iterator means the file produced no secrets at all.
                next(iter(deepcopy(scan_file)(file)))
            except StopIteration:
                # TODO decide how to make the file pass
                result = {'result': CheckResult.PASSED}
                continue
            for secret in scan_file(file):
                check_id = SECRET_TYPE_TO_ID[secret.type]
                # BUGFIX: fetch the offending line before branching.
                # Previously line_text was assigned only on the should-run
                # path, so a skipped check hit a NameError on the first
                # iteration (or reused a stale line from a prior secret)
                # when building the Record below.
                line_text = linecache.getline(os.path.join(root_folder, secret.filename), secret.line_number)
                if not runner_filter.should_run_check(check_id):
                    result = {'result': CheckResult.SKIPPED}
                else:
                    result = {'result': CheckResult.FAILED}
                    # Skip lines starting with 'git_commit' (presumably
                    # git-commit metadata, not real secrets) — preserves
                    # prior behavior; applies only to non-skipped checks.
                    if line_text != "" and line_text.split()[0] == 'git_commit':
                        continue
                    result = self.search_for_suppression(root_folder, secret) or result
                report.add_record(Record(
                    check_id=check_id,
                    check_name=secret.type,
                    check_result=result,
                    code_block=[(secret.line_number, line_text)],
                    file_path=f'{secret.filename}:{secret.secret_hash}',
                    file_line_range=[secret.line_number, secret.line_number + 1],
                    resource=secret.filename,
                    check_class=None,
                    evaluations=None,
                    file_abs_path=os.path.abspath(secret.filename)
                ))

        return report
Beispiel #12
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter()):
        """Scan Kubernetes manifests under *root_folder* and/or in *files*.

        Parses every file with a k8s-looking extension, expands ``List``
        items and (init)container specs into additional top-level
        definitions, then runs every resulting definition through the k8s
        check registry.

        Args:
            root_folder: directory to walk; may be None when *files* is given.
            external_checks_dir: optional iterable of directories with extra
                checks to load into ``registry``.
            files: optional explicit list of manifest paths.
            runner_filter: controls which checks run.
                NOTE(review): mutable default argument shared across calls.

        Returns:
            Report with one Record per (definition, check) result.
        """
        report = Report(self.check_type)
        definitions = {}        # file path -> list of parsed documents
        definitions_raw = {}    # file path -> raw lines for code blocks
        parsing_errors = {}     # declared but not populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            for file in files:
                parse_result = parse(file)
                if parse_result:
                    (definitions[file], definitions_raw[file]) = parse_result

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Mutates d_names in place so os.walk skips ignored dirs.
                filter_ignored_directories(d_names)

                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in K8_POSSIBLE_ENDINGS:
                        full_path = os.path.join(root, file)
                        if "/." not in full_path and file not in [
                                'package.json', 'package-lock.json'
                        ]:
                            # skip temp directories
                            files_list.append(full_path)

            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                parse_result = parse(file)
                if parse_result:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse_result

        for k8_file in definitions.keys():
            if definitions[k8_file]:
                # Pass 1: expand List kinds by appending their items to the
                # same list being iterated (index loop tolerates growth).
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    # Split out resources if entity kind is List
                    if entity_conf["kind"] == "List":
                        for item in entity_conf["items"]:
                            definitions[k8_file].append(item)

                # Pass 2: lift containers/initContainers into standalone
                # definitions tagged with their parent's identity.
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    if entity_conf["kind"] == "List":
                        continue

                    # Skip entity without metadata["name"]
                    if "metadata" in entity_conf:
                        if not "name" in entity_conf["metadata"]:
                            continue
                    else:
                        continue

                    # Append containers and initContainers to definitions list
                    for type in ["containers", "initContainers"]:
                        containers = []
                        if entity_conf["kind"] == "CustomResourceDefinition":
                            continue
                        containers = self._search_deep_keys(
                            type, entity_conf, [])
                        if not containers:
                            continue
                        containers = containers.pop()
                        #containers.insert(0,entity_conf['kind'])
                        containerDef = {}
                        namespace = ""
                        if "namespace" in entity_conf["metadata"]:
                            namespace = entity_conf["metadata"]["namespace"]
                        else:
                            namespace = "default"
                        containerDef["containers"] = containers.pop()
                        if containerDef["containers"] is not None:
                            # NOTE: shadows the outer loop index i on purpose;
                            # it is re-derived from the range loop next pass.
                            for cd in containerDef["containers"]:
                                i = containerDef["containers"].index(cd)
                                containerDef["containers"][i][
                                    "apiVersion"] = entity_conf["apiVersion"]
                                containerDef["containers"][i]["kind"] = type
                                containerDef["containers"][i][
                                    "parent"] = "{}.{}.{} (container {})".format(
                                        entity_conf["kind"],
                                        entity_conf["metadata"]["name"],
                                        namespace, str(i))
                                containerDef["containers"][i][
                                    "parent_metadata"] = entity_conf[
                                        "metadata"]
                            definitions[k8_file].extend(
                                containerDef["containers"])

                # Run for each definition included added container definitions
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    if entity_conf["kind"] == "List":
                        continue

                    # Skip entity without metadata["name"] or parent_metadata["name"]
                    if not any(x in entity_conf["kind"]
                               for x in ["containers", "initContainers"]):
                        if "metadata" in entity_conf:
                            if not "name" in entity_conf["metadata"]:
                                continue
                        else:
                            continue

                    # Skip Kustomization Templates (for now)
                    if entity_conf["kind"] == "Kustomization":
                        continue

                    skipped_checks = get_skipped_checks(entity_conf)

                    results = registry.scan(k8_file, entity_conf,
                                            skipped_checks, runner_filter)

                    # TODO refactor into context parsing
                    find_lines_result_list = list(
                        find_lines(entity_conf, '__startline__'))
                    start_line = entity_conf["__startline__"]
                    end_line = entity_conf["__endline__"]

                    if start_line == end_line:
                        entity_lines_range = [start_line, end_line]
                        entity_code_lines = definitions_raw[k8_file][
                            start_line - 1:end_line]
                    else:
                        entity_lines_range = [start_line, end_line - 1]
                        entity_code_lines = definitions_raw[k8_file][
                            start_line - 1:end_line - 1]

                    # TODO? - Variable Eval Message!
                    variable_evaluations = {}

                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id,
                            check_name=check.name,
                            check_result=check_result,
                            code_block=entity_code_lines,
                            file_path=k8_file,
                            file_line_range=entity_lines_range,
                            resource=check.get_resource_id(entity_conf),
                            evaluations=variable_evaluations,
                            check_class=check.__class__.__module__)
                        report.add_record(record=record)

        return report
Beispiel #13
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True) -> Report:
        """Scan files for secrets using a fixed set of detect-secrets plugins.

        Collects supported files under *root_folder* (plus explicit *files*),
        scans them into a SecretsCollection, and records one FAILED (or
        suppressed) Record per secret whose type maps to a known check id.

        Args:
            root_folder: directory to walk; also used to resolve secret file
                paths for line lookup and relative report paths.
            external_checks_dir: accepted for interface parity; unused here.
            files: optional explicit list of files to scan.
            runner_filter: supplies excluded paths and skip-check ids.
                NOTE(review): mutable default argument shared across calls.
            collect_skip_comments: accepted for interface parity; unused here.

        Returns:
            Report with one Record per detected, non-suppressed secret.
        """
        secrets = SecretsCollection()
        # transient_settings scopes the plugin configuration to this block.
        with transient_settings({
                # Only run scans with only these plugins.
                'plugins_used': [
                    {
                        'name': 'AWSKeyDetector'
                    },
                    {
                        'name': 'ArtifactoryDetector'
                    },
                    {
                        'name': 'AzureStorageKeyDetector'
                    },
                    {
                        'name': 'BasicAuthDetector'
                    },
                    {
                        'name': 'CloudantDetector'
                    },
                    {
                        'name': 'IbmCloudIamDetector'
                    },
                    {
                        'name': 'MailchimpDetector'
                    },
                    {
                        'name': 'PrivateKeyDetector'
                    },
                    {
                        'name': 'SlackDetector'
                    },
                    {
                        'name': 'SoftlayerDetector'
                    },
                    {
                        'name': 'SquareOAuthDetector'
                    },
                    {
                        'name': 'StripeDetector'
                    },
                    {
                        'name': 'TwilioKeyDetector'
                    },
                ]
        }):
            report = Report(self.check_type)
            # Implement non IaC files (including .terraform dir)
            files_to_scan = files or []
            excluded_paths = (
                runner_filter.excluded_paths
                or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
            if root_folder:
                for root, d_names, f_names in os.walk(root_folder):
                    # Mutates d_names in place so os.walk skips ignored dirs.
                    filter_ignored_directories(d_names, excluded_paths)
                    for file in f_names:
                        if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                            files_to_scan.append(os.path.join(root, file))
            logging.info(
                f'Secrets scanning will scan {len(files_to_scan)} files')
            for file in files_to_scan:
                logging.info(f'Scanning file {file} for secrets')
                try:
                    secrets.scan_file(file)
                except Exception as e:
                    # Best-effort: unreadable/binary files are logged, not fatal.
                    logging.warning(
                        f"Secret scanning:could not process file {file}, {e}")
                    continue

            for _, secret in iter(secrets):
                check_id = SECRET_TYPE_TO_ID.get(secret.type)
                if not check_id:
                    # Secret type produced by a plugin we do not map to a check.
                    continue
                result = {'result': CheckResult.FAILED}
                line_text = linecache.getline(
                    os.path.join(root_folder, secret.filename),
                    secret.line_number)
                # Skip lines starting with 'git_commit' (presumably commit
                # metadata rather than a real secret) — TODO confirm.
                if line_text != "" and line_text.split()[0] == 'git_commit':
                    continue
                # A suppression (skip comment / skip-check id) overrides FAILED.
                result = self.search_for_suppression(
                    check_id, root_folder, secret, runner_filter.skip_checks,
                    CHECK_ID_TO_SECRET_TYPE) or result
                report.add_record(
                    Record(check_id=check_id,
                           check_name=secret.type,
                           check_result=result,
                           code_block=[(secret.line_number, line_text)],
                           file_path=
                           f'/{os.path.relpath(secret.filename, root_folder)}',
                           file_line_range=[
                               secret.line_number, secret.line_number + 1
                           ],
                           resource=secret.secret_hash,
                           check_class=None,
                           evaluations=None,
                           file_abs_path=os.path.abspath(secret.filename)))

            return report
Beispiel #14
0
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
        """Scan ARM templates under *root_folder* and/or in *files*.

        Compact variant of the ARM runner: parses ARM-extension files,
        flattens nested resources, and scans each resource against the ARM
        check registry.

        Args:
            root_folder: directory to walk; may be None when *files* is given.
            external_checks_dir: optional iterable of directories with extra
                checks to load into ``arm_registry``.
            files: optional explicit list of template files.
            runner_filter: controls which checks run.
                NOTE(review): mutable default argument shared across calls.
            collect_skip_comments: accepted for interface parity; unused here.

        Returns:
            Report with one Record per (resource, check) result.
        """
        report = Report(self.check_type)
        definitions = {}        # file path -> parsed template (dict tree)
        definitions_raw = {}    # file path -> raw lines for code blocks
        parsing_errors = {}     # declared but not populated in this body
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                arm_registry.load_external_checks(directory, runner_filter)

        if files:
            for file in files:
                (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Mutates d_names in place so os.walk skips ignored dirs.
                filter_ignored_directories(d_names)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in ARM_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)

        # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
        definitions = {k: v for k, v in definitions.items() if v and v.__contains__("resources")}
        definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}

        for arm_file in definitions.keys():
            if isinstance(definitions[arm_file], dict_node) and 'resources' in definitions[arm_file].keys():
                arm_context_parser = ContextParser(arm_file, definitions[arm_file], definitions_raw[arm_file])
                # NOTE(review): indent=2 is an extra keyword silently ignored
                # by str.format — likely meant for json.dumps; confirm intent.
                logging.debug("Template Dump for {}: {}".format(arm_file, definitions[arm_file], indent=2))
                arm_context_parser.evaluate_default_parameters()

                # Split out nested resources from base resource
                for resource in definitions[arm_file]['resources']:
                    if "parent_name" in resource.keys():
                        # Already a lifted nested resource; don't recurse again.
                        continue
                    nested_resources = []
                    nested_resources = arm_context_parser.search_deep_keys("resources", resource, [])
                    if nested_resources:
                        for nr in nested_resources:
                            nr_element = nr.pop()
                            if nr_element:
                                for element in nr_element:
                                    new_resource = {}
                                    new_resource = element
                                    if isinstance(new_resource, dict):
                                        # Tag with parent info; appending to the
                                        # list being iterated queues the nested
                                        # resource for the scan loop below.
                                        new_resource["parent_name"] = resource["name"]
                                        new_resource["parent_type"] = resource["type"]
                                        definitions[arm_file]['resources'].append(new_resource)

                for resource in definitions[arm_file]['resources']:
                    resource_id = arm_context_parser.extract_arm_resource_id(resource)
                    resource_name = arm_context_parser.extract_arm_resource_name(resource)
                    entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        skipped_checks = ContextParser.collect_skip_comments(resource)

                        results = arm_registry.scan(arm_file, {resource_name: resource}, skipped_checks,
                                                    runner_filter)
                        for check, check_result in results.items():
                            record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                            code_block=entity_code_lines, file_path=arm_file,
                                            file_line_range=entity_lines_range,
                                            resource=resource_id, evaluations=variable_evaluations,
                                            check_class=check.__class__.__module__)
                            report.add_record(record=record)
        return report
Beispiel #15
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=None,
            collect_skip_comments=True):
        """Scan CloudFormation templates and return a Report of check results.

        :param root_folder: directory walked recursively for CF templates (-d mode);
            may be None when explicit files are given.
        :param external_checks_dir: optional iterable of directories holding extra
            checks to register before scanning.
        :param files: optional explicit list of template files to scan (-f mode).
        :param runner_filter: RunnerFilter controlling which checks run. Defaults to
            a fresh instance per call (``None`` sentinel) to avoid the shared
            mutable-default-argument pitfall of ``runner_filter=RunnerFilter()``.
        :param collect_skip_comments: accepted for interface compatibility; this
            implementation does not read it.
        :return: populated Report for ``self.check_type``.
        """
        if runner_filter is None:
            runner_filter = RunnerFilter()
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                cfn_registry.load_external_checks(directory, runner_filter)

        if files:
            for file in files:
                # parse() returns None for non-CF input, which makes the tuple
                # unpacking raise TypeError. Skip such files with a log message
                # instead of crashing — same handling as the -d walk below.
                try:
                    (definitions[file], definitions_raw[file]) = parse(file)
                except TypeError:
                    logging.info(
                        f'CloudFormation skipping {file} as it is not a valid CF template'
                    )

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names)
                for file in f_names:
                    if os.path.splitext(file)[1] in CF_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                # Key results by a '/'-prefixed path relative to the scanned root
                # so -d output is stable regardless of where the root lives.
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                try:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse(file)
                except TypeError:
                    logging.info(
                        f'CloudFormation skipping {file} as it is not a valid CF template'
                    )

        # Keep only successfully parsed templates that actually declare Resources.
        definitions = {
            k: v
            for k, v in definitions.items()
            if v and isinstance(v, dict_node) and "Resources" in v
            and isinstance(v["Resources"], dict_node)
        }
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions
        }

        for cf_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading /
            # because it's an absolute path, or there will be no leading slash;
            # root_folder will always be None. If -d is used, root_folder is the
            # value given and keys start with a / (hardcoded above). Either way,
            # reconstruct a real filesystem path for abspath().
            if cf_file[0] == '/':
                path_to_convert = (root_folder +
                                   cf_file) if root_folder else cf_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, cf_file)) if root_folder else cf_file

            file_abs_path = os.path.abspath(path_to_convert)
            if isinstance(definitions[cf_file],
                          dict_node) and 'Resources' in definitions[cf_file]:
                cf_context_parser = ContextParser(cf_file,
                                                  definitions[cf_file],
                                                  definitions_raw[cf_file])
                logging.debug("Template Dump for {}: {}".format(
                    cf_file, definitions[cf_file]))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in definitions[cf_file][
                        'Resources'].items():
                    resource_id = cf_context_parser.extract_cf_resource_id(
                        resource, resource_name)
                    # Entries that cannot be parsed as CF resources yield no id.
                    if resource_id:
                        entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                            resource)
                        if entity_lines_range and entity_code_lines:
                            # TODO - Variable Eval Message!
                            variable_evaluations = {}

                            skipped_checks = ContextParser.collect_skip_comments(
                                entity_code_lines)

                            results = cfn_registry.scan(
                                cf_file, {resource_name: resource},
                                skipped_checks, runner_filter)
                            for check, check_result in results.items():
                                record = Record(
                                    check_id=check.id,
                                    check_name=check.name,
                                    check_result=check_result,
                                    code_block=entity_code_lines,
                                    file_path=cf_file,
                                    file_line_range=entity_lines_range,
                                    resource=resource_id,
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__,
                                    file_abs_path=file_abs_path)
                                report.add_record(record=record)
        return report
Beispiel #16
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True,
            helmChart=None):
        """Scan Kubernetes manifests and return a Report of check results.

        Manifests are gathered either from an explicit ``files`` list or by
        walking ``root_folder`` for files with K8_POSSIBLE_ENDINGS. Each parsed
        file is processed in three sequential passes over the same list (the
        list is mutated between passes, so pass order matters):

        1. flatten ``kind: List`` entities by appending their items;
        2. synthesize per-container pseudo-entities from ``containers`` /
           ``initContainers`` and append them;
        3. run registered checks over every (original + appended) entity.

        :param root_folder: directory to walk recursively; may be None.
        :param external_checks_dir: optional iterable of extra-check directories.
        :param files: optional explicit list of manifest files.
        :param runner_filter: controls which checks run.
        :param collect_skip_comments: accepted but not read by this implementation.
        :param helmChart: accepted but not read by this implementation.
        :return: populated Report for ``self.check_type``.
        """
        # NOTE(review): runner_filter=RunnerFilter() is a mutable default shared
        # across calls — consider a None sentinel.
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        parsing_errors = {}  # NOTE(review): initialized but never populated/used
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory, runner_filter)

        if files:
            for file in files:
                # parse() returning a falsy value means the file is not a valid
                # manifest; it is silently skipped.
                parse_result = parse(file)
                if parse_result:
                    (definitions[file], definitions_raw[file]) = parse_result

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names)

                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in K8_POSSIBLE_ENDINGS:
                        full_path = os.path.join(root, file)
                        # "/." excludes hidden/temp directories anywhere in the
                        # path; package*.json are npm files that share endings.
                        if "/." not in full_path and file not in [
                                'package.json', 'package-lock.json'
                        ]:
                            # skip temp directories
                            files_list.append(full_path)

            for file in files_list:
                # Key results by a '/'-prefixed path relative to the scanned root.
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                parse_result = parse(file)
                if parse_result:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse_result

        for k8_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which sls_file does not always give).
            if k8_file[0] == '/':
                path_to_convert = (root_folder +
                                   k8_file) if root_folder else k8_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, k8_file)) if root_folder else k8_file

            file_abs_path = os.path.abspath(path_to_convert)

            if definitions[k8_file]:
                # Pass 1: flatten "List" entities. range(len(...)) is evaluated
                # once, so items appended here are NOT revisited by this pass —
                # they are handled by the later passes, which recompute len().
                for i in range(len(definitions[k8_file])):
                    # Skip fragments that carry neither apiVersion nor kind.
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    # NOTE(review): indent=2 is an unused kwarg; str.format
                    # ignores extra keyword arguments.
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    # Split out resources if entity kind is List
                    if entity_conf["kind"] == "List":
                        for item in entity_conf["items"]:
                            definitions[k8_file].append(item)

                # Pass 2: synthesize pseudo-entities for each container so that
                # container-level checks can run against them individually.
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    # List wrappers were already flattened in pass 1.
                    if entity_conf["kind"] == "List":
                        continue

                    # Skip entity without metadata["name"]
                    if "metadata" in entity_conf:
                        if isinstance(
                                entity_conf["metadata"],
                                int) or not "name" in entity_conf["metadata"]:
                            continue
                    else:
                        continue

                    # Skip entity with parent (metadata["ownerReferences"]) in runtime
                    # We will alert in runtime only
                    if "ownerReferences" in entity_conf["metadata"] and \
                            entity_conf["metadata"]["ownerReferences"] is not None:
                        continue

                    # Append containers and initContainers to definitions list
                    # NOTE(review): loop variable "type" shadows the builtin.
                    for type in ["containers", "initContainers"]:
                        containers = []
                        if entity_conf["kind"] == "CustomResourceDefinition":
                            continue
                        containers = self._search_deep_keys(
                            type, entity_conf, [])
                        if not containers:
                            continue
                        # _search_deep_keys returns path lists; the last element
                        # of the last match is the containers value itself.
                        containers = containers.pop()
                        #containers.insert(0,entity_conf['kind'])
                        containerDef = {}
                        namespace = ""
                        if "namespace" in entity_conf["metadata"]:
                            namespace = entity_conf["metadata"]["namespace"]
                        else:
                            namespace = "default"
                        containerDef["containers"] = containers.pop()
                        if containerDef["containers"] is not None:
                            for cd in containerDef["containers"]:
                                # NOTE(review): rebinds the outer loop index
                                # "i" (restored by range() on the next outer
                                # iteration) and .index(cd) picks the FIRST
                                # equal container — duplicates share an index.
                                i = containerDef["containers"].index(cd)
                                # Tag each container with enough context for
                                # checks: apiVersion, pseudo-kind, and a
                                # human-readable parent pointer.
                                containerDef["containers"][i][
                                    "apiVersion"] = entity_conf["apiVersion"]
                                containerDef["containers"][i]["kind"] = type
                                containerDef["containers"][i][
                                    "parent"] = "{}.{}.{} (container {})".format(
                                        entity_conf["kind"],
                                        entity_conf["metadata"]["name"],
                                        namespace, str(i))
                                containerDef["containers"][i][
                                    "parent_metadata"] = entity_conf[
                                        "metadata"]
                            definitions[k8_file].extend(
                                containerDef["containers"])

                # Run for each definition included added container definitions
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(
                        k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    if entity_conf["kind"] == "List":
                        continue

                    if isinstance(entity_conf["kind"], int):
                        continue
                    # Skip entity without metadata["name"] or parent_metadata["name"]
                    # (container pseudo-entities have kind "containers"/"initContainers"
                    # and are exempt from the metadata check).
                    if not any(x in entity_conf["kind"]
                               for x in ["containers", "initContainers"]):
                        if "metadata" in entity_conf:
                            if isinstance(
                                    entity_conf["metadata"], int
                            ) or not "name" in entity_conf["metadata"]:
                                continue
                        else:
                            continue

                    # Skip entity with parent (metadata["ownerReferences"]) in runtime
                    # We will alert in runtime only
                    if "metadata" in entity_conf:
                        if "ownerReferences" in entity_conf["metadata"] and \
                                entity_conf["metadata"]["ownerReferences"] is not None:
                            continue

                    # Skip Kustomization Templates (for now)
                    if entity_conf["kind"] == "Kustomization":
                        continue

                    skipped_checks = get_skipped_checks(entity_conf)

                    results = registry.scan(k8_file, entity_conf,
                                            skipped_checks, runner_filter)

                    # TODO refactor into context parsing
                    # NOTE(review): find_lines_result_list is computed but unused.
                    find_lines_result_list = list(
                        find_lines(entity_conf, '__startline__'))
                    start_line = entity_conf["__startline__"]
                    end_line = entity_conf["__endline__"]

                    # Map parser line markers to raw source lines (1-based
                    # markers, 0-based list slicing).
                    if start_line == end_line:
                        entity_lines_range = [start_line, end_line]
                        entity_code_lines = definitions_raw[k8_file][
                            start_line - 1:end_line]
                    else:
                        entity_lines_range = [start_line, end_line - 1]
                        entity_code_lines = definitions_raw[k8_file][
                            start_line - 1:end_line - 1]

                    # TODO? - Variable Eval Message!
                    variable_evaluations = {}

                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id,
                            check_name=check.name,
                            check_result=check_result,
                            code_block=entity_code_lines,
                            file_path=k8_file,
                            file_line_range=entity_lines_range,
                            resource=check.get_resource_id(entity_conf),
                            evaluations=variable_evaluations,
                            check_class=check.__class__.__module__,
                            file_abs_path=file_abs_path)
                        report.add_record(record=record)

        return report
Beispiel #17
0
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=None):
        """Scan CloudFormation templates, resolve Ref defaults, and return a Report.

        :param root_folder: directory walked recursively for CF templates (-d mode);
            may be None when explicit files are given.
        :param external_checks_dir: optional iterable of directories holding extra
            checks to register before scanning.
        :param files: optional explicit list of template files to scan (-f mode).
        :param runner_filter: RunnerFilter controlling which checks run. Defaults to
            a fresh instance per call (``None`` sentinel) to avoid the shared
            mutable-default-argument pitfall of ``runner_filter=RunnerFilter()``.
        :return: populated Report for ``self.check_type``.
        """
        if runner_filter is None:
            runner_filter = RunnerFilter()
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                resource_registry.load_external_checks(directory)

        if files:
            for file in files:
                # parse() returns None for non-CF input, which makes the tuple
                # unpacking raise TypeError. Skip such files instead of crashing.
                try:
                    (definitions[file], definitions_raw[file]) = parse(file)
                except TypeError:
                    logging.info(
                        f'CloudFormation skipping {file} as it is not a valid CF template'
                    )

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names)
                for file in f_names:
                    if os.path.splitext(file)[1] in CF_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                # Key results by a '/'-prefixed path relative to the scanned root.
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                try:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse(file)
                except TypeError:
                    logging.info(
                        f'CloudFormation skipping {file} as it is not a valid CF template'
                    )

        # Filter out empty files that have not been parsed successfully, and
        # filter out non-CF template files (no top-level "Resources").
        definitions = {
            k: v
            for k, v in definitions.items() if v and "Resources" in v
        }
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions
        }

        for cf_file in definitions.keys():
            if isinstance(definitions[cf_file],
                          dict_node) and 'Resources' in definitions[cf_file]:
                logging.debug("Template Dump for {}: {}".format(
                    cf_file, definitions[cf_file]))

                # Get Parameter Defaults - Locate Refs in Template and replace
                # each Ref in place with the referenced Parameter's Default.
                refs = []
                refs.extend(
                    self._search_deep_keys('Ref', definitions[cf_file], []))

                for ref in refs:
                    refname = ref.pop()
                    ref.pop()  # Get rid of the 'Ref' dict key

                    if 'Parameters' in definitions[cf_file].keys(
                    ) and refname in definitions[cf_file]['Parameters'].keys():
                        # TODO refactor into evaluations
                        if 'Default' in definitions[cf_file]['Parameters'][
                                refname].keys():
                            logging.debug(
                                "Replacing Ref {} in file {} with default parameter value: {}"
                                .format(
                                    refname, cf_file, definitions[cf_file]
                                    ['Parameters'][refname]['Default']))
                            _set_in_dict(
                                definitions[cf_file], ref, definitions[cf_file]
                                ['Parameters'][refname]['Default'])

                            ## TODO - Add Variable Eval Message for Output
                            # Output in Checkov looks like this:
                            # Variable versioning (of /.) evaluated to value "True" in expression: enabled = ${var.versioning}

                for resource_name, resource in definitions[cf_file][
                        'Resources'].items():
                    # Parser line markers live alongside real resources.
                    if resource_name in ('__startline__', '__endline__'):
                        continue
                    if 'Type' not in resource:
                        # This is not a CloudFormation resource, skip
                        continue
                    resource_id = f"{resource['Type']}.{resource_name}"

                    # TODO refactor into context parsing
                    find_lines_result_list = list(
                        find_lines(resource, '__startline__'))
                    if len(find_lines_result_list) >= 1:
                        start_line = min(find_lines_result_list)
                        end_line = max(
                            list(find_lines(resource, '__endline__')))

                        entity_lines_range = [start_line, end_line - 1]

                        # 1-based markers -> 0-based raw-line slice.
                        entity_code_lines = definitions_raw[cf_file][
                            start_line - 1:end_line - 1]

                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        # Honor inline suppression comments within the resource.
                        skipped_checks = []
                        for line in entity_code_lines:
                            skip_search = re.search(COMMENT_REGEX, str(line))
                            if skip_search:
                                skipped_checks.append({
                                    'id':
                                    skip_search.group(2),
                                    'suppress_comment':
                                    skip_search.group(3)[1:]
                                    if skip_search.group(3) else
                                    "No comment provided"
                                })

                        results = resource_registry.scan(
                            cf_file, {resource_name: resource}, skipped_checks,
                            runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=cf_file,
                                file_line_range=entity_lines_range,
                                resource=resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__)
                            report.add_record(record=record)
        return report