def hcl2(self, directory, tf_definitions=None, parsing_errors=None):
    """Recursively parse every ``.tf`` file under ``directory``.

    Results are merged into ``tf_definitions`` (file path -> parsed HCL);
    failures are recorded in ``parsing_errors`` (file path -> Exception)
    instead of aborting the walk. Local modules referenced by a ``module``
    block are queued and scanned after the walk completes.

    :param directory: Root directory to walk for ``.tf`` files.
    :param tf_definitions: Optional dict updated in place with parse results.
    :param parsing_errors: Optional dict updated in place with parse errors.
    """
    # Fix: the previous `={}` defaults were mutable and shared across calls,
    # silently accumulating state between unrelated scans.
    if tf_definitions is None:
        tf_definitions = {}
    if parsing_errors is None:
        parsing_errors = {}
    modules_scan = set()
    for root, d_names, f_names in os.walk(directory):
        filter_ignored_directories(d_names)
        # Remember this directory so module references back to it are not re-scanned.
        self._mark_parsed(os.path.abspath(root))
        for file in f_names:
            if not file.endswith(".tf"):
                continue
            tf_file = os.path.join(root, file)
            if tf_file in tf_definitions:
                continue  # already parsed via an earlier walk / module scan
            try:
                tf_definition = self._parse_tf_definitions(tf_file)
                if tf_definition:
                    tf_definitions[tf_file] = tf_definition
                    # Queue any locally-referenced modules that were not parsed yet.
                    for modules in tf_definition.get("module", []):
                        for module in modules.values():
                            relative_path = module['source'][0]
                            abs_path = os.path.abspath(os.path.join(root, relative_path))
                            if not self._is_parsed(abs_path):
                                modules_scan.add(abs_path)
            except Exception as e:
                self.logger.debug(f'failed while parsing file {tf_file}', exc_info=e)
                parsing_errors[tf_file] = e
    for m in modules_scan:
        if path.exists(m):
            # Fix: propagate parsing_errors through the recursion; previously
            # module-level parse errors were written into the shared default dict
            # and lost to the caller.
            self.hcl2(directory=m, tf_definitions=tf_definitions, parsing_errors=parsing_errors)
def _parse_directory(directory: str, include_sub_dirs: bool, out_definitions: Dict,
                     out_evaluations_context: Optional[Dict[str, Dict[str, EvaluationContext]]] = None,
                     out_parsing_errors: Optional[Dict[str, Exception]] = None,
                     env_vars: Optional[Mapping[str, str]] = None,
                     module_loader_registry: ModuleLoaderRegistry = default_ml_registry,
                     dir_filter: Callable[[str], bool] = lambda _: True):
    """Load and resolve Terraform configuration rooted at ``directory``.

    Follows the Terraform Code Organization layout
    (https://www.terraform.io/docs/configuration/index.html#code-organization),
    merging everything found into ``out_definitions``. The resulting dict is
    keyed by file, with loaded modules additionally keyed by referrer info
    (``<file>[<referring_file>#<index>]``); module blocks gain a
    ``__resolved__`` list naming the file/referrer entries under which their
    data was loaded (e.g. ``["main.tf#0"]``). All resolvable variables are
    resolved.

    :param directory: Directory from which ``.tf`` and ``.tfvars`` files load.
    :param include_sub_dirs: When True, subdirectories are walked as well.
    :param out_definitions: Receives the variable-resolved TF data.
    :param out_evaluations_context: Receives per-file, per-variable resolution
           context.
    :param out_parsing_errors: Receives parsing errors keyed on file path.
    :param env_vars: Values used to resolve TF environment variables; defaults
           to the local environment.
    :param module_loader_registry: Registry used to resolve modules; a custom
           registry tunes how much resolution happens (and eases testing).
    :param dir_filter: Predicate over an absolute directory path; returning
           True allows that directory to be processed.
    """
    out_evaluations_context = {} if out_evaluations_context is None else out_evaluations_context
    out_parsing_errors = {} if out_parsing_errors is None else out_parsing_errors
    env_vars = dict(os.environ) if env_vars is None else env_vars

    if not include_sub_dirs:
        # Single-directory mode: load just the root and stop.
        _internal_dir_load(directory, out_definitions, out_evaluations_context,
                           out_parsing_errors, env_vars, None,
                           module_loader_registry, dir_filter)
        return

    for sub_dir, d_names, f_names in os.walk(directory):
        filter_ignored_directories(d_names)
        if dir_filter(os.path.abspath(sub_dir)):
            _internal_dir_load(sub_dir, out_definitions, out_evaluations_context,
                               out_parsing_errors, env_vars, None,
                               module_loader_registry, dir_filter)
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter()):
    """Scan CloudFormation templates and return a Report of check results.

    :param root_folder: Directory walked for templates (may be falsy).
    :param external_checks_dir: Optional directories of extra checks to load.
    :param files: Optional explicit template files to scan.
    :param runner_filter: Controls which checks run.
    """
    report = Report(self.check_type)
    definitions = {}  # file key -> parsed template
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            cfn_registry.load_external_checks(directory)
    if files:
        # Explicit files are keyed by the path as given.
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in CF_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
    # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
    definitions = {k: v for k, v in definitions.items() if v and v.__contains__("Resources")}
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}
    for cf_file in definitions.keys():
        if isinstance(definitions[cf_file], dict_node) and 'Resources' in definitions[cf_file].keys():
            cf_context_parser = ContextParser(cf_file, definitions[cf_file], definitions_raw[cf_file])
            # NOTE(review): `indent=2` is an unused keyword to str.format (silently
            # ignored), not a pretty-print option.
            logging.debug("Template Dump for {}: {}".format(cf_file, definitions[cf_file], indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in definitions[cf_file]['Resources'].items():
                resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                # check that the resource can be parsed as a CF resource
                if resource_id:
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}
                        skipped_checks = ContextParser.collect_skip_comments(entity_code_lines)
                        # One scan per resource; results map check -> result.
                        results = cfn_registry.scan(cf_file, {resource_name: resource}, skipped_checks, runner_filter)
                        for check, check_result in results.items():
                            record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                            code_block=entity_code_lines, file_path=cf_file,
                                            file_line_range=entity_lines_range,
                                            resource=resource_id, evaluations=variable_evaluations,
                                            check_class=check.__class__.__module__)
                            report.add_record(record=record)
    return report
def find_chart_directories(root_folder, files):
    """Return directories containing a Helm ``Chart.yaml``.

    Explicitly listed ``files`` are inspected first; then ``root_folder`` is
    walked (ignored directories pruned) and every directory holding a
    ``Chart.yaml`` is collected.
    """
    found = []
    if files:
        logging.info(
            'Running with --file argument; checking for Helm Chart.yaml files'
        )
        found.extend(
            os.path.dirname(candidate)
            for candidate in files
            if os.path.basename(candidate) == 'Chart.yaml'
        )
    if root_folder:
        for current_dir, sub_dirs, file_names in os.walk(root_folder):
            filter_ignored_directories(sub_dirs)
            if 'Chart.yaml' in file_names:
                found.append(current_dir)
    return found
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter()):
    """Scan serverless framework files and return a Report.

    Runs CloudFormation checks against the embedded `resources` sub-template
    and serverless checks against each entry under the functions token.
    """
    report = Report(self.check_type)
    definitions = {}  # file key -> parsed file content
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            sls_registry.load_external_checks(directory, runner_filter)
    if files:
        for file in files:
            if os.path.basename(file) in SLS_FILE_MASK:
                parse_result = parse(file)
                if parse_result:
                    (definitions[file], definitions_raw[file]) = parse_result
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            # Don't walk in to "node_modules" directories under the root folder. If -for some reason-
            # scanning one of these is desired, it can be directly specified.
            if "node_modules" in d_names:
                d_names.remove("node_modules")
            filter_ignored_directories(d_names)
            for file in f_names:
                if file in SLS_FILE_MASK:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path:
                        # skip temp directories
                        files_list.append(full_path)
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            parse_result = parse(file)
            if parse_result:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result
    # Filter out empty files that have not been parsed successfully
    definitions = {k: v for k, v in definitions.items() if v}
    definitions_raw = {
        k: v
        for k, v in definitions_raw.items() if k in definitions.keys()
    }
    for sls_file in definitions.keys():
        if not isinstance(definitions[sls_file], dict_node):
            continue
        # Pass 1: embedded CloudFormation resources, scanned with CFN checks.
        if CFN_RESOURCES_TOKEN in definitions[sls_file] and isinstance(
                definitions[sls_file][CFN_RESOURCES_TOKEN], dict_node):
            cf_sub_template = definitions[sls_file][CFN_RESOURCES_TOKEN]
            cf_context_parser = CfnContextParser(sls_file, cf_sub_template, definitions_raw[sls_file])
            # NOTE(review): `indent=2` is an unused str.format keyword (ignored).
            logging.debug("Template Dump for {}: {}".format(
                sls_file, definitions[sls_file], indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in cf_sub_template['Resources'].items():
                if not isinstance(resource, dict_node):
                    continue
                cf_resource_id = cf_context_parser.extract_cf_resource_id(
                    resource, resource_name)
                entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                    resource)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(
                        entity_code_lines)
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    results = cfn_registry.scan(sls_file, {resource_name: resource}, skipped_checks, runner_filter)
                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id, check_name=check.name, check_result=check_result,
                            code_block=entity_code_lines, file_path=sls_file,
                            file_line_range=entity_lines_range,
                            resource=cf_resource_id, evaluations=variable_evaluations,
                            check_class=check.__class__.__module__)
                        report.add_record(record=record)
        # Pass 2: serverless function definitions, scanned with SLS checks.
        if FUNCTIONS_TOKEN in definitions[sls_file]:
            template_functions = definitions[sls_file][FUNCTIONS_TOKEN]
            sls_context_parser = SlsContextParser(
                sls_file, definitions[sls_file], definitions_raw[sls_file])
            for sls_function_name, sls_function in template_functions.items():
                if not isinstance(sls_function, dict_node):
                    continue
                entity_lines_range, entity_code_lines = sls_context_parser.extract_function_code_lines(
                    sls_function)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(
                        entity_code_lines)
                    variable_evaluations = {}
                    # Copies provider-level settings down into the function data.
                    sls_context_parser.enrich_function_with_provider(
                        sls_function_name)
                    results = sls_registry.scan(
                        sls_file, {
                            'function': sls_function,
                            'provider_type': sls_context_parser.provider_type
                        }, skipped_checks, runner_filter)
                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id, check_name=check.name, check_result=check_result,
                            code_block=entity_code_lines, file_path=sls_file,
                            file_line_range=entity_lines_range,
                            resource=sls_function_name, evaluations=variable_evaluations,
                            check_class=check.__class__.__module__)
                        report.add_record(record=record)
    return report
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
    """Scan serverless framework files and return a Report.

    Four passes per file: embedded CFN resources, multi-item sections,
    single-item sections, and whole-file "complete" checks.
    """
    report = Report(self.check_type)
    definitions = {}  # file key -> parsed file content
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            function_registry.load_external_checks(directory)
    if files:
        for file in files:
            if os.path.basename(file) in SLS_FILE_MASK:
                parse_result = parse(file)
                if parse_result:
                    (definitions[file], definitions_raw[file]) = parse_result
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            # Don't walk in to "node_modules" directories under the root folder. If -for some reason-
            # scanning one of these is desired, it can be directly specified.
            if "node_modules" in d_names:
                d_names.remove("node_modules")
            filter_ignored_directories(d_names)
            for file in f_names:
                if file in SLS_FILE_MASK:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path:
                        # skip temp directories
                        files_list.append(full_path)
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            parse_result = parse(file)
            if parse_result:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result
    # Filter out empty files that have not been parsed successfully
    definitions = {k: v for k, v in definitions.items() if v}
    definitions_raw = {
        k: v
        for k, v in definitions_raw.items() if k in definitions.keys()
    }
    for sls_file, sls_file_data in definitions.items():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which sls_file does not always give).
        if sls_file[0] == '/':
            path_to_convert = (root_folder + sls_file) if root_folder else sls_file
        else:
            path_to_convert = (os.path.join(
                root_folder, sls_file)) if root_folder else sls_file
        file_abs_path = os.path.abspath(path_to_convert)
        if not isinstance(sls_file_data, dict_node):
            continue
        # Pass 1: embedded CloudFormation resources, scanned with CFN checks.
        if CFN_RESOURCES_TOKEN in sls_file_data and isinstance(
                sls_file_data[CFN_RESOURCES_TOKEN], dict_node):
            cf_sub_template = sls_file_data[CFN_RESOURCES_TOKEN]
            cf_context_parser = CfnContextParser(sls_file, cf_sub_template, definitions_raw[sls_file])
            # NOTE(review): `indent=2` is an unused str.format keyword (ignored).
            logging.debug("Template Dump for {}: {}".format(sls_file, sls_file_data, indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in cf_sub_template['Resources'].items():
                if not isinstance(resource, dict_node):
                    continue
                cf_resource_id = cf_context_parser.extract_cf_resource_id(
                    resource, resource_name)
                if not cf_resource_id:
                    # Not Type attribute for resource
                    continue
                entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                    resource)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(
                        entity_code_lines)
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    entity = {resource_name: resource}
                    results = cfn_registry.scan(sls_file, entity, skipped_checks, runner_filter)
                    tags = cfn_utils.get_resource_tags(entity, cfn_registry)
                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id, check_name=check.name, check_result=check_result,
                            code_block=entity_code_lines, file_path=sls_file,
                            file_line_range=entity_lines_range, resource=cf_resource_id,
                            evaluations=variable_evaluations,
                            check_class=check.__class__.__module__,
                            file_abs_path=file_abs_path, entity_tags=tags)
                        report.add_record(record=record)
        sls_context_parser = SlsContextParser(sls_file, sls_file_data, definitions_raw[sls_file])
        # Sub-sections that have multiple items under them
        for token, registry in MULTI_ITEM_SECTIONS:
            template_items = sls_file_data.get(token)
            if not template_items or not isinstance(template_items, dict):
                continue
            for item_name, item_content in template_items.items():
                if not isinstance(item_content, dict_node):
                    continue
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                    item_content)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(
                        entity_code_lines)
                    variable_evaluations = {}
                    if token == "functions": #nosec
                        # "Enriching" copies things like "environment" and "stackTags" down into the
                        # function data from the provider block since logically that's what serverless
                        # does. This allows checks to see what the complete data would be.
                        sls_context_parser.enrich_function_with_provider(
                            item_name)
                    entity = EntityDetails(
                        sls_context_parser.provider_type, item_content)
                    results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
                    tags = cfn_utils.get_resource_tags(entity, registry)
                    for check, check_result in results.items():
                        record = Record(
                            check_id=check.id, check_name=check.name, check_result=check_result,
                            code_block=entity_code_lines, file_path=sls_file,
                            file_line_range=entity_lines_range, resource=item_name,
                            evaluations=variable_evaluations,
                            check_class=check.__class__.__module__,
                            file_abs_path=file_abs_path, entity_tags=tags)
                        report.add_record(record=record)
        # Sub-sections that are a single item
        for token, registry in SINGLE_ITEM_SECTIONS:
            item_content = sls_file_data.get(token)
            if not item_content:
                continue
            entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                item_content)
            if not entity_lines_range:
                # Fall back to the whole file's span when the item has no range.
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
                    sls_file_data)
            skipped_checks = CfnContextParser.collect_skip_comments(
                entity_code_lines)
            variable_evaluations = {}
            entity = EntityDetails(sls_context_parser.provider_type, item_content)
            results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
            tags = cfn_utils.get_resource_tags(entity, registry)
            for check, check_result in results.items():
                record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                code_block=entity_code_lines, file_path=sls_file,
                                file_line_range=entity_lines_range, resource=token,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path, entity_tags=tags)
                report.add_record(record=record)
        # "Complete" checks
        # NOTE: Ignore code content, no point in showing (could be long)
        entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(
            sls_file_data)
        if entity_lines_range:
            skipped_checks = CfnContextParser.collect_skip_comments(
                entity_code_lines)
            variable_evaluations = {}
            entity = EntityDetails(sls_context_parser.provider_type, sls_file_data)
            results = complete_registry.scan(sls_file, entity, skipped_checks, runner_filter)
            tags = cfn_utils.get_resource_tags(entity, complete_registry)
            for check, check_result in results.items():
                record = Record(
                    check_id=check.id, check_name=check.name, check_result=check_result,
                    code_block=[],  # Don't show, could be large
                    file_path=sls_file,
                    file_line_range=entity_lines_range,
                    resource="complete",  # Weird, not sure what to put where
                    evaluations=variable_evaluations,
                    check_class=check.__class__.__module__,
                    file_abs_path=file_abs_path, entity_tags=tags)
                report.add_record(record=record)
    return report
def _filter_ignored_directories(d_names, excluded_paths):
    """Prune ignored directories from ``d_names`` in place (os.walk style).

    Delegates to the common filter, then additionally removes the external
    modules download folder so downloaded modules are not walked directly.

    :param d_names: Mutable list of directory names, edited in place.
    :param excluded_paths: Extra exclusion patterns forwarded to the filter.
    """
    filter_ignored_directories(d_names, excluded_paths)
    # Fix: was a list comprehension used only for its remove() side effect;
    # a plain loop over a snapshot states the intent and builds no throwaway list.
    for d in list(d_names):
        if d == default_ml_registry.external_modules_folder_name:
            d_names.remove(d)
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
    """Scan Azure Resource Manager templates and return a Report.

    Nested resources are flattened into the top-level `resources` list
    (tagged with their parent) before checks run.
    """
    report = Report(self.check_type)
    definitions = {}  # file key -> parsed template
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            arm_registry.load_external_checks(directory, runner_filter)
    if files:
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in ARM_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            (definitions[relative_file_path],
             definitions_raw[relative_file_path]) = parse(file)
    # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
    definitions = {
        k: v
        for k, v in definitions.items() if v and v.__contains__("resources")
    }
    definitions_raw = {
        k: v
        for k, v in definitions_raw.items() if k in definitions.keys()
    }
    for arm_file in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which arm_file does not always give).
        if arm_file[0] == '/':
            path_to_convert = (root_folder + arm_file) if root_folder else arm_file
        else:
            path_to_convert = (os.path.join(
                root_folder, arm_file)) if root_folder else arm_file
        file_abs_path = os.path.abspath(path_to_convert)
        if isinstance(
                definitions[arm_file],
                dict_node) and 'resources' in definitions[arm_file].keys():
            arm_context_parser = ContextParser(arm_file, definitions[arm_file], definitions_raw[arm_file])
            # NOTE(review): `indent=2` is an unused str.format keyword (ignored).
            logging.debug("Template Dump for {}: {}".format(
                arm_file, definitions[arm_file], indent=2))
            arm_context_parser.evaluate_default_parameters()
            # Split out nested resources from base resource
            for resource in definitions[arm_file]['resources']:
                if "parent_name" in resource.keys():
                    continue  # already a flattened child; don't re-split
                nested_resources = []
                nested_resources = arm_context_parser.search_deep_keys(
                    "resources", resource, [])
                if nested_resources:
                    for nr in nested_resources:
                        nr_element = nr.pop()
                        if nr_element:
                            for element in nr_element:
                                new_resource = {}
                                new_resource = element
                                if isinstance(new_resource, dict):
                                    # Tag the child with its parent so it is
                                    # skipped by the split pass above.
                                    new_resource["parent_name"] = resource[
                                        "name"]
                                    new_resource["parent_type"] = resource[
                                        "type"]
                                    definitions[arm_file][
                                        'resources'].append(new_resource)
        for resource in definitions[arm_file]['resources']:
            resource_id = arm_context_parser.extract_arm_resource_id(
                resource)
            resource_name = arm_context_parser.extract_arm_resource_name(
                resource)
            entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(
                resource)
            if entity_lines_range and entity_code_lines:
                # TODO - Variable Eval Message!
                variable_evaluations = {}
                skipped_checks = ContextParser.collect_skip_comments(
                    resource)
                results = arm_registry.scan(arm_file, {resource_name: resource}, skipped_checks, runner_filter)
                for check, check_result in results.items():
                    record = Record(
                        check_id=check.id, check_name=check.name, check_result=check_result,
                        code_block=entity_code_lines, file_path=arm_file,
                        file_line_range=entity_lines_range,
                        resource=resource_id, evaluations=variable_evaluations,
                        check_class=check.__class__.__module__,
                        file_abs_path=file_abs_path)
                    report.add_record(record=record)
    return report
def test_filter_ignored_directories_regex(self):
    """Filtering with the "tests" pattern prunes matching and hidden dirs in place."""
    directories = [
        'bin', 'integration_tests', 'tests', 'docs', '.github',
        'checkov', 'venv', '.git', 'kubernetes', '.idea',
    ]
    filter_ignored_directories(directories, ["tests"])
    self.assertEqual(
        ['bin', 'docs', 'checkov', 'venv', 'kubernetes'], directories)
def run(self, root_folder=None, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
    """Scan Dockerfiles and return a Report of check results."""
    report = Report(self.check_type)
    definitions = {}  # file key -> parsed instructions
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)
    if files:
        for file in files:
            if os.path.basename(file) in DOCKER_FILE_MASK:
                (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                if file in DOCKER_FILE_MASK:
                    files_list.append(os.path.join(root, file))
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            try:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
            except TypeError:
                logging.info(f'Dockerfile skipping {file} as it is not a valid dockerfile template')
    for docker_file_path in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
        if docker_file_path[0] == '/':
            path_to_convert = (root_folder + docker_file_path) if root_folder else docker_file_path
        else:
            path_to_convert = (os.path.join(root_folder, docker_file_path)) if root_folder else docker_file_path
        file_abs_path = os.path.abspath(path_to_convert)
        skipped_checks = collect_skipped_checks(definitions[docker_file_path])
        instructions = definitions[docker_file_path]
        results = registry.scan(docker_file_path, instructions, skipped_checks, runner_filter)
        for check, check_result in results.items():
            result_configuration = check_result['results_configuration']
            # Defaults used when the check result carries no configuration.
            startline = 0
            endline = 0
            result_instruction = ""
            if result_configuration:
                startline = result_configuration['startline']
                endline = result_configuration['endline']
                result_instruction = result_configuration["instruction"]
            codeblock = []
            # Fills `codeblock` in place from the raw lines between startline/endline.
            self.calc_record_codeblock(codeblock, definitions_raw, docker_file_path, endline, startline)
            # NOTE(review): format has two placeholders but three args - `startline`
            # is silently dropped from the resource id by str.format.
            record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                            code_block=codeblock, file_path=docker_file_path,
                            file_line_range=[startline, endline],
                            resource="{}.{}".format(docker_file_path, result_instruction, startline),
                            evaluations=None, check_class=check.__class__.__module__,
                            file_abs_path=file_abs_path, entity_tags=None)
            report.add_record(record=record)
    return report
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True) -> Report:
    """Scan files for hard-coded secrets and return a Report."""
    # Reverse map: check id -> secret type name, used for skip records.
    inv_secret_map = {v: k for k, v in SECRET_TYPE_TO_ID.items()}
    report = Report(self.check_type)
    initialize_plugin_settings(None)
    # Implement non IaC files (including .terraform dir)
    files_to_scan = files or []
    excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names, excluded_paths)
            for file in f_names:
                if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                    files_to_scan.append(os.path.join(root, file))
    logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')
    for file in files_to_scan:
        logging.info(f'Scanning file {file} for secrets')
        if runner_filter.skip_checks:
            # Emit an explicit SKIPPED record per suppressed secret check.
            for skipped_check in runner_filter.skip_checks:
                if skipped_check in inv_secret_map:
                    report.add_record(Record(
                        check_id=skipped_check,
                        check_name=inv_secret_map[skipped_check],
                        check_result={'result': CheckResult.SKIPPED,
                                      "suppress_comment": f"Secret scan {skipped_check} is skipped"},
                        file_path=file,
                        file_abs_path=os.path.abspath(file),
                        check_class="",
                        code_block="",
                        file_line_range=[0, 0],
                        evaluations=None,
                        resource=file
                    ))
        try:
            # Probe whether the file yields any secret at all.
            # NOTE(review): deepcopy of the *function* object looks odd -
            # presumably meant to avoid consuming shared generator state; confirm.
            next(iter(deepcopy(scan_file)(file)))
        except StopIteration:
            # TODO decide how to make the file pass
            # NOTE(review): `result` set here is unused - the loop continues.
            result = {'result': CheckResult.PASSED}
            continue
        for secret in scan_file(file):
            check_id = SECRET_TYPE_TO_ID[secret.type]
            if not runner_filter.should_run_check(check_id):
                result = {'result': CheckResult.SKIPPED}
            else:
                result = {'result': CheckResult.FAILED}
            line_text = linecache.getline(os.path.join(root_folder, secret.filename), secret.line_number)
            # Skip lines that look like git metadata rather than real secrets.
            if line_text != "" and line_text.split()[0] == 'git_commit':
                continue
            # An inline suppression overrides the computed result when present.
            result = self.search_for_suppression(root_folder, secret) or result
            report.add_record(Record(
                check_id=check_id,
                check_name=secret.type,
                check_result=result,
                code_block=[(secret.line_number, line_text)],
                file_path=f'{secret.filename}:{secret.secret_hash}',
                file_line_range=[secret.line_number, secret.line_number + 1],
                resource=secret.filename,
                check_class=None,
                evaluations=None,
                file_abs_path=os.path.abspath(secret.filename)
            ))
    return report
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter()):
    """Scan Kubernetes manifests and return a Report.

    Three passes over each parsed file: (1) expand `List` kinds into their
    items, (2) synthesize entries for containers/initContainers, (3) scan
    every resulting entity with the registered checks.
    """
    report = Report(self.check_type)
    definitions = {}  # file key -> list of parsed documents
    definitions_raw = {}  # file key -> raw source lines
    parsing_errors = {}  # NOTE(review): never written below - appears unused
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)
    if files:
        for file in files:
            parse_result = parse(file)
            if parse_result:
                (definitions[file], definitions_raw[file]) = parse_result
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in K8_POSSIBLE_ENDINGS:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path and file not in [
                            'package.json', 'package-lock.json'
                    ]:
                        # skip temp directories
                        files_list.append(full_path)
        # Walked files are keyed by a '/'-prefixed path relative to root_folder.
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            parse_result = parse(file)
            if parse_result:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result
    for k8_file in definitions.keys():
        if definitions[k8_file]:
            # Pass 1: flatten `kind: List` documents by appending their items.
            # NOTE(review): appends to the list being index-iterated; safe here
            # only because range(len(...)) was captured before the appends.
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(
                    k8_file, definitions[k8_file][i], indent=2))
                entity_conf = definitions[k8_file][i]
                # Split out resources if entity kind is List
                if entity_conf["kind"] == "List":
                    for item in entity_conf["items"]:
                        definitions[k8_file].append(item)
            # Pass 2: synthesize container/initContainer pseudo-entities.
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(
                    k8_file, definitions[k8_file][i], indent=2))
                entity_conf = definitions[k8_file][i]
                if entity_conf["kind"] == "List":
                    continue
                # Skip entity without metadata["name"]
                if "metadata" in entity_conf:
                    if not "name" in entity_conf["metadata"]:
                        continue
                else:
                    continue
                # Append containers and initContainers to definitions list
                # NOTE(review): `type` shadows the builtin; also the inner
                # loop below reuses `i`, clobbering the outer loop index.
                for type in ["containers", "initContainers"]:
                    containers = []
                    if entity_conf["kind"] == "CustomResourceDefinition":
                        continue
                    containers = self._search_deep_keys(
                        type, entity_conf, [])
                    if not containers:
                        continue
                    containers = containers.pop()
                    #containers.insert(0,entity_conf['kind'])
                    containerDef = {}
                    namespace = ""
                    if "namespace" in entity_conf["metadata"]:
                        namespace = entity_conf["metadata"]["namespace"]
                    else:
                        namespace = "default"
                    containerDef["containers"] = containers.pop()
                    if containerDef["containers"] is not None:
                        for cd in containerDef["containers"]:
                            i = containerDef["containers"].index(cd)
                            # Carry parent identity onto each container record.
                            containerDef["containers"][i][
                                "apiVersion"] = entity_conf["apiVersion"]
                            containerDef["containers"][i]["kind"] = type
                            containerDef["containers"][i][
                                "parent"] = "{}.{}.{} (container {})".format(
                                    entity_conf["kind"],
                                    entity_conf["metadata"]["name"],
                                    namespace, str(i))
                            containerDef["containers"][i][
                                "parent_metadata"] = entity_conf[
                                    "metadata"]
                        definitions[k8_file].extend(
                            containerDef["containers"])
            # Run for each definition included added container definitions
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()
                        ) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(
                    k8_file, definitions[k8_file][i], indent=2))
                entity_conf = definitions[k8_file][i]
                if entity_conf["kind"] == "List":
                    continue
                # Skip entity without metadata["name"] or parent_metadata["name"]
                if not any(x in entity_conf["kind"]
                           for x in ["containers", "initContainers"]):
                    if "metadata" in entity_conf:
                        if not "name" in entity_conf["metadata"]:
                            continue
                    else:
                        continue
                # Skip Kustomization Templates (for now)
                if entity_conf["kind"] == "Kustomization":
                    continue
                skipped_checks = get_skipped_checks(entity_conf)
                results = registry.scan(k8_file, entity_conf, skipped_checks, runner_filter)
                # TODO refactor into context parsing
                # NOTE(review): find_lines_result_list is computed but unused.
                find_lines_result_list = list(
                    find_lines(entity_conf, '__startline__'))
                start_line = entity_conf["__startline__"]
                end_line = entity_conf["__endline__"]
                if start_line == end_line:
                    entity_lines_range = [start_line, end_line]
                    entity_code_lines = definitions_raw[k8_file][
                        start_line - 1:end_line]
                else:
                    entity_lines_range = [start_line, end_line - 1]
                    entity_code_lines = definitions_raw[k8_file][
                        start_line - 1:end_line - 1]
                # TODO? - Variable Eval Message!
                variable_evaluations = {}
                for check, check_result in results.items():
                    record = Record(
                        check_id=check.id, check_name=check.name, check_result=check_result,
                        code_block=entity_code_lines, file_path=k8_file,
                        file_line_range=entity_lines_range,
                        resource=check.get_resource_id(entity_conf),
                        evaluations=variable_evaluations,
                        check_class=check.__class__.__module__)
                    report.add_record(record=record)
    return report
def run(self,
        root_folder,
        external_checks_dir=None,
        files=None,
        runner_filter=None,
        collect_skip_comments=True) -> Report:
    """Scan files for hard-coded secrets using detect-secrets plugins.

    :param root_folder: Directory to walk for candidate files (may be None when
        explicit ``files`` are given).
    :param external_checks_dir: Unused by this runner; kept for interface parity
        with the other runners.
    :param files: Explicit list of file paths to scan in addition to (or instead
        of) walking ``root_folder``.
    :param runner_filter: Optional RunnerFilter; a fresh one is created when omitted.
    :param collect_skip_comments: Unused here; kept for interface parity.
    :return: Report with one failed/suppressed Record per detected secret.
    """
    # BUGFIX: the previous default `runner_filter=RunnerFilter()` was a shared
    # mutable default evaluated once at definition time; create one per call.
    runner_filter = runner_filter or RunnerFilter()
    secrets = SecretsCollection()
    with transient_settings({
            # Only run scans with only these plugins.
            'plugins_used': [
                {'name': 'AWSKeyDetector'},
                {'name': 'ArtifactoryDetector'},
                {'name': 'AzureStorageKeyDetector'},
                {'name': 'BasicAuthDetector'},
                {'name': 'CloudantDetector'},
                {'name': 'IbmCloudIamDetector'},
                {'name': 'MailchimpDetector'},
                {'name': 'PrivateKeyDetector'},
                {'name': 'SlackDetector'},
                {'name': 'SoftlayerDetector'},
                {'name': 'SquareOAuthDetector'},
                {'name': 'StripeDetector'},
                {'name': 'TwilioKeyDetector'},
            ]
    }):
        report = Report(self.check_type)
        # Implement non IaC files (including .terraform dir)
        files_to_scan = files or []
        excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_directories(d_names, excluded_paths)
                for file in f_names:
                    # keep only supported extensions and skip known-prohibited names
                    if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                        files_to_scan.append(os.path.join(root, file))
        logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')
        for file in files_to_scan:
            logging.info(f'Scanning file {file} for secrets')
            try:
                secrets.scan_file(file)
            except Exception as e:
                # best-effort: a single unreadable file must not abort the scan
                logging.warning(f"Secret scanning:could not process file {file}, {e}")
                continue
        for _, secret in iter(secrets):
            check_id = SECRET_TYPE_TO_ID.get(secret.type)
            if not check_id:
                continue
            result = {'result': CheckResult.FAILED}
            # BUGFIX: os.path.join(None, ...) raised TypeError when the runner
            # was invoked with explicit files and no root_folder; in that case
            # secret.filename is already the path that was scanned.
            line_path = os.path.join(root_folder, secret.filename) if root_folder else secret.filename
            line_text = linecache.getline(line_path, secret.line_number)
            # BUGFIX: a whitespace-only line made split() return [] and the old
            # `line_text.split()[0]` raise IndexError; guard the token list.
            tokens = line_text.split()
            if tokens and tokens[0] == 'git_commit':
                continue
            result = self.search_for_suppression(
                check_id, root_folder, secret,
                runner_filter.skip_checks, CHECK_ID_TO_SECRET_TYPE) or result
            report.add_record(
                Record(check_id=check_id,
                       check_name=secret.type,
                       check_result=result,
                       code_block=[(secret.line_number, line_text)],
                       file_path=f'/{os.path.relpath(secret.filename, root_folder)}',
                       file_line_range=[secret.line_number, secret.line_number + 1],
                       resource=secret.secret_hash,
                       check_class=None,
                       evaluations=None,
                       file_abs_path=os.path.abspath(secret.filename)))
        return report
def run(self,
        root_folder,
        external_checks_dir=None,
        files=None,
        runner_filter=None,
        collect_skip_comments=True):
    """Parse and scan ARM templates, returning a Report of check results.

    :param root_folder: Directory to walk for ARM template files (may be None).
    :param external_checks_dir: Directories with extra checks to register.
    :param files: Explicit template files to scan.
    :param runner_filter: Optional RunnerFilter; a fresh one is created when omitted.
    :param collect_skip_comments: Unused here; kept for interface parity.
    """
    # Avoid the shared mutable default `RunnerFilter()` evaluated at def time.
    runner_filter = runner_filter or RunnerFilter()
    report = Report(self.check_type)
    definitions = {}       # file path -> parsed template
    definitions_raw = {}   # file path -> raw source lines
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            arm_registry.load_external_checks(directory, runner_filter)
    if files:
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in ARM_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
    # Filter out empty files that have not been parsed successfully, and filter
    # out files without a "resources" section (not ARM templates).
    definitions = {k: v for k, v in definitions.items() if v and "resources" in v}
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions}
    for arm_file in definitions.keys():
        if isinstance(definitions[arm_file], dict_node) and 'resources' in definitions[arm_file].keys():
            arm_context_parser = ContextParser(arm_file, definitions[arm_file], definitions_raw[arm_file])
            logging.debug("Template Dump for {}: {}".format(arm_file, definitions[arm_file], indent=2))
            arm_context_parser.evaluate_default_parameters()
            # Split out nested resources from base resource.  Appending while
            # iterating is deliberate: appended entries carry "parent_name" and
            # are skipped when the iteration reaches them.
            for resource in definitions[arm_file]['resources']:
                if "parent_name" in resource.keys():
                    continue
                nested_resources = arm_context_parser.search_deep_keys("resources", resource, [])
                if nested_resources:
                    for nr in nested_resources:
                        # each entry is a key path; the last element is the value
                        nr_element = nr.pop()
                        if nr_element:
                            for element in nr_element:
                                if isinstance(element, dict):
                                    new_resource = element
                                    new_resource["parent_name"] = resource["name"]
                                    new_resource["parent_type"] = resource["type"]
                                    definitions[arm_file]['resources'].append(new_resource)
            for resource in definitions[arm_file]['resources']:
                resource_id = arm_context_parser.extract_arm_resource_id(resource)
                resource_name = arm_context_parser.extract_arm_resource_name(resource)
                entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(resource)
                if entity_lines_range and entity_code_lines:
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    skipped_checks = ContextParser.collect_skip_comments(resource)
                    results = arm_registry.scan(arm_file, {resource_name: resource}, skipped_checks, runner_filter)
                    for check, check_result in results.items():
                        record = Record(check_id=check.id,
                                        check_name=check.name,
                                        check_result=check_result,
                                        code_block=entity_code_lines,
                                        file_path=arm_file,
                                        file_line_range=entity_lines_range,
                                        resource=resource_id,
                                        evaluations=variable_evaluations,
                                        check_class=check.__class__.__module__)
                        report.add_record(record=record)
    return report
def run(self,
        root_folder,
        external_checks_dir=None,
        files=None,
        runner_filter=None,
        collect_skip_comments=True):
    """Parse and scan CloudFormation templates, returning a Report.

    :param root_folder: Directory to walk for CF templates (may be None).
    :param external_checks_dir: Directories with extra checks to register.
    :param files: Explicit template files to scan.
    :param runner_filter: Optional RunnerFilter; a fresh one is created when omitted.
    :param collect_skip_comments: Unused here; kept for interface parity.
    """
    # Avoid the shared mutable default `RunnerFilter()` evaluated at def time.
    runner_filter = runner_filter or RunnerFilter()
    report = Report(self.check_type)
    definitions = {}       # file path -> parsed template
    definitions_raw = {}   # file path -> raw source lines
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            cfn_registry.load_external_checks(directory, runner_filter)
    if files:
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in CF_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            try:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
            except TypeError:
                logging.info(f'CloudFormation skipping {file} as it is not a valid CF template')
    # Filter out empty files that have not been parsed successfully, and filter
    # out non-CF template files (must have a dict "Resources" section).
    definitions = {
        k: v
        for k, v in definitions.items()
        if v and isinstance(v, dict_node) and "Resources" in v and isinstance(v["Resources"], dict_node)
    }
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions}
    for cf_file in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading /
        # because it's an absolute path, or there will be no leading slash;
        # root_folder will always be none.  If -d is used, root_folder will be
        # the value given, and -f will start with a / (hardcoded above).  The
        # goal is simply to get a valid path to the file (which cf_file does
        # not always give).
        if cf_file[0] == '/':
            path_to_convert = (root_folder + cf_file) if root_folder else cf_file
        else:
            path_to_convert = (os.path.join(root_folder, cf_file)) if root_folder else cf_file
        file_abs_path = os.path.abspath(path_to_convert)
        if isinstance(definitions[cf_file], dict_node) and 'Resources' in definitions[cf_file].keys():
            cf_context_parser = ContextParser(cf_file, definitions[cf_file], definitions_raw[cf_file])
            logging.debug("Template Dump for {}: {}".format(cf_file, definitions[cf_file], indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in definitions[cf_file]['Resources'].items():
                resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                # check that the resource can be parsed as a CF resource
                if resource_id:
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}
                        skipped_checks = ContextParser.collect_skip_comments(entity_code_lines)
                        results = cfn_registry.scan(cf_file, {resource_name: resource}, skipped_checks, runner_filter)
                        for check, check_result in results.items():
                            record = Record(check_id=check.id,
                                            check_name=check.name,
                                            check_result=check_result,
                                            code_block=entity_code_lines,
                                            file_path=cf_file,
                                            file_line_range=entity_lines_range,
                                            resource=resource_id,
                                            evaluations=variable_evaluations,
                                            check_class=check.__class__.__module__,
                                            file_abs_path=file_abs_path)
                            report.add_record(record=record)
    return report
def run(self,
        root_folder,
        external_checks_dir=None,
        files=None,
        runner_filter=RunnerFilter(),
        collect_skip_comments=True,
        helmChart=None):
    """Parse and scan Kubernetes manifests, returning a Report.

    Processing happens per file in three index-driven passes over
    ``definitions[k8_file]`` (a list of parsed documents).  The list is
    mutated between passes (List items are flattened in, container
    pseudo-entities are appended), so pass order and the index-based
    loops are load-bearing — do not reorder.

    :param root_folder: Directory to walk for manifests (may be None).
    :param external_checks_dir: Directories with extra checks to register.
    :param files: Explicit manifest files to scan.
    :param runner_filter: Filter of checks to run.  NOTE(review): mutable
        default argument shared across calls — candidate for a None default.
    :param collect_skip_comments: Unused here; kept for interface parity.
    :param helmChart: Unused in this method body; presumably set by the Helm
        runner wrapper — TODO confirm with caller.
    """
    report = Report(self.check_type)
    definitions = {}       # file path -> list of parsed YAML documents
    definitions_raw = {}   # file path -> raw source lines
    parsing_errors = {}    # NOTE(review): populated nowhere in this method
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory, runner_filter)
    if files:
        for file in files:
            parse_result = parse(file)
            if parse_result:
                (definitions[file], definitions_raw[file]) = parse_result
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in K8_POSSIBLE_ENDINGS:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path and file not in ['package.json', 'package-lock.json']:
                        # skip temp directories
                        files_list.append(full_path)
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            parse_result = parse(file)
            if parse_result:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result
    for k8_file in definitions.keys():

        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which sls_file does not always give).
        if k8_file[0] == '/':
            path_to_convert = (root_folder + k8_file) if root_folder else k8_file
        else:
            path_to_convert = (os.path.join(root_folder, k8_file)) if root_folder else k8_file

        file_abs_path = os.path.abspath(path_to_convert)
        if definitions[k8_file]:
            # Pass 1: flatten "List" kinds — their items are appended to the
            # same list being indexed (range(len(...)) is evaluated up front,
            # so the appended items are only seen by the later passes).
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]

                # Split out resources if entity kind is List
                if entity_conf["kind"] == "List":
                    for item in entity_conf["items"]:
                        definitions[k8_file].append(item)

            # Pass 2: lift containers/initContainers out of workloads into
            # standalone pseudo-entities appended to the same list.
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]

                if entity_conf["kind"] == "List":
                    continue

                # Skip entity without metadata["name"]
                if "metadata" in entity_conf:
                    if isinstance(entity_conf["metadata"], int) or not "name" in entity_conf["metadata"]:
                        continue
                else:
                    continue

                # Skip entity with parent (metadata["ownerReferences"]) in runtime
                # We will alert in runtime only
                if "ownerReferences" in entity_conf["metadata"] and \
                        entity_conf["metadata"]["ownerReferences"] is not None:
                    continue

                # Append containers and initContainers to definitions list
                # NOTE(review): loop variable `type` shadows the builtin.
                for type in ["containers", "initContainers"]:
                    containers = []
                    if entity_conf["kind"] == "CustomResourceDefinition":
                        continue
                    containers = self._search_deep_keys(type, entity_conf, [])
                    if not containers:
                        continue
                    containers = containers.pop()
                    #containers.insert(0,entity_conf['kind'])
                    containerDef = {}
                    namespace = ""
                    if "namespace" in entity_conf["metadata"]:
                        namespace = entity_conf["metadata"]["namespace"]
                    else:
                        namespace = "default"
                    containerDef["containers"] = containers.pop()
                    if containerDef["containers"] is not None:
                        for cd in containerDef["containers"]:
                            # NOTE(review): .index(cd) returns the FIRST match,
                            # so duplicate container dicts share an index;
                            # this also rebinds the outer loop variable `i`.
                            i = containerDef["containers"].index(cd)
                            containerDef["containers"][i]["apiVersion"] = entity_conf["apiVersion"]
                            containerDef["containers"][i]["kind"] = type
                            containerDef["containers"][i]["parent"] = "{}.{}.{} (container {})".format(
                                entity_conf["kind"], entity_conf["metadata"]["name"], namespace, str(i))
                            containerDef["containers"][i]["parent_metadata"] = entity_conf["metadata"]
                        definitions[k8_file].extend(containerDef["containers"])

            # Pass 3: run the checks for each definition, including the
            # container pseudo-entities added in pass 2.
            # Run for each definition included added container definitions
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]

                if entity_conf["kind"] == "List":
                    continue

                if isinstance(entity_conf["kind"], int):
                    continue

                # Skip entity without metadata["name"] or parent_metadata["name"]
                if not any(x in entity_conf["kind"] for x in ["containers", "initContainers"]):
                    if "metadata" in entity_conf:
                        if isinstance(entity_conf["metadata"], int) or not "name" in entity_conf["metadata"]:
                            continue
                    else:
                        continue

                # Skip entity with parent (metadata["ownerReferences"]) in runtime
                # We will alert in runtime only
                if "metadata" in entity_conf:
                    if "ownerReferences" in entity_conf["metadata"] and \
                            entity_conf["metadata"]["ownerReferences"] is not None:
                        continue

                # Skip Kustomization Templates (for now)
                if entity_conf["kind"] == "Kustomization":
                    continue

                skipped_checks = get_skipped_checks(entity_conf)

                results = registry.scan(k8_file, entity_conf, skipped_checks, runner_filter)

                # TODO refactor into context parsing
                # NOTE(review): find_lines_result_list is never used below —
                # candidate for removal.
                find_lines_result_list = list(find_lines(entity_conf, '__startline__'))
                start_line = entity_conf["__startline__"]
                end_line = entity_conf["__endline__"]

                # Single-line entities keep the inclusive range; multi-line
                # entities drop the final line (presumably the document
                # separator — TODO confirm against the parser's markers).
                if start_line == end_line:
                    entity_lines_range = [start_line, end_line]
                    entity_code_lines = definitions_raw[k8_file][start_line - 1:end_line]
                else:
                    entity_lines_range = [start_line, end_line - 1]
                    entity_code_lines = definitions_raw[k8_file][start_line - 1:end_line - 1]

                # TODO? - Variable Eval Message!
                variable_evaluations = {}

                for check, check_result in results.items():
                    record = Record(
                        check_id=check.id,
                        check_name=check.name,
                        check_result=check_result,
                        code_block=entity_code_lines,
                        file_path=k8_file,
                        file_line_range=entity_lines_range,
                        resource=check.get_resource_id(entity_conf),
                        evaluations=variable_evaluations,
                        check_class=check.__class__.__module__,
                        file_abs_path=file_abs_path)
                    report.add_record(record=record)
    return report
def run(self,
        root_folder,
        external_checks_dir=None,
        files=None,
        runner_filter=RunnerFilter()):
    """Parse and scan CloudFormation-style templates, returning a Report.

    Before scanning, every ``Ref`` to a template Parameter that declares a
    ``Default`` is replaced in-place with that default value.  The path lists
    produced by ``_search_deep_keys`` are consumed destructively (two
    ``pop()`` calls), so the substitution logic is order-dependent.

    :param root_folder: Directory to walk for templates (may be None).
    :param external_checks_dir: Directories with extra checks to register.
    :param files: Explicit template files to scan.
    :param runner_filter: Filter of checks to run.  NOTE(review): mutable
        default argument shared across calls — candidate for a None default.
    """
    report = Report(self.check_type)
    definitions = {}       # file path -> parsed template
    definitions_raw = {}   # file path -> raw source lines
    parsing_errors = {}    # NOTE(review): populated nowhere in this method
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            resource_registry.load_external_checks(directory)
    if files:
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)
    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_directories(d_names)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in CF_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))
        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
    # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
    definitions = {k: v for k, v in definitions.items() if v and v.__contains__("Resources")}
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}
    for cf_file in definitions.keys():
        if isinstance(definitions[cf_file], dict_node) and 'Resources' in definitions[cf_file].keys():
            logging.debug("Template Dump for {}: {}".format(cf_file, definitions[cf_file], indent=2))

            # Get Parameter Defaults - Locate Refs in Template
            refs = []
            refs.extend(self._search_deep_keys('Ref', definitions[cf_file], []))
            for ref in refs:
                # each `ref` is a key path ending [..., 'Ref', <param name>];
                # pop the name, then discard the 'Ref' key itself
                refname = ref.pop()
                ref.pop()  # Get rid of the 'Ref' dict key
                if 'Parameters' in definitions[cf_file].keys() and refname in definitions[cf_file]['Parameters'].keys():
                    # TODO refactor into evaluations
                    if 'Default' in definitions[cf_file]['Parameters'][refname].keys():
                        logging.debug(
                            "Replacing Ref {} in file {} with default parameter value: {}".format(
                                refname, cf_file, definitions[cf_file]['Parameters'][refname]['Default']))
                        # write the parameter default into the template at the
                        # (now truncated) path where the Ref node lived
                        _set_in_dict(definitions[cf_file], ref,
                                     definitions[cf_file]['Parameters'][refname]['Default'])

                        ## TODO - Add Variable Eval Message for Output
                        # Output in Checkov looks like this:
                        # Variable versioning (of /.) evaluated to value "True" in expression: enabled = ${var.versioning}

            for resource_name, resource in definitions[cf_file]['Resources'].items():
                # skip the parser's synthetic line-marker entries
                if resource_name == '__startline__' or resource_name == '__endline__':
                    continue
                if 'Type' not in resource:
                    # This is not a CloudFormation resource, skip
                    continue
                resource_id = f"{resource['Type']}.{resource_name}"
                # TODO refactor into context parsing
                find_lines_result_list = list(find_lines(resource, '__startline__'))
                if len(find_lines_result_list) >= 1:
                    # span from the first __startline__ to the last __endline__
                    start_line = min(find_lines_result_list)
                    end_line = max(list(find_lines(resource, '__endline__')))
                    entity_lines_range = [start_line, end_line - 1]
                    entity_code_lines = definitions_raw[cf_file][start_line - 1:end_line - 1]
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    # collect inline "skip" suppression comments from the code block
                    skipped_checks = []
                    for line in entity_code_lines:
                        skip_search = re.search(COMMENT_REGEX, str(line))
                        if skip_search:
                            skipped_checks.append({
                                'id': skip_search.group(2),
                                'suppress_comment': skip_search.group(3)[1:] if skip_search.group(3) else "No comment provided"
                            })
                    results = resource_registry.scan(cf_file, {resource_name: resource}, skipped_checks, runner_filter)
                    for check, check_result in results.items():
                        ### TODO - Need to get entity_code_lines and entity_lines_range
                        record = Record(
                            check_id=check.id,
                            check_name=check.name,
                            check_result=check_result,
                            code_block=entity_code_lines,
                            file_path=cf_file,
                            file_line_range=entity_lines_range,
                            resource=resource_id,
                            evaluations=variable_evaluations,
                            check_class=check.__class__.__module__)
                        report.add_record(record=record)
    return report