def persist_repository(self, root_dir, files=None, excluded_paths=None):
    """
    Persist the repository found on root_dir path to Bridgecrew's platform.
    If --file flag is used, only files that are specified will be persisted.
    :param files: Absolute path of the files passed in the --file flag.
    :param root_dir: Absolute path of the directory containing the repository root level.
    :param excluded_paths: Paths to exclude from persist process.
    """
    # avoid a mutable default argument; fall back to an empty list per call
    excluded_paths = excluded_paths if excluded_paths is not None else []
    if not self.use_s3_integration:
        return
    if files:
        for f in files:
            _, file_extension = os.path.splitext(f)
            if file_extension in SUPPORTED_FILE_EXTENSIONS:
                self._persist_file(f, os.path.relpath(f, root_dir))
    else:
        for root_path, d_names, f_names in os.walk(root_dir):
            # self.excluded_paths only contains the config fetched from the platform,
            # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
            filter_ignored_paths(root_path, d_names, excluded_paths)
            filter_ignored_paths(root_path, f_names, excluded_paths)
            for file_path in f_names:
                _, file_extension = os.path.splitext(file_path)
                if file_extension in SUPPORTED_FILE_EXTENSIONS:
                    full_file_path = os.path.join(root_path, file_path)
                    relative_file_path = os.path.relpath(full_file_path, root_dir)
                    self._persist_file(full_file_path, relative_file_path)
def get_folder_definitions(
    root_folder: str, excluded_paths: Optional[List[str]], out_parsing_errors: Optional[Dict[str, str]] = None
) -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    # avoid a shared mutable default dict; errors written to a `{}` default would leak across calls
    if out_parsing_errors is None:
        out_parsing_errors = {}
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)
        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in CF_POSSIBLE_ENDINGS:
                files_list.append(os.path.join(root, file))

    definitions, definitions_raw = get_files_definitions(
        files_list, out_parsing_errors,
        lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}')

    definitions = {create_file_abs_path(root_folder, file_path): v for (file_path, v) in definitions.items()}
    definitions_raw = {create_file_abs_path(root_folder, file_path): v for (file_path, v) in definitions_raw.items()}

    return definitions, definitions_raw
def run(self, root_folder=None, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True) -> Report:
    registry = self.import_registry()

    definitions = {}
    definitions_raw = {}

    report = Report(self.check_type)

    if not files and not root_folder:
        logging.debug("No resources to scan.")
        return report

    if not external_checks_dir and self.require_external_checks():
        logging.debug("The json runner requires that external checks are defined.")
        return report

    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)

    if files:
        self._load_files(files, definitions, definitions_raw)

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            self._load_files(f_names, definitions, definitions_raw, lambda f: os.path.join(root, f))

    for json_file_path in definitions.keys():
        results = registry.scan(json_file_path, definitions[json_file_path], [], runner_filter)
        for check, result in results.items():
            result_config = result["results_configuration"]
            start = result_config.start_mark.line
            end = result_config.end_mark.line
            record = Record(
                check_id=check.id,
                bc_check_id=check.bc_id,
                check_name=check.name,
                check_result=result,
                code_block=definitions_raw[json_file_path][start:end + 1],
                file_path=json_file_path,
                file_line_range=[start + 1, end + 1],
                resource=f"{json_file_path}",
                evaluations=None,
                check_class=check.__class__.__module__,
                file_abs_path=os.path.abspath(json_file_path),
                entity_tags=None,
            )
            report.add_record(record)

    return report
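# _load_files above is a private helper that is not shown in this file. A plausible
# minimal sketch of its shape, inferred from the two call sites; the parse() helper
# and the exact error handling are assumptions, not the real implementation:
def _load_files(self, files, definitions, definitions_raw, filepath_fn=lambda f: f):
    for file in files:
        path = filepath_fn(file)
        try:
            # assumed parser returning a (template, raw_lines) tuple
            definitions[path], definitions_raw[path] = parse(path)
        except TypeError:
            logging.info(f"json runner skipping {path} as it could not be parsed")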
def test_filter_ignored_directories_regex_legacy(self):
    d_names = [
        'bin', 'integration_tests', 'tests', 'docs', '.github',
        'checkov', 'venv', '.git', 'kubernetes', '.idea'
    ]
    expected = ['bin', 'docs', 'checkov', 'venv', 'kubernetes']
    filter_ignored_paths('.', d_names, ["tests"])
    self.assertEqual(expected, d_names)
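# filter_ignored_paths itself is exercised throughout this file but never shown.
# A minimal sketch consistent with the test above (and the two CWD-based tests
# further down) -- an assumption, not the actual checkov implementation, and it
# assumes POSIX path separators and plain-string entries (the Terraform parser
# snippet below apparently passes os.DirEntry objects as well, which this sketch
# does not handle). It prunes `names` in place, as os.walk requires: hidden
# entries are always dropped, and the rest are removed when their path relative
# to the scan root matches any excluded-path regex.
import os
import re
from typing import List, Optional

def filter_ignored_paths(root: str, names: List[str], excluded_paths: Optional[List[str]],
                         included_paths: Optional[List[str]] = None) -> None:
    patterns = [re.compile(p) for p in (excluded_paths or [])]
    for name in list(names):  # iterate over a copy so in-place removal is safe
        if included_paths and name in included_paths:
            continue
        if name.startswith(".") or any(p.search(os.path.join(root, name)) for p in patterns):
            names.remove(name)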
def get_folder_definitions(
    root_folder: str, excluded_paths: Optional[List[str]], out_parsing_errors: Optional[Dict[str, str]] = None
) -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    # avoid a shared mutable default dict; errors written to a `{}` default would leak across calls
    if out_parsing_errors is None:
        out_parsing_errors = {}
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)
        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in CF_POSSIBLE_ENDINGS:
                files_list.append(os.path.join(root, file))

    definitions, definitions_raw = get_files_definitions(files_list, out_parsing_errors)
    return definitions, definitions_raw
def run(self, root_folder=None, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True):
    report = Report(self.check_type)
    self.tf_definitions = {}
    parsing_errors = {}
    if external_checks_dir:
        for directory in external_checks_dir:
            resource_registry.load_external_checks(directory)
            self.graph_registry.load_external_checks(directory)

    if root_folder:
        files = [] if not files else files
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending == '.json':
                    try:
                        with open(f'{root}/{file}') as f:
                            content = json.load(f)
                            if isinstance(content, dict) and content.get('terraform_version'):
                                files.append(os.path.join(root, file))
                    except Exception as e:
                        logging.debug(f'Failed to load json file {root}/{file}, skipping')
                        logging.debug('Failure message:')
                        logging.debug(e, stack_info=True)

    if files:
        files = [os.path.abspath(file) for file in files]
        for file in files:
            if file.endswith(".json"):
                tf_definitions, template_lines = parse_tf_plan(file)
                if not tf_definitions:
                    continue
                self.tf_definitions = tf_definitions
                self.template_lines = template_lines
                self.check_tf_definition(report, runner_filter)
            else:
                logging.debug(f'Failed to load {file} as it is not a .json file, skipping')

    report.add_parsing_errors(list(parsing_errors.keys()))

    graph = self.graph_manager.build_graph_from_definitions(self.tf_definitions, render_variables=False)
    self.graph_manager.save_graph(graph)
    graph_report = self.get_graph_checks_report(root_folder, runner_filter)
    merge_reports(report, graph_report)
    return report
def get_folder_definitions(
    root_folder: str, excluded_paths: Optional[List[str]]
) -> Tuple[Dict[str, List], Dict[str, List[Tuple[int, str]]]]:
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)

        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in K8_POSSIBLE_ENDINGS:
                full_path = os.path.join(root, file)
                if "/." not in full_path and file not in ['package.json', 'package-lock.json']:
                    # skip temp directories
                    files_list.append(full_path)
    return get_files_definitions(files_list)
def findKustomizeDirectories(root_folder, files, excluded_paths):
    kustomizeDirectories = []
    if not excluded_paths:
        excluded_paths = []
    if files:
        logging.info('Running with --file argument; file must be a kustomization.yaml file')
        for file in files:
            if os.path.basename(file) in Runner.kustomizeSupportedFileTypes:
                kustomizeDirectories.append(os.path.dirname(file))

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, excluded_paths)
            filter_ignored_paths(root, f_names, excluded_paths)
            # a plain loop instead of a list comprehension used only for its side effect
            for x in f_names:
                if x in Runner.kustomizeSupportedFileTypes:
                    kustomizeDirectories.append(os.path.abspath(root))

    return kustomizeDirectories
def find_chart_directories(root_folder, files, excluded_paths):
    chart_directories = []
    if not excluded_paths:
        excluded_paths = []
    if files:
        logging.info('Running with --file argument; checking for Helm Chart.yaml files')
        for file in files:
            if os.path.basename(file) == 'Chart.yaml':
                chart_directories.append(os.path.dirname(file))

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, excluded_paths)
            filter_ignored_paths(root, f_names, excluded_paths)
            if 'Chart.yaml' in f_names:
                chart_directories.append(root)

    return chart_directories
def persist_repository(self, root_dir, files=None, excluded_paths=None):
    """
    Persist the repository found on root_dir path to Bridgecrew's platform.
    If --file flag is used, only files that are specified will be persisted.
    :param files: Absolute path of the files passed in the --file flag.
    :param root_dir: Absolute path of the directory containing the repository root level.
    :param excluded_paths: Paths to exclude from persist process
    """
    excluded_paths = excluded_paths if excluded_paths is not None else []

    if not self.use_s3_integration:
        return
    files_to_persist = []
    if files:
        for f in files:
            _, file_extension = os.path.splitext(f)
            if file_extension in SUPPORTED_FILE_EXTENSIONS:
                files_to_persist.append((f, os.path.relpath(f, root_dir)))
    else:
        for root_path, d_names, f_names in os.walk(root_dir):
            # self.excluded_paths only contains the config fetched from the platform,
            # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
            filter_ignored_paths(root_path, d_names, excluded_paths)
            filter_ignored_paths(root_path, f_names, excluded_paths)
            for file_path in f_names:
                _, file_extension = os.path.splitext(file_path)
                if file_extension in SUPPORTED_FILE_EXTENSIONS:
                    full_file_path = os.path.join(root_path, file_path)
                    relative_file_path = os.path.relpath(full_file_path, root_dir)
                    files_to_persist.append((full_file_path, relative_file_path))

    logging.info(f"Persisting {len(files_to_persist)} files")
    with futures.ThreadPoolExecutor() as executor:
        futures.wait(
            [executor.submit(self._persist_file, full_file_path, relative_file_path)
             for full_file_path, relative_file_path in files_to_persist],
            return_when=futures.FIRST_EXCEPTION,
        )
    logging.info(f"Done persisting {len(files_to_persist)} files")
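# An aside on the futures.wait(..., return_when=FIRST_EXCEPTION) pattern above:
# it only unblocks the caller early; it neither cancels the remaining uploads nor
# re-raises the worker's exception, so failures go unnoticed. A variant that
# surfaces upload failures (an illustrative sketch, not the method above):
from concurrent import futures

def persist_all(persist_fn, files_to_persist):
    with futures.ThreadPoolExecutor() as executor:
        fs = [executor.submit(persist_fn, full, rel) for full, rel in files_to_persist]
        for future in futures.as_completed(fs):
            future.result()  # re-raises the first worker exception, if any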
def persist_repository(self, root_dir, files=None, excluded_paths=None, included_paths: Optional[List[str]] = None):
    """
    Persist the repository found on root_dir path to Bridgecrew's platform.
    If --file flag is used, only files that are specified will be persisted.
    :param files: Absolute path of the files passed in the --file flag.
    :param root_dir: Absolute path of the directory containing the repository root level.
    :param excluded_paths: Paths to exclude from persist process
    """
    excluded_paths = excluded_paths if excluded_paths is not None else []

    if not self.use_s3_integration:
        return
    files_to_persist: List[FileToPersist] = []
    if files:
        for f in files:
            f_name = os.path.basename(f)
            _, file_extension = os.path.splitext(f)
            if file_extension in SUPPORTED_FILE_EXTENSIONS or f_name in SUPPORTED_FILES:
                files_to_persist.append(FileToPersist(f, os.path.relpath(f, root_dir)))
    else:
        for root_path, d_names, f_names in os.walk(root_dir):
            # self.excluded_paths only contains the config fetched from the platform,
            # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
            filter_ignored_paths(root_path, d_names, excluded_paths, included_paths=included_paths)
            filter_ignored_paths(root_path, f_names, excluded_paths)
            for file_path in f_names:
                _, file_extension = os.path.splitext(file_path)
                if file_extension in SUPPORTED_FILE_EXTENSIONS or file_path in SUPPORTED_FILES:
                    full_file_path = os.path.join(root_path, file_path)
                    relative_file_path = os.path.relpath(full_file_path, root_dir)
                    files_to_persist.append(FileToPersist(full_file_path, relative_file_path))

    self.persist_files(files_to_persist)
def test_filter_ignored_directories_regex_absolute_cwd(self):
    # this simulates scanning a subdirectory and applying filter logic using an absolute path
    current_dir = os.path.dirname(os.path.realpath(__file__))

    excluded_paths = ['dir2']
    remaining_dirs = []
    expected = {
        os.path.join(current_dir, 'sample_dir', 'dir33'),
        os.path.join(current_dir, 'sample_dir', 'dir1'),
        os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
        os.path.join(current_dir, 'sample_dir', 'dir11')
    }

    for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
        filter_ignored_paths(root, dirs, excluded_paths)
        remaining_dirs += [os.path.join(root, d) for d in dirs]

    # we expect .terraform and all dir2 to get filtered out
    self.assertEqual(set(remaining_dirs), expected)

    excluded_paths = [os.path.join('dir1', 'dir2')]
    remaining_dirs = []
    expected = {
        os.path.join(current_dir, 'sample_dir', 'dir33'),
        os.path.join(current_dir, 'sample_dir', 'dir1'),
        os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
        os.path.join(current_dir, 'sample_dir', 'dir11'),
        os.path.join(current_dir, 'sample_dir', 'dir11', 'dir2'),
        os.path.join(current_dir, 'sample_dir', 'dir33', 'dir2'),
    }

    for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
        filter_ignored_paths(root, dirs, excluded_paths)
        remaining_dirs += [os.path.join(root, d) for d in dirs]

    # we expect .terraform and dir1/dir2 to get filtered out
    self.assertEqual(set(remaining_dirs), expected)

    excluded_paths = [os.path.join('dir..', 'dir2')]
    remaining_dirs = []
    expected = {
        os.path.join(current_dir, 'sample_dir', 'dir33'),
        os.path.join(current_dir, 'sample_dir', 'dir1'),
        os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
        os.path.join(current_dir, 'sample_dir', 'dir11'),
        os.path.join(current_dir, 'sample_dir', 'dir1', 'dir2')
    }

    for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
        filter_ignored_paths(root, dirs, excluded_paths)
        remaining_dirs += [os.path.join(root, d) for d in dirs]

    # we expect .terraform and dir11/dir2 and dir33/dir2 to get filtered out
    self.assertEqual(set(remaining_dirs), expected)
def test_filter_ignored_directories_regex_relative_cwd(self):
    # this simulates scanning a subdirectory and applying filter logic relative to the CWD
    # for this we need to CD temporarily
    current_dir = os.path.dirname(os.path.realpath(__file__))
    old_cwd = os.path.abspath(os.curdir)

    try:
        os.chdir(current_dir)

        excluded_paths = ['dir2', os.path.join('dir1', 'file1.tf')]
        remaining_dirs = []
        remaining_files = []
        expected_dirs = {
            os.path.join('sample_dir', 'dir33'),
            os.path.join('sample_dir', 'dir1'),
            os.path.join('sample_dir', 'dir1', 'dir4'),
            os.path.join('sample_dir', 'dir11')
        }
        expected_files = {
            os.path.join('sample_dir', 'dir33', 'file2.tf'),
            os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf'),
        }

        for root, dirs, files in os.walk('sample_dir'):
            filter_ignored_paths(root, dirs, excluded_paths)
            filter_ignored_paths(root, files, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]
            remaining_files += [os.path.join(root, f) for f in files]

        # we expect .terraform and all dir2 to get filtered out
        # also dir1/file1
        self.assertEqual(set(remaining_dirs), expected_dirs)
        self.assertEqual(set(remaining_files), expected_files)

        excluded_paths = [os.path.join('dir1', 'dir2')]
        remaining_dirs = []
        remaining_files = []
        expected_dirs = {
            os.path.join('sample_dir', 'dir33'),
            os.path.join('sample_dir', 'dir1'),
            os.path.join('sample_dir', 'dir1', 'dir4'),
            os.path.join('sample_dir', 'dir11'),
            os.path.join('sample_dir', 'dir11', 'dir2'),
            os.path.join('sample_dir', 'dir33', 'dir2'),
        }
        expected_files = {
            os.path.join('sample_dir', 'dir33', 'file2.tf'),
            os.path.join('sample_dir', 'dir1', 'file1.tf'),
            os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf'),
            os.path.join('sample_dir', 'dir11', 'dir2', 'file4.tf'),
            os.path.join('sample_dir', 'dir33', 'dir2', 'file5.tf')
        }

        for root, dirs, files in os.walk('sample_dir'):
            filter_ignored_paths(root, dirs, excluded_paths)
            filter_ignored_paths(root, files, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]
            remaining_files += [os.path.join(root, f) for f in files]

        # we expect .terraform and dir1/dir2 to get filtered out
        self.assertEqual(set(remaining_dirs), expected_dirs)
        self.assertEqual(set(remaining_files), expected_files)

        excluded_paths = [os.path.join('dir..', 'dir2')]
        remaining_dirs = []
        remaining_files = []
        expected_dirs = {
            os.path.join('sample_dir', 'dir33'),
            os.path.join('sample_dir', 'dir1'),
            os.path.join('sample_dir', 'dir1', 'dir4'),
            os.path.join('sample_dir', 'dir11'),
            os.path.join('sample_dir', 'dir1', 'dir2')
        }
        expected_files = {
            os.path.join('sample_dir', 'dir1', 'dir2', 'file2.tf'),
            os.path.join('sample_dir', 'dir1', 'file1.tf'),
            os.path.join('sample_dir', 'dir33', 'file2.tf'),
            os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf')
        }

        for root, dirs, files in os.walk('sample_dir'):
            filter_ignored_paths(root, dirs, excluded_paths)
            filter_ignored_paths(root, files, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]
            remaining_files += [os.path.join(root, f) for f in files]

        # we expect .terraform and dir11/dir2 and dir33/dir2 to get filtered out
        self.assertEqual(set(remaining_dirs), expected_dirs)
        self.assertEqual(set(remaining_files), expected_files)
    finally:
        os.chdir(old_cwd)
def run(self, root_folder, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True) -> Report:
    secrets = SecretsCollection()
    with transient_settings({
        # Only run scans with only these plugins.
        'plugins_used': [
            {'name': 'AWSKeyDetector'},
            {'name': 'ArtifactoryDetector'},
            {'name': 'AzureStorageKeyDetector'},
            {'name': 'BasicAuthDetector'},
            {'name': 'CloudantDetector'},
            {'name': 'IbmCloudIamDetector'},
            {'name': 'MailchimpDetector'},
            {'name': 'PrivateKeyDetector'},
            {'name': 'SlackDetector'},
            {'name': 'SoftlayerDetector'},
            {'name': 'SquareOAuthDetector'},
            {'name': 'StripeDetector'},
            {'name': 'TwilioKeyDetector'},
        ]
    }):
        report = Report(self.check_type)
        # Implement non IaC files (including .terraform dir)
        files_to_scan = files or []
        excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # pass the combined exclusion list; the bare runner_filter.excluded_paths
                # would silently drop the ignored directories computed above
                filter_ignored_paths(root, d_names, excluded_paths)
                filter_ignored_paths(root, f_names, excluded_paths)
                for file in f_names:
                    if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                        files_to_scan.append(os.path.join(root, file))
        logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')
        # TODO: re-enable filter when re-adding `SecretKeyword` plugin
        scan.get_settings().disable_filters(*['detect_secrets.filters.heuristic.is_indirect_reference'])

        def _scan_file(file_paths: List[str]):
            for file_path in file_paths:
                start = time.time()
                try:
                    secrets.scan_file(file_path)
                except Exception as err:
                    logging.warning(f"Secret scanning: could not process file {file_path}, {err}")
                end = time.time()
                scan_time = end - start
                if scan_time > 10:
                    logging.info(f'Scanned {file_path}, took {scan_time} seconds')

        run_function_multithreaded(_scan_file, files_to_scan, 1, num_of_workers=os.cpu_count())

        for _, secret in iter(secrets):
            check_id = SECRET_TYPE_TO_ID.get(secret.type)
            if not check_id:
                continue
            result = {'result': CheckResult.FAILED}
            line_text = linecache.getline(os.path.join(root_folder, secret.filename), secret.line_number) \
                if root_folder else linecache.getline(secret.filename, secret.line_number)
            if line_text != "" and line_text.split()[0] == 'git_commit':
                continue
            result = self.search_for_suppression(check_id, root_folder, secret, runner_filter.skip_checks,
                                                 CHECK_ID_TO_SECRET_TYPE) or result
            report.add_record(Record(
                check_id=check_id,
                check_name=secret.type,
                check_result=result,
                code_block=[(secret.line_number, line_text)],
                file_path=f'/{os.path.relpath(secret.filename, root_folder)}',
                file_line_range=[secret.line_number, secret.line_number + 1],
                resource=secret.secret_hash,
                check_class=None,
                evaluations=None,
                file_abs_path=os.path.abspath(secret.filename)
            ))
        return report
def run(self, root_folder: str, external_checks_dir: Optional[List[str]] = None,
        files: Optional[List[str]] = None, runner_filter: RunnerFilter = RunnerFilter(),
        collect_skip_comments: bool = True) -> Report:
    current_dir = os.path.dirname(os.path.realpath(__file__))
    secrets = SecretsCollection()
    with transient_settings({
        # Only run scans with only these plugins.
        'plugins_used': [
            {'name': 'AWSKeyDetector'},
            {'name': 'ArtifactoryDetector'},
            {'name': 'AzureStorageKeyDetector'},
            {'name': 'BasicAuthDetector'},
            {'name': 'CloudantDetector'},
            {'name': 'IbmCloudIamDetector'},
            {'name': 'MailchimpDetector'},
            {'name': 'PrivateKeyDetector'},
            {'name': 'SlackDetector'},
            {'name': 'SoftlayerDetector'},
            {'name': 'SquareOAuthDetector'},
            {'name': 'StripeDetector'},
            {'name': 'TwilioKeyDetector'},
            {
                'name': 'EntropyKeywordCombinator',
                'path': f'file://{current_dir}/plugins/entropy_keyword_combinator.py',
                'limit': ENTROPY_KEYWORD_LIMIT
            }
        ]
    }) as settings:
        report = Report(self.check_type)
        # Implement non IaC files (including .terraform dir)
        files_to_scan = files or []
        excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, excluded_paths)
                filter_ignored_paths(root, f_names, excluded_paths)
                for file in f_names:
                    if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                        files_to_scan.append(os.path.join(root, file))
        logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')
        settings.disable_filters(*['detect_secrets.filters.heuristic.is_indirect_reference'])

        Runner._scan_files(files_to_scan, secrets)

        for _, secret in iter(secrets):
            check_id = SECRET_TYPE_TO_ID.get(secret.type)
            bc_check_id = bc_integration.ckv_to_bc_id_mapping.get(check_id) if bc_integration.ckv_to_bc_id_mapping else None
            if not check_id:
                continue
            if runner_filter.checks and not runner_filter.should_run_check(check_id, bc_check_id):
                continue
            result: _CheckResult = {'result': CheckResult.FAILED}
            line_text = linecache.getline(secret.filename, secret.line_number)
            if line_text != "" and len(line_text.split()) > 0 and line_text.split()[0] == 'git_commit':
                continue
            result = self.search_for_suppression(
                check_id=check_id,
                bc_check_id=bc_check_id,
                secret=secret,
                runner_filter=runner_filter,
            ) or result
            report.add_resource(f'{secret.filename}:{secret.secret_hash}')
            report.add_record(Record(
                check_id=check_id,
                bc_check_id=bc_check_id,
                check_name=secret.type,
                check_result=result,
                code_block=[(secret.line_number, line_text)],
                file_path=f'/{os.path.relpath(secret.filename, root_folder)}',
                file_line_range=[secret.line_number, secret.line_number + 1],
                resource=secret.secret_hash,
                check_class=None,
                evaluations=None,
                file_abs_path=os.path.abspath(secret.filename)
            ))

        return report
def run(self, root_folder, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True, helmChart=None):
    report = Report(self.check_type)
    definitions = {}
    definitions_raw = {}
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)

    if files:
        _parse_files(files, definitions, definitions_raw)

    if root_folder:
        filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)

            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in K8_POSSIBLE_ENDINGS:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path and file not in ['package.json', 'package-lock.json']:
                        # skip temp directories
                        files_list.append(full_path)

        _parse_files(files_list, definitions, definitions_raw, filepath_fn)

    for k8_file in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which k8_file does not always give).
        if k8_file[0] == '/':
            path_to_convert = (root_folder + k8_file) if root_folder else k8_file
        else:
            path_to_convert = (os.path.join(root_folder, k8_file)) if root_folder else k8_file

        file_abs_path = os.path.abspath(path_to_convert)

        if definitions[k8_file]:
            for i in range(len(definitions[k8_file])):
                if (not 'apiVersion' in definitions[k8_file][i].keys()) and (not 'kind' in definitions[k8_file][i].keys()):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]
                if entity_conf is None:
                    continue

                # Split out resources if entity kind is List
                # (use .get() -- only one of apiVersion/kind is guaranteed to exist here)
                if isinstance(entity_conf, dict) and entity_conf.get("kind") == "List":
                    for item in entity_conf.get("items", []):
                        definitions[k8_file].append(item)

            for i in range(len(definitions[k8_file])):
                if _is_invalid_k8_definition(definitions[k8_file][i]):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]

                if isinstance(entity_conf, dict) and entity_conf.get("kind") == "List":
                    continue

                # Skip entity without metadata["name"]
                if isinstance(entity_conf, dict) and entity_conf.get("metadata"):
                    if isinstance(entity_conf["metadata"], int) or "name" not in entity_conf["metadata"]:
                        continue
                else:
                    continue

                # Skip entity with parent (metadata["ownerReferences"]) in runtime
                # We will alert in runtime only
                if "ownerReferences" in entity_conf["metadata"] and \
                        entity_conf["metadata"]["ownerReferences"] is not None:
                    continue

                # Append containers and initContainers to definitions list
                for type in ["containers", "initContainers"]:
                    containers = []
                    if entity_conf.get("kind") == "CustomResourceDefinition":
                        continue
                    containers = search_deep_keys(type, entity_conf, [])
                    if not containers:
                        continue
                    containers = containers.pop()
                    # containers.insert(0, entity_conf['kind'])
                    containerDef = {}
                    namespace = ""
                    if "namespace" in entity_conf["metadata"]:
                        namespace = entity_conf["metadata"]["namespace"]
                    else:
                        namespace = "default"
                    containerDef["containers"] = containers.pop()
                    if containerDef["containers"] is not None:
                        containerDef["containers"] = force_list(containerDef["containers"])
                        for cd in containerDef["containers"]:
                            i = containerDef["containers"].index(cd)
                            containerDef["containers"][i]["apiVersion"] = entity_conf["apiVersion"]
                            containerDef["containers"][i]["kind"] = type
                            containerDef["containers"][i]["parent"] = "{}.{}.{} (container {})".format(
                                entity_conf["kind"], entity_conf["metadata"]["name"], namespace, str(i))
                            containerDef["containers"][i]["parent_metadata"] = entity_conf["metadata"]
                        definitions[k8_file].extend(containerDef["containers"])

            # Run for each definition, including added container definitions
            for i in range(len(definitions[k8_file])):
                if _is_invalid_k8_definition(definitions[k8_file][i]):
                    continue
                logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                entity_conf = definitions[k8_file][i]
                if entity_conf is None:
                    continue
                # check .get("kind") first so a missing key can't raise before the guard
                if isinstance(entity_conf, dict) and (entity_conf.get("kind") == "List" or not entity_conf.get("kind")):
                    continue
                if isinstance(entity_conf, dict) and isinstance(entity_conf.get("kind"), int):
                    continue

                # Skip entity without metadata["name"] or parent_metadata["name"]
                if not any(x in entity_conf["kind"] for x in ["containers", "initContainers"]):
                    if entity_conf.get("metadata"):
                        if isinstance(entity_conf["metadata"], int) or "name" not in entity_conf["metadata"]:
                            continue
                    else:
                        continue

                # Skip entity with parent (metadata["ownerReferences"]) in runtime
                # We will alert in runtime only
                if "metadata" in entity_conf:
                    if "ownerReferences" in entity_conf["metadata"] and \
                            entity_conf["metadata"]["ownerReferences"] is not None:
                        continue

                # Skip Kustomization Templates (for now)
                if entity_conf["kind"] == "Kustomization":
                    continue

                skipped_checks = get_skipped_checks(entity_conf)

                results = registry.scan(k8_file, entity_conf, skipped_checks, runner_filter)

                start_line = entity_conf["__startline__"]
                end_line = entity_conf["__endline__"]

                if start_line == end_line:
                    entity_lines_range = [start_line, end_line]
                    entity_code_lines = definitions_raw[k8_file][start_line - 1: end_line]
                else:
                    entity_lines_range = [start_line, end_line - 1]
                    entity_code_lines = definitions_raw[k8_file][start_line - 1: end_line - 1]

                # TODO? - Variable Eval Message!
                variable_evaluations = {}

                for check, check_result in results.items():
                    resource_id = check.get_resource_id(entity_conf)
                    report.add_resource(f'{k8_file}:{resource_id}')
                    record = Record(
                        check_id=check.id,
                        bc_check_id=check.bc_id,
                        check_name=check.name,
                        check_result=check_result,
                        code_block=entity_code_lines,
                        file_path=k8_file,
                        file_line_range=entity_lines_range,
                        resource=resource_id,
                        evaluations=variable_evaluations,
                        check_class=check.__class__.__module__,
                        file_abs_path=file_abs_path
                    )
                    record.set_guideline(check.guideline)
                    report.add_record(record=record)

    return report
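# A worked example of the path normalization commented on in the runner above
# (values are hypothetical): with -d, definition keys are hardcoded to start
# with '/' by filepath_fn, so the root folder is prepended; with -f, the key is
# already a usable absolute or relative path and root_folder is None.
root_folder = "infra"
k8_file = "/deploy/pod.yaml"  # key produced by filepath_fn above
path_to_convert = (root_folder + k8_file) if root_folder else k8_file
assert path_to_convert == "infra/deploy/pod.yaml"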
def _internal_dir_load(self, directory: str,
                       module_loader_registry: ModuleLoaderRegistry,
                       dir_filter: Callable[[str], bool],
                       keys_referenced_as_modules: Set[str],
                       specified_vars: Optional[Mapping[str, str]] = None,
                       module_load_context: Optional[str] = None,
                       vars_files: Optional[List[str]] = None,
                       root_dir: Optional[str] = None,
                       excluded_paths: Optional[List[str]] = None):
    """
    See `parse_directory` docs.
    :param directory:               Directory in which .tf and .tfvars files will be loaded.
    :param module_loader_registry:  Registry used for resolving modules. This allows customization of how
                                    much resolution is performed (and easier testing) by using a manually
                                    constructed registry rather than the default.
    :param dir_filter:              Determines whether or not a directory should be processed. Returning
                                    True will allow processing. The argument will be the absolute path of
                                    the directory.
    :param specified_vars:          Specifically defined variable values, overriding values from any other source.
    """

    # Stage 1: Look for applicable files in the directory:
    #   https://www.terraform.io/docs/configuration/index.html#code-organization
    #   Load the raw data for non-variable files, but perform no processing other than loading
    #   variable default values.
    #   Variable files are also flagged for later processing.
    var_value_and_file_map: Dict[str, Tuple[Any, str]] = {}
    hcl_tfvars: Optional[os.DirEntry] = None
    json_tfvars: Optional[os.DirEntry] = None
    auto_vars_files: List[os.DirEntry] = []     # *.auto.tfvars / *.auto.tfvars.json
    explicit_var_files: List[os.DirEntry] = []  # files passed with --var-file; only process the ones in this directory

    dir_contents = list(os.scandir(directory))
    if excluded_paths:
        filter_ignored_paths(root_dir, dir_contents, excluded_paths)

    tf_files_to_load = []
    for file in dir_contents:
        # Ignore directories and hidden files
        try:
            if not file.is_file() or file.name.startswith("."):
                continue
        except OSError:
            # Skip files that can't be accessed
            continue

        # Variable files
        # See: https://www.terraform.io/docs/configuration/variables.html#variable-definitions-tfvars-files
        if file.name == "terraform.tfvars.json":
            json_tfvars = file
        elif file.name == "terraform.tfvars":
            hcl_tfvars = file
        elif file.name.endswith(".auto.tfvars.json") or file.name.endswith(".auto.tfvars"):
            auto_vars_files.append(file)
        elif vars_files and file.path in vars_files:
            explicit_var_files.append(file)
        # Resource files
        elif file.name.endswith(".tf") or (self.scan_hcl and file.name.endswith('.hcl')):  # TODO: add support for .tf.json
            tf_files_to_load.append(file)

    files_to_data = self._load_files(tf_files_to_load)

    for file, data in sorted(files_to_data, key=lambda x: x[0]):
        if not data:
            continue
        self.out_definitions[file] = data

        # Load variable defaults
        # (see https://www.terraform.io/docs/configuration/variables.html#declaring-an-input-variable)
        var_blocks = data.get("variable")
        if var_blocks and isinstance(var_blocks, list):
            for var_block in var_blocks:
                if not isinstance(var_block, dict):
                    continue
                for var_name, var_definition in var_block.items():
                    if not isinstance(var_definition, dict):
                        continue
                    default_value = var_definition.get("default")
                    if default_value is not None and isinstance(default_value, list):
                        self.external_variables_data.append((var_name, default_value[0], file))
                        var_value_and_file_map[var_name] = default_value[0], file

    # Stage 2: Load vars in proper order:
    #          https://www.terraform.io/docs/configuration/variables.html#variable-definition-precedence
    #          Defaults are loaded in stage 1.
    #          Then loading in this order with later taking precedence:
    #             - Environment variables
    #             - The terraform.tfvars file, if present.
    #             - The terraform.tfvars.json file, if present.
    #             - Any *.auto.tfvars or *.auto.tfvars.json files, processed in lexical order of
    #               their filenames.
    #          Overriding everything else, variables from `specified_vars`, which are considered
    #          directly set.
    for key, value in self.env_vars.items():                                 # env vars
        if not key.startswith("TF_VAR_"):
            continue
        var_value_and_file_map[key[7:]] = value, f"env:{key}"
        self.external_variables_data.append((key[7:], value, f"env:{key}"))
    if hcl_tfvars:                                                           # terraform.tfvars
        data = _load_or_die_quietly(hcl_tfvars, self.out_parsing_errors, clean_definitions=False)
        if data:
            var_value_and_file_map.update({k: (_safe_index(v, 0), hcl_tfvars.path) for k, v in data.items()})
            self.external_variables_data.extend([(k, _safe_index(v, 0), hcl_tfvars.path) for k, v in data.items()])
    if json_tfvars:                                                          # terraform.tfvars.json
        data = _load_or_die_quietly(json_tfvars, self.out_parsing_errors)
        if data:
            var_value_and_file_map.update({k: (v, json_tfvars.path) for k, v in data.items()})
            self.external_variables_data.extend([(k, v, json_tfvars.path) for k, v in data.items()])

    auto_var_files_to_data = self._load_files(auto_vars_files)
    for var_file, data in sorted(auto_var_files_to_data, key=lambda x: x[0]):
        if data:
            var_value_and_file_map.update({k: (v, var_file) for k, v in data.items()})
            self.external_variables_data.extend([(k, v, var_file) for k, v in data.items()])

    explicit_var_files_to_data = self._load_files(explicit_var_files)
    # it's possible that os.scandir returned the var files in a different order than they were specified
    for var_file, data in sorted(explicit_var_files_to_data, key=lambda x: vars_files.index(x[0])):
        if data:
            var_value_and_file_map.update({k: (v, var_file) for k, v in data.items()})
            self.external_variables_data.extend([(k, v, var_file) for k, v in data.items()])

    if specified_vars:                                                       # specified
        var_value_and_file_map.update({k: (v, "manual specification") for k, v in specified_vars.items()})
        self.external_variables_data.extend([(k, v, "manual specification") for k, v in specified_vars.items()])

    # IMPLEMENTATION NOTE: When resolving `module.` references, access to the entire data map is needed. It
    #                      may be a little overboard, but I don't want to just pass the entire data map down
    #                      because it breaks encapsulation and I don't want to cause confusion about what
    #                      data set is being processed. To avoid this, here's a Callable that will get the
    #                      data map for a particular module reference. (Might be OCD, but...)
    module_data_retrieval = lambda module_ref: self.out_definitions.get(module_ref)

    # Stage 4: Load modules
    #          This stage needs to be done in a loop (again... alas, no DAG) because modules might not
    #          be loadable until other modules are loaded. This happens when parameters to one module
    #          depend on the output of another. For such cases, the base module must be loaded, then
    #          a parameter resolution pass needs to happen, then the second module can be loaded.
    #
    #          One gotcha is that we need to make sure we load all modules at some point, even if their
    #          parameters don't resolve. So, if we hit a spot where resolution doesn't change anything
    #          and there are still modules to be loaded, they will be forced on the next pass.
    force_final_module_load = False
    for i in range(0, 10):  # circuit breaker - no more than 10 loops
        logging.debug("Module load loop %d", i)

        # Stage 4a: Load eligible modules
        has_more_modules = self._load_modules(directory, module_loader_registry, dir_filter,
                                              module_load_context, keys_referenced_as_modules,
                                              force_final_module_load)

        # Stage 4b: Variable resolution round 2 - now with (possibly more) modules
        made_var_changes = False
        if not has_more_modules:
            break  # nothing more to do
        elif not made_var_changes:
            # If there are more modules to load but no variables were resolved, then do a final module
            # load, forcing things through without complete resolution.
            force_final_module_load = True
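# A worked illustration of the variable-definition precedence described in
# Stage 2 of the parser above (all values hypothetical): sources are applied
# in order, so later ones win, with specified_vars overriding everything. The
# map mirrors the (value, source) shape of var_value_and_file_map.
sources = [
    ("defaults", {"region": "us-west-2"}),
    ("env:TF_VAR_region", {"region": "us-east-1"}),
    ("terraform.tfvars", {"region": "eu-west-1"}),
    ("manual specification", {"region": "ap-south-1"}),
]
resolved = {}
for source_name, values in sources:
    resolved.update({k: (v, source_name) for k, v in values.items()})
assert resolved["region"] == ("ap-south-1", "manual specification")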
def run(self, root_folder, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True):
    report = Report(self.check_type)
    definitions = {}
    definitions_raw = {}
    parsing_errors = {}
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            cfn_registry.load_external_checks(directory)

    if files:
        for file in files:
            (definitions[file], definitions_raw[file]) = parse(file)

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in CF_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))

        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            try:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
            except TypeError:
                logging.info(f'CloudFormation skipping {file} as it is not a valid CF template')

    # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
    definitions = {
        k: v for k, v in definitions.items()
        if v and isinstance(v, dict_node) and v.__contains__("Resources") and isinstance(v["Resources"], dict_node)
    }
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}

    for cf_file in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which cf_file does not always give).
        if cf_file[0] == '/':
            path_to_convert = (root_folder + cf_file) if root_folder else cf_file
        else:
            path_to_convert = (os.path.join(root_folder, cf_file)) if root_folder else cf_file

        file_abs_path = os.path.abspath(path_to_convert)

        if isinstance(definitions[cf_file], dict_node) and 'Resources' in definitions[cf_file].keys():
            cf_context_parser = ContextParser(cf_file, definitions[cf_file], definitions_raw[cf_file])
            logging.debug("Template Dump for {}: {}".format(cf_file, definitions[cf_file], indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in definitions[cf_file]['Resources'].items():
                resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                # check that the resource can be parsed as a CF resource
                if resource_id:
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}
                        skipped_checks = ContextParser.collect_skip_comments(entity_code_lines)
                        entity = {resource_name: resource}
                        results = cfn_registry.scan(cf_file, entity, skipped_checks, runner_filter)
                        tags = cfn_utils.get_resource_tags(entity)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=cf_file,
                                file_line_range=entity_lines_range,
                                resource=resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path,
                                entity_tags=tags
                            )
                            report.add_record(record=record)
    return report
def run(
    self,
    root_folder: str,
    external_checks_dir: Optional[List[str]] = None,
    files: Optional[List[str]] = None,
    runner_filter: RunnerFilter = RunnerFilter(),
    collect_skip_comments: bool = True,
) -> Report:
    report = Report(self.check_type)
    files_list = []
    filepath_fn = None
    if external_checks_dir:
        for directory in external_checks_dir:
            arm_resource_registry.load_external_checks(directory)

    if files:
        files_list = files.copy()

    if root_folder:
        filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                file_ending = os.path.splitext(file)[1]
                if file_ending in ARM_POSSIBLE_ENDINGS:
                    files_list.append(os.path.join(root, file))

    definitions, definitions_raw = get_files_definitions(files_list, filepath_fn)

    # Filter out empty files that have not been parsed successfully, and filter out non-ARM template files
    definitions = {k: v for k, v in definitions.items() if v and v.__contains__("resources")}
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}

    for arm_file in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which arm_file does not always give).
        if arm_file[0] == '/':
            path_to_convert = (root_folder + arm_file) if root_folder else arm_file
        else:
            path_to_convert = (os.path.join(root_folder, arm_file)) if root_folder else arm_file

        file_abs_path = os.path.abspath(path_to_convert)

        if isinstance(definitions[arm_file], DictNode):
            arm_context_parser = ContextParser(arm_file, definitions[arm_file], definitions_raw[arm_file])
            logging.debug(f"Template Dump for {arm_file}: {definitions[arm_file]}")

            if 'resources' in definitions[arm_file].keys():
                arm_context_parser.evaluate_default_parameters()

                # Split out nested resources from base resource
                for resource in definitions[arm_file]['resources']:
                    if isinstance(resource, dict) and "parent_name" in resource.keys():
                        continue
                    nested_resources = []
                    nested_resources = arm_context_parser.search_deep_keys("resources", resource, [])
                    if nested_resources:
                        for nr in nested_resources:
                            nr_element = nr.pop()
                            if nr_element:
                                for element in nr_element:
                                    new_resource = {}
                                    new_resource = element
                                    if isinstance(new_resource, dict):
                                        new_resource["parent_name"] = resource["name"]
                                        new_resource["parent_type"] = resource["type"]
                                        definitions[arm_file]['resources'].append(new_resource)

                for resource in definitions[arm_file]['resources']:
                    resource_id = arm_context_parser.extract_arm_resource_id(resource)
                    report.add_resource(f'{arm_file}:{resource_id}')
                    resource_name = arm_context_parser.extract_arm_resource_name(resource)
                    entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(resource)
                    if entity_lines_range and entity_code_lines:
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}
                        skipped_checks = ContextParser.collect_skip_comments(resource)
                        results = arm_resource_registry.scan(arm_file, {resource_name: resource}, skipped_checks,
                                                             runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                bc_check_id=check.bc_id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=arm_file,
                                file_line_range=entity_lines_range,
                                resource=resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path
                            )
                            record.set_guideline(check.guideline)
                            report.add_record(record=record)

            if 'parameters' in definitions[arm_file].keys():
                parameters = definitions[arm_file]['parameters']
                for parameter_name, parameter_details in parameters.items():
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    resource_id = f'parameter.{parameter_name}'
                    resource_name = parameter_name
                    entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(parameter_details)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = ContextParser.collect_skip_comments(parameter_details)
                        results = arm_parameter_registry.scan(arm_file, {resource_name: parameter_details},
                                                              skipped_checks, runner_filter)
                        for check, check_result in results.items():
                            record = Record(
                                check_id=check.id,
                                bc_check_id=check.bc_id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=entity_code_lines,
                                file_path=arm_file,
                                file_line_range=entity_lines_range,
                                resource=resource_id,
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path
                            )
                            record.set_guideline(check.guideline)
                            report.add_record(record=record)

    return report
def _filter_ignored_paths(root, paths, excluded_paths):
    filter_ignored_paths(root, paths, excluded_paths)
    # a plain loop instead of a list comprehension used only for its side effect
    for path in list(paths):
        if path in [default_ml_registry.external_modules_folder_name]:
            paths.remove(path)
def run(self, root_folder, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True):
    report = Report(self.check_type)
    definitions = {}
    definitions_raw = {}
    parsing_errors = {}
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            function_registry.load_external_checks(directory)

    if files:
        for file in files:
            if os.path.basename(file) in SLS_FILE_MASK:
                parse_result = parse(file)
                if parse_result:
                    (definitions[file], definitions_raw[file]) = parse_result

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            # Don't walk in to "node_modules" directories under the root folder. If, for some reason,
            # scanning one of these is desired, it can be directly specified.
            if "node_modules" in d_names:
                d_names.remove("node_modules")

            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                if file in SLS_FILE_MASK:
                    full_path = os.path.join(root, file)
                    if "/." not in full_path:
                        # skip temp directories
                        files_list.append(full_path)

        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            parse_result = parse(file)
            if parse_result:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result

    # Filter out empty files that have not been parsed successfully
    definitions = {k: v for k, v in definitions.items() if v}
    definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}

    for sls_file, sls_file_data in definitions.items():

        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which sls_file does not always give).
        if sls_file[0] == '/':
            path_to_convert = (root_folder + sls_file) if root_folder else sls_file
        else:
            path_to_convert = (os.path.join(root_folder, sls_file)) if root_folder else sls_file

        file_abs_path = os.path.abspath(path_to_convert)

        if not isinstance(sls_file_data, dict_node):
            continue

        if CFN_RESOURCES_TOKEN in sls_file_data and isinstance(sls_file_data[CFN_RESOURCES_TOKEN], dict_node):
            cf_sub_template = sls_file_data[CFN_RESOURCES_TOKEN]
            if not cf_sub_template.get('Resources'):
                continue
            cf_context_parser = CfnContextParser(sls_file, cf_sub_template, definitions_raw[sls_file])
            logging.debug("Template Dump for {}: {}".format(sls_file, sls_file_data, indent=2))
            cf_context_parser.evaluate_default_refs()
            for resource_name, resource in cf_sub_template['Resources'].items():
                if not isinstance(resource, dict_node):
                    continue
                cf_resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                if not cf_resource_id:
                    # no Type attribute for resource
                    continue
                entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(resource)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                    # TODO - Variable Eval Message!
                    variable_evaluations = {}
                    entity = {resource_name: resource}
                    results = cfn_registry.scan(sls_file, entity, skipped_checks, runner_filter)
                    tags = cfn_utils.get_resource_tags(entity, cfn_registry)
                    for check, check_result in results.items():
                        record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                        code_block=entity_code_lines, file_path=sls_file,
                                        file_line_range=entity_lines_range,
                                        resource=cf_resource_id, evaluations=variable_evaluations,
                                        check_class=check.__class__.__module__,
                                        file_abs_path=file_abs_path, entity_tags=tags)
                        report.add_record(record=record)

        sls_context_parser = SlsContextParser(sls_file, sls_file_data, definitions_raw[sls_file])

        # Sub-sections that have multiple items under them
        for token, registry in MULTI_ITEM_SECTIONS:
            template_items = sls_file_data.get(token)
            if not template_items or not isinstance(template_items, dict):
                continue
            for item_name, item_content in template_items.items():
                if not isinstance(item_content, dict_node):
                    continue
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(item_content)
                if entity_lines_range and entity_code_lines:
                    skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                    variable_evaluations = {}
                    if token == "functions":  # nosec
                        # "Enriching" copies things like "environment" and "stackTags" down into the
                        # function data from the provider block since logically that's what serverless
                        # does. This allows checks to see what the complete data would be.
                        sls_context_parser.enrich_function_with_provider(item_name)
                    entity = EntityDetails(sls_context_parser.provider_type, item_content)
                    results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
                    tags = cfn_utils.get_resource_tags(entity, registry)
                    for check, check_result in results.items():
                        record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                        code_block=entity_code_lines, file_path=sls_file,
                                        file_line_range=entity_lines_range,
                                        resource=item_name, evaluations=variable_evaluations,
                                        check_class=check.__class__.__module__,
                                        file_abs_path=file_abs_path, entity_tags=tags)
                        report.add_record(record=record)

        # Sub-sections that are a single item
        for token, registry in SINGLE_ITEM_SECTIONS:
            item_content = sls_file_data.get(token)
            if not item_content:
                continue
            entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(item_content)
            if not entity_lines_range:
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(sls_file_data)

            skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
            variable_evaluations = {}
            entity = EntityDetails(sls_context_parser.provider_type, item_content)
            results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
            tags = cfn_utils.get_resource_tags(entity, registry)
            for check, check_result in results.items():
                record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                code_block=entity_code_lines, file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource=token, evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path, entity_tags=tags)
                report.add_record(record=record)

        # "Complete" checks
        # NOTE: Ignore code content, no point in showing (could be long)
        entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(sls_file_data)
        if entity_lines_range:
            skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
            variable_evaluations = {}
            entity = EntityDetails(sls_context_parser.provider_type, sls_file_data)
            results = complete_registry.scan(sls_file, entity, skipped_checks, runner_filter)
            tags = cfn_utils.get_resource_tags(entity, complete_registry)
            for check, check_result in results.items():
                record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                code_block=[],  # Don't show, could be large
                                file_path=sls_file,
                                file_line_range=entity_lines_range,
                                resource="complete",  # Weird, not sure what to put here
                                evaluations=variable_evaluations,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path, entity_tags=tags)
                report.add_record(record=record)

    return report
def run(self, root_folder=None, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True):
    report = Report(self.check_type)
    files_list = []
    filepath_fn = None
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)

    if files:
        files_list = [file for file in files if Runner._is_docker_file(os.path.basename(file))]

    if root_folder:
        filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                if Runner._is_docker_file(file):
                    file_path = os.path.join(root, file)
                    files_list.append(file_path)

    definitions, definitions_raw = get_files_definitions(files_list, filepath_fn)

    for docker_file_path in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
        if docker_file_path[0] == '/':
            path_to_convert = (root_folder + docker_file_path) if root_folder else docker_file_path
        else:
            path_to_convert = (os.path.join(root_folder, docker_file_path)) if root_folder else docker_file_path

        file_abs_path = os.path.abspath(path_to_convert)
        report.add_resource(file_abs_path)
        skipped_checks = collect_skipped_checks(definitions[docker_file_path])
        instructions = definitions[docker_file_path]

        results = registry.scan(docker_file_path, instructions, skipped_checks, runner_filter)

        for check, check_result in results.items():
            result_configuration = check_result['results_configuration']
            startline = 0
            endline = len(definitions_raw[docker_file_path]) - 1
            result_instruction = ""
            if result_configuration:
                startline = result_configuration['startline']
                endline = result_configuration['endline']
                result_instruction = result_configuration["instruction"]

            codeblock = []
            self.calc_record_codeblock(codeblock, definitions_raw, docker_file_path, endline, startline)
            record = Record(
                check_id=check.id,
                bc_check_id=check.bc_id,
                check_name=check.name,
                check_result=check_result,
                code_block=codeblock,
                file_path=docker_file_path,
                file_line_range=[startline + 1, endline + 1],
                resource="{}.{}".format(docker_file_path, result_instruction, startline),
                evaluations=None,
                check_class=check.__class__.__module__,
                file_abs_path=file_abs_path,
                entity_tags=None
            )
            record.set_guideline(check.guideline)
            report.add_record(record=record)

    return report
def run(self, root_folder=None, external_checks_dir=None, files=None,
        runner_filter=RunnerFilter(), collect_skip_comments=True):
    report = Report(self.check_type)
    definitions = {}
    definitions_raw = {}
    parsing_errors = {}
    files_list = []
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)

    if files:
        for file in files:
            if os.path.basename(file) in DOCKER_FILE_MASK:
                (definitions[file], definitions_raw[file]) = parse(file)

    if root_folder:
        for root, d_names, f_names in os.walk(root_folder):
            filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
            filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
            for file in f_names:
                if file in DOCKER_FILE_MASK:
                    files_list.append(os.path.join(root, file))

        for file in files_list:
            relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
            try:
                (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse(file)
            except TypeError:
                logging.info(f'Dockerfile skipping {file} as it is not a valid dockerfile template')

    for docker_file_path in definitions.keys():
        # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
        # or there will be no leading slash; root_folder will always be none.
        # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
        # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
        if docker_file_path[0] == '/':
            path_to_convert = (root_folder + docker_file_path) if root_folder else docker_file_path
        else:
            path_to_convert = (os.path.join(root_folder, docker_file_path)) if root_folder else docker_file_path

        file_abs_path = os.path.abspath(path_to_convert)
        skipped_checks = collect_skipped_checks(definitions[docker_file_path])
        instructions = definitions[docker_file_path]

        results = registry.scan(docker_file_path, instructions, skipped_checks, runner_filter)

        for check, check_result in results.items():
            result_configuration = check_result['results_configuration']
            startline = 0
            endline = 0
            result_instruction = ""
            if result_configuration:
                startline = result_configuration['startline']
                endline = result_configuration['endline']
                result_instruction = result_configuration["instruction"]

            codeblock = []
            self.calc_record_codeblock(codeblock, definitions_raw, docker_file_path, endline, startline)
            record = Record(
                check_id=check.id,
                check_name=check.name,
                check_result=check_result,
                code_block=codeblock,
                file_path=docker_file_path,
                file_line_range=[startline, endline],
                resource="{}.{}".format(docker_file_path, result_instruction, startline),
                evaluations=None,
                check_class=check.__class__.__module__,
                file_abs_path=file_abs_path,
                entity_tags=None
            )
            report.add_record(record=record)

    return report
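# calc_record_codeblock is called by both Dockerfile runners above but never shown.
# A plausible minimal sketch inferred from the call sites -- an assumption, not the
# actual helper: it fills `codeblock` with (1-based line number, raw line) tuples
# for the 0-based [startline, endline] range reported by the check result.
def calc_record_codeblock(self, codeblock, definitions_raw, docker_file_path, endline, startline):
    for line in range(startline, endline + 1):
        codeblock.append((line + 1, definitions_raw[docker_file_path][line]))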