def load_tf_modules(path: str, should_download_module: Callable[[str], bool] = should_download, run_parallel=False):
    """Download the external Terraform modules referenced under ``path``.

    Modules are de-duplicated by address before downloading, so the same
    source is fetched at most once. Downloads can optionally be fanned out
    through the parallel runner.
    """
    module_loader_registry.root_dir = path
    modules_to_load = find_modules(path)

    def _download_module(module):
        # Honor the caller-provided filter (e.g. skip already-cached modules).
        if not should_download_module(module.module_link):
            return
        logging.info(f'Downloading module {module.address}')
        try:
            version = "latest" if not module.version else module.version
            content = module_loader_registry.load(module.source_dir, module.module_link, version)
            if content is None or not content.loaded():
                logging.warning(f'Failed to download module {module.address}')
        except Exception as e:
            logging.warning(f"Unable to load module ({module.address}): {e}")

    # To avoid duplicate work, keep only one module per distinct address.
    unique_modules = list({module.address: module for module in modules_to_load}.values())
    if run_parallel:
        # run_function may be lazy; list() forces all downloads to complete.
        list(parallel_runner.run_function(_download_module, unique_modules))
    else:
        for module in unique_modules:
            _download_module(module)
def get_files_definitions(files: List[str], out_parsing_errors: Dict[str, str], filepath_fn=None) \
        -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    """Parse the given CloudFormation template files in parallel.

    Returns ``(definitions, definitions_raw)`` keyed by file path (mapped
    through ``filepath_fn`` when provided). Per-file parsing errors are
    accumulated into ``out_parsing_errors``. Files that do not parse into a
    template with a ``Resources`` mapping are skipped.
    """

    def _parse_file(file):
        errors = {}
        parsed = parse(file, errors)
        return (file, parsed), errors

    definitions = {}
    definitions_raw = {}
    for (file, parse_result), errors in parallel_runner.run_function(_parse_file, files):
        out_parsing_errors.update(errors)
        path = file if filepath_fn is None else filepath_fn(file)
        try:
            # parse() may return None for invalid templates; unpacking raises then.
            template, template_lines = parse_result
        except (TypeError, ValueError) as e:
            logging.warning(f"CloudFormation skipping {file} as it is not a valid CF template\n{e}")
            continue
        if isinstance(template, DictNode) and isinstance(template.get("Resources"), DictNode):
            definitions[path] = template
            definitions_raw[path] = template_lines
        else:
            logging.debug(f"Parsed file {file} incorrectly {template}")
    return definitions, definitions_raw
def _load_files(self, files):
    """Load/parse the given files, reusing results cached in ``self.loaded_files_map``.

    Returns a list of ``(path, parsed_data)`` tuples; parsing errors are merged
    into ``self.out_parsing_errors`` and newly parsed files are cached.
    """

    def _load_file(file):
        load_errors = {}
        parsed = _load_or_die_quietly(file, load_errors)
        # the exceptions type can un-pickleable, so keep only their message text
        for err_path, err in load_errors.items():
            load_errors[err_path] = Exception(str(err))
        return (file.path, parsed), load_errors

    files_to_data = []
    not_yet_parsed = []
    # Serve cache hits directly; only parse what is missing from the cache.
    for file in files:
        cached = self.loaded_files_map.get(file.path)
        if cached:
            files_to_data.append((file.path, cached))
        else:
            not_yet_parsed.append(file)

    for result, load_errors in parallel_runner.run_function(_load_file, not_yet_parsed):
        self.out_parsing_errors.update(load_errors)
        files_to_data.append(result)
        path, data = result
        if path not in self.loaded_files_map:
            self.loaded_files_map[path] = data
    return files_to_data
def run(
        self,
        root_folder: Optional[str] = None,
        external_checks_dir: Optional[List[str]] = None,
        files: Optional[List[str]] = None,
        guidelines: Optional[Dict[str, str]] = None,
        collect_skip_comments: bool = True,
        repo_root_for_plan_enrichment: Optional[List[Union[str, os.PathLike]]] = None,
) -> List[Report]:
    """Run every registered runner and collect their reports.

    A single runner is invoked inline; multiple runners are fanned out through
    the parallel runner (group size 1, so each runner is its own task). Each
    resulting report is post-processed via ``self._handle_report``.
    """
    integration_feature_registry.run_pre_runner()
    if len(self.runners) == 1:
        reports = [
            self.runners[0].run(
                root_folder,
                external_checks_dir=external_checks_dir,
                files=files,
                runner_filter=self.runner_filter,
                collect_skip_comments=collect_skip_comments,
            )
        ]
    else:
        def _run_single(runner):
            return runner.run(
                root_folder,
                external_checks_dir=external_checks_dir,
                files=files,
                runner_filter=self.runner_filter,
                collect_skip_comments=collect_skip_comments,
            )

        reports = parallel_runner.run_function(_run_single, self.runners, 1)

    for scan_report in reports:
        self._handle_report(scan_report, guidelines, repo_root_for_plan_enrichment)
    return self.scan_reports
def _scan_files(files_to_scan, secrets):
    """Scan the given files for secrets and add findings to ``secrets``.

    Mirrors ``secrets.scan_files`` but adds a file-size guard, per-file timing,
    and error isolation so a single unreadable file cannot abort the scan.
    """

    def _safe_scan(f):
        full_file_path = os.path.join(secrets.root, f)
        file_size = os.path.getsize(full_file_path)
        # Skip very large files; a MAX_FILE_SIZE <= 0 disables the guard.
        if file_size > MAX_FILE_SIZE > 0:
            logging.info(
                f'Skipping secret scanning on {full_file_path} due to file size. To scan this file for '
                f'secrets, run this command again with the environment variable "CHECKOV_MAX_FILE_SIZE" '
                f'to 0 or {file_size + 1}')
            return []
        try:
            start_time = datetime.datetime.now()
            file_results = list(scan.scan_file(full_file_path))
            end_time = datetime.datetime.now()
            run_time = end_time - start_time
            # Surface unusually slow files so they can be investigated or excluded.
            if run_time > datetime.timedelta(seconds=10):
                logging.info(
                    f'Secret scanning for {full_file_path} took {run_time} seconds'
                )
            return file_results
        except Exception as err:
            logging.warning(
                f"Secret scanning:could not process file {f}, {err}")
            return []

    # _safe_scan already returns a list, so the previous
    # `lambda f: list(_safe_scan(f))` wrapper was redundant work per file.
    results = parallel_runner.run_function(_safe_scan, files_to_scan)
    for secrets_results in results:
        for secret in secrets_results:
            # Findings are keyed by path relative to the scan root.
            secrets[os.path.relpath(secret.filename, secrets.root)].add(secret)
def get_files_definitions(files: List[str], filepath_fn=None) \
        -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    """Parse the given files in parallel.

    Returns ``(definitions, definitions_raw)``; keys are the raw file path or
    ``filepath_fn(file)`` when a mapping function is supplied.
    """
    definitions = {}
    definitions_raw = {}
    parsed = parallel_runner.run_function(lambda f: (f, parse(f)), files)
    for file, result in parsed:
        key = file if filepath_fn is None else filepath_fn(file)
        definitions[key], definitions_raw[key] = result
    return definitions, definitions_raw
def _parse_files(files, definitions, definitions_raw, filepath_fn=None):
    """Parse Kubernetes template files in parallel into the given dicts.

    Files that fail to parse are skipped with a warning; ``definitions`` and
    ``definitions_raw`` are populated in place, keyed by path (mapped through
    ``filepath_fn`` when provided).
    """

    def _parse_file(filename):
        try:
            return filename, parse(filename)
        except (TypeError, ValueError) as e:
            # Fix: log the actual file name; the message previously said the
            # unhelpful literal "(unknown)" although `filename` is in scope.
            logging.warning(f"Kubernetes skipping {filename} as it is not a valid Kubernetes template\n{e}")
        # implicit None marks an unparseable file for the loop below

    results = parallel_runner.run_function(_parse_file, files)
    for result in results:
        if result:
            (file, parse_result) = result
            if parse_result:
                path = filepath_fn(file) if filepath_fn else file
                (definitions[path], definitions_raw[path]) = parse_result
def _load_files(self, files):
    """Parse every given file in parallel.

    Returns a list of ``(path, parsed_data)`` tuples; parsing errors are
    merged into ``self.out_parsing_errors``.
    """

    def _load_file(file):
        load_errors = {}
        parsed = _load_or_die_quietly(file, load_errors)
        # the exceptions type can un-pickleable, so keep only the message text
        for err_path, err in load_errors.items():
            load_errors[err_path] = Exception(str(err))
        return (file.path, parsed), load_errors

    files_to_data = []
    for result, load_errors in parallel_runner.run_function(_load_file, files):
        self.out_parsing_errors.update(load_errors)
        files_to_data.append(result)
    return files_to_data
def _scan_files(files_to_scan, secrets):
    """Scan the given files for secrets and add findings to ``secrets``.

    Mirrors ``secrets.scan_files`` but isolates per-file failures so a single
    unreadable file cannot abort the whole scan.
    """

    def _safe_scan(f):
        try:
            return list(scan.scan_file(os.path.join(secrets.root, f)))
        except Exception as err:
            logging.warning(
                f"Secret scanning:could not process file {f}, {err}")
            return []

    # _safe_scan already returns a list, so the previous
    # `lambda f: list(_safe_scan(f))` wrapper was redundant work per file.
    results = parallel_runner.run_function(_safe_scan, files_to_scan)
    for secrets_results in results:
        for secret in secrets_results:
            # Findings are keyed by path relative to the scan root.
            secrets[os.path.relpath(secret.filename, secrets.root)].add(secret)
def get_files_definitions(files: List[str]) \
        -> Tuple[Dict[str, List], Dict[str, List[Tuple[int, str]]]]:
    """Parse Kubernetes template files in parallel.

    Returns ``(definitions, definitions_raw)`` keyed by file path; files that
    fail to parse are skipped with a warning.
    """

    def _parse_file(filename):
        try:
            return filename, parse(filename)
        except (TypeError, ValueError) as e:
            # Fix: log the actual file name; the message previously said the
            # unhelpful literal "(unknown)" although `filename` is in scope.
            logging.warning(f"Kubernetes skipping {filename} as it is not a valid Kubernetes template\n{e}")

    definitions = {}
    definitions_raw = {}
    results = parallel_runner.run_function(_parse_file, files)
    for result in results:
        if result:
            (path, parse_result) = result
            if parse_result:
                (definitions[path], definitions_raw[path]) = parse_result
    return definitions, definitions_raw
def get_files_definitions(files: List[str], filepath_fn=None) \
        -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    """Parse Dockerfiles in parallel, skipping files that fail to parse.

    Returns ``(definitions, definitions_raw)`` keyed by file path (mapped
    through ``filepath_fn`` when provided).
    """

    def _parse_file(file):
        try:
            return file, parse(file)
        except TypeError:
            logging.info(f'Dockerfile skipping {file} as it is not a valid dockerfile template')
            # A None result marks the file as unparseable for the loop below.
            return file, None

    definitions = {}
    definitions_raw = {}
    for file, parsed in parallel_runner.run_function(_parse_file, files):
        if not parsed:
            continue
        key = file if filepath_fn is None else filepath_fn(file)
        definitions[key], definitions_raw[key] = parsed
    return definitions, definitions_raw
def _parse_files(self, files, scan_hcl, parsing_errors):
    """Parse Terraform files in parallel into ``self.definitions``.

    Only ``.tf`` files are considered, plus ``.hcl`` files when ``scan_hcl``
    is enabled. Per-file errors are merged into ``parsing_errors``.
    """

    def parse_file(file):
        # Anything that is not .tf (or .hcl when enabled) is skipped entirely.
        if not (file.endswith(".tf") or (scan_hcl and file.endswith(".hcl"))):
            return
        file_parsing_errors = {}
        parse_result = self.parser.parse_file(file=file, parsing_errors=file_parsing_errors, scan_hcl=scan_hcl)
        # the exceptions type can un-pickleable so we need to cast them to Exception
        for path, e in file_parsing_errors.items():
            file_parsing_errors[path] = Exception(str(e))
        return file, parse_result, file_parsing_errors

    for parsed in parallel_runner.run_function(parse_file, files):
        if not parsed:
            continue
        file, parse_result, file_parsing_errors = parsed
        if parse_result is not None:
            self.definitions[file] = parse_result
        if file_parsing_errors:
            parsing_errors.update(file_parsing_errors)
def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
    """Template every Helm chart found under ``root_folder`` and scan the
    rendered manifests with the Kubernetes runner.

    For each chart: list its dependencies, run ``helm template
    --dependency-update``, split the rendered output back into per-source
    files inside a temp dir, then aggregate the k8s scan results into one
    Report.
    """
    if external_checks_dir:
        for directory in external_checks_dir:
            registry.load_external_checks(directory)
    chart_directories = self.find_chart_directories(root_folder, files, runner_filter.excluded_paths)
    report = Report(self.check_type)
    # Parse each chart's metadata (name/version) in parallel.
    chart_dir_and_meta = parallel_runner.run_function(
        lambda cd: (cd, self.parse_helm_chart_details(cd)), chart_directories)
    for chart_dir, chart_meta in chart_dir_and_meta:
        # chart_name = os.path.basename(chart_dir)
        logging.info(
            f"Processing chart found at: {chart_dir}, name: {chart_meta['name']}, version: {chart_meta['version']}")
        with tempfile.TemporaryDirectory() as target_dir:
            # dependency list is nicer to parse than dependency update.
            proc = subprocess.Popen([self.helm_command, 'dependency', 'list', chart_dir], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
            o, e = proc.communicate()
            # stderr output is informational only; a V1 chart warning is expected.
            if e:
                if "Warning: Dependencies" in str(e, 'utf-8'):
                    logging.info(
                        f"V1 API chart without Chart.yaml dependancies. Skipping chart dependancy list for {chart_meta['name']} at dir: {chart_dir}. Working dir: {target_dir}. Error details: {str(e, 'utf-8')}")
                else:
                    logging.info(
                        f"Error processing helm dependancies for {chart_meta['name']} at source dir: {chart_dir}. Working dir: {target_dir}. Error details: {str(e, 'utf-8')}")
            self.parse_helm_dependency_output(o)
            try:
                # --dependency-update needed to pull in deps before templating.
                proc = subprocess.Popen([self.helm_command, 'template', '--dependency-update', chart_dir], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
                o, e = proc.communicate()
                logging.debug(
                    f"Ran helm command to template chart output. Chart: {chart_meta['name']}. dir: {target_dir}. Output: {str(o, 'utf-8')}")
            except Exception:
                # NOTE(review): `e` here is the stderr bytes from communicate()
                # (possibly from the *previous* call if communicate raised),
                # not the caught exception — confirm this is intentional.
                logging.info(
                    f"Error processing helm chart {chart_meta['name']} at dir: {chart_dir}. Working dir: {target_dir}. Error details: {str(e, 'utf-8')}")
            output = str(o, 'utf-8')
            reader = io.StringIO(output)
            # State machine: helm's templated output is one stream with
            # "---" separators followed by "# Source: <path>" comments; split
            # it back into one file per source path under target_dir.
            cur_source_file = None
            cur_writer = None
            last_line_dashes = False
            line_num = 1
            for s in reader:
                s = s.rstrip()
                if s == '---':
                    last_line_dashes = True
                    continue
                if last_line_dashes:
                    # The next line should contain a "Source" comment saying the name of the file it came from
                    # So we will close the old file, open a new file, and write the dashes from last iteration plus this line
                    if not s.startswith('# Source: '):
                        raise Exception(f'Line {line_num}: Expected line to start with # Source: {s}')
                    source = s[10:]
                    if source != cur_source_file:
                        # Switch output files; open in append mode because the
                        # same source can appear in multiple documents.
                        if cur_writer:
                            cur_writer.close()
                        file_path = os.path.join(target_dir, source)
                        parent = os.path.dirname(file_path)
                        os.makedirs(parent, exist_ok=True)
                        cur_source_file = source
                        cur_writer = open(os.path.join(target_dir, source), 'a')
                    cur_writer.write('---' + os.linesep)
                    cur_writer.write(s + os.linesep)
                    last_line_dashes = False
                else:
                    if s.startswith('# Source: '):
                        raise Exception(f'Line {line_num}: Unexpected line starting with # Source: {s}')
                    if not cur_writer:
                        # Content before the first "# Source:" header is dropped.
                        continue
                    else:
                        cur_writer.write(s + os.linesep)
                line_num += 1
            if cur_writer:
                cur_writer.close()
            try:
                # Scan the reconstructed manifests with the k8s runner and fold
                # its results into the aggregate helm report.
                k8s_runner = k8_runner()
                chart_results = k8s_runner.run(target_dir, external_checks_dir=external_checks_dir, runner_filter=runner_filter, helmChart=chart_meta['name'])
                logging.debug(f"Sucessfully ran k8s scan on {chart_meta['name']}. Scan dir : {target_dir}")
                report.failed_checks += chart_results.failed_checks
                report.passed_checks += chart_results.passed_checks
                report.parsing_errors += chart_results.parsing_errors
                report.skipped_checks += chart_results.skipped_checks
                report.resources.update(chart_results.resources)
            except Exception as e:
                logging.warning(e, stack_info=True)
                with tempfile.TemporaryDirectory() as save_error_dir:
                    logging.debug(
                        f"Error running k8s scan on {chart_meta['name']}. Scan dir: {target_dir}. Saved context dir: {save_error_dir}")
                    # Preserve the rendered chart for post-mortem debugging.
                    shutil.move(target_dir, save_error_dir)
    ## TODO: Export helm dependancies for the chart we've extracted in chart_dependencies
    return report
def _load_files(files_to_load, definitions, definitions_raw, filename_fn=None):
    """Parse the given files in parallel, filling ``definitions`` and
    ``definitions_raw`` in place (keyed by the possibly-mapped file name)."""
    if filename_fn:
        files_to_load = [filename_fn(file) for file in files_to_load]
    else:
        files_to_load = list(files_to_load)
    for file, parsed in parallel_runner.run_function(lambda f: (f, parse(f)), files_to_load):
        definitions[file], definitions_raw[file] = parsed