Example #1
    def persist_repository(self, root_dir, files=None, excluded_paths=[]):
        """
        Persist the repository found on root_dir path to Bridgecrew's platform. If --file flag is used, only files
        that are specified will be persisted.
        :param files: Absolute path of the files passed in the --file flag.
        :param root_dir: Absolute path of the directory containing the repository root level.
        """

        if not self.use_s3_integration:
            return

        if files:
            for f in files:
                _, file_extension = os.path.splitext(f)
                if file_extension in SUPPORTED_FILE_EXTENSIONS:
                    self._persist_file(f, os.path.relpath(f, root_dir))
        else:
            for root_path, d_names, f_names in os.walk(root_dir):
                # self.excluded_paths only contains the config fetched from the platform.
                # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
                filter_ignored_paths(root_path, d_names, excluded_paths)
                filter_ignored_paths(root_path, f_names, excluded_paths)
                for file_path in f_names:
                    _, file_extension = os.path.splitext(file_path)
                    if file_extension in SUPPORTED_FILE_EXTENSIONS:
                        full_file_path = os.path.join(root_path, file_path)
                        relative_file_path = os.path.relpath(
                            full_file_path, root_dir)
                        self._persist_file(full_file_path, relative_file_path)
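Every example on this page passes the d_names and f_names lists produced by os.walk straight into filter_ignored_paths, which prunes those lists in place so that excluded directories are never descended into and excluded files are never parsed. The helper itself is not shown on this page; the following is only a minimal sketch of that contract, inferred from the tests in Examples #4, #12 and #13 — the default ignore set and the exact regex semantics are assumptions, not the checkov implementation.

import os
import re
from typing import List, Optional

# Hypothetical default ignore list; the real helper keeps its own.
DEFAULT_IGNORED_DIRECTORIES = {".git", ".github", ".idea", ".terraform", "node_modules"}

def filter_ignored_paths(root: str, names: List[str], excluded_paths: Optional[List[str]]) -> None:
    """Remove ignored entries from `names` in place, so os.walk skips them."""
    patterns = [re.compile(p) for p in (excluded_paths or [])]
    for name in list(names):
        full_path = os.path.join(root, name)
        if (name in DEFAULT_IGNORED_DIRECTORIES
                or name.startswith(".")
                or any(p.search(full_path) for p in patterns)):
            names.remove(name)

Because the mutation happens in place, callers can keep using the same d_names and f_names variables after the call, which is exactly what os.walk requires for directory pruning to take effect.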
Example #2
File: cfn_utils.py  Project: tronxd/checkov
def get_folder_definitions(
    root_folder: str,
    excluded_paths: Optional[List[str]],
    out_parsing_errors: Dict[str, str] = {}
) -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)
        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in CF_POSSIBLE_ENDINGS:
                files_list.append(os.path.join(root, file))

    definitions, definitions_raw = get_files_definitions(
        files_list, out_parsing_errors, lambda f:
        f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}')

    definitions = {
        create_file_abs_path(root_folder, file_path): v
        for (file_path, v) in definitions.items()
    }
    definitions_raw = {
        create_file_abs_path(root_folder, file_path): v
        for (file_path, v) in definitions_raw.items()
    }

    return definitions, definitions_raw
Example #3
File: runner.py  Project: metahertz/checkov
    def run(self, root_folder=None, external_checks_dir=None, files=None,
            runner_filter=RunnerFilter(), collect_skip_comments=True) -> Report:
        registry = self.import_registry()

        definitions = {}
        definitions_raw = {}

        report = Report(self.check_type)

        if not files and not root_folder:
            logging.debug("No resources to scan.")
            return report

        if not external_checks_dir and self.require_external_checks():
            logging.debug("The json runner requires that external checks are defined.")
            return report
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            self._load_files(files, definitions, definitions_raw)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
                self._load_files(
                    f_names,
                    definitions,
                    definitions_raw,
                    lambda f: os.path.join(root, f)
                )

        for json_file_path in definitions.keys():
            results = registry.scan(
                json_file_path, definitions[json_file_path], [], runner_filter
            )
            for check, result in results.items():
                result_config = result["results_configuration"]
                start = result_config.start_mark.line
                end = result_config.end_mark.line
                record = Record(
                    check_id=check.id,
                    bc_check_id=check.bc_id,
                    check_name=check.name,
                    check_result=result,
                    code_block=definitions_raw[json_file_path][start:end + 1],
                    file_path=json_file_path,
                    file_line_range=[start + 1, end + 1],
                    resource=f"{json_file_path}",
                    evaluations=None,
                    check_class=check.__class__.__module__,
                    file_abs_path=os.path.abspath(json_file_path),
                    entity_tags=None
                )
                report.add_record(record)

        return report
Example #4
    def test_filter_ignored_directories_regex_legacy(self):
        d_names = [
            'bin', 'integration_tests', 'tests', 'docs', '.github', 'checkov',
            'venv', '.git', 'kubernetes', '.idea'
        ]
        expected = ['bin', 'docs', 'checkov', 'venv', 'kubernetes']
        filter_ignored_paths('.', d_names, ["tests"])
        self.assertEqual(expected, d_names)
Example #5
def get_folder_definitions(
    root_folder: str, excluded_paths: Optional[List[str]], out_parsing_errors: Dict[str, str] = {}
) -> Tuple[Dict[str, DictNode], Dict[str, List[Tuple[int, str]]]]:
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)
        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in CF_POSSIBLE_ENDINGS:
                files_list.append(os.path.join(root, file))

    definitions, definitions_raw = get_files_definitions(files_list, out_parsing_errors)
    return definitions, definitions_raw
Example #6
    def run(self, root_folder=None, external_checks_dir=None, files=None, runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        report = Report(self.check_type)
        self.tf_definitions = {}
        parsing_errors = {}
        if external_checks_dir:
            for directory in external_checks_dir:
                resource_registry.load_external_checks(directory)
                self.graph_registry.load_external_checks(directory)

        if root_folder:
            files = [] if not files else files
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending == '.json':
                        try:
                            with open(f'{root}/{file}') as f:
                                content = json.load(f)
                            if isinstance(content, dict) and content.get('terraform_version'):
                                files.append(os.path.join(root, file))
                        except Exception as e:
                            logging.debug(f'Failed to load json file {root}/{file}, skipping')
                            logging.debug('Failure message:')
                            logging.debug(e, stack_info=True)

        if files:
            files = [os.path.abspath(file) for file in files]
            for file in files:
                if file.endswith(".json"):
                    tf_definitions, template_lines = parse_tf_plan(file)
                    if not tf_definitions:
                        continue
                    self.tf_definitions = tf_definitions
                    self.template_lines = template_lines
                    self.check_tf_definition(report, runner_filter)
                else:
                    logging.debug(f'Failed to load {file} as is not a .json file, skipping')

        report.add_parsing_errors(list(parsing_errors.keys()))

        graph = self.graph_manager.build_graph_from_definitions(self.tf_definitions, render_variables=False)
        self.graph_manager.save_graph(graph)

        graph_report = self.get_graph_checks_report(root_folder, runner_filter)
        merge_reports(report, graph_report)

        return report
Example #7
def get_folder_definitions(
        root_folder: str, excluded_paths: Optional[List[str]]
) -> Tuple[Dict[str, List], Dict[str, List[Tuple[int, str]]]]:
    files_list = []
    for root, d_names, f_names in os.walk(root_folder):
        filter_ignored_paths(root, d_names, excluded_paths)
        filter_ignored_paths(root, f_names, excluded_paths)

        for file in f_names:
            file_ending = os.path.splitext(file)[1]
            if file_ending in K8_POSSIBLE_ENDINGS:
                full_path = os.path.join(root, file)
                if "/." not in full_path and file not in ['package.json', 'package-lock.json']:
                    # skip temp directories
                    files_list.append(full_path)
    return get_files_definitions(files_list)
Example #8
    def findKustomizeDirectories(root_folder, files, excluded_paths):
        kustomizeDirectories = []
        if not excluded_paths:
            excluded_paths = []
        if files:
            logging.info('Running with --file argument; file must be a kustomization.yaml file')
            for file in files:
                if os.path.basename(file) in Runner.kustomizeSupportedFileTypes:
                    kustomizeDirectories.append(os.path.dirname(file))

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, excluded_paths)
                filter_ignored_paths(root, f_names, excluded_paths)
                for x in f_names:
                    if x in Runner.kustomizeSupportedFileTypes:
                        kustomizeDirectories.append(os.path.abspath(root))

        return kustomizeDirectories
Example #9
File: runner.py  Project: tsmithv11/checkov
    def find_chart_directories(root_folder, files, excluded_paths):
        chart_directories = []
        if not excluded_paths:
            excluded_paths = []
        if files:
            logging.info('Running with --file argument; checking for Helm Chart.yaml files')
            for file in files:
                if os.path.basename(file) == 'Chart.yaml':
                    chart_directories.append(os.path.dirname(file))

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, excluded_paths)
                filter_ignored_paths(root, f_names, excluded_paths)
                if 'Chart.yaml' in f_names:
                    chart_directories.append(root)

        return chart_directories
Example #10
    def persist_repository(self, root_dir, files=None, excluded_paths=None):
        """
        Persist the repository found on root_dir path to Bridgecrew's platform. If --file flag is used, only files
        that are specified will be persisted.
        :param files: Absolute path of the files passed in the --file flag.
        :param root_dir: Absolute path of the directory containing the repository root level.
        :param excluded_paths: Paths to exclude from persist process
        """
        excluded_paths = excluded_paths if excluded_paths is not None else []

        if not self.use_s3_integration:
            return
        files_to_persist = []
        if files:
            for f in files:
                _, file_extension = os.path.splitext(f)
                if file_extension in SUPPORTED_FILE_EXTENSIONS:
                    files_to_persist.append((f, os.path.relpath(f, root_dir)))
        else:
            for root_path, d_names, f_names in os.walk(root_dir):
                # self.excluded_paths only contains the config fetched from the platform.
                # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
                filter_ignored_paths(root_path, d_names, excluded_paths)
                filter_ignored_paths(root_path, f_names, excluded_paths)
                for file_path in f_names:
                    _, file_extension = os.path.splitext(file_path)
                    if file_extension in SUPPORTED_FILE_EXTENSIONS:
                        full_file_path = os.path.join(root_path, file_path)
                        relative_file_path = os.path.relpath(
                            full_file_path, root_dir)
                        files_to_persist.append(
                            (full_file_path, relative_file_path))

        logging.info(f"Persisting {len(files_to_persist)} files")
        with futures.ThreadPoolExecutor() as executor:
            futures.wait(
                [
                    executor.submit(self._persist_file, full_file_path,
                                    relative_file_path)
                    for full_file_path, relative_file_path in files_to_persist
                ],
                return_when=futures.FIRST_EXCEPTION,
            )
        logging.info(f"Done persisting {len(files_to_persist)} files")
Example #11
    def persist_repository(self,
                           root_dir,
                           files=None,
                           excluded_paths=None,
                           included_paths: Optional[List[str]] = None):
        """
        Persist the repository found on root_dir path to Bridgecrew's platform. If --file flag is used, only files
        that are specified will be persisted.
        :param files: Absolute path of the files passed in the --file flag.
        :param root_dir: Absolute path of the directory containing the repository root level.
        :param excluded_paths: Paths to exclude from persist process
        """
        excluded_paths = excluded_paths if excluded_paths is not None else []

        if not self.use_s3_integration:
            return
        files_to_persist: List[FileToPersist] = []
        if files:
            for f in files:
                f_name = os.path.basename(f)
                _, file_extension = os.path.splitext(f)
                if file_extension in SUPPORTED_FILE_EXTENSIONS or f_name in SUPPORTED_FILES:
                    files_to_persist.append(
                        FileToPersist(f, os.path.relpath(f, root_dir)))
        else:
            for root_path, d_names, f_names in os.walk(root_dir):
                # self.excluded_paths only contains the config fetched from the platform.
                # but here we expect the list from runner_registry as well (which includes self.excluded_paths).
                filter_ignored_paths(root_path,
                                     d_names,
                                     excluded_paths,
                                     included_paths=included_paths)
                filter_ignored_paths(root_path, f_names, excluded_paths)
                for file_path in f_names:
                    _, file_extension = os.path.splitext(file_path)
                    if file_extension in SUPPORTED_FILE_EXTENSIONS or file_path in SUPPORTED_FILES:
                        full_file_path = os.path.join(root_path, file_path)
                        relative_file_path = os.path.relpath(
                            full_file_path, root_dir)
                        files_to_persist.append(
                            FileToPersist(full_file_path, relative_file_path))

        self.persist_files(files_to_persist)
Example #12
    def test_filter_ignored_directories_regex_absolute_cwd(self):
        # this simulates scanning a subdirectory and applying filter logic using an absolute path
        current_dir = os.path.dirname(os.path.realpath(__file__))

        excluded_paths = ['dir2']

        remaining_dirs = []

        expected = {
            os.path.join(current_dir, 'sample_dir', 'dir33'),
            os.path.join(current_dir, 'sample_dir', 'dir1'),
            os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
            os.path.join(current_dir, 'sample_dir', 'dir11')
        }

        for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
            filter_ignored_paths(root, dirs, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]

        # we expect .terraform and all dir2 to get filtered out
        self.assertEqual(set(remaining_dirs), expected)

        excluded_paths = [os.path.join('dir1', 'dir2')]

        remaining_dirs = []

        expected = {
            os.path.join(current_dir, 'sample_dir', 'dir33'),
            os.path.join(current_dir, 'sample_dir', 'dir1'),
            os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
            os.path.join(current_dir, 'sample_dir', 'dir11'),
            os.path.join(current_dir, 'sample_dir', 'dir11', 'dir2'),
            os.path.join(current_dir, 'sample_dir', 'dir33', 'dir2'),
        }

        for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
            filter_ignored_paths(root, dirs, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]

        # we expect .terraform and dir1/dir2 to get filtered out
        self.assertEqual(set(remaining_dirs), expected)

        excluded_paths = [os.path.join('dir..', 'dir2')]

        remaining_dirs = []

        expected = {
            os.path.join(current_dir, 'sample_dir', 'dir33'),
            os.path.join(current_dir, 'sample_dir', 'dir1'),
            os.path.join(current_dir, 'sample_dir', 'dir1', 'dir4'),
            os.path.join(current_dir, 'sample_dir', 'dir11'),
            os.path.join(current_dir, 'sample_dir', 'dir1', 'dir2')
        }

        for root, dirs, files in os.walk(os.path.join(current_dir, 'sample_dir')):
            filter_ignored_paths(root, dirs, excluded_paths)
            remaining_dirs += [os.path.join(root, d) for d in dirs]

        # we expect .terraform and dir11/dir2 and dir33/dir2 to get filtered out
        self.assertEqual(set(remaining_dirs), expected)
Example #13
    def test_filter_ignored_directories_regex_relative_cwd(self):
        # this simulates scanning a subdirectory and applying filter logic relative to the CWD
        # for this we need to CD temporarily
        current_dir = os.path.dirname(os.path.realpath(__file__))
        old_cwd = os.path.abspath(os.curdir)

        try:
            os.chdir(current_dir)

            excluded_paths = ['dir2', os.path.join('dir1', 'file1.tf')]

            remaining_dirs = []
            remaining_files = []

            expected_dirs = {
                os.path.join('sample_dir', 'dir33'),
                os.path.join('sample_dir', 'dir1'),
                os.path.join('sample_dir', 'dir1', 'dir4'),
                os.path.join('sample_dir', 'dir11')
            }

            expected_files = {
                os.path.join('sample_dir', 'dir33', 'file2.tf'),
                os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf'),
            }

            for root, dirs, files in os.walk('sample_dir'):
                filter_ignored_paths(root, dirs, excluded_paths)
                filter_ignored_paths(root, files, excluded_paths)
                remaining_dirs += [os.path.join(root, d) for d in dirs]
                remaining_files += [os.path.join(root, f) for f in files]

            # we expect .terraform and all dir2 to get filtered out
            # also dir1/file1
            self.assertEqual(set(remaining_dirs), expected_dirs)
            self.assertEqual(set(remaining_files), expected_files)

            excluded_paths = [os.path.join('dir1', 'dir2')]

            remaining_dirs = []
            remaining_files = []

            expected_dirs = {
                os.path.join('sample_dir', 'dir33'),
                os.path.join('sample_dir', 'dir1'),
                os.path.join('sample_dir', 'dir1', 'dir4'),
                os.path.join('sample_dir', 'dir11'),
                os.path.join('sample_dir', 'dir11', 'dir2'),
                os.path.join('sample_dir', 'dir33', 'dir2'),
            }

            expected_files = {
                os.path.join('sample_dir', 'dir33', 'file2.tf'),
                os.path.join('sample_dir', 'dir1', 'file1.tf'),
                os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf'),
                os.path.join('sample_dir', 'dir11', 'dir2', 'file4.tf'),
                os.path.join('sample_dir', 'dir33', 'dir2', 'file5.tf')
            }

            for root, dirs, files in os.walk('sample_dir'):
                filter_ignored_paths(root, dirs, excluded_paths)
                filter_ignored_paths(root, files, excluded_paths)
                remaining_dirs += [os.path.join(root, d) for d in dirs]
                remaining_files += [os.path.join(root, f) for f in files]

            # we expect .terraform and dir1/dir2 to get filtered out
            self.assertEqual(set(remaining_dirs), expected_dirs)
            self.assertEqual(set(remaining_files), expected_files)

            excluded_paths = [os.path.join('dir..', 'dir2')]

            remaining_dirs = []
            remaining_files = []

            expected_dirs = {
                os.path.join('sample_dir', 'dir33'),
                os.path.join('sample_dir', 'dir1'),
                os.path.join('sample_dir', 'dir1', 'dir4'),
                os.path.join('sample_dir', 'dir11'),
                os.path.join('sample_dir', 'dir1', 'dir2')
            }

            expected_files = {
                os.path.join('sample_dir', 'dir1', 'dir2', 'file2.tf'),
                os.path.join('sample_dir', 'dir1', 'file1.tf'),
                os.path.join('sample_dir', 'dir33', 'file2.tf'),
                os.path.join('sample_dir', 'dir1', 'dir4', 'file3.tf')
            }

            for root, dirs, files in os.walk('sample_dir'):
                filter_ignored_paths(root, dirs, excluded_paths)
                filter_ignored_paths(root, files, excluded_paths)
                remaining_dirs += [os.path.join(root, d) for d in dirs]
                remaining_files += [os.path.join(root, f) for f in files]

            # we expect .terraform and dir11/dir2 and dir33/dir2 to get filtered out
            self.assertEqual(set(remaining_dirs), expected_dirs)
            self.assertEqual(set(remaining_files), expected_files)

        finally:
            os.chdir(old_cwd)
Example #14
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(),
            collect_skip_comments=True) -> Report:
        secrets = SecretsCollection()
        with transient_settings({
            # Only run scans with only these plugins.
            'plugins_used': [
                {
                    'name': 'AWSKeyDetector'
                },
                {
                    'name': 'ArtifactoryDetector'
                },
                {
                    'name': 'AzureStorageKeyDetector'
                },
                {
                    'name': 'BasicAuthDetector'
                },
                {
                    'name': 'CloudantDetector'
                },
                {
                    'name': 'IbmCloudIamDetector'
                },
                {
                    'name': 'MailchimpDetector'
                },
                {
                    'name': 'PrivateKeyDetector'
                },
                {
                    'name': 'SlackDetector'
                },
                {
                    'name': 'SoftlayerDetector'
                },
                {
                    'name': 'SquareOAuthDetector'
                },
                {
                    'name': 'StripeDetector'
                },
                {
                    'name': 'TwilioKeyDetector'
                },
            ]
        }):
            report = Report(self.check_type)
            # Implement non IaC files (including .terraform dir)
            files_to_scan = files or []
            excluded_paths = (runner_filter.excluded_paths or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
            if root_folder:
                for root, d_names, f_names in os.walk(root_folder):
                    filter_ignored_paths(root, d_names, excluded_paths)
                    filter_ignored_paths(root, f_names, excluded_paths)
                    for file in f_names:
                        if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                            files_to_scan.append(os.path.join(root, file))
            logging.info(f'Secrets scanning will scan {len(files_to_scan)} files')

            # TODO: re-enable filter when re-adding `SecretKeyword` plugin
            scan.get_settings().disable_filters(*['detect_secrets.filters.heuristic.is_indirect_reference'])

            def _scan_file(file_paths: List[str]):
                for file_path in file_paths:
                    start = time.time()
                    try:
                        secrets.scan_file(file_path)
                    except Exception as err:
                        logging.warning(f"Secret scanning:could not process file {file_path}, {err}")
                    end = time.time()
                    scan_time = end - start
                    if scan_time > 10:
                        logging.info(f'Scanned {file_path}, took {scan_time} seconds')

            run_function_multithreaded(_scan_file, files_to_scan, 1, num_of_workers=os.cpu_count())

            for _, secret in iter(secrets):
                check_id = SECRET_TYPE_TO_ID.get(secret.type)
                if not check_id:
                    continue
                result = {'result': CheckResult.FAILED}
                line_text = linecache.getline(os.path.join(root_folder, secret.filename),
                                              secret.line_number) if root_folder else linecache.getline(secret.filename,
                                                                                                        secret.line_number)
                if line_text != "" and line_text.split()[0] == 'git_commit':
                    continue
                result = self.search_for_suppression(check_id, root_folder, secret, runner_filter.skip_checks,
                                                     CHECK_ID_TO_SECRET_TYPE) or result
                report.add_record(Record(
                    check_id=check_id,
                    check_name=secret.type,
                    check_result=result,
                    code_block=[(secret.line_number, line_text)],
                    file_path=f'/{os.path.relpath(secret.filename, root_folder)}',
                    file_line_range=[secret.line_number, secret.line_number + 1],
                    resource=secret.secret_hash,
                    check_class=None,
                    evaluations=None,
                    file_abs_path=os.path.abspath(secret.filename)
                ))

            return report
Example #15
File: runner.py  Project: metahertz/checkov
    def run(self,
            root_folder: str,
            external_checks_dir: Optional[List[str]] = None,
            files: Optional[List[str]] = None,
            runner_filter: RunnerFilter = RunnerFilter(),
            collect_skip_comments: bool = True) -> Report:
        current_dir = os.path.dirname(os.path.realpath(__file__))
        secrets = SecretsCollection()
        with transient_settings({
                # Only run scans with only these plugins.
                'plugins_used':
            [{
                'name': 'AWSKeyDetector'
            }, {
                'name': 'ArtifactoryDetector'
            }, {
                'name': 'AzureStorageKeyDetector'
            }, {
                'name': 'BasicAuthDetector'
            }, {
                'name': 'CloudantDetector'
            }, {
                'name': 'IbmCloudIamDetector'
            }, {
                'name': 'MailchimpDetector'
            }, {
                'name': 'PrivateKeyDetector'
            }, {
                'name': 'SlackDetector'
            }, {
                'name': 'SoftlayerDetector'
            }, {
                'name': 'SquareOAuthDetector'
            }, {
                'name': 'StripeDetector'
            }, {
                'name': 'TwilioKeyDetector'
            }, {
                'name': 'EntropyKeywordCombinator',
                'path':
                f'file://{current_dir}/plugins/entropy_keyword_combinator.py',
                'limit': ENTROPY_KEYWORD_LIMIT
            }]
        }) as settings:
            report = Report(self.check_type)
            # Implement non IaC files (including .terraform dir)
            files_to_scan = files or []
            excluded_paths = (
                runner_filter.excluded_paths
                or []) + ignored_directories + [DEFAULT_EXTERNAL_MODULES_DIR]
            if root_folder:
                for root, d_names, f_names in os.walk(root_folder):
                    filter_ignored_paths(root, d_names, excluded_paths)
                    filter_ignored_paths(root, f_names, excluded_paths)
                    for file in f_names:
                        if file not in PROHIBITED_FILES and f".{file.split('.')[-1]}" in SUPPORTED_FILE_EXTENSIONS:
                            files_to_scan.append(os.path.join(root, file))
            logging.info(
                f'Secrets scanning will scan {len(files_to_scan)} files')

            settings.disable_filters(
                *['detect_secrets.filters.heuristic.is_indirect_reference'])

            Runner._scan_files(files_to_scan, secrets)

            for _, secret in iter(secrets):
                check_id = SECRET_TYPE_TO_ID.get(secret.type)
                bc_check_id = bc_integration.ckv_to_bc_id_mapping.get(
                    check_id) if bc_integration.ckv_to_bc_id_mapping else None
                if not check_id:
                    continue
                if runner_filter.checks and not runner_filter.should_run_check(
                        check_id, bc_check_id):
                    continue
                result: _CheckResult = {'result': CheckResult.FAILED}
                line_text = linecache.getline(secret.filename,
                                              secret.line_number)
                if line_text != "" and len(line_text.split(
                )) > 0 and line_text.split()[0] == 'git_commit':
                    continue
                result = self.search_for_suppression(
                    check_id=check_id,
                    bc_check_id=bc_check_id,
                    secret=secret,
                    runner_filter=runner_filter,
                ) or result
                report.add_resource(f'{secret.filename}:{secret.secret_hash}')
                report.add_record(
                    Record(check_id=check_id,
                           bc_check_id=bc_check_id,
                           check_name=secret.type,
                           check_result=result,
                           code_block=[(secret.line_number, line_text)],
                           file_path=
                           f'/{os.path.relpath(secret.filename, root_folder)}',
                           file_line_range=[
                               secret.line_number, secret.line_number + 1
                           ],
                           resource=secret.secret_hash,
                           check_class=None,
                           evaluations=None,
                           file_abs_path=os.path.abspath(secret.filename)))

            return report
Example #16
File: runner.py  Project: tronxd/checkov
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True, helmChart=None):
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            _parse_files(files, definitions, definitions_raw)

        if root_folder:
            filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names, runner_filter.excluded_paths)

                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in K8_POSSIBLE_ENDINGS:
                        full_path = os.path.join(root, file)
                        if "/." not in full_path and file not in ['package.json','package-lock.json']:
                            # skip temp directories
                            files_list.append(full_path)

            _parse_files(files_list, definitions, definitions_raw, filepath_fn)

        for k8_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which k8_file does not always give).
            if k8_file[0] == '/':
                path_to_convert = (root_folder + k8_file) if root_folder else k8_file
            else:
                path_to_convert = (os.path.join(root_folder, k8_file)) if root_folder else k8_file

            file_abs_path = os.path.abspath(path_to_convert)

            if definitions[k8_file]:
                for i in range(len(definitions[k8_file])):
                    if (not 'apiVersion' in definitions[k8_file][i].keys()) and (not 'kind' in definitions[k8_file][i].keys()):
                        continue
                    logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]
                    if entity_conf is None:
                        continue

                    # Split out resources if entity kind is List
                    if isinstance(entity_conf, dict) and entity_conf["kind"] == "List":
                        for item in entity_conf.get("items", []):
                            definitions[k8_file].append(item)

                for i in range(len(definitions[k8_file])):
                    if _is_invalid_k8_definition(definitions[k8_file][i]):
                        continue
                    logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]

                    if isinstance(entity_conf, dict) and entity_conf.get("kind") == "List":
                        continue

                    # Skip entity without metadata["name"]
                    if isinstance(entity_conf, dict) and entity_conf.get("metadata"):
                        if isinstance(entity_conf["metadata"], int) or "name" not in entity_conf["metadata"]:
                            continue
                    else:
                        continue

                    # Skip entity with parent (metadata["ownerReferences"]) in runtime
                    # We will alert in runtime only
                    if "ownerReferences" in entity_conf["metadata"] and \
                            entity_conf["metadata"]["ownerReferences"] is not None:
                        continue

                    # Append containers and initContainers to definitions list
                    for type in ["containers", "initContainers"]:
                        containers = []
                        if entity_conf["kind"] == "CustomResourceDefinition":
                            continue
                        containers = search_deep_keys(type, entity_conf, [])
                        if not containers:
                            continue
                        containers = containers.pop()
                        #containers.insert(0,entity_conf['kind'])
                        containerDef = {}
                        namespace = ""
                        if "namespace" in entity_conf["metadata"]:
                            namespace = entity_conf["metadata"]["namespace"]
                        else:
                            namespace = "default"
                        containerDef["containers"] = containers.pop()
                        if containerDef["containers"] is not None:
                            containerDef["containers"] = force_list(containerDef["containers"])
                            for cd in containerDef["containers"]:
                                i = containerDef["containers"].index(cd)
                                containerDef["containers"][i]["apiVersion"] = entity_conf["apiVersion"]
                                containerDef["containers"][i]["kind"] = type
                                containerDef["containers"][i]["parent"] = "{}.{}.{} (container {})".format(
                                    entity_conf["kind"], entity_conf["metadata"]["name"], namespace, str(i))
                                containerDef["containers"][i]["parent_metadata"] = entity_conf["metadata"]
                            definitions[k8_file].extend(containerDef["containers"])

                # Run for each definition, including added container definitions
                for i in range(len(definitions[k8_file])):
                    if _is_invalid_k8_definition(definitions[k8_file][i]):
                        continue
                    logging.debug("Template Dump for {}: {}".format(k8_file, definitions[k8_file][i], indent=2))

                    entity_conf = definitions[k8_file][i]
                    if entity_conf is None:
                        continue
                    if isinstance(entity_conf, dict) and (entity_conf["kind"] == "List" or not entity_conf.get("kind")):
                        continue

                    if isinstance(entity_conf, dict) and isinstance(entity_conf.get("kind"), int):
                        continue
                    # Skip entity without metadata["name"] or parent_metadata["name"]
                    if not any(x in entity_conf["kind"] for x in ["containers", "initContainers"]):
                        if entity_conf.get("metadata"):
                            if isinstance(entity_conf["metadata"], int) or not "name" in entity_conf["metadata"]:
                                continue
                        else:
                            continue

                    # Skip entity with parent (metadata["ownerReferences"]) in runtime
                    # We will alert in runtime only
                    if "metadata" in entity_conf:
                        if "ownerReferences" in entity_conf["metadata"] and \
                                entity_conf["metadata"]["ownerReferences"] is not None:
                            continue

                    # Skip Kustomization Templates (for now)
                    if entity_conf["kind"] == "Kustomization":
                        continue

                    skipped_checks = get_skipped_checks(entity_conf)

                    results = registry.scan(k8_file, entity_conf, skipped_checks, runner_filter)

                    start_line = entity_conf["__startline__"]
                    end_line = entity_conf["__endline__"]

                    if start_line == end_line:
                        entity_lines_range = [start_line, end_line]
                        entity_code_lines = definitions_raw[k8_file][start_line - 1: end_line]
                    else:
                        entity_lines_range = [start_line, end_line - 1]
                        entity_code_lines = definitions_raw[k8_file][start_line - 1: end_line - 1]

                    # TODO? - Variable Eval Message!
                    variable_evaluations = {}

                    for check, check_result in results.items():
                        resource_id = check.get_resource_id(entity_conf)
                        report.add_resource(f'{k8_file}:{resource_id}')
                        record = Record(check_id=check.id, bc_check_id=check.bc_id,
                                        check_name=check.name, check_result=check_result,
                                        code_block=entity_code_lines, file_path=k8_file,
                                        file_line_range=entity_lines_range,
                                        resource=resource_id, evaluations=variable_evaluations,
                                        check_class=check.__class__.__module__, file_abs_path=file_abs_path)
                        record.set_guideline(check.guideline)
                        report.add_record(record=record)

        return report
Example #17
File: parser.py  Project: tronxd/checkov
    def _internal_dir_load(self, directory: str,
                           module_loader_registry: ModuleLoaderRegistry,
                           dir_filter: Callable[[str], bool],
                           keys_referenced_as_modules: Set[str],
                           specified_vars: Optional[Mapping[str, str]] = None,
                           module_load_context: Optional[str] = None,
                           vars_files: Optional[List[str]] = None,
                           root_dir: Optional[str] = None,
                           excluded_paths: Optional[List[str]] = None):
        """
    See `parse_directory` docs.
        :param directory:                  Directory in which .tf and .tfvars files will be loaded.
        :param module_loader_registry:     Registry used for resolving modules. This allows customization of how
                                       much resolution is performed (and easier testing) by using a manually
                                       constructed registry rather than the default.
        :param dir_filter:                 Determines whether or not a directory should be processed. Returning
                                       True will allow processing. The argument will be the absolute path of
                                       the directory.
        :param specified_vars:     Specifically defined variable values, overriding values from any other source.
        """

        # Stage 1: Look for applicable files in the directory:
        #          https://www.terraform.io/docs/configuration/index.html#code-organization
        #          Load the raw data for non-variable files, but perform no processing other than loading
        #          variable default values.
        #          Variable files are also flagged for later processing.
        var_value_and_file_map: Dict[str, Tuple[Any, str]] = {}
        hcl_tfvars: Optional[os.DirEntry] = None
        json_tfvars: Optional[os.DirEntry] = None
        auto_vars_files: List[os.DirEntry] = []  # *.auto.tfvars / *.auto.tfvars.json
        explicit_var_files: List[os.DirEntry] = []  # files passed with --var-file; only process the ones that are in this directory

        dir_contents = list(os.scandir(directory))
        if excluded_paths:
            filter_ignored_paths(root_dir, dir_contents, excluded_paths)

        tf_files_to_load = []
        for file in dir_contents:
            # Ignore directories and hidden files
            try:
                if not file.is_file() or file.name.startswith("."):
                    continue
            except OSError:
                # Skip files that can't be accessed
                continue

            # Variable files
            # See: https://www.terraform.io/docs/configuration/variables.html#variable-definitions-tfvars-files
            if file.name == "terraform.tfvars.json":
                json_tfvars = file
            elif file.name == "terraform.tfvars":
                hcl_tfvars = file
            elif file.name.endswith(".auto.tfvars.json") or file.name.endswith(".auto.tfvars"):
                auto_vars_files.append(file)
            elif vars_files and file.path in vars_files:
                explicit_var_files.append(file)

            # Resource files
            elif file.name.endswith(".tf") or (self.scan_hcl and file.name.endswith('.hcl')):  # TODO: add support for .tf.json
                tf_files_to_load.append(file)

        files_to_data = self._load_files(tf_files_to_load)

        for file, data in sorted(files_to_data, key=lambda x: x[0]):
            if not data:
                continue
            self.out_definitions[file] = data

            # Load variable defaults
            #  (see https://www.terraform.io/docs/configuration/variables.html#declaring-an-input-variable)
            var_blocks = data.get("variable")
            if var_blocks and isinstance(var_blocks, list):
                for var_block in var_blocks:
                    if not isinstance(var_block, dict):
                        continue
                    for var_name, var_definition in var_block.items():
                        if not isinstance(var_definition, dict):
                            continue

                        default_value = var_definition.get("default")
                        if default_value is not None and isinstance(default_value, list):
                            self.external_variables_data.append((var_name, default_value[0], file))
                            var_value_and_file_map[var_name] = default_value[0], file

        # Stage 2: Load vars in proper order:
        #          https://www.terraform.io/docs/configuration/variables.html#variable-definition-precedence
        #          Defaults are loaded in stage 1.
        #          Then loading in this order with later taking precedence:
        #             - Environment variables
        #             - The terraform.tfvars file, if present.
        #             - The terraform.tfvars.json file, if present.
        #             - Any *.auto.tfvars or *.auto.tfvars.json files, processed in lexical order of
        #               their filenames.
        #          Overriding everything else, variables from `specified_vars`, which are considered
        #          directly set.
        for key, value in self.env_vars.items():  # env vars
            if not key.startswith("TF_VAR_"):
                continue
            var_value_and_file_map[key[7:]] = value, f"env:{key}"
            self.external_variables_data.append((key[7:], value, f"env:{key}"))
        if hcl_tfvars:  # terraform.tfvars
            data = _load_or_die_quietly(hcl_tfvars, self.out_parsing_errors, clean_definitions=False)
            if data:
                var_value_and_file_map.update({k: (_safe_index(v, 0), hcl_tfvars.path) for k, v in data.items()})
                self.external_variables_data.extend([(k, _safe_index(v, 0), hcl_tfvars.path) for k, v in data.items()])
        if json_tfvars:  # terraform.tfvars.json
            data = _load_or_die_quietly(json_tfvars, self.out_parsing_errors)
            if data:
                var_value_and_file_map.update({k: (v, json_tfvars.path) for k, v in data.items()})
                self.external_variables_data.extend([(k, v, json_tfvars.path) for k, v in data.items()])

        auto_var_files_to_data = self._load_files(auto_vars_files)
        for var_file, data in sorted(auto_var_files_to_data, key=lambda x: x[0]):
            if data:
                var_value_and_file_map.update({k: (v, var_file) for k, v in data.items()})
                self.external_variables_data.extend([(k, v, var_file) for k, v in data.items()])

        explicit_var_files_to_data = self._load_files(explicit_var_files)
        # it's possible that os.scandir returned the var files in a different order than they were specified
        for var_file, data in sorted(explicit_var_files_to_data, key=lambda x: vars_files.index(x[0])):
            if data:
                var_value_and_file_map.update({k: (v, var_file) for k, v in data.items()})
                self.external_variables_data.extend([(k, v, var_file) for k, v in data.items()])

        if specified_vars:  # specified
            var_value_and_file_map.update({k: (v, "manual specification") for k, v in specified_vars.items()})
            self.external_variables_data.extend([(k, v, "manual specification") for k, v in specified_vars.items()])

        # IMPLEMENTATION NOTE: When resolving `module.` references, access to the entire data map is needed. It
        #                      may be a little overboard, but I don't want to just pass the entire data map down
        #                      because it breaks encapsulation and I don't want to cause confusion about what data
        #                      set is being processed. To avoid this, here's a Callable that will get the data
        #                      map for a particular module reference. (Might be OCD, but...)
        module_data_retrieval = lambda module_ref: self.out_definitions.get(module_ref)

        # Stage 4: Load modules
        #          This stage needs to be done in a loop (again... alas, no DAG) because modules might not
        #          be loadable until other modules are loaded. This happens when parameters to one module
        #          depend on the output of another. For such cases, the base module must be loaded, then
        #          a parameter resolution pass needs to happen, then the second module can be loaded.
        #
        #          One gotcha is that we need to make sure we load all modules at some point, even if their
        #          parameters don't resolve. So, if we hit a spot where resolution doesn't change anything
        #          and there are still modules to be loaded, they will be forced on the next pass.
        force_final_module_load = False
        for i in range(0, 10):  # circuit breaker - no more than 10 loops
            logging.debug("Module load loop %d", i)

            # Stage 4a: Load eligible modules
            has_more_modules = self._load_modules(directory, module_loader_registry,
                                                  dir_filter, module_load_context,
                                                  keys_referenced_as_modules,
                                                  force_final_module_load)

            # Stage 4b: Variable resolution round 2 - now with (possibly more) modules
            made_var_changes = False
            if not has_more_modules:
                break  # nothing more to do
            elif not made_var_changes:
                # If there are more modules to load but no variables were resolved, then do a final module
                # load, forcing things through without complete resolution.
                force_final_module_load = True
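Unlike the os.walk call sites in the other examples, the parser in Example #17 feeds filter_ignored_paths a list of os.DirEntry objects from os.scandir rather than plain name strings, so the helper presumably normalizes entries before matching. A hypothetical normalization helper (not taken from the checkov source) could look like:

import os
from typing import Union

def entry_name(entry: Union[str, "os.DirEntry"]) -> str:
    # os.walk hands the helper plain strings, os.scandir hands it os.DirEntry objects
    return entry if isinstance(entry, str) else entry.name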
Example #18
File: runner.py  Project: je17/checkov
    def run(self,
            root_folder,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        parsing_errors = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                cfn_registry.load_external_checks(directory)

        if files:
            for file in files:
                (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names,
                                     runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names,
                                     runner_filter.excluded_paths)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in CF_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                try:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse(file)
                except TypeError:
                    logging.info(
                        f'CloudFormation skipping {file} as it is not a valid CF template'
                    )

        # Filter out empty files that have not been parsed successfully, and filter out non-CF template files
        definitions = {
            k: v
            for k, v in definitions.items()
            if v and isinstance(v, dict_node) and v.__contains__("Resources")
            and isinstance(v["Resources"], dict_node)
        }
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions.keys()
        }

        for cf_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which cf_file does not always give).
            if cf_file[0] == '/':
                path_to_convert = (root_folder +
                                   cf_file) if root_folder else cf_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, cf_file)) if root_folder else cf_file

            file_abs_path = os.path.abspath(path_to_convert)

            if isinstance(
                    definitions[cf_file],
                    dict_node) and 'Resources' in definitions[cf_file].keys():
                cf_context_parser = ContextParser(cf_file,
                                                  definitions[cf_file],
                                                  definitions_raw[cf_file])
                logging.debug("Template Dump for {}: {}".format(
                    cf_file, definitions[cf_file], indent=2))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in definitions[cf_file][
                        'Resources'].items():
                    resource_id = cf_context_parser.extract_cf_resource_id(
                        resource, resource_name)
                    # check that the resource can be parsed as a CF resource
                    if resource_id:
                        entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                            resource)
                        if entity_lines_range and entity_code_lines:
                            # TODO - Variable Eval Message!
                            variable_evaluations = {}

                            skipped_checks = ContextParser.collect_skip_comments(
                                entity_code_lines)
                            entity = {resource_name: resource}
                            results = cfn_registry.scan(
                                cf_file, entity, skipped_checks, runner_filter)
                            tags = cfn_utils.get_resource_tags(entity)
                            for check, check_result in results.items():
                                record = Record(
                                    check_id=check.id,
                                    check_name=check.name,
                                    check_result=check_result,
                                    code_block=entity_code_lines,
                                    file_path=cf_file,
                                    file_line_range=entity_lines_range,
                                    resource=resource_id,
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__,
                                    file_abs_path=file_abs_path,
                                    entity_tags=tags)
                                report.add_record(record=record)
        return report
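Each runner in this listing repeats the same path-normalization step before building records. A minimal standalone sketch of that logic, using only the standard library (the helper name normalize_scanned_path is ours, not checkov's; the example result assumes a POSIX path layout):

import os

def normalize_scanned_path(scanned_key, root_folder=None):
    # Keys produced under -d start with '/' and are relative to root_folder;
    # keys produced under -f are used as-is (they may already be absolute).
    if scanned_key[0] == '/':
        path_to_convert = (root_folder + scanned_key) if root_folder else scanned_key
    else:
        path_to_convert = os.path.join(root_folder, scanned_key) if root_folder else scanned_key
    return os.path.abspath(path_to_convert)

# e.g. normalize_scanned_path('/templates/stack.yaml', '/repo') == '/repo/templates/stack.yaml'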
Example #19
0
File: runner.py Project: tsmithv11/checkov
    def run(
        self,
        root_folder: str,
        external_checks_dir: Optional[List[str]] = None,
        files: Optional[List[str]] = None,
        runner_filter: RunnerFilter = RunnerFilter(),
        collect_skip_comments: bool = True,
    ) -> Report:
        report = Report(self.check_type)
        files_list = []
        filepath_fn = None
        if external_checks_dir:
            for directory in external_checks_dir:
                arm_resource_registry.load_external_checks(directory)

        if files:
            files_list = files.copy()

        if root_folder:
            filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names,
                                     runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names,
                                     runner_filter.excluded_paths)
                for file in f_names:
                    file_ending = os.path.splitext(file)[1]
                    if file_ending in ARM_POSSIBLE_ENDINGS:
                        files_list.append(os.path.join(root, file))

        definitions, definitions_raw = get_files_definitions(
            files_list, filepath_fn)

        # Filter out empty files that have not been parsed successfully, and filter out non-ARM template files
        definitions = {
            k: v
            for k, v in definitions.items()
            if v and "resources" in v
        }
        definitions_raw = {
            k: v
            for k, v in definitions_raw.items() if k in definitions.keys()
        }

        for arm_file in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which arm_file does not always give).
            if arm_file[0] == '/':
                path_to_convert = (root_folder +
                                   arm_file) if root_folder else arm_file
            else:
                path_to_convert = (os.path.join(
                    root_folder, arm_file)) if root_folder else arm_file

            file_abs_path = os.path.abspath(path_to_convert)

            if isinstance(definitions[arm_file], DictNode):
                arm_context_parser = ContextParser(arm_file,
                                                   definitions[arm_file],
                                                   definitions_raw[arm_file])
                logging.debug(
                    f"Template Dump for {arm_file}: {definitions[arm_file]}")

                if 'resources' in definitions[arm_file].keys():
                    arm_context_parser.evaluate_default_parameters()

                    # Split out nested resources from base resource
                    for resource in definitions[arm_file]['resources']:
                        if isinstance(
                                resource,
                                dict) and "parent_name" in resource.keys():
                            continue
                        nested_resources = arm_context_parser.search_deep_keys(
                            "resources", resource, [])
                        if nested_resources:
                            for nr in nested_resources:
                                nr_element = nr.pop()
                                if nr_element:
                                    for element in nr_element:
                                        new_resource = element
                                        if isinstance(new_resource, dict):
                                            new_resource["parent_name"] = resource["name"]
                                            new_resource["parent_type"] = resource["type"]
                                            definitions[arm_file]['resources'].append(new_resource)

                    for resource in definitions[arm_file]['resources']:
                        resource_id = arm_context_parser.extract_arm_resource_id(
                            resource)
                        report.add_resource(f'{arm_file}:{resource_id}')
                        resource_name = arm_context_parser.extract_arm_resource_name(
                            resource)
                        entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(
                            resource)
                        if entity_lines_range and entity_code_lines:
                            # TODO - Variable Eval Message!
                            variable_evaluations = {}

                            skipped_checks = ContextParser.collect_skip_comments(
                                resource)

                            results = arm_resource_registry.scan(
                                arm_file, {resource_name: resource},
                                skipped_checks, runner_filter)
                            for check, check_result in results.items():
                                record = Record(
                                    check_id=check.id,
                                    bc_check_id=check.bc_id,
                                    check_name=check.name,
                                    check_result=check_result,
                                    code_block=entity_code_lines,
                                    file_path=arm_file,
                                    file_line_range=entity_lines_range,
                                    resource=resource_id,
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__,
                                    file_abs_path=file_abs_path)
                                record.set_guideline(check.guideline)
                                report.add_record(record=record)

                if 'parameters' in definitions[arm_file].keys():
                    parameters = definitions[arm_file]['parameters']
                    for parameter_name, parameter_details in parameters.items():
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        resource_id = f'parameter.{parameter_name}'
                        resource_name = parameter_name
                        entity_lines_range, entity_code_lines = arm_context_parser.extract_arm_resource_code_lines(
                            parameter_details)

                        if entity_lines_range and entity_code_lines:
                            skipped_checks = ContextParser.collect_skip_comments(
                                parameter_details)
                            results = arm_parameter_registry.scan(
                                arm_file, {resource_name: parameter_details},
                                skipped_checks, runner_filter)
                            for check, check_result in results.items():
                                record = Record(
                                    check_id=check.id,
                                    bc_check_id=check.bc_id,
                                    check_name=check.name,
                                    check_result=check_result,
                                    code_block=entity_code_lines,
                                    file_path=arm_file,
                                    file_line_range=entity_lines_range,
                                    resource=resource_id,
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__,
                                    file_abs_path=file_abs_path)
                                record.set_guideline(check.guideline)
                                report.add_record(record=record)

        return report
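The nested-resource split above relies on checkov's ContextParser.search_deep_keys; a simplified sketch of the same idea on plain dicts (one level of nesting only, and the function name flatten_nested_resources is our own, not checkov's):

def flatten_nested_resources(resources):
    # Copy direct child resources up to the top level, tagging each copy with
    # its parent's name and type so later checks can tell them apart.
    flattened = list(resources)
    for resource in resources:
        if not isinstance(resource, dict) or "parent_name" in resource:
            continue
        for child in resource.get("resources", []):
            if isinstance(child, dict):
                child = dict(child)  # copy, so the original template is left untouched
                child["parent_name"] = resource.get("name")
                child["parent_type"] = resource.get("type")
                flattened.append(child)
    return flattened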
Example #20
0
File: parser.py Project: tronxd/checkov
def _filter_ignored_paths(root, paths, excluded_paths):
    filter_ignored_paths(root, paths, excluded_paths)
    for path in list(paths):
        if path == default_ml_registry.external_modules_folder_name:
            paths.remove(path)
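This works because os.walk lets callers prune d_names in place to stop the descent into excluded directories. A self-contained sketch of that pattern (exact-name matching only here; checkov's filter_ignored_paths matches configured exclusion paths rather than bare names):

import os

def walk_filtered(root_dir, excluded_names):
    for root, d_names, f_names in os.walk(root_dir):
        # Mutating d_names in place is what prevents os.walk from descending
        # into the excluded directories on later iterations.
        d_names[:] = [d for d in d_names if d not in excluded_names]
        f_names[:] = [f for f in f_names if f not in excluded_names]
        yield root, d_names, f_names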
Example #21
0
File: runner.py Project: je17/checkov
    def run(self, root_folder, external_checks_dir=None, files=None, runner_filter=RunnerFilter(), collect_skip_comments=True):
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        parsing_errors = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                function_registry.load_external_checks(directory)

        if files:
            for file in files:
                if os.path.basename(file) in SLS_FILE_MASK:
                    parse_result = parse(file)
                    if parse_result:
                        (definitions[file], definitions_raw[file]) = parse_result

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                # Don't walk into "node_modules" directories under the root folder. If, for some reason,
                # scanning one of these is desired, it can be directly specified.
                if "node_modules" in d_names:
                    d_names.remove("node_modules")

                filter_ignored_paths(root, d_names, runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names, runner_filter.excluded_paths)
                for file in f_names:
                    if file in SLS_FILE_MASK:
                        full_path = os.path.join(root, file)
                        if "/." not in full_path:
                            # skip hidden paths (temporary/build directories)
                            files_list.append(full_path)

            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                parse_result = parse(file)
                if parse_result:
                    (definitions[relative_file_path], definitions_raw[relative_file_path]) = parse_result

        # Filter out empty files that have not been parsed successfully
        definitions = {k: v for k, v in definitions.items() if v}
        definitions_raw = {k: v for k, v in definitions_raw.items() if k in definitions.keys()}

        for sls_file, sls_file_data in definitions.items():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which sls_file does not always give).
            if sls_file[0] == '/':
                path_to_convert = (root_folder + sls_file) if root_folder else sls_file
            else:
                path_to_convert = (os.path.join(root_folder, sls_file)) if root_folder else sls_file

            file_abs_path = os.path.abspath(path_to_convert)

            if not isinstance(sls_file_data, dict_node):
                continue

            if CFN_RESOURCES_TOKEN in sls_file_data and isinstance(sls_file_data[CFN_RESOURCES_TOKEN], dict_node):
                cf_sub_template = sls_file_data[CFN_RESOURCES_TOKEN]
                if not cf_sub_template.get('Resources'):
                    continue
                cf_context_parser = CfnContextParser(sls_file, cf_sub_template, definitions_raw[sls_file])
                logging.debug("Template Dump for {}: {}".format(sls_file, sls_file_data, indent=2))
                cf_context_parser.evaluate_default_refs()
                for resource_name, resource in cf_sub_template['Resources'].items():
                    if not isinstance(resource, dict_node):
                        continue
                    cf_resource_id = cf_context_parser.extract_cf_resource_id(resource, resource_name)
                    if not cf_resource_id:
                        # No Type attribute for resource
                        continue
                    entity_lines_range, entity_code_lines = cf_context_parser.extract_cf_resource_code_lines(
                        resource)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                        # TODO - Variable Eval Message!
                        variable_evaluations = {}

                        entity = {resource_name: resource}
                        results = cfn_registry.scan(sls_file, entity, skipped_checks, runner_filter)
                        tags = cfn_utils.get_resource_tags(entity, cfn_registry)
                        for check, check_result in results.items():
                            record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                            code_block=entity_code_lines, file_path=sls_file,
                                            file_line_range=entity_lines_range,
                                            resource=cf_resource_id, evaluations=variable_evaluations,
                                            check_class=check.__class__.__module__, file_abs_path=file_abs_path, entity_tags=tags)
                            report.add_record(record=record)

            sls_context_parser = SlsContextParser(sls_file, sls_file_data, definitions_raw[sls_file])

            # Sub-sections that have multiple items under them
            for token, registry in MULTI_ITEM_SECTIONS:
                template_items = sls_file_data.get(token)
                if not template_items or not isinstance(template_items, dict):
                    continue
                for item_name, item_content in template_items.items():
                    if not isinstance(item_content, dict_node):
                        continue
                    entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(item_content)
                    if entity_lines_range and entity_code_lines:
                        skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                        variable_evaluations = {}
                        if token == "functions": #nosec
                            # "Enriching" copies things like "environment" and "stackTags" down into the
                            # function data from the provider block since logically that's what serverless
                            # does. This allows checks to see what the complete data would be.
                            sls_context_parser.enrich_function_with_provider(item_name)
                        entity = EntityDetails(sls_context_parser.provider_type, item_content)
                        results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
                        tags = cfn_utils.get_resource_tags(entity, registry)
                        for check, check_result in results.items():
                            record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                            code_block=entity_code_lines, file_path=sls_file,
                                            file_line_range=entity_lines_range,
                                            resource=item_name, evaluations=variable_evaluations,
                                            check_class=check.__class__.__module__, file_abs_path=file_abs_path, entity_tags=tags)
                            report.add_record(record=record)
            # Sub-sections that are a single item
            for token, registry in SINGLE_ITEM_SECTIONS:
                item_content = sls_file_data.get(token)
                if not item_content:
                    continue
                entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(item_content)
                if not entity_lines_range:
                    entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(sls_file_data)

                skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                variable_evaluations = {}
                entity = EntityDetails(sls_context_parser.provider_type, item_content)
                results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
                tags = cfn_utils.get_resource_tags(entity, registry)
                for check, check_result in results.items():
                    record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                    code_block=entity_code_lines, file_path=sls_file,
                                    file_line_range=entity_lines_range,
                                    resource=token, evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__, file_abs_path=file_abs_path, entity_tags=tags)
                    report.add_record(record=record)

            # "Complete" checks
            # NOTE: Ignore code content, no point in showing (could be long)
            entity_lines_range, entity_code_lines = sls_context_parser.extract_code_lines(sls_file_data)
            if entity_lines_range:
                skipped_checks = CfnContextParser.collect_skip_comments(entity_code_lines)
                variable_evaluations = {}
                entity = EntityDetails(sls_context_parser.provider_type, sls_file_data)
                results = complete_registry.scan(sls_file, entity, skipped_checks, runner_filter)
                tags = cfn_utils.get_resource_tags(entity, complete_registry)
                for check, check_result in results.items():
                    record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
                                    code_block=[],              # Don't show, could be large
                                    file_path=sls_file,
                                    file_line_range=entity_lines_range,
                                    resource="complete",        # Weird, not sure what to put where
                                    evaluations=variable_evaluations,
                                    check_class=check.__class__.__module__, file_abs_path=file_abs_path, entity_tags=tags)
                    report.add_record(record=record)

        return report
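The relative report key used above, f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}', is easy to sanity-check in isolation. Note that os.path.commonprefix is a character-wise prefix, not a path-aware one, so sibling directories sharing a name prefix can yield surprising keys; a quick illustration with made-up paths:

import os

root_folder = '/repo/services'
file_path = '/repo/services/api/serverless.yml'
common = os.path.commonprefix((root_folder, file_path))   # '/repo/services'
print(f'/{os.path.relpath(file_path, common)}')           # '/api/serverless.yml'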
Example #22
0
File: runner.py Project: tronxd/checkov
    def run(self,
            root_folder=None,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        report = Report(self.check_type)
        files_list = []
        filepath_fn = None
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            files_list = [
                file for file in files
                if Runner._is_docker_file(os.path.basename(file))
            ]

        if root_folder:
            filepath_fn = lambda f: f'/{os.path.relpath(f, os.path.commonprefix((root_folder, f)))}'
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names,
                                     runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names,
                                     runner_filter.excluded_paths)
                for file in f_names:
                    if Runner._is_docker_file(file):
                        file_path = os.path.join(root, file)
                        files_list.append(file_path)

        definitions, definitions_raw = get_files_definitions(
            files_list, filepath_fn)

        for docker_file_path in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
            if docker_file_path[0] == '/':
                path_to_convert = (
                    root_folder +
                    docker_file_path) if root_folder else docker_file_path
            else:
                path_to_convert = (os.path.join(
                    root_folder,
                    docker_file_path)) if root_folder else docker_file_path

            file_abs_path = os.path.abspath(path_to_convert)
            report.add_resource(file_abs_path)
            skipped_checks = collect_skipped_checks(
                definitions[docker_file_path])
            instructions = definitions[docker_file_path]

            results = registry.scan(docker_file_path, instructions,
                                    skipped_checks, runner_filter)
            for check, check_result in results.items():
                result_configuration = check_result['results_configuration']
                startline = 0
                endline = len(definitions_raw[docker_file_path]) - 1
                result_instruction = ""
                if result_configuration:
                    startline = result_configuration['startline']
                    endline = result_configuration['endline']
                    result_instruction = result_configuration["instruction"]

                codeblock = []
                self.calc_record_codeblock(codeblock, definitions_raw,
                                           docker_file_path, endline,
                                           startline)
                record = Record(check_id=check.id,
                                bc_check_id=check.bc_id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=codeblock,
                                file_path=docker_file_path,
                                file_line_range=[startline + 1, endline + 1],
                                resource="{}.{}".format(
                                    docker_file_path, result_instruction,
                                    startline),
                                evaluations=None,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path,
                                entity_tags=None)
                record.set_guideline(check.guideline)
                report.add_record(record=record)

        return report
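calc_record_codeblock (not shown in this listing) turns the check's startline/endline into the record's code block. A hedged reconstruction of what such a helper might look like, assuming 0-based positions from the check result and a plain list of raw lines (the name build_code_block is ours):

def build_code_block(raw_lines, startline, endline):
    # Pair each raw line with a 1-based line number, mirroring the
    # file_line_range=[startline + 1, endline + 1] convention above.
    return [(idx + 1, raw_lines[idx]) for idx in range(startline, endline + 1)]

# e.g. build_code_block(['FROM python:3.9\n', 'USER root\n'], 1, 1) == [(2, 'USER root\n')]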
Example #23
0
    def run(self,
            root_folder=None,
            external_checks_dir=None,
            files=None,
            runner_filter=RunnerFilter(),
            collect_skip_comments=True):
        report = Report(self.check_type)
        definitions = {}
        definitions_raw = {}
        parsing_errors = {}
        files_list = []
        if external_checks_dir:
            for directory in external_checks_dir:
                registry.load_external_checks(directory)

        if files:
            for file in files:
                if os.path.basename(file) in DOCKER_FILE_MASK:
                    (definitions[file], definitions_raw[file]) = parse(file)

        if root_folder:
            for root, d_names, f_names in os.walk(root_folder):
                filter_ignored_paths(root, d_names,
                                     runner_filter.excluded_paths)
                filter_ignored_paths(root, f_names,
                                     runner_filter.excluded_paths)
                for file in f_names:
                    if file in DOCKER_FILE_MASK:
                        files_list.append(os.path.join(root, file))

            for file in files_list:
                relative_file_path = f'/{os.path.relpath(file, os.path.commonprefix((root_folder, file)))}'
                try:
                    (definitions[relative_file_path],
                     definitions_raw[relative_file_path]) = parse(file)
                except TypeError:
                    logging.info(
                        f'Dockerfile skipping {file} as it is not a valid dockerfile template'
                    )

        for docker_file_path in definitions.keys():

            # There are a few cases here. If -f was used, there could be a leading / because it's an absolute path,
            # or there will be no leading slash; root_folder will always be none.
            # If -d is used, root_folder will be the value given, and -f will start with a / (hardcoded above).
            # The goal here is simply to get a valid path to the file (which docker_file_path does not always give).
            if docker_file_path[0] == '/':
                path_to_convert = (
                    root_folder +
                    docker_file_path) if root_folder else docker_file_path
            else:
                path_to_convert = (os.path.join(
                    root_folder,
                    docker_file_path)) if root_folder else docker_file_path

            file_abs_path = os.path.abspath(path_to_convert)
            skipped_checks = collect_skipped_checks(
                definitions[docker_file_path])
            instructions = definitions[docker_file_path]

            results = registry.scan(docker_file_path, instructions,
                                    skipped_checks, runner_filter)
            for check, check_result in results.items():
                result_configuration = check_result['results_configuration']
                startline = 0
                endline = 0
                result_instruction = ""
                if result_configuration:
                    startline = result_configuration['startline']
                    endline = result_configuration['endline']
                    result_instruction = result_configuration["instruction"]

                codeblock = []
                self.calc_record_codeblock(codeblock, definitions_raw,
                                           docker_file_path, endline,
                                           startline)
                record = Record(check_id=check.id,
                                check_name=check.name,
                                check_result=check_result,
                                code_block=codeblock,
                                file_path=docker_file_path,
                                file_line_range=[startline, endline],
                                resource="{}.{}".format(
                                    docker_file_path, result_instruction,
                                    startline),
                                evaluations=None,
                                check_class=check.__class__.__module__,
                                file_abs_path=file_abs_path,
                                entity_tags=None)
                report.add_record(record=record)

        return report
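Both Dockerfile runners lean on collect_skipped_checks to honour inline suppressions. The parser itself is not shown in this listing; a rough sketch of the idea, assuming the usual checkov:skip=CHECK_ID[:reason] comment form (the regex and field names here are illustrative assumptions, not checkov's internals):

import re

SKIP_PATTERN = re.compile(r'checkov:skip=\s*(?P<id>[A-Za-z0-9_]+)(:(?P<reason>.*))?')

def parse_skip_comments(comment_lines):
    skipped = []
    for line in comment_lines:
        match = SKIP_PATTERN.search(line)
        if match:
            skipped.append({
                'id': match.group('id'),
                'suppress_comment': (match.group('reason') or 'No comment provided').strip(),
            })
    return skipped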