def _process_scan_impl(self, scan_collection: ScanCollection) -> str:
    """Format the GL-HOOK-ERR rejection line for a scan.

    Returns an empty string when the scan collection holds no results,
    otherwise a single-line summary of the incidents found.
    """
    all_results = list(scan_collection.get_all_results())
    if not all_results:
        return ""

    # Collect formatted incidents in a set so duplicates are reported once.
    # (The same secret can appear in both the old and the new version of a
    # document.)
    formatted_policy_breaks = {
        format_policy_break(policy_break)
        for result in all_results
        for policy_break in result.scan.policy_breaks
    }

    break_count = len(formatted_policy_breaks)
    summary_str = f"{break_count} {pluralize('incident', break_count)}"

    # Everything stays on one line: GitLab Web IDE has a bug which renders
    # newline characters as "<br>".
    # https://gitlab.com/gitlab-org/gitlab/-/issues/350349
    breaks_str = ", ".join(formatted_policy_breaks)

    return (
        f"GL-HOOK-ERR: ggshield found {summary_str} in these changes: {breaks_str}."
        " The commit has been rejected."
    )
def path_cmd(ctx: click.Context, paths: List[str], recursive: bool, yes: bool) -> int:  # pragma: no cover
    """
    scan files and directories.
    """
    config = ctx.obj["config"]
    output_handler: OutputHandler = ctx.obj["output_handler"]
    try:
        # Resolve the files to scan, honouring the configured ignore list.
        target_files = get_files_from_paths(
            paths=paths,
            paths_ignore=config.paths_ignore,
            recursive=recursive,
            yes=yes,
            verbose=config.verbose,
        )
        scan_results = target_files.scan(
            client=ctx.obj["client"],
            cache=ctx.obj["cache"],
            matches_ignore=config.matches_ignore,
            all_policies=config.all_policies,
            verbose=config.verbose,
        )
        collection = ScanCollection(
            id=" ".join(paths), type="path_scan", results=scan_results
        )
        # NOTE(review): process_scan() appears to return a tuple whose second
        # element is the exit code — confirm against OutputHandler.
        return output_handler.process_scan(collection)[1]
    except click.exceptions.Abort:
        # The user declined a confirmation prompt: not an error.
        return 0
    except Exception as error:
        if config.verbose:
            traceback.print_exc()
        raise click.ClickException(str(error))
def scan_commit(
    commit: Commit,
    client: GGClient,
    cache: Cache,
    verbose: bool,
    matches_ignore: Iterable[IgnoredMatch],
    all_policies: bool,
    mode_header: str,
    banlisted_detectors: Optional[Set[str]] = None,
) -> ScanCollection:  # pragma: no cover
    """Scan a single commit and wrap its results in a ScanCollection."""
    # NOTE(review): mode_header is accepted but never used in this body —
    # presumably kept for call-site compatibility; verify with callers.
    scan_results = commit.scan(
        client=client,
        cache=cache,
        matches_ignore=matches_ignore,
        banlisted_detectors=banlisted_detectors,
        all_policies=all_policies,
        verbose=verbose,
    )
    return ScanCollection(
        commit.sha or "unknown",
        type="commit",
        results=scan_results,
        optional_header=commit.optional_header,
        extra_info=commit.info._asdict(),
    )
def precommit_cmd(ctx: click.Context, precommit_args: List[str]) -> int:  # pragma: no cover
    """
    scan as a pre-commit git hook.
    """
    config = ctx.obj["config"]
    # The pre-commit hook always reports as plain text on stdout.
    output_handler = TextHandler(
        show_secrets=config.show_secrets, verbose=config.verbose, output=None
    )
    try:
        check_git_dir()
        staged_commit = Commit(filter_set=ctx.obj["filter_set"])
        scan_results = staged_commit.scan(
            client=ctx.obj["client"],
            cache=ctx.obj["cache"],
            matches_ignore=config.matches_ignore,
            all_policies=config.all_policies,
            verbose=config.verbose,
        )
        collection = ScanCollection(
            id="cached", type="pre-commit", results=scan_results
        )
        return output_handler.process_scan(collection)[1]
    except click.exceptions.Abort:
        # The user declined a confirmation prompt: not an error.
        return 0
    except Exception as error:
        if config.verbose:
            traceback.print_exc()
        raise click.ClickException(str(error))
def path_cmd(ctx: click.Context, paths: List[str], recursive: bool, yes: bool) -> int:  # pragma: no cover
    """
    scan files and directories.
    """
    config = ctx.obj["config"]
    output_handler: OutputHandler = ctx.obj["output_handler"]
    try:
        target_files = get_files_from_paths(
            paths=paths,
            exclusion_regexes=ctx.obj["exclusion_regexes"],
            recursive=recursive,
            yes=yes,
            verbose=config.verbose,
            # An explicitly given path is scanned whether or not it lives in
            # a git repository.
            ignore_git=True,
        )
        scan_results = target_files.scan(
            client=ctx.obj["client"],
            cache=ctx.obj["cache"],
            matches_ignore=config.matches_ignore,
            banlisted_detectors=config.banlisted_detectors,
            all_policies=config.all_policies,
            verbose=config.verbose,
        )
        collection = ScanCollection(
            id=" ".join(paths), type="path_scan", results=scan_results
        )
        return output_handler.process_scan(collection)
    except Exception as error:
        return handle_exception(error, config.verbose)
def docker_scan_archive(
    archive: Path,
    client: GGClient,
    cache: Cache,
    verbose: bool,
    matches_ignore: Iterable[str],
    all_policies: bool,
    scan_id: str,
    banlisted_detectors: Optional[Set[str]] = None,
) -> ScanCollection:
    """Scan every file extracted from a docker image archive.

    A click progress bar is advanced once per scanned chunk of files.
    """
    files = get_files_from_docker_archive(archive)

    with click.progressbar(length=len(files.files), label="Scanning") as progressbar:

        def on_chunk_scanned(chunk: List[Dict[str, Any]]) -> None:
            # Advance the bar by the number of files in the finished chunk.
            progressbar.update(len(chunk))

        results = files.scan(
            client=client,
            cache=cache,
            matches_ignore=matches_ignore,
            all_policies=all_policies,
            verbose=verbose,
            on_file_chunk_scanned=on_chunk_scanned,
            banlisted_detectors=banlisted_detectors,
        )

    return ScanCollection(id=scan_id, type="scan_docker_archive", results=results)
def scan_commit_range(
    client: GGClient,
    cache: Cache,
    commit_list: List[str],
    output_handler: OutputHandler,
    verbose: bool,
    exclusion_regexes: Set[re.Pattern],
    matches_ignore: Iterable[IgnoredMatch],
    all_policies: bool,
    scan_id: str,
    mode_header: str,
    banlisted_detectors: Optional[Set[str]] = None,
) -> int:  # pragma: no cover
    """
    Scan every commit in a range, in parallel.

    :param client: Public Scanning API client
    :param commit_list: List of commit shas to scan
    :param verbose: Display successful scan's message
    """
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=min(CPU_COUNT, 4)
    ) as executor:
        # One scan task per commit; results are gathered as they complete.
        futures = [
            executor.submit(
                scan_commit,
                Commit(sha, exclusion_regexes),
                client,
                cache,
                verbose,
                matches_ignore,
                all_policies,
                mode_header,
                banlisted_detectors,
            )
            for sha in commit_list
        ]

        with click.progressbar(
            iterable=concurrent.futures.as_completed(futures),
            length=len(futures),
            label=format_text("Scanning Commits", STYLE["progress"]),
        ) as completed_futures:
            scans: List[ScanCollection] = [
                future.result() for future in completed_futures
            ]

    return output_handler.process_scan(
        ScanCollection(id=scan_id, type="commit-range", scans=scans)
    )
def test_stdin_supports_gitlab_web_ui(
    self,
    scan_commit_mock: Mock,
    get_list_mock: Mock,
    cli_fs_runner: CliRunner,
):
    """
    GIVEN 1 webpush commit
    WHEN the command is run and there are secrets
    THEN it should return a special remediation message
    AND the GL-HOOK-ERR line should be there
    AND it should contain an obfuscated version of the secret
    """
    old_sha = "56781234"
    new_sha = "1234abcd"

    # Pretend the push contains exactly one new commit whose scan finds a
    # secret in server.conf.
    get_list_mock.return_value = [new_sha]
    scan_commit_mock.return_value = ScanCollection(
        new_sha,
        type="commit",
        results=[
            Result(
                _SIMPLE_SECRET_PATCH,
                Filemode.MODIFY,
                "server.conf",
                _SIMPLE_SECRET_PATCH_SCAN_RESULT,
            )
        ],
    )

    result = cli_fs_runner.invoke(
        cli,
        ["-v", "scan", "pre-receive"],
        input=f"{old_sha}\n{new_sha}\norigin/main\n",
        env={"GL_PROTOCOL": "web"},
    )

    get_list_mock.assert_called_once_with(f"--max-count=51 {old_sha}...{new_sha}")
    scan_commit_mock.assert_called_once()

    hook_err_lines = [
        line
        for line in result.output.splitlines()
        if line.startswith("GL-HOOK-ERR: ")
    ]
    assert hook_err_lines
    assert any(
        contains_secret(line, _SIMPLE_SECRET_TOKEN) for line in hook_err_lines
    )
    assert result.exit_code == 1
def scan_commit_range(
    client: GGClient,
    cache: Cache,
    commit_list: List[str],
    output_handler: OutputHandler,
    verbose: bool,
    filter_set: Set[str],
    matches_ignore: Iterable[str],
    all_policies: bool,
    scan_id: str,
) -> int:
    # pragma: no cover
    """
    Scan every commit in a range, in parallel.

    :param client: Public Scanning API client
    :param commit_list: List of commit shas to scan
    :param verbose: Display successful scan's message
    """
    return_code = 0
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=min(CPU_COUNT, 4)
    ) as executor:
        # One scan task per commit; results are gathered as they complete.
        future_to_process = [
            executor.submit(
                scan_commit,
                Commit(sha, filter_set),
                client,
                cache,
                verbose,
                matches_ignore,
                all_policies,
            )
            for sha in commit_list
        ]

        scans: List[ScanCollection] = []
        with click.progressbar(
            length=len(future_to_process),
            label=format_text("Scanning Commits", STYLE["progress"]),
        ) as bar:
            for future in concurrent.futures.as_completed(future_to_process):
                scans.append(future.result())
                # BUGFIX: click's ProgressBar.update() *advances* the bar by
                # n_steps; the previous code passed the running total
                # (1, 2, 3, ...), overshooting the bar quadratically.
                # Advance by exactly one completed commit instead.
                bar.update(1)

        return_code = output_handler.process_scan(
            ScanCollection(id=scan_id, type="commit-range", scans=scans)
        )[1]
    return return_code
def archive_cmd(ctx: click.Context, path: str) -> int:  # pragma: no cover
    """
    scan archive <PATH>.
    """
    with tempfile.TemporaryDirectory(suffix="ggshield") as temp_dir:
        # Extract first; the scan operates on plain files on disk.
        try:
            shutil.unpack_archive(path, extract_dir=Path(temp_dir))
        except Exception as exn:
            raise click.ClickException(f'Failed to unpack "{path}" archive: {exn}')

        config: Config = ctx.obj["config"]
        files: Files = get_files_from_paths(
            paths=[temp_dir],
            exclusion_regexes=ctx.obj["exclusion_regexes"],
            recursive=True,
            yes=True,
            verbose=config.verbose,
            ignore_git=True,
        )

        with click.progressbar(
            length=len(files.files), label="Scanning"
        ) as progressbar:

            def on_chunk_scanned(chunk: List[Dict[str, Any]]) -> None:
                # Advance the bar by the number of files in the finished chunk.
                progressbar.update(len(chunk))

            results: List[Result] = files.scan(
                client=ctx.obj["client"],
                cache=ctx.obj["cache"],
                matches_ignore=config.matches_ignore,
                banlisted_detectors=config.banlisted_detectors,
                all_policies=config.all_policies,
                verbose=config.verbose,
                on_file_chunk_scanned=on_chunk_scanned,
            )

        collection = ScanCollection(id=path, type="archive_scan", results=results)
        output_handler: OutputHandler = ctx.obj["output_handler"]
        return output_handler.process_scan(collection)
def scan_commit(
    commit: Commit,
    client: GGClient,
    verbose: bool,
    matches_ignore: Iterable[str],
    all_policies: bool,
) -> ScanCollection:  # pragma: no cover
    """Scan one commit and wrap its results in a ScanCollection."""
    scan_results = commit.scan(
        client=client,
        matches_ignore=matches_ignore,
        all_policies=all_policies,
        verbose=verbose,
    )
    return ScanCollection(
        commit.sha or "unknown",
        type="commit",
        results=scan_results,
        optional_header=commit.optional_header,
        extra_info=commit.info._asdict(),
    )
def test_json_output(client, cache, name, input_patch, expected, snapshot):
    """Check the JSON handler's output and exit code against a snapshot."""
    commit = Commit()
    commit._patch = input_patch
    handler = JSONOutputHandler(verbose=True, show_secrets=False)

    # Replay the recorded API interaction instead of hitting the network.
    with my_vcr.use_cassette(name):
        results = commit.scan(
            client=client,
            cache=cache,
            matches_ignore={},
            all_policies=True,
            verbose=False,
            banlisted_detectors=None,
        )
        scan = ScanCollection(id="path", type="test", results=results)
        json_flat_results = handler._process_scan_impl(scan)

        assert OutputHandler._get_exit_code(scan) == expected
        snapshot.assert_match(JSONScanCollectionSchema().loads(json_flat_results))
def pypi_cmd(ctx: click.Context, package_name: str) -> int:  # pragma: no cover
    """
    scan a pypi package <NAME>.
    """
    config: Config = ctx.obj["config"]
    output_handler: OutputHandler = ctx.obj["output_handler"]

    with tempfile.TemporaryDirectory(suffix="ggshield") as temp_dir:
        # Download the package archive, then scan its extracted files.
        save_package_to_tmp(temp_dir=temp_dir, package_name=package_name)
        files: Files = get_files_from_package(
            archive_dir=temp_dir,
            package_name=package_name,
            exclusion_regexes=ctx.obj["exclusion_regexes"],
            verbose=config.verbose,
        )

        with click.progressbar(
            length=len(files.files), label="Scanning"
        ) as progressbar:

            def on_chunk_scanned(chunk: List[Dict[str, Any]]) -> None:
                # Advance the bar by the number of files in the finished chunk.
                progressbar.update(len(chunk))

            results: List[Result] = files.scan(
                client=ctx.obj["client"],
                cache=ctx.obj["cache"],
                matches_ignore=config.matches_ignore,
                banlisted_detectors=config.banlisted_detectors,
                all_policies=config.all_policies,
                verbose=config.verbose,
                on_file_chunk_scanned=on_chunk_scanned,
            )

        collection = ScanCollection(
            id=package_name, type="path_scan", results=results
        )
        return output_handler.process_scan(collection)
def precommit_cmd(ctx: click.Context, precommit_args: List[str]) -> int:  # pragma: no cover
    """
    scan as a pre-commit git hook.
    """
    config = ctx.obj["config"]
    # The pre-commit hook always reports as plain text on stdout.
    output_handler = TextOutputHandler(
        show_secrets=config.show_secrets, verbose=config.verbose, output=None
    )
    try:
        check_git_dir()
        staged_commit = Commit(exclusion_regexes=ctx.obj["exclusion_regexes"])
        scan_results = staged_commit.scan(
            client=ctx.obj["client"],
            cache=ctx.obj["cache"],
            matches_ignore=config.matches_ignore,
            all_policies=config.all_policies,
            verbose=config.verbose,
            banlisted_detectors=config.banlisted_detectors,
        )
        return output_handler.process_scan(
            ScanCollection(id="cached", type="pre-commit", results=scan_results)
        )
    except Exception as error:
        return handle_exception(error, config.verbose)