def prepare_for_scan(user_path, scan_ignore):
    """
    Prepare the files for scanning by collecting their names and contents.

    The API (and the API key read from ``GG_API_KEY``) is health-checked
    first; no file is read when that check fails.

    :param user_path: glob pattern of the files to collect (expanded with
        ``recursive=True``)
    :param scan_ignore: collection of relative file names to skip
    :return: list of dictionaries with filenames and their contents for
        scanning, or ``None`` when the health check fails
    """
    API_KEY = os.getenv("GG_API_KEY")
    client = GGClient(api_key=API_KEY)

    # check health of the API and the API key used.
    if not client.health_check().success:
        print('Invalid API Key or API maintenance.')
        return None

    file_list = []
    for name in glob.iglob(user_path, recursive=True):
        if os.path.isdir(name):
            continue
        # NOTE(review): the [6:] slice presumably strips a leading "../../"
        # that appears because `user_path` is a pattern ending in "/**/*"
        # rather than a directory — confirm against the caller.
        relative_name = os.path.relpath(name, start=user_path)[6:]
        if (relative_name in scan_ignore
                or os.path.basename(name) == 'gg_secret_scanner_results.txt'):
            continue
        try:
            with open(name, mode='r', encoding='utf-8') as file:
                file_list.append({
                    'filename': relative_name,
                    'document': file.read()
                })
        except (OSError, UnicodeDecodeError):
            # skip files that cannot be opened or decoded as UTF-8
            # (like images or executables)
            continue
    return file_list
def test_extra_headers(
    request_mock: Mock,
    client: GGClient,
    session_headers: Any,
    extra_headers: Optional[Dict[str, str]],
    expected_headers: Dict[str, str],
):
    """
    GIVEN client's session headers
    WHEN calling any client method with additional headers
    THEN session/method headers should be merged with priority on method
    headers
    """
    client.session.headers = session_headers

    # Every request made through the mock answers with a plain-text 400.
    error_response = Mock(spec=Response)
    error_response.status_code = 400
    error_response.text = "some error"
    error_response.headers = {"content-type": "text"}
    request_mock.return_value = error_response

    documents = [{"filename": FILENAME, "document": DOCUMENT}]
    client.multi_content_scan(documents, extra_headers=extra_headers)
    assert request_mock.called
    # call_args is an (args, kwargs) pair; only the keyword arguments matter.
    sent_kwargs = request_mock.call_args[1]
    assert expected_headers == sent_kwargs["headers"]

    client.content_scan("some_string", extra_headers=extra_headers)
    assert request_mock.called
    sent_kwargs = request_mock.call_args[1]
    assert expected_headers == sent_kwargs["headers"]
def test_client_creation(
    api_key: str,
    uri: str,
    user_agent: str,
    timeout: float,
    exception: Type[Exception],
):
    """
    Check client construction for one set of parameters.

    When ``exception`` is given, building the client must raise it.
    Otherwise the created client must reflect the parameters, with
    ``DEFAULT_BASE_URI`` standing in for an empty ``uri``.
    """
    creation_kwargs = {
        "api_key": api_key,
        "base_uri": uri,
        "user_agent": user_agent,
        "timeout": timeout,
    }

    if exception is not None:
        with pytest.raises(exception):
            GGClient(**creation_kwargs)
        return

    client = GGClient(**creation_kwargs)

    expected_uri = uri if uri else DEFAULT_BASE_URI
    assert client.base_uri == expected_uri
    assert client.api_key == api_key
    assert client.timeout == timeout

    session_headers = client.session.headers
    assert user_agent in session_headers["User-Agent"]
    assert session_headers["Authorization"] == "Token {0}".format(api_key)
def test_versions_from_headers(request_mock: Mock, client: GGClient, method):
    """
    GIVEN a response carrying the two version headers
    WHEN a request is made through the client
    THEN the client exposes both versions,
    AND keeps them after a later response without those headers,
    AND a newly created client reports the same versions.
    """
    app_version_value = "1.0"
    secrets_engine_version_value = "2.0"

    mock_response = Mock(spec=Response)
    mock_response.headers = {
        "X-App-Version": app_version_value,
        "X-Secrets-Engine-Version": secrets_engine_version_value,
    }
    request_mock.return_value = mock_response

    client.request(method=method, endpoint="endpoint")
    assert request_mock.called

    # Compare by value: identity (`is`) of str objects is an implementation
    # detail and must not decide whether this test passes.
    assert client.app_version == app_version_value
    assert client.secrets_engine_version == secrets_engine_version_value

    # A response without version headers must not reset the known versions.
    mock_response = Mock(spec=Response)
    mock_response.headers = {}
    request_mock.return_value = mock_response

    client.request(method=method, endpoint="endpoint")
    assert request_mock.called

    assert client.app_version == app_version_value
    assert client.secrets_engine_version == secrets_engine_version_value

    other_client = GGClient(api_key="")
    assert other_client.app_version == app_version_value
    assert other_client.secrets_engine_version == secrets_engine_version_value
def test_multi_content_not_ok():
    """A multi-document scan with an invalid API key yields a 401 ``Detail``."""
    invalid_client = GGClient(base_uri=base_uri, api_key="invalid")
    documents = [{"document": "valid"}]

    response = invalid_client.multi_content_scan(documents)

    assert response.status_code == 401
    assert isinstance(response, Detail)
    assert response.detail == "Invalid API key."
def test_client__url_from_endpoint(base_uries, version, endpoints_and_urls):
    """
    Check ``_url_from_endpoint`` for every base URI / endpoint combination.

    Each ``(endpoint, expected_url)`` pair must resolve to ``expected_url``
    for a client built on each entry of ``base_uries``.
    """
    for curr_base_uri in base_uries:
        client = GGClient(api_key="validapi_keyforsure",
                          base_uri=curr_base_uri)
        for endpoint, expected_url in endpoints_and_urls:
            # Report the base URI under test, not an unrelated outer name.
            assert (
                client._url_from_endpoint(endpoint, version) == expected_url
            ), "Could not get the expected URL for base_uri=`{}`".format(
                curr_base_uri)
def test_content_not_ok():
    """A single-document scan with an invalid API key yields a 401 ``Detail``."""
    invalid_client = GGClient(base_uri=base_uri, api_key="invalid")

    response = invalid_client.content_scan(document="valid", filename="valid")

    assert response.status_code == 401
    assert isinstance(response, Detail)
    assert response.detail == "Invalid API key."
def scan(
    ctx: click.Context,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
) -> int:
    """
    Command to scan various contents.

    Stores a ``GGClient`` and the path filter set in ``ctx.obj``, then copies
    every option that is not ``None`` onto the config object.

    :raises click.ClickException: when ``GITGUARDIAN_API_KEY`` is not set
    :return: always ``0``
    """
    api_key = os.getenv("GITGUARDIAN_API_KEY")
    config = ctx.obj["config"]
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    ctx.obj["filter_set"] = path_filter_set(
        Path(os.getcwd()), config.paths_ignore
    )

    # Options left at None keep whatever the config already holds.
    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    return 0
def retrieve_client(config: Config) -> GGClient:
    """
    Build a ``GGClient`` from ``config``.

    TLS verification is turned off on the session (and urllib3 warnings are
    silenced) when ``config.allow_self_signed`` is set.

    :raises click.ClickException: when no API key is available for the
        default dashboard, or when the client rejects its parameters
    """
    http_session = Session()
    if config.allow_self_signed:
        urllib3.disable_warnings()
        http_session.verify = False

    try:
        api_key, api_url = config.api_key, config.api_url
    except UnknownInstanceError as e:
        if e.instance != DEFAULT_DASHBOARD_URL:
            raise
        # This can happen when the user first tries the app and has not gone
        # through the authentication procedure yet. In this case, replace the
        # error message complaining about an unknown instance with a more
        # user-friendly one.
        raise click.ClickException("GitGuardian API key is needed.")

    try:
        client = GGClient(
            api_key=api_key,
            base_uri=api_url,
            user_agent="ggshield",
            timeout=60,
            session=http_session,
        )
    except ValueError as e:
        # Can be raised by pygitguardian
        raise click.ClickException(f"Failed to create API client. {e}")
    return client
def scan(
    self,
    client: GGClient,
    matches_ignore: Iterable[str],
    all_policies: bool,
    verbose: bool,
) -> List[Result]:
    """
    Scan ``self.scannable_list`` in chunks of ``MULTI_DOCUMENT_LIMIT``.

    Chunks whose scan is unsuccessful are passed to ``handle_scan_error`` and
    skipped. Ignored matches are removed from every result, and only results
    that still have policy breaks are returned.
    """
    documents = self.scannable_list
    found = []
    for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT):
        chunk = documents[start:start + MULTI_DOCUMENT_LIMIT]
        chunk_scan = client.multi_content_scan(chunk)
        if not chunk_scan.success:
            handle_scan_error(chunk_scan, chunk)
            continue

        for position, scan_result in enumerate(chunk_scan.scan_results):
            remove_ignored_from_result(scan_result, all_policies,
                                       matches_ignore)
            if not scan_result.has_policy_breaks:
                continue
            # Results are matched to their source document by position.
            source = chunk[position]
            found.append(
                Result(
                    content=source["document"],
                    scan=scan_result,
                    filemode=source["filemode"],
                    filename=source["filename"],
                ))
    return found
def test_multi_content_scan(
    client: GGClient,
    name: str,
    to_scan: List[Dict[str, str]],
    expected: str,
    has_secrets: bool,
    has_policy_breaks: bool,
):
    """
    Replay the ``name`` cassette and check the multi-document scan result.

    ``expected`` is an optional JSON string; when given, each scan result is
    compared with the entry at the same index.
    """
    with my_vcr.use_cassette(name + ".yaml"):
        multiscan = client.multi_content_scan(to_scan)

        assert multiscan.status_code == 200
        if not isinstance(multiscan, MultiScanResult):
            pytest.fail("multiscan is not a MultiScanResult")

        assert type(multiscan.to_dict()) is dict
        assert type(multiscan.to_json()) is str
        assert type(repr(multiscan)) is str
        assert type(str(multiscan)) is str
        assert multiscan.has_secrets == has_secrets
        assert multiscan.has_policy_breaks == has_policy_breaks

        if expected:
            # Parse the expectation once instead of once per scan result.
            example_dict = json.loads(expected)
            for i, scan_result in enumerate(multiscan.scan_results):
                assert all(elem in example_dict[i]["policies"]
                           for elem in scan_result.policies)
                assert (scan_result.policy_break_count ==
                        example_dict[i]["policy_break_count"])
def test_health_check(client: GGClient):
    """A healthy API answers 200 and serialises to plain ``dict``/``str``."""
    result = client.health_check()

    assert result.status_code == 200
    assert result.detail == "Valid API key."
    assert str(result) == "200:Valid API key."

    # Both truthiness and the explicit flag must report success.
    assert bool(result)
    assert result.success

    assert type(result.to_dict()) is dict
    assert type(result.to_json()) is str
async def content_scan(self, api_key, content, file_id):
    """
    Scan a single document and return the result as JSON.

    Exactly one of ``content`` and ``file_id`` is expected; the document is
    taken from ``file_id['data']`` or from ``content``.

    :raises Exception: when both ``file_id`` and ``content`` are given
    :return: the JSON scan result, an ``"Exception occured: ..."`` string when
        the scan raises, or ``None`` when neither input is given
    """
    client = GGClient(api_key=api_key)

    if file_id and content:
        raise Exception("Can not use file_id & content at once, Please use either one of them.")

    if file_id:
        document = file_id['data']
    elif content:
        document = content
    else:
        return None

    try:
        return client.content_scan(document=document).to_json()
    except Exception as e:
        return f"Exception occured: {e}"
def test_health_check(client: GGClient):
    """A healthy API answers 200 and reports its versions in ``str()``."""
    expected_text = (
        "detail:Valid API key., status_code:200, "
        "app version:1.26.0-rc.4, secrets engine version:2.43.0"
    )

    result = client.health_check()

    assert result.status_code == 200
    assert result.detail == "Valid API key."
    assert str(result) == expected_text

    # Both truthiness and the explicit flag must report success.
    assert bool(result)
    assert result.success

    assert type(result.to_dict()) is OrderedDict
    assert type(result.to_json()) is str
def scan(file_list):
    """
    Scan the collected documents for secrets, one chunk at a time.

    :param file_list: documents to scan, or ``None``
    :return: list with the scan results of every successful chunk, or
        ``None`` when ``file_list`` is ``None``
    """
    if file_list is None:
        print('Scanner did not receive documents to scan.')
        return None

    client = GGClient(api_key=os.getenv("GG_API_KEY"))

    collected = []
    # scan docs in chunks to stay within the size limit
    for start in range(0, len(file_list), MULTI_DOCUMENT_LIMIT):
        batch = file_list[start:start + MULTI_DOCUMENT_LIMIT]
        try:
            batch_result = client.multi_content_scan(batch)
            if batch_result.success:
                collected.extend(batch_result.scan_results)
        except Exception as exc:
            print('Could not scan some files. ' + str(exc))
    return collected
def test_health_check_error(client: GGClient):
    """A configuration error answers 400 and is reported as a failure."""
    expected_text = (
        "detail:Configuration error., status_code:400, "
        "app version:1.26.0-rc.4, secrets engine version:2.43.0"
    )

    result = client.health_check()

    assert result.status_code == 400
    assert result.detail == "Configuration error."
    assert str(result) == expected_text

    # Both truthiness and the explicit flag must report the failure.
    assert bool(result) is False
    assert result.success is False

    assert type(result.to_dict()) is OrderedDict
    assert type(result.to_json()) is str
def test_assert_content_type(client: GGClient):
    """
    GIVEN a response that's 200 but the content is not JSON
    WHEN is_ok is called
    THEN is_ok should be false
    WHEN load_detail is called
    THEN it should return a Detail object
    """
    response = client.get(endpoint="/docs/static/logo.png", version=None)
    assert is_ok(response) is False

    detail = load_detail(response)
    # The status code is copied over from the raw response before checking it.
    detail.status_code = response.status_code
    assert detail.status_code == 200
    assert isinstance(detail, Detail)

    rendered = str(detail)
    assert rendered.startswith("200:"), rendered
def scan(
    ctx: click.Context,
    mode: str,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
) -> int:
    """
    Command to scan various contents.

    Stores a ``GGClient`` and the path filter set in ``ctx.obj`` and copies
    every option that is not ``None`` onto the config object. When no
    subcommand is invoked, the deprecated ``--mode`` option is dispatched to
    the matching command; otherwise the help text is printed.

    :raises click.ClickException: when ``GITGUARDIAN_API_KEY`` is not set
    :return: the result of the invoked mode command, else ``0``
    """
    api_key = os.getenv("GITGUARDIAN_API_KEY")
    config = ctx.obj["config"]
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    return_code = 0
    ctx.obj["filter_set"] = path_filter_set(Path(os.getcwd()),
                                            config.paths_ignore)

    # Options left at None keep whatever the config already holds.
    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    if ctx.invoked_subcommand is not None:
        return return_code

    if mode:
        click.echo(
            "--mode has been deprecated and will be removed "
            "after ggshield version 1.2. prefer to use subcommands.")
        if mode == "pre-commit":
            return ctx.invoke(precommit_cmd)
        if mode == "ci":
            return ctx.invoke(ci_cmd)

    # No mode, or a mode that is not recognised: show the help text.
    click.echo(ctx.get_help())
    return return_code
def scan(
    ctx: click.Context,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
    json_output: bool,
    output: Optional[str],
) -> int:
    """
    Command to scan various contents.

    Stores a ``GGClient``, the path filter set and an output handler in
    ``ctx.obj``, after copying every option that is not ``None`` onto the
    config object.

    :raises click.ClickException: when ``GITGUARDIAN_API_KEY`` is not set
    :return: always ``0``
    """
    api_key = os.getenv("GITGUARDIAN_API_KEY")
    base_uri = os.getenv("GITGUARDIAN_API_URL", ctx.obj["config"].api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    ctx.obj["filter_set"] = path_filter_set(
        Path(os.getcwd()), ctx.obj["config"].paths_ignore
    )

    config: Config = ctx.obj["config"]
    # Options left at None keep whatever the config already holds.
    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    handler_cls: Type[OutputHandler] = (
        JSONHandler if json_output else TextHandler
    )
    ctx.obj["output_handler"] = handler_cls(
        show_secrets=config.show_secrets,
        verbose=config.verbose,
        output=output,
    )

    return 0
def test_quota_overview(client: GGClient):
    """Replay the quota cassette and check the values and serialisation."""
    with my_vcr.use_cassette("quota.yaml"):
        response = client.quota_overview()

        assert type(repr(response)) is str
        assert type(str(response)) is str
        assert response.status_code == 200

        if not isinstance(response, QuotaResponse):
            pytest.fail("returned should be a QuotaResponse")

        quota = response.content
        assert quota.limit == 5000
        assert quota.count == 2
        assert quota.remaining == 4998
        assert quota.since == date(2021, 4, 18)

        assert type(response.to_dict()) is OrderedDict
        as_json = response.to_json()
        assert type(as_json) is str
        assert type(json.loads(as_json)) is dict
def test_content_scan(
    client: GGClient,
    name: str,
    to_scan: Dict[str, str],
    has_secrets: bool,
    has_policy_breaks: bool,
    policy_break_count: int,
):
    """Replay the ``name`` cassette and check the single-document scan result."""
    with my_vcr.use_cassette(name + ".yaml"):
        result = client.content_scan(**to_scan)

        assert type(repr(result)) is str
        assert type(str(result)) is str
        assert result.status_code == 200

        if not isinstance(result, ScanResult):
            pytest.fail("returned should be a ScanResult")

        assert result.has_secrets == has_secrets
        assert result.has_policy_breaks == has_policy_breaks
        assert result.policy_break_count == policy_break_count

        assert type(result.to_dict()) is dict
        as_json = result.to_json()
        assert type(as_json) is str
        assert type(json.loads(as_json)) is dict
def client() -> GGClient:
    """Build a ``GGClient`` from the environment, with built-in fallbacks."""
    return GGClient(
        os.environ.get("GITGUARDIAN_API_KEY", "1234567890"),
        os.environ.get("GITGUARDIAN_API_URL", "https://api.gitguardian.com"),
    )
def test_content_scan_exceptions(
    client: GGClient,
    to_scan: str,
    exception: Type[Exception],
    regex: str,
):
    """Scanning ``to_scan`` must raise ``exception`` with a message matching ``regex``."""
    expected_failure = pytest.raises(exception, match=regex)
    with expected_failure:
        client.content_scan(to_scan)
def scan(
    ctx: click.Context,
    paths: Union[List, str],
    mode: str,
    recursive: bool,
    yes: bool,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
    repo: str,
) -> int:
    """
    Command to scan various content.

    The scan target is chosen in this order: ``mode`` ("pre-commit" or "ci",
    after checking the git directory), then ``repo``, then ``paths``. With
    none of them, or with an unknown mode, the help text is printed. Options
    left at ``None`` fall back to the config values.

    The function ends by calling ``sys.exit`` with the scan's return code
    (forced to ``0`` by ``exit_zero`` or when the user aborts).

    :raises click.ClickException: when ``GITGUARDIAN_API_KEY`` is not set, or
        wrapping any other error raised while scanning
    """
    api_key = os.getenv("GITGUARDIAN_API_KEY")
    config = ctx.obj["config"]
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    client = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    return_code = 0

    matches_ignore = config.matches_ignore
    filter_set = path_filter_set(Path(os.getcwd()), config.paths_ignore)

    show_secrets = config.show_secrets if show_secrets is None else show_secrets
    all_policies = config.all_policies if all_policies is None else all_policies
    verbose = config.verbose if verbose is None else verbose
    exit_zero = config.exit_zero if exit_zero is None else exit_zero

    # Keyword arguments accepted by every scan_* entry point used below.
    shared_kwargs = {
        "client": client,
        "verbose": verbose,
        "matches_ignore": matches_ignore,
        "all_policies": all_policies,
        "show_secrets": show_secrets,
    }

    try:
        if mode:
            check_git_dir()
            if mode == "pre-commit":
                return_code = scan_pre_commit(filter_set=filter_set,
                                              **shared_kwargs)
            elif mode == "ci":
                return_code = scan_ci(filter_set=filter_set, **shared_kwargs)
            else:
                click.echo(ctx.get_help())
        elif repo:
            return_code = scan_repo(repo=repo, **shared_kwargs)
        elif paths:
            return_code = scan_path(
                paths=paths,
                paths_ignore=config.paths_ignore,
                recursive=recursive,
                yes=yes,
                **shared_kwargs,
            )
        else:
            click.echo(ctx.get_help())
    except click.exceptions.Abort:
        # The user aborted: this is not reported as a failure.
        return_code = 0
    except Exception as error:
        if verbose:
            traceback.print_exc()
        raise click.ClickException(str(error))

    if exit_zero:
        return_code = 0

    sys.exit(return_code)
def client():
    """Build a ``GGClient`` against ``base_uri`` with the live-server token.

    Falls back to ``"sample_api_key"`` when ``TEST_LIVE_SERVER_TOKEN`` is not
    set in the environment.
    """
    token = os.getenv("TEST_LIVE_SERVER_TOKEN", "sample_api_key")
    return GGClient(base_uri=base_uri, api_key=token)
import glob
import os
import sys
import traceback

from dotenv import load_dotenv
from pygitguardian import GGClient
from pygitguardian.config import MULTI_DOCUMENT_LIMIT

# Load the .env file first so GG_API_KEY is available to os.getenv below.
load_dotenv()
API_KEY = os.getenv("GG_API_KEY")

# Init GGClient for connecting to gitguardian.
# Pass the key read from the environment, not the literal string "API_KEY".
client = GGClient(api_key=API_KEY)

# Creating a list of dictionaries for scanning files
to_scan = []
for name in glob.glob("**/*", recursive=True):
    # The recursive pattern also yields directories, which cannot be opened.
    if not os.path.isfile(name):
        continue
    try:
        with open(name) as fn:
            to_scan.append({
                "document": fn.read(),
                "filename": os.path.basename(name)
            })
    except (OSError, UnicodeDecodeError):
        # Skip files that cannot be read as text (e.g. images, executables).
        continue

# Scan in chunks of MULTI_DOCUMENT_LIMIT and collect the results of every
# successful chunk.
to_process = []
for i in range(0, len(to_scan), MULTI_DOCUMENT_LIMIT):
    chunk = to_scan[i:i + MULTI_DOCUMENT_LIMIT]
    try:
        scan = client.multi_content_scan(chunk)
    except Exception:
        # Handle exceptions such as schema validation
        traceback.print_exc(2, file=sys.stderr)
        continue
    if scan.success:
        to_process.extend(scan.scan_results)
import traceback from requests import codes from pygitguardian import GGClient API_KEY = os.getenv("GG_API_KEY") FILENAME = ".env" DOCUMENT = """ import urllib.request url = 'http://*****:*****@cake.gitguardian.com/isreal.json' response = urllib.request.urlopen(url) consume(response.read())" """ client = GGClient(api_key=API_KEY) # Check the health of the API and the API key used. health_obj = client.health_check() if health_obj.status_code == codes[r"\o/"]: # this is 200 but cooler try: scan_result = client.content_scan(filename=FILENAME, document=DOCUMENT) except Exception as exc: # Handle exceptions such as schema validation traceback.print_exc(2, file=sys.stderr) print(str(exc)) print("Scan results:", scan_result.has_secrets, "-", scan_result.policy_break_count) else:
def test_multi_content_exceptions(
    client: GGClient,
    to_scan: List,
    exception: Type[Exception],
):
    """A multi-document scan of ``to_scan`` must raise ``exception``."""
    expected_failure = pytest.raises(exception)
    with expected_failure:
        client.multi_content_scan(to_scan)