def test_extra_headers(
    request_mock: Mock,
    client: GGClient,
    session_headers: Any,
    extra_headers: Optional[Dict[str, str]],
    expected_headers: Dict[str, str],
):
    """
    GIVEN a client whose session carries its own headers
    WHEN multi_content_scan and content_scan are called with extra headers
    THEN the request receives the session and per-call headers merged,
    with the per-call headers taking priority
    """
    client.session.headers = session_headers

    # Canned error response handed back by the mocked request.
    error_response = Mock(spec=Response)
    error_response.headers = {"content-type": "text"}
    error_response.text = "some error"
    error_response.status_code = 400
    request_mock.return_value = error_response

    documents = [{"filename": FILENAME, "document": DOCUMENT}]
    client.multi_content_scan(documents, extra_headers=extra_headers)
    assert request_mock.called
    # call_args is an (args, kwargs) pair; only the keyword arguments matter.
    sent_kwargs = request_mock.call_args[1]
    assert expected_headers == sent_kwargs["headers"]

    client.content_scan("some_string", extra_headers=extra_headers)
    assert request_mock.called
    sent_kwargs = request_mock.call_args[1]
    assert expected_headers == sent_kwargs["headers"]
def scan(
    self,
    client: GGClient,
    matches_ignore: Iterable[str],
    all_policies: bool,
    verbose: bool,
) -> List[Result]:
    """
    Scan the entries of ``self.scannable_list`` and collect those with breaks.

    Documents are sent in slices of at most MULTI_DOCUMENT_LIMIT entries per
    ``client.multi_content_scan`` call. A slice whose scan is not successful
    is handed to ``handle_scan_error`` and skipped.

    :param client: client used to run the scans
    :param matches_ignore: forwarded to ``remove_ignored_from_result``
    :param all_policies: forwarded to ``remove_ignored_from_result``
    :param verbose: not used by this method
    :return: one Result per scanned document that still has policy breaks
        after ``remove_ignored_from_result`` has been applied
    """
    documents = self.scannable_list
    findings = []

    for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT):
        batch = documents[start:start + MULTI_DOCUMENT_LIMIT]
        response = client.multi_content_scan(batch)

        if not response.success:
            handle_scan_error(response, batch)
            continue

        for position, scan_result in enumerate(response.scan_results):
            remove_ignored_from_result(scan_result, all_policies, matches_ignore)
            if not scan_result.has_policy_breaks:
                continue

            # Scan results are matched to their source document by position.
            document = batch[position]
            findings.append(
                Result(
                    content=document["document"],
                    scan=scan_result,
                    filemode=document["filemode"],
                    filename=document["filename"],
                )
            )

    return findings
def test_multi_content_scan(
    client: GGClient,
    name: str,
    to_scan: List[Dict[str, str]],
    expected: str,
    has_secrets: bool,
    has_policy_breaks: bool,
):
    """
    GIVEN a list of documents and the cassette recorded for them
    WHEN the documents are sent through client.multi_content_scan
    THEN a MultiScanResult with status 200 and the expected flags is returned
    and, when `expected` is provided, each scan result matches its entry

    :param name: cassette name, without the ".yaml" extension
    :param expected: JSON-encoded sequence with one entry per scan result,
        each holding "policies" and "policy_break_count"; a falsy value
        skips the per-result checks
    """
    with my_vcr.use_cassette(name + ".yaml"):
        multiscan = client.multi_content_scan(to_scan)

        assert multiscan.status_code == 200
        if not isinstance(multiscan, MultiScanResult):
            # pytest.fail raises, so no explicit return is needed after it.
            pytest.fail("multiscan is not a MultiScanResult")

        assert isinstance(multiscan.to_dict(), dict)
        assert isinstance(multiscan.to_json(), str)
        assert isinstance(repr(multiscan), str)
        assert isinstance(str(multiscan), str)
        assert multiscan.has_secrets == has_secrets
        assert multiscan.has_policy_breaks == has_policy_breaks

        if expected:
            # Parse once; the payload is the same for every scan result.
            example_dict = json.loads(expected)
            for i, scan_result in enumerate(multiscan.scan_results):
                assert all(
                    elem in example_dict[i]["policies"]
                    for elem in scan_result.policies
                )
                assert (
                    scan_result.policy_break_count
                    == example_dict[i]["policy_break_count"]
                )
def test_multi_content_not_ok():
    """
    GIVEN a client built with an invalid API key
    WHEN a multi-document scan is requested
    THEN a Detail with status 401 and the "Invalid API key." message is returned
    """
    unauthorized_client = GGClient(base_uri=base_uri, api_key="invalid")
    documents = [{"document": "valid"}]

    response = unauthorized_client.multi_content_scan(documents)

    assert response.status_code == 401
    assert isinstance(response, Detail)
    assert response.detail == "Invalid API key."
def scan(file_list):
    """
    Scan the given documents for secrets, in chunks of MULTI_DOCUMENT_LIMIT.

    The API key is read from the GG_API_KEY environment variable. Scanning is
    best effort: a chunk that raises, or whose response is not successful, is
    reported on stdout and left out of the results.

    :param file_list: sliceable sequence of documents accepted by
        ``GGClient.multi_content_scan`` (presumably dicts with "document" and
        "filename" keys; confirm against the caller), or None
    :return: list of scan results for the chunks that were scanned
        successfully; None when ``file_list`` is None
    """
    if file_list is None:
        print('Scanner did not receive documents to scan.')
        return None

    api_key = os.getenv("GG_API_KEY")
    client = GGClient(api_key=api_key)

    # scan docs in chunks to stay within the size limit
    scanned = []
    for i in range(0, len(file_list), MULTI_DOCUMENT_LIMIT):
        chunk = file_list[i:i + MULTI_DOCUMENT_LIMIT]
        try:
            scan_result = client.multi_content_scan(chunk)
            if scan_result.success:
                scanned.extend(scan_result.scan_results)
            else:
                # An error response is returned rather than raised; report it
                # so that incomplete results do not go unnoticed.
                print('Could not scan some files. ' + str(scan_result))
        except Exception as exc:
            print('Could not scan some files. ' + str(exc))
    return scanned
# Example script: scan every file below the current directory for secrets.
client = GGClient(api_key="API_KEY")

# Creating a list of dictionaries for scanning files
to_scan = []
for name in glob.glob("**/*", recursive=True):
    # The recursive glob also yields directories, which cannot be opened.
    if not os.path.isfile(name):
        continue
    try:
        with open(name) as fn:
            document = fn.read()
    except (OSError, UnicodeDecodeError) as exc:
        # Unreadable or non-text files are skipped instead of aborting the run.
        print(f"Skipping {name}: {exc}", file=sys.stderr)
        continue
    to_scan.append({"document": document, "filename": os.path.basename(name)})

to_process = []
# Documents whose chunk was scanned successfully, kept index-aligned with
# to_process so each result can be reported under the right filename.
scanned_docs = []
for i in range(0, len(to_scan), MULTI_DOCUMENT_LIMIT):
    chunk = to_scan[i:i + MULTI_DOCUMENT_LIMIT]
    try:
        scan = client.multi_content_scan(chunk)
    except Exception as exc:
        # Handle exceptions such as schema validation
        traceback.print_exc(2, file=sys.stderr)
        print(str(exc))
        # There is no response for this chunk, so move on to the next one.
        continue
    if not scan.success:
        print("Error scanning some files. Results may be incomplete.")
        print(scan)
        # Skip the chunk rather than reading scan_results from an
        # unsuccessful response.
        continue
    to_process.extend(scan.scan_results)
    scanned_docs.extend(chunk)

# Printing the results
for document, scan_result in zip(scanned_docs, to_process):
    if scan_result.has_secrets:
        print(
            f"{document['filename']}: {scan_result.policy_break_count} break/s found"
        )
def test_multi_content_exceptions(
    client: GGClient, to_scan: List, exception: Type[Exception]
):
    """
    GIVEN a payload together with the exception type it should trigger
    WHEN the payload is passed to client.multi_content_scan
    THEN that exception type is raised
    """
    expect_failure = pytest.raises(exception)
    with expect_failure:
        client.multi_content_scan(to_scan)