def scan(
    self,
    client: GGClient,
    cache: Cache,
    matches_ignore: Iterable[IgnoredMatch],
    all_policies: bool,
    verbose: bool,
    banlisted_detectors: Optional[Set[str]] = None,
    on_file_chunk_scanned: Callable[
        [List[Dict[str, Any]]], None
    ] = lambda chunk: None,
) -> List[Result]:
    """
    Scan every entry of ``self.scannable_list`` and collect the policy breaks.

    The cache is purged on entry and saved just before returning. Entries are
    sent to ``client.multi_content_scan`` in batches of at most
    ``MULTI_DOCUMENT_LIMIT`` items, through a thread pool capped at four
    workers. Batches are handled in completion order, so the order of the
    returned list is not tied to the order of ``self.scannable_list``.

    :param client: client whose ``multi_content_scan`` is called per batch
    :param cache: purged first, fed every retained policy break, then saved
    :param matches_ignore: forwarded to ``remove_ignored_from_result``
    :param all_policies: forwarded to ``remove_ignored_from_result``
    :param verbose: accepted but not used by this method
    :param banlisted_detectors: forwarded to
        ``remove_results_from_banlisted_detectors``
    :param on_file_chunk_scanned: called with each batch once its request has
        completed, before the outcome of the request is inspected
    :return: one ``Result`` per document that still has policy breaks after
        the two filtering steps
    """
    cache.purge()
    documents = self.scannable_list
    batches = [
        documents[start : start + MULTI_DOCUMENT_LIMIT]
        for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT)
    ]
    results = []

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=min(CPU_COUNT, 4), thread_name_prefix="content_scan"
    ) as pool:
        # Map each submitted job back to the batch it was given, so the
        # documents can be matched with their scan results on completion.
        pending = {}
        for batch in batches:
            job = pool.submit(
                client.multi_content_scan,
                batch,
                self.extra_headers,
            )
            pending[job] = batch

        for job in concurrent.futures.as_completed(pending):
            batch = pending[job]
            on_file_chunk_scanned(batch)

            response = job.result()
            if not response.success:
                handle_scan_error(response, batch)
                continue

            for position, scan_result in enumerate(response.scan_results):
                remove_ignored_from_result(scan_result, all_policies, matches_ignore)
                remove_results_from_banlisted_detectors(
                    scan_result, banlisted_detectors
                )
                if not scan_result.has_policy_breaks:
                    continue

                # Scan results are matched to their source document by
                # position within the batch.
                document = batch[position]
                for policy_break in scan_result.policy_breaks:
                    cache.add_found_policy_break(policy_break, document["filename"])
                results.append(
                    Result(
                        content=document["document"],
                        scan=scan_result,
                        filemode=document["filemode"],
                        filename=document["filename"],
                    )
                )

    cache.save()
    return results
def test_save_cache(self, cli_fs_runner):
    """
    GIVEN a cache file holding an empty JSON object
    WHEN the cache is updated with a found secret and saved
    THEN the file should contain the updated entry
    """
    with open(".cache_ggshield", "w") as cache_file:
        json.dump({}, cache_file)

    cache = Cache()
    cache.update_cache(last_found_secrets=["XXX"])
    cache.save()

    expected = {"last_found_secrets": ["XXX"]}
    with open(".cache_ggshield", "r") as cache_file:
        saved = json.load(cache_file)
    assert saved == expected
def test_save_cache_first_time(self, isolated_fs, with_entry):
    """
    GIVEN no cache file created by this test
    WHEN save is called with (an entry / no entry) added in memory
    THEN the cache file should (exist / not exist) afterwards
    """
    cache = Cache()
    if with_entry:
        cache.update_cache(last_found_secrets=["XXX"])
    cache.save()

    # The identity check requires with_entry to be a real bool, not merely truthy.
    file_created = os.path.isfile(".cache_ggshield")
    assert file_created is with_entry
def test_read_only_fs(self):
    """
    GIVEN a file-system that refuses writes
    WHEN the cache is saved
    THEN no exception should escape
    """
    cache = Cache()
    cache.update_cache(last_found_secrets=["XXX"])

    # Patch locally rather than with a decorator on the test: Cache.__init__
    # also calls open, and that call must not hit the failing mock.
    # A read-only file-system is simulated by making builtin open raise.
    with patch(
        "builtins.open", side_effect=OSError("Read-only file system")
    ) as open_mock:
        cache.save()
        # save() must have gone through the patched open exactly once.
        open_mock.assert_called_once()