def test_cache_catches_last_found_secrets(client):
    """
    GIVEN a purged cache and a config whose matches_ignore is an empty set
    WHEN a commit holding several secrets is scanned
    THEN matches_ignore is left empty and the cache's last_found_secrets
         equals FOUND_SECRETS, both right after the scan and after the
         cache has been loaded again
    """
    commit = Commit()
    commit._patch = _MULTIPLE_SECRETS

    config = Config()
    config.matches_ignore = set()

    cache = Cache()
    cache.purge()
    assert cache.last_found_secrets == set()

    with my_vcr.use_cassette("multiple_secrets"):
        commit.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )

    # The scan must not touch the ignore configuration.
    assert config.matches_ignore == set()
    assert cache.last_found_secrets == FOUND_SECRETS

    # Reloading checks that the found secrets were persisted as well.
    cache.load_cache()
    assert cache.last_found_secrets == FOUND_SECRETS
def test_cache_catches_last_found_secrets(client):
    """
    GIVEN a purged cache and a config whose matches_ignore is an empty list
    WHEN a commit holding several secrets is scanned
    THEN matches_ignore is left empty, the "match" values stored in the
         cache's last_found_secrets equal those of FOUND_SECRETS, and after
         ignore_last_found every ignored entry has "test.txt" in its name
    """
    commit = Commit()
    commit._patch = _MULTIPLE_SECRETS

    config = Config()
    config.matches_ignore = []

    cache = Cache()
    cache.purge()
    assert cache.last_found_secrets == []

    with my_vcr.use_cassette("multiple_secrets"):
        commit.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )

    # The scan must not touch the ignore configuration.
    assert config.matches_ignore == []

    # Both sides are sorted with the same key so ordering cannot cause a
    # spurious mismatch; only the "match" field is compared.
    cached_matches = [
        secret["match"]
        for secret in sorted(cache.last_found_secrets, key=compare_matches_ignore)
    ]
    expected_matches = [
        secret["match"]
        for secret in sorted(FOUND_SECRETS, key=compare_matches_ignore)
    ]
    assert cached_matches == expected_matches

    ignore_last_found(config, cache)
    for ignored in config.matches_ignore:
        assert "test.txt" in ignored["name"]

    cache.load_cache()
def test_load_cache_and_purge(self, cli_fs_runner):
    """
    GIVEN a .cache_ggshield file listing "XXX" as a last found secret
    WHEN a Cache is created and then purged
    THEN last_found_secrets is {"XXX"} after creation and an empty set
         after the purge
    """
    payload = {"last_found_secrets": ["XXX"]}
    with open(".cache_ggshield", "w") as cache_file:
        json.dump(payload, cache_file)

    cache = Cache()
    assert cache.last_found_secrets == {"XXX"}

    cache.purge()
    assert cache.last_found_secrets == set()
def test_load_cache_and_purge(self, cli_fs_runner):
    """
    GIVEN a .cache_ggshield file listing one last found secret as a
          {"name", "match"} entry
    WHEN a Cache is created and then purged
    THEN last_found_secrets holds that entry after creation and is an
         empty list after the purge
    """
    payload = {"last_found_secrets": [{"name": "", "match": "XXX"}]}
    with open(".cache_ggshield", "w") as cache_file:
        json.dump(payload, cache_file)

    cache = Cache()
    assert cache.last_found_secrets == [{"name": "", "match": "XXX"}]

    cache.purge()
    assert cache.last_found_secrets == []
def scan(
    self,
    client: GGClient,
    cache: Cache,
    matches_ignore: Iterable[str],
    all_policies: bool,
    verbose: bool,
) -> List[Result]:
    """
    Scan the documents of ``self.scannable_list`` and collect policy breaks.

    The cache is purged first. The documents are then split into batches
    of at most ``MULTI_DOCUMENT_LIMIT`` items, and each batch is submitted
    to ``client.multi_content_scan`` on a thread pool. Batches are handled
    in completion order. Every policy break that remains after filtering
    is recorded in the cache, which is saved before returning.

    :param client: client whose ``multi_content_scan`` is called per batch
    :param cache: cache that is purged, filled with found policy breaks
        and finally saved
    :param matches_ignore: forwarded to ``remove_ignored_from_result``
    :param all_policies: forwarded to ``remove_ignored_from_result``
    :param verbose: accepted but not read by this method
    :return: one ``Result`` per scanned document that still has policy
        breaks after filtering
    """
    cache.purge()

    documents = self.scannable_list
    batches = [
        documents[start:start + MULTI_DOCUMENT_LIMIT]
        for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT)
    ]
    results = []

    worker_count = min(CPU_COUNT, 4)
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=worker_count, thread_name_prefix="content_scan"
    ) as executor:
        # Map each future back to its batch so the scanned documents can
        # be matched to the scan results by position.
        batch_by_future = {
            executor.submit(client.multi_content_scan, batch): batch
            for batch in batches
        }

        for future in concurrent.futures.as_completed(batch_by_future):
            batch = batch_by_future[future]
            response = future.result()

            if not response.success:
                handle_scan_error(response, batch)
                continue

            for index, scanned in enumerate(response.scan_results):
                remove_ignored_from_result(scanned, all_policies, matches_ignore)
                if not scanned.has_policy_breaks:
                    continue

                document = batch[index]
                for policy_break in scanned.policy_breaks:
                    cache.add_found_policy_break(
                        policy_break, document["filename"]
                    )
                results.append(
                    Result(
                        content=document["document"],
                        scan=scanned,
                        filemode=document["filemode"],
                        filename=document["filename"],
                    )
                )

    cache.save()
    return results
def cache() -> Cache:
    """Return a newly constructed ``Cache`` that has already been purged."""
    purged_cache = Cache()
    purged_cache.purge()
    return purged_cache