def test_ignore_last_found_preserve_previous_config(client):
    """
    GIVEN a cache holding newly found secrets AND a config that already has options set
    WHEN the ignore command is run
    THEN the options that were already in the config are still there
    """
    already_ignored = [
        {"name": "", "match": "previous_secret"},
        {"name": "", "match": "other_previous_secret"},
    ]
    ignored_paths = {"some_path", "some_other_path"}

    config = Config()
    # The config gets its own copy of the list; the original list is reused
    # below to build the expected value.
    config.matches_ignore = already_ignored.copy()
    config.paths_ignore = ignored_paths
    config.exit_zero = True

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    # Compare order-insensitively by sorting both sides with the same key.
    actual = sorted(config.matches_ignore, key=compare_matches_ignore)
    expected = sorted(FOUND_SECRETS + already_ignored, key=compare_matches_ignore)
    assert actual == expected
    assert config.paths_ignore == ignored_paths
    assert config.exit_zero is True
def test_cache_old_config_no_new_secret(client):
    """
    GIVEN a cache whose last found secrets match the config's ignored matches,
        with the config's ignored matches stored as a plain list of strings
    WHEN a scan is run (so no secret is reported)
    THEN the config's ignored matches are unchanged and the cache is empty
    """
    commit = Commit()
    commit._patch = _MULTIPLE_SECRETS

    config = Config()
    config.matches_ignore = [secret["match"] for secret in FOUND_SECRETS]

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    with my_vcr.use_cassette("multiple_secrets"):
        results = commit.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )

    assert results == []
    # The expected list is rebuilt from FOUND_SECRETS after the scan has run.
    assert config.matches_ignore == [secret["match"] for secret in FOUND_SECRETS]
    assert cache.last_found_secrets == []
def test_load_cache_and_purge(self, cli_fs_runner):
    """
    GIVEN a .cache_ggshield file listing "XXX" under last_found_secrets
    WHEN a Cache is created and then purged
    THEN it first exposes {"XXX"} and an empty set after the purge
    """
    stored = {"last_found_secrets": ["XXX"]}
    with open(".cache_ggshield", "w") as cache_file:
        json.dump(stored, cache_file)

    loaded = Cache()
    assert loaded.last_found_secrets == {"XXX"}

    loaded.purge()
    assert loaded.last_found_secrets == set()
def test_load_cache_and_purge(self, cli_fs_runner):
    """
    GIVEN a .cache_ggshield file holding one name/match entry under last_found_secrets
    WHEN a Cache is created and then purged
    THEN it first exposes that entry and an empty list after the purge
    """
    entry = {"name": "", "match": "XXX"}
    with open(".cache_ggshield", "w") as cache_file:
        json.dump({"last_found_secrets": [entry]}, cache_file)

    loaded = Cache()
    assert loaded.last_found_secrets == [{"name": "", "match": "XXX"}]

    loaded.purge()
    assert loaded.last_found_secrets == []
def test_do_not_add_policy_breaks_to_last_found(client):
    """
    GIVEN a single policy break built with "gitignore" as its second argument
    WHEN add_found_policy_break is called with it
    THEN nothing is recorded in the cache's last_found_secrets
    """
    # NOTE(review): the second PolicyBreak argument is presumably the policy
    # name -- confirm against the PolicyBreak definition.
    single_match = Match("apikey", "apikey", 0, 0, 0, 0)
    gitignore_break = PolicyBreak("a", "gitignore", [single_match])

    cache = Cache()
    cache.add_found_policy_break(gitignore_break, "a")

    recorded = len(cache.last_found_secrets)
    assert recorded == 0
def test_ignore_last_found(client):
    """
    GIVEN a cache whose last found secrets are FOUND_SECRETS and an empty config
    WHEN the ignore-last-found command is run
    THEN the config's ignored matches equal FOUND_SECRETS
        AND the cache still holds FOUND_SECRETS
    """
    config = Config()
    config.matches_ignore = set()

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    assert config.matches_ignore == FOUND_SECRETS
    assert cache.last_found_secrets == FOUND_SECRETS
def test_ignore_last_found_with_manually_added_secrets(client):
    """
    GIVEN a config that already ignores one secret AND a cache holding FOUND_SECRETS
    WHEN the ignore command is run
    THEN the config's ignored matches are exactly FOUND_SECRETS
    """
    # presumably this value is also one of FOUND_SECRETS, given the final
    # assertion -- verify against the FOUND_SECRETS definition
    hand_written = "m42ploz2wd"

    config = Config()
    config.matches_ignore = {hand_written}

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    assert config.matches_ignore == FOUND_SECRETS
def test_load_invalid_cache(self, cli_fs_runner, capsys):
    """
    GIVEN a .cache_ggshield file containing only the key "invalid_option"
    WHEN a Cache is created
    THEN "Unrecognized key in cache" appears on standard output
    """
    bad_payload = {"invalid_option": True}
    with open(".cache_ggshield", "w") as cache_file:
        json.dump(bad_payload, cache_file)

    # Only the construction matters here; the instance itself is not used.
    Cache()

    stdout = capsys.readouterr().out
    assert "Unrecognized key in cache" in stdout
def test_ignore_last_found_with_manually_added_secrets(client):
    """
    GIVEN a config that already ignores one match AND a cache holding FOUND_SECRETS
    WHEN the ignore command is run
    THEN the config's ignored matches, once sorted, equal the sorted FOUND_SECRETS
    """
    # presumably this hash is also part of FOUND_SECRETS, given the final
    # assertion -- verify against the FOUND_SECRETS definition
    hand_written_hash = (
        "41b8889e5e794b21cb1349d8eef1815960bf5257330fd40243a4895f26c2b5c8"
    )

    config = Config()
    config.matches_ignore = [{"name": "", "match": hand_written_hash}]

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    # Sort both sides with the same key so ordering does not matter.
    actual = sorted(config.matches_ignore, key=compare_matches_ignore)
    expected = sorted(FOUND_SECRETS, key=compare_matches_ignore)
    assert actual == expected
def test_ignore_last_found_preserve_previous_config(client):
    """
    GIVEN a cache holding newly found secrets AND a config that already has options set
    WHEN the ignore command is run
    THEN the options that were already in the config are still there
    """
    already_ignored = {"previous_secret", "other_previous_secret"}
    ignored_paths = {"some_path", "some_other_path"}

    config = Config()
    # The very same set objects are handed to the config (no copy is taken).
    config.matches_ignore = already_ignored
    config.paths_ignore = ignored_paths
    config.exit_zero = True

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    expected_matches = FOUND_SECRETS.union(already_ignored)
    assert config.matches_ignore == expected_matches
    assert config.paths_ignore == ignored_paths
    assert config.exit_zero is True
def test_ignore_last_found_compatible_with_previous_matches_ignore_format(client):
    """
    GIVEN a cache holding new secrets
        AND a config whose matches_ignore is a non-empty list of plain strings
    WHEN the ignore command is run
    THEN matches_ignore contains the new secrets
        AND the plain string hashes are still present, unchanged
    """
    legacy_hashes = ["some_secret_hash", "another_secret_hash"]

    config = Config()
    # The config gets its own copy; the original list is reused below to
    # build the expected value.
    config.matches_ignore = legacy_hashes.copy()

    cache = Cache()
    cache.last_found_secrets = FOUND_SECRETS

    ignore_last_found(config, cache)

    actual = sorted(config.matches_ignore, key=compare_matches_ignore)
    expected = sorted(FOUND_SECRETS + legacy_hashes, key=compare_matches_ignore)
    assert actual == expected
def test_cache_catches_last_found_secrets(client):
    """
    GIVEN a purged cache and a config with an empty set of ignored matches
    WHEN a scan is run on a patch containing multiple secrets
    THEN the cache's last_found_secrets equals FOUND_SECRETS,
        both right after the scan and after calling load_cache again
    """
    commit = Commit()
    commit._patch = _MULTIPLE_SECRETS

    config = Config()
    config.matches_ignore = set()

    cache = Cache()
    cache.purge()
    assert cache.last_found_secrets == set()

    with my_vcr.use_cassette("multiple_secrets"):
        commit.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )

    # The scan is not expected to touch the config.
    assert config.matches_ignore == set()
    assert cache.last_found_secrets == FOUND_SECRETS

    cache.load_cache()
    assert cache.last_found_secrets == FOUND_SECRETS
def test_cache_catches_last_found_secrets(client):
    """
    GIVEN a purged cache and a config with an empty list of ignored matches
    WHEN a scan is run on a patch containing multiple secrets
    THEN the matches stored in the cache are those of FOUND_SECRETS
        AND after running the ignore command every ignored entry's name
        contains "test.txt"
    """
    commit = Commit()
    commit._patch = _MULTIPLE_SECRETS

    config = Config()
    config.matches_ignore = []

    cache = Cache()
    cache.purge()
    assert cache.last_found_secrets == []

    with my_vcr.use_cassette("multiple_secrets"):
        commit.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )

    # The scan is not expected to touch the config.
    assert config.matches_ignore == []

    # Only the "match" values are compared, in a common sorted order.
    cached_matches = [
        entry["match"]
        for entry in sorted(cache.last_found_secrets, key=compare_matches_ignore)
    ]
    expected_matches = [
        entry["match"]
        for entry in sorted(FOUND_SECRETS, key=compare_matches_ignore)
    ]
    assert cached_matches == expected_matches

    ignore_last_found(config, cache)
    for ignored in config.matches_ignore:
        assert "test.txt" in ignored["name"]

    cache.load_cache()
def test_save_cache(self, cli_fs_runner):
    """
    GIVEN a .cache_ggshield file containing an empty JSON object
    WHEN a Cache is created, updated with last_found_secrets={"XXX"} and saved
    THEN the file contains {"last_found_secrets": ["XXX"]}
    """
    cache_path = ".cache_ggshield"
    with open(cache_path, "w") as cache_file:
        json.dump({}, cache_file)

    cache = Cache()
    cache.update_cache(last_found_secrets={"XXX"})
    cache.save()

    with open(cache_path, "r") as cache_file:
        on_disk = json.load(cache_file)
    assert on_disk == {"last_found_secrets": ["XXX"]}
def scan(
    self,
    client: GGClient,
    cache: Cache,
    matches_ignore: Iterable[str],
    all_policies: bool,
    verbose: bool,
) -> List[Result]:
    """
    Scan every entry of ``self.scannable_list`` and collect the ones that
    still have policy breaks.

    The cache is purged first. The scannable entries are cut into batches of
    at most ``MULTI_DOCUMENT_LIMIT`` items and each batch is submitted to
    ``client.multi_content_scan`` on a thread pool. A batch whose scan is not
    successful is passed to ``handle_scan_error`` and skipped. Each scan
    result is filtered with ``remove_ignored_from_result``; when it still has
    policy breaks, they are recorded in the cache and a ``Result`` is added
    to the returned list. The cache is saved before returning.

    :param client: object whose ``multi_content_scan`` is called per batch
    :param cache: cache that is purged, filled with policy breaks and saved
    :param matches_ignore: forwarded to ``remove_ignored_from_result``
    :param all_policies: forwarded to ``remove_ignored_from_result``
    :param verbose: not used by this method
    :return: one ``Result`` per scanned entry that has policy breaks, in the
        order the batches complete
    """
    cache.purge()

    documents = self.scannable_list
    batches = [
        documents[start:start + MULTI_DOCUMENT_LIMIT]
        for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT)
    ]

    results = []
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=min(CPU_COUNT, 4), thread_name_prefix="content_scan"
    ) as executor:
        # Remember which batch each future belongs to, so scan results can be
        # matched back to the submitted entries.
        batch_by_future = {}
        for batch in batches:
            batch_by_future[executor.submit(client.multi_content_scan, batch)] = batch

        for finished in concurrent.futures.as_completed(batch_by_future):
            batch = batch_by_future[finished]
            response = finished.result()

            if not response.success:
                handle_scan_error(response, batch)
                continue

            for position, scanned in enumerate(response.scan_results):
                remove_ignored_from_result(scanned, all_policies, matches_ignore)
                if not scanned.has_policy_breaks:
                    continue

                # Scan results are matched to batch entries by position.
                entry = batch[position]
                for policy_break in scanned.policy_breaks:
                    cache.add_found_policy_break(policy_break, entry["filename"])

                results.append(
                    Result(
                        content=entry["document"],
                        scan=scanned,
                        filemode=entry["filemode"],
                        filename=entry["filename"],
                    )
                )

    cache.save()
    return results
def test_save_cache_first_time(self, cli_fs_runner):
    """
    GIVEN the .cache_ggshield file has been removed
    WHEN a new Cache is created and saved
    THEN the .cache_ggshield file exists
    """
    cache_path = ".cache_ggshield"
    os.remove(cache_path)

    fresh_cache = Cache()
    fresh_cache.save()

    file_exists = os.path.isfile(cache_path)
    assert file_exists is True
def cache() -> Cache:
    """Create a new Cache, purge it, and return it."""
    purged_cache = Cache()
    purged_cache.purge()
    return purged_cache
def test_defaults(self, cli_fs_runner):
    """
    GIVEN a newly created Cache
    WHEN each entry of its ``attributes`` is read back from the instance
    THEN the value equals that entry's ``default``
    """
    cache = Cache()
    for declared in cache.attributes:
        current_value = getattr(cache, declared.name)
        assert current_value == declared.default