class Xml:
    """Extract key/value candidates from an XML file.

    ``breadcrumbs`` holds the tags of the elements currently open during
    traversal. The very same list object is yielded with every triple and is
    mutated in place as the walk proceeds, so a consumer that wants to keep a
    path must copy it.
    """

    def __init__(self):
        self.breadcrumbs = []
        self.rules = WhisperRules()

    def pairs(self, filepath: Path):
        """Yield ``(key, value, breadcrumbs)`` triples found in *filepath*.

        Any exception raised while parsing or walking the document is
        reported through ``debug`` and ends the iteration; it is never
        propagated to the caller.
        """

        def _walk(events):
            """Traverse XML document"""
            for kind, node in events:
                if kind == "start":
                    self.breadcrumbs.append(node.tag)
                elif kind == "end":
                    # The element was fully handled on its "start" event
                    self.breadcrumbs.pop()
                    continue

                # Format: <elem key="value">
                for attr_name, attr_value in node.attrib.items():
                    yield attr_name, attr_value, self.breadcrumbs
                    # Format: <elem name="jdbc:mysql://host?k1=v1&k2=v2">
                    if self.rules.match("uri", attr_value):
                        for part, content in Uri().pairs(attr_value):
                            yield part, content, self.breadcrumbs

                text = node.text
                if not text:
                    # Elements without text are not inspected any further,
                    # including their <key>/<value> children
                    continue

                # Format: <key>value</key>
                yield node.tag, text, self.breadcrumbs

                # Format: <elem>key=value</elem> (exactly one "=" sign)
                if text.count("=") == 1:
                    left, _, right = text.partition("=")
                    yield left, right, self.breadcrumbs

                # Format: <key>name</key><value>string</value>
                # The last <key> and the last <value> child win
                labelled = {}
                for child in node:
                    child_tag = str(child.tag).lower()
                    if child_tag in ("key", "value"):
                        labelled[child_tag] = child.text
                if labelled.get("key") and labelled.get("value"):
                    yield labelled["key"], labelled["value"], self.breadcrumbs

        try:
            # NOTE(review): recover=True and iterwalk look like the lxml API,
            # so ElementTree is presumably lxml.etree - confirm at the import
            lenient_parser = ElementTree.XMLParser(recover=True)
            document = ElementTree.parse(filepath.as_posix(), lenient_parser)
            events = ElementTree.iterwalk(document, events=("start", "end"))
            yield from _walk(events)
        except Exception as e:
            debug(f"{type(e)} in {filepath}")
class Plaintext:
    """Extract key/value candidates from URI-like tokens in a text file."""

    def __init__(self):
        self.rules = WhisperRules()

    def pairs(self, filepath: Path):
        """Yield whatever ``Uri().pairs`` produces for each URI-like token.

        The file is read line by line; lines for which ``strip_string``
        returns something falsy are skipped. Every remaining line is split on
        whitespace and each token matching the "uri" rule is handed to
        ``Uri().pairs``.
        """
        # Read everything up front inside a context manager so the handle is
        # closed deterministically and is never held open while this
        # generator is suspended between yields.
        with filepath.open("r") as handle:
            lines = handle.readlines()

        for line in lines:
            if not strip_string(line):
                continue
            for value in line.split():
                if self.rules.match("uri", value):
                    yield from Uri().pairs(value)
class StructuredDocument:
    """Extract key/value candidates from already-parsed YAML/JSON data.

    ``breadcrumbs`` is the chain of dict keys leading to the node currently
    being visited. The very same list object is yielded with every triple and
    is mutated in place while traversing, so a consumer that wants to keep a
    path must copy it.
    """

    def __init__(self):
        self.breadcrumbs = []
        self.rules = WhisperRules()

    def traverse(self, code, key=None):
        """Recursively traverse YAML/JSON document

        Yields ``(key, value, breadcrumbs)`` triples, plus the
        ``(key, value)`` pairs produced by ``cloudformation`` for a dict at
        the tree root. *key* is the dict key under which *code* was found; it
        is reused as the key for every item of a list.
        """
        if isinstance(code, dict):
            yield from self.cloudformation(code)
            for k, v in code.items():
                self.breadcrumbs.append(k)
                yield k, v, self.breadcrumbs
                yield from self.traverse(v, key=k)
                self.breadcrumbs.pop()
            # Special key/value format: {"key": <name>, "value": <content>}
            if "key" in code and "value" in code:
                yield code["key"], code["value"], self.breadcrumbs
        elif isinstance(code, list):
            for item in code:
                yield key, item, self.breadcrumbs
                yield from self.traverse(item, key=key)
        elif isinstance(code, str):
            # Inline "key=value" string, split on the first "=" only
            left, separator, right = code.partition("=")
            if separator:
                yield left, right, self.breadcrumbs
            if self.rules.match("uri", code):
                for k, v in Uri().pairs(code):
                    yield k, v, self.breadcrumbs

    def cloudformation(self, code):
        """
        AWS CloudFormation format

        Yields ``(parameter_name, default_value)`` for every entry of the
        top-level ``Parameters`` mapping that declares a ``Default``.
        Nothing is yielded when *code* is not the tree root, when it lacks
        ``AWSTemplateFormatVersion``, or when ``Parameters`` is missing.
        Malformed input (``Parameters`` or one of its entries not being a
        mapping, e.g. a YAML ``null``) is skipped instead of raising.
        """
        if self.breadcrumbs:
            return  # Not tree root
        if "AWSTemplateFormatVersion" not in code:
            return  # Not CF format
        parameters = code.get("Parameters")
        if not isinstance(parameters, dict):
            return  # No parameters (or not a mapping)
        for name, spec in parameters.items():
            if not isinstance(spec, dict) or "Default" not in spec:
                continue  # No default value
            yield name, spec["Default"]
class WhisperSecrets:
    """Run the configured rules against key/value pairs extracted from files.

    *config* must provide ``config["rules"]`` (passed to
    ``WhisperRules.load_rules_from_dict``) and ``config["exclude"]`` with
    ``"keys"`` and ``"values"`` entries whose items expose ``.match()``
    (presumably compiled regular expressions - confirm against the config
    loader).
    """

    def __init__(self, config):
        self.exclude = config["exclude"]
        self.breadcrumbs = []
        self.rules = WhisperRules()
        self.rules.load_rules_from_dict(config["rules"])

    def is_static(self, key: str, value: str) -> bool:
        """
        Check if given value is static (hardcoded).
        If value is dynamic, it's not a hardcoded secret.

        Returns False for non-strings, empty strings, variable references,
        placeholders, IaC intrinsics, values that merely echo their key, and
        anything matched by the configured key/value exclusions.
        """
        if not isinstance(value, str):
            return False  # Not string
        if not value:
            return False  # Empty
        if value.startswith("$") and "$" not in value[2:]:
            return False  # Variable
        if "{{" in value and "}}" in value:
            return False  # Variable
        if value.startswith("{") and value.endswith("}"):
            # A long brace-wrapped base64 payload is a token, not a variable
            if len(value) > 50 and self.rules.match("base64", value[1:-1]):
                return True  # Token
            return False  # Variable
        if value.startswith("${") and value.endswith("}"):
            return False  # Variable
        if value.startswith("<") and value.endswith(">"):
            return False  # Placeholder
        if value == "null":
            return False  # IaC
        if re.match(r"\![A-Za-z]+ .+", value):
            return False  # IaC !Ref !Sub ...
        if key:
            s_key = simple_string(key)
            s_value = simple_string(value)
            if s_key == s_value:
                return False  # Placeholder
            if s_value.endswith(s_key):
                return False  # Placeholder
        if any(ex.match(key) for ex in self.exclude["keys"]):
            return False  # Exclude keys
        if any(ex.match(value) for ex in self.exclude["values"]):
            return False  # Exclude values
        return True  # Hardcoded static value

    def is_excluded(self, breadcrumbs: list) -> bool:
        """Return True if any breadcrumb matches an excluded-key pattern."""
        return any(
            ex.match(str(crumb))
            for crumb in breadcrumbs
            for ex in self.exclude["keys"]
        )

    def detect_secrets(
        self,
        key: str,
        value: str,
        filepath: Path,
        breadcrumbs: Optional[list] = None,
    ) -> Optional[Secret]:
        """Check a single key/value pair and return the rule result, if any.

        *key* and string *value* are normalised with ``strip_string``;
        integer values are converted to text. Returns None when the value is
        neither text nor an integer, is not static, or lies under an excluded
        breadcrumb; otherwise returns ``self.rules.check(...)``.
        """
        # None instead of a shared mutable ``[]`` default
        if breadcrumbs is None:
            breadcrumbs = []

        key = strip_string(key) if key else ""

        if isinstance(value, str):
            value = strip_string(value)
        elif isinstance(value, int):
            value = str(value)
        else:
            return None  # Neither text nor digits

        if not self.is_static(key, value):
            return None  # Not static
        if self.is_excluded(breadcrumbs):
            return None  # Excluded via config
        return self.rules.check(key, value, filepath)

    def scan(self, filename: str) -> Optional[Secret]:
        """Yield the ``detect_secrets`` result for every pair in *filename*.

        This is a generator: each yielded item is what ``detect_secrets``
        returned, so items may be None. (The return annotation describes a
        single yielded item.) The file path itself is checked first, under
        the key "file". Nothing is yielded when ``WhisperPlugins(filename)``
        is falsy.
        """
        plugin = WhisperPlugins(filename)
        if not plugin:
            return
        yield self.detect_secrets("file", plugin.filepath.as_posix(), plugin.filepath)
        for ret in plugin.pairs():
            if len(ret) == 2:  # key, value
                yield self.detect_secrets(ret[0], ret[1], plugin.filepath)
            elif len(ret) == 3:  # key, value, breadcrumbs
                yield self.detect_secrets(ret[0], ret[1], plugin.filepath, breadcrumbs=ret[2])
def test_match(value, result):
    """The "valid" rule loaded from valid.yml must match *value* as *result*.

    NOTE(review): *value* and *result* are presumably supplied by a
    parametrize decorator that is not visible here - confirm.
    """
    outcome = WhisperRules(rule_path("valid.yml")).match("valid", value)
    assert outcome == result
def test_match(value, expectation):
    """The "valid" rule loaded from valid.yml must match *value* as expected.

    NOTE(review): *value* and *expectation* are presumably supplied by a
    parametrize decorator that is not visible here - confirm.
    """
    rules_file = rule_path("valid.yml")
    outcome = WhisperRules(rulespath=rules_file).match("valid", value)
    assert outcome == expectation