def run(args):
    """
    Scan ``args.src`` and yield every truthy secret found by the scanner.

    ``args.src`` must name an existing file or directory; otherwise the
    problem is reported through ``debug`` and ``FileNotFoundError`` (missing
    path) or ``TypeError`` (neither file nor directory) is raised.

    When ``args.config`` is falsy it is replaced, on ``args`` itself, with the
    result of loading ``config.yml`` from the directory of this module.
    """
    src = Path(args.src)
    if not src.exists():
        debug(f"{src} does not exist")
        raise FileNotFoundError
    if not (src.is_file() or src.is_dir()):
        debug(f"{src} is neither a file nor a directory")
        raise TypeError
    # A single file is seeded as a POSIX string; a directory starts empty
    # and is filled by the include globs below.
    candidates = [src.as_posix()] if src.is_file() else []

    # Configure execution
    if not args.config:
        default_config = Path(__file__).parent / "config.yml"
        args.config = load_config(default_config.as_posix(), src=args.src)

    # Include files
    for pattern in args.config["include"]["files"]:
        candidates.extend(set(src.glob(pattern)))

    # Exclude files
    excluded = set(args.config["exclude"]["files"])
    candidates = list(set(candidates) - excluded)

    # Scan files
    scanner = WhisperSecrets(args)
    for filename in candidates:
        for secret in scanner.scan(filename):
            if not secret:
                continue
            yield secret
def pairs(self, filepath: Path):
    """
    Yield the pairs that ``self.traverse`` produces for a YAML-like file.

    The file is first converted to parsable YAML:
    - Quote unquoted values such as {{ placeholder }}
    - Remove text between <% %> and {% %}

    The converted text is loaded with ``yaml.safe_load`` and handed to
    ``self.traverse``. Any exception raised while loading or traversing is
    reported through ``debug`` and ends the generator without re-raising.
    Errors opening or reading the file are not caught here.
    """

    def _constructor(loader, tag_suffix, node):
        """This is needed to parse IaC syntax"""
        if isinstance(node, yaml.MappingNode):
            return loader.construct_mapping(node)
        if isinstance(node, yaml.SequenceNode):
            return loader.construct_sequence(node)
        ret = loader.construct_scalar(node)
        return f"{tag_suffix} {ret}"

    # Read everything up front inside a context manager so the file handle
    # is closed immediately instead of being left to the garbage collector.
    with filepath.open("r") as handle:
        lines = handle.readlines()

    converted = []
    for line in lines:
        if re.match(r".+(\[)?\{\{.*\}\}(\])?", line):
            # Existing double quotes become single quotes so the quotes
            # added around the placeholder do not clash with them.
            line = line.replace('"', "'")
            line = line.replace("{{", '"{{').replace("}}", '}}"')
        converted.append(line)
    document = "".join(converted)
    document = re.sub(r"[<{]%.*?%[}>]", "", document, flags=re.MULTILINE | re.DOTALL)
    # NOTE(review): without re.MULTILINE this pattern only matches when the
    # whole document is a single line starting with "#"; other comment lines
    # are left in place for the YAML parser. Kept as-is to preserve behavior.
    document = re.sub(r"^#.*$", "", document)

    # Load converted YAML
    # Registers _constructor on SafeLoader for the empty tag prefix on every
    # call (module-wide effect in PyYAML).
    yaml.add_multi_constructor("", _constructor, Loader=yaml.SafeLoader)
    try:
        code = yaml.safe_load(document)
        yield from self.traverse(code)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")
def pairs(self, filepath: Path):
    """
    Yield key-value pairs found in the commands of a shell-like file.

    Each line is stripped; a leading run of ``#`` is removed so commented-out
    commands are inspected too, and the line is passed through
    ``str.translate`` with ``escaped_chars``. Lines ending with a backslash
    are joined with the following line(s) before being split with
    ``shlex.split``.

    For every parsed command:
    - if the first token is ``curl`` (case-insensitive), the pairs from
      ``self.curl`` are yielded;
    - every token containing exactly one ``=`` is yielded as ``(key, value)``.

    A command that ``shlex.split`` cannot parse is reported through ``debug``;
    its text stays buffered and the next line is appended to it.
    """
    # Read all lines inside a context manager so the file handle is closed
    # right away instead of leaking until garbage collection.
    with filepath.open("r") as handle:
        lines = handle.readlines()

    full_command = []
    for line in lines:
        line = line.strip()
        if line.startswith("#"):  # Comments
            line = line.lstrip("#").strip()
        line = line.translate(escaped_chars)
        if line.endswith("\\"):  # Multi-line commands
            full_command.append(line[:-1])
            continue
        full_command.append(line)
        try:
            cmd = shlex.split(" ".join(full_command))
        except Exception as e:
            debug(f"Problem splitting {full_command}: {e}")
            continue
        full_command = []
        if not cmd:
            continue
        if cmd[0].lower() == "curl":
            yield from self.curl(cmd)
        for item in cmd:
            # Exactly one "=" means the token splits cleanly into key and value.
            if item.count("=") == 1:
                key, value = item.split("=")
                yield key, value
def test_debug():
    """
    Call debug() from inside an except block and check that the log file
    contains both the ZeroDivisionError text and the random message.
    """
    log_path = configure_log()
    marker = urandom(30).hex()
    try:
        1 / 0
    except Exception:
        debug(marker)
    logged = log_path.read_text()
    for expected in ("ZeroDivisionError: division by zero", marker):
        assert expected in logged
def pairs(self, filepath: Path):
    """
    Yield key-value pairs from the command lines of ``filepath``.

    Command lines come from ``self.read_commands`` and are tokenized with
    ``shlex.split``; a line that cannot be tokenized is reported through
    ``debug`` and skipped. When the first token is ``curl``
    (case-insensitive) the pairs from ``self.curl`` are yielded first. Every
    token that splits on ``=`` into exactly two parts is then yielded as
    ``(key, value)``.
    """
    for cmdline in self.read_commands(filepath):
        try:
            tokens = shlex.split(cmdline)
        except Exception:
            debug(f"Failed parsing {filepath.as_posix()}\n{cmdline}")
            continue
        if not tokens:
            continue
        if tokens[0].lower() == "curl":
            yield from self.curl(tokens)
        for token in tokens:
            parts = token.split("=")
            # Two parts means the token held exactly one "=".
            if len(parts) == 2:
                key, value = parts
                yield key, value
def pairs(self, filepath: Path):
    """
    Parse an XML file and yield ``(key, value, breadcrumbs)`` triples.

    The third item is always ``self.breadcrumbs`` itself, not a copy: the
    same list object is mutated as the walk enters and leaves elements, so a
    consumer that needs the path at yield time has to copy it.

    Any exception raised while parsing or walking is reported through
    ``debug`` and ends the generator without re-raising.
    """

    def _traverse(tree):
        """Traverse XML document"""
        for event, element in tree:
            # Keep self.breadcrumbs in step with the walk: push the tag on
            # "start", pop it on "end". "end" events yield nothing.
            if event == "start":
                self.breadcrumbs.append(element.tag)
            elif event == "end":
                self.breadcrumbs.pop()
                continue

            # Format: <elem key="value">
            for key, value in element.attrib.items():
                yield key, value, self.breadcrumbs

                # Format: <elem name="jdbc:mysql://host?k1=v1&k2=v2">
                if self.rules.match("uri", value):
                    for k, v in Uri().pairs(value):
                        yield k, v, self.breadcrumbs

            # Format: <key>value</key>
            # Elements without text produce nothing beyond their attributes.
            if not element.text:
                continue
            yield element.tag, element.text, self.breadcrumbs

            # Format: <elem>key=value</elem>
            # Only text with exactly one "=" is split into a pair.
            if "=" in element.text:
                item = element.text.split("=")
                if len(item) == 2:
                    yield item[0], item[1], self.breadcrumbs

            # Format: <key>name</key><value>string</value>
            # Scan the direct children; the last "key" and last "value"
            # child (tag compared case-insensitively) win.
            found_key = None
            found_value = None
            for item in element:
                if str(item.tag).lower() == "key":
                    found_key = item.text
                elif str(item.tag).lower() == "value":
                    found_value = item.text
            if found_key and found_value:
                yield found_key, found_value, self.breadcrumbs

    try:
        # NOTE(review): XMLParser(recover=True) and iterwalk look like the
        # lxml.etree API rather than the stdlib ElementTree; confirm against
        # the file's imports.
        parser = ElementTree.XMLParser(recover=True)
        tree = ElementTree.parse(filepath.as_posix(), parser)
        tree = ElementTree.iterwalk(tree, events=("start", "end"))
        yield from _traverse(tree)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")
def pairs(self, filepath: Path):
    """
    Yield the pairs that ``self.traverse`` produces for a JSON file.

    Convert custom JSON to parsable JSON
    - Remove comments that start with //

    Only lines that begin with ``//`` are treated as comments. The cleaned
    text is parsed with ``json.loads`` and handed to ``self.traverse``. Any
    exception raised while parsing or traversing is reported through
    ``debug`` and ends the generator without re-raising.
    """
    document = filepath.read_text()
    # MULTILINE anchors "^" at every line start. DOTALL must NOT be set:
    # with it ".*" runs across newlines and the substitution would delete
    # everything from the first comment line to the end of the file.
    document = re.sub(r"^//.*", "", document, flags=re.MULTILINE)

    # Load converted JSON
    try:
        document = json.loads(document)
        yield from self.traverse(document)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")
def pairs(self, filepath: Path):
    """
    Yield the pairs that ``self.traverse`` produces for a JSON file.

    Convert custom JSON to parsable JSON
    - Remove lines that start with // comments
    - Strip // comments from the end of the line

    The cleaned text is parsed with ``json.loads`` and handed to
    ``self.traverse``. Any exception raised while parsing or traversing is
    reported through ``debug`` and ends the generator without re-raising.
    Errors opening or reading the file are not caught here.
    """
    # Read all lines inside a context manager so the file handle is closed
    # right away instead of leaking until garbage collection.
    with filepath.open("r") as handle:
        lines = handle.readlines()

    kept = []
    for line in lines:
        if line.startswith("//"):
            continue
        # NOTE: the pattern needs a space before "//", so URLs such as
        # "http://host" survive, but " // " inside a string value would
        # also be cut off.
        kept.append(re.sub(r" // ?.*$", "", line))

    # Load converted JSON
    try:
        document = json.loads("".join(kept))
        yield from self.traverse(document)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")
def load_config(configfile, src="."):
    """
    Load the YAML configuration at ``configfile`` and normalize it.

    Raises ``FileNotFoundError`` when the path does not exist and
    ``TypeError`` when it is not a regular file (both after a ``debug``
    message).

    The returned mapping is guaranteed to contain:
    - ``exclude`` with ``files``, ``keys`` and ``values`` lists, where
      ``files`` holds the paths matched by globbing each pattern under
      ``src`` and ``keys``/``values`` hold case-insensitive compiled regexes;
    - ``include`` with a ``files`` list (default ``["**/*"]``);
    - ``rules`` (default ``{}``).
    """
    configfile = Path(configfile)
    if not configfile.exists():
        debug(f"{configfile} does not exist")
        raise FileNotFoundError
    if not configfile.is_file():
        debug(f"{configfile} is not a file")
        raise TypeError
    config = load_yaml_from_file(configfile)

    # Ensure minimal expected config structure
    exclude = config.setdefault("exclude", {})
    for section in ("files", "keys", "values"):
        exclude.setdefault(section, [])
    config.setdefault("include", {}).setdefault("files", ["**/*"])

    # Glob excluded files
    exclude["files"] = [
        match
        for pattern in exclude["files"]
        for match in Path(src).glob(pattern)
    ]

    # Compile regex from excluded keys and values
    for section in ("keys", "values"):
        exclude[section] = [
            re.compile(expression, flags=re.IGNORECASE)
            for expression in exclude[section]
        ]

    # Optional: rules
    config.setdefault("rules", {})
    return config
def run(src: str, config=None, dst=None):
    """
    Scan ``src`` and yield every truthy secret found by the scanner.

    Args:
        src: Path of an existing file or directory to scan.
        config: Configuration mapping; when falsy, ``config.yml`` from the
            directory of this module is loaded with ``load_config``.
        dst: Optional directory. When set it is created if missing
            (including missing parent directories) and ``save_file`` is
            called with the file name and ``dst`` for each secret yielded.

    Raises:
        FileNotFoundError: ``src`` does not exist.
        TypeError: ``src`` is neither a file nor a directory.

    An exception raised while scanning one file is printed as
    ``Error: ...`` and scanning continues with the next file.
    """
    src = Path(src)
    if not src.exists():
        debug(f"{src} does not exist")
        raise FileNotFoundError
    if src.is_file():
        files = [src.as_posix()]
    elif src.is_dir():
        files = []
    else:
        debug(f"{src} is neither a file nor a directory")
        raise TypeError

    # Configure execution
    if not config:
        configpath = Path(__file__).parent
        configfile = configpath.joinpath("config.yml").as_posix()
        config = load_config(configfile, src=src)

    # Include files
    for incfile in config["include"]["files"]:
        files += set(src.glob(incfile))

    # Exclude files
    files = list(set(files) - set(config["exclude"]["files"]))

    # makedirs also creates missing parents, where mkdir would raise
    # FileNotFoundError for a nested destination.
    if dst and not os.path.isdir(dst):
        os.makedirs(dst, exist_ok=True)

    # Scan files
    whispers = WhisperSecrets(config)
    for filename in files:
        try:
            for secret in whispers.scan(filename):
                if secret:
                    if dst:
                        save_file(filename, dst)
                    yield secret
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error: {e}")
def pairs(self):
    """
    Yield whatever ``self.plugin.pairs(self.filepath)`` yields.

    Nothing is yielded when ``self.plugin`` is falsy. Any exception raised
    by the plugin is reported through ``debug`` and ends the generator
    without re-raising.
    """
    if not self.plugin:
        return
    try:
        yield from self.plugin.pairs(self.filepath)
    except Exception:
        debug(f"Failed parsing {self.filepath.as_posix()}")
def pairs(self, filepath: Path):
    """
    Parse the file's text with ``astroid.parse`` and yield whatever
    ``self.traverse`` produces for the resulting tree.

    Any exception raised while reading, parsing or traversing is reported
    through ``debug`` and ends the generator without re-raising.
    """
    try:
        source_text = filepath.read_text()
        module = astroid.parse(source_text)
        yield from self.traverse(module)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")
def pairs(self, filepath: Path):
    """
    Parses Python source code into AST using Astroid
    http://pylint.pycqa.org/projects/astroid/en/latest/api/astroid.nodes.html#id1
    Returns key-value pairs

    Pairs are produced for assignments of constants to names, comparisons
    between a name and a constant, dictionary entries whose key and value
    are both constants, keyword arguments with constant values, and every
    function call (as ``("function", <call source>)``). Children are
    visited before their parent node is inspected.

    Any exception raised while reading, parsing or walking is reported
    through ``debug`` and ends the generator without re-raising.
    """

    def is_key(node):
        # Exact type match on purpose: subclasses do not count.
        return type(node) in (
            astroid.node_classes.Name,
            astroid.node_classes.AssignName,
        )

    def is_value(node):
        return type(node) in (astroid.node_classes.Const,)

    def _walk(parent):
        for child in parent.get_children():
            yield from _walk(child)

            if isinstance(child, astroid.node_classes.Assign):
                # Assignment
                if is_value(child.value):
                    constant = child.value.value
                    for target in child.targets:
                        if is_key(target):
                            yield target.name, constant

            elif isinstance(child, astroid.node_classes.Compare):
                # Comparison: only the first operand pair is inspected
                lhs = child.left
                rhs = child.ops[0][1]
                if is_key(lhs) and is_value(rhs):
                    yield lhs.name, rhs.value
                elif is_key(rhs) and is_value(lhs):
                    yield rhs.name, lhs.value

            elif isinstance(child, astroid.node_classes.Dict):
                # Dictionary values
                for dict_key, dict_value in child.items:
                    if is_value(dict_key) and is_value(dict_value):
                        yield dict_key.value, dict_value.value

            elif isinstance(child, astroid.node_classes.Keyword):
                # Keywords
                if is_value(child.value):
                    yield child.arg, child.value.value

            elif isinstance(child, astroid.node_classes.Call):
                # Function call: the entire call rendered as source
                yield "function", child.as_string()

    try:
        module = astroid.parse(filepath.read_text())
        yield from _walk(module)
    except Exception as e:
        debug(f"{type(e)} in {filepath}")