def find_secrets(self, diff):
    """Scan a unified-diff string for secrets and yield `Secret` records.

    The diff is scanned with every registered detect-secrets plugin; hits
    are filtered through the instance blacklist before being yielded.

    :param diff: unified diff text (e.g. output of `git diff`).
    :yields: Secret(type, filename, line_number, secret_value, line, is_verified)
    """
    # Parsed per-file added-line map; built lazily only if a secret is found.
    changes = None
    secrets_collection = SecretsCollection()
    # Enable every known plugin for this scan only; settings revert on exit.
    with transient_settings({'plugins_used': [{'name': plugin_type.__name__} for plugin_type in get_mapping_from_secret_type_to_class().values()]}) as settings:
        settings.disable_filters(
            'detect_secrets.filters.common.is_invalid_file',
        )
        # Scan must happen while the transient settings are active.
        secrets_collection.scan_diff(diff)
    for file_name, secret in secrets_collection:
        if len(secret.secret_value) < 6:
            continue  # Ignore small secrets to reduce false positives.
        # Only parse the diff if at least one secret was found.
        if not changes:
            patch_set = PatchSet.from_string(diff)
            changes = {}
            for patch_file in patch_set:
                # Map added-line number -> stripped line text for this file.
                lines = dict((line.target_line_no, line.value.strip()) for chunk in patch_file for line in chunk.target_lines() if line.is_added)
                changes[patch_file.path] = lines
        line = changes[secret.filename][secret.line_number]
        if self._blacklist.is_blacklisted(line, file_name, secret.secret_value):
            continue
        # detect_secrets sometimes return a lowercase version of the secret. Find the real string.
        secret_index = line.lower().find(secret.secret_value.lower())
        secret_value = line[secret_index:secret_index + len(secret.secret_value)]
        yield Secret(secret.type, secret.filename, secret.line_number, secret_value, line, secret.is_verified)
def get_latest_patchset():
    """Return the PatchSet for the latest commit.

    Runs `git diff -U0 @~..@` (no context lines) and normalizes CRLF line
    endings before parsing, so Windows-produced diffs parse cleanly.
    """
    raw_diff = check_output(['git', 'diff', '-U0', '@~..@'])
    normalized = raw_diff.replace(b'\r\n', b'\n')
    return PatchSet.from_string(normalized, encoding='utf-8')
def get_commit_filechanges(self, repo, sha):
    """Fetch the diff for one commit and return the transformed patchset."""
    endpoint = BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha)
    response = self.get(endpoint, allow_text=True)
    patchset = PatchSet.from_string(response.text)
    return self.transform_patchset(patchset)
def diff_parser_from_str(diff_str):
    """Parse a unified-diff string into a PatchSet.

    :param diff_str: unified diff text (e.g. output of `git diff`).
    :returns: the parsed PatchSet, or an empty list when parsing fails.
        Failures are logged, not raised, so callers can treat
        "unparseable" the same as "no patches".
    """
    try:
        return PatchSet.from_string(diff_str)
    except Exception:
        # logging.exception records the traceback (the original dropped it),
        # and lazy %-style args avoid building the message unless emitted.
        logging.exception("diff_parser_from_str failed! diff str %s", diff_str)
        return []
def from_string(text):
    """Build a DiffMapper from a diff supplied as a string.

    The resulting mapper's internal mapping relates, per file, the target
    filename and line number back to the source filename and line number.
    """
    parsed = PatchSet.from_string(text)
    return DiffMapper.from_patchset(parsed)
def get_patch(api, urn, pr_num, raw=False):
    """Fetch the .patch file for a pull request.

    :param api: callable taking (method, path) that performs the request.
    :param urn: "owner/repo" identifier.
    :param pr_num: pull request number.
    :param raw: when true, return the unparsed patch text instead of a PatchSet.
    """
    path = "https://github.com/{urn}/pull/{pr}.patch".format(urn=urn, pr=pr_num)
    data = api("get", path)
    return data if raw else PatchSet.from_string(data)
def get_commit_filechanges(self, repo, sha):
    # returns unidiff file
    response = self.get("/2.0/repositories/{}/diff/{}".format(repo, sha), allow_text=True)
    patchset = PatchSet.from_string(response.text)
    return self.transform_patchset(patchset)
def scan_diff(
    self,
    diff,
    baseline_filename='',
    last_commit_hash='',
    repo_name='',
):
    """For optimization purposes, our scanning strategy focuses on looking
    at incremental differences, rather than re-scanning the codebase every time.
    This function supports this, and adds information to self.data.

    :type diff: str
    :param diff: diff string.
        Eg. The output of `git diff <fileA> <fileB>`

    :type baseline_filename: str
    :param baseline_filename: if there are any baseline secrets, then the
        baseline file will have hashes in them. By specifying it, we can skip
        this clear exception.

    :type last_commit_hash: str
    :param last_commit_hash: used for logging only -- the last commit hash we saved

    :type repo_name: str
    :param repo_name: used for logging only -- the name of the repo
    """
    try:
        patch_set = PatchSet.from_string(diff)
    except UnidiffParseError:  # pragma: no cover
        # Log enough context to identify which repo/commit produced the
        # unparseable diff, then re-raise for the caller to handle.
        alert = {
            'alert': 'UnidiffParseError',
            'hash': last_commit_hash,
            'repo_name': repo_name,
        }
        log.error(alert)
        raise

    # Compile the exclusion pattern once, outside the per-file loop.
    if self.exclude_regex:
        regex = re.compile(self.exclude_regex, re.IGNORECASE)

    for patch_file in patch_set:
        filename = patch_file.path
        # If the file matches the exclude_regex, we skip it
        if self.exclude_regex and regex.search(filename):
            continue

        # The baseline file contains secret hashes; scanning it would be noise.
        if filename == baseline_filename:
            continue

        for results, plugin in self._results_accumulator(filename):
            results.update(
                self._extract_secrets_from_patch(
                    patch_file,
                    plugin,
                    filename,
                ),
            )
def test_patchset_from_bytes_string(self):
    # Parsing raw bytes with an explicit encoding must produce the same
    # PatchSet as parsing an already-decoded text stream.
    with codecs.open(self.sample_file, 'rb') as diff_file:
        from_bytes = PatchSet.from_string(diff_file.read(), encoding='utf-8')

    with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
        from_text = PatchSet(diff_file)

    self.assertEqual(from_bytes, from_text)
def scan_diff(
    self,
    diff,
    baseline_filename='',
    last_commit_hash='',
    repo_name='',
):
    """For optimization purposes, our scanning strategy focuses on looking
    at incremental differences, rather than re-scanning the codebase every time.
    This function supports this, and adds information to self.data.

    :type diff: str
    :param diff: diff string.
        Eg. The output of `git diff <fileA> <fileB>`

    :type baseline_filename: str
    :param baseline_filename: if there are any baseline secrets, then the
        baseline file will have hashes in them. By specifying it, we can skip
        this clear exception.

    :type last_commit_hash: str
    :param last_commit_hash: used for logging only -- the last commit hash we saved

    :type repo_name: str
    :param repo_name: used for logging only -- the name of the repo
    """
    try:
        patch_set = PatchSet.from_string(diff)
    except UnidiffParseError:  # pragma: no cover
        # Log enough context to identify which repo/commit produced the
        # unparseable diff, then re-raise for the caller to handle.
        alert = {
            'alert': 'UnidiffParseError',
            'hash': last_commit_hash,
            'repo_name': repo_name,
        }
        CustomLogObj.getLogger().error(alert)
        raise

    # Compile the exclusion pattern once, outside the per-file loop.
    if self.exclude_regex:
        regex = re.compile(self.exclude_regex, re.IGNORECASE)

    for patch_file in patch_set:
        filename = patch_file.path
        # If the file matches the exclude_regex, we skip it
        if self.exclude_regex and regex.search(filename):
            continue

        # The baseline file contains secret hashes; scanning it would be noise.
        if filename == baseline_filename:
            continue

        for results, plugin in self._results_accumulator(filename):
            results.update(
                self._extract_secrets_from_patch(
                    patch_file,
                    plugin,
                    filename,
                ),
            )
def get_commit_filechanges(self, repo, sha):
    """Return the transformed file changes for a single commit."""
    url = BitbucketAPIPath.repository_diff.format(
        repo=repo,
        spec=sha,
    )
    resp = self.get(url, allow_text=True)
    return self.transform_patchset(PatchSet.from_string(resp.text))
def get_commit_filechanges(self, repo, sha):
    # returns unidiff file
    raw_diff = self.get(
        '/2.0/repositories/{}/diff/{}'.format(repo, sha),
        allow_text=True,
    )
    return self.transform_patchset(PatchSet.from_string(raw_diff))
def get_commit_filechanges(self, repo, sha):
    # returns unidiff file
    url = '/2.0/repositories/{}/diff/{}'.format(repo, sha)
    response = self.get(url, allow_text=True)
    patchset = PatchSet.from_string(response.text)
    return self.transform_patchset(patchset)
def get_commit_filechanges(self, repo, sha):
    # returns unidiff file
    raw_diff = self.request(
        'GET',
        '2.0',
        '/repositories/{}/diff/{}'.format(repo, sha),
        data=None,
        params=None,
        json=False,
    )
    return self.transform_patchset(PatchSet.from_string(raw_diff))
def _get_lines_from_diff(diff: str) -> Generator[Tuple[str, List[Tuple[int, str]]], None, None]:
    """Yield (filename, [(line_number, line_text), ...]) for each file in a diff,
    keeping only added lines and skipping filtered-out filenames.

    :raises: ImportError
    """
    # Local imports, so that we don't need to require unidiff for versions of
    # detect-secrets that don't use it.
    from unidiff import PatchSet

    for patch_file in PatchSet.from_string(diff):
        filename = patch_file.path
        if _is_filtered_out(required_filter_parameters=['filename'], filename=filename):
            continue

        added_lines = []
        for chunk in patch_file:
            # target_lines refers to incoming (new) changes
            for line in chunk.target_lines():
                if line.is_added:
                    added_lines.append((line.target_line_no, line.value))

        yield filename, added_lines
def get_diff_info(self, commit):
    """Return (added, removed, modified-file-count) for *commit* against its
    first parent, each formatted as a string."""
    diff_text = self.repo.git.diff(commit.parents[0].hexsha, commit.hexsha)
    patchset = PatchSet.from_string(diff_text)
    return str(patchset.added), str(patchset.removed), str(len(patchset.modified_files))
def parse(data, tree):
    """Parse a diff string and feed each contained patch into *tree*."""
    for patch in PatchSet.from_string(data):
        parse_patch(patch, tree)