def find_changed_lines(diff: str) -> Dict[str, List[Tuple[int, int]]]:
    """Map each file in *diff* to [start, end] ranges of added target lines."""
    # Delay import since this isn't required unless using the --diff-file
    # argument, which for local runs people don't care about
    try:
        import unidiff  # type: ignore[import]
    except ImportError as e:
        e.msg += ", run 'pip install unidiff'"  # type: ignore[attr-defined]
        raise e

    files: Any = collections.defaultdict(list)

    for file in unidiff.PatchSet(diff):
        for hunk in file:
            added = [line.target_line_no for line in hunk if line.is_added]
            if not added:
                continue
            # Collapse consecutive line numbers into [start, end] ranges,
            # e.g. [1, 2, 3, 12, 13, 14, 15] -> [[1, 3], [12, 15]]
            ranges = [[added[0], added[0]]]
            for prev, cur in zip(added, added[1:]):
                if cur == prev + 1:
                    ranges[-1][1] = cur
                else:
                    ranges.append([cur, cur])
            files[file.path] += ranges
    return dict(files)
def diff_lint(diff, repo_root):
    # NOTE: Python 2 code (print statements).
    # Run pylint over every .py file touched by *diff* (a unified-diff
    # string) and print only the messages that fall on changed lines.
    # repo_root is prepended to the diff's relative paths.

    # Map of modified .py path -> set of target-file line numbers touched.
    changed_files = defaultdict(set)
    patches = unidiff.PatchSet(diff)
    for patch in patches:
        # remove a, since patch.source_file == 'b/project/main.py'
        path = patch.target_file[1:]
        if path.endswith('.py'):
            for hunk in patch:
                # Every line covered by the hunk in the target file counts
                # as "changed" (context lines included).
                end = hunk.target_start + hunk.target_length
                changed_files[path].update(range(hunk.target_start, end))
    for file_, range_ in changed_files.items():
        start = time.time()
        cmd_tpl = '{file_name} --output-format=json'
        # lint.py_run returns (stdout, stderr) file-like objects.
        (pylint_stdout, __) = lint.py_run(
            cmd_tpl.format(file_name=repo_root + file_), return_std=True)
        for line in json.loads(pylint_stdout.buf):
            message_tpl = '{type_} -> {path}:{line} {message}.'
            # Only report messages landing on lines this diff touched.
            if line['line'] in range_:
                print message_tpl.format(type_=line['type'],
                                         path=line['path'],
                                         line=line['line'],
                                         message=line['message'])
        seconds = time.time() - start
        print "Time spent: {seconds}s.\n".format(seconds=seconds)
def process_diff(content):
    """Check docstring presence for code touched by *content* (a diff).

    For each hunk of every modified .py file, runs check_doc_in_class over
    the changed line span and check_doc_in_fun/check_doc_in_class at the
    hunk start, printing each unique error and exiting with the error count.
    """
    error_list = []
    patch = unidiff.PatchSet(content)
    for file in patch:
        if not file.path.endswith(".py"):
            continue
        for hunk in file:
            for line in hunk:
                #if line.source_line_no:
                    #error = check_doc_in_fun(file.path, line.source_line_no)
                    #if error:
                        #error_list.append("{}: {}".format(file.path, error))
                if line.source_line_no:
                    # Scan from this source line across the hunk's target span.
                    lines = len(hunk.target)
                    for i in range(line.source_line_no, line.source_line_no + lines):
                        #print(i, file.path, line.source_line_no)
                        error = check_doc_in_class(file.path, i)
                        if error:
                            error_list.append("{}:{}".format(file.path, error))
            if hunk.target_start:
                # Also check the hunk's first target line for class/function docs.
                error = check_doc_in_class(file.path, hunk.target_start)
                if error:
                    error_list.append("{}:{}".format(file.path, error))
                error = check_doc_in_fun(file.path, hunk.target_start)
                if error:
                    error_list.append("{}:{}".format(file.path, error))
    # Deduplicate for display; exit status counts raw (possibly duplicate) errors.
    for e in set(error_list):
        print(e)
    exit(len(error_list))
def fetch_diff(self):
    """Fetch the diff between local master and origin/master as a PatchSet.

    Checks out master, fetches origin, parses the diff, then hard-resets
    the local branch to origin/master.  Returns the parsed
    ``unidiff.PatchSet`` on success, or ``None`` when a git or diff-parsing
    error occurs (errors are logged rather than propagated).
    """
    # Consistency fix: the original mixed ``SyncGit.logger`` and
    # ``self.logger``; use the instance attribute throughout.
    self.logger.info("Fetching diff from remote origin")
    try:
        firehol_repo = git.cmd.Git(self.repo_path)
        firehol_repo.checkout("master")
        firehol_repo.fetch("origin")
        diff_stdout = firehol_repo.execute(
            ["git", "diff", "master", "origin/master"], True).split("\n")
        try:
            udiff = unidiff.PatchSet(diff_stdout)
            # Fast-forward the working tree only after the diff is captured.
            firehol_repo.execute(
                ["git", "reset", "--hard", "origin/master"])
            firehol_repo.merge()
            self.logger.info(
                "Successfully fetched diff from remote origin")
            return udiff
        except unidiff.UnidiffParseError:
            self.logger.exception("UnidiffParseError occurred")
    except git.GitCommandError:
        self.logger.exception("GitCommandError occurred")
def get_files_involved_in_pr(data):
    """
    Return a dict mapping each file modified/added in the PR to the list
    of line numbers added in that file.
    """
    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    diff_headers = headers.copy()
    # Ask GitHub for the raw unified-diff representation of the PR.
    diff_headers["Accept"] = "application/vnd.github.VERSION.diff"
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    repository = data["repository"]
    # (removed unused locals: after_commit_hash, author)
    diff_url = "https://api.github.com/repos/{}/pulls/{}"
    diff_url = diff_url.format(repository, str(data["pr_number"]))
    r = requests.get(diff_url, headers=diff_headers, auth=auth)
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)
    files = {}
    for patchset in patch:
        # Strip the leading "b" of "b/path/to/file" -> "/path/to/file".
        file = patchset.target_file[1:]
        files[file] = []
        for hunk in patchset:
            for line in hunk.target_lines():
                if line.is_added:
                    files[file].append(line.target_line_no)
    return files
def clear(self, processors=None):
    """
    Starts the process of cleaning unnecessary changes using given
    processors; if no processor is given, we'll use the default ones.

    Processors are functions that receive a hunk and return `True` or
    `False`; when any processor returns `False`, the hunk is reverted
    from the working tree.
    """
    processors = processors or self.default_processors
    index = self.repository.index
    # unified=0 keeps hunks minimal so individual changes revert cleanly.
    patches = index.diff(None, create_patch=True, unified=0)
    for patch in patches:
        try:
            patch = unidiff.PatchSet(StringIO(patch.diff.decode('utf-8')))
        except Exception as e:
            # Best-effort: skip patches unidiff cannot parse.
            print("Unhandled error %s, continuing..." % str(e))
            continue
        if self._clear_patch(patch, processors):
            # NamedTemporaryFile instead of the insecure tempfile.mktemp,
            # and the handle is closed before git reads the file (the
            # original leaked an open file object).
            with tempfile.NamedTemporaryFile('w', delete=False) as patchfile:
                patchfile.write(str(patch) + '\n')
                patchpath = patchfile.name
            self.repository.git.execute(
                ['git', 'apply', '--recount', '-R', '--unidiff-zero',
                 '--allow-overlap', patchpath]
            )
def run(base_commit, ignore_config, step: Optional[Step], report: Optional[Report]):
    """Apply clang-format and return if no issues were found.

    Diffs the tree against *base_commit*; every hunk in the resulting patch
    is a formatting violation, reported as an autofix lint.  The full patch
    is attached as an artifact.  *step*/*report* may be None for debugging.
    """
    if report is None:
        report = Report()  # For debugging.
    if step is None:
        step = Step()  # For debugging.
    r, patch = get_diff(base_commit)
    if not r:
        step.success = False
        return
    add_artifact = False
    patches = unidiff.PatchSet(patch)
    ignore_lines = []
    if ignore_config is not None and os.path.exists(ignore_config):
        # Fix: close the config file promptly (the original leaked the
        # handle from a bare open().readlines()).
        with open(ignore_config, 'r') as ignore_file:
            ignore_lines = ignore_file.readlines()
    ignore = pathspec.PathSpec.from_lines(
        pathspec.patterns.GitWildMatchPattern, ignore_lines)
    patched_file: unidiff.PatchedFile
    success = True
    for patched_file in patches:
        add_artifact = True
        if ignore.match_file(patched_file.source_file) or ignore.match_file(
                patched_file.target_file):
            logging.info(f'patch of {patched_file.patch_info} is ignored')
            continue
        hunk: unidiff.Hunk
        for hunk in patched_file:
            lines = [str(x) for x in hunk]
            success = False
            m = 10  # max number of lines to report.
            description = 'please reformat the code\n```\n'
            n = len(lines)
            cut = n > m + 1
            if cut:
                lines = lines[:m]
            description += ''.join(lines) + '\n```'
            if cut:
                # Fix: message said "full path"; the omitted lines are in
                # the attached patch.
                description += f'\n{n - m} diff lines are omitted. See full patch.'
            report.add_lint({
                'name': 'clang-format',
                'severity': 'autofix',
                'code': 'clang-format',
                'path': patched_file.source_file,
                'line': hunk.source_start,
                'char': 1,
                'description': description,
            })
    if add_artifact:
        patch_file = 'clang-format.patch'
        with open(patch_file, 'w') as f:
            f.write(patch)
        report.add_artifact(os.getcwd(), patch_file, 'clang-format')
    if not success:
        step.success = False
        step.messages.append(
            'Please format your changes with clang-format by running `git-clang-format HEAD^` or applying patch.'
        )
    logging.debug(f'report: {report}')
    logging.debug(f'step: {step}')
def do(base_branch, include_dir, exclude_dir):
    """Print TODO lines added/removed in *include_dir* relative to *base_branch*."""
    exclude_args = [] if exclude_dir is None else \
        [':(exclude){}'.format(exclude_dir)]
    cmd = ["git", "diff", base_branch, "--no-color", "-G", "TODO", "-i",
           '--', include_dir] + exclude_args
    raw_diffs = subprocess.check_output(cmd)
    patch_set = unidiff.PatchSet(BytesIO(raw_diffs), encoding="utf8")

    # Collect every added/removed diff line that mentions "todo", by path.
    path_info = defaultdict(list)
    for patch in patch_set:
        for hunk in patch:
            for line in hunk:
                if 'todo' in line.value.lower() and (line.is_added or line.is_removed):
                    path_info[patch.path].append(line)

    num_added = 0
    num_removed = 0
    for path in sorted(path_info):
        print(path)
        for line in path_info[path]:
            if line.is_added:
                num_added += 1
                lineno = line.target_line_no
            else:
                num_removed += 1
                lineno = line.source_line_no
            val = "{}\t{}".format(lineno, line.value.strip())
            if line.is_added:
                print(colored.green("+:" + val, bold=True))
            else:
                print(colored.red("-:" + val, bold=True))
    print()
    print("Total: {} additions and {} removals".format(
        colored.green(str(num_added), bold=True),
        colored.red(str(num_removed), bold=True)))
def __init__(self, owner, repo, pr=None, branch=None, token=None, url=None,
             commit=None, ignore_paths=None):
    """
    GitHubInterface lets us post messages to GitHub.

    owner and repo are the repository owner/organization and repo name
    respectively.

    pr is the ID number of the pull request. branch is the branch name.
    either pr OR branch must be populated.

    token is your GitHub API token.

    url is the base URL of your GitHub instance, such as
    https://github.com

    commit is the commit hash we're running against

    ignore_paths are paths to ignore comments from
    """
    self.github = None
    self.ignore_paths = set(ignore_paths or [])
    # Enterprise instances need the explicit base URL; github.com does not.
    if not url or url == 'https://github.com':
        self.github = github3.GitHub(token=token)
    else:
        self.github = github3.GitHubEnterprise(url, token=token)
    self.owner = owner
    self.repo = repo
    print('Branch: {0}'.format(branch))
    # Given only a branch, resolve it to a PR number by scanning open PRs.
    if branch and not pr:
        github_repo = self.github.repository(self.owner, self.repo)
        for pull_request in github_repo.iter_pulls():
            if pull_request.to_json()['head']['ref'] == branch:
                pr = pull_request.to_json()['number']
                break
    # TODO: support non-PR runs
    try:
        pr = int(pr)
    except (ValueError, TypeError):
        # Not a usable PR id: disable the interface and bail out early.
        print('{0} is not a valid pull request ID'.format(pr))
        self.github = None
        return
    print('PR ID: {0}'.format(pr))
    self.pr = pr
    self.pull_request = self.github.pull_request(owner, repo, pr)
    self.commits = self.pr_commits(self.pull_request)
    self.last_sha = commit or git.current_sha()
    print('Last SHA: {0}'.format(self.last_sha))
    self.first_sha = self.commits[0].sha
    self.parent_sha = git.parent_sha(self.first_sha)
    # Diff spans from just before the PR's first commit to the last commit.
    self.diff = git.diff(self.parent_sha, self.last_sha)
    self.patch = unidiff.PatchSet(self.diff.split('\n'))
    self.review_comments = list(self.pull_request.review_comments())
    self.last_update = time.time()
def _get_patch_redirects(patch, allow_add_only=False):
    """Derive (old_url, new_url) redirect pairs from a patch's hunks.

    For each hunk, URLs found on removed lines are matched against URLs on
    added lines.  Returns a list of (from_url, to_url) tuples; to_url is
    None for pure removals, and from_url is None for pure additions when
    allow_add_only is set.

    NOTE(review): uses ``assert`` to enforce hunk shape — these checks
    vanish under ``python -O``.
    """
    f = io.StringIO(patch.decode("utf-8"))
    p = unidiff.PatchSet(f)
    redirect_mappings = []
    for p_file in p:
        for p_hunk in p_file:
            removed_urls = []
            added_urls = []
            # Extract URLs from each side of the hunk.
            for p_line in p_hunk:
                if p_line.line_type == "-":
                    removed_urls += _url_extractor_wrapper(str(p_line))
                    logger.debug("removed_urls: {}".format(removed_urls))
                elif p_line.line_type == "+":
                    added_urls += _url_extractor_wrapper(str(p_line))
                    logger.debug("added_urls: {}".format(added_urls))
            # Drop URLs unchanged by the hunk, and added URLs that are not
            # absolute http(s) targets.  Iterate over a copy since we mutate.
            for url in added_urls.copy():
                if url in removed_urls:
                    removed_urls.remove(url)
                    added_urls.remove(url)
                elif not url.startswith("http://") and not url.startswith(
                        "https://"):
                    added_urls.remove(url)
            if not allow_add_only:
                # Expect additions and removals to come in pairs.
                if removed_urls:
                    assert added_urls
                elif added_urls:
                    assert removed_urls
            if not removed_urls and not allow_add_only:
                logger.info("Skipping url patch {} {}".format(
                    removed_urls, added_urls))
                continue
            if len(set(added_urls)) == 1:
                # All removed URLs redirect to the single added URL.
                to_url = added_urls[0]
                if allow_add_only and not removed_urls:
                    removed_urls.append(None)
                for url in removed_urls:
                    redirect_mappings.append((url, to_url))
            elif not added_urls:
                # Pure removal: map each old URL to None.
                for url in removed_urls:
                    redirect_mappings.append((url, None))
            else:
                # Multiple targets: only map when counts line up 1:1,
                # pairing by position within the hunk.
                if len(added_urls) != len(removed_urls):
                    logger.info(
                        "Hunk ignored as removed cant be mapped to added: {}\n{}"
                        .format(removed_urls, added_urls))
                    continue
                for i, to_url in enumerate(added_urls):
                    redirect_mappings.append((removed_urls[i], to_url))
    return redirect_mappings
def get_patched_files(diff_text):
    """Return {path: [(line_no, text), ...]} for every added line in *diff_text*."""
    added_by_file = {}
    for patched_file in unidiff.PatchSet(diff_text):
        path = patched_file.path
        for hunk in patched_file:
            for target_line in hunk.target_lines():
                if target_line.is_added:
                    added_by_file.setdefault(path, []).append(
                        (target_line.target_line_no, target_line.value))
    return added_by_file
def autopep8(data, config):
    """Run autopep8 --diff on the PR's Python files; store diffs in *data*."""
    # Run pycodestyle
    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    r = requests.get(data["diff_url"], headers=headers, auth=auth)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    for file in py_files:
        filename = file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(data["repository"], data["sha"], file)
        r = requests.get(url, headers=headers, auth=auth)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)
        cmd = 'autopep8 file_to_fix.py --diff {arg_to_ignore}'.format(
            arg_to_ignore=arg_to_ignore)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        data["diff"][filename] = stdout.decode(r.encoding)

        # Point the diff at the real filename instead of the temp file.
        # BUG FIX: the autopep8 diff references "file_to_fix.py" (the file
        # actually passed to autopep8), so replacing "file_to_check.py"
        # was a no-op.
        data["diff"][filename] = data["diff"][filename].replace(
            "file_to_fix.py", filename)
        data["diff"][filename] = data["diff"][filename].replace("\\", "\\\\")

        ## Store the link to the file
        url = "https://github.com/" + data["author"] + "/" + \
            data["repository"].split("/")[-1] + "/blob/" + \
            data["sha"] + file
        data[filename + "_link"] = url
        os.remove("file_to_fix.py")
def comment_on_pr(diagnostics, repo, pr, token):
    """Post a GitHub PR review summarizing inspection *diagnostics*.

    WARNING/ERROR diagnostics that map to a diff position become inline
    review comments; the rest are listed in the review body.  The review
    event is APPROVE when there are no errors, otherwise REQUEST_CHANGES.
    """
    headers = {
        "Authorization": "token " + token,
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "gps/intellij-inspections-action",
    }
    url = "https://api.github.com/repos/{}/pulls/{}/reviews".format(repo, pr)
    diff = get_diff_from_pr(repo, pr, token)
    ps = unidiff.PatchSet(diff)
    comments = []
    unknown = []
    for diagnostic in diagnostics:
        if diagnostic.error_level not in ["WARNING", "ERROR"]:
            continue
        pos = find_position(ps, diagnostic.file_name, diagnostic.line_number)
        if pos:
            comments.append({
                "body": diagnostic.description,
                "path": diagnostic.file_name,
                "position": pos,
            })
        else:
            # Already filtered to WARNING/ERROR above; the original
            # re-checked the level redundantly here.
            unknown.append(diagnostic)
    event = "COMMENT"
    text = ""
    num_errors = sum(1 if d.error_level == "ERROR" else 0 for d in diagnostics)
    num_warnings = sum(1 if d.error_level == "WARNING" else 0 for d in diagnostics)
    if num_errors == 0:
        event = "APPROVE"
        if num_warnings == 0:
            text = "🎉 Excellent, your code has passed all inspections! 🎉"
        else:
            text = "Your code has no inspection errors, but there are a few warnings. Please check the warnings."
    else:
        # BUG FIX: was `elif num_errors > 1`, so a PR with exactly one
        # error got a plain COMMENT with an empty body instead of a
        # change request.
        event = "REQUEST_CHANGES"
        text = "‼️ Your code has errors - please fix them ‼️"
    if unknown:
        text += "\n\nThis branch has errors or warnings, but they are not part of the diff:\n\n"
        for diagnostic in unknown:
            text += "Path: {}\nLine number: {}\nLevel: {}\n Problem:{}\n".format(
                diagnostic.file_name,
                diagnostic.line_number,
                diagnostic.error_level,
                diagnostic.description,
            )
    if len(text) > 65000:
        # GitHub does not allow comments larger than 65536 characters
        text = text[:65000] + "‼️‼️TRUNCATED‼️‼️"
    body = {"event": event, "comments": comments, "body": text}
    res = requests.post(url, headers=headers, data=json.dumps(body))
    assert_200_response(res, "Unable to review PR")
def detect_lines(diffstr):
    """Take a diff string and return a dict of files with line numbers changed."""
    changed = {}
    for patched_file in unidiff.PatchSet(StringIO(diffstr)):
        lines = []
        for hunk in patched_file:
            # Every target-file line the hunk covers counts as changed.
            lines.extend(range(hunk.target_start,
                               hunk.target_start + hunk.target_length))
        changed[patched_file.path] = lines
    return changed
def parse_error_diffs(
    errors: str,
    file_path_parser: FilePathParserType,
    logger: Optional[logging.Logger] = None,
) -> Iterable[Diagnostic]:
    """Yield a Diagnostic per hunk of a formatter's diff output.

    Compatible with isort, black.  *errors* is the unified-diff text the
    tool emitted; *file_path_parser* extracts a file path from the diff's
    source-file header (raising UnexpectedErrorFormat on failure).
    Unparseable input is logged via _warn_parse_error and skipped.
    """

    def _is_changed(line: unidiff.patch.Line) -> bool:
        # Anything that is not a context line is an addition or removal.
        return not line.is_context

    try:
        patches = unidiff.PatchSet(errors)
    except unidiff.errors.UnidiffParseError:
        _warn_parse_error(errors, logger)
        return
    for patch in patches:
        for hunk in patch:
            # Prefer source-side line numbers for the changed span; fall
            # back to target-side numbers for pure additions.
            source_changes = list(filter(_is_changed, hunk.source_lines()))
            if source_changes:
                start_line = source_changes[0].source_line_no
                end_line = source_changes[-1].source_line_no
            else:
                target_changes = list(filter(_is_changed, hunk.target_lines()))
                assert target_changes, "expected either source or target line number"
                start_line = target_changes[0].target_line_no
                end_line = target_changes[-1].target_line_no

            try:
                file_path = file_path_parser(patch.source_file)
            except UnexpectedErrorFormat:
                _warn_parse_error(patch, logger)
                continue

            def filter_hunk(
                hunk: unidiff.patch.Hunk,
            ) -> Generator[unidiff.patch.Line, None, None]:
                # Keep changed lines plus context lines that fall inside
                # the [start_line, end_line] span.  Consumed immediately
                # below, so closing over the loop's start/end is safe.
                for line in hunk:
                    if _is_changed(line):
                        yield line
                    elif line.source_line_no is not None:
                        if start_line <= line.source_line_no <= end_line:
                            yield line

            yield Diagnostic(
                start_line=start_line,
                end_line=end_line,
                start_column=1,
                file_path=file_path,
                diff="".join(map(str, filter_hunk(hunk))),
            )
def autopep8(ghrequest, config):
    """Run autopep8 --diff on the PR's Python files; store diffs on *ghrequest*."""
    # Run pycodestyle
    r = utils.query_request(ghrequest.diff_url)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    # BUG FIX: initialize the links dict once, before the loop.  The
    # original reset it inside the loop, so only the last file's link
    # survived.
    ghrequest.links = {}

    for py_file in py_files:
        filename = py_file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(ghrequest.repository, ghrequest.sha, py_file)
        r = utils.query_request(url)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)
        cmd = 'autopep8 file_to_fix.py --diff {arg_to_ignore}'.format(
            arg_to_ignore=arg_to_ignore)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        ghrequest.diff[filename] = stdout.decode(r.encoding)

        # Point the diff at the real filename instead of the temp file.
        # BUG FIX: the autopep8 diff references "file_to_fix.py", so the
        # original's replace of "file_to_check.py" was a no-op.
        ghrequest.diff[filename] = ghrequest.diff[filename].replace(
            "file_to_fix.py", filename)
        ghrequest.diff[filename] = ghrequest.diff[filename].replace(
            "\\", "\\\\")

        ## Store the link to the file
        url = "https://github.com/{}/blob/{}{}"
        ghrequest.links[filename + "_link"] = url.format(
            ghrequest.repository, ghrequest.sha, py_file)
        os.remove("file_to_fix.py")
def get_pr_diff(repo, pr_number, token):
    """Download the PR diff, return a list of PatchedFile"""
    headers = {
        "Accept": "application/vnd.github.v3.diff",
        "Authorization": f"token {token}",
    }
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # PatchSet is the easiest way to construct what we want, but the
    # diff_line_no property on lines is counted from the top of the
    # whole PatchSet, whereas GitHub is expecting the "position"
    # property to be line count within each file's diff. So we need to
    # do this little bit of faff to get a list of file-diffs with
    # their own diff_line_no range
    whole_patch = unidiff.PatchSet(response.text)
    return [unidiff.PatchSet(str(patched_file))[0]
            for patched_file in whole_patch]
def get_pr_diff(repo, pr_number, token):
    """Download the PR diff """
    response = requests.get(
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}",
        headers={
            "Accept": "application/vnd.github.v3.diff",
            "Authorization": f"token {token}",
        },
    )
    response.raise_for_status()
    return unidiff.PatchSet(response.text)
def position(self, message):
    """Calculate position within the PR, which is not the line number.

    Returns the 1-based offset of message's line within its file's diff
    (GitHub's "position" for review comments), or None if the line is
    not part of the diff.
    """
    if not message.line_number:
        message.line_number = 1
    patch = unidiff.PatchSet(self.diff.split('\n'))
    for patched_file in patch:
        # BUG FIX: str.lstrip('b/') strips *any* leading 'b' or '/'
        # characters (e.g. 'b/bar.py' -> 'ar.py'); strip only the
        # literal 'b/' prefix.
        target = patched_file.target_file
        if target.startswith('b/'):
            target = target[2:]
        if target == message.path:
            offset = 1
            for hunk_no, hunk in enumerate(patched_file):
                for position, hunk_line in enumerate(hunk):
                    if '+' not in hunk_line.line_type:
                        continue
                    if hunk_line.target_line_no == message.line_number:
                        return position + offset
                # Account for this hunk's lines plus its header line.
                offset += len(hunk) + 1
def detect_lines(diffstr):
    """Take a diff string and return a dict of files with line numbers changed"""
    # NOTE: Python 2 code (`unicode` builtin below).
    resultant_lines = {}
    # Force utf-8 re: https://github.com/ros/rosdistro/issues/6637
    encoding = 'utf-8'
    io = StringIO(unicode(diffstr, encoding))
    udiff = unidiff.PatchSet(io)
    for file in udiff:
        target_lines = []
        # if file.path in TARGET_FILES:
        for hunk in file:
            # Every target-file line covered by the hunk counts as changed.
            target_lines += range(hunk.target_start,
                                  hunk.target_start + hunk.target_length)
        resultant_lines[file.path] = target_lines
    return resultant_lines
def autopep8ify(ghrequest, config):
    """Fetch each Python file touched by the PR, run autopep8 on it, and
    store the fully fixed file contents in ghrequest.results[filename]."""
    # Run pycodestyle
    r = utils.query_request(ghrequest.diff_url)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    linter = config["scanner"]["linter"]
    files_to_exclude = config[linter]["exclude"]

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            # Skip files excluded by the linter configuration.
            if utils.filename_match(py_file, files_to_exclude):
                continue
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    for py_file in py_files:
        filename = py_file[1:]
        query = f"https://raw.githubusercontent.com/{ghrequest.repository}/{ghrequest.sha}/{py_file}"
        r = utils.query_request(query)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)
        # No --diff flag: autopep8 prints the entire fixed file.
        cmd = f'autopep8 file_to_fix.py {arg_to_ignore}'
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        ghrequest.results[filename] = stdout.decode(r.encoding)
        os.remove("file_to_fix.py")
def message(filename, line, content, diff):
    """Build a review-comment dict for *content* at (filename, line).

    Translates the file line number into a diff "position"; returns None
    when content is empty or the line is not part of the diff.
    """
    if not content:
        return
    patch = unidiff.PatchSet(diff.decode('utf-8').split('\n'))
    for patched_file in patch:
        if patched_file.target_file != 'b/' + filename:
            continue
        offset = 1
        for hunk_no, hunk in enumerate(patched_file):
            for position, hunk_line in enumerate(hunk):
                if '+' not in hunk_line.line_type:
                    continue
                if hunk_line.target_line_no == line:
                    return {
                        'filename': filename,
                        'line': position + offset,
                        'content': 'Line: ' + str(line) + ' \n```\n' + content.strip() + '\n```'
                    }
            # Advance past this hunk's lines plus its header.
            offset += len(hunk) + 1
def get_files_involved_in_pr(repo, pr_number):
    """
    Return a list of file names modified/added in the PR
    """
    headers = {"Accept": "application/vnd.github.VERSION.diff"}
    query = f"/repos/{repo}/pulls/{pr_number}"
    r = utils.query_request(query, headers=headers)
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)
    files = {}
    for patched_file in patch:
        # Drop the leading "b" of "b/path/to/file".
        path = patched_file.target_file[1:]
        files[path] = [line.target_line_no
                       for hunk in patched_file
                       for line in hunk.target_lines()
                       if line.is_added]
    return files
def find_changed_lines(diff: str) -> Dict[str, List[Tuple[int, int]]]:
    """Map each file in *diff* to (start, end) target-line spans per hunk."""
    # Delay import since this isn't required unless using the --diff-file
    # argument, which for local runs people don't care about
    try:
        import unidiff  # type: ignore[import]
    except ImportError as e:
        e.msg += ", run 'pip install unidiff'"  # type: ignore[attr-defined]
        raise e

    files = collections.defaultdict(list)

    for file in unidiff.PatchSet(diff):
        for hunk in file:
            # target_line_no is None for removed lines.  BUG FIX: the
            # original only guarded the first line, so a hunk *ending* in
            # a removal produced (start, None); scan from both ends for a
            # line that exists in the target file.
            start = next(
                (line.target_line_no for line in hunk
                 if line.target_line_no is not None), 1)
            end = next(
                (line.target_line_no for line in reversed(hunk)
                 if line.target_line_no is not None), start)
            files[file.path].append((start, end))
    return dict(files)
def get_candidates_from_diff(difftext):
    """Return lint candidates from a diff: "path" for new files, else
    "path:start-end,start-end,..." covering each hunk's target span."""
    try:
        import unidiff
    except ImportError as e:
        # BUG FIX: the original passed ("... %s", e.message) to SystemExit
        # (never interpolated) and used the Python-2-only ``message``
        # attribute.
        raise SystemExit("Could not import unidiff library: %s" % e)
    patch = unidiff.PatchSet(difftext, encoding='utf-8')
    candidates = []
    # (dropped the original's redundant identity list-comprehension)
    for patchedfile in patch.added_files + patch.modified_files:
        if patchedfile.source_file == '/dev/null':
            # Brand-new file: lint the whole thing.
            candidates.append(patchedfile.path)
        else:
            lines = ",".join([
                "%s-%s" % (hunk.target_start,
                           hunk.target_start + hunk.target_length)
                for hunk in patchedfile
            ])
            candidates.append("%s:%s" % (patchedfile.path, lines))
    return candidates
async def news_hook(request):
    """aiohttp handler: set a PR status check for news-file presence.

    Verifies the webhook signature, then passes the check when the PR
    carries a "trivial" label or adds a news fragment, failing it
    otherwise.
    """
    payload = await request.read()

    # Verify the payload against the signature
    if (request.headers.get("X-Hub-Signature")
            and request.app.get("github_payload_key")):
        try:
            _verify_signature(
                request.app["github_payload_key"],
                request.headers["X-Hub-Signature"],
                payload,
            )
        except InvalidSignature:
            return web.json_response(
                {"message": "Invalid signature"},
                status=400,
            )

    data = json.loads(payload.decode(request.charset or "utf8"))

    # We only care about a few different kinds of actions, the rest of them
    # are not useful to us, so we'll no-op out quickly if it is one of them.
    if (data.get("action")
            not in {"labeled", "unlabeled", "opened", "reopened",
                    "synchronize"}):
        return web.json_response({"message": "Skipped due to action"})

    async with aiohttp.ClientSession() as session:
        gh = gidgethub.aiohttp.GitHubAPI(
            session,
            "BrownTruck",
            oauth_token=request.app["github_token"],
        )

        # Grab our labels out of GitHub's API
        tries = 5
        while True:
            try:
                issue_data = await gh.getitem(data["pull_request"]["issue_url"])
                label_data = await gh.getitem(issue_data["labels_url"])
            except gidgethub.BadRequest as exc:
                # BUG FIX: the original called
                # isinstance(exc.status_code, http.HTTPStatus.NOT_FOUND)
                # (a TypeError: the second argument must be a type) and
                # then re-raised unconditionally, so the retry never ran.
                if (exc.status_code == http.HTTPStatus.NOT_FOUND
                        and tries > 0):
                    tries -= 1
                    await asyncio.sleep(1)
                    continue
                raise
            else:
                break

        labels = {l["name"] for l in label_data}

        # Grab the diff from GitHub and parse it into a diff object.
        diff_url = data["pull_request"]["diff_url"]
        async with session.get(diff_url) as resp:
            diff = unidiff.PatchSet(io.StringIO(await resp.text()))

        # Determine if the status check for this PR is passing or not and
        # update the status check to account for that.
        if ("trivial" in labels
                or any(f.is_added_file for f in diff
                       if _news_fragment_re.search(f.path))):
            await gh.post(
                data["pull_request"]["statuses_url"],
                data={
                    "context": NEWS_FILE_CONTEXT,
                    "target_url": HELP_URL,
                    "state": "success",
                    "description":
                        "News files updated and/or change is trivial.",
                },
            )
            return web.json_response({
                "message": "news file updated and/or ignored",
            })
        else:
            await gh.post(
                data["pull_request"]["statuses_url"],
                data={
                    "context": NEWS_FILE_CONTEXT,
                    "target_url": HELP_URL,
                    "state": "failure",
                    "description":
                        "Missing either a news entry or a trivial file/label.",
                },
            )
            return web.json_response({
                "message": "news file was not updated",
                "labels": list(labels),
                "files": [
                    {"path": f.path, "is_added_file": f.is_added_file}
                    for f in diff
                ],
            })
def run_pycodestyle(data, config):
    """
    Run pycodestyle script on the files and update the data dictionary
    """
    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    diff_headers = headers.copy()
    diff_headers["Accept"] = "application/vnd.github.VERSION.diff"
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    repository = data["repository"]
    after_commit_hash = data["after_commit_hash"]
    author = data["author"]
    diff_url = "https://api.github.com/repos/{}/pulls/{}"
    diff_url = diff_url.format(repository, str(data["pr_number"]))

    # Run pycodestyle
    r = requests.get(diff_url, headers=diff_headers, auth=auth)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    for file in py_files:
        filename = file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(repository, after_commit_hash, file)
        r = requests.get(url, headers=headers, auth=auth)
        with open("file_to_check.py", 'w+', encoding=r.encoding) as file_to_check:
            file_to_check.write(r.text)

        # Use the command line here
        cmd = 'pycodestyle {config[pycodestyle_cmd_config]} file_to_check.py'.format(
            config=config)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        data["extra_results"][filename] = stdout.decode(
            r.encoding).splitlines()

        # Put only relevant errors in the data["results"] dictionary
        data["results"][filename] = []
        for error in list(data["extra_results"][filename]):
            # BUG FIX: raw string (the original's "\d"/"\s" are invalid
            # escape sequences on modern Python) and escaped dot so "."
            # matches literally.
            if re.search(r"^file_to_check\.py:\d+:\d+:\s[WE]\d+\s.*", error):
                data["results"][filename].append(
                    error.replace("file_to_check.py", filename))
                data["extra_results"][filename].remove(error)

        ## Remove errors in case of diff_only = True
        ## which are caused in the whole file
        for error in list(data["results"][filename]):
            if config["scanner"]["diff_only"]:
                if not int(error.split(":")[1]) in py_files[file]:
                    data["results"][filename].remove(error)

        ## Store the link to the file
        url = "https://github.com/{}/{}/blob/{}{}"
        url = url.format(author, repository.split("/")[-1],
                         after_commit_hash, file)
        data[filename + "_link"] = url
        os.remove("file_to_check.py")
#!/usr/bin/env python import sys import unidiff import os.path if len(sys.argv) != 3: print >> sys.stderr, "Usage: filter_lint_by_diff.py diff.patch repository_root_directory < cpplint_warnings.txt" sys.exit(1) added_lines = set() repository_root = sys.argv[2] with open(sys.argv[1], "r") as f: diff = unidiff.PatchSet(f) for diff_file in diff: filename = diff_file.target_file # Skip files deleted in the tip (b side of the diff): if filename == "/dev/null": continue assert filename.startswith("b/") filename = os.path.join(repository_root, filename[2:]) added_lines.add((filename, 0)) for diff_hunk in diff_file: for diff_line in diff_hunk: if diff_line.line_type == "+": added_lines.add((filename, diff_line.target_line_no)) for l in sys.stdin: bits = l.split(":") if len(bits) < 3:
def __init__(
    self,
    owner,
    repo,
    pr=None,
    branch=None,
    token=None,
    url=None,
    commit=None,
    ignore_paths=None,
    prefix=None,
):
    """
    GitHubInterface lets us post messages to GitHub.

    owner and repo are the repository owner/organization and repo name
    respectively.

    pr is the ID number of the pull request. branch is the branch name.
    either pr OR branch must be populated.

    token is your GitHub API token.

    url is the base URL of your GitHub instance, such as
    https://github.com

    commit is the commit hash we're running against

    ignore_paths are paths to ignore comments from
    """
    self.github = None
    self.stopped_early = False
    self.prefix = prefix
    self.ignore_paths = set(ignore_paths or [])
    # Enterprise instances need the explicit base URL; github.com does not.
    if not url or url == "https://github.com":
        self.github = github3.GitHub(token=token)
    else:
        self.github = github3.GitHubEnterprise(url, token=token)
    self.owner = owner
    self.repo = repo
    self.github_repo = self.github.repository(self.owner, self.repo)
    all_commits = self.repo_commits(self.github_repo)
    self.master_sha = all_commits[0].sha
    print("Master SHA: {0}".format(self.master_sha))
    print("Branch: {0}".format(branch))
    self.pull_request_number = None
    # Given only a branch, resolve it to a PR by scanning this repo and
    # (for forks) its parent repo.
    if branch and not pr:
        for github_repo in [self.github_repo, self.github_repo.parent]:
            if pr:
                break
            if not github_repo:
                continue
            try:
                # github.py == 0.9.6
                pulls = github_repo.iter_pulls()
            except AttributeError:
                pulls = github_repo.pull_requests()
            for pull_request in pulls:
                print("Branch: {} - Pull Request Head Ref: {}".format(
                    branch, pull_request.head.ref))
                if pull_request.head.ref == branch:
                    pr = pull_request.number
                    self.github_repo = github_repo
                    break
        self.owner = self.github_repo.owner
        self.repo = self.github_repo.name
    # TODO: support non-PR runs
    try:
        pr = int(pr)
    except (ValueError, TypeError):
        # Not a usable PR id: disable the interface and bail out early.
        print("{0} is not a valid pull request ID".format(pr))
        self.github = None
        return
    print("PR ID: {0}".format(pr))
    self.pull_request_number = pr
    self.pull_request = self.github.pull_request(self.owner, self.repo, pr)
    self.target_sha = self.pull_request.base.sha
    self.target_branch = self.pull_request.base.label
    # Fetch the base repo, trying HTTPS first then SSH, across both
    # github.py API generations.
    try:
        # github.py == 0.9.6
        try:
            git.fetch(
                self.pull_request.base.to_json()["repo"]["clone_url"])
        except subprocess.CalledProcessError:
            git.fetch(self.pull_request.base.to_json()["repo"]["ssh_url"])
    except AttributeError:
        # latest github.py
        try:
            git.fetch(
                self.pull_request.base.repository.as_dict()["clone_url"])
        except subprocess.CalledProcessError:
            git.fetch(
                self.pull_request.base.repository.as_dict()["ssh_url"])
    print("Target SHA: {0}".format(self.target_sha))
    print("Target Branch: {0}".format(self.target_branch))
    self.commits = self.pr_commits(self.pull_request)
    self.last_sha = commit or git.current_sha()
    print("Last SHA: {0}".format(self.last_sha))
    self.first_sha = self.commits[0].sha
    # Diff spans from the PR's merge base to the last commit we have.
    self.diff = git.diff(self.target_sha, self.last_sha)
    self.patch = unidiff.PatchSet(self.diff.split("\n"))
    self.review_comments = list(self.pull_request.review_comments())
    self.last_update = time.time()
    self.messages_in_files = dict()
def diff(self) -> unidiff.PatchSet:
    """Fetch and parse this PR's diff from GitHub's patch-diff endpoint."""
    url = 'https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff' % (
        self._repo, self._pr_number)
    response = util.request(url)
    return unidiff.PatchSet(response.content.decode('utf-8'))