Example #1
def find_changed_lines(diff: str) -> Dict[str, List[Tuple[int, int]]]:
    # Delay the import since it's only needed when using the --diff-file
    # argument, which local runs typically don't use
    try:
        import unidiff  # type: ignore[import]
    except ImportError as e:
        e.msg += ", run 'pip install unidiff'"  # type: ignore[attr-defined]
        raise e

    files: Any = collections.defaultdict(list)

    for file in unidiff.PatchSet(diff):
        for hunk in file:
            added_line_nos = [
                line.target_line_no for line in hunk if line.is_added
            ]

            if len(added_line_nos) == 0:
                continue

            # Convert the list of line numbers to ranges,
            # e.g. [1, 2, 3, 12, 13, 14, 15] becomes [[1, 3], [12, 15]]
            i = 1
            ranges = [[added_line_nos[0], added_line_nos[0]]]
            while i < len(added_line_nos):
                if added_line_nos[i] != added_line_nos[i - 1] + 1:
                    ranges[-1][1] = added_line_nos[i - 1]
                    ranges.append([added_line_nos[i], added_line_nos[i]])
                i += 1
            ranges[-1][1] = added_line_nos[-1]

            files[file.path] += ranges

    return dict(files)
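A quick way to sanity-check the range folding above is to feed the function a small handcrafted diff (the diff text below is illustrative, not from any real repository):

SAMPLE_DIFF = """\
--- a/example.py
+++ b/example.py
@@ -1,2 +1,4 @@
 line one
+line two
+line three
 line four
"""

# The two added lines fold into a single contiguous range:
# {'example.py': [[2, 3]]}
print(find_changed_lines(SAMPLE_DIFF))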
Example #2
def diff_lint(diff, repo_root):
    changed_files = defaultdict(set)
    patches = unidiff.PatchSet(diff)
    for patch in patches:
        # strip the leading "b", since patch.target_file == 'b/project/main.py'
        path = patch.target_file[1:]
        if path.endswith('.py'):
            for hunk in patch:
                end = hunk.target_start + hunk.target_length
                changed_files[path].update(range(hunk.target_start, end))

    for file_, range_ in changed_files.items():
        start = time.time()
        cmd_tpl = '{file_name} --output-format=json'
        (pylint_stdout,
         __) = lint.py_run(cmd_tpl.format(file_name=repo_root + file_),
                           return_std=True)
        for line in json.loads(pylint_stdout.buf):
            message_tpl = '{type_} -> {path}:{line} {message}.'
            if line['line'] in range_:
                print(message_tpl.format(type_=line['type'],
                                         path=line['path'],
                                         line=line['line'],
                                         message=line['message']))
        seconds = time.time() - start
        print "Time spent: {seconds}s.\n".format(seconds=seconds)
Example #3
def process_diff(content):
    """ test """
    error_list = []

    patch = unidiff.PatchSet(content)
    for file in patch:
        if not file.path.endswith(".py"):
            continue
        for hunk in file:
            for line in hunk:
                # if line.source_line_no:
                #     error = check_doc_in_fun(file.path, line.source_line_no)
                #     if error:
                #         error_list.append("{}: {}".format(file.path, error))

                if line.source_line_no:
                    lines = len(hunk.target)
                    for i in range(line.source_line_no,
                                   line.source_line_no + lines):
                        #print(i, file.path, line.source_line_no)
                        error = check_doc_in_class(file.path, i)
                        if error:
                            error_list.append("{}:{}".format(file.path, error))

            if hunk.target_start:
                error = check_doc_in_class(file.path, hunk.target_start)
                if error:
                    error_list.append("{}:{}".format(file.path, error))
                error = check_doc_in_fun(file.path, hunk.target_start)
                if error:
                    error_list.append("{}:{}".format(file.path, error))

    for e in set(error_list):
        print(e)
    exit(len(error_list))
Example #4
    def fetch_diff(self):
        SyncGit.logger.info("Fetching diff from remote origin")

        try:
            firehol_repo = git.cmd.Git(self.repo_path)
            firehol_repo.checkout("master")
            firehol_repo.fetch("origin")
            diff_stdout = firehol_repo.execute(
                ["git", "diff", "master", "origin/master"], True).split("\n")

            try:
                udiff = unidiff.PatchSet(diff_stdout)
                firehol_repo.execute(
                    ["git", "reset", "--hard", "origin/master"])
                firehol_repo.merge()
                self.logger.info(
                    "Successfully fetched diff from remote origin")

                return udiff

            except unidiff.UnidiffParseError:
                self.logger.exception("UnidiffParseError occurred")

        except git.GitCommandError:
            self.logger.exception("GitCommandError occurred")
Example #5
def get_files_involved_in_pr(data):
    """
    Return a list of file names modified/added in the PR
    """
    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    diff_headers = headers.copy()
    diff_headers["Accept"] = "application/vnd.github.VERSION.diff"
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    repository = data["repository"]
    after_commit_hash = data["after_commit_hash"]
    author = data["author"]
    diff_url = "https://api.github.com/repos/{}/pulls/{}"
    diff_url = diff_url.format(repository, str(data["pr_number"]))
    r = requests.get(diff_url, headers=diff_headers, auth=auth)
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    files = {}

    for patchset in patch:
        file = patchset.target_file[1:]
        files[file] = []
        for hunk in patchset:
            for line in hunk.target_lines():
                if line.is_added:
                    files[file].append(line.target_line_no)

    return files
Example #6
    def clear(self, processors=None):
        """
        Starts the process of cleaning unnecessary changes using the given
        processors; if no processors are given, the default ones are used.

        Processors are functions that receive a hunk and return
        `True` or `False`, when any processor returns `False`, the hunk is
        reverted from the working tree.
        """
        processors = processors or self.default_processors
        index = self.repository.index
        patches = index.diff(None, create_patch=True, unified=0)
        for patch in patches:
            try:
                patch = unidiff.PatchSet(StringIO(patch.diff.decode('utf-8')))
            except Exception as e:
                print("Unhandled error %s, continuing..." % str(e))
                continue

            if self._clear_patch(patch, processors):
                patchpath = tempfile.mktemp()
                with open(patchpath, 'w') as patch_file:
                    patch_file.write(str(patch) + '\n')
                self.repository.git.execute(
                    ['git', 'apply', '--recount', '-R', '--unidiff-zero',
                     '--allow-overlap', patchpath]
                )
Example #7
def run(base_commit, ignore_config, step: Optional[Step],
        report: Optional[Report]):
    """Apply clang-format and return if no issues were found."""
    if report is None:
        report = Report()  # For debugging.
    if step is None:
        step = Step()  # For debugging.
    r, patch = get_diff(base_commit)
    if not r:
        step.success = False
        return
    add_artifact = False
    patches = unidiff.PatchSet(patch)
    ignore_lines = []
    if ignore_config is not None and os.path.exists(ignore_config):
        with open(ignore_config, 'r') as ignore_file:
            ignore_lines = ignore_file.readlines()
    ignore = pathspec.PathSpec.from_lines(
        pathspec.patterns.GitWildMatchPattern, ignore_lines)
    patched_file: unidiff.PatchedFile
    success = True
    for patched_file in patches:
        add_artifact = True
        if ignore.match_file(patched_file.source_file) or ignore.match_file(
                patched_file.target_file):
            logging.info(f'patch of {patched_file.patch_info} is ignored')
            continue
        hunk: unidiff.Hunk
        for hunk in patched_file:
            lines = [str(x) for x in hunk]
            success = False
            m = 10  # max number of lines to report.
            description = 'please reformat the code\n```\n'
            n = len(lines)
            cut = n > m + 1
            if cut:
                lines = lines[:m]
            description += ''.join(lines) + '\n```'
            if cut:
                description += f'\n{n - m} diff lines are omitted. See the full patch.'
            report.add_lint({
                'name': 'clang-format',
                'severity': 'autofix',
                'code': 'clang-format',
                'path': patched_file.source_file,
                'line': hunk.source_start,
                'char': 1,
                'description': description,
            })
    if add_artifact:
        patch_file = 'clang-format.patch'
        with open(patch_file, 'w') as f:
            f.write(patch)
        report.add_artifact(os.getcwd(), patch_file, 'clang-format')
    if not success:
        step.success = False
        step.messages.append(
            'Please format your changes with clang-format by running `git-clang-format HEAD^` or applying patch.'
        )
    logging.debug(f'report: {report}')
    logging.debug(f'step: {step}')
Example #8
def do(base_branch, include_dir, exclude_dir):
    maybe_exclude_arg = [] if exclude_dir is None else\
        [':(exclude){}'.format(exclude_dir)]
    args = ["git", "diff", base_branch, "--no-color", "-G", "TODO", "-i",
            '--', include_dir] + maybe_exclude_arg
    raw_diffs = subprocess.check_output(args)
    patch_set = unidiff.PatchSet(BytesIO(raw_diffs), encoding="utf8")
    path_info = defaultdict(list)
    for patch in patch_set:
        for hunk in patch:
            for line in hunk:
                if 'todo' in line.value.lower():
                    if line.is_added or line.is_removed:
                        path_info[patch.path].append(line)
    num_added = 0
    num_removed = 0
    for path in sorted(path_info.keys()):
        print(path)
        for line in path_info[path]:
            if line.is_added:
                num_added += 1
            else:
                num_removed += 1
            val = "{}\t{}".format(
                (line.target_line_no if line.is_added else line.source_line_no),
                line.value.strip())
            print(
                colored.green("+:" + val, bold=True)
                if line.is_added else
                colored.red("-:" + val, bold=True))
        print()
    print("Total: {} additions and {} removals".format(
        colored.green(str(num_added), bold=True),
        colored.red(str(num_removed), bold=True)))
Example #9
    def __init__(self,
                 owner,
                 repo,
                 pr=None,
                 branch=None,
                 token=None,
                 url=None,
                 commit=None,
                 ignore_paths=None):
        """
        GitHubInterface lets us post messages to GitHub.

        owner and repo are the repository owner/organization and repo name respectively.

        pr is the ID number of the pull request. branch is the branch name. either pr OR branch
        must be populated.

        token is your GitHub API token.

        url is the base URL of your GitHub instance, such as https://github.com

        commit is the commit hash we're running against

        ignore_paths are paths to ignore comments from
        """
        self.github = None
        self.ignore_paths = set(ignore_paths or [])
        if not url or url == 'https://github.com':
            self.github = github3.GitHub(token=token)
        else:
            self.github = github3.GitHubEnterprise(url, token=token)
        self.owner = owner
        self.repo = repo
        print('Branch: {0}'.format(branch))
        if branch and not pr:
            github_repo = self.github.repository(self.owner, self.repo)
            for pull_request in github_repo.iter_pulls():
                if pull_request.to_json()['head']['ref'] == branch:
                    pr = pull_request.to_json()['number']
                    break
        # TODO: support non-PR runs
        try:
            pr = int(pr)
        except (ValueError, TypeError):
            print('{0} is not a valid pull request ID'.format(pr))
            self.github = None
            return
        print('PR ID: {0}'.format(pr))
        self.pr = pr
        self.pull_request = self.github.pull_request(owner, repo, pr)
        self.commits = self.pr_commits(self.pull_request)
        self.last_sha = commit or git.current_sha()
        print('Last SHA: {0}'.format(self.last_sha))
        self.first_sha = self.commits[0].sha
        self.parent_sha = git.parent_sha(self.first_sha)
        self.diff = git.diff(self.parent_sha, self.last_sha)
        self.patch = unidiff.PatchSet(self.diff.split('\n'))
        self.review_comments = list(self.pull_request.review_comments())
        self.last_update = time.time()
Example #10
def _get_patch_redirects(patch, allow_add_only=False):
    f = io.StringIO(patch.decode("utf-8"))
    p = unidiff.PatchSet(f)
    redirect_mappings = []

    for p_file in p:
        for p_hunk in p_file:
            removed_urls = []
            added_urls = []
            for p_line in p_hunk:
                if p_line.line_type == "-":
                    removed_urls += _url_extractor_wrapper(str(p_line))
                    logger.debug("removed_urls: {}".format(removed_urls))
                elif p_line.line_type == "+":
                    added_urls += _url_extractor_wrapper(str(p_line))
                    logger.debug("added_urls: {}".format(added_urls))

            for url in added_urls.copy():
                if url in removed_urls:
                    removed_urls.remove(url)
                    added_urls.remove(url)
                elif not url.startswith("http://") and not url.startswith(
                        "https://"):
                    added_urls.remove(url)

            if not allow_add_only:
                if removed_urls:
                    assert added_urls
                elif added_urls:
                    assert removed_urls

            if not removed_urls and not allow_add_only:
                logger.info("Skipping url patch {} {}".format(
                    removed_urls, added_urls))
                continue

            if len(set(added_urls)) == 1:
                to_url = added_urls[0]
                if allow_add_only and not removed_urls:
                    removed_urls.append(None)

                for url in removed_urls:
                    redirect_mappings.append((url, to_url))
            elif not added_urls:
                for url in removed_urls:
                    redirect_mappings.append((url, None))
            else:
                if len(added_urls) != len(removed_urls):
                    logger.info(
                        "Hunk ignored as removed cant be mapped to added: {}\n{}"
                        .format(removed_urls, added_urls))
                    continue
                for i, to_url in enumerate(added_urls):
                    redirect_mappings.append((removed_urls[i], to_url))

    return redirect_mappings
Example #11
def get_patched_files(diff_text):
    patchset = unidiff.PatchSet(diff_text)
    patched_files = {}
    for patch in patchset:
        diff_file = patch.path
        for hunk in patch:
            for line in hunk.target_lines():
                if line.is_added:
                    patched_files.setdefault(diff_file, []).append(
                        (line.target_line_no, line.value))
    return patched_files
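Given the same illustrative diff as in the sketch after Example #1, this helper pairs each added line number with its content (unidiff keeps the trailing newline in line.value):

SAMPLE_DIFF = """\
--- a/example.py
+++ b/example.py
@@ -1,2 +1,4 @@
 line one
+line two
+line three
 line four
"""

# Roughly: {'example.py': [(2, 'line two\n'), (3, 'line three\n')]}
print(get_patched_files(SAMPLE_DIFF))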
Example #12
def autopep8(data, config):
    # Run autopep8

    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    r = requests.get(data["diff_url"], headers=headers, auth=auth)
    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    for file in py_files:
        filename = file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(data["repository"], data["sha"], file)
        r = requests.get(url, headers=headers, auth=auth)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)

        cmd = 'autopep8 file_to_fix.py --diff {arg_to_ignore}'.format(
            arg_to_ignore=arg_to_ignore)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        data["diff"][filename] = stdout.decode(r.encoding)

        # Fix the errors
        data["diff"][filename] = data["diff"][filename].replace(
            "file_to_check.py", filename)
        data["diff"][filename] = data["diff"][filename].replace("\\", "\\\\")

        ## Store the link to the file
        url = "https://github.com/" + data["author"] + "/" + \
              data["repository"].split("/")[-1] + "/blob/" + \
              data["sha"] + file
        data[filename + "_link"] = url

        os.remove("file_to_fix.py")
Example #13
def comment_on_pr(diagnostics, repo, pr, token):
    headers = {
        "Authorization": "token " + token,
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "gps/intellij-inspections-action",
    }
    url = "https://api.github.com/repos/{}/pulls/{}/reviews".format(repo, pr)
    diff = get_diff_from_pr(repo, pr, token)
    ps = unidiff.PatchSet(diff)
    comments = []
    unknown = []
    for diagnostic in diagnostics:
        if diagnostic.error_level not in ["WARNING", "ERROR"]:
            continue
        pos = find_position(ps, diagnostic.file_name, diagnostic.line_number)
        if pos:
            comments.append({
                "body": diagnostic.description,
                "path": diagnostic.file_name,
                "position": pos,
            })
        else:
            unknown.append(diagnostic)
    event = "COMMENT"
    text = ""
    num_errors = sum(1 if d.error_level == "ERROR" else 0 for d in diagnostics)
    num_warnings = sum(1 if d.error_level == "WARNING" else 0
                       for d in diagnostics)
    if num_errors == 0:
        event = "APPROVE"
        if num_warnings == 0:
            text = "🎉 Excellent, your code has passed all inspections! 🎉"
        else:
            text = "Your code has no inspection errors, but there are a few warnings. Please check the warnings."
    else:
        event = "REQUEST_CHANGES"
        text = "‼️ Your code has errors - please fix them ‼️"
    if unknown:
        text += "\n\nThis branch has errors or warnings, but they are not part of the diff:\n\n"
        for diagnostic in unknown:
            text += "Path: {}\nLine number: {}\nLevel: {}\n Problem:{}\n".format(
                diagnostic.file_name,
                diagnostic.line_number,
                diagnostic.error_level,
                diagnostic.description,
            )
    if len(text) > 65000:
        # GitHub does not allow comments larger than 65536 characters
        text = text[:65000] + "‼️‼️TRUNCATED‼️‼️"
    body = {"event": event, "comments": comments, "body": text}
    res = requests.post(url, headers=headers, data=json.dumps(body))
    assert_200_response(res, "Unable to review PR")
Example #14
def detect_lines(diffstr):
    """Take a diff string and return a dict of files with line numbers changed."""
    resultant_lines = {}
    io = StringIO(diffstr)
    udiff = unidiff.PatchSet(io)
    for file in udiff:
        target_lines = []
        for hunk in file:
            target_lines += range(hunk.target_start,
                                  hunk.target_start + hunk.target_length)
        resultant_lines[file.path] = target_lines
    return resultant_lines
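Unlike the added-lines-only extractors above, detect_lines reports every target line a hunk touches, context included. On the same illustrative diff:

SAMPLE_DIFF = """\
--- a/example.py
+++ b/example.py
@@ -1,2 +1,4 @@
 line one
+line two
+line three
 line four
"""

# Lines 1 and 4 are unchanged context but are still reported:
# {'example.py': [1, 2, 3, 4]}
print(detect_lines(SAMPLE_DIFF))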
Example #15
def parse_error_diffs(
    errors: str,
    file_path_parser: FilePathParserType,
    logger: Optional[logging.Logger] = None,
) -> Iterable[Diagnostic]:
    """
    Compatible with isort, black
    """
    def _is_changed(line: unidiff.patch.Line) -> bool:
        return not line.is_context

    try:
        patches = unidiff.PatchSet(errors)
    except unidiff.errors.UnidiffParseError:
        _warn_parse_error(errors, logger)
        return
    for patch in patches:
        for hunk in patch:
            source_changes = list(filter(_is_changed, hunk.source_lines()))
            if source_changes:
                start_line = source_changes[0].source_line_no
                end_line = source_changes[-1].source_line_no
            else:
                target_changes = list(filter(_is_changed, hunk.target_lines()))
                assert target_changes, "expected either source or target line number"
                start_line = target_changes[0].target_line_no
                end_line = target_changes[-1].target_line_no

            try:
                file_path = file_path_parser(patch.source_file)
            except UnexpectedErrorFormat:
                _warn_parse_error(patch, logger)
                continue

            def filter_hunk(
                hunk: unidiff.patch.Hunk,
            ) -> Generator[unidiff.patch.Line, None, None]:
                for line in hunk:
                    if _is_changed(line):
                        yield line
                    elif line.source_line_no is not None:
                        if start_line <= line.source_line_no <= end_line:
                            yield line

            yield Diagnostic(
                start_line=start_line,
                end_line=end_line,
                start_column=1,
                file_path=file_path,
                diff="".join(map(str, filter_hunk(hunk))),
            )
Example #16
def autopep8(ghrequest, config):
    # Run autopep8

    r = utils.query_request(ghrequest.diff_url)
    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    ghrequest.links = {}
    for py_file in py_files:
        filename = py_file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(ghrequest.repository, ghrequest.sha, py_file)
        r = utils.query_request(url)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)

        cmd = 'autopep8 file_to_fix.py --diff {arg_to_ignore}'.format(
            arg_to_ignore=arg_to_ignore)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        ghrequest.diff[filename] = stdout.decode(r.encoding)

        # Fix the errors
        ghrequest.diff[filename] = ghrequest.diff[filename].replace(
            "file_to_fix.py", filename)
        ghrequest.diff[filename] = ghrequest.diff[filename].replace(
            "\\", "\\\\")

        ## Store the link to the file
        url = "https://github.com/{}/blob/{}{}"
        ghrequest.links[filename + "_link"] = url.format(
            ghrequest.repository, ghrequest.sha, py_file)
        os.remove("file_to_fix.py")
Example #17
def get_pr_diff(repo, pr_number, token):
    """Download the PR diff, return a list of PatchedFile"""

    headers = {
        "Accept": "application/vnd.github.v3.diff",
        "Authorization": f"token {token}",
    }
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"

    pr_diff_response = requests.get(url, headers=headers)
    pr_diff_response.raise_for_status()

    # PatchSet is the easiest way to construct what we want, but the
    # diff_line_no property on lines is counted from the top of the
    # whole PatchSet, whereas GitHub is expecting the "position"
    # property to be line count within each file's diff. So we need to
    # do this little bit of faff to get a list of file-diffs with
    # their own diff_line_no range
    diff = [
        unidiff.PatchSet(str(file))[0]
        for file in unidiff.PatchSet(pr_diff_response.text)
    ]
    return diff
Example #18
def get_pr_diff(repo, pr_number, token):
    """Download the PR diff

    """

    headers = {
        "Accept": "application/vnd.github.v3.diff",
        "Authorization": f"token {token}",
    }
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"

    pr_diff_response = requests.get(url, headers=headers)
    pr_diff_response.raise_for_status()

    return unidiff.PatchSet(pr_diff_response.text)
Example #19
    def position(self, message):
        """Calculate position within the PR, which is not the line number"""
        if not message.line_number:
            message.line_number = 1
        patch = unidiff.PatchSet(self.diff.split('\n'))
        for patched_file in patch:
            # strip the leading "b/" (lstrip('b/') would also eat leading
            # 'b' characters from the filename itself)
            target = patched_file.target_file[2:]
            if target == message.path:
                offset = 1
                for hunk_no, hunk in enumerate(patched_file):
                    for position, hunk_line in enumerate(hunk):
                        if '+' not in hunk_line.line_type:
                            continue
                        if hunk_line.target_line_no == message.line_number:
                            return position + offset
                    offset += len(hunk) + 1
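The offset arithmetic above implements GitHub's review-comment "position": the number of lines down from the file's first "@@" hunk header, where the headers of later hunks occupy positions of their own. A direct-count sketch that should return the same values:

def github_position(patched_file, target_line_no):
    """Count diff lines below the first hunk header until the added line."""
    position = 0
    for hunk_no, hunk in enumerate(patched_file):
        if hunk_no > 0:
            position += 1  # a later hunk's own "@@" header line counts too
        for line in hunk:
            position += 1
            if line.is_added and line.target_line_no == target_line_no:
                return position
    return None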
Example #20
def detect_lines(diffstr):
    """Take a diff string and return a dict of
    files with line numbers changed"""
    resultant_lines = {}
    # Force utf-8 re: https://github.com/ros/rosdistro/issues/6637
    encoding = 'utf-8'
    if isinstance(diffstr, bytes):
        diffstr = diffstr.decode(encoding)
    io = StringIO(diffstr)
    udiff = unidiff.PatchSet(io)
    for file in udiff:
        target_lines = []
        # if file.path in TARGET_FILES:
        for hunk in file:
            target_lines += range(hunk.target_start,
                                  hunk.target_start + hunk.target_length)
        resultant_lines[file.path] = target_lines
    return resultant_lines
Example #21
def autopep8ify(ghrequest, config):
    # Run autopep8
    r = utils.query_request(ghrequest.diff_url)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    linter = config["scanner"]["linter"]
    files_to_exclude = config[linter]["exclude"]

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            if utils.filename_match(py_file, files_to_exclude):
                continue
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    # Ignore errors and warnings specified in the config file
    to_ignore = ",".join(config["pycodestyle"]["ignore"])
    arg_to_ignore = ""
    if len(to_ignore) > 0:
        arg_to_ignore = "--ignore " + to_ignore

    for py_file in py_files:
        filename = py_file[1:]
        query = f"https://raw.githubusercontent.com/{ghrequest.repository}/{ghrequest.sha}/{py_file}"
        r = utils.query_request(query)
        with open("file_to_fix.py", 'w+', encoding=r.encoding) as file_to_fix:
            file_to_fix.write(r.text)

        cmd = f'autopep8 file_to_fix.py {arg_to_ignore}'
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        ghrequest.results[filename] = stdout.decode(r.encoding)

        os.remove("file_to_fix.py")
Example #22
def message(filename, line, content, diff):
    if content:
        patch = unidiff.PatchSet(diff.decode('utf-8').split('\n'))
        for patched_file in patch:
            if patched_file.target_file == 'b/' + filename:
                offset = 1
                for hunk_no, hunk in enumerate(patched_file):
                    for position, hunk_line in enumerate(hunk):
                        if '+' not in hunk_line.line_type:
                            continue
                        if hunk_line.target_line_no == line:
                            return {
                                'filename':
                                filename,
                                'line':
                                position + offset,
                                'content':
                                'Line: ' + str(line) + ' \n```\n' +
                                content.strip() + '\n```'
                            }
                    offset += len(hunk) + 1
Example #23
def get_files_involved_in_pr(repo, pr_number):
    """
    Return a list of file names modified/added in the PR
    """
    headers = {"Accept": "application/vnd.github.VERSION.diff"}

    query = f"/repos/{repo}/pulls/{pr_number}"
    r = utils.query_request(query, headers=headers)

    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    files = {}

    for patchset in patch:
        diff_file = patchset.target_file[1:]
        files[diff_file] = []
        for hunk in patchset:
            for line in hunk.target_lines():
                if line.is_added:
                    files[diff_file].append(line.target_line_no)
    return files
Example #24
def find_changed_lines(diff: str) -> Dict[str, List[Tuple[int, int]]]:
    # Delay the import since it's only needed when using the --diff-file
    # argument, which local runs typically don't use
    try:
        import unidiff  # type: ignore[import]
    except ImportError as e:
        e.msg += ", run 'pip install unidiff'"  # type: ignore[attr-defined]
        raise e

    files = collections.defaultdict(list)

    for file in unidiff.PatchSet(diff):
        for hunk in file:
            start = hunk[0].target_line_no
            if start is None:
                start = 1
            end = hunk[-1].target_line_no
            files[file.path].append((start, end))

    return dict(files)
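On the illustrative SAMPLE_DIFF from the sketch after Example #1, this variant records one (start, end) span per hunk, context lines included, whereas Example #1 folded only the added lines:

# Example #1: {'example.py': [[2, 3]]}   (added lines only)
# This one:   {'example.py': [(1, 4)]}   (first to last target line of the hunk)
print(find_changed_lines(SAMPLE_DIFF))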
Example #25
def get_candidates_from_diff(difftext):
    try:
        import unidiff
    except ImportError as e:
        raise SystemExit("Could not import unidiff library: %s" % e)
    if isinstance(difftext, bytes):
        difftext = difftext.decode('utf-8')
    patch = unidiff.PatchSet(difftext)

    candidates = []
    for patchedfile in patch.added_files + patch.modified_files:
        if patchedfile.source_file == '/dev/null':
            candidates.append(patchedfile.path)
        else:
            lines = ",".join([
                "%s-%s" %
                (hunk.target_start, hunk.target_start + hunk.target_length)
                for hunk in patchedfile
            ])
            candidates.append("%s:%s" % (patchedfile.path, lines))
    return candidates
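On the same illustrative diff, the modified file yields one "path:start-end" entry per hunk; note that the end bound, target_start + target_length, is one past the hunk's last target line:

SAMPLE_DIFF = """\
--- a/example.py
+++ b/example.py
@@ -1,2 +1,4 @@
 line one
+line two
+line three
 line four
"""

# ['example.py:1-5']
print(get_candidates_from_diff(SAMPLE_DIFF))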
Example #26
async def news_hook(request):
    payload = await request.read()

    # Verify the payload against the signature
    if (request.headers.get("X-Hub-Signature")
            and request.app.get("github_payload_key")):
        try:
            _verify_signature(
                request.app["github_payload_key"],
                request.headers["X-Hub-Signature"],
                payload,
            )
        except InvalidSignature:
            return web.json_response(
                {"message": "Invalid signature"},
                status=400,
            )

    data = json.loads(payload.decode(request.charset or "utf8"))

    # We only care about a few different kinds of actions, the rest of them
    # are not useful to us, so we'll no-op out quickly if it is one of them.
    if (data.get("action")
            not in {"labeled", "unlabeled", "opened", "reopened",
                    "synchronize"}):
        return web.json_response({"message": "Skipped due to action"})

    async with aiohttp.ClientSession() as session:
        gh = gidgethub.aiohttp.GitHubAPI(
            session,
            "BrownTruck",
            oauth_token=request.app["github_token"],
        )

        # Grab our labels out of GitHub's API
        tries = 5
        while True:
            try:
                issue_data = await gh.getitem(data["pull_request"]["issue_url"])
                label_data = await gh.getitem(issue_data["labels_url"])
            except gidgethub.BadRequest as exc:
                if (exc.status_code == http.HTTPStatus.NOT_FOUND
                        and tries > 0):
                    tries -= 1
                    await asyncio.sleep(1)
                    continue
                raise
            else:
                break

        labels = {l["name"] for l in label_data}

        # Grab the diff from GitHub and parse it into a diff object.
        diff_url = data["pull_request"]["diff_url"]
        async with session.get(diff_url) as resp:
            diff = unidiff.PatchSet(io.StringIO(await resp.text()))

        # Determine if the status check for this PR is passing or not and
        # update the status check to account for that.
        if ("trivial" in labels
                or any(f.is_added_file for f in diff
                       if _news_fragment_re.search(f.path))):
            await gh.post(
                data["pull_request"]["statuses_url"],
                data={
                    "context": NEWS_FILE_CONTEXT,
                    "target_url": HELP_URL,
                    "state": "success",
                    "description":
                        "News files updated and/or change is trivial.",
                },
            )

            return web.json_response({
                "message": "news file updated and/or ignored",
            })
        else:
            await gh.post(
                data["pull_request"]["statuses_url"],
                data={
                    "context": NEWS_FILE_CONTEXT,
                    "target_url": HELP_URL,
                    "state": "failure",
                    "description":
                        "Missing either a news entry or a trivial file/label.",
                },
            )

            return web.json_response({
                "message": "news file was not updated",
                "labels": list(labels),
                "files": [
                    {"path": f.path, "is_added_file": f.is_added_file}
                    for f in diff
                ],
            })
Example #27
def run_pycodestyle(data, config):
    """
    Run pycodestyle script on the files and update the data
    dictionary
    """
    headers = {"Authorization": "token " + os.environ["GITHUB_TOKEN"]}
    diff_headers = headers.copy()
    diff_headers["Accept"] = "application/vnd.github.VERSION.diff"
    auth = (os.environ["BOT_USERNAME"], os.environ["BOT_PASSWORD"])
    repository = data["repository"]
    after_commit_hash = data["after_commit_hash"]
    author = data["author"]
    diff_url = "https://api.github.com/repos/{}/pulls/{}"
    diff_url = diff_url.format(repository, str(data["pr_number"]))

    # Run pycodestyle
    r = requests.get(diff_url, headers=diff_headers, auth=auth)

    ## All the python files with additions
    patch = unidiff.PatchSet(r.content.splitlines(), encoding=r.encoding)

    # A dictionary with filename paired with list of new line numbers
    py_files = {}

    for patchset in patch:
        if patchset.target_file[-3:] == '.py':
            py_file = patchset.target_file[1:]
            py_files[py_file] = []
            for hunk in patchset:
                for line in hunk.target_lines():
                    if line.is_added:
                        py_files[py_file].append(line.target_line_no)

    for file in py_files:
        filename = file[1:]
        url = "https://raw.githubusercontent.com/{}/{}/{}"
        url = url.format(repository, after_commit_hash, file)
        r = requests.get(url, headers=headers, auth=auth)
        with open("file_to_check.py", 'w+',
                  encoding=r.encoding) as file_to_check:
            file_to_check.write(r.text)

        # Use the command line here
        cmd = 'pycodestyle {config[pycodestyle_cmd_config]} file_to_check.py'.format(
            config=config)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        data["extra_results"][filename] = stdout.decode(
            r.encoding).splitlines()

        # Put only relevant errors in the data["results"] dictionary
        data["results"][filename] = []
        for error in list(data["extra_results"][filename]):
            if re.search(r"^file_to_check.py:\d+:\d+:\s[WE]\d+\s.*", error):
                data["results"][filename].append(
                    error.replace("file_to_check.py", filename))
                data["extra_results"][filename].remove(error)

        ## When diff_only = True, drop errors reported on lines
        ## outside the diff
        for error in list(data["results"][filename]):
            if config["scanner"]["diff_only"]:
                if int(error.split(":")[1]) not in py_files[file]:
                    data["results"][filename].remove(error)

        ## Store the link to the file
        url = "https://github.com/{}/{}/blob/{}{}"
        url = url.format(author,
                         repository.split("/")[-1], after_commit_hash, file)
        data[filename + "_link"] = url

        os.remove("file_to_check.py")
Example #28
#!/usr/bin/env python3

import sys
import unidiff
import os.path

if len(sys.argv) != 3:
    print("Usage: filter_lint_by_diff.py diff.patch repository_root_directory"
          " < cpplint_warnings.txt", file=sys.stderr)
    sys.exit(1)

added_lines = set()
repository_root = sys.argv[2]

with open(sys.argv[1], "r") as f:
    diff = unidiff.PatchSet(f)
    for diff_file in diff:
        filename = diff_file.target_file
        # Skip files deleted in the tip (b side of the diff):
        if filename == "/dev/null":
            continue
        assert filename.startswith("b/")
        filename = os.path.join(repository_root, filename[2:])
        added_lines.add((filename, 0))
        for diff_hunk in diff_file:
            for diff_line in diff_hunk:
                if diff_line.line_type == "+":
                    added_lines.add((filename, diff_line.target_line_no))

for l in sys.stdin:
    bits = l.split(":")
    if len(bits) < 3:
Example #29
    def __init__(
        self,
        owner,
        repo,
        pr=None,
        branch=None,
        token=None,
        url=None,
        commit=None,
        ignore_paths=None,
        prefix=None,
    ):
        """
        GitHubInterface lets us post messages to GitHub.

        owner and repo are the repository owner/organization and repo name respectively.

        pr is the ID number of the pull request. branch is the branch name. either pr OR branch
        must be populated.

        token is your GitHub API token.

        url is the base URL of your GitHub instance, such as https://github.com

        commit is the commit hash we're running against

        ignore_paths are paths to ignore comments from
        """
        self.github = None
        self.stopped_early = False
        self.prefix = prefix
        self.ignore_paths = set(ignore_paths or [])
        if not url or url == "https://github.com":
            self.github = github3.GitHub(token=token)
        else:
            self.github = github3.GitHubEnterprise(url, token=token)
        self.owner = owner
        self.repo = repo

        self.github_repo = self.github.repository(self.owner, self.repo)
        all_commits = self.repo_commits(self.github_repo)
        self.master_sha = all_commits[0].sha
        print("Master SHA: {0}".format(self.master_sha))

        print("Branch: {0}".format(branch))
        self.pull_request_number = None
        if branch and not pr:
            for github_repo in [self.github_repo, self.github_repo.parent]:
                if pr:
                    break

                if not github_repo:
                    continue

                try:
                    # github.py == 0.9.6
                    pulls = github_repo.iter_pulls()
                except AttributeError:
                    pulls = github_repo.pull_requests()

                for pull_request in pulls:
                    print("Branch: {} - Pull Request Head Ref: {}".format(
                        branch, pull_request.head.ref))
                    if pull_request.head.ref == branch:
                        pr = pull_request.number
                        self.github_repo = github_repo
                        break

        self.owner = self.github_repo.owner
        self.repo = self.github_repo.name

        # TODO: support non-PR runs
        try:
            pr = int(pr)
        except (ValueError, TypeError):
            print("{0} is not a valid pull request ID".format(pr))
            self.github = None
            return

        print("PR ID: {0}".format(pr))
        self.pull_request_number = pr
        self.pull_request = self.github.pull_request(self.owner, self.repo, pr)
        self.target_sha = self.pull_request.base.sha
        self.target_branch = self.pull_request.base.label
        try:
            # github.py == 0.9.6
            try:
                git.fetch(
                    self.pull_request.base.to_json()["repo"]["clone_url"])
            except subprocess.CalledProcessError:
                git.fetch(self.pull_request.base.to_json()["repo"]["ssh_url"])
        except AttributeError:
            # latest github.py
            try:
                git.fetch(
                    self.pull_request.base.repository.as_dict()["clone_url"])
            except subprocess.CalledProcessError:
                git.fetch(
                    self.pull_request.base.repository.as_dict()["ssh_url"])

        print("Target SHA: {0}".format(self.target_sha))
        print("Target Branch: {0}".format(self.target_branch))
        self.commits = self.pr_commits(self.pull_request)
        self.last_sha = commit or git.current_sha()
        print("Last SHA: {0}".format(self.last_sha))
        self.first_sha = self.commits[0].sha
        self.diff = git.diff(self.target_sha, self.last_sha)
        self.patch = unidiff.PatchSet(self.diff.split("\n"))
        self.review_comments = list(self.pull_request.review_comments())
        self.last_update = time.time()
        self.messages_in_files = dict()
Example #30
    def diff(self) -> unidiff.PatchSet:
        response = util.request(
            'https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff' %
            (self._repo, self._pr_number))
        return unidiff.PatchSet(response.content.decode('utf-8'))