Esempio n. 1
0
    def _parse_hunk_line(self, line):
        """
        Given a hunk line in `git diff` output, return the line number
        at the start of the hunk.  A hunk is a segment of code that
        contains changes.

        The format of the hunk line is:

            @@ -k,l +n,m @@ TEXT

        where `k,l` represent the start line and length before the changes
        and `n,m` represent the start line and length after the changes.

        `git diff` will sometimes put a code excerpt from within the hunk
        in the `TEXT` section of the line.
        """
        # Split the line at the @@ terminators (start and end of the line)
        components = line.split('@@')

        # The first component should be an empty string, because
        # the line starts with '@@'.  The second component should
        # be the hunk information, and any additional components
        # are excerpts from the code.
        if len(components) >= 2:

            hunk_info = components[1]
            groups = self.HUNK_LINE_RE.findall(hunk_info)

            if len(groups) == 1:

                try:
                    return int(groups[0])

                except ValueError:
                    msg = "Could not parse '{}' as a line number".format(
                        groups[0])
                    raise GitDiffError(msg)

            else:
                msg = "Could not find start of hunk in line '{}'".format(line)
                raise GitDiffError(msg)

        else:
            msg = "Could not parse hunk in line '{}'".format(line)
            raise GitDiffError(msg)
Esempio n. 2
0
    def _parse_source_line(self, line):
        """
        Given a source line in `git diff` output, return the path
        to the source file.
        """
        if "--git" in line:
            regex = self.SRC_FILE_RE
        elif "--cc" in line:
            regex = self.MERGE_CONFLICT_RE
        else:
            msg = f"Do not recognize format of source in line '{line}'"
            raise GitDiffError(msg)

        # Parse for the source file path
        groups = regex.findall(line)

        if len(groups) == 1:
            return groups[0]

        msg = f"Could not parse source path in line '{line}'"
        raise GitDiffError(msg)
Esempio n. 3
0
    def _parse_source_sections(self, diff_str):
        """
        Given the output of `git diff`, return a dictionary
        with keys that are source file paths.

        Each value is a list of lines from the `git diff` output
        related to the source file.

        Raises a `GitDiffError` if `diff_str` is in an invalid format.
        """

        # Create a dict to map source files to lines in the diff output
        source_dict = dict()

        # Keep track of the current source file
        src_path = None

        # Signal that we've found a hunk (after starting a source file)
        found_hunk = False

        # Parse the diff string into sections by source file
        for line in diff_str.split('\n'):

            # If the line starts with "diff --git"
            # or "diff --cc" (in the case of a merge conflict)
            # then it is the start of a new source file
            if line.startswith('diff --git') or line.startswith('diff --cc'):

                # Retrieve the name of the source file
                src_path = self._parse_source_line(line)

                # Create an entry for the source file, if we don't
                # already have one.
                if src_path not in source_dict:
                    source_dict[src_path] = []

                # Signal that we're waiting for a hunk for this source file
                found_hunk = False

            # Every other line is stored in the dictionary for this source file
            # once we find a hunk section
            else:

                # Only add lines if we're in a hunk section
                # (ignore index and files changed lines)
                if found_hunk or line.startswith('@@'):

                    # Remember that we found a hunk
                    found_hunk = True

                    if src_path is not None:
                        source_dict[src_path].append(line)

                    else:
                        # We tolerate other information before we have
                        # a source file defined, unless it's a hunk line
                        if line.startswith("@@"):
                            msg = "Hunk has no source file: '{0}'".format(line)
                            raise GitDiffError(msg)

        return source_dict