Exemple #1
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git-style diff.

        Parsing begins on the ``diff --git`` line found at ``linenum``. The
        extended headers are read through :py:meth:`_parse_extended_headers`,
        and the remaining lines are then consumed until
        :py:meth:`_is_git_diff` matches, a binary patch line has been
        recorded, or ``self.lines`` is exhausted.

        Args:
            linenum (int):
                The index in ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of:

            1. The line number at which parsing should continue.
            2. The populated ``ParsedDiffFile``, or ``None`` if nothing
               followed the ``diff --git`` line or if the change had no
               content and was not a pre-creation, moved, copied or deleted
               file.
        """
        def _extract_filename(raw_line, marker, git_prefix):
            # Drop the ``--- ``/``+++ `` marker from the front of the line.
            name = raw_line[len(marker):]

            # Some diffs incorrectly list the filename with a single
            # trailing tab; strip it.
            if name.endswith(b'\t'):
                name = name[:-1]

            # Strip the Git a/ or b/ prefix, if set in the diff.
            if name.startswith(git_prefix):
                name = name[len(git_prefix):]

            return name

        header_linenum = linenum
        git_header = self.lines[header_linenum]

        parsed_file = ParsedDiffFile()
        parsed_file.append_data(git_header)
        parsed_file.append_data(b'\n')
        parsed_file.binary = False

        linenum = header_linenum + 1

        # Nothing follows the ``diff --git`` line, so there's no file to
        # report.
        if len(self.lines) <= linenum:
            return linenum, None

        # Start from the globally-provided commit information. An index
        # header, if present, replaces these below.
        parsed_file.orig_file_details = self.base_commit_id
        parsed_file.modified_file_details = self.new_commit_id

        ext_headers, linenum = self._parse_extended_headers(linenum)

        # Look through the header lines for a file mode that denotes a
        # symbolic link. Scanning stops at the first one found.
        mode_matches = (
            GitDiffParser.FILE_MODE_RE.search(header_line)
            for header_line in self.lines[header_linenum:linenum]
        )

        if any(stat.S_ISLNK(int(found.group('mode'), 8))
               for found in mode_matches
               if found):
            parsed_file.is_symlink = True

        # Record the created/deleted/mode-changed state.
        if self._is_new_file(ext_headers):
            parsed_file.append_data(ext_headers[b'new file mode'][1])
            parsed_file.orig_file_details = PRE_CREATION
        elif self._is_deleted_file(ext_headers):
            parsed_file.append_data(ext_headers[b'deleted file mode'][1])
            parsed_file.deleted = True
        elif self._is_mode_change(ext_headers):
            for mode_key in (b'old mode', b'new mode'):
                parsed_file.append_data(ext_headers[mode_key][1])

        # Renames and copies share the same bookkeeping; only the header
        # names and the flag that gets set differ.
        if self._is_moved_file(ext_headers):
            transfer = (b'rename from', b'rename to', 'moved')
        elif self._is_copied_file(ext_headers):
            transfer = (b'copy from', b'copy to', 'copied')
        else:
            transfer = None

        if transfer is not None:
            from_key, to_key, flag_name = transfer

            parsed_file.orig_filename = ext_headers[from_key][0]
            parsed_file.modified_filename = ext_headers[to_key][0]
            setattr(parsed_file, flag_name, True)

            if b'similarity index' in ext_headers:
                parsed_file.append_data(ext_headers[b'similarity index'][1])

            parsed_file.append_data(ext_headers[from_key][1])
            parsed_file.append_data(ext_headers[to_key][1])

        # Tracks whether any actual change content (or a binary patch) was
        # seen. Until then the change is considered empty.
        has_content = False

        if b'index' in ext_headers:
            blob_range = ext_headers[b'index'][0].split()[0]

            if b'..' in blob_range:
                old_blob, new_blob = blob_range.split(b'..')
                parsed_file.orig_file_details = old_blob
                parsed_file.modified_file_details = new_blob

            if self.pre_creation_regexp.match(parsed_file.orig_file_details):
                parsed_file.orig_file_details = PRE_CREATION

            parsed_file.append_data(ext_headers[b'index'][1])

        # Consume the rest of this file's section.
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break

            if self._is_binary_patch(linenum):
                parsed_file.binary = True
                parsed_file.append_data(self.lines[linenum])
                parsed_file.append_data(b'\n')
                has_content = True
                linenum += 1
                break

            if not self._is_diff_fromfile_line(linenum):
                has_content = True
                linenum = self.parse_diff_line(linenum, parsed_file)
                continue

            # This is a "---" line, and the "+++" line comes right after it.
            from_line = self.lines[linenum]
            to_line = self.lines[linenum + 1]

            old_name = _extract_filename(from_line, b'--- ', b'a/')
            new_name = _extract_filename(to_line, b'+++ ', b'b/')

            # A /dev/null source marks the original as pre-creation; a
            # /dev/null on either side borrows the name from the other side.
            was_created = (old_name == b'/dev/null')

            if was_created:
                parsed_file.orig_file_details = PRE_CREATION

            parsed_file.orig_filename = new_name if was_created else old_name
            parsed_file.modified_filename = (
                old_name if new_name == b'/dev/null' else new_name)

            for chunk in (from_line, b'\n', to_line, b'\n'):
                parsed_file.append_data(chunk)

            linenum += 2

        if not parsed_file.orig_filename:
            # There were no --- or +++ lines, which usually means the file
            # was deleted or moved without changes. Fall back to parsing
            # the diff --git line, which is more error-prone.
            assert not parsed_file.modified_filename

            self._parse_diff_git_line(git_header, parsed_file, linenum)

        # An empty change is only kept for a new 0-length file, a moved
        # file, a copied file, or a deleted 0-length file.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out empty
        # changes at the diff viewer level in a sane way.
        if not has_content and parsed_file.orig_file_details != PRE_CREATION:
            if not (parsed_file.moved or parsed_file.copied
                    or parsed_file.deleted):
                parsed_file = None

        return linenum, parsed_file
Exemple #2
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git-style diff.

        Starting on the ``diff --git`` line at ``linenum``, this reads the
        extended headers (file modes, rename/copy information and the
        ``index`` line), the ``---``/``+++`` filename lines, and the change
        content. UNIX file modes and symlink information are recorded on
        the resulting file along the way.

        Args:
            linenum (int):
                The index in ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of:

            1. The next line number to parse.
            2. The populated :py:class:`ParsedDiffFile` instance for this
               file, or ``None`` if nothing followed the ``diff --git``
               line or the file was discarded as an empty change.
        """
        def _extract_filename(raw_line, marker, git_prefix):
            # Drop the ``--- ``/``+++ `` marker from the front of the line.
            name = raw_line[len(marker):]

            # Some diffs incorrectly list the filename with a single
            # trailing tab; strip it.
            if name.endswith(b'\t'):
                name = name[:-1]

            # Strip the Git a/ or b/ prefix, if set in the diff.
            if name.startswith(git_prefix):
                name = name[len(git_prefix):]

            return name

        lines = self.lines

        git_header = lines[linenum]
        linenum += 1

        # Nothing follows the ``diff --git`` line, so there's no file to
        # report.
        if len(lines) <= linenum:
            return linenum, None

        parsed_file = ParsedDiffFile(
            parsed_diff_change=self.parsed_diff_change)
        parsed_file.append_data(git_header)
        parsed_file.append_data(b'\n')
        parsed_file.binary = False

        # Start from the globally-provided commit information. An index
        # header, if present, replaces these below.
        parsed_file.orig_file_details = self.base_commit_id
        parsed_file.modified_file_details = self.new_commit_id

        ext_headers, linenum = self._parse_extended_headers(linenum)

        # Work out the created/deleted/modified state along with the
        # accompanying UNIX file mode(s).
        if self._is_new_file(ext_headers):
            created_mode_line = ext_headers[b'new file mode'][1]
            parsed_file.append_data(created_mode_line)
            parsed_file.orig_file_details = PRE_CREATION
            parsed_file.new_unix_mode = \
                self._parse_unix_mode(created_mode_line)
        elif self._is_deleted_file(ext_headers):
            deleted_mode_line = ext_headers[b'deleted file mode'][1]
            parsed_file.append_data(deleted_mode_line)
            parsed_file.deleted = True
            parsed_file.old_unix_mode = \
                self._parse_unix_mode(deleted_mode_line)
        elif self._is_mode_change(ext_headers):
            prev_mode_line = ext_headers[b'old mode'][1]
            next_mode_line = ext_headers[b'new mode'][1]

            for mode_line in (prev_mode_line, next_mode_line):
                parsed_file.append_data(mode_line)

            parsed_file.old_unix_mode = self._parse_unix_mode(prev_mode_line)
            parsed_file.new_unix_mode = self._parse_unix_mode(next_mode_line)

        # Renames and copies share the same bookkeeping; only the header
        # names and the flag that gets set differ.
        if self._is_moved_file(ext_headers):
            transfer = (b'rename from', b'rename to', 'moved')
        elif self._is_copied_file(ext_headers):
            transfer = (b'copy from', b'copy to', 'copied')
        else:
            transfer = None

        if transfer is not None:
            from_key, to_key, flag_name = transfer

            parsed_file.orig_filename = ext_headers[from_key][0]
            parsed_file.modified_filename = ext_headers[to_key][0]
            setattr(parsed_file, flag_name, True)

            if b'similarity index' in ext_headers:
                parsed_file.append_data(ext_headers[b'similarity index'][1])

            parsed_file.append_data(ext_headers[from_key][1])
            parsed_file.append_data(ext_headers[to_key][1])

        # Tracks whether any actual change content (or a binary patch) was
        # seen. Until then the change is considered empty.
        has_content = False

        if b'index' in ext_headers:
            index_value, index_line = (ext_headers[b'index'][0],
                                       ext_headers[b'index'][1])
            blob_range = index_value.split()[0]

            if b'..' in blob_range:
                old_blob, new_blob = blob_range.split(b'..')
                parsed_file.orig_file_details = old_blob
                parsed_file.modified_file_details = new_blob

            if self.pre_creation_regexp.match(parsed_file.orig_file_details):
                parsed_file.orig_file_details = PRE_CREATION

            parsed_file.append_data(index_line)
            index_mode = self._parse_unix_mode(index_line)

            if index_mode is not None:
                # This replaces any mode set above. In theory, a Git diff
                # shouldn't have multiple (conflicting) mode lines.
                parsed_file.old_unix_mode = \
                    parsed_file.new_unix_mode = index_mode

        # The line right after the most recent ---/+++ pair, if one was
        # seen. This is where the symlink target scan below starts.
        content_start = None

        # Consume the rest of this file's section.
        while linenum < len(lines):
            if self._is_git_diff(linenum):
                break

            if self._is_binary_patch(linenum):
                parsed_file.binary = True
                parsed_file.append_data(lines[linenum])
                parsed_file.append_data(b'\n')
                has_content = True
                linenum += 1
                break

            if not self._is_diff_fromfile_line(linenum):
                has_content = True
                linenum = self.parse_diff_line(linenum, parsed_file)
                continue

            # This is a "---" line, and the "+++" line comes right after it.
            from_line = lines[linenum]
            to_line = lines[linenum + 1]

            old_name = _extract_filename(from_line, b'--- ', b'a/')
            new_name = _extract_filename(to_line, b'+++ ', b'b/')

            # A /dev/null source marks the original as pre-creation; a
            # /dev/null on either side borrows the name from the other side.
            was_created = (old_name == b'/dev/null')

            if was_created:
                parsed_file.orig_file_details = PRE_CREATION

            parsed_file.orig_filename = new_name if was_created else old_name
            parsed_file.modified_filename = (
                old_name if new_name == b'/dev/null' else new_name)

            for chunk in (from_line, b'\n', to_line, b'\n'):
                parsed_file.append_data(chunk)

            linenum += 2
            content_start = linenum

        # With the UNIX file modes and changed lines known, work out
        # whether this is a symlink. The new mode is preferred, falling
        # back to the old one.
        link_mode = parsed_file.new_unix_mode or parsed_file.old_unix_mode
        is_link = (link_mode is not None
                   and stat.S_ISLNK(int(link_mode, 8)))

        if is_link:
            parsed_file.is_symlink = True

            if content_start is not None:
                # Removed and added lines in the content are taken as the
                # old and new symlink targets.
                for content_index in range(content_start, linenum):
                    content_line = lines[content_index]

                    if content_line.startswith(b'-'):
                        parsed_file.old_symlink_target = \
                            content_line[1:].strip()
                    elif content_line.startswith(b'+'):
                        parsed_file.new_symlink_target = \
                            content_line[1:].strip()

        if not parsed_file.orig_filename:
            # There were no --- or +++ lines, which usually means the file
            # was deleted or moved without changes. Fall back to parsing
            # the diff --git line, which is more error-prone.
            assert not parsed_file.modified_filename

            self._parse_diff_git_line(git_header, parsed_file, linenum)

        # An empty change is only kept for a new 0-length file, a moved
        # file, a copied file, or a deleted 0-length file.
        #
        # TODO: In the future, we'll want to keep empty files so we can show
        #       metadata changes, once that functionality is available in the
        #       diff viewer. That will require filtering out empty changes
        #       at the diff viewer level in a sane way.
        if not has_content and parsed_file.orig_file_details != PRE_CREATION:
            if not (parsed_file.moved or parsed_file.copied
                    or parsed_file.deleted):
                # No interesting content was found, so leave out this
                # file's info.
                parsed_file.discard()
                parsed_file = None

        return linenum, parsed_file