Example #1
0
    def parse_diff_header(self, linenum, info):
        """Parse a standard diff header starting at the given line.

        A header is recognized only when a line follows ``linenum`` and the
        two lines form either a unified diff header (``--- `` then ``+++ ``)
        or a context diff header (``*** `` then ``--- ``, where the first
        line does not end with `` ****``).

        When a header is found, everything after the 4-byte marker on each
        line is passed to :py:meth:`parse_filename_header`, and the results
        are stored in ``info`` as ``origFile``/``origInfo`` and
        ``newFile``/``newInfo``.

        Args:
            linenum (int):
                The line number at which to look for the header.

            info (dict):
                The dictionary that receives the parsed header values.

        Returns:
            int:
            The line number following the header, or ``linenum`` unchanged
            if no header was found there.

        Raises:
            DiffParserError:
                The result of :py:meth:`parse_filename_header` could not be
                unpacked into a filename and file details.
        """
        lines = self.lines

        # A header needs two lines; bail out if there is no following line.
        if linenum + 1 >= len(lines):
            return linenum

        first = lines[linenum]
        second = lines[linenum + 1]

        is_unified = first.startswith(b'--- ') and second.startswith(b'+++ ')

        # A "*** ... ****" line is a context hunk marker, not a file header.
        is_context = (first.startswith(b'*** ') and
                      second.startswith(b'--- ') and
                      not first.endswith(b" ****"))

        if not (is_unified or is_context):
            return linenum

        try:
            # Both header lines are handled the same way; only the keys
            # they populate differ.
            for name_key, details_key in (('origFile', 'origInfo'),
                                          ('newFile', 'newInfo')):
                info[name_key], info[details_key] = \
                    self.parse_filename_header(self.lines[linenum][4:],
                                               linenum)
                linenum += 1
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information', linenum)

        return linenum
Example #2
0
    def recalculate_line_counts(self, tool):
        """Recalculate the insert_count and delete_count values.

        This re-parses the stored diff content using the parser supplied by
        ``tool`` and copies the line counts from the single parsed file onto
        this object.

        If parsing raises a ``DiffParserError``, or the parsed result does
        not contain exactly one file, the failure is logged and the counts
        are left untouched.

        Args:
            tool (object):
                An object providing ``get_parser(content)``, which must
                return a parser with a ``parse()`` method.
        """
        # Pass the argument lazily so the message is only formatted when
        # debug logging is enabled (and to match the error call below).
        logging.debug('Recalculating insert/delete line counts on '
                      'RawFileDiffData %s', self.pk)

        try:
            files = tool.get_parser(self.content).parse()

            if len(files) != 1:
                raise DiffParserError('Got wrong number of files (%d)' %
                                      len(files))
        except DiffParserError as e:
            logging.error(
                'Failed to correctly parse stored diff data in '
                'RawFileDiffData ID %s when trying to get '
                'insert/delete line counts: %s', self.pk, e)
        else:
            file_info = files[0]
            self.insert_count = file_info.insert_count
            self.delete_count = file_info.delete_count

            # Only persist when this object has a primary key. Only
            # ``extra_data`` is written; presumably the counts are stored
            # there -- verify against the model definition.
            if self.pk:
                self.save(update_fields=['extra_data'])
Example #3
0
    def parse_filename_header(self, s, linenum):
        """Split a diff filename line into a filename and file details.

        The value is whatever follows a ``---`` or ``+++`` indicator (or a
        special variant handled by a subclass). The details are often a
        revision for the file, but that depends on the diff variation.

        Two separators are recognized, in order of preference:

        1. A single tab. Everything before the first tab is the filename,
           which lets filenames contain spaces.
        2. A run of two or more spaces. Everything before the first such
           run is the filename.

        Only the provided value is examined; subsequent lines are never
        consulted. Subclasses can override this to parse the value another
        way or to normalize filenames.

        Args:
            s (bytes):
                The value to parse.

            linenum (int):
                The line number containing the value, used for error
                reporting.

        Returns:
            list:
            A two-item list containing:

            1. The filename (as bytes)
            2. The additional file information (as bytes)

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                Neither a tab nor a run of two or more spaces was found in
                the value.
        """
        # Preferred form: a tab cleanly separates the filename from the
        # details, so spaces inside the filename are harmless.
        if b'\t' in s:
            filename, _sep, details = s.partition(b'\t')

            return [filename, details]

        # Fallback: spaces as the separator. This is technically wrong, so
        # we have to assume the filename never contains consecutive spaces
        # and that at least two spaces precede the details.
        if b'  ' in s:
            gap = re.search(br'  +', s)

            return [s[:gap.start()], s[gap.end():]]

        raise DiffParserError(
            'No valid separator after the filename was '
            'found in the diff header', linenum)
Example #4
0
    def parse_filename_header(self, s, linenum):
        """Split a diff filename line into a filename and file details.

        A tab is the preferred separator. If none is present, the first run
        of two or more spaces is used instead.

        Args:
            s (bytes):
                The value to parse.

            linenum (int):
                The line number containing the value, used for error
                reporting.

        Returns:
            list:
            A two-item list containing the filename and the additional file
            information, both as bytes.

        Raises:
            DiffParserError:
                Neither a tab nor a run of two or more spaces was found in
                the value.
        """
        if b"\t" in s:
            # There's a \t separating the filename and info. This is the
            # best case scenario, since it allows for filenames with spaces
            # without much work.
            return s.split(b"\t", 1)

        # There's spaces being used to separate the filename and info.
        # This is technically wrong, so all we can do is assume that
        # 1) the filename won't have multiple consecutive spaces, and
        # 2) there's at least 2 spaces separating the filename and info.
        if b"  " in s:
            # The pattern must be bytes to match the bytes value; a str
            # pattern raises TypeError on Python 3.
            return re.split(br"  +", s, maxsplit=1)

        raise DiffParserError("No valid separator after the filename was " +
                              "found in the diff header",
                              linenum)
Example #5
0
    def parse_special_header(self, linenum, info):
        """Parse a special ``Index:`` header found before the diff header.

        This looks for an ``Index: `` line at ``linenum`` and then scans
        forward for the separator line (``self.INDEX_SEP``). If the
        separator is found before any line starting with ``---`` or
        ``+++``, the value after ``Index:`` is stored in ``info['index']``.

        The separator must be followed by at least one more line in order to
        be recognized.

        Args:
            linenum (int):
                The line number to begin parsing.

            info (dict):
                The dictionary that receives the ``index`` value.

        Returns:
            int:
            The line number after the separator line, or ``linenum``
            unchanged if no special header was found.

        Raises:
            DiffParserError:
                The ``Index:`` line did not contain a value.
        """
        try:
            index_line = self.lines[linenum]
            is_index = index_line.startswith(b'Index: ')
        except IndexError:
            is_index = False

        if is_index:
            # Try to find the "====" line.
            temp_linenum = linenum + 1

            while temp_linenum + 1 < len(self.lines):
                line = self.lines[temp_linenum]

                if line == self.INDEX_SEP:
                    # We found the line. This is looking like a valid diff
                    # for CVS, Subversion, and other systems. Try to parse
                    # the data from the line.
                    try:
                        info['index'] = index_line.split(None, 1)[1]
                    except (IndexError, ValueError):
                        # A bare "Index: " line splits into a single item,
                        # so the [1] lookup raises IndexError.
                        raise DiffParserError('Malformed Index line', linenum)

                    linenum = temp_linenum + 1
                    break
                elif line.startswith((b'---', b'+++')):
                    # We never found that line, but we did hit the start of
                    # a diff file. We can't treat the "Index:" line as special
                    # in this case.
                    break

                temp_linenum += 1

        return linenum
Example #6
0
    def parse_special_header(self, linenum, info):
        """Parse a special ``Index:`` header found before the diff header.

        A header is recognized when the line at ``linenum`` starts with
        ``Index: `` and the very next line equals ``self.INDEX_SEP``. The
        value after ``Index:`` is then stored in ``info['index']``.

        Args:
            linenum (int):
                The line number to begin parsing.

            info (dict):
                The dictionary that receives the ``index`` value.

        Returns:
            int:
            The line number after the two header lines, or ``linenum``
            unchanged if no special header was found.

        Raises:
            DiffParserError:
                The ``Index:`` line did not contain a value.
        """
        if linenum + 1 < len(self.lines) and \
           self.lines[linenum].startswith(b"Index: ") and \
           self.lines[linenum + 1] == self.INDEX_SEP:
            # This is an Index: header, which is common in CVS and Subversion,
            # amongst other systems.
            try:
                info['index'] = self.lines[linenum].split(None, 1)[1]
            except (IndexError, ValueError):
                # A bare "Index: " line splits into a single item, so the
                # [1] lookup raises IndexError.
                raise DiffParserError("Malformed Index line", linenum)
            linenum += 2

        return linenum
Example #7
0
    def parse_diff_header(self, linenum, info):
        """Parse a standard diff header starting at the given line.

        The lines at ``linenum`` and ``linenum + 1`` are treated as a header
        when they form a unified diff header (``--- `` then ``+++ ``) or a
        context diff header (``*** `` then ``--- ``, where the first line
        does not end with `` ****``). If either line does not exist, no
        header is found.

        When a header is found, the text after the 4-byte marker on each
        line is passed to :py:meth:`parse_filename_header`, and the results
        are stored in ``info`` as ``origFile``/``origInfo`` and
        ``newFile``/``newInfo``.

        Args:
            linenum (int):
                The line number at which to look for the header.

            info (dict):
                The dictionary that receives the parsed header values.

        Returns:
            int:
            The line number following the header, or ``linenum`` unchanged
            if no header was found there.

        Raises:
            DiffParserError:
                The result of :py:meth:`parse_filename_header` could not be
                unpacked into a filename and file details.
        """
        try:
            first, second = self.lines[linenum], self.lines[linenum + 1]
        except IndexError:
            # Not enough lines left to hold a two-line header.
            return linenum

        if first.startswith(b'--- '):
            # Unified diff header.
            found_header = second.startswith(b'+++ ')
        elif first.startswith(b'*** '):
            # Context diff header. A "*** ... ****" line is a hunk marker
            # rather than a file header.
            found_header = (second.startswith(b'--- ') and
                            not first.endswith(b' ****'))
        else:
            found_header = False

        if not found_header:
            return linenum

        try:
            orig_parts = self.parse_filename_header(self.lines[linenum][4:],
                                                    linenum)
            info['origFile'], info['origInfo'] = orig_parts
            linenum += 1

            new_parts = self.parse_filename_header(self.lines[linenum][4:],
                                                   linenum)
            info['newFile'], info['newInfo'] = new_parts
            linenum += 1
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information',
                linenum)

        return linenum
Example #8
0
    def parse_diff_header(self, linenum, parsed_file):
        """Parse the standard header that precedes a file's changes.

        This looks for the ``---`` (original) and ``+++`` (modified) file
        lines of a unified diff, or the ``***`` and ``---`` file lines of a
        context diff, with the first of the two expected at ``linenum``.

        When found, the text after each marker is passed to
        :py:meth:`parse_filename_header`, and the results populate
        :py:attr:`ParsedDiffFile.orig_filename`,
        :py:attr:`ParsedDiffFile.orig_file_details`,
        :py:attr:`ParsedDiffFile.modified_filename`, and
        :py:attr:`ParsedDiffFile.modified_file_details`. The same values are
        mirrored into the file's deprecated info dictionary.

        Subclasses can override this to parse these lines differently, or
        to process the results of these lines (such as converting special
        filenames to states like "deleted" or "new file"). They may also set
        :py:attr:`ParsedDiffFile.skip` to skip the rest of this file and
        begin parsing a new entry at the returned line number.

        Args:
            linenum (int):
                The line number to begin parsing.

            parsed_file (ParsedDiffFile):
                The file currently being parsed.

        Returns:
            int:
            The next line number to parse. This is ``linenum`` unchanged if
            no header was found.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                There was an error parsing the diff header. This may be a
                corrupted diff, or an error in the parsing implementation.
                Details are in the error message.
        """
        try:
            first, second = self.lines[linenum], self.lines[linenum + 1]
        except IndexError:
            # Not enough lines left to hold a two-line header.
            return linenum

        if first.startswith(b'--- '):
            # Unified diff headers
            found_header = second.startswith(b'+++ ')
        elif first.startswith(b'*** '):
            # Context diff headers. A "*** ... ****" line is a hunk marker
            # rather than a file header.
            found_header = (second.startswith(b'--- ') and
                            not first.endswith(b' ****'))
        else:
            found_header = False

        if not found_header:
            return linenum

        try:
            orig_parts = self.parse_filename_header(self.lines[linenum][4:],
                                                    linenum)
            parsed_file.orig_filename, parsed_file.orig_file_details = \
                orig_parts
            linenum += 1

            modified_parts = self.parse_filename_header(
                self.lines[linenum][4:], linenum)
            (parsed_file.modified_filename,
             parsed_file.modified_file_details) = modified_parts

            # Set these for backwards-compatibility.
            #
            # This should be removed in Review Board 5.0.
            legacy_keys = (
                ('origFile', 'orig_filename'),
                ('origInfo', 'orig_file_details'),
                ('newFile', 'modified_filename'),
                ('newInfo', 'modified_file_details'),
            )

            for legacy_key, attr_name in legacy_keys:
                parsed_file._deprecated_info[legacy_key] = \
                    getattr(parsed_file, attr_name)

            linenum += 1
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information', linenum)

        return linenum
Example #9
0
    def parse_special_header(self, linenum, parsed_file):
        """Parse a special diff header marking the start of a new file's info.

        This attempts to locate an ``Index:`` line at the specified line
        number, which usually indicates the beginning of file's information in
        a diff (for Unified Diff variants that support it). By default, this
        method expects the line to be found at ``linenum``.

        If present, and followed by the separator line before any ``---`` or
        ``+++`` line, the value found immediately after the ``Index:`` will
        be stored in :py:attr:`ParsedDiffFile.index_header_value`, allowing
        subclasses to make a determination based on its contents (which may
        vary between types of diffs, but should include at least a filename).

        If the ``Index:`` line is not present, this won't do anything by
        default.

        Subclasses can override this to parse additional information before the
        standard diff header. They may also set :py:attr:`ParsedDiffFile.skip`
        to skip the rest of this file and begin parsing a new entry at the
        returned line number.

        Args:
            linenum (int):
                The line number to begin parsing.

            parsed_file (ParsedDiffFile):
                The file currently being parsed.

        Returns:
            int:
            The next line number to parse.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                There was an error parsing the special header. This may be
                a corrupted diff, or an error in the parsing implementation.
                Details are in the error message.
        """
        try:
            index_line = self.lines[linenum]
            is_index = index_line.startswith(b'Index: ')
        except IndexError:
            is_index = False

        if is_index:
            # Try to find the "====" line.
            temp_linenum = linenum + 1

            while temp_linenum + 1 < len(self.lines):
                line = self.lines[temp_linenum]

                if line == self.INDEX_SEP:
                    # We found the line. This is looking like a valid diff
                    # for CVS, Subversion, and other systems. Try to parse
                    # the data from the line.
                    try:
                        parsed_file.index_header_value = \
                            index_line.split(None, 1)[1]

                        # Set these for backwards-compatibility.
                        #
                        # This should be removed in Review Board 5.0.
                        parsed_file._deprecated_info['index'] = \
                            parsed_file.index_header_value
                    except (IndexError, ValueError):
                        # A bare "Index: " line splits into a single item,
                        # so the [1] lookup raises IndexError.
                        raise DiffParserError('Malformed Index line', linenum)

                    linenum = temp_linenum + 1
                    break
                elif line.startswith((b'---', b'+++')):
                    # We never found that line, but we did hit the start of
                    # a diff file. We can't treat the "Index:" line as special
                    # in this case.
                    break

                temp_linenum += 1

        return linenum