Beispiel #1
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is treated as the section's header line
        (presumably the ``diff --git`` line; the caller is expected to have
        checked that).

        Returns a ``(next_linenum, file_info)`` tuple. ``file_info`` is
        ``None`` when the section is skipped as an empty change, otherwise
        it is the populated ``File``.
        """
        # A new file, mode change or deletion that carries no content is
        # skipped. If the check runs off the end of the input, this header
        # is the only bit left in the file and is skipped the same way.
        try:
            skip_section = self._is_empty_change(linenum)
        except IndexError:
            skip_section = True

        if skip_section:
            return linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE, None

        # The section is kept, so start collecting its text and filenames.
        file_info = File()
        header = self.lines[linenum]
        file_info.data = header + "\n"
        file_info.binary = False
        header_fields = header.split()

        try:
            # The last two fields are the filenames; drop their "a/" and
            # "b/" prefixes.
            file_info.origFile = GIT_DIFF_PREFIX.sub("", header_fields[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", header_fields[-1])
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information', linenum)

        linenum += 1

        # Work out how many extended header lines follow (one for a new or
        # deleted file, two for a mode change), then copy them verbatim.
        if self._is_new_file(linenum):
            header_count = 1
        elif self._is_deleted_file(linenum):
            header_count = 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            header_count = 2
        else:
            header_count = 0

        for offset in range(header_count):
            file_info.data += self.lines[linenum + offset] + "\n"

        linenum += header_count

        # The index line carries the "<orig>..<new>" revision pair.
        if self._is_index_range_line(linenum):
            index_line = self.lines[linenum]
            index_range = index_line.split(None, 2)[1]

            if '..' in index_range:
                orig_rev, new_rev = index_range.split("..")
                file_info.origInfo = orig_rev
                file_info.newInfo = new_rev

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += index_line + "\n"
            linenum += 1

        # Collect the changes until the next section or a binary marker.
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                break

            if self._is_binary_patch(linenum):
                # The marker line is consumed but not added to the data.
                file_info.binary = True
                linenum += 1
                break

            current = self.lines[linenum]

            if (self._is_diff_fromfile_line(linenum) and
                    current.split()[1] == "/dev/null"):
                # A "from" file of /dev/null means the file is being created.
                file_info.origInfo = PRE_CREATION

            file_info.data += current + "\n"
            linenum += 1

        return linenum, file_info
Beispiel #2
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is taken as the section's ``diff --git``
        line. The extended headers, any ``---``/``+++`` filename lines and
        the remaining content up to the next ``diff --git`` line (or a
        binary patch marker) are consumed and recorded on a new ``File``.

        Args:
            linenum (int):
                Index into ``self.lines`` of the section's first line.

        Returns:
            tuple:
            A 2-tuple of ``(next_linenum, file_info)``. ``file_info`` is
            ``None`` if the input ends right after the header line, or if
            no content was found and the file is neither moved, copied,
            deleted, nor marked with a ``PRE_CREATION`` original revision.
        """
        diff_git_line = self.lines[linenum]

        file_info = File()
        file_info.data = diff_git_line + b'\n'
        file_info.binary = False

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Assume the blob / commit information is provided globally. If
        # we found an index header we'll override this.
        file_info.origInfo = self.base_commit_id
        file_info.newInfo = self.new_commit_id

        # Each header entry is used below as a pair: [0] is the parsed
        # value and [1] is the text that gets appended to the file's data.
        headers, linenum = self._parse_extended_headers(linenum)

        if self._is_new_file(headers):
            file_info.data += headers[b'new file mode'][1]
            file_info.origInfo = PRE_CREATION
        elif self._is_deleted_file(headers):
            file_info.data += headers[b'deleted file mode'][1]
            file_info.deleted = True
        elif self._is_mode_change(headers):
            file_info.data += headers[b'old mode'][1]
            file_info.data += headers[b'new mode'][1]

        if self._is_moved_file(headers):
            file_info.origFile = headers[b'rename from'][0]
            file_info.newFile = headers[b'rename to'][0]
            file_info.moved = True

            if b'similarity index' in headers:
                file_info.data += headers[b'similarity index'][1]

            file_info.data += headers[b'rename from'][1]
            file_info.data += headers[b'rename to'][1]
        elif self._is_copied_file(headers):
            file_info.origFile = headers[b'copy from'][0]
            file_info.newFile = headers[b'copy to'][0]
            file_info.copied = True

            if b'similarity index' in headers:
                file_info.data += headers[b'similarity index'][1]

            file_info.data += headers[b'copy from'][1]
            file_info.data += headers[b'copy to'][1]

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if b'index' in headers:
            index_range = headers[b'index'][0].split()[0]

            # The header values appear to be byte strings (bytes keys, and
            # the filenames taken from them are decoded below), so the
            # separator must be bytes too. A str ".." raises TypeError
            # against bytes on Python 3. On Python 2 the literals are equal.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += headers[b'index'][1]

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                orig_line = self.lines[linenum]
                new_line = self.lines[linenum + 1]

                orig_filename = orig_line[len(b'--- '):]
                new_filename = new_line[len(b'+++ '):]

                # Some diffs may incorrectly contain filenames listed as:
                #
                # --- filename\t
                # +++ filename\t
                #
                # We need to strip those single trailing tabs.
                if orig_filename.endswith(b'\t'):
                    orig_filename = orig_filename[:-1]

                if new_filename.endswith(b'\t'):
                    new_filename = new_filename[:-1]

                # Strip the Git a/ and b/ prefixes, if set in the diff.
                if orig_filename.startswith(b'a/'):
                    orig_filename = orig_filename[2:]

                if new_filename.startswith(b'b/'):
                    new_filename = new_filename[2:]

                # /dev/null on either side means the file has no name on
                # that side, so the other side's name is used for both.
                if orig_filename == b'/dev/null':
                    file_info.origInfo = PRE_CREATION
                    file_info.origFile = new_filename
                else:
                    file_info.origFile = orig_filename

                if new_filename == b'/dev/null':
                    file_info.newFile = orig_filename
                else:
                    file_info.newFile = new_filename

                file_info.data += orig_line + b'\n'
                file_info.data += new_line + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if not file_info.origFile:
            # This file didn't have any --- or +++ lines. This usually means
            # the file was deleted or moved without changes. We'll need to
            # fall back to parsing the diff --git line, which is more
            # error-prone.
            assert not file_info.newFile

            self._parse_diff_git_line(diff_git_line, file_info, linenum)

        # Filenames are handed back as text, not bytes.
        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')

        # For an empty change, we keep the file's info only if it is a new
        # 0-length file, a moved file, a copied file, or a deleted 0-length
        # file.
        if (empty_change and
            file_info.origInfo != PRE_CREATION and
            not (file_info.moved or file_info.copied or file_info.deleted)):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #3
0
    def _parse_diff(self, i):
        """
        Parses out one file from a Git diff.

        ``i`` is the index into ``self.lines`` of the line to start at.

        Returns an ``(index, file_info)`` tuple when that line starts with
        ``diff --git``. ``index`` is where parsing should resume, and
        ``file_info`` is the populated ``File``, or ``None`` when the
        section is skipped for having no content. Returns ``None`` (not a
        tuple) when the line at ``i`` is not a ``diff --git`` line.
        """
        if not self.lines[i].startswith("diff --git"):
            # Not the start of a file section. This method has always
            # returned a bare None here, so that is kept.
            return None

        # First check if it is a new file with no content or
        # a file mode change with no content or
        # a deleted file with no content
        # then skip
        try:
            if (self.lines[i + 1].startswith(("new file mode",
                                              "old mode",
                                              "deleted file mode")) and
                    self.lines[i + 3].startswith("diff --git")):
                return i + 3, None
        except IndexError:
            # This means this is the only bit left in the file
            return i + 3, None

        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[i] + "\n"
        file_info.binary = False
        diff_fields = self.lines[i].split()

        try:
            # Need to remove the "a/" and "b/" prefix. The character class
            # is [ab]: writing it as [a|b] would also strip a leading "|/".
            prefix_re = re.compile(r"^[ab]/")
            file_info.origFile = prefix_re.sub("", diff_fields[-2])
            file_info.newFile = prefix_re.sub("", diff_fields[-1])
        except ValueError:
            raise DiffParserError(
                "The diff file is missing revision information",
                i)

        i += 1

        # We have no use for recording this info so skip it
        if self.lines[i].startswith(("new file mode", "deleted file mode")):
            i += 1
        elif (self.lines[i].startswith("old mode") and
              self.lines[i + 1].startswith("new mode")):
            i += 2

        # Get the revision info
        if i < len(self.lines) and self.lines[i].startswith("index "):
            index_range = self.lines[i].split(None, 2)[1]
            file_info.origInfo, file_info.newInfo = index_range.split("..")

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            i += 1

        # Get the changes
        while i < len(self.lines):
            if self.lines[i].startswith("diff --git"):
                # Start of the next file's section; leave it unconsumed.
                return i, file_info

            if self.lines[i].startswith(("Binary files",
                                         "GIT binary patch")):
                # The marker line is consumed but not added to the data.
                file_info.binary = True
                return i + 1, file_info

            if (i + 1 < len(self.lines) and
                    self.lines[i].startswith('--- ') and
                    self.lines[i + 1].startswith('+++ ')):
                # A "from" file of /dev/null means the file is being created.
                if self.lines[i].split()[1] == "/dev/null":
                    file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[i] + "\n"
            i += 1

        return i, file_info
Beispiel #4
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is taken as the section's ``diff --git``
        line. The extended headers, any ``---``/``+++`` filename lines and
        the remaining content up to the next ``diff --git`` line (or a
        binary patch marker) are consumed and recorded on a new ``File``.

        Args:
            linenum (int):
                Index into ``self.lines`` of the section's first line.

        Returns:
            tuple:
            A 2-tuple of ``(next_linenum, file_info)``. ``file_info`` is
            ``None`` if the input ends right after the header line, or if
            no content was found and the file is neither moved, copied,
            deleted, nor marked with a ``PRE_CREATION`` original revision.
        """
        diff_git_line = self.lines[linenum]

        file_info = File()
        file_info.data = diff_git_line + b'\n'
        file_info.binary = False

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2

        # The branch above may have advanced linenum, so the first line of
        # a move/copy header must be read at the current position. Reusing
        # a line captured earlier would record the wrong header line.
        if self._is_moved_file(linenum):
            rename_from = self.lines[linenum + 1]
            rename_to = self.lines[linenum + 2]

            file_info.origFile = rename_from[len(b'rename from '):]
            file_info.newFile = rename_to[len(b'rename to '):]

            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += rename_from + b"\n"
            file_info.data += rename_to + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            copy_from = self.lines[linenum + 1]
            copy_to = self.lines[linenum + 2]

            file_info.origFile = copy_from[len(b'copy from '):]
            file_info.newFile = copy_to[len(b'copy to '):]

            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += copy_from + b"\n"
            file_info.data += copy_to + b"\n"
            linenum += 3
            file_info.copied = True

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            index_range = self.lines[linenum].split(None, 2)[1]

            # The lines are byte strings, so the separator must be bytes
            # too. A str ".." raises TypeError against bytes on Python 3.
            # On Python 2 the two literals are equal.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                orig_line = self.lines[linenum]
                new_line = self.lines[linenum + 1]

                orig_filename = orig_line[len(b'--- '):]
                new_filename = new_line[len(b'+++ '):]

                # Strip the Git a/ and b/ prefixes, if set in the diff.
                if orig_filename.startswith(b'a/'):
                    orig_filename = orig_filename[2:]

                if new_filename.startswith(b'b/'):
                    new_filename = new_filename[2:]

                # /dev/null on either side means the file has no name on
                # that side, so the other side's name is used for both.
                if orig_filename == b'/dev/null':
                    file_info.origInfo = PRE_CREATION
                    file_info.origFile = new_filename
                else:
                    file_info.origFile = orig_filename

                if new_filename == b'/dev/null':
                    file_info.newFile = orig_filename
                else:
                    file_info.newFile = new_filename

                file_info.data += orig_line + b'\n'
                file_info.data += new_line + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if not file_info.origFile:
            # This file didn't have any --- or +++ lines. This usually means
            # the file was deleted or moved without changes. We'll need to
            # fall back to parsing the diff --git line, which is more
            # error-prone.
            assert not file_info.newFile

            self._parse_diff_git_line(diff_git_line, file_info, linenum)

        # Filenames are handed back as text, not bytes.
        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')

        # For an empty change, we keep the file's info only if it is a new
        # 0-length file, a moved file, a copied file, or a deleted 0-length
        # file.
        if (empty_change and file_info.origInfo != PRE_CREATION
                and not (file_info.moved or file_info.copied
                         or file_info.deleted)):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #5
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is taken as the section's ``diff --git``
        line. Its last two whitespace-separated fields are used as the
        original and new filenames.

        Args:
            linenum (int):
                Index into ``self.lines`` of the section's first line.

        Returns:
            tuple:
            A 2-tuple of ``(next_linenum, file_info)``. ``file_info`` is
            ``None`` if the input ends right after the header line, or if
            no content was found and the file is neither moved nor copied.

        Raises:
            DiffParserError:
                A ``ValueError`` was raised while extracting the filenames.
                This includes a filename that is not valid UTF-8, since
                ``UnicodeDecodeError`` is a ``ValueError``.
        """
        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + b"\n"
        file_info.binary = False
        diff_line = self.lines[linenum].split()

        try:
            # Need to remove the "a/" and "b/" prefix
            file_info.origFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-1])

            # Filenames are handed back as text, not bytes.
            if isinstance(file_info.origFile, six.binary_type):
                file_info.origFile = file_info.origFile.decode('utf-8')

            if isinstance(file_info.newFile, six.binary_type):
                file_info.newFile = file_info.newFile.decode('utf-8')
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision '
                'information', linenum)

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2
        elif self._is_moved_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            index_range = self.lines[linenum].split(None, 2)[1]

            # The lines are byte strings, so the separator must be bytes
            # too. A str ".." raises TypeError against bytes on Python 3.
            # On Python 2 the two literals are equal.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                # A "from" file of /dev/null means the file is being created.
                if self.lines[linenum].split()[1] == b"/dev/null":
                    file_info.origInfo = PRE_CREATION

                file_info.data += self.lines[linenum] + b'\n'
                file_info.data += self.lines[linenum + 1] + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if empty_change and not (file_info.moved or file_info.copied):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #6
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is treated as the section's header line; its
        last two whitespace-separated fields become the original and new
        filenames.

        Returns a ``(next_linenum, file_info)`` tuple. ``file_info`` is
        ``None`` when ``_is_empty_change`` flags the section and it is not
        a deletion, otherwise it is the populated ``File``.
        """
        # Decide up front whether this is an empty change, and where
        # parsing would resume if it ends up being skipped.
        empty_change = self._is_empty_change(linenum)
        empty_change_linenum = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

        # Start collecting the section's text and filenames.
        file_info = File()
        header = self.lines[linenum]
        file_info.data = header + "\n"
        file_info.binary = False
        header_fields = header.split()

        try:
            # Drop the "a/" and "b/" prefixes from the filenames.
            file_info.origFile = GIT_DIFF_PREFIX.sub("", header_fields[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", header_fields[-1])
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information', linenum)

        linenum += 1

        # Work out how many extended header lines follow (one for a new or
        # deleted file, two for a mode change, three for a move), then
        # copy them verbatim.
        if self._is_new_file(linenum):
            header_count = 1
        elif self._is_deleted_file(linenum):
            header_count = 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            header_count = 2
        elif self._is_moved_file(linenum):
            header_count = 3
            file_info.moved = True
        else:
            header_count = 0

        for offset in range(header_count):
            file_info.data += self.lines[linenum + offset] + "\n"

        linenum += header_count

        # Only deletions are interesting among empty changes. Such a file
        # is likely binary, so the rest of its section is still processed.
        if empty_change and not file_info.deleted:
            return empty_change_linenum, None

        # The index line carries the "<orig>..<new>" revision pair.
        if self._is_index_range_line(linenum):
            index_line = self.lines[linenum]
            index_range = index_line.split(None, 2)[1]

            if '..' in index_range:
                orig_rev, new_rev = index_range.split("..")
                file_info.origInfo = orig_rev
                file_info.newInfo = new_rev

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += index_line + "\n"
            linenum += 1

        # Collect the changes until the next section or a binary marker.
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                break

            if self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + "\n"
                linenum += 1
                break

            if not self._is_diff_fromfile_line(linenum):
                # Ordinary content line; parse_diff_line returns the next
                # line number to continue from.
                linenum = self.parse_diff_line(linenum, file_info)
                continue

            from_line = self.lines[linenum]

            # A "from" file of /dev/null means the file is being created.
            if from_line.split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

            # Copy the from-file line and the line after it together.
            file_info.data += from_line + '\n'
            file_info.data += self.lines[linenum + 1] + '\n'
            linenum += 2

        return linenum, file_info
Beispiel #7
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is treated as the section's header line; its
        last two whitespace-separated fields become the original and new
        filenames.

        Returns a ``(next_linenum, file_info)`` tuple. ``file_info`` is
        ``None`` when ``_is_empty_change`` flags the section and it is not
        a deletion, otherwise it is the populated ``File``.
        """
        # Decide up front whether this is an empty change, and where
        # parsing would resume if it ends up being skipped.
        empty_change = self._is_empty_change(linenum)
        empty_change_linenum = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

        # Start collecting the section's text and filenames.
        file_info = File()
        header = self.lines[linenum]
        file_info.data = header + "\n"
        file_info.binary = False
        header_fields = header.split()

        try:
            # Drop the "a/" and "b/" prefixes from the filenames.
            file_info.origFile = GIT_DIFF_PREFIX.sub("", header_fields[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", header_fields[-1])
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision information', linenum)

        linenum += 1

        # Work out how many extended header lines follow (one for a new or
        # deleted file, two for a mode change, three for a move), then
        # copy them verbatim.
        if self._is_new_file(linenum):
            header_count = 1
        elif self._is_deleted_file(linenum):
            header_count = 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            header_count = 2
        elif self._is_moved_file(linenum):
            header_count = 3
            file_info.moved = True
        else:
            header_count = 0

        for offset in range(header_count):
            file_info.data += self.lines[linenum + offset] + "\n"

        linenum += header_count

        # Only deletions are interesting among empty changes. Such a file
        # is likely binary, so the rest of its section is still processed.
        if empty_change and not file_info.deleted:
            return empty_change_linenum, None

        # The index line carries the "<orig>..<new>" revision pair.
        if self._is_index_range_line(linenum):
            index_line = self.lines[linenum]
            index_range = index_line.split(None, 2)[1]

            if '..' in index_range:
                orig_rev, new_rev = index_range.split("..")
                file_info.origInfo = orig_rev
                file_info.newInfo = new_rev

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += index_line + "\n"
            linenum += 1

        # Collect the changes until the next section or a binary marker.
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                break

            current = self.lines[linenum]

            if self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += current + "\n"
                linenum += 1
                break

            if (self._is_diff_fromfile_line(linenum) and
                    current.split()[1] == "/dev/null"):
                # A "from" file of /dev/null means the file is being created.
                file_info.origInfo = PRE_CREATION

            file_info.data += current + "\n"
            linenum += 1

        return linenum, file_info
Beispiel #8
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a Git diff.

        ``self.lines[linenum]`` is taken as the section's ``diff --git``
        line. The filenames are found by splitting that line at ``" b/"``,
        which copes with most filenames that contain spaces.

        Args:
            linenum (int):
                Index into ``self.lines`` of the section's first line.

        Returns:
            tuple:
            A 2-tuple of ``(next_linenum, file_info)``. ``file_info`` is
            ``None`` if the input ends right after the header line, or if
            no content was found and the file is neither moved, copied,
            deleted, nor marked with a ``PRE_CREATION`` original revision.

        Raises:
            DiffParserError:
                The header line contains no ``" b/"`` separator, or a
                filename is not valid UTF-8.
        """
        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + b"\n"
        file_info.binary = False

        # We split at the b/ to deal with space in filenames. This is not
        # perfect, but it should solve most of the whitespace problems.
        # The lines are byte strings, so the separator is bytes as well.
        diff_line = self.lines[linenum].split(b' b/')

        try:
            file_info.origFile = diff_line[-2].replace(b'diff --git a/', b'')
            file_info.newFile = diff_line[-1]

            # Filenames are handed back as text, not bytes.
            if isinstance(file_info.origFile, six.binary_type):
                file_info.origFile = file_info.origFile.decode('utf-8')

            if isinstance(file_info.newFile, six.binary_type):
                file_info.newFile = file_info.newFile.decode('utf-8')
        except (IndexError, ValueError):
            # IndexError: the split found no " b/", so there is no second
            # filename. ValueError covers UnicodeDecodeError from decode().
            raise DiffParserError('The diff file is missing revision '
                                  'information', linenum)

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2
        elif self._is_moved_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            index_range = self.lines[linenum].split(None, 2)[1]

            # Bytes separator for the same reason as the header split: a
            # str ".." raises TypeError against bytes on Python 3.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                # A "from" file of /dev/null means the file is being created.
                if self.lines[linenum].split()[1] == b"/dev/null":
                    file_info.origInfo = PRE_CREATION

                file_info.data += self.lines[linenum] + b'\n'
                file_info.data += self.lines[linenum + 1] + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        # For an empty change, we keep the file's info only if it is a new
        # 0-length file, a moved file, a copied file, or a deleted 0-length
        # file.
        if (empty_change and
            file_info.origInfo != PRE_CREATION and
            not (file_info.moved or file_info.copied or file_info.deleted)):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #9
0
    def _parse_git_diff(self, linenum):
        """Parse the section of a Git diff that describes a single file.

        Parsing starts on the ``diff --git`` line at ``linenum``, consumes
        the extended headers returned by ``_parse_extended_headers``, and
        then walks the remaining lines until ``_is_git_diff`` matches the
        next section, a binary patch marker has been consumed, or the end
        of ``self.lines`` is reached.

        Args:
            linenum (int):
                Index into ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of the index of the first unconsumed line and the
            populated ``File``. The second item is ``None`` when the
            ``diff --git`` line is the last line, or when no content was
            found and the file is neither moved, copied, deleted, nor has
            an original revision of ``PRE_CREATION``.
        """
        # Keep the "diff --git" line around: it is the fallback source for
        # the filenames if no "---"/"+++" lines are found below.
        diff_git_line = self.lines[linenum]

        file_info = File()
        file_info.data = diff_git_line + b'\n'
        file_info.binary = False

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Assume the blob / commit information is provided globally. If
        # we found an index header we'll override this.
        file_info.origInfo = self.base_commit_id
        file_info.newInfo = self.new_commit_id

        # NOTE(review): each header entry is indexed as [0] for the parsed
        # value and [1] for text appended verbatim to the diff data
        # (presumably the raw header line with its newline) -- confirm
        # against _parse_extended_headers.
        headers, linenum = self._parse_extended_headers(linenum)

        if self._is_new_file(headers):
            file_info.data += headers[b'new file mode'][1]
            file_info.origInfo = PRE_CREATION
        elif self._is_deleted_file(headers):
            file_info.data += headers[b'deleted file mode'][1]
            file_info.deleted = True
        elif self._is_mode_change(headers):
            file_info.data += headers[b'old mode'][1]
            file_info.data += headers[b'new mode'][1]

        if self._is_moved_file(headers):
            file_info.origFile = headers[b'rename from'][0]
            file_info.newFile = headers[b'rename to'][0]
            file_info.moved = True

            if b'similarity index' in headers:
                file_info.data += headers[b'similarity index'][1]

            file_info.data += headers[b'rename from'][1]
            file_info.data += headers[b'rename to'][1]
        elif self._is_copied_file(headers):
            file_info.origFile = headers[b'copy from'][0]
            file_info.newFile = headers[b'copy to'][0]
            file_info.copied = True

            if b'similarity index' in headers:
                file_info.data += headers[b'similarity index'][1]

            file_info.data += headers[b'copy from'][1]
            file_info.data += headers[b'copy to'][1]

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if b'index' in headers:
            index_range = headers[b'index'][0].split()[0]

            # The header keys and the diff data are byte strings, so the
            # separator has to be a byte string as well. A text '..' raises
            # TypeError on Python 3 for both the containment test and the
            # split.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += headers[b'index'][1]

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                orig_line = self.lines[linenum]
                new_line = self.lines[linenum + 1]

                orig_filename = orig_line[len(b'--- '):]
                new_filename = new_line[len(b'+++ '):]

                # Some diffs may incorrectly contain filenames listed as:
                #
                # --- filename\t
                # +++ filename\t
                #
                # We need to strip those single trailing tabs.
                if orig_filename.endswith(b'\t'):
                    orig_filename = orig_filename[:-1]

                if new_filename.endswith(b'\t'):
                    new_filename = new_filename[:-1]

                # Strip the Git a/ and b/ prefixes, if set in the diff.
                if orig_filename.startswith(b'a/'):
                    orig_filename = orig_filename[2:]

                if new_filename.startswith(b'b/'):
                    new_filename = new_filename[2:]

                # /dev/null on either side carries no filename of its own,
                # so borrow the name from the other side.
                if orig_filename == b'/dev/null':
                    file_info.origInfo = PRE_CREATION
                    file_info.origFile = new_filename
                else:
                    file_info.origFile = orig_filename

                if new_filename == b'/dev/null':
                    file_info.newFile = orig_filename
                else:
                    file_info.newFile = new_filename

                file_info.data += orig_line + b'\n'
                file_info.data += new_line + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if not file_info.origFile:
            # This file didn't have any --- or +++ lines. This usually means
            # the file was deleted or moved without changes. We'll need to
            # fall back to parsing the diff --git line, which is more
            # error-prone.
            assert not file_info.newFile

            self._parse_diff_git_line(diff_git_line, file_info, linenum)

        # Filenames collected above are byte strings; expose them as text.
        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')

        # For an empty change, we keep the file's info only if it is a new
        # 0-length file, a moved file, a copied file, or a deleted 0-length
        # file.
        if (empty_change and
            file_info.origInfo != PRE_CREATION and
            not (file_info.moved or file_info.copied or file_info.deleted)):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #10
0
    def _parse_git_diff(self, linenum):
        """Parse the section of a Git diff that describes a single file.

        Parsing starts on the ``diff --git`` line at ``linenum``. The
        filenames are taken from the last two whitespace-separated tokens
        of that line, with ``GIT_DIFF_PREFIX`` substituted away. Extended
        header lines, the index line and the change content are then
        appended to the file's ``data``.

        Args:
            linenum (int):
                Index into ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of the index of the first unconsumed line and the
            populated ``File``. The second item is ``None`` when the end of
            the diff is reached before any content, or when no content was
            found for the file.

        Raises:
            DiffParserError:
                The ``diff --git`` line has too few tokens to hold the two
                filenames.
        """
        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + "\n"
        file_info.binary = False
        diff_line = self.lines[linenum].split()

        try:
            # Need to remove the "a/" and "b/" prefix
            file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])
        except (IndexError, ValueError):
            # Indexing a too-short token list raises IndexError, which the
            # original ValueError-only handler never caught.
            raise DiffParserError('The diff file is missing revision '
                                  'information', linenum)

        linenum += 1

        # Check for the end of the diff before probing for extended
        # headers, so a trailing "diff --git" line is never looked past.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            # A mode change spans two header lines.
            file_info.data += self.lines[linenum] + "\n"
            file_info.data += self.lines[linenum + 1] + "\n"
            linenum += 2
        elif self._is_moved_file(linenum):
            # A move spans three header lines.
            file_info.data += self.lines[linenum] + "\n"
            file_info.data += self.lines[linenum + 1] + "\n"
            file_info.data += self.lines[linenum + 2] + "\n"
            linenum += 3
            file_info.moved = True

        # The headers may have been the last lines of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            # The second token of the index line holds "<orig>..<new>".
            index_range = self.lines[linenum].split(None, 2)[1]

            if '..' in index_range:
                file_info.origInfo, file_info.newInfo = index_range.split("..")

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + "\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + "\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                if self.lines[linenum].split()[1] == "/dev/null":
                    file_info.origInfo = PRE_CREATION

                # The from-file line and the line after it are stored as a
                # pair.
                file_info.data += self.lines[linenum] + '\n'
                file_info.data += self.lines[linenum + 1] + '\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if empty_change:
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Beispiel #11
0
    def _parse_git_diff(self, linenum):
        """Parse the section of a Git diff that describes a single file.

        The section starts on the ``diff --git`` line at ``linenum``. When
        ``_is_empty_change`` reports an empty change and the file is not
        flagged as deleted, the section is skipped and parsing resumes
        ``GIT_DIFF_EMPTY_CHANGESET_SIZE`` lines after ``linenum``.

        Args:
            linenum (int):
                Index into ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of the index of the next line to parse and the
            populated ``File``, or ``None`` in place of the file for a
            skipped empty change.

        Raises:
            DiffParserError:
                The text after ``diff --git `` does not start with ``a/``.
        """
        # Work out up front whether this is an empty change and where
        # parsing would resume if the section ends up being skipped.
        is_empty = self._is_empty_change(linenum)
        resume_at = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

        info = File()
        info.data = self.lines[linenum] + "\n"
        info.binary = False

        # Everything after the 11-character "diff --git " prefix.
        paths = self.lines[linenum][11:]

        if not paths.startswith("a/"):
            raise DiffParserError('The diff file is missing revision '
                                  'information', linenum)

        # Split on the first " b/"; without one, the whole remainder is the
        # original filename and the new filename is left empty.
        orig_part, separator, new_part = paths.partition(" b/")
        info.origFile = orig_part[2:]
        info.newFile = new_part if separator else ""

        linenum += 1

        # Work out which extended header (if any) follows and how many
        # lines it occupies.
        if self._is_new_file(linenum):
            header_count = 1
        elif self._is_deleted_file(linenum):
            header_count = 1
            info.deleted = True
        elif self._is_mode_change(linenum):
            header_count = 2
        elif self._is_moved_file(linenum):
            header_count = 3
            info.moved = True
        else:
            header_count = 0

        # Index line by line (rather than slicing) so that a truncated
        # header still raises IndexError.
        for offset in range(header_count):
            info.data += self.lines[linenum + offset] + "\n"

        linenum += header_count

        # Only show interesting empty changes. Basically, deletions.
        # It's likely a binary file if we're at this point, and so we want
        # to process the rest of it.
        if is_empty and not info.deleted:
            return resume_at, None

        if self._is_index_range_line(linenum):
            index_line = self.lines[linenum]
            revisions = index_line.split(None, 2)[1]

            if '..' in revisions:
                info.origInfo, info.newInfo = revisions.split("..")

            if self.pre_creation_regexp.match(info.origInfo):
                info.origInfo = PRE_CREATION

            info.data += index_line + "\n"
            linenum += 1

        # Consume the change content up to the next file's section.
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break

            if self._is_binary_patch(linenum):
                info.binary = True
                info.data += self.lines[linenum] + "\n"
                linenum += 1
                break

            if self._is_diff_fromfile_line(linenum):
                from_line = self.lines[linenum]

                if from_line.split()[1] == "/dev/null":
                    info.origInfo = PRE_CREATION

                info.data += from_line + '\n'
                info.data += self.lines[linenum + 1] + '\n'
                linenum += 2
                continue

            linenum = self.parse_diff_line(linenum, info)

        return linenum, info
Beispiel #12
0
    def _parse_git_diff(self, linenum):
        """Parse the section of a Git diff that describes a single file.

        Parsing starts on the ``diff --git`` line at ``linenum``, consumes
        any new file / deleted file / mode change header, then any rename
        or copy header, then the index line, and finally the change
        content. It stops when ``_is_git_diff`` matches the next section,
        after a binary patch marker, or at the end of ``self.lines``.

        Args:
            linenum (int):
                Index into ``self.lines`` of the ``diff --git`` line.

        Returns:
            tuple:
            A 2-tuple of the index of the first unconsumed line and the
            populated ``File``. The second item is ``None`` when the
            ``diff --git`` line is the last line, or when no content was
            found and the file is neither moved, copied, deleted, nor has
            an original revision of ``PRE_CREATION``.
        """
        # Keep the "diff --git" line around: it is the fallback source for
        # the filenames if no "---"/"+++" lines are found below.
        diff_git_line = self.lines[linenum]

        file_info = File()
        file_info.data = diff_git_line + b'\n'
        file_info.binary = False

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            # A mode change spans two header lines.
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2

        # The rename/copy headers are read relative to the *current*
        # linenum. The branch above may have advanced it, so a line cached
        # before that branch would be stale and would duplicate the earlier
        # header in the stored diff data.
        if self._is_moved_file(linenum):
            # The line preceding "rename from" (presumably the similarity
            # index line).
            first_line = self.lines[linenum]
            rename_from = self.lines[linenum + 1]
            rename_to = self.lines[linenum + 2]

            file_info.origFile = rename_from[len(b'rename from '):]
            file_info.newFile = rename_to[len(b'rename to '):]

            file_info.data += first_line + b"\n"
            file_info.data += rename_from + b"\n"
            file_info.data += rename_to + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            # The line preceding "copy from" (presumably the similarity
            # index line).
            first_line = self.lines[linenum]
            copy_from = self.lines[linenum + 1]
            copy_to = self.lines[linenum + 2]

            file_info.origFile = copy_from[len(b'copy from '):]
            file_info.newFile = copy_to[len(b'copy to '):]

            file_info.data += first_line + b"\n"
            file_info.data += copy_from + b"\n"
            file_info.data += copy_to + b"\n"
            linenum += 3
            file_info.copied = True

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            # The second token of the index line holds "<orig>..<new>".
            index_range = self.lines[linenum].split(None, 2)[1]

            # The diff lines are byte strings, so the separator has to be a
            # byte string as well. A text '..' raises TypeError on Python 3
            # for both the containment test and the split.
            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b'..')

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                orig_line = self.lines[linenum]
                new_line = self.lines[linenum + 1]

                orig_filename = orig_line[len(b'--- '):]
                new_filename = new_line[len(b'+++ '):]

                # Strip the Git a/ and b/ prefixes, if set in the diff.
                if orig_filename.startswith(b'a/'):
                    orig_filename = orig_filename[2:]

                if new_filename.startswith(b'b/'):
                    new_filename = new_filename[2:]

                # /dev/null on either side carries no filename of its own,
                # so borrow the name from the other side.
                if orig_filename == b'/dev/null':
                    file_info.origInfo = PRE_CREATION
                    file_info.origFile = new_filename
                else:
                    file_info.origFile = orig_filename

                if new_filename == b'/dev/null':
                    file_info.newFile = orig_filename
                else:
                    file_info.newFile = new_filename

                file_info.data += orig_line + b'\n'
                file_info.data += new_line + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if not file_info.origFile:
            # This file didn't have any --- or +++ lines. This usually means
            # the file was deleted or moved without changes. We'll need to
            # fall back to parsing the diff --git line, which is more
            # error-prone.
            assert not file_info.newFile

            self._parse_diff_git_line(diff_git_line, file_info, linenum)

        # Filenames collected above are byte strings; expose them as text.
        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')

        # For an empty change, we keep the file's info only if it is a new
        # 0-length file, a moved file, a copied file, or a deleted 0-length
        # file.
        if (empty_change and
            file_info.origInfo != PRE_CREATION and
            not (file_info.moved or file_info.copied or file_info.deleted)):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info