def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is the populated ``File``, or
        ``None`` when the section was skipped as an empty change.

    Raises:
        DiffParserError: The ``diff --git`` line does not carry both an
            original and a new filename.
    """
    # First check if it is a new file with no content or
    # a file mode change with no content or
    # a deleted file with no content
    # then skip
    try:
        if self._is_empty_change(linenum):
            linenum += GIT_DIFF_EMPTY_CHANGESET_SIZE
            return linenum, None
    except IndexError:
        # This means this is the only bit left in the file
        linenum += GIT_DIFF_EMPTY_CHANGESET_SIZE
        return linenum, None

    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + "\n"
    file_info.binary = False
    diff_line = self.lines[linenum].split()

    # "diff --git <old> <new>" needs at least four tokens. Indexing with
    # [-2]/[-1] never fails on a shorter header (it would silently pick up
    # "diff"/"--git" as filenames), so validate the token count explicitly.
    if len(diff_line) < 4:
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    # Need to remove the "a/" and "b/" prefix
    file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
    file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])

    linenum += 1

    # Save the new file, deleted file, mode change and index
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        # A mode change spans two header lines.
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        linenum += 2

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        if '..' in index_range:
            file_info.origInfo, file_info.newInfo = index_range.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            return linenum, file_info

        if self._is_binary_patch(linenum):
            file_info.binary = True
            return linenum + 1, file_info

        if self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The extended headers are read through ``_parse_extended_headers``; the
    code below uses index ``[0]`` of each header entry as its parsed value
    and index ``[1]`` as the text appended to ``file_info.data``.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is ``None`` when the diff ends
        right after the ``diff --git`` line, or when the change is empty
        and is not a new, moved, copied or deleted file.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    diff_git_line = self.lines[linenum]

    file_info = File()
    file_info.data = diff_git_line + b'\n'
    file_info.binary = False

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Assume the blob / commit information is provided globally. If
    # we found an index header we'll override this.
    file_info.origInfo = self.base_commit_id
    file_info.newInfo = self.new_commit_id

    headers, linenum = self._parse_extended_headers(linenum)

    if self._is_new_file(headers):
        file_info.data += headers[b'new file mode'][1]
        file_info.origInfo = PRE_CREATION
    elif self._is_deleted_file(headers):
        file_info.data += headers[b'deleted file mode'][1]
        file_info.deleted = True
    elif self._is_mode_change(headers):
        file_info.data += headers[b'old mode'][1]
        file_info.data += headers[b'new mode'][1]

    if self._is_moved_file(headers):
        file_info.origFile = headers[b'rename from'][0]
        file_info.newFile = headers[b'rename to'][0]
        file_info.moved = True

        if b'similarity index' in headers:
            file_info.data += headers[b'similarity index'][1]

        file_info.data += headers[b'rename from'][1]
        file_info.data += headers[b'rename to'][1]
    elif self._is_copied_file(headers):
        file_info.origFile = headers[b'copy from'][0]
        file_info.newFile = headers[b'copy to'][0]
        file_info.copied = True

        if b'similarity index' in headers:
            file_info.data += headers[b'similarity index'][1]

        file_info.data += headers[b'copy from'][1]
        file_info.data += headers[b'copy to'][1]

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if b'index' in headers:
        index_range = headers[b'index'][0].split()[0]

        # The diff is handled as bytes throughout this method, so the
        # separator must be a bytes literal: mixing text and bytes here
        # raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += headers[b'index'][1]

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            orig_line = self.lines[linenum]
            new_line = self.lines[linenum + 1]

            orig_filename = orig_line[len(b'--- '):]
            new_filename = new_line[len(b'+++ '):]

            # Some diffs may incorrectly contain filenames listed as:
            #
            # --- filename\t
            # +++ filename\t
            #
            # We need to strip those single trailing tabs.
            if orig_filename.endswith(b'\t'):
                orig_filename = orig_filename[:-1]

            if new_filename.endswith(b'\t'):
                new_filename = new_filename[:-1]

            # Strip the Git a/ and b/ prefixes, if set in the diff.
            if orig_filename.startswith(b'a/'):
                orig_filename = orig_filename[2:]

            if new_filename.startswith(b'b/'):
                new_filename = new_filename[2:]

            # /dev/null on either side means the file does not exist on
            # that side; fall back to the other side's name.
            if orig_filename == b'/dev/null':
                file_info.origInfo = PRE_CREATION
                file_info.origFile = new_filename
            else:
                file_info.origFile = orig_filename

            if new_filename == b'/dev/null':
                file_info.newFile = orig_filename
            else:
                file_info.newFile = new_filename

            file_info.data += orig_line + b'\n'
            file_info.data += new_line + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if not file_info.origFile:
        # This file didn't have any --- or +++ lines. This usually means
        # the file was deleted or moved without changes. We'll need to
        # fall back to parsing the diff --git line, which is more
        # error-prone.
        assert not file_info.newFile

        self._parse_diff_git_line(diff_git_line, file_info, linenum)

    if isinstance(file_info.origFile, six.binary_type):
        file_info.origFile = file_info.origFile.decode('utf-8')

    if isinstance(file_info.newFile, six.binary_type):
        file_info.newFile = file_info.newFile.decode('utf-8')

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_diff(self, i):
    """Parse out one file from a Git diff.

    Args:
        i: Index into ``self.lines`` at which to start parsing.

    Returns:
        An ``(i, file_info)`` tuple. ``i`` is the index at which parsing
        should resume. ``file_info`` is the populated ``File``, or ``None``
        when the line is not a ``diff --git`` header or the section is an
        empty change that is skipped.

    Raises:
        DiffParserError: The ``diff --git`` line does not carry both an
            original and a new filename.
    """
    # Anything that is not a section header is skipped one line at a time,
    # so the caller always makes progress and always gets a tuple back.
    if not self.lines[i].startswith("diff --git"):
        return i + 1, None

    # First check if it is a new file with no content or
    # a file mode change with no content or
    # a deleted file with no content
    # then skip
    try:
        if (self.lines[i + 1].startswith(("new file mode",
                                          "old mode",
                                          "deleted file mode")) and
            self.lines[i + 3].startswith("diff --git")):
            return i + 3, None
    except IndexError:
        # This means this is the only bit left in the file
        return i + 3, None

    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[i] + "\n"
    file_info.binary = False
    diff_line = self.lines[i].split()

    # "diff --git <old> <new>" needs at least four tokens. Indexing with
    # [-2]/[-1] never fails on a shorter header, so check explicitly.
    if len(diff_line) < 4:
        raise DiffParserError(
            "The diff file is missing revision information", i)

    # Need to remove the "a/" and "b/" prefix. Note that "[a|b]" would
    # also match a literal "|", which is not a Git prefix.
    file_info.origFile = re.sub(r"^[ab]/", "", diff_line[-2])
    file_info.newFile = re.sub(r"^[ab]/", "", diff_line[-1])

    i += 1

    # We have no use for recording this info so skip it
    if self.lines[i].startswith(("new file mode", "deleted file mode")):
        i += 1
    elif (self.lines[i].startswith("old mode") and
          self.lines[i + 1].startswith("new mode")):
        i += 2

    # Get the revision info
    if i < len(self.lines) and self.lines[i].startswith("index "):
        index_range = self.lines[i].split(None, 2)[1]
        file_info.origInfo, file_info.newInfo = index_range.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        i += 1

    # Get the changes
    while i < len(self.lines):
        line = self.lines[i]

        if line.startswith("diff --git"):
            # The next file's section starts here.
            return i, file_info

        if line.startswith(("Binary files", "GIT binary patch")):
            file_info.binary = True
            return i + 1, file_info

        if (i + 1 < len(self.lines) and
            line.startswith('--- ') and
            self.lines[i + 1].startswith('+++ ')):
            if line.split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

        file_info.data += line + "\n"
        i += 1

    return i, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is ``None`` when the diff ends
        right after the ``diff --git`` line, or when the change is empty
        and is not a new, moved, copied or deleted file.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    diff_git_line = self.lines[linenum]

    file_info = File()
    file_info.data = diff_git_line + b'\n'
    file_info.binary = False

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        # A mode change spans two header lines.
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2

    # The branch above may have advanced linenum, so the line at the
    # current position must be read afresh here rather than reusing a
    # value captured before the branch (which would duplicate the mode
    # line and drop the similarity line).
    if self._is_moved_file(linenum):
        similarity_line = self.lines[linenum]
        rename_from = self.lines[linenum + 1]
        rename_to = self.lines[linenum + 2]

        file_info.origFile = rename_from[len(b'rename from '):]
        file_info.newFile = rename_to[len(b'rename to '):]

        file_info.data += similarity_line + b"\n"
        file_info.data += rename_from + b"\n"
        file_info.data += rename_to + b"\n"

        linenum += 3
        file_info.moved = True
    elif self._is_copied_file(linenum):
        similarity_line = self.lines[linenum]
        copy_from = self.lines[linenum + 1]
        copy_to = self.lines[linenum + 2]

        file_info.origFile = copy_from[len(b'copy from '):]
        file_info.newFile = copy_to[len(b'copy to '):]

        file_info.data += similarity_line + b"\n"
        file_info.data += copy_from + b"\n"
        file_info.data += copy_to + b"\n"

        linenum += 3
        file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # Lines are bytes, so the separator must be a bytes literal:
        # mixing text and bytes raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            orig_line = self.lines[linenum]
            new_line = self.lines[linenum + 1]

            orig_filename = orig_line[len(b'--- '):]
            new_filename = new_line[len(b'+++ '):]

            if orig_filename.startswith(b'a/'):
                orig_filename = orig_filename[2:]

            if new_filename.startswith(b'b/'):
                new_filename = new_filename[2:]

            # /dev/null on either side means the file does not exist on
            # that side; fall back to the other side's name.
            if orig_filename == b'/dev/null':
                file_info.origInfo = PRE_CREATION
                file_info.origFile = new_filename
            else:
                file_info.origFile = orig_filename

            if new_filename == b'/dev/null':
                file_info.newFile = orig_filename
            else:
                file_info.newFile = new_filename

            file_info.data += orig_line + b'\n'
            file_info.data += new_line + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if not file_info.origFile:
        # This file didn't have any --- or +++ lines. This usually means
        # the file was deleted or moved without changes. We'll need to
        # fall back to parsing the diff --git line, which is more
        # error-prone.
        assert not file_info.newFile

        self._parse_diff_git_line(diff_git_line, file_info, linenum)

    if isinstance(file_info.origFile, six.binary_type):
        file_info.origFile = file_info.origFile.decode('utf-8')

    if isinstance(file_info.newFile, six.binary_type):
        file_info.newFile = file_info.newFile.decode('utf-8')

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is ``None`` when the diff ends
        right after the ``diff --git`` line, or when no content was found
        and the file was neither moved nor copied.

    Raises:
        DiffParserError: Extracting or decoding the filenames from the
            ``diff --git`` line raised ``ValueError``.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + b"\n"
    file_info.binary = False
    diff_line = self.lines[linenum].split()

    try:
        # Need to remove the "a/" and "b/" prefix
        file_info.origFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-2])
        file_info.newFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-1])

        # Decoding failures surface as UnicodeDecodeError, which is a
        # ValueError subclass and is therefore handled below.
        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')
    except ValueError:
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2
    elif self._is_moved_file(linenum):
        # A move spans three header lines.
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        file_info.data += self.lines[linenum + 2] + b"\n"
        linenum += 3
        file_info.moved = True
    elif self._is_copied_file(linenum):
        # A copy spans three header lines.
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        file_info.data += self.lines[linenum + 2] + b"\n"
        linenum += 3
        file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # Lines are bytes, so the separator must be a bytes literal:
        # mixing text and bytes raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == b"/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" and the "+++" line.
            file_info.data += self.lines[linenum] + b'\n'
            file_info.data += self.lines[linenum + 1] + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if empty_change and not (file_info.moved or file_info.copied):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is the populated ``File``, or
        ``None`` for an empty change that is not a deletion.

    Raises:
        DiffParserError: The ``diff --git`` line does not carry both an
            original and a new filename.
    """
    # First check if it is a new file with no content or
    # a file mode change with no content or
    # a deleted file with no content
    # then skip
    empty_change = self._is_empty_change(linenum)
    empty_change_linenum = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + "\n"
    file_info.binary = False
    diff_line = self.lines[linenum].split()

    # "diff --git <old> <new>" needs at least four tokens. Indexing with
    # [-2]/[-1] never fails on a shorter header (it would silently pick up
    # "diff"/"--git" as filenames), so validate the token count explicitly.
    if len(diff_line) < 4:
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    # Need to remove the "a/" and "b/" prefix
    file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
    file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])

    linenum += 1

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        linenum += 2
    elif self._is_moved_file(linenum):
        # A move spans three header lines.
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        file_info.data += self.lines[linenum + 2] + "\n"
        linenum += 3
        file_info.moved = True

    # Only show interesting empty changes. Basically, deletions.
    # It's likely a binary file if we're at this point, and so we want
    # to process the rest of it.
    if empty_change and not file_info.deleted:
        return empty_change_linenum, None

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        if '..' in index_range:
            file_info.origInfo, file_info.newInfo = index_range.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            return linenum, file_info
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + "\n"
            return linenum + 1, file_info
        elif self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" and the "+++" line.
            file_info.data += self.lines[linenum] + '\n'
            file_info.data += self.lines[linenum + 1] + '\n'
            linenum += 2
        else:
            linenum = self.parse_diff_line(linenum, file_info)

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Every line after the headers is appended verbatim to
    ``file_info.data`` until the next section or a binary patch marker.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is the populated ``File``, or
        ``None`` for an empty change that is not a deletion.

    Raises:
        DiffParserError: The ``diff --git`` line does not carry both an
            original and a new filename.
    """
    # First check if it is a new file with no content or
    # a file mode change with no content or
    # a deleted file with no content
    # then skip
    empty_change = self._is_empty_change(linenum)
    empty_change_linenum = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + "\n"
    file_info.binary = False
    diff_line = self.lines[linenum].split()

    # "diff --git <old> <new>" needs at least four tokens. Indexing with
    # [-2]/[-1] never fails on a shorter header (it would silently pick up
    # "diff"/"--git" as filenames), so validate the token count explicitly.
    if len(diff_line) < 4:
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    # Need to remove the "a/" and "b/" prefix
    file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
    file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])

    linenum += 1

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        linenum += 2
    elif self._is_moved_file(linenum):
        # A move spans three header lines.
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        file_info.data += self.lines[linenum + 2] + "\n"
        linenum += 3
        file_info.moved = True

    # Only show interesting empty changes. Basically, deletions.
    # It's likely a binary file if we're at this point, and so we want
    # to process the rest of it.
    if empty_change and not file_info.deleted:
        return empty_change_linenum, None

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        if '..' in index_range:
            file_info.origInfo, file_info.newInfo = index_range.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            return linenum, file_info

        if self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + "\n"
            return linenum + 1, file_info

        if self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is ``None`` when the diff ends
        right after the ``diff --git`` line, or when the change is empty
        and is not a new, moved, copied or deleted file.

    Raises:
        DiffParserError: The ``diff --git`` line has no `` b/`` separator,
            or a filename cannot be decoded as UTF-8.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + b"\n"
    file_info.binary = False

    # We split at the b/ to deal with space in filenames, this is not
    # perfect, but it should solve most of the whitespace problems.
    # Lines are bytes, so the separator must be a bytes literal as well.
    diff_line = self.lines[linenum].split(b' b/')

    try:
        file_info.origFile = diff_line[-2].replace(b'diff --git a/', b'')
        file_info.newFile = diff_line[-1]

        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')
    except (IndexError, ValueError):
        # IndexError: no " b/" separator, so there is only one piece.
        # ValueError: UnicodeDecodeError raised while decoding a filename.
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2
    elif self._is_moved_file(linenum):
        # A move spans three header lines.
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        file_info.data += self.lines[linenum + 2] + b"\n"
        linenum += 3
        file_info.moved = True
    elif self._is_copied_file(linenum):
        # A copy spans three header lines.
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        file_info.data += self.lines[linenum + 2] + b"\n"
        linenum += 3
        file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # Bytes literal for the same reason as the " b/" split above.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == b"/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" and the "+++" line.
            file_info.data += self.lines[linenum] + b'\n'
            file_info.data += self.lines[linenum + 1] + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is ``None`` when the diff ends
        after the extended headers or when no content was found.

    Raises:
        DiffParserError: The ``diff --git`` line does not carry both an
            original and a new filename.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + "\n"
    file_info.binary = False
    diff_line = self.lines[linenum].split()

    # "diff --git <old> <new>" needs at least four tokens. Indexing with
    # [-2]/[-1] never fails on a shorter header (it would silently pick up
    # "diff"/"--git" as filenames), so validate the token count explicitly.
    if len(diff_line) < 4:
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    # Need to remove the "a/" and "b/" prefix
    file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
    file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])

    linenum += 1

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + "\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        linenum += 2
    elif self._is_moved_file(linenum):
        # A move spans three header lines.
        file_info.data += self.lines[linenum] + "\n"
        file_info.data += self.lines[linenum + 1] + "\n"
        file_info.data += self.lines[linenum + 2] + "\n"
        linenum += 3
        file_info.moved = True

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        if '..' in index_range:
            file_info.origInfo, file_info.newInfo = index_range.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + "\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + "\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            if self.lines[linenum].split()[1] == "/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" and the "+++" line.
            file_info.data += self.lines[linenum] + '\n'
            file_info.data += self.lines[linenum + 1] + '\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if empty_change:
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The filenames are taken from the ``diff --git`` line by cutting it at
    the first `` b/`` separator, so names containing spaces are kept
    whole. When no separator is present the new filename is left empty.

    Args:
        linenum: Index into ``self.lines`` of the ``diff --git`` line that
            starts the section.

    Returns:
        A ``(linenum, file_info)`` tuple. ``linenum`` is the index at which
        parsing should resume. ``file_info`` is the populated ``File``, or
        ``None`` for an empty change that is not a deletion.

    Raises:
        DiffParserError: The text after ``diff --git `` does not start
            with ``a/``.
    """
    # Decide up front whether this is a content-less new file, mode
    # change or deletion, and where parsing would resume if it is skipped.
    is_empty = self._is_empty_change(linenum)
    resume_after_empty = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

    file_info = File()
    file_info.data = self.lines[linenum] + "\n"
    file_info.binary = False

    # Drop the leading "diff --git " and require the "a/" prefix.
    paths = self.lines[linenum][len("diff --git "):]

    if not paths.startswith("a/"):
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    # partition() yields an empty tail when " b/" is absent, which is
    # exactly the "no new filename" case.
    old_part, _, new_part = paths.partition(" b/")
    file_info.origFile = old_part[2:]
    file_info.newFile = new_part

    linenum += 1

    # Work out how many extended header lines belong to this file.
    if self._is_new_file(linenum):
        header_count = 1
    elif self._is_deleted_file(linenum):
        header_count = 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        header_count = 2
    elif self._is_moved_file(linenum):
        header_count = 3
        file_info.moved = True
    else:
        header_count = 0

    for offset in range(header_count):
        file_info.data += self.lines[linenum + offset] + "\n"

    linenum += header_count

    # Of the empty changes only deletions are kept; anything else that
    # gets this far is likely binary and is processed below.
    if is_empty and not file_info.deleted:
        return resume_after_empty, None

    if self._is_index_range_line(linenum):
        index_line = self.lines[linenum]
        revisions = index_line.split(None, 2)[1]

        if '..' in revisions:
            file_info.origInfo, file_info.newInfo = revisions.split("..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += index_line + "\n"
        linenum += 1

    # Consume the body of the change.
    total = len(self.lines)

    while linenum < total:
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break

        if self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
            break

        if not self._is_diff_fromfile_line(linenum):
            linenum = self.parse_diff_line(linenum, file_info)
            continue

        if self.lines[linenum].split()[1] == "/dev/null":
            file_info.origInfo = PRE_CREATION

        # Keep both the "---" and the "+++" line.
        file_info.data += self.lines[linenum] + '\n'
        file_info.data += self.lines[linenum + 1] + '\n'
        linenum += 2

    return linenum, file_info