def _parse(self, diff, encoding):
    """Scan ``diff`` line by line and populate this patch set.

    ``diff`` is any iterable of lines; when ``encoding`` is not None each
    line is decoded with it before being examined.  Three kinds of lines
    are recognised:

    * a source file header, which records the source name/timestamp and
      clears the file currently being built;
    * a target file header, which creates a ``PatchedFile`` from the
      recorded source data and appends it to ``self``;
    * a hunk header, which is delegated to the current file together with
      the shared line iterator.

    Any other line is ignored.

    Raises:
        UnidiffParseError: if a target header arrives while a file is
            still open, or a hunk header arrives with no file open.
    """
    patched = None  # file currently receiving hunks, if any

    # Number the lines once; the same iterator is handed to the hunk
    # parser so that it can pull the hunk body from it.
    numbered_lines = enumerate(diff, 1)
    for _line_no, raw in numbered_lines:
        text = raw if encoding is None else raw.decode(encoding)

        # source file header
        src_match = RE_SOURCE_FILENAME.match(text)
        if src_match:
            src_name = src_match.group('filename')
            src_stamp = src_match.group('timestamp')
            patched = None
            continue

        # target file header
        tgt_match = RE_TARGET_FILENAME.match(text)
        if tgt_match:
            if patched is not None:
                raise UnidiffParseError('Target without source: %s' % text)
            tgt_name = tgt_match.group('filename')
            tgt_stamp = tgt_match.group('timestamp')
            patched = PatchedFile(src_name, tgt_name, src_stamp, tgt_stamp)
            self.append(patched)
            continue

        # hunk header
        if RE_HUNK_HEADER.match(text):
            if patched is None:
                raise UnidiffParseError('Unexpected hunk found: %s' % text)
            patched._parse_hunk(text, numbered_lines, encoding)
def _parse(self, diff, encoding):
    """Scan ``diff`` line by line and populate this patch set.

    ``diff`` is any iterable of lines; when ``encoding`` is not None each
    line is decoded with it first.  Lines are classified, in this order,
    as: source file header, target file header, hunk header, "no newline"
    marker, trailing empty line after a hunk, or free-form patch info.
    Patch info lines are collected in a ``PatchInfo`` that is attached to
    the next ``PatchedFile`` created from a target header.

    Raises:
        UnidiffParseError: if a target header arrives while a file is
            still open, or if a hunk header or "no newline" marker
            arrives with no file open.
    """
    patched = None  # file currently receiving hunks, if any
    info = None     # patch info lines gathered for the next file

    # The same numbered iterator is handed to the hunk parser so that it
    # can pull the hunk body from it.
    numbered_lines = enumerate(diff, 1)
    for _line_no, raw in numbered_lines:
        text = raw if encoding is None else raw.decode(encoding)

        # source file header
        src_match = RE_SOURCE_FILENAME.match(text)
        if src_match:
            src_name = src_match.group('filename')
            src_stamp = src_match.group('timestamp')
            patched = None
            continue

        # target file header
        tgt_match = RE_TARGET_FILENAME.match(text)
        if tgt_match:
            if patched is not None:
                raise UnidiffParseError('Target without source: %s' % text)
            tgt_name = tgt_match.group('filename')
            tgt_stamp = tgt_match.group('timestamp')
            patched = PatchedFile(
                info, src_name, tgt_name, src_stamp, tgt_stamp)
            self.append(patched)
            # the collected info now belongs to the file just created
            info = None
            continue

        # hunk header
        if RE_HUNK_HEADER.match(text):
            if patched is None:
                raise UnidiffParseError('Unexpected hunk found: %s' % text)
            patched._parse_hunk(text, numbered_lines, encoding)
            continue

        # "no newline at end of file" marker
        if RE_NO_NEWLINE_MARKER.match(text):
            if patched is None:
                raise UnidiffParseError('Unexpected marker: %s' % text)
            patched._add_no_newline_marker_to_last_hunk()
            continue

        # hunks are sometimes followed by empty lines
        if text == '\n' and patched is not None:
            patched._append_trailing_empty_line()
            continue

        # anything else is patch info; the first such line starts a new
        # PatchInfo and closes the current file
        if info is None:
            patched = None
            info = PatchInfo()
        info.append(text)
def _parse(self, diff, encoding):
    """Scan ``diff`` line by line and populate this patch set.

    ``diff`` is any iterable of lines; when ``encoding`` is not None each
    line is decoded with it first.  Source and target headers may carry
    their file name either in the ``filename`` group or, when that group
    is empty, in the ``renamefile`` group; a target taken from
    ``renamefile`` marks the resulting ``PatchedFile`` as renamed.

    Raises:
        UnidiffParseError: if a target header arrives while a file is
            still open, or if a hunk header or "no newline" marker
            arrives with no file open.
    """
    patched = None  # file currently receiving hunks, if any

    # The same numbered iterator is handed to the hunk parser so that it
    # can pull the hunk body from it.
    numbered_lines = enumerate(diff, 1)
    for _line_no, raw in numbered_lines:
        # decoding is only needed when the caller supplied an encoding
        text = raw if encoding is None else raw.decode(encoding)

        # source file header
        src_match = RE_SOURCE_FILENAME.match(text)
        if src_match:
            src_name = src_match.group('filename')
            src_stamp = src_match.group('timestamp')
            if src_name is None:
                # the other alternative of the regex must have matched
                src_name = src_match.group('renamefile')
            patched = None
            continue

        # target file header
        tgt_match = RE_TARGET_FILENAME.match(text)
        if tgt_match:
            renamed = False
            if patched is not None:
                raise UnidiffParseError('Target without source: %s' % text)
            tgt_name = tgt_match.group('filename')
            if tgt_name is None:
                tgt_name = tgt_match.group('renamefile')
                renamed = True
            tgt_stamp = tgt_match.group('timestamp')
            patched = PatchedFile(
                src_name, tgt_name, src_stamp, tgt_stamp, renamed)
            self.append(patched)
            continue

        # hunk header; note there is deliberately no ``continue`` here,
        # the line still goes through the marker check below
        if RE_HUNK_HEADER.match(text):
            if patched is None:
                raise UnidiffParseError('Unexpected hunk found: %s' % text)
            patched._parse_hunk(text, numbered_lines, encoding)

        # "no newline at end of file" marker
        if RE_NO_NEWLINE_MARKER.match(text):
            if patched is None:
                raise UnidiffParseError('Unexpected marker: %s' % text)
            patched._add_no_newline_marker_to_last_hunk()
def _add_no_newline_marker_to_last_hunk(self):
    """Append a "no newline" marker line to the most recent hunk.

    Raises:
        UnidiffParseError: if this container holds no hunk yet.
    """
    if not self:
        message = 'Unexpected marker:' + LINE_VALUE_NO_NEWLINE
        raise UnidiffParseError(message)

    marker_text = LINE_VALUE_NO_NEWLINE + '\n'
    self[-1].append(Line(marker_text, line_type=LINE_TYPE_NO_NEWLINE))
def _parse_hunk(self, header, diff, encoding): """Parse hunk details.""" header_info = RE_HUNK_HEADER.match(header) hunk_info = header_info.groups() hunk = Hunk(*hunk_info) source_line_no = hunk.source_start target_line_no = hunk.target_start expected_source_end = source_line_no + hunk.source_length expected_target_end = target_line_no + hunk.target_length for diff_line_no, line in diff: if encoding is not None: line = line.decode(encoding) valid_line = RE_HUNK_BODY_LINE.match(line) if not valid_line: raise UnidiffParseError('Hunk diff line expected: %s' % line) line_type = valid_line.group('line_type') if line_type == LINE_TYPE_EMPTY: line_type = LINE_TYPE_CONTEXT value = valid_line.group('value') original_line = Line(value, line_type=line_type) if line_type == LINE_TYPE_ADDED: original_line.target_line_no = target_line_no target_line_no += 1 self.added_lines_content = "{0}\n{1}".format( self.added_lines_content, value) elif line_type == LINE_TYPE_REMOVED: original_line.source_line_no = source_line_no source_line_no += 1 self.removed_lines = "{0}\n{1}".format( self.removed_lines_content, value) elif line_type == LINE_TYPE_CONTEXT: original_line.target_line_no = target_line_no target_line_no += 1 original_line.source_line_no = source_line_no source_line_no += 1 else: original_line = None if original_line: original_line.diff_line_no = diff_line_no hunk.append(original_line) # if hunk source/target lengths are ok, hunk is complete if (source_line_no == expected_source_end and target_line_no == expected_target_end): break self.append(hunk)
def _parse_hunk(self, header, diff, encoding):
    """Read one hunk from ``diff`` and append it to this file.

    ``header`` is matched against ``RE_HUNK_HEADER`` and its groups are
    used to build the ``Hunk``.  Lines are then taken from ``diff``
    (decoded with ``encoding`` when it is not None) and added to the hunk
    until ``hunk.is_valid()`` reports true or ``diff`` is exhausted.

    Raises:
        UnidiffParseError: if a line does not match
            ``RE_HUNK_BODY_LINE``.
    """
    hunk = Hunk(*RE_HUNK_HEADER.match(header).groups())

    src_no = hunk.source_start
    tgt_no = hunk.target_start

    for raw in diff:
        text = raw if encoding is None else raw.decode(encoding)

        body = RE_HUNK_BODY_LINE.match(text)
        if body is None:
            raise UnidiffParseError('Hunk diff line expected: %s' % text)

        kind = body.group('line_type')
        entry = Line(body.group('value'), line_type=kind)

        # number the line on whichever side(s) of the diff it lives
        if kind == LINE_TYPE_ADDED:
            entry.target_line_no = tgt_no
            tgt_no += 1
        elif kind == LINE_TYPE_REMOVED:
            entry.source_line_no = src_no
            src_no += 1
        elif kind == LINE_TYPE_CONTEXT:
            entry.target_line_no = tgt_no
            tgt_no += 1
            entry.source_line_no = src_no
            src_no += 1
        else:
            # other line types are not stored in the hunk
            entry = None

        if entry:
            hunk.append(entry)

        # stop as soon as the hunk considers itself complete
        if hunk.is_valid():
            break

    self.append(hunk)
def _parse(self, diff, encoding, metadata_only):
    """Scan ``diff`` line by line and populate this patch set.

    Args:
        diff: iterable of lines making up the diff.
        encoding: when not None, every line is decoded with it first.
        metadata_only: forwarded unchanged to
            ``PatchedFile._parse_hunk`` for every hunk.

    Lines are classified, in this order, as: git rename source, git
    rename target, source file header, target file header, hunk header,
    "no newline" marker, trailing empty line after a hunk, binary diff
    notice, or free-form patch info.  Patch info lines are collected in
    a ``PatchInfo`` that is attached to the next ``PatchedFile`` created.

    Raises:
        UnidiffParseError: if a target (or rename target) arrives while
            an incompatible file is still open, or if a hunk header or
            "no newline" marker arrives with no file open.
    """
    current_file = None
    patch_info = None

    # The same numbered iterator is handed to the hunk parser so that it
    # can pull the hunk body from it.
    diff = enumerate(diff, 1)
    for unused_diff_line_no, line in diff:
        if encoding is not None:
            line = line.decode(encoding)

        # check for a git rename, source file
        is_rename_source_filename = RE_RENAME_SOURCE_FILENAME.match(line)
        if is_rename_source_filename:
            # prefix with 'a/' to match expected git source format
            source_file = (
                'a/' + is_rename_source_filename.group('filename'))
            # keep line as patch_info; create the container on demand so
            # a rename line with no preceding info does not crash with
            # AttributeError on None
            if patch_info is None:
                patch_info = PatchInfo()
            patch_info.append(line)
            # reset current file
            current_file = None
            continue

        # check for a git rename, target file
        is_rename_target_filename = RE_RENAME_TARGET_FILENAME.match(line)
        if is_rename_target_filename:
            if current_file is not None:
                raise UnidiffParseError('Target without source: %s' % line)
            # prefix with 'b/' to match expected git source format
            target_file = (
                'b/' + is_rename_target_filename.group('filename'))
            # keep line as patch_info (created on demand, see above)
            if patch_info is None:
                patch_info = PatchInfo()
            patch_info.append(line)
            # add current file to PatchSet
            current_file = PatchedFile(
                patch_info, source_file, target_file, None, None,
                is_rename=True)
            self.append(current_file)
            continue

        # check for source file header
        is_source_filename = RE_SOURCE_FILENAME.match(line)
        if is_source_filename:
            source_file = is_source_filename.group('filename')
            source_timestamp = is_source_filename.group('timestamp')
            # reset current file, unless we are processing a rename
            # (in that case, source files should match)
            if current_file is not None and not (
                    current_file.is_rename and
                    current_file.source_file == source_file):
                current_file = None
            continue

        # check for target file header
        is_target_filename = RE_TARGET_FILENAME.match(line)
        if is_target_filename:
            if current_file is not None and not current_file.is_rename:
                raise UnidiffParseError('Target without source: %s' % line)
            target_file = is_target_filename.group('filename')
            target_timestamp = is_target_filename.group('timestamp')
            if current_file is None:
                # add current file to PatchSet
                current_file = PatchedFile(
                    patch_info, source_file, target_file,
                    source_timestamp, target_timestamp)
                self.append(current_file)
            # the collected info now belongs to the current file
            patch_info = None
            continue

        # check for hunk header
        is_hunk_header = RE_HUNK_HEADER.match(line)
        if is_hunk_header:
            if current_file is None:
                raise UnidiffParseError('Unexpected hunk found: %s' % line)
            current_file._parse_hunk(line, diff, encoding, metadata_only)
            continue

        # check for no newline marker
        is_no_newline = RE_NO_NEWLINE_MARKER.match(line)
        if is_no_newline:
            if current_file is None:
                raise UnidiffParseError('Unexpected marker: %s' % line)
            current_file._add_no_newline_marker_to_last_hunk()
            continue

        # sometimes hunks can be followed by empty lines
        if line == '\n' and current_file is not None:
            current_file._append_trailing_empty_line()
            continue

        # check for a binary diff notice
        is_binary_diff = RE_BINARY_DIFF.match(line)
        if is_binary_diff:
            source_file = is_binary_diff.group('source_filename')
            target_file = is_binary_diff.group('target_filename')
            # a binary notice may arrive with no preceding patch info
            # (e.g. as the first line, or right after a target header)
            if patch_info is None:
                patch_info = PatchInfo()
            patch_info.append(line)
            current_file = PatchedFile(
                patch_info, source_file, target_file, is_binary_file=True)
            self.append(current_file)
            # a binary file has no hunks, so nothing stays open
            patch_info = None
            current_file = None
            continue

        # if nothing has matched above then this line is a patch info
        if patch_info is None:
            current_file = None
            patch_info = PatchInfo()
        patch_info.append(line)
def _append_trailing_empty_line(self):
    """Append an empty line to the most recent hunk.

    Raises:
        UnidiffParseError: if this container holds no hunk yet.
    """
    if not self:
        raise UnidiffParseError('Unexpected trailing newline character')

    empty_line = Line('\n', line_type=LINE_TYPE_EMPTY)
    self[-1].append(empty_line)
def _parse_hunk(self, header, diff, encoding, metadata_only):
    """Read one hunk from ``diff`` and append it to this file.

    ``header`` is matched against ``RE_HUNK_HEADER`` and its groups are
    used to build the ``Hunk``.  ``diff`` yields ``(diff_line_no, line)``
    pairs and is consumed up to the last line of the hunk; lines are
    decoded with ``encoding`` when it is not None.

    With ``metadata_only`` set, lines are only counted (by their first
    character) and no ``Line`` objects are stored; the added/removed
    totals are written to ``hunk._added`` / ``hunk._removed`` instead.

    Raises:
        UnidiffParseError: on an unrecognised hunk line, or when the
            hunk is longer or shorter than its header announces.
    """
    hunk = Hunk(*RE_HUNK_HEADER.match(header).groups())

    src_no = hunk.source_start
    tgt_no = hunk.target_start
    src_end = src_no + hunk.source_length
    tgt_end = tgt_no + hunk.target_length

    n_added = 0
    n_removed = 0

    for diff_line_no, raw in diff:
        text = raw if encoding is None else raw.decode(encoding)

        if metadata_only:
            # cheap classification by first character, no regex needed
            kind = text[0] if text else LINE_TYPE_CONTEXT
            if kind not in (LINE_TYPE_ADDED, LINE_TYPE_REMOVED,
                            LINE_TYPE_CONTEXT, LINE_TYPE_NO_NEWLINE):
                raise UnidiffParseError(
                    'Hunk diff line expected: %s' % text)

            if kind == LINE_TYPE_ADDED:
                tgt_no += 1
                n_added += 1
            elif kind == LINE_TYPE_REMOVED:
                src_no += 1
                n_removed += 1
            elif kind == LINE_TYPE_CONTEXT:
                tgt_no += 1
                src_no += 1

            # file content is not tracked in this mode
            entry = None
        else:
            # full parse: try the regular body pattern first, then the
            # empty-line pattern
            body = (RE_HUNK_BODY_LINE.match(text) or
                    RE_HUNK_EMPTY_BODY_LINE.match(text))
            if not body:
                raise UnidiffParseError(
                    'Hunk diff line expected: %s' % text)

            kind = body.group('line_type')
            if kind == LINE_TYPE_EMPTY:
                kind = LINE_TYPE_CONTEXT
            entry = Line(body.group('value'), line_type=kind)

            if kind == LINE_TYPE_ADDED:
                entry.target_line_no = tgt_no
                tgt_no += 1
            elif kind == LINE_TYPE_REMOVED:
                entry.source_line_no = src_no
                src_no += 1
            elif kind == LINE_TYPE_CONTEXT:
                entry.target_line_no = tgt_no
                entry.source_line_no = src_no
                tgt_no += 1
                src_no += 1
            elif kind == LINE_TYPE_NO_NEWLINE:
                # kept in the hunk, but carries no line numbers
                pass
            else:
                entry = None

        # bail out once either side overshoots what the header promised
        if src_no > src_end or tgt_no > tgt_end:
            raise UnidiffParseError('Hunk is longer than expected')

        if entry:
            entry.diff_line_no = diff_line_no
            hunk.append(entry)

        # both sides reached their expected end: the hunk is complete
        if src_no == src_end and tgt_no == tgt_end:
            break

    # the input ran out before the hunk was complete
    if src_no < src_end or tgt_no < tgt_end:
        raise UnidiffParseError('Hunk is shorter than expected')

    if metadata_only:
        # HACK: store the counted totals directly on the hunk
        hunk._added = n_added
        hunk._removed = n_removed

    self.append(hunk)
def _parse_hunk(self, header, diff, encoding):
    """Read one hunk from ``diff`` and append it to this file.

    ``header`` is matched against ``RE_HUNK_HEADER`` and its groups are
    used to build the ``Hunk``.  ``diff`` yields ``(diff_line_no, line)``
    pairs and is consumed up to the last line of the hunk; lines are
    decoded with ``encoding`` when it is not None.

    Raises:
        UnidiffParseError: on an unrecognised hunk line, or when the
            hunk is longer or shorter than its header announces.
    """
    hunk = Hunk(*RE_HUNK_HEADER.match(header).groups())

    src_no = hunk.source_start
    tgt_no = hunk.target_start
    src_end = src_no + hunk.source_length
    tgt_end = tgt_no + hunk.target_length

    for diff_line_no, raw in diff:
        text = raw if encoding is None else raw.decode(encoding)

        # the empty-line pattern is tried first, then the regular one
        body = (RE_HUNK_EMPTY_BODY_LINE.match(text) or
                RE_HUNK_BODY_LINE.match(text))
        if not body:
            raise UnidiffParseError('Hunk diff line expected: %s' % text)

        kind = body.group('line_type')
        if kind == LINE_TYPE_EMPTY:
            kind = LINE_TYPE_CONTEXT
        entry = Line(body.group('value'), line_type=kind)

        if kind == LINE_TYPE_ADDED:
            entry.target_line_no = tgt_no
            tgt_no += 1
        elif kind == LINE_TYPE_REMOVED:
            entry.source_line_no = src_no
            src_no += 1
        elif kind == LINE_TYPE_CONTEXT:
            entry.target_line_no = tgt_no
            tgt_no += 1
            entry.source_line_no = src_no
            src_no += 1
        elif kind == LINE_TYPE_NO_NEWLINE:
            # kept in the hunk, but carries no line numbers
            pass
        else:
            entry = None

        # bail out once either side overshoots what the header promised
        if src_no > src_end or tgt_no > tgt_end:
            raise UnidiffParseError('Hunk is longer than expected')

        if entry:
            entry.diff_line_no = diff_line_no
            hunk.append(entry)

        # both sides reached their expected end: the hunk is complete
        if src_no == src_end and tgt_no == tgt_end:
            break

    # the input ran out before the hunk was complete
    if src_no < src_end or tgt_no < tgt_end:
        raise UnidiffParseError('Hunk is shorter than expected')

    self.append(hunk)