def detectWhiteSpaceChanges(file, old_lines, begin_old_offset, end_old_offset, old_ending_linebreak, new_lines, begin_new_offset, end_new_offset, new_ending_linebreak):
    """Append whitespace-only chunks to `file.chunks`.

    Walks the old/new line ranges in lockstep; every maximal run of line
    pairs that differ (the diff was produced with --ignore-space-change, so a
    difference here is whitespace-only, or a change in the presence of a
    linebreak on the very last line) becomes one diff.Chunk flagged
    is_whitespace=True.  Offsets are 1-based and `end_*_offset` is exclusive.

    Fixes vs. previous version: the redundant `chunk.is_whitespace = True`
    after constructing the chunk with `is_whitespace=True` is gone, and the
    duplicated emission code is factored into a local helper.
    """
    def flush(end_old, end_new):
        # Emit the currently open run [start_old_offset, end_old) /
        # [start_new_offset, end_new) as a whitespace-only chunk.
        assert end_old - start_old_offset != 0 and end_new - start_new_offset != 0
        file.chunks.append(
            diff.Chunk(start_old_offset, end_old - start_old_offset,
                       start_new_offset, end_new - start_new_offset,
                       is_whitespace=True))

    start_old_offset = None
    start_new_offset = None

    for old_offset, new_offset in itertools.izip(
            xrange(begin_old_offset, end_old_offset),
            xrange(begin_new_offset, end_new_offset)):
        # A pair differs if the line text differs, or if this is the file's
        # last line and only the trailing linebreak changed.
        if old_lines[old_offset - 1] != new_lines[new_offset - 1] or (
                old_offset == len(old_lines)
                and old_ending_linebreak != new_ending_linebreak):
            if start_old_offset is None:
                # Open a new run of whitespace-only differences.
                start_old_offset = old_offset
                start_new_offset = new_offset
        elif start_old_offset is not None:
            # The run ended on the previous pair; emit it.
            flush(old_offset, new_offset)
            start_old_offset = None

    if start_old_offset is not None:
        # A run extended to the end of the compared ranges.
        flush(end_old_offset, end_new_offset)
def createChunks(delete_offset, deleted_lines, insert_offset, inserted_lines):
    """Turn one hunk (deleted_lines replaced by inserted_lines) into chunks.

    Leading and trailing line pairs that differ only in whitespace (per
    isWhitespaceChange) are split off into separate chunks flagged
    is_whitespace=True; whatever remains in the middle becomes one regular
    chunk.  Offsets are 1-based.

    NOTE: trims `deleted_lines`/`inserted_lines` in place.
    Returns a list of 0-3 diff.Chunk objects in file order.

    Fix vs. previous version: the unused local aliases `dl`/`il` are removed.
    """
    ws_before = None
    ws_after = None

    # Peel whitespace-only pairs off the front of the hunk.
    if deleted_lines and inserted_lines and isWhitespaceChange(
            deleted_lines[0], inserted_lines[0]):
        ws_lines = 1
        max_lines = min(len(deleted_lines), len(inserted_lines))
        while ws_lines < max_lines and isWhitespaceChange(
                deleted_lines[ws_lines], inserted_lines[ws_lines]):
            ws_lines += 1
        ws_before = diff.Chunk(delete_offset, ws_lines, insert_offset,
                               ws_lines, is_whitespace=True)
        delete_offset += ws_lines
        del deleted_lines[:ws_lines]
        insert_offset += ws_lines
        del inserted_lines[:ws_lines]

    # Peel whitespace-only pairs off the back of what remains.
    if deleted_lines and inserted_lines and isWhitespaceChange(
            deleted_lines[-1], inserted_lines[-1]):
        ws_lines = 1
        max_lines = min(len(deleted_lines), len(inserted_lines))
        while ws_lines < max_lines and isWhitespaceChange(
                deleted_lines[-(ws_lines + 1)], inserted_lines[-(ws_lines + 1)]):
            ws_lines += 1
        ws_after = diff.Chunk(delete_offset + len(deleted_lines) - ws_lines,
                              ws_lines,
                              insert_offset + len(inserted_lines) - ws_lines,
                              ws_lines, is_whitespace=True)
        del deleted_lines[-ws_lines:]
        del inserted_lines[-ws_lines:]

    # The (possibly empty) non-whitespace middle part.
    if deleted_lines or inserted_lines:
        chunks = [
            diff.Chunk(delete_offset, len(deleted_lines), insert_offset,
                       len(inserted_lines))
        ]
    else:
        chunks = []

    if ws_before:
        chunks.insert(0, ws_before)
    if ws_after:
        chunks.append(ws_after)

    return chunks
def __getChunks(self, critic):
    # Lazily load this filediff's chunks.  To avoid one query per file, the
    # database query is batched: chunks for *all* currently cached Filediff
    # objects that still lack them are fetched (grouped per changeset) and
    # filled in at once.
    if self.__chunks is None:
        cached_objects = Filediff.allCached(critic)

        # Sanity check: this filediff itself must be among the cached ones,
        # or the batch load below would never populate self.__chunks.
        assert Filediff.cache_key(self.filechange) in cached_objects

        # Map changeset id -> list of file ids whose chunks are not loaded.
        cached_by_changeset = {}
        for (changeset_id, file_id), filediff in cached_objects.items():
            if filediff._impl.__chunks is None:
                # Initialize to an empty list so the rows below can be
                # appended, and so the filediff is not re-queued later.
                filediff._impl.__chunks = []
                cached_by_changeset.setdefault(changeset_id, []) \
                    .append(file_id)

        cursor = critic.getDatabaseCursor()
        for changeset_id, file_ids in cached_by_changeset.items():
            cursor.execute(
                """SELECT file, deleteOffset, deleteCount, insertOffset, insertCount, analysis, whitespace FROM chunks WHERE changeset=%s AND file=ANY (%s) ORDER BY file, deleteOffset, insertOffset""",
                (changeset_id, file_ids))
            for (file_id, delete_offset, delete_count, insert_offset,
                 insert_count, analysis, is_whitespace) in cursor:
                # Route each row to the filediff it belongs to.  Name
                # mangling makes `__chunks` resolve to the same attribute as
                # the one initialized above.
                cached_objects[(changeset_id, file_id)]._impl.__chunks \
                    .append(diff.Chunk(delete_offset, delete_count,
                                       insert_offset, insert_count,
                                       analysis=analysis,
                                       is_whitespace=is_whitespace))
    return self.__chunks
def loadChangesets(db, repository, changesets, filtered_file_ids=None, load_chunks=True):
    """Populate each changeset in `changesets` with its diff.File objects.

    Loads the file versions (and, when `load_chunks` is true, their chunks)
    for all given changesets in two batched queries, optionally restricted to
    `filtered_file_ids`.  Sets `changeset.files` on every changeset and
    returns the `changesets` list.
    """
    cursor = db.cursor()
    changeset_ids = [changeset.id for changeset in changesets]
    filtered_file_ids = list(filtered_file_ids) if filtered_file_ids else None

    # Fetch the file versions, filtered by file id when requested.
    if filtered_file_ids is None:
        cursor.execute("""SELECT changeset, file, path, old_sha1, new_sha1, old_mode, new_mode FROM fileversions JOIN files ON (files.id=fileversions.file) WHERE changeset=ANY (%s)""", (changeset_ids,))
    else:
        cursor.execute("""SELECT changeset, file, path, old_sha1, new_sha1, old_mode, new_mode FROM fileversions JOIN files ON (files.id=fileversions.file) WHERE changeset=ANY (%s) AND file=ANY (%s)""", (changeset_ids, filtered_file_ids))

    # changeset id -> { file id -> diff.File }
    files_per_changeset = {}
    for changeset in changesets:
        files_per_changeset[changeset.id] = {}

    for row in cursor.fetchall():
        (row_changeset_id, row_file_id, row_path, row_old_sha1, row_new_sha1,
         row_old_mode, row_new_mode) = row
        files_per_changeset[row_changeset_id][row_file_id] = diff.File(
            row_file_id, row_path, row_old_sha1, row_new_sha1, repository,
            old_mode=row_old_mode, new_mode=row_new_mode, chunks=[])

    if load_chunks:
        # Fetch all chunks for the loaded files and attach them in order.
        if filtered_file_ids is None:
            cursor.execute("""SELECT id, changeset, file, deleteOffset, deleteCount, insertOffset, insertCount, analysis, whitespace FROM chunks WHERE changeset=ANY (%s) ORDER BY file, deleteOffset ASC""", (changeset_ids,))
        else:
            cursor.execute("""SELECT id, changeset, file, deleteOffset, deleteCount, insertOffset, insertCount, analysis, whitespace FROM chunks WHERE changeset=ANY (%s) AND file=ANY (%s) ORDER BY file, deleteOffset ASC""", (changeset_ids, filtered_file_ids))
        for row in cursor:
            (chunk_id, row_changeset_id, row_file_id, delete_offset,
             delete_count, insert_offset, insert_count, analysis,
             is_whitespace) = row
            files_per_changeset[row_changeset_id][row_file_id].chunks.append(
                diff.Chunk(delete_offset, delete_count, insert_offset,
                           insert_count, id=chunk_id,
                           is_whitespace=is_whitespace, analysis=analysis))

    for changeset in changesets:
        changeset.files = diff.File.sorted(
            files_per_changeset[changeset.id].values())

    return changesets
def readChunks():
    # Materialize one diff.Chunk per row already queued on the enclosing
    # scope's open `cursor`, preserving row order.
    chunks = []
    for row in cursor.fetchall():
        (delete_offset, delete_count, insert_offset, insert_count,
         analysis, is_whitespace) = row
        chunks.append(
            diff.Chunk(delete_offset, delete_count, insert_offset,
                       insert_count, analysis=analysis,
                       is_whitespace=is_whitespace))
    return chunks
def compareChunks(source_file, source_chunk, target_file, target_chunk, extra_target_chunks, context_lines=3):
    """Match the lines deleted by `source_chunk` (in `source_file`) against
    the lines inserted by `target_chunk` (in `target_file`), i.e. look for
    code moved from one place to the other.

    Returns a new diff.Chunk covering the best matched region (extended by up
    to `context_lines` lines of context) with `source_chunk`, `source_begin`,
    `source_end` and `source_length` attributes attached, or None when no
    sufficiently large/convincing match is found.  Large unmatched
    leading/trailing parts of the inserted lines are appended to
    `extra_target_chunks` for further processing by the caller.
    """
    source_length = source_file.oldCount()
    target_length = target_file.newCount()

    # Wrap raw lines in Line objects (defined elsewhere in this module;
    # presumably customizes equality for the matcher -- confirm there).
    source_lines = map(Line, source_chunk.deleted_lines)
    target_lines = map(Line, target_chunk.inserted_lines)

    sm = difflib.SequenceMatcher(None, source_lines, target_lines)
    # Drop zero-length blocks (including the terminating sentinel).
    blocks = filter(lambda x: x[2], sm.get_matching_blocks())

    if blocks:
        # Group consecutive matching blocks into clusters, starting a new
        # cluster whenever the gap (in either sequence) exceeds MAXIMUM_GAP.
        # Each cluster is recorded as (total_matched_lines, blocks).
        chunks = []
        i, j, n = blocks.pop(0)
        current = [(i, j, n)]
        matched = n
        pi = i + n
        pj = j + n
        for i, j, n in blocks:
            if i - pi > MAXIMUM_GAP or j - pj > MAXIMUM_GAP:
                chunks.append((matched, current))
                current = [(i, j, n)]
                matched = n
            else:
                current.append((i, j, n))
                matched += n
            pi = i + n
            pj = j + n
        chunks.append((matched, current))

        # Keep only the cluster with the most matched lines.
        chunks.sort()
        matched, blocks = chunks[-1]

        if matched < SMALLEST_INSERT:
            return None

        # Extend the matched region by `context_lines` on each side, clamped
        # to the file boundaries (offsets are 1-based, hence the +1/-1).
        source_begin = max(-(source_chunk.delete_offset - 1),
                           blocks[0][0] - context_lines)
        source_end = min(source_length + 1 - source_chunk.delete_offset,
                         blocks[-1][0] + blocks[-1][2] + context_lines)
        target_begin = max(-(target_chunk.insert_offset - 1),
                           blocks[0][1] - context_lines)
        target_end = min(target_length + 1 - target_chunk.insert_offset,
                         blocks[-1][1] + blocks[-1][2] + context_lines)

        new_chunk = diff.Chunk(source_chunk.delete_offset + source_begin,
                               source_end - source_begin,
                               target_chunk.insert_offset + target_begin,
                               target_end - target_begin)
        # Remember where the moved code came from.
        new_chunk.source_chunk = source_chunk
        new_chunk.source_begin = source_begin
        new_chunk.source_end = source_end
        new_chunk.source_length = source_length

        # Inserted lines before the match that are big enough to be examined
        # separately later.
        if blocks[0][1] >= SMALLEST_INSERT and blocks[0][1] < target_chunk.insert_count:
            extra_before = diff.Chunk(0, 0, target_chunk.insert_offset,
                                      blocks[0][1])
        else:
            extra_before = None

        match_end = blocks[-1][1] + blocks[-1][2]

        # Same for inserted lines after the match.
        if target_chunk.insert_count - match_end >= SMALLEST_INSERT:
            extra_after = diff.Chunk(0, 0,
                                     target_chunk.insert_offset + match_end,
                                     target_chunk.insert_count - match_end)
        else:
            extra_after = None

        new_chunk.deleted_lines = source_file.getOldLines(new_chunk)
        new_chunk.inserted_lines = target_file.getNewLines(new_chunk)

        # Accept the move only if enough of the inserted lines matched, or
        # if a per-line analysis confirms that most lines correspond.
        if matched > len(new_chunk.inserted_lines) * 0.25:
            analysis = diff.analyze.analyzeChunk(new_chunk.deleted_lines,
                                                 new_chunk.inserted_lines,
                                                 moved=True)
            if matched > len(new_chunk.inserted_lines) * 0.5 or (
                    analysis and len(analysis.split(';')) >= len(new_chunk.inserted_lines) * 0.5):
                new_chunk.analysis = analysis
                if extra_before:
                    extra_target_chunks.append(extra_before)
                if extra_after:
                    extra_target_chunks.append(extra_after)
                return new_chunk
    return None
def parseDifferences(repository, commit=None, from_commit=None, to_commit=None, filter_paths=None, selected_path=None, simple=False):
    """parseDifferences(repository, [commit] | [from_commit, to_commit][, selected_path])
         => dict(parent_sha1 => [diff.File, ...]) (if selected_path is None)
            diff.File (if selected_path is not None)

    Run "git diff"/"git show" and parse its output into diff.File objects
    holding diff.Chunk objects.  Unless `simple` is true the diff is produced
    with --ignore-space-change, and whitespace-only differences are detected
    separately (via detectWhiteSpaceChanges) and added back as chunks flagged
    is_whitespace=True.
    """
    options = []

    # Decide which git command to run and what range/commit to diff.
    if from_commit and to_commit:
        command = 'diff'
        what = from_commit.sha1 + ".." + to_commit.sha1
    elif not commit.parents:
        # Root commit.
        command = "show"
        what = commit.sha1
        options.append("--pretty=format:")
    else:
        assert len(commit.parents) == 1
        command = 'diff'
        what = commit.parents[0] + '..' + commit.sha1

    # Full (unfiltered, non-simple) runs need the complete set of changed
    # paths, so that files whose only changes are whitespace -- and which
    # therefore produce no hunks under --ignore-space-change -- can be
    # handled separately afterwards (see the `paths - included` loop below).
    if filter_paths is None and selected_path is None and not simple:
        names = repository.run(command, *(options + ["--name-only", what]))
        paths = set(filter(None, map(str.strip, names.splitlines())))
    else:
        paths = set()

    if not simple:
        options.append('--ignore-space-change')
    options.append(what)
    if filter_paths is not None:
        options.append('--')
        options.extend(filter_paths)
    elif selected_path is not None:
        options.append('--')
        options.append(selected_path)

    stdout = repository.run(command, '--full-index', '--unified=1',
                            '--patience', *options)

    selected_file = None

    # Hunk header, binary-file notice and per-file header patterns.
    re_chunk = re.compile('^@@ -(\\d+)(?:,\\d+)? \\+(\\d+)(?:,\\d+)? @@')
    re_binary = re.compile(
        '^Binary files (?:a/(.+)|/dev/null) and (?:b/(.+)|/dev/null) differ')
    re_diff = re.compile("^diff --git a/(.*) b/(.*)$")

    def isplitlines(text):
        # Lazily yield the lines of `text` without building a full list.
        start = 0
        length = len(text)
        while start < length:
            try:
                end = text.index('\n', start)
                yield text[start:end]
                start = end + 1
            except ValueError:
                yield text[start:]
                break

    lines = isplitlines(stdout)

    included = set()
    files = []
    files_by_path = {}

    def addFile(new_file):
        # Register a parsed file, keeping `files` ordered and unique by path.
        assert new_file.path not in files_by_path
        files.append(new_file)
        files_by_path[new_file.path] = new_file
        included.add(new_file.path)

    old_mode = None
    new_mode = None

    try:
        line = lines.next()
        names = None

        while True:
            old_mode = None
            new_mode = None

            # Scan to the 'index <sha1>..<sha1>' line that marks the beginning
            # of the differences in one file, collecting any mode-change
            # information on the way.
            while not line.startswith("index "):
                match = re_diff.match(line)
                if match:
                    if old_mode is not None and new_mode is not None:
                        # The previous header was a pure mode change (it had
                        # no 'index' line); record it with no chunks.
                        addFile(
                            diff.File(None, names[0], None, None, repository,
                                      old_mode=old_mode, new_mode=new_mode,
                                      chunks=[]))
                    names = (match.group(1), match.group(2))
                elif line.startswith("old mode "):
                    old_mode = line[9:]
                elif line.startswith("new mode "):
                    new_mode = line[9:]
                elif line.startswith("new file mode "):
                    new_mode = line[14:]
                elif line.startswith("deleted file mode "):
                    old_mode = line[18:]
                line = lines.next()

            is_submodule = False

            # Parse 'index <old>..<new>[ <mode>]'; mode 160000 marks a
            # submodule (gitlink) entry.
            try:
                sha1range, mode = line[6:].split(' ', 2)
                if mode == "160000":
                    is_submodule = True
                    old_mode = new_mode = mode
                old_sha1, new_sha1 = sha1range.split('..')
            except:
                old_sha1, new_sha1 = line[6:].split(' ', 1)[0].split("..")

            try:
                line = lines.next()
            except:
                # The diff ended right after the index line (e.g. a mode-only
                # change): record the file now.  `line` is left stale, so the
                # header tests below fail and parsing terminates through the
                # outer StopIteration handler on the next read.
                if new_mode is not None:
                    assert names[0] == names[1]
                    addFile(
                        diff.File(None, names[0], old_sha1, new_sha1,
                                  repository, old_mode=old_mode,
                                  new_mode=new_mode,
                                  chunks=[diff.Chunk(0, 0, 0, 0)]))
                    old_mode = new_mode = None

            if re_diff.match(line):
                # The next file header follows immediately: the current file
                # has no hunks in this (--ignore-space-change) diff, so scan
                # it for pure whitespace changes instead.
                new_file = diff.File(None, names[0] or names[1], old_sha1,
                                     new_sha1, repository, old_mode=old_mode,
                                     new_mode=new_mode)
                if '0' * 40 == old_sha1 or '0' * 40 == new_sha1:
                    # Added or deleted file: represented by one null chunk.
                    new_file.chunks = [diff.Chunk(0, 0, 0, 0)]
                else:
                    new_file.loadOldLines()
                    new_file.loadNewLines()
                    new_file.chunks = []
                    detectWhiteSpaceChanges(new_file,
                                            new_file.oldLines(False), 1,
                                            new_file.oldCount() + 1, True,
                                            new_file.newLines(False), 1,
                                            new_file.newCount() + 1, True)
                addFile(new_file)
                # NOTE(review): False (rather than None) looks odd here, but
                # both are reset to None at the top of the loop anyway.
                old_mode = new_mode = False
                continue

            binary = re_binary.match(line)
            if binary:
                # Binary file change: one null chunk, no line data.
                path = (binary.group(1) or binary.group(2)).strip()
                new_file = diff.File(None, path, old_sha1, new_sha1,
                                     repository, old_mode=old_mode,
                                     new_mode=new_mode)
                new_file.chunks = [diff.Chunk(0, 0, 0, 0)]
                addFile(new_file)
                continue

            # '--- a/<path>' / '+++ b/<path>' lines; a '/dev/null' side shows
            # up as None and must agree with the corresponding null SHA-1.
            if line.startswith("--- a/"):
                old_path = line[6:].strip()
            else:
                old_path = None
            line = lines.next()
            if line.startswith("+++ b/"):
                new_path = line[6:].strip()
            else:
                new_path = None

            assert (old_path is None) == ('0' * 40 == old_sha1)
            assert (new_path is None) == ('0' * 40 == new_sha1)

            if old_path:
                path = old_path
            else:
                path = new_path

            if is_submodule:
                # A submodule diff is a fixed '-Subproject commit <sha1>' /
                # '+Subproject commit <sha1>' pair; represent it with one
                # pre-analyzed chunk.
                line = lines.next()
                match = re_chunk.match(line)
                assert match, repr(line)
                assert match.group(1) == match.group(2) == "1", repr(
                    match.groups())
                line = lines.next()
                assert line == "-Subproject commit %s" % old_sha1, repr(line)
                line = lines.next()
                assert line == "+Subproject commit %s" % new_sha1, repr(line)
                new_file = diff.File(None, path, old_sha1, new_sha1,
                                     repository, old_mode=old_mode,
                                     new_mode=new_mode,
                                     chunks=[
                                         diff.Chunk(
                                             1, 1, 1, 1,
                                             analysis="0=0:r18-58=18-58")
                                     ])
                if path not in files_by_path:
                    addFile(new_file)
                old_mode = new_mode = None
                continue

            try:
                line = lines.next()

                delete_offset = 1
                delete_count = 0
                deleted_lines = []

                insert_offset = 1
                insert_count = 0
                inserted_lines = []

                # Full copies of both sides are needed for whitespace-change
                # detection in the context between and around hunks.
                if old_path and new_path and not simple:
                    old_lines = splitlines(repository.fetch(old_sha1).data)
                    new_lines = splitlines(repository.fetch(new_sha1).data)
                else:
                    old_lines = None
                    new_lines = None

                if path in files_by_path:
                    # A second header for the same path (presumably a
                    # delete+create pair): merge the non-null SHA-1/mode into
                    # the existing file object.
                    new_file = files_by_path[path]
                    if old_sha1 != '0' * 40:
                        assert new_file.old_sha1 == '0' * 40
                        new_file.old_sha1 = old_sha1
                        new_file.old_mode = old_mode
                    if new_sha1 != '0' * 40:
                        assert new_file.new_sha1 == '0' * 40
                        new_file.new_sha1 = new_sha1
                        new_file.new_mode = new_mode
                    new_file.chunks = []
                else:
                    new_file = diff.File(None, path, old_sha1, new_sha1,
                                         repository, old_mode=old_mode,
                                         new_mode=new_mode, chunks=[])

                old_mode = new_mode = None

                if selected_path is not None and selected_path == path:
                    selected_file = new_file

                if path not in files_by_path:
                    addFile(new_file)

                # 1-based offsets of the first line after the last processed
                # change; the gap up to the next change is scanned for
                # whitespace-only differences.
                previous_delete_offset = 1
                previous_insert_offset = 1

                while True:
                    # One iteration per '@@ -<n>[,<m>] +<n>[,<m>] @@' hunk.
                    match = re_chunk.match(line)
                    if not match:
                        break
                    groups = match.groups()
                    delete_offset = int(groups[0])
                    deleted_lines = []
                    insert_offset = int(groups[1])
                    inserted_lines = []
                    while True:
                        line = lines.next()
                        if line == "\\ No newline at end of file":
                            continue
                        if line[0] not in (' ', '-', '+'):
                            break
                        if line[0] != ' ' and previous_delete_offset is not None and old_lines and new_lines and not simple:
                            # Entering a changed run: first scan the context
                            # since the previous change for whitespace-only
                            # differences.
                            detectWhiteSpaceChanges(files[-1], old_lines,
                                                    previous_delete_offset,
                                                    delete_offset, True,
                                                    new_lines,
                                                    previous_insert_offset,
                                                    insert_offset, True)
                            previous_delete_offset = None
                        if line[0] == ' ' and previous_delete_offset is None:
                            previous_delete_offset = delete_offset
                            previous_insert_offset = insert_offset
                        type = line[0]
                        if type == '-':
                            delete_offset += 1
                            deleted_lines.append(line[1:])
                        elif type == '+':
                            insert_offset += 1
                            inserted_lines.append(line[1:])
                        else:
                            # Context line: flush any accumulated change.
                            if deleted_lines or inserted_lines:
                                chunks = createChunks(
                                    delete_offset - len(deleted_lines),
                                    deleted_lines,
                                    insert_offset - len(inserted_lines),
                                    inserted_lines)
                                files[-1].chunks.extend(chunks)
                                deleted_lines = []
                                inserted_lines = []
                            delete_offset += 1
                            insert_offset += 1

                    # Flush a change that ran to the very end of the hunk.
                    if deleted_lines or inserted_lines:
                        chunks = createChunks(
                            delete_offset - len(deleted_lines),
                            deleted_lines,
                            insert_offset - len(inserted_lines),
                            inserted_lines)
                        files[-1].chunks.extend(chunks)
                        deleted_lines = []
                        inserted_lines = []

                # Scan the tail of the file (after the last change) for
                # whitespace-only differences.
                if previous_delete_offset is not None and old_lines and new_lines and not simple:
                    detectWhiteSpaceChanges(files[-1], old_lines,
                                            previous_delete_offset,
                                            len(old_lines) + 1, True,
                                            new_lines,
                                            previous_insert_offset,
                                            len(new_lines) + 1, True)
                    previous_delete_offset = None
            except StopIteration:
                # End of the diff in the middle of a file: flush pending
                # state, then re-raise so the outer handler runs.
                if deleted_lines or inserted_lines:
                    chunks = createChunks(delete_offset - len(deleted_lines),
                                          deleted_lines,
                                          insert_offset - len(inserted_lines),
                                          inserted_lines)
                    files[-1].chunks.extend(chunks)
                    deleted_lines = []
                    inserted_lines = []
                if previous_delete_offset is not None and old_lines and new_lines and not simple:
                    detectWhiteSpaceChanges(files[-1], old_lines,
                                            previous_delete_offset,
                                            len(old_lines) + 1, True,
                                            new_lines,
                                            previous_insert_offset,
                                            len(new_lines) + 1, True)
                raise
    except StopIteration:
        # Normal termination of the main parse loop.
        if old_mode is not None and new_mode is not None:
            # A trailing pure mode change with no 'index' line.
            assert names[0] == names[1]
            addFile(
                diff.File(None, names[0], None, None, repository,
                          old_mode=old_mode, new_mode=new_mode, chunks=[]))

    # Paths that changed but produced no output above differ in whitespace
    # only (hidden by --ignore-space-change); diff each one individually.
    for path in (paths - included):
        lines = isplitlines(
            repository.run(command, '--full-index', '--unified=1', what,
                           '--', path))
        try:
            line = lines.next()
            while not line.startswith("index "):
                line = lines.next()
            try:
                sha1range, mode = line[6:].split(' ')
                if mode == "160000":
                    # Submodule entry; nothing to do here.
                    continue
                old_sha1, new_sha1 = sha1range.split("..")
            except:
                old_sha1, new_sha1 = line[6:].split(' ', 1)[0].split("..")
            if old_sha1 == '0' * 40 or new_sha1 == '0' * 40:
                # Added or removed empty file.
                continue
            addFile(
                diff.File(None, path, old_sha1, new_sha1, repository,
                          chunks=[]))
            old_data = repository.fetch(old_sha1).data
            old_lines = splitlines(old_data)
            new_data = repository.fetch(new_sha1).data
            new_lines = splitlines(new_data)
            # Whitespace-only changes cannot add or remove lines.
            assert len(old_lines) == len(new_lines), "%s:%d != %s:%d" % (
                old_sha1, len(old_lines), new_sha1, len(new_lines))

            def endsWithLinebreak(data):
                return data and data[-1] in "\n\r"

            detectWhiteSpaceChanges(files[-1], old_lines, 1,
                                    len(old_lines) + 1,
                                    endsWithLinebreak(old_data), new_lines, 1,
                                    len(new_lines) + 1,
                                    endsWithLinebreak(new_data))
        except StopIteration:
            pass

    if not simple:
        for file in files:
            mergeChunks(file)

    # Key the result on the relevant parent commit, mirroring the docstring.
    if from_commit and to_commit:
        if selected_path is not None:
            return selected_file
        else:
            return {from_commit.sha1: files}
    elif not commit.parents:
        return {None: files}
    else:
        return {commit.parents[0]: files}