def test_reuse_detector(self):
    """A single RenameDetector may be queried repeatedly with equal results."""
    content = make_object(Blob, data=b"blob")
    before = self.commit_tree([("a", content)])
    after = self.commit_tree([("b", content)])
    detector = RenameDetector(self.store)
    expected = [
        TreeChange(CHANGE_RENAME, (b"a", F, content.id), (b"b", F, content.id))
    ]
    # Ask twice to verify the detector resets its state between runs.
    for _ in range(2):
        self.assertEqual(
            expected, detector.changes_with_renames(before.id, after.id))
def main(args, hear, talk, complain):
    """Reset boring changes

    See doc-string of this file for outline.  Required arguments - args,
    hear, talk and complain -- should, respectively, be (or behave as,
    e.g. if mocking to test) sys.argv, sys.stdin, sys.stdout and
    sys.stderr.  The only command-line option supported (in args) is a
    '--disclaim' flag, to treat as boring all changes in files with the
    standard 'We mean it' disclaimer; it is usual to pass this flag.

    Returns 0 on success, 1 on IOError.
    """
    # Predicate saying whether a whole file's changes are boring; with
    # --disclaim, Scanner.disclaimed decides (complain.write lets it warn).
    # NOTE(review): hear (stdin) is currently unused.
    ignore = Scanner.disclaimed if '--disclaim' in args else (
        lambda p, w: False)
    # We're in the root directory of the module:
    repo = Repo('.')
    store, index = repo.object_store, repo.open_index()
    renamer = RenameDetector(store)
    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different.  Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        # Diff HEAD's tree against the tree of the current index.
        for kind, old, new in \
                renamer.changes_with_renames(store[repo.refs['HEAD']].tree,
                                             index.commit(store)):
            # Each of old, new is a named triple of .path, .mode and
            # .sha; kind is the change type, in ('add', 'modify',
            # 'delete', 'rename', 'copy', 'unchanged'), although we
            # shouldn't get the last.  If new.path is None, file was
            # removed, not renamed; otherwise, if new has a
            # disclaimer, it's private despite its name and path.
            if new.path and not ignore(new.path, complain.write):
                assert kind not in ('unchanged', 'delete'), kind
                if kind != 'add':
                    # Filter out boring changes
                    index[new.path] = Selector(store, new.sha, old.sha,
                                               old.mode or new.mode).refine()
            elif old.path:
                # disclaimed or removed: ignore by restoring
                assert new.path or kind == 'delete', (kind, new.path)
                index[old.path] = Selector.restore(store[old.sha], old.mode)
                # Report every path whose changes were reverted:
                talk.write(old.path + '\n')
                if new.path and new.path != old.path:
                    talk.write(new.path + '\n')
            else:
                # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]
                talk.write(new.path + '\n')
        index.write()
    except IOError:
        # ... and any other errors that just mean failure.
        return 1
    return 0
def test_reuse_detector(self):
    """Running the same detector twice yields identical rename results."""
    shared = make_object(Blob, data='blob')
    old_tree = self.commit_tree([('a', shared)])
    new_tree = self.commit_tree([('b', shared)])
    detector = RenameDetector(self.store)
    want = [TreeChange(CHANGE_RENAME,
                       ('a', F, shared.id), ('b', F, shared.id))]
    first = detector.changes_with_renames(old_tree.id, new_tree.id)
    second = detector.changes_with_renames(old_tree.id, new_tree.id)
    self.assertEqual(want, first)
    # Second run must not be polluted by state left over from the first.
    self.assertEqual(want, second)
def test_tree_changes_rename_detector(self):
    """tree_changes honors an explicit rename_detector argument."""
    src_blob = make_object(Blob, data='a\nb\nc\nd\n')
    dst_blob = make_object(Blob, data='a\nb\nc\ne\n')
    keep_blob = make_object(Blob, data='b')
    tree_before = self.commit_tree([('a', src_blob), ('b', keep_blob)])
    tree_after = self.commit_tree([('c', dst_blob), ('b', keep_blob)])
    detector = RenameDetector(self.store)

    renamed = TreeChange(CHANGE_RENAME,
                         ('a', F, src_blob.id), ('c', F, dst_blob.id))
    untouched = TreeChange(CHANGE_UNCHANGED,
                           ('b', F, keep_blob.id), ('b', F, keep_blob.id))

    # Without a detector the rename appears as a delete plus an add.
    self.assertChangesEqual(
        [TreeChange.delete(('a', F, src_blob.id)),
         TreeChange.add(('c', F, dst_blob.id))],
        tree_before, tree_after)
    self.assertChangesEqual(
        [TreeChange.delete(('a', F, src_blob.id)),
         untouched,
         TreeChange.add(('c', F, dst_blob.id))],
        tree_before, tree_after, want_unchanged=True)
    # With a detector the pair collapses into a single rename entry.
    self.assertChangesEqual([renamed], tree_before, tree_after,
                            rename_detector=detector)
    self.assertChangesEqual([renamed, untouched], tree_before, tree_after,
                            rename_detector=detector, want_unchanged=True)
def test_changes_with_renames(self):
    """A walk reports renames only when given a rename detector."""
    payload = make_object(Blob, data=b"blob")
    c1, c2 = self.make_linear_commits(
        2, trees={1: [(b"a", payload)], 2: [(b"b", payload)]})
    old_entry = (b"a", F, payload.id)
    new_entry = (b"b", F, payload.id)

    # Default walk: the move shows up as a delete/add pair.
    plain = [TreeChange.delete(old_entry), TreeChange.add(new_entry)]
    self.assertWalkYields([TestWalkEntry(c2, plain)], [c2.id],
                          max_entries=1)

    # Same walk with a detector: a single rename change instead.
    merged = [TreeChange(CHANGE_RENAME, old_entry, new_entry)]
    detector = RenameDetector(self.store)
    self.assertWalkYields([TestWalkEntry(c2, merged)], [c2.id],
                          max_entries=1, rename_detector=detector)
def test_renamed_file(self):
    """A file rename is a delete+add, or a rename with a RenameDetector."""
    self.build_tree(['a'])
    self.wt.add(['a'])
    self.wt.rename_one('a', 'b')

    content = Blob.from_string(b'contents of a\n')
    self.store.add_object(content)

    mode = stat.S_IFREG | 0o644
    old_tree = Tree()
    old_tree.add(b"a", mode, content.id)
    self.store.add_object(old_tree)
    new_tree = Tree()
    new_tree.add(b"b", mode, content.id)
    self.store.add_object(new_tree)

    root_change = ('modify', (b'', stat.S_IFDIR, old_tree.id),
                   (b'', stat.S_IFDIR, new_tree.id))
    # Without rename detection: plain delete of 'a' plus add of 'b'.
    self.expectDelta(
        [root_change,
         ('delete', (b'a', mode, content.id), (None, None, None)),
         ('add', (None, None, None), (b'b', mode, content.id))],
        tree_id=old_tree.id)
    # Rename detection support arrived in dulwich 0.19.15.
    if dulwich_version >= (0, 19, 15):
        self.expectDelta(
            [root_change,
             ('rename', (b'a', mode, content.id),
              (b'b', mode, content.id))],
            tree_id=old_tree.id,
            rename_detector=RenameDetector(self.store))
def last_commit_renamed_path(repo, history, path):
    """Check whether the latest commit of this history renames ``path``.

    :param repo: repository (passed to RenameDetector).
        NOTE(review): RenameDetector conventionally takes an object
        store; confirm ``repo`` exposes the expected interface.
    :param history: commits ordered newest first; each must expose a
        ``tree`` attribute.
    :param path: old path to look for among the renames.
    :return: the new path if the newest commit renamed ``path``,
        otherwise None.
    """
    # With fewer than two commits there is no predecessor to diff against.
    if len(history) <= 1:
        return None
    newtree = history[0].tree
    oldtree = history[1].tree
    detector = RenameDetector(repo)
    for change in detector.changes_with_renames(oldtree, newtree):
        if change.type == 'rename' and change.old.path == path:
            return change.new.path
    return None
def __init__(self, store, include, exclude=None, order=ORDER_DATE, reverse=False, max_entries=None, paths=None, rename_detector=None, follow=False, since=None, until=None, queue_cls=_CommitTimeQueue): """Constructor. :param store: ObjectStore instance for looking up objects. :param include: Iterable of SHAs of commits to include along with their ancestors. :param exclude: Iterable of SHAs of commits to exclude along with their ancestors, overriding includes. :param order: ORDER_* constant specifying the order of results. Anything other than ORDER_DATE may result in O(n) memory usage. :param reverse: If True, reverse the order of output, requiring O(n) memory. :param max_entries: The maximum number of entries to yield, or None for no limit. :param paths: Iterable of file or subtree paths to show entries for. :param rename_detector: diff.RenameDetector object for detecting renames. :param follow: If True, follow path across renames/copies. Forces a default rename_detector. :param since: Timestamp to list commits after. :param until: Timestamp to list commits before. :param queue_cls: A class to use for a queue of commits, supporting the iterator protocol. The constructor takes a single argument, the Walker. """ # Note: when adding arguments to this method, please also update # dulwich.repo.BaseRepo.get_walker if order not in ALL_ORDERS: raise ValueError('Unknown walk order %s' % order) self.store = store self.include = include self.excluded = set(exclude or []) self.order = order self.reverse = reverse self.max_entries = max_entries self.paths = paths and set(paths) or None if follow and not rename_detector: rename_detector = RenameDetector(store) self.rename_detector = rename_detector self.follow = follow self.since = since self.until = until self._num_entries = 0 self._queue = queue_cls(self) self._out_queue = collections.deque()
def main(args, hear, talk, complain):
    """Reset boring changes staged in the index.

    args, hear, talk and complain play the roles of sys.argv, sys.stdin,
    sys.stdout and sys.stderr respectively (or mocks thereof).  Passing a
    '--disclaim' flag in args treats as boring all changes in files that
    carry the disclaimer.  Returns 0 on success, 1 on IOError.
    """
    # Future: we may want to parse more args, query the user or wrap
    # talk, complain for verbosity control.
    # NOTE(review): hear and talk are currently unused in this variant.
    # Predicate saying whether a whole file's changes are boring; with
    # --disclaim, Scanner.disclaimed decides (complain.write lets it warn).
    ignore = Scanner.disclaimed if '--disclaim' in args else (
        lambda p, w: False)
    # We're in the root directory of the module:
    repo = Repo('.')
    store, index = repo.object_store, repo.open_index()
    renamer = RenameDetector(store)
    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different.  Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        # Diff HEAD's tree against the tree of the current index.
        for kind, old, new in \
                renamer.changes_with_renames(store[repo.refs['HEAD']].tree,
                                             index.commit(store)):
            # Each of old, new is a named triple of .path, .mode and
            # .sha; kind is the change type, in ('add', 'modify',
            # 'delete', 'rename', 'copy', 'unchanged'), although we
            # shouldn't get the last.  If new.path is None, file was
            # removed, not renamed; otherwise, if new has a
            # disclaimer, it's private despite its name and path.
            if new.path and not ignore(new.path, complain.write):
                assert kind not in ('unchanged', 'delete'), kind
                if kind != 'add':
                    # Filter out boring changes
                    index[new.path] = Selector(store, new.sha, old.sha,
                                               old.mode or new.mode).refine()
            elif old.path:
                # disclaimed or removed: ignore by restoring
                assert new.path or kind == 'delete', (kind, new.path)
                index[old.path] = Selector.restore(store[old.sha],
                                                   old.mode)
            else:
                # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]
        index.write()
    except IOError:
        # ... and any other errors that just mean failure.
        return 1
    return 0
def test_tree_changes_rename_detector(self):
    """tree_changes reports renames only when a detector is passed."""
    before = make_object(Blob, data=b"a\nb\nc\nd\n")
    after = make_object(Blob, data=b"a\nb\nc\ne\n")
    stable = make_object(Blob, data=b"b")
    tree1 = self.commit_tree([(b"a", before), (b"b", stable)])
    tree2 = self.commit_tree([(b"c", after), (b"b", stable)])
    detector = RenameDetector(self.store)

    rename = TreeChange(CHANGE_RENAME,
                        (b"a", F, before.id), (b"c", F, after.id))
    unchanged = TreeChange(CHANGE_UNCHANGED,
                           (b"b", F, stable.id), (b"b", F, stable.id))

    # No detector: the move is reported as a delete plus an add.
    self.assertChangesEqual(
        [TreeChange.delete((b"a", F, before.id)),
         TreeChange.add((b"c", F, after.id))],
        tree1, tree2)
    self.assertChangesEqual(
        [TreeChange.delete((b"a", F, before.id)),
         unchanged,
         TreeChange.add((b"c", F, after.id))],
        tree1, tree2, want_unchanged=True)
    # With a detector: a single rename entry instead.
    self.assertChangesEqual([rename], tree1, tree2,
                            rename_detector=detector)
    self.assertChangesEqual([rename, unchanged], tree1, tree2,
                            rename_detector=detector, want_unchanged=True)
def test_want_unchanged(self):
    """want_unchanged=True appends CHANGE_UNCHANGED entries to the renames.

    Fix: the original built a local RenameDetector that was never used
    (the detect_renames helper constructs its own); the dead local is
    removed.
    """
    moved_src = make_object(Blob, data='a\nb\nc\nd\n')
    stable = make_object(Blob, data='b')
    moved_dst = make_object(Blob, data='a\nb\nc\ne\n')
    tree1 = self.commit_tree([('a', moved_src), ('b', stable)])
    tree2 = self.commit_tree([('c', moved_dst), ('b', stable)])
    rename = TreeChange(CHANGE_RENAME, ('a', F, moved_src.id),
                        ('c', F, moved_dst.id))
    self.assertEqual([rename], self.detect_renames(tree1, tree2))
    self.assertEqual(
        [rename,
         TreeChange(CHANGE_UNCHANGED, ('b', F, stable.id),
                    ('b', F, stable.id))],
        self.detect_renames(tree1, tree2, want_unchanged=True))
def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
    """Run rename detection between two trees and return the change list.

    Extra keyword arguments configure the RenameDetector itself.
    """
    return RenameDetector(self.store, **kwargs).changes_with_renames(
        tree1.id, tree2.id, want_unchanged=want_unchanged)
def setUp(self):
    # Run the parent fixture (which provides self.store), then attach a
    # RenameDetector bound to that store for the tests in this class.
    super(TreeChangesTest, self).setUp()
    self.detector = RenameDetector(self.store)
def _merge_branches(self, base, mine, other, take_mine=False):
    """Three-way merge of JSON blobs from branches mine/other against base.

    Builds and stores a merged Tree; returns (merge_tree, conflicts)
    where conflicts maps path -> the conflicted merged JSON.
    """
    def load_json(path, branch):
        # Missing blob (or unhashable/None path) is treated as empty JSON.
        try:
            blob = self.blob(path, branch)
        except (KeyError, TypeError):
            return {}
        else:
            return loads(blob.as_raw_string())

    merge_tree = Tree()
    base_tree, my_tree, other_tree = (self._get_tree(x)
                                      for x in (base, mine, other))
    ren_detector = RenameDetector(self._repo.object_store)
    conflicts = {}

    # Diff base against each side, with renames detected and unchanged
    # entries included so both sides report every path.
    my_changes, other_changes = (
        tree_changes(self._repo.object_store, base_tree.id, x.id,
                     want_unchanged=True, rename_detector=ren_detector)
        for x in (my_tree, other_tree))
    # Group each side's change for a path under the path's *old* name
    # for deletes/renames, so the two sides pair up by base path.
    changes_by_path = defaultdict(list)
    for change in chain(my_changes, other_changes):
        if change.type == CHANGE_DELETE or change.type == CHANGE_RENAME:
            path = change.old.path
        else:
            path = change.new.path
        changes_by_path[path].append(change)
    had_conflict = False

    for path, changes in changes_by_path.items():
        if len(changes) == 2:
            # Both sides touched this path.
            my_changes, other_changes = changes
            if my_changes.type == CHANGE_DELETE:
                # Mine deleted: keep other's version only if it changed it.
                if other_changes.type in (CHANGE_RENAME, CHANGE_MODIFY):
                    merge_tree.add(other_changes.new.path, FILE_MODE,
                                   other_changes.new.sha)
                else:
                    continue
            elif other_changes.type == CHANGE_DELETE:
                # Other deleted: keep mine only if mine changed it.
                if my_changes.type in (CHANGE_RENAME, CHANGE_MODIFY):
                    merge_tree.add(my_changes.new.path, FILE_MODE,
                                   my_changes.new.sha)
                else:
                    continue
            else:
                jsons = [load_json(path, x) for x in (base, mine, other)]
                base_json, my_json, other_json = jsons
                # When dealing with renames, file contents are under the
                # 'new' path.  Note that the file will be finally stored
                # under the name given by the last rename.
                if other_changes.type == CHANGE_RENAME:
                    other_json = load_json(other_changes.new.path, other)
                    path = other_changes.new.path
                if my_changes.type == CHANGE_RENAME:
                    my_json = load_json(my_changes.new.path, mine)
                    path = my_changes.new.path
                if take_mine:
                    # Prefer mine, falling back to other, then base.
                    merged_json = my_json or other_json or base_json
                else:
                    # NOTE(review): merge_jsons receives the original
                    # ``jsons`` list, i.e. the pre-rename contents even
                    # when my_json/other_json were just reloaded above —
                    # confirm this is intended.
                    merged_json, merge_conflict = merge_jsons(*jsons)
                    if merge_conflict:
                        conflicts[path] = merged_json
                    had_conflict = had_conflict or merge_conflict
                merged_blob = Blob.from_string(
                    dumps(merged_json, sort_keys=True, indent=4))
                self._update_store(merged_blob)
                merge_tree.add(path, FILE_MODE, merged_blob.id)
        else:
            # Only one side (or the unchanged entry) mentions this path:
            # take the first non-empty content in mine/other/base order.
            data = (load_json(path, mine) or load_json(path, other) or
                    load_json(path, base))
            blob = Blob.from_string(dumps(data, sort_keys=True, indent=4))
            self._update_store(blob)
            merge_tree.add(path, FILE_MODE, blob.id)
    self._update_store(merge_tree)
    return merge_tree, conflicts
def detect_renames(self, tree1, tree2, **kwargs):
    """Build a detector over the two trees and return its rename changes.

    Keyword arguments are forwarded to the RenameDetector constructor.
    """
    detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
    changes = detector.changes_with_renames()
    return changes