def run(self):
    """Feed two source repositories into one target, then add a key blob and tag.

    A blob and a tag pointing at it are created up front but only inserted
    into the output after both sources have been run.  The output filter is
    put in importer-only mode and is also stored on ``self.out``.
    """
    key_blob = fr.Blob(b'public gpg key contents')
    key_tag = fr.Tag(b'gpg-pubkey', key_blob.id,
                     b'Ima Tagger', b'*****@*****.**', b'1136199845 +0300',
                     b'Very important explanation and stuff')

    # Output side: a filter writing to self.output_dir.
    target_args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
    target = fr.RepoFilter(target_args)
    target.importer_only()
    self.out = target

    def feed(source, **callbacks):
        """Run one source repository, with the given callbacks, into `target`."""
        source_args = fr.FilteringOptions.parse_args(['--source', source])
        source_filter = fr.RepoFilter(source_args, **callbacks)
        source_filter.set_output(target)
        source_filter.run()

    # The first repository is run with reset/commit callbacks, the second
    # with a commit callback only; the order of the two runs is significant.
    feed(self.repo1,
         reset_callback=self.skip_reset,
         commit_callback=self.hold_commit)
    feed(self.repo2, commit_callback=self.weave_commit)

    target.insert(key_blob)
    target.insert(key_tag)
    target.finish()
def finish(self) -> None:
    """Run git-filter-repo over the range spanned by the collected commits.

    Returns immediately when ``self.commits_to_rewrite`` is empty.  Otherwise
    the first and last collected commits delimit the ``--refs`` range and
    ``self._rewrite`` is installed as the commit callback.

    Raises:
        AssertionError: if the first commit is neither the last commit nor
            one of its ancestors ("Wrong commit order").
    """
    pending = self.commits_to_rewrite
    if not pending:
        return  # nothing to do

    replace_opt = "update-or-add" if self.replace else "update-no-add"

    def rev_name(commit: git.Commit) -> str:
        # Reference-based names are used instead of OIDs.  This avoids Git's
        # object name warning; with OIDs filter-repo fails to replace the
        # objects.
        return commit.name_rev.split()[1]

    first, last = pending[0], pending[-1]
    assert first == last or first in last.iter_parents(), "Wrong commit order"

    first_rev = rev_name(first)
    last_rev = rev_name(last)
    # The '^' makes the range start at first's parent so 'first' is included.
    refs = last_rev if self.with_initial_commit else f"{first_rev}^..{last_rev}"

    cli = [
        '--source', self.repo.git_dir,
        '--force',
        '--quiet',
        '--preserve-commit-encoding',
        '--replace-refs', replace_opt,
        '--refs', refs,
    ]
    args = fr.FilteringOptions.parse_args(cli)
    fr.RepoFilter(args, commit_callback=self._rewrite).run()
def fix_links(repo: str):
    """Run git-filter-repo on HEAD with ``msg_callback`` as message callback.

    Before the filter runs, ``repo`` is stored in the environment variable
    named by ``REPO_NAME`` (presumably read back by ``msg_callback`` --
    verify against that function).
    """
    options = git_filter_repo.FilteringOptions.default_options()
    # `force` and `partial` stay at their default values.
    options.debug = True
    options.refs = ['HEAD']

    os.environ[REPO_NAME] = repo

    repo_filter = git_filter_repo.RepoFilter(options,
                                             message_callback=msg_callback)
    repo_filter.run()
def anonymise(path: str, blacklist: List[str]):
    """Run git-filter-repo in ``path`` with message, name and email callbacks.

    Args:
        path: directory to change into before filtering; the process working
            directory is left there afterwards.
        blacklist: names, UTF-8 encoded and handed to ``message_callback``
            together with each message.
    """
    options = gfr.FilteringOptions.default_options()
    options.force = True
    blacklist_bytes = [entry.encode("utf-8") for entry in blacklist]

    os.chdir(path)

    def scrub_message(message):
        # Bind the encoded blacklist as the second argument of the
        # module-level message_callback.
        return message_callback(message, blacklist_bytes)

    repo_filter = gfr.RepoFilter(
        options,
        message_callback=scrub_message,
        name_callback=name_callback,
        email_callback=email_callback,
    )
    repo_filter.run()
#!/usr/bin/env python3

"""
Please see the
  ***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""

import git_filter_repo as fr


def my_commit_callback(commit, metadata):
    """Retarget commits on refs/heads/master to refs/heads/develop."""
    if commit.branch != b"refs/heads/master":
        return
    commit.branch = b"refs/heads/develop"


# Default options with --force semantics, then run with the callback above.
args = fr.FilteringOptions.default_options()
args.force = True
repo_filter = fr.RepoFilter(args, commit_callback=my_commit_callback)
repo_filter.run()
Please see the ***** API BACKWARD COMPATIBILITY CAVEAT ***** near the top of git-filter-repo. """ import sys import git_filter_repo as fr def drop_file_by_contents(blob, metadata): bad_file_contents = b'The launch code is 1-2-3-4.' if blob.data == bad_file_contents: blob.skip() def drop_files_by_name(commit, metadata): new_file_changes = [] for change in commit.file_changes: if not change.filename.endswith(b'.doc'): new_file_changes.append(change) commit.file_changes = new_file_changes sys.argv.append('--force') args = fr.FilteringOptions.parse_args(sys.argv[1:]) filter = fr.RepoFilter(args, blob_callback=drop_file_by_contents, commit_callback=drop_files_by_name) filter.run()
C from :3 M 100644 :1 salutation '''[1:]).encode())
# (The line above closes a triple-quoted string that was opened before this
# excerpt; its contents are left untouched.)

# Tally of how many objects of each type were handed to the callback below.
counts = collections.Counter()

def look_for_reset(obj, metadata):
  """Log and count every object seen; check the ref of any Reset.

  Installed as the blob, reset, commit and tag callback, so `obj` may be any
  of those object types.
  """
  print("Processing {}".format(obj))
  counts[type(obj)] += 1
  if type(obj) == fr.Reset:
    assert obj.ref == b'refs/heads/B'

# Use all kinds of internals that external scripts should NOT use and which
# are likely to break in the future, just to verify a few invariants...
args = fr.FilteringOptions.parse_args(
    ['--stdin', '--dry-run', '--path', 'salutation'])
filter = fr.RepoFilter(args,
                       blob_callback=look_for_reset,
                       reset_callback=look_for_reset,
                       commit_callback=look_for_reset,
                       tag_callback=look_for_reset)
# `stream` is defined before this excerpt -- presumably built from the string
# closed on the first line; verify.
filter._input = stream
filter._setup_output()
filter._sanity_checks_handled = True
filter.run()
# Exactly one blob, three commits and one reset must have reached the callback.
assert counts == collections.Counter({fr.Blob: 1, fr.Commit: 3, fr.Reset: 1})
#!/usr/bin/env python3

"""
This is a simple program that behaves identically to git-filter-repo. Its
entire purpose is just to show what to import and run to get the normal
git-filter-repo behavior, to serve as a starting point for you to figure
out what you want to modify.
"""

"""
Please see the
  ***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""

import sys

try:
    import git_filter_repo as fr
except ImportError:
    raise SystemExit(
        "Error: Couldn't find git_filter_repo.py. Did you forget to make a "
        "symlink to git-filter-repo named git_filter_repo.py or did you "
        "forget to put the latter in your PYTHONPATH?")

# Parse the real command line, then either filter or analyze.
args = fr.FilteringOptions.parse_args(sys.argv[1:])
if not args.analyze:
    repo_filter = fr.RepoFilter(args)
    repo_filter.run()
else:
    fr.RepoAnalyze.run(args)
#!/usr/bin/env python

# https://stackoverflow.com/questions/64160917/how-to-use-git-filter-repo-as-a-library-with-the-python-module-interface/64160918#64160918

import git_filter_repo


def blob_callback(blob, callback_metadata):
    """Replace every occurrence of b'd1' in the blob data with b'asdf'."""
    rewritten = blob.data.replace(b'd1', b'asdf')
    blob.data = rewritten


# Args deduced from:
# print(git_filter_repo.FilteringOptions.parse_args(['--refs', 'HEAD', '--force'], error_on_empty=False))
args = git_filter_repo.FilteringOptions.default_options()
for option, value in (
    ('force', True),
    ('partial', True),
    ('refs', ['HEAD~..HEAD']),
    ('repack', False),
    ('replace_refs', 'update-no-add'),
):
    setattr(args, option, value)

repo_filter = git_filter_repo.RepoFilter(args, blob_callback=blob_callback)
repo_filter.run()
# NOTE(review): this excerpt opens inside a function body whose `def` line is
# not visible here -- presumably lint_non_binary_blobs, which is referenced
# at the bottom; verify.  The indented statements below belong to it.
  # NOTE(review): iterating a bytes object yields ints in Python 3, so
  # `x == b"0"` can never be true if blob.data is bytes, and b"0" is the
  # ASCII digit rather than a NUL byte -- this guard looks like it always
  # passes; confirm the intended binary check.
  if not any(x == b"0" for x in blob.data[0:8192]):
    # Round-trip the blob through a scratch file so the lint command can
    # edit it in place, then read the result back and clean up.
    filename = '.git/info/tmpfile'
    with open(filename, "wb") as f:
      f.write(blob.data)
    subprocess.check_call(lint_args.command + [filename])
    with open(filename, "rb") as f:
      blob.data = f.read()
    os.remove(filename)

# When filenames matter but no relevance body was given, accept every file.
if lint_args.filenames_important and not lint_args.relevant:
  lint_args.relevant = 'return True'
if lint_args.relevant:
  body = lint_args.relevant
  # Defines a global is_relevant(filename) whose body is the given text.
  # NOTE(review): exec() runs that text as code; presumably it comes from
  # the command line -- make sure it is never taken from untrusted input.
  exec('def is_relevant(filename):\n '+'\n '.join(body.splitlines()),
       globals())
  lint_args.filenames_important = True
args = fr.FilteringOptions.default_options()
args.force = True
if lint_args.filenames_important:
  # Filename-aware mode: a temp directory and a long-lived
  # `git cat-file --batch` process are set up before running with the
  # commit callback; the process's stdin is closed and it is waited on
  # afterwards.
  tmpdir = tempfile.mkdtemp().encode()
  cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                      stdin = subprocess.PIPE,
                                      stdout = subprocess.PIPE)
  filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
  filter.run()
  cat_file_process.stdin.close()
  cat_file_process.wait()
else:
  # Blob-only mode: lint each blob without knowing its filename.
  filter = fr.RepoFilter(args, blob_callback=lint_non_binary_blobs)
  filter.run()
args = parser.parse_args()
if not args.file:
    raise SystemExit("Error: Need to specify the --file option")

# Store the file in the object database and remember its object id and mode.
fhash = subprocess.check_output(
    ['git', 'hash-object', '-w', args.file]).strip()
# FIXME: I've assumed the file wasn't a directory or symlink...
if os.access(args.file, os.X_OK):
    fmode = b'100755'
else:
    fmode = b'100644'


def fixup_commits(commit, metadata):
    """Add the given file to every commit that has no parents."""
    is_root = len(commit.parents) == 0
    if is_root:
        new_change = fr.FileChange(b'M', args.file, fhash, fmode)
        commit.file_changes.append(new_change)
    # FIXME: What if the history already had a file matching the given name,
    # but which didn't exist until later in history?  Is the intent for the
    # user to keep the other version that existed when it existed, or to
    # overwrite the version for all of history with the specified file?  I
    # don't know, but if it's the latter, we'd need to add an 'else' clause
    # like the following:
    #else:
    #    commit.file_changes = [x for x in commit.file_changes
    #                           if x.filename != args.file]


fr_args = fr.FilteringOptions.parse_args([
    '--preserve-commit-encoding',
    '--force',
    '--replace-refs', 'update-no-add',
])
repo_filter = fr.RepoFilter(fr_args, commit_callback=fixup_commits)
repo_filter.run()
#!/usr/bin/env python3 """ Please see the ***** API BACKWARD COMPATIBILITY CAVEAT ***** near the top of git-filter-repo. """ import git_filter_repo as fr from git_filter_repo import Blob, Reset, FileChange, Commit, Tag, FixedTimeZone from git_filter_repo import Progress, Checkpoint from datetime import datetime, timedelta args = fr.FilteringOptions.default_options() out = fr.RepoFilter(args) out.importer_only() world = Blob(b"Hello") out.insert(world) bar = Blob(b"foo\n") out.insert(bar) master = Reset(b"refs/heads/master") out.insert(master) changes = [ FileChange(b'M', b'world', world.id, mode=b"100644"), FileChange(b'M', b'bar', bar.id, mode=b"100644") ] when = datetime(year=2005,
#!/usr/bin/env python3

"""
Please see the
  ***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""

import re
import git_filter_repo as fr


def strip_cvs_keywords(blob, metadata):
    """Collapse expanded CVS keywords such as ``$Id: ...$`` to ``$Id$``."""
    # FIXME: Should first check if blob is a text file to avoid ruining
    # binaries.  Could use python.magic here, or just output blob.data to
    # the unix 'file' command
    keyword = re.compile(
        br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$')
    blob.data = keyword.sub(br'$\1$', blob.data)


args = fr.FilteringOptions.parse_args(['--force'])
repo_filter = fr.RepoFilter(args, blob_callback=strip_cvs_keywords)
repo_filter.run()
""" import re import datetime import git_filter_repo as fr def change_up_them_commits(commit, metadata): # Change the commit author if commit.author_name == b"Copy N. Paste": commit.author_name = b"Ima L. Oser" commit.author_email = b"*****@*****.**" # Fix the author email commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email) # Fix the committer date (bad timezone conversion in initial import) oldtime = fr.string_to_date(commit.committer_date) newtime = oldtime + datetime.timedelta(hours=-5) commit.committer_date = fr.date_to_string(newtime) # Fix the commit message commit.message = re.sub(b"Marketing is staffed with pansies", b"", commit.message) args = fr.FilteringOptions.parse_args(['--force']) filter = fr.RepoFilter(args, commit_callback=change_up_them_commits) filter.run()
# NOTE(review): this excerpt starts part-way through a method of the
# CheckIgnores class -- presumably get_ignored, which skip_ignores calls
# below; verify.  The statements up to `return ignored` sit inside a loop over
# names whose header is not visible here.
        continue
      else:
        # Ask the long-running check-ignore process about this name and read
        # back exactly one line of answer.
        self.check_ignore_process.stdin.write(name + b'\n')
        self.check_ignore_process.stdin.flush()
        result = self.check_ignore_process.stdout.readline().rstrip(
            b'\n')
        # The answer is "<rest>\t<pathname>"; the pathname must echo the name
        # that was sent.
        (rest, pathname) = result.split(b"\t")
        if name != pathname:
          raise SystemExit("Error: Passed {} but got {}".format(
              name, pathname))
        # A `rest` of b'::' is cached as okay; anything else is cached as
        # ignored and also reported for this call.
        if rest == b'::':
          self.okay.add(name)
        else:
          self.ignored.add(name)
          ignored.add(name)

    return ignored

  def skip_ignores(self, commit, metadata):
    """Commit callback: drop file changes whose filename is ignored."""
    filenames = [x.filename for x in commit.file_changes]
    bad = self.get_ignored(filenames)
    commit.file_changes = [
        x for x in commit.file_changes if x.filename not in bad
    ]


# Run git-filter-repo with default options and the checker's commit callback.
checker = CheckIgnores()
args = fr.FilteringOptions.default_options()
filter = fr.RepoFilter(args, commit_callback=checker.skip_ignores)
filter.run()
#!/usr/bin/env python3 """ Please see the ***** API BACKWARD COMPATIBILITY CAVEAT ***** near the top of git-filter-repo """ import git_filter_repo as fr def handle_tag(tag): print("Tagger: " + ''.join(tag.tagger_name)) args = fr.FilteringOptions.parse_args(['--force', '--tag-callback', 'pass']) filter = fr.RepoFilter(args, tag_callback=handle_tag) filter.run()
if len(sys.argv) != 3:
    # Interpolate the program name into the usage line so the user does not
    # see a literal "%s".
    raise SystemExit("Syntax:\n %s SOURCE_REPO TARGET_REPO" % sys.argv[0])
source_repo = sys.argv[1].encode()
# NOTE(review): target_repo is not used anywhere in the visible code.
target_repo = sys.argv[2].encode()
total_objects = fr.GitUtils.get_total_objects(source_repo)  # blobs+trees
total_commits = fr.GitUtils.get_commit_count(source_repo)
object_count = 0
commit_count = 0


def print_progress():
    """Redraw the one-line progress indicator from the module-level counters."""
    # The counters are only read here, so no `global` declaration is needed.
    print("\rRewriting commits... %d/%d (%d objects)"
          % (commit_count, total_commits, object_count), end='')


def my_blob_callback(blob, metadata):
    """Count one more processed blob and refresh the progress line."""
    global object_count
    object_count += 1
    print_progress()


def my_commit_callback(commit, metadata):
    """Count one more processed commit and refresh the progress line."""
    global commit_count
    commit_count += 1
    print_progress()


args = fr.FilteringOptions.parse_args(['--force', '--quiet'])
filter = fr.RepoFilter(args,
                       blob_callback = my_blob_callback,
                       commit_callback = my_commit_callback)
filter.run()
# Handle CLI arguments.
if args.start is None:
    error("the start argument must be given")

# Exactly one of --org / org-string has to be supplied.  'org-string' is not
# a valid attribute name, hence getattr().
org_string = getattr(args, 'org-string')
has_org = args.org is not None
has_org_string = org_string is not None
if not (has_org or has_org_string):
    error("either --org or org-string must be given")
if has_org and has_org_string:
    error("both --org and org-string given")
org_bytes = (update_copyright.org_alias_map[args.org] if has_org
             else org_string.encode())

# Call git_filter_repo.
# Args deduced from:
# print(git_filter_repo.FilteringOptions.parse_args(['--refs', 'HEAD',
#       '--force'], error_on_empty=False))
filter_repo_args = git_filter_repo.FilteringOptions.default_options()
filter_repo_args.force = True
filter_repo_args.partial = True
filter_repo_args.refs = ['{}..HEAD'.format(args.start)]
filter_repo_args.repack = False
filter_repo_args.replace_refs = 'update-no-add'


def blob_callback(blob, callback_metadata, org_bytes):
    """Rewrite the blob data via update_copyright for the current year."""
    current_year = datetime.datetime.now().year
    blob.data = update_copyright.update_copyright(
        blob.data, current_year, org_bytes)


# RepoFilter calls the callback with two arguments; the lambda supplies
# org_bytes as the third.
repo_filter = git_filter_repo.RepoFilter(
    filter_repo_args,
    blob_callback=lambda blob, metadata: blob_callback(
        blob, metadata, org_bytes),
)
repo_filter.run()