def collect_commits(self):
    """Export each input repo with git fast-export and memorize its commits.

    For every input repository this streams ``git fast-export --all`` through
    a FastExportFilter whose commit callback JSON-izes each commit; the raw
    export is saved to a ".git-fast-export" store file, and the JSON-ized
    commits are written to .git/splice-commits.json and handed to
    self.remember_commits().  Each repo's mark-id offset is recorded in
    self.export_id_offsets.
    """
    self.exported_stores = []
    # repo_num is 1-based and keys both remember_commits and
    # export_id_offsets below.
    for repo_num, input_repo in enumerate(self.input_repos, start=1):
        repo_name = basename(input_repo)
        remember_commits_filename = os.path.join(abspath(input_repo),
                                                 ".git", "splice-commits.json")
        exported_store = self.open_export_file(repo_name, ".git-fast-export",
                                               "export", overwrite=True)
        self.exported_stores.append(exported_store)
        logging.info("Doing export on repository %s [%d]", repo_name, repo_num)
        stored_commits = []
        # Reuse the repo's mark file (if present) so a later run can export
        # incrementally instead of from scratch.
        export_mark_file = join(abspath(input_repo), ".git",
                                "splice-export-marks")
        export_mark_args = ["--all", "--export-marks=%s" % export_mark_file]
        if exists(export_mark_file):
            export_mark_args.append("--import-marks=%s" % export_mark_file)
        export = fast_export_output(input_repo, export_mark_args)
        logging.info("_next_id is %d", _IDS._next_id)
        try:
            collect = FastExportFilter(
                reset_callback=lambda r: self.skip_reset(r),
                commit_callback=lambda c: stored_commits.append(
                    self.jsonize_commit(c)))
            logging.info("offset for repo %d is %d", repo_num,
                         collect._id_offset)
            kwargs = {}
            # NOTE(review): hard-coding id_offset=8 for the second repo looks
            # like a debugging leftover -- confirm before relying on it.
            if repo_num == 2:
                kwargs["id_offset"] = 8
            collect.run(export.stdout, exported_store.file, **kwargs)
            exported_store.close()
            # 'wb' + json.dump is fine on Python 2, where str is bytes.
            with open(remember_commits_filename, 'wb') as remember_commits_file:
                json.dump(stored_commits, remember_commits_file, indent=4)
        except Exception, e:  # Python 2 syntax; log context, then re-raise.
            logging.error("Error in memorizing export for %s [%d]: %s",
                          input_repo, repo_num, e)
            raise
        # TODO: handle the fact that marks are getting messed up by the two repos on an incremental import
        self.remember_commits(repo_num, stored_commits)
        logging.info("id_offset is %d", collect._id_offset)
        self.export_id_offsets[repo_num] = collect._id_offset
def main(args):
    """Rewrite a fast-export stream according to the parsed command line.

    Applies branch renames (regex and exact), branch exclusions, and file
    renames/exclusions (exact and directory-prefix) to every commit, then
    runs the stream through FastExportFilter.
    """
    branch_renames = dict(args.branch_renames)
    branch_pattern_renames = args.branch_pattern_renames
    branch_excludes = dict(args.branch_excludes)
    # A trailing "/" distinguishes a directory rename/exclude from an exact
    # file rename/exclude.
    file_renames = dict([(src, target) for src, target in args.file_renames
                         if not src.endswith("/")])
    dir_renames = [(src, target) for src, target in args.file_renames
                   if src.endswith("/")]
    file_excludes = set([target for target in args.file_excludes
                         if not target.endswith("/")])
    dir_excludes = [target for target in args.file_excludes
                    if target.endswith("/")]
    # Track what we've already logged, so each rename/exclusion is only
    # reported the first time it fires.
    branches_found = set()
    files_found = set()
    def my_commit_callback(commit):
        # Branch handling: regex renames apply first, then exact renames.
        # Note the elif: a branch that receives an exact rename is never
        # checked against the exclusion list.
        branch = commit.branch
        for src_pattern, target in branch_pattern_renames:
            branch = re.sub(src_pattern, target, branch)
        if branch in branch_renames:
            branch = branch_renames[branch]
        elif branch in branch_excludes:
            if branch not in branches_found:
                logging.info("Excluding branch %s", branch)
                branches_found.add(branch)
            commit.skip()
            return
        if branch != commit.branch:
            if commit.branch not in branches_found:
                logging.info("Renaming branch %s to %s", commit.branch, branch)
                branches_found.add(commit.branch)
            commit.branch = branch
        # File handling, in order: directory renames, directory excludes,
        # exact file renames, exact file excludes.  Excludes therefore see
        # the already-renamed path.
        new_file_changes, alter_commit = [], False
        for change in commit.file_changes:
            exclude_file = False
            filename = change.filename
            for src_dir, target_dir in dir_renames:
                if filename.startswith(src_dir):
                    # Replace only the leading occurrence of the prefix.
                    filename = filename.replace(src_dir, target_dir, 1)
            for target_dir in dir_excludes:
                if filename.startswith(target_dir):
                    exclude_file = True
            if filename in file_renames:
                filename = file_renames[filename]
            if filename in file_excludes:
                exclude_file = True
            if exclude_file:
                if filename not in files_found:
                    logging.info("Excluding file %s", filename)
                    files_found.add(filename)
                # A dropped change means the commit's list must be replaced.
                alter_commit = True
            else:
                new_file_changes.append(change)
                if filename != change.filename:
                    if change.filename not in files_found:
                        logging.info("Renaming file %s to %s",
                                     change.filename, filename)
                        files_found.add(change.filename)
                    change.filename = filename
        if alter_commit:
            commit.file_changes = new_file_changes
    filter = FastExportFilter(commit_callback = my_commit_callback)
    filter.run()
def run(self):
    """Splice the histories of repo1 and repo2 into one output repository."""
    # One fast-import process builds the combined target repo; both export
    # passes below feed it through the same stdin pipe.
    self.target = fast_import_input(self.output_dir)

    # Pass 1: replay repo1.  Resets are dropped (skip_reset) and each commit
    # goes through hold_commit.
    first_export = fast_export_output(self.repo1)
    first_filter = FastExportFilter(
        reset_callback=lambda r: self.skip_reset(r),
        commit_callback=lambda c: self.hold_commit(c))
    first_filter.run(first_export.stdout, self.target.stdin)

    # Pass 2: replay repo2, routing each commit through weave_commit.
    second_export = fast_export_output(self.repo2)
    second_filter = FastExportFilter(
        commit_callback=lambda c: self.weave_commit(c))
    second_filter.run(second_export.stdout, self.target.stdin)

    # Wait for git-fast-import to complete (only necessary since we passed
    # file objects to FastExportFilter.run; and even then the worst that
    # happens is git-fast-import completes after this python script does)
    self.target.stdin.close()
    self.target.wait()
def run(self):
    """Interleave the commits of repo1 and repo2 into the output repo."""
    # Single fast-import process that builds the output repository; both
    # filter passes write into its stdin.
    self.target = fast_import_input(self.output_dir)
    input1 = fast_export_output(self.repo1)
    # First pass over repo1: resets are skipped and commits are routed
    # through hold_commit.
    filter1 = FastExportFilter(
        reset_callback=lambda r: self.skip_reset(r),
        commit_callback=lambda c: self.hold_commit(c))
    filter1.run(input1.stdout, self.target.stdin)
    input2 = fast_export_output(self.repo2)
    # Second pass over repo2: commits are routed through weave_commit.
    filter2 = FastExportFilter(
        commit_callback=lambda c: self.weave_commit(c))
    filter2.run(input2.stdout, self.target.stdin)
    # Wait for git-fast-import to complete (only necessary since we passed
    # file objects to FastExportFilter.run; and even then the worst that
    # happens is git-fast-import completes after this python script does)
    self.target.stdin.close()
    self.target.wait()
#!/usr/bin/env python
import re
from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter
from datetime import datetime, timedelta

def change_up_them_commits(commit):
    # Apply a set of one-off history fixups to a single commit, in place.
    # Replace one specific author's identity.
    if commit.author_name == "Copy N. Paste":
        commit.author_name = "Ima L. Oser"
        commit.author_email = "*****@*****.**"
    # Correct a typo'd email domain wherever it appears.
    commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email)
    # Shift the committer date back five hours (bad timezone conversion in
    # the initial import).
    commit.committer_date -= timedelta(hours=5)
    # Strip an unwanted phrase from the commit message.
    commit.message = re.sub("Marketing is staffed with pansies", "",
                            commit.message)

filter = FastExportFilter(commit_callback=change_up_them_commits)
filter.run()
def my_blob_callback(blob):
    # Tally every blob seen, for the progress display.
    global object_count
    object_count += 1
    print_progress()

def my_commit_callback(commit):
    # Tally commits and rewrite file paths within each commit.
    global commit_count
    commit_count += 1
    print_progress()
    new_file_changes = []
    for change in commit.file_changes:
        # Relocate matching paths under tests/org.  NOTE(review): `regexp`
        # is defined elsewhere in this file; presumably it matches the same
        # '^[^/]*/[^/]*/org' prefix -- confirm against its definition.
        if regexp.match(change.filename):
            change.filename = re.sub('^[^/]*/[^/]*/org', 'tests/org',
                                     change.filename)
        new_file_changes.append(change)
        #print commit.branch + ":" + change.filename
    commit.file_changes = new_file_changes

filter = FastExportFilter(blob_callback=my_blob_callback,
                          commit_callback=my_commit_callback)
filter.run(source_repo, target_repo)
# `start` is captured before this chunk; report total elapsed time.
end = datetime.datetime.now()
print "End : " + str(end - start)
# NOTE(review): the next two statements appear to be the tail of a
# progress-printing helper whose "def print_progress():" line falls outside
# this chunk -- the `global` declaration and the trailing-comma Python 2
# print only make sense inside such a function; confirm against the full
# file before editing.
global object_count, commit_count, total_objects, total_commits
print "\rRewriting commits... %d/%d (%d objects)" \
    % (commit_count, total_commits, object_count),

def my_blob_callback(blob):
    # Tally every blob seen, for the progress display.
    global object_count
    object_count += 1
    print_progress()

def my_commit_callback(commit):
    # Tally commits and rewrite file paths within each commit.
    global commit_count
    commit_count += 1
    print_progress()
    new_file_changes = []
    for change in commit.file_changes:
        # Relocate matching paths under tests/org.  NOTE(review): `regexp`
        # is defined elsewhere in this file -- presumably it matches the
        # same '^[^/]*/[^/]*/org' prefix; confirm against its definition.
        if regexp.match(change.filename):
            change.filename = re.sub('^[^/]*/[^/]*/org','tests/org',
                                     change.filename)
        new_file_changes.append(change)
        #print commit.branch + ":" + change.filename
    commit.file_changes = new_file_changes

filter = FastExportFilter(blob_callback = my_blob_callback,
                          commit_callback = my_commit_callback)
filter.run(source_repo, target_repo)
# `start` is captured before this chunk; report total elapsed time.
end = datetime.datetime.now()
print "End : " + str(end-start)
#!/usr/bin/env python
from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter

def my_commit_callback(commit):
    # Retarget every commit that lands on master onto the 'slave' branch.
    old_ref, new_ref = "refs/heads/master", "refs/heads/slave"
    if commit.branch == old_ref:
        commit.branch = new_ref

filter = FastExportFilter(commit_callback=my_commit_callback)
filter.run()
def run(self):
    """Run one collab grafting pass and record its bookkeeping state.

    Pipes fast-export of the source repo through our blob/commit callbacks
    into fast-import on the target repo, then stores the pruned mark maps
    (and, when running from the target repository itself, the excludes and
    includes lists plus the original repo name) into the object database
    via record_content().
    """
    #############################################################################
    # Set members based on data from previous runs
    self._setup_files_and_excludes()

    # Setup the source and target processes. The source process will produce
    # fast-export output for the source repo, this output will be passed
    # through FastExportFilter which will manipulate the output using our
    # callbacks, finally, the manipulated output will be given to the
    # fast-import process and used to create the target repo.
    # (This should update sourcemarks and targetmarks)
    source = \
        fast_export_output(self._source_repo,
                           ["--export-marks=%s" % self._sourcemarks,
                            "--import-marks=%s" % self._sourcemarks]
                           + self._fast_export_args)
    target = \
        fast_import_input(self._target_repo,
                          ["--export-marks=%s" % self._targetmarks,
                           "--import-marks=%s" % self._targetmarks])
    filt = FastExportFilter(blob_callback = lambda b: self.blob_callback(b),
                            commit_callback = lambda c: self.commit_callback(c))
    filt.run(source.stdout, target.stdin)

    # Show progress
    if self._show_progress:
        sys.stdout.write("\nWaiting for git fast-import to complete...")
        sys.stdout.flush()
    target.stdin.close()
    target.wait()  # need to wait for fast-import process to finish
    if self._show_progress:
        sys.stdout.write("done.\n")

    # Record the sourcemarks and targetmarks -- 2 steps
    # Step 1: Make sure the source and target marks have the same mark numbers.
    # Not doing this would allow one end of the grafting to reuse a number
    # that would then be misconnected on the other side.
    sourcemaps = self._get_maps(self._sourcemarks)
    targetmaps = self._get_maps(self._targetmarks)
    # Python 2 dict.keys() returns a list, so deleting entries while
    # looping over it is safe here.
    for key in sourcemaps.keys():
        if key not in targetmaps:
            del sourcemaps[key]
    for key in targetmaps.keys():
        if key not in sourcemaps:
            del targetmaps[key]

    # Step 2: Record the data
    for set_obj in [(sourcemaps, self._sourcemarks),
                    (targetmaps, self._targetmarks)]:
        # get raw filename for source/target
        mapname = self._get_map_name(set_obj[1])
        # create refs/collab if it's not there
        # NOTE(review): os.mkdir only creates one level; assumes the parent
        # of refs/collab already exists -- confirm.
        if not os.path.isdir(os.path.dirname(mapname)):
            os.mkdir(os.path.dirname(mapname))
        # compute string content of commit-map
        content = ''.join([":%d %s\n" % (k, v)
                           for k, v in set_obj[0].iteritems()])
        # record content in the object database
        record_content(self._collab_git_dir, mapname, content)

    # Check if we are running from the target
    if self._target_repo == '.':
        # Record the excludes and includes so they can be reused next time
        for set_obj in [(self._excludes, 'excludes'),
                        (self._includes, 'includes')]:
            filename = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                    set_obj[1])
            record_content(self._collab_git_dir, filename,
                           '\n'.join(set_obj[0]) + '\n')
        # Record source_repo as the original repository
        filename = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                'orig_repo')
        record_content(self._collab_git_dir, filename,
                       self._source_repo + '\n')
def run(self):
    """Stream source-repo history through our callbacks into the target
    repo, then persist mark maps and (from the target) run metadata."""
    #############################################################################
    # Pull in the excludes/includes/marks left behind by earlier runs.
    self._setup_files_and_excludes()

    # Build the pipeline: fast-export (source) -> FastExportFilter running
    # our blob/commit callbacks -> fast-import (target).  Both subprocesses
    # load and re-save their mark files so the marks stay current.
    exporter = fast_export_output(
        self._source_repo,
        ["--export-marks=%s" % self._sourcemarks,
         "--import-marks=%s" % self._sourcemarks] + self._fast_export_args)
    importer = fast_import_input(
        self._target_repo,
        ["--export-marks=%s" % self._targetmarks,
         "--import-marks=%s" % self._targetmarks])
    pipeline = FastExportFilter(
        blob_callback=lambda b: self.blob_callback(b),
        commit_callback=lambda c: self.commit_callback(c))
    pipeline.run(exporter.stdout, importer.stdin)

    if self._show_progress:
        sys.stdout.write("\nWaiting for git fast-import to complete...")
        sys.stdout.flush()

    # Closing stdin lets git-fast-import finish; wait() reaps the process.
    importer.stdin.close()
    importer.wait()
    if self._show_progress:
        sys.stdout.write("done.\n")

    # Step 1: keep only the mark numbers present on BOTH sides; otherwise
    # one end of the grafting could reuse a number that would then be
    # misconnected on the other side.
    src_map = self._get_maps(self._sourcemarks)
    tgt_map = self._get_maps(self._targetmarks)
    for stale in [k for k in src_map.keys() if k not in tgt_map]:
        del src_map[stale]
    for stale in [k for k in tgt_map.keys() if k not in src_map]:
        del tgt_map[stale]

    # Step 2: store each pruned commit-map in the object database.
    for mapping, marksfile in [(src_map, self._sourcemarks),
                               (tgt_map, self._targetmarks)]:
        mapname = self._get_map_name(marksfile)
        refs_dir = os.path.dirname(mapname)
        if not os.path.isdir(refs_dir):  # create refs/collab on first use
            os.mkdir(refs_dir)
        content = ''.join([":%d %s\n" % (k, v)
                           for k, v in mapping.iteritems()])
        record_content(self._collab_git_dir, mapname, content)

    # When running from inside the target repo, also remember the exclude
    # and include lists plus the original repository for the next run.
    if self._target_repo == '.':
        for values, label in [(self._excludes, 'excludes'),
                              (self._includes, 'includes')]:
            listfile = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                    label)
            record_content(self._collab_git_dir, listfile,
                           '\n'.join(values) + '\n')
        origfile = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                'orig_repo')
        record_content(self._collab_git_dir, origfile,
                       self._source_repo + '\n')