class G2P:
    """class to handle batching of p4 commands when copying git to p4"""

    def __init__(self, ctx):
        self.ctx = ctx
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                              (FAST_EXPORT, OVERALL),
                              (TEST_BLOCK_PUSH, OVERALL),
                              (CHECK_CONFLICT, OVERALL),
                              (COPY, OVERALL),
                              (GIT_CHECKOUT, COPY),
                              (CHECK_PROTECTS, COPY),
                              (COPY_BLOBS_1, COPY),
                              (COPY_BLOBS_2, COPY),
                              (MIRROR, OVERALL),
                              ])
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([str(self.perf),
                          str(self.ctx.mirror)
                          ])

    def revert_and_raise(self, errmsg):
        """An error occurred while attempting to submit the incoming change
        to Perforce. As a result, revert all modifications, log the error,
        and raise an exception."""
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        self.remove_added_files()
        if not errmsg:
            errmsg = traceback.format_stack()
        msg = "import failed: {}".format(errmsg)
        LOG.error(msg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        '''
        Convert one P4 message to a single string.

        Annotate some errors with additional context such as P4USER.
        '''
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    def check_p4_messages(self):
        """If the results indicate a file is locked by another user,
        raise an exception so that the overall commit will fail. The
        changes made so far will be reverted.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return
        lines = [self._p4_message_to_text(m) for m in msgs]
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        '''
        Run one P4 command, logging cmd and results.
        '''
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
            LOG.getChild('p4.err').error("\n".join(p4.errors))
        if p4.warnings:
            LOG.getChild('p4.warn').warning("\n".join(p4.warnings))
        LOG.getChild('p4.out').debug("{}".format(results))
        if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG):
            log = LOG.getChild('p4.msgid')
            for m in p4.messages:
                log.debug(p4gf_p4msg.msg_repr(m))

        self.check_p4_messages()

    def run_p4_commands(self):
        """run all pending p4 commands"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if not cmd[0] == 'add':
                cmd.append('-k')
            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and
                # the new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for pair in paths:
                    (frompath, topath) = pair
                    self._p4run(['move', '-k', escape_path(frompath),
                                 escape_path(topath)])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
            else:
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                if not cmd[0] == 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                else:
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:
                        os.remove(path)

    def remove_added_files(self):
        """remove added files to restore p4 client after failure of
        a p4 command"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            if cmd[0] == 'add':
                for path in paths:
                    os.unlink(path)

    def setup_p4_command(self, command, p4path):
        """Add command to list to be run by run_p4_commands.
        If the command is 'move' then the p4path is expected to be
        a tuple of the frompath and topath."""
        if command in self.addeditdelete:
            self.addeditdelete[command].append(p4path)
        else:
            self.addeditdelete[command] = [p4path]
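
    # Hedged illustration of the batching scheme above (the paths are
    # hypothetical): setup_p4_command() keys self.addeditdelete by the
    # full command string, so one commit's worth of blobs might yield
    #
    #   {'edit':           ['/ws/a.txt'],
    #    'edit -t text+x': ['/ws/b.sh'],
    #    'add -f':         ['/ws/c.txt'],
    #    'delete':         ['/ws/d.txt'],
    #    'move':           [('/ws/old.txt', '/ws/new.txt')]}
    #
    # and run_p4_commands() then issues one p4 command per key.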

    def _toggle_filetype(self, p4path, isx):
        """Returns the new file type for the named file, switching the
        executable state based on the isx value.

        Args:
            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        p4type = None
        if isx:
            p4type = '+x'
        else:
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                try:
                    p4type = p4gf_util.first_value_for_key(
                        self.ctx.p4.run(['fstat', '-T' + tipe, p4path]),
                        tipe)
                except P4.P4Exception:
                    pass
            if p4type:
                p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """run p4 add or edit for a new or modified file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype
            # changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                LOG.debug("  set filetype: {ft} oldx={oldx} newx={newx}"
                          .format(ft=p4type, oldx=wasx, newx=isx))
                shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """run p4 move for a renamed/moved file"""
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """run p4 integ for a copied file"""
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        self._p4run(["copy", "-v",
                     escape_path(p4frompath), escape_path(p4topath)])

        # make sure dest dir exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """run p4 delete for a deleted file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """copy git blobs to perforce revs"""
        # first, one pass to do rename/copy
        # these don't batch. move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            self.run_p4_commands()

        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands. That means no more than one delete, one add per
        # filetype and one edit per filetype. Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            self.run_p4_commands()
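
    # Hedged sketch of the behavior escape_path() (defined elsewhere in
    # this module) must provide: Perforce reserves '@', '#', '*' and '%'
    # in file paths, encoding them as %40, %23, %2A and %25 ('%' first,
    # so it is not double-escaped). Roughly:
    #
    #   for ch, enc in (('%', '%25'), ('@', '%40'),
    #                   ('#', '%23'), ('*', '%2A')):
    #       path = path.replace(ch, enc)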

    def check_protects(self, p4user, blobs):
        """check if author is authorized to submit files"""
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        pc.filter_paths(blobs)
        if pc.has_error():
            self.revert_and_raise(pc.error_message())

    def _reset_for_new_commit(self):
        """
        Clear out state from previous commit that must not carry over
        into next commit.
        """
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This
        prevents the obscure "file(s) not on client" error.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """copy a single commit"""
        self._reset_for_new_commit()

        # LOG.debug("dump commit {}".format(commit))
        LOG.debug("for commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted."
                                   + " Rebase to create a linear"
                                   + " history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise("User '{}' not permitted to commit"
                                  .format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:
                self.revert_and_raise(err)

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinguish the revision from a path...
                p4gf_util.popen(['git', 'reset', '--hard',
                                 commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        try:
            self.copy_blobs(commit['files'])
        except P4.P4Exception as e:
            self.revert_and_raise(str(e))

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}"
                      .format(pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

            try:
                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                                          commit['author']['date'])
                    LOG.info("Submitted change @{} for commit {}"
                             .format(changenum, commit['sha1']))
                else:
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
                self.revert_and_raise(str(e))
        return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script
        introduce conflicting changes.
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:
            return

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)
        log.debug(block_dict)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)))
        cl_ay = self.ctx.p4.run(
            'changes', '-l',
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client))

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))
                return

        until = block_dict['until']
        log.debug("BLOCKING. Seen 'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now"
                           .format(path=p4gf_path.slash_dot_dot_dot(
                                       self.ctx.config.p4client),
                                   change=cl_ay[-1]['change']))

        while not contains_desc(until, cl_ay):
            time.sleep(1)
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    def copy(self, start_at, end_at):
        """copy a set of commits from git into perforce"""
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(start_at,
                                                               end_at))
                self.attempt_resync()
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at,
                                                    self.ctx.tempdir.name)
                    fe.run()
                marks = []
                commit_count = 0
                for x in fe.commands:
                    if x['command'] == 'commit':
                        commit_count += 1
                self.progress.progress_init_determinate(commit_count)
                try:
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                            self.test_block_push()
                        if command['command'] == 'commit':
                            self.progress.progress_increment(
                                "Copying changelists...")
                            self.ctx.heartbeat()
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                                    continue
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1, p4_changelist_number) = \
                                    mark_to_commit_changelist(mark)
                                conflict_checker.record_commit(
                                    git_commit_sha1, p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                                    break
                            marks.append(mark)
                        elif command['command'] == 'reset':
                            pass
                        else:
                            raise RuntimeError("Unexpected fast-export"
                                               + " command: "
                                               + command['command'])
                finally:
                    # we want to write mirror objects for any commits that
                    # made it through; any exception will still be alive
                    # after this
                    with self.perf.timer[MIRROR]:
                        self.ctx.mirror.add_commits(marks)
                        self.ctx.mirror.add_objects_to_p4(self.ctx)

                if conflict_checker.has_conflict():
                    raise RuntimeError("Conflicting change from Perforce"
                                       + " caused one or more git commits"
                                       + " to fail. Time to pull, rebase,"
                                       + " and try again.")

        LOG.getChild("time").debug("\n" + str(self))
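
# Hedged usage sketch (illustrative, not part of the original module):
# a caller holding a Git Fusion context might drive a git-to-p4 copy
# roughly like this; the ref names below are hypothetical, real callers
# pass whatever range git-fast-export should walk.
#
#   g2p = G2P(ctx)
#   g2p.copy('refs/heads/master~5', 'refs/heads/master')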

class P2G:
    """class to manage copying from Perforce to git"""

    def __init__(self, ctx):
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                              (SETUP, OVERALL),
                              (PRINT, OVERALL),
                              (FSTAT, OVERALL),
                              (SYNC, OVERALL),
                              (FAST_IMPORT, OVERALL),
                              (MIRROR, OVERALL),
                              (MERGE, OVERALL),
                              (PACK, OVERALL)
                              ])

        self.rev_range = None       # RevRange instance set in copy().
        self.graft_change = None
        self.changes = None         # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs = None    # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join(["\n\nFast Import:\n",
                          str(self.fastimport),
                          "",
                          str(self.perf),
                          ""
                          ])

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug("Revision range to copy to Git: {rr}"
                  .format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the
            # change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                self.ctx.p4,
                self.rev_range.graft_change_num,
                "ignore_depot_files")
            self.graft_change.description += (
                '\n[grafted history before {start_at}]'
                .format(start_at=start_at))

    def _path_range(self):
        """Return the common path...@range string we use frequently.
        """
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
            args.append("-k")
        self.ctx.p4.run("print", args, self._path_range())
        printhandler.flush()
        printhandler.progress.progress_finish()

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
                args.append("-k")
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)
            printhandler.flush()

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if not p4file.change in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs
self.ctx.p4.run("fstat", fstat_cols, self._graft_path()) self.ctx.p4.handler = None self._collapse_to_graft_change() self._add_graft_to_changes() # don't need this any more self.printed_revs = None sorted_changes = [ str(y) for y in sorted([int(x) for x in self.changes.keys()]) ] LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes])) return sorted_changes def _sync(self, sorted_changes): """fake sync of last change to make life easier at push time""" self.ctx.p4.handler = SyncHandler() lastchange = self.changes[sorted_changes[-1]] self.ctx.p4.run( "sync", "-kf", self.ctx.client_view_path() + "@" + str(lastchange.change)) self.ctx.p4.handler = None def _fast_import(self, sorted_changes, last_commit): """build fast-import script from changes, then run fast-import""" self.progress.progress_init_determinate(len(sorted_changes)) for changenum in sorted_changes: change = self.changes[changenum] self.progress.progress_increment("Copying changelists...") self.ctx.heartbeat() # create commit and trees self.fastimport.add_commit(change, last_commit) last_commit = change.change # run git-fast-import and get list of marks marks = self.fastimport.run_fast_import() # done with these self.changes = None return marks def _mirror(self, marks): """build up list of p4 objects to mirror git repo in perforce then submit them """ self.ctx.mirror.add_commits(marks) self.ctx.mirror.add_objects_to_p4(self.ctx) LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror)) self.ctx.mirror = GitMirror(self.ctx.config.view_name) last_commit = marks[len(marks) - 1] LOG.debug("Last commit created: " + last_commit) # pylint: disable=R0201 # R0201 Method could be a function def _pack(self): """run 'git gc' to pack up the blobs aside from any possible performance benefit, this prevents warnings from git about "unreachable loose objects" """ p4gf_util.popen_no_throw(["git", "gc"]) def _collapse_to_graft_change(self): """Move all of the files from pre-graft changelists into the graft changelist. Remove all pre-graft changelists. NOP if not grafting. 'p4 print //client/...@100' does indeed print all the files that exist @100, but the tag dict that goes with each file includes the changelist in which that file was last added/edited, not 100. So this function gathers up all the file revs with change=1..99 and sticks them under change 100's file list. """ if (not self.graft_change): return graft_num_int = int(self.graft_change.change) LOG.debug("_collapse_to_graft_change() graft_num_int={}".format( graft_num_int)) # Delete all P4Changelist elements from self.changes where they # refer to a change that will be collapsed into the graft change, # including the graft change itself. del_keys = [] for p4changelist in self.changes.values(): if graft_num_int < int(p4changelist.change): LOG.debug("_collapse_to_graft_change() skipping {}".format( p4changelist.change)) continue LOG.debug("_collapse_to_graft_change() deleting {}".format( p4changelist.change)) del_keys.append(p4changelist.change) for key in del_keys: del self.changes[key] # Associate with the graft change all printed P4File results from # graft-change or older for (_key, p4file) in self.printed_revs.revs: if graft_num_int < int(p4file.change): LOG.debug("_collapse_to_graft_change() skipping post-graft {}". format(p4file)) continue old = self.graft_change.file_from_depot_path(p4file.depot_path) # If print picked up multiple revs, keep the newest. 

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if not self.graft_change:
            return
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}"
                  .format(graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}"
                          .format(p4changelist.change))
                continue

            LOG.debug("_collapse_to_graft_change() deleting {}"
                      .format(p4changelist.change))
            del_keys.append(p4changelist.change)
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}"
                          .format(p4file))
                continue
            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                self.graft_change.files.append(p4file)
                LOG.debug("_collapse_to_graft_change() keeping {}"
                          .format(p4file))
            else:
                LOG.debug("_collapse_to_graft_change() skipping, had newer {}"
                          .format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        """
        if not self.graft_change:
            return
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if not self.graft_change:
            return
        return "{path}@{change}".format(path=self.ctx.client_view_path(),
                                        change=self.graft_change.change)

    def copy(self, start_at, stop_at):
        """copy a set of changelists from perforce into git"""
        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)
                if not len(self.changes):
                    LOG.debug("No new changes found to copy")
                    return
                last_commit = self.rev_range.last_commit
            with self.perf.timer[PRINT]:
                self._copy_print()
            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()
            with self.perf.timer[SYNC]:
                self._sync(sorted_changes)
            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None
            with self.perf.timer[MIRROR]:
                self._mirror(marks)
            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                self.fastimport.merge()
            with self.perf.timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))
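
# Hedged usage sketch (illustrative, not part of the original module):
# mirroring the G2P example above, a p4-to-git copy might be driven
# like this; the start/stop forms shown are hypothetical.
#
#   p2g = P2G(ctx)
#   p2g.copy('@1', '#head')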

class GitMirror:
    """handle git things that get mirrored in perforce"""

    def __init__(self, view_name):
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                              (BUILD, OVERALL),
                              (CAT_FILE, BUILD),
                              (LS_TREE, BUILD),
                              (LS_TREE_PROCESS, BUILD),
                              (DIFF_TREE, BUILD),
                              (DIFF_TREE_PROCESS, BUILD),
                              (ADD_SUBMIT, OVERALL),
                              (EXTRACT_OBJECTS, ADD_SUBMIT),
                              (P4_FSTAT, ADD_SUBMIT),
                              (P4_ADD, ADD_SUBMIT),
                              (P4_SUBMIT, ADD_SUBMIT),
                              ])
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.
        """
        object_type = p4gf_object_type.sha1_to_object_type(
            sha1=commit,
            view_name=ctx.config.view_name,
            p4=ctx.p4gf,
            raise_on_error=False)
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    def add_commits(self, marks):
        """build list of commit and tree objects for a set of changelists

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1
        """
        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:
                    # parse perforce change number and SHA1 from marks
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()

                    # add commit object
                    self.git_objects.add_object(
                        GitObject("commit",
                                  sha1,
                                  [(change_num, self.view_name)]))

                    # add all trees referenced by the commit
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(last_top_tree,
                                                               sha1)
                    else:
                        last_top_tree = self.__get_snapshot_trees(sha1)
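
    # Hedged illustration of the mark format consumed above: each entry
    # is ":<changenum> <sha1>". A fabricated mark ":42 aaaa...a" (with a
    # 40-hex-char sha1) would parse as
    #
    #   parts = mark.split(' ')
    #   change_num = parts[0][1:]   # -> "42"
    #   sha1 = parts[1].strip()     # -> "aaaa...a"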

    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.
        """
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while len(add_files):
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            for m in [m for m in ctx.p4gf.messages
                      if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate
                          or m.dict['action'] != 'add')]:
                files_not_added += 1
                LOG.debug(str(m))
            for r in [r for r in result if isinstance(r, dict)]:
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    LOG.debug(r)
                else:
                    added_files.append(r["depotFile"])
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees"
                  .format(commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while len(added_files):
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views",
                         "-v", self.view_name, bite)
        return files_not_added

    def add_objects_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""
        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror
            # operation.
            opened = ctx.p4gf.run('opened')
            if opened:
                ctx.p4gf.run('revert',
                             '//{}/...'.format(ctx.config.p4client_gf))

            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug("adding {0} commits and {1} trees to .git-fusion..."
                          .format(self.git_objects.counts['commit'],
                                  self.git_objects.counts['tree']))

                # build list of objects to add, extracting them from git
                self.progress.progress_init_determinate(
                    len(self.git_objects.objects))
                add_files = [self.__add_object_to_p4(ctx, go)
                             for go in self.git_objects.objects.values()]

                # filter out any files that have already been added
                # only do this if the number of files is large enough to
                # justify the cost of the fstat
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for
                    # existing files, which we can't know until we use
                    # fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # spoon-feed p4 to avoid blowing out memory
                    while len(add_files):
                        bite = add_files[:bite_size]
                        add_files = add_files[bite_size:]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T",
                                     "depotFile, attr-views", bite)
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list"
                              .format(original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:
                    return

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(ctx,
                                                                  add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                            view=ctx.config.view_name)
                        self.progress.status(
                            "Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                    else:
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        return "\n".join([str(self.git_objects),
                          str(self.perf)
                          ])

    def __repr__(self):
        return "\n".join([repr(self.git_objects),
                          str(self.perf)
                          ])

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    def __add_object_to_p4(self, ctx, go):
        """add a commit or tree to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use
        with p4 add
        """
        self.progress.progress_increment(
            "Adding new Git objects to Perforce...")
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it. We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:
            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                os.makedirs(dstdir)

            # get contents of commit or tree; can't just copy it because
            # it's probably in a packfile and we don't know which one. And
            # there's no way to have git give us the compressed commit
            # directly, so we need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:
                f.write(deflated)

        return dst

    def __get_snapshot_trees(self, commit):
        """get all tree objects for a given commit

        commit: SHA1 of commit

        each tree is added to the list to be mirrored

        return the SHA1 of the commit's tree
        """
        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            # yes, we're doing nothing with the result of this list
            # comprehension
            # pylint: disable=W0106
            [self.git_objects.add_object(GitObject("tree", m.group(1)))
             for line in po.splitlines()
             for m in [pattern.match(line)]
             if m]
            # pylint: enable=W0106
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """get all tree objects new in one commit vs another commit

        top_tree1: SHA1 of first commit's tree
        commit2: SHA1 of second commit

        each tree is added to the list to be mirrored

        return the SHA1 of commit2's tree
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2],
                      stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40}"
                " ([0-9a-fA-F]{40}) .*")
            # yes, we're doing nothing with the result of this list
            # comprehension
            # pylint: disable=W0106
            [self.git_objects.add_object(GitObject("tree", m.group(2)))
             for line in po.splitlines()
             for m in [pattern.match(line)]
             if m and m.group(1) == "04"]
            # pylint: enable=W0106
        return top_tree2
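
    # Hedged example of a 'git diff-tree -t' line matched above (sha1s
    # fabricated and abbreviated): given
    #
    #   :040000 040000 aaaa...a bbbb...b M\tsome/dir
    #
    # group(1) is "04", the octal prefix of a directory mode, so
    # group(2) ("bbbb...b") is a new tree sha1 that gets mirrored.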
LOG.debug("adding new object: " + dst) with open(dst, "wb") as f: f.write(deflated) return dst def __get_snapshot_trees(self, commit): """get all tree objects for a given commit commit: SHA1 of commit each tree is added to the list to be mirrored return the SHA1 of the commit's tree """ top_tree = self.__get_commit_tree(commit) with self.perf.timer[LS_TREE]: p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE) po = p.communicate()[0].decode() with self.perf.timer[LS_TREE_PROCESS]: # line is: mode SP type SP sha TAB path # we only want the sha from lines with type "tree" pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*") # yes, we're doing nothing with the result of this list comprehension # pylint: disable=W0106 [ self.git_objects.add_object(GitObject("tree", m.group(1))) for line in po.splitlines() for m in [pattern.match(line)] if m ] # pylint: enable=W0106 return top_tree def __get_delta_trees(self, top_tree1, commit2): """get all tree objects new in one commit vs another commit topTree1: SHA1 of first commit's tree commit2: SHA1 of second commit each tree is added to the list to be mirrored return the SHA1 of commit2's tree """ top_tree2 = self.__get_commit_tree(commit2) with self.perf.timer[DIFF_TREE]: p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2], stdout=PIPE) po = p.communicate()[0].decode() with self.perf.timer[DIFF_TREE_PROCESS]: # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path # we want sha2 from lines where mode2 indicates a dir pattern = re.compile( "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*" ) # yes, we're doing nothing with the result of this list comprehension # pylint: disable=W0106 [ self.git_objects.add_object(GitObject("tree", m.group(2))) for line in po.splitlines() for m in [pattern.match(line)] if m and m.group(1) == "04" ] # pylint: enable=W0106 return top_tree2 def __get_commit_tree(self, commit): """get the one and only tree at the top of commit commit: SHA1 of the commit add the tree object to the list of objects to be mirrored and return its SHA1 """ with self.perf.timer[CAT_FILE]: self.perf.counter[CAT_FILE_COUNT] += 1 p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE) po = p.communicate()[0].decode() self.perf.counter[CAT_FILE_SIZE] += len(po) for line in iter(po.splitlines()): if not line.startswith("tree"): continue # line is: tree sha parts = line.strip().split(' ') sha1 = parts[1] self.git_objects.add_object(GitObject("tree", sha1)) return sha1

class PrintHandler(OutputHandler):
    """OutputHandler for p4 print, hashes files into git repo"""

    def __init__(self, need_unexpand, tempdir):
        OutputHandler.__init__(self)
        self.rev = None
        self.revs = RevList()
        self.need_unexpand = need_unexpand
        self.tempfile = None
        self.tempdir = tempdir
        self.progress = ProgressReporter()
        self.progress.progress_init_indeterminate()

    def outputBinary(self, h):
        """assemble file content, then pass it to hasher via queue"""
        self.appendContent(h)
        return OutputHandler.HANDLED

    def outputText(self, h):
        """assemble file content, then pass it to hasher via queue
        """
        b = bytes(h, 'UTF-8')
        self.appendContent(b)
        return OutputHandler.HANDLED

    def appendContent(self, h):
        """append a chunk of content to the temp file

        if server is 12.1 or older it may be sending expanded ktext files
        so we need to unexpand them

        It would be nice to incrementally compress and hash the file
        but that requires knowing the size up front, which p4 print does
        not currently supply. If/when it does, this can be reworked to
        be more efficient with large files. As it is, as long as the
        SpooledTemporaryFile doesn't rollover, it won't make much of a
        difference.

        So with that limitation, the incoming content is stuffed into
        a SpooledTemporaryFile.
        """
        if not len(h):
            return
        if self.need_unexpand and self.rev.is_k_type():
            h = unexpand(h)
        self.tempfile.write(h)

    def flush(self):
        """compress the last file, hash it and stick it in the repo

        Now that we've got the complete file contents, the header can be
        created and used along with the spooled content to create the
        sha1 and zlib compressed blob content. Finally that is written
        into the .git/objects dir.
        """
        if not self.rev:
            return
        size = self.tempfile.tell()
        self.tempfile.seek(0)
        compressed = tempfile.NamedTemporaryFile(delete=False,
                                                 dir=self.tempdir)
        compress = zlib.compressobj()
        # pylint doesn't understand dynamic definition of sha1 in hashlib
        # pylint: disable=E1101
        sha1 = hashlib.sha1()

        # pylint: disable=W1401
        # disable complaints about the null. We need that.
        # add header first
        header = ("blob " + str(size) + "\0").encode()
        compressed.write(compress.compress(header))
        sha1.update(header)

        # then actual contents
        chunksize = 4096
        while True:
            chunk = self.tempfile.read(chunksize)
            if chunk:
                compressed.write(compress.compress(chunk))
                sha1.update(chunk)
            else:
                break
        # pylint: enable=E1101

        compressed.write(compress.flush())
        compressed.close()

        digest = sha1.hexdigest()
        self.rev.sha1 = digest
        blob_dir = ".git/objects/" + digest[:2]
        blob_file = digest[2:]
        blob_path = blob_dir + "/" + blob_file
        if not os.path.exists(blob_path):
            if not os.path.exists(blob_dir):
                os.makedirs(blob_dir)
            shutil.move(compressed.name, blob_path)
        self.rev = None

    def outputStat(self, h):
        """save path of current file"""
        self.flush()
        self.rev = P4File.create_from_print(h)
        self.revs.append(self.rev)
        self.progress.progress_increment('Copying files')
        LOG.debug("PrintHandler.outputStat() ch={} {}"
                  .format(h['change'], h["depotFile"] + "#" + h["rev"]))
        if self.tempfile:
            self.tempfile.seek(0)
            self.tempfile.truncate()
        else:
            self.tempfile = tempfile.TemporaryFile(buffering=10000000,
                                                   dir=self.tempdir)
        return OutputHandler.HANDLED

    def outputInfo(self, _h):
        """outputInfo call not expected"""
        return OutputHandler.REPORT

    def outputMessage(self, _h):
        """outputMessage call not expected, indicates an error"""
        return OutputHandler.REPORT
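
# Hedged sketch of the git loose-object format that PrintHandler.flush()
# and GitMirror.__add_object_to_p4() both rely on: an object is
# "<type> <size>\0<content>", SHA-1 hashed, then zlib-deflated. This
# helper is illustrative only (it is not called by the module) and
# assumes the module's existing hashlib/zlib imports.
def _example_git_blob(content=b"hello\n"):
    """Return (sha1_hex, deflated_bytes) for a git blob of `content`."""
    header = ("blob " + str(len(content)) + "\0").encode()
    sha1 = hashlib.sha1(header + content).hexdigest()
    return sha1, zlib.compress(header + content)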
class GitMirror: """handle git things that get mirrored in perforce""" def __init__(self, view_name): self.git_objects = GitObjectList() self.perf = p4gf_profiler.TimerCounterSet() self.perf.add_timers([OVERALL, (BUILD, OVERALL), (CAT_FILE, BUILD), (LS_TREE, BUILD), (LS_TREE_PROCESS, BUILD), (DIFF_TREE, BUILD), (DIFF_TREE_PROCESS, BUILD), (ADD_SUBMIT, OVERALL), (EXTRACT_OBJECTS, ADD_SUBMIT), (P4_FSTAT, ADD_SUBMIT), (P4_ADD, ADD_SUBMIT), (P4_SUBMIT, ADD_SUBMIT), ]) self.perf.add_counters([(CAT_FILE_COUNT, "files"), (CAT_FILE_SIZE, "bytes")]) self.progress = ProgressReporter() self.view_name = view_name @staticmethod def get_change_for_commit(commit, ctx): """Given a commit sha1, find the corresponding perforce change. """ object_type = p4gf_object_type.sha1_to_object_type( sha1 = commit , view_name = ctx.config.view_name , p4 = ctx.p4gf , raise_on_error = False) if not object_type: return None return object_type.view_name_to_changelist(ctx.config.view_name) def add_commits(self, marks): """build list of commit and tree objects for a set of changelists marks: list of commit marks output by git-fast-import formatted as: :changenum sha1 """ with self.perf.timer[OVERALL]: with self.perf.timer[BUILD]: last_top_tree = None for mark in marks: #parse perforce change number and SHA1 from marks parts = mark.split(' ') change_num = parts[0][1:] sha1 = parts[1].strip() # add commit object self.git_objects.add_object( GitObject( "commit" , sha1 , [(change_num, self.view_name)] )) # add all trees referenced by the commit if last_top_tree: last_top_tree = self.__get_delta_trees(last_top_tree, sha1) else: last_top_tree = self.__get_snapshot_trees(sha1) def add_objects_with_views(self, ctx, add_files): """Add the list of files to the object cache in the depot and return the number of files not added. """ added_files = [] files_not_added = 0 treecount = 0 commitcount = 0 # Add new files to the object cache. bite_size = 1000 while len(add_files): bite = add_files[:bite_size] add_files = add_files[bite_size:] result = ctx.p4gf.run("add", "-t", "binary", bite) for m in [m for m in ctx.p4gf.messages if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate or m.dict['action'] != 'add')]: files_not_added += 1 LOG.debug(str(m)) for r in [r for r in result if isinstance(r, dict)]: if r["action"] != 'add': # file already exists in depot, perhaps? files_not_added += 1 LOG.debug(r) else: added_files.append(r["depotFile"]) if r["depotFile"].endswith("-tree"): treecount += 1 else: commitcount += 1 LOG.debug("Added {} commits and {} trees".format(commitcount, treecount)) # Set the 'views' attribute on the opened files. while len(added_files): bite = added_files[:bite_size] added_files = added_files[bite_size:] ctx.p4gf.run("attribute", "-p", "-n", "views", "-v", self.view_name, bite) return files_not_added def add_objects_to_p4(self, ctx): """actually run p4 add, submit to create mirror files in .git-fusion""" with self.perf.timer[OVERALL]: # Revert any opened files left over from a failed mirror operation. opened = ctx.p4gf.run('opened') if opened: ctx.p4gf.run('revert', '//{}/...'.format(ctx.config.p4client_gf)) with self.perf.timer[ADD_SUBMIT]: LOG.debug("adding {0} commits and {1} trees to .git-fusion...". 
format(self.git_objects.counts['commit'], self.git_objects.counts['tree'])) # build list of objects to add, extracting them from git self.progress.progress_init_determinate(len(self.git_objects.objects)) add_files = [self.__add_object_to_p4(ctx, go) for go in self.git_objects.objects.values()] # filter out any files that have already been added # only do this if the number of files is large enough to justify # the cost of the fstat existing_files = None with self.perf.timer[P4_FSTAT]: # Need to use fstat to get the 'views' attribute for existing # files, which we can't know until we use fstat to find out. bite_size = 1000 LOG.debug("using fstat to optimize add") original_count = len(add_files) ctx.p4gf.handler = FilterAddFstatHandler(self.view_name) # spoon-feed p4 to avoid blowing out memory while len(add_files): bite = add_files[:bite_size] add_files = add_files[bite_size:] # Try to get only the information we really need. ctx.p4gf.run("fstat", "-Oa", "-T", "depotFile, attr-views", bite) add_files = ctx.p4gf.handler.files existing_files = ctx.p4gf.handler.existing ctx.p4gf.handler = None LOG.debug("{} files removed from add list" .format(original_count - len(add_files))) files_to_add = len(add_files) + len(existing_files) if files_to_add == 0: return with self.perf.timer[P4_ADD]: files_not_added = self.add_objects_with_views(ctx, add_files) edit_objects_with_views(ctx, existing_files) with self.perf.timer[P4_SUBMIT]: if files_not_added < files_to_add: desc = 'Git Fusion {view} copied to git'.format( view=ctx.config.view_name) self.progress.status("Submitting new Git objects to Perforce...") ctx.p4gf.run("submit", "-d", desc) else: LOG.debug("ignoring empty change list...") def __str__(self): return "\n".join([str(self.git_objects), str(self.perf) ]) def __repr__(self): return "\n".join([repr(self.git_objects), str(self.perf) ]) # pylint: disable=R0201, W1401 # R0201 Method could be a function # I agree, this _could_ be a function, does not need self. But when I # blindly promote this to a module-level function, things break and I # cannot explain why. # W1401 Unescaped backslash # We want that null for the header, so we're keeping the backslash. def __add_object_to_p4(self, ctx, go): """add a commit or tree to the git-fusion perforce client workspace return the path of the client workspace file suitable for use with p4 add """ self.progress.progress_increment("Adding new Git objects to Perforce...") ctx.heartbeat() # get client path for .git-fusion file dst = go.git_p4_client_path(ctx) # A tree is likely to already exist, in which case we don't need # or want to try to recreate it. We'll just use the existing one. if os.path.exists(dst): LOG.debug("reusing existing object: " + dst) return dst with self.perf.timer[EXTRACT_OBJECTS]: # make sure dir exists dstdir = os.path.dirname(dst) if not os.path.exists(dstdir): os.makedirs(dstdir) # get contents of commit or tree; can't just copy it because it's # probably in a packfile and we don't know which one. And there's # no way to have git give us the compressed commit directly, so we # need to recompress it p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE) po = p.communicate()[0] header = go.type + " " + str(len(po)) + '\0' deflated = zlib.compress(header.encode() + po) # write it into our p4 client workspace for adding. 
LOG.debug("adding new object: " + dst) with open(dst, "wb") as f: f.write(deflated) return dst def __get_snapshot_trees(self, commit): """get all tree objects for a given commit commit: SHA1 of commit each tree is added to the list to be mirrored return the SHA1 of the commit's tree """ top_tree = self.__get_commit_tree(commit) with self.perf.timer[LS_TREE]: p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE) po = p.communicate()[0].decode() with self.perf.timer[LS_TREE_PROCESS]: # line is: mode SP type SP sha TAB path # we only want the sha from lines with type "tree" pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*") # yes, we're doing nothing with the result of this list comprehension # pylint: disable=W0106 [self.git_objects.add_object(GitObject("tree", m.group(1))) for line in po.splitlines() for m in [pattern.match(line)] if m] # pylint: enable=W0106 return top_tree def __get_delta_trees(self, top_tree1, commit2): """get all tree objects new in one commit vs another commit topTree1: SHA1 of first commit's tree commit2: SHA1 of second commit each tree is added to the list to be mirrored return the SHA1 of commit2's tree """ top_tree2 = self.__get_commit_tree(commit2) with self.perf.timer[DIFF_TREE]: p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2], stdout=PIPE) po = p.communicate()[0].decode() with self.perf.timer[DIFF_TREE_PROCESS]: # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path # we want sha2 from lines where mode2 indicates a dir pattern = re.compile( "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*") # yes, we're doing nothing with the result of this list comprehension # pylint: disable=W0106 [self.git_objects.add_object(GitObject("tree", m.group(2))) for line in po.splitlines() for m in [pattern.match(line)] if m and m.group(1) == "04"] # pylint: enable=W0106 return top_tree2 def __get_commit_tree(self, commit): """get the one and only tree at the top of commit commit: SHA1 of the commit add the tree object to the list of objects to be mirrored and return its SHA1 """ with self.perf.timer[CAT_FILE]: self.perf.counter[CAT_FILE_COUNT] += 1 p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE) po = p.communicate()[0].decode() self.perf.counter[CAT_FILE_SIZE] += len(po) for line in iter(po.splitlines()): if not line.startswith("tree"): continue # line is: tree sha parts = line.strip().split(' ') sha1 = parts[1] self.git_objects.add_object(GitObject("tree", sha1)) return sha1
class G2P: """class to handle batching of p4 commands when copying git to p4""" def __init__(self, ctx): self.ctx = ctx self.addeditdelete = {} self.perf = p4gf_profiler.TimerCounterSet() self.perf.add_timers([OVERALL, (FAST_EXPORT, OVERALL), (TEST_BLOCK_PUSH, OVERALL), (CHECK_CONFLICT, OVERALL), (COPY, OVERALL), (GIT_CHECKOUT, COPY), (CHECK_PROTECTS, COPY), (COPY_BLOBS_1, COPY), (COPY_BLOBS_2, COPY), (MIRROR, OVERALL), ]) self.perf.add_counters([N_BLOBS, N_RENAMES]) self.usermap = p4gf_usermap.UserMap(ctx.p4gf) self.progress = ProgressReporter() def __str__(self): return "\n".join([str(self.perf), str(self.ctx.mirror) ]) def revert_and_raise(self, errmsg): """An error occurred while attempting to submit the incoming change to Perforce. As a result, revert all modifications, log the error, and raise an exception.""" # roll back and raise the problem to the caller p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client) if p4: opened = p4.run('opened') if opened: p4.run('revert', '//{}/...'.format(self.ctx.p4.client)) # revert doesn't clean up added files self.remove_added_files() if not errmsg: errmsg = traceback.format_stack() msg = "import failed: {}".format(errmsg) LOG.error(msg) raise RuntimeError(msg) def _p4_message_to_text(self, msg): ''' Convert a list of P4 messages to a single string. Annotate some errors with additional context such as P4USER. ''' txt = str(msg) if msg.msgid in MSGID_EXPLAIN_P4USER: txt += ' P4USER={}.'.format(self.ctx.p4.user) if msg.msgid in MSGID_EXPLAIN_P4CLIENT: txt += ' P4USER={}.'.format(self.ctx.p4.client) return txt def check_p4_messages(self): """If the results indicate a file is locked by another user, raise an exception so that the overall commit will fail. The changes made so far will be reverted. """ msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN) if not msgs: return lines = [self._p4_message_to_text(m) for m in msgs] self.revert_and_raise('\n'.join(lines)) def _p4run(self, cmd): ''' Run one P4 command, logging cmd and results. 
''' p4 = self.ctx.p4 LOG.getChild('p4.cmd').debug(" ".join(cmd)) results = p4.run(cmd) if p4.errors: LOG.getChild('p4.err').error("\n".join(p4.errors)) if p4.warnings: LOG.getChild('p4.warn').warning("\n".join(p4.warnings)) LOG.getChild('p4.out').debug("{}".format(results)) if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG): log = LOG.getChild('p4.msgid') for m in p4.messages: log.debug(p4gf_p4msg.msg_repr(m)) self.check_p4_messages() def run_p4_commands(self): """run all pending p4 commands""" for operation, paths in self.addeditdelete.items(): cmd = operation.split(' ') # avoid writable client files problem by using -k and handling # the actual file action ourselves (in add/edit cases the caller # has already written the new file) if not cmd[0] == 'add': cmd.append('-k') if cmd[0] == 'move': # move takes a tuple of two arguments, the old name and new name oldnames = [escape_path(pair[0]) for pair in paths] # move requires opening the file for edit first self._p4run(['edit', '-k'] + oldnames) LOG.debug("Edit {}".format(oldnames)) for pair in paths: (frompath, topath) = pair self._p4run(['move', '-k', escape_path(frompath), escape_path(topath)]) LOG.debug("Move from {} to {}".format(frompath, topath)) else: reopen = [] if 'edit -t' in operation: # edit -t text does not work, must 'edit' then 'reopen -t' # "can't change from xtext - use 'reopen'" reopen = ['reopen', '-t', cmd[2]] cmd = cmd[0:1] + cmd[3:] if not cmd[0] == 'add': self._p4run(cmd + [escape_path(path) for path in paths]) else: self._p4run(cmd + paths) if reopen: self._p4run(reopen + [escape_path(path) for path in paths]) if cmd[0] == 'delete': LOG.debug("Delete {}".format(paths)) for path in paths: os.remove(path) def remove_added_files(self): """remove added files to restore p4 client after failure of p4 command""" for operation, paths in self.addeditdelete.items(): cmd = operation.split(' ') if cmd[0] == 'add': for path in paths: os.unlink(path) def setup_p4_command(self, command, p4path): """Add command to list to be run by run_p4_commands. If the command is 'move' then the p4path is expected to be a tuple of the frompath and topath.""" if command in self.addeditdelete: self.addeditdelete[command].append(p4path) else: self.addeditdelete[command] = [p4path] def _toggle_filetype(self, p4path, isx): """Returns the new file type for the named file, switching the executable state based on the isx value. Args: p4path: Path of the file to modify. isx: True if currently executable. Returns: New type for the file; may be None. """ p4type = None if isx: p4type = '+x' else: # To remove a previously assigned modifier, the whole filetype # must be specified. for tipe in ['headType', 'type']: # For a file that was executable, is being renamed (with # edits), and is no longer executable, we need to handle the # fact that it's not yet in Perforce and so does not have a # headType. try: p4type = p4gf_util.first_value_for_key( self.ctx.p4.run(['fstat', '-T' + tipe, p4path]), tipe) except P4.P4Exception: pass if p4type: p4type = p4gf_p4filetype.remove_mod(p4type, 'x') return p4type def add_or_edit_blob(self, blob): """run p4 add or edit for a new or modified file""" # get local path in p4 client p4path = self.ctx.contentlocalroot + blob['path'] # edit or add? 
isedit = os.path.exists(p4path) # make sure dest dir exists dstdir = os.path.dirname(p4path) if not os.path.exists(dstdir): os.makedirs(dstdir) if isedit: LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path) # for edits, only use +x or -x to propagate partial filetype changes wasx = os.stat(p4path).st_mode & stat.S_IXUSR isx = os.stat(blob['path']).st_mode & stat.S_IXUSR if wasx != isx: p4type = self._toggle_filetype(p4path, isx) else: p4type = None if p4type: LOG.debug(" set filetype: {ft} oldx={oldx} newx={newx}" .format(ft=p4type, oldx=wasx, newx=isx)) shutil.copystat(blob['path'], p4path) shutil.copyfile(blob['path'], p4path) else: LOG.debug("Copy add from: " + blob['path'] + " to " + p4path) # for adds, use complete filetype of new file p4type = p4type_from_mode(blob['mode']) shutil.copyfile(blob['path'], p4path) # if file exists it's an edit, so do p4 edit before copying content # for an add, do p4 add after copying content p4type = ' -t ' + p4type if p4type else '' if isedit: self.setup_p4_command("edit" + p4type, p4path) else: self.setup_p4_command("add -f" + p4type, p4path) def rename_blob(self, blob): """ run p4 move for a renamed/moved file""" self.perf.counter[N_RENAMES] += 1 # get local path in p4 client p4frompath = self.ctx.contentlocalroot + blob['path'] p4topath = self.ctx.contentlocalroot + blob['topath'] # ensure destination directory exists dstdir = os.path.dirname(p4topath) if not os.path.exists(dstdir): os.makedirs(dstdir) # copy out of Git repo to Perforce workspace shutil.copyfile(blob['topath'], p4topath) self.setup_p4_command("move", (p4frompath, p4topath)) def copy_blob(self, blob): """run p4 integ for a copied file""" self.perf.counter[N_BLOBS] += 1 # get local path in p4 client p4frompath = self.ctx.contentlocalroot + blob['path'] p4topath = self.ctx.contentlocalroot + blob['topath'] self._p4run(["copy", "-v", escape_path(p4frompath), escape_path(p4topath)]) # make sure dest dir exists dstdir = os.path.dirname(p4topath) if not os.path.exists(dstdir): os.makedirs(dstdir) LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath) shutil.copyfile(p4frompath, p4topath) def delete_blob(self, blob): """run p4 delete for a deleted file""" # get local path in p4 client p4path = self.ctx.contentlocalroot + blob['path'] self.setup_p4_command("delete", p4path) def copy_blobs(self, blobs): """copy git blobs to perforce revs""" # first, one pass to do rename/copy # these don't batch. move can't batch due to p4 limitations. # however, the edit required before move is batched. # copy could be batched by creating a temporary branchspec # but for now it's done file by file with self.perf.timer[COPY_BLOBS_1]: for blob in blobs: if blob['action'] == 'R': self.rename_blob(blob) elif blob['action'] == 'C': self.copy_blob(blob) self.run_p4_commands() # then, another pass to do add/edit/delete # these are batched to allow running the minimum number of # p4 commands. That means no more than one delete, one add per # filetype and one edit per filetype. Since we only support three # possible filetypes (text, text+x, symlink) there could be at most # 1 + 3 + 3 commands run. 
with self.perf.timer[COPY_BLOBS_2]: self.addeditdelete = {} for blob in blobs: if blob['action'] == 'M': self.add_or_edit_blob(blob) elif blob['action'] == 'D': self.delete_blob(blob) self.run_p4_commands() def check_protects(self, p4user, blobs): """check if author is authorized to submit files""" pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user) pc.filter_paths(blobs) if pc.has_error(): self.revert_and_raise(pc.error_message()) def _reset_for_new_commit(self): """ Clear out state from previous commit that must not carry over into next commit. """ self.addeditdelete = {} def attempt_resync(self): """Attempts to sync -k the Git Fusion client to the change that corresponds to the HEAD of the Git mirror repository. This prevents the obscure "file(s) not on client" error. """ # we assume we are in the GIT_WORK_TREE, which seems to be a safe # assumption at this point try: last_commit = p4gf_util.git_ref_master() if last_commit: last_changelist_number = self.ctx.mirror.get_change_for_commit( last_commit, self.ctx) if last_changelist_number: filerev = "//...@{}".format(last_changelist_number) self._p4run(['sync', '-k', filerev]) except P4.P4Exception: # don't stop the world if we have an error above LOG.warn("resync failed with exception", exc_info=True) def copy_commit(self, commit): """copy a single commit""" self._reset_for_new_commit() #OG.debug("dump commit {}".format(commit)) LOG.debug("for commit {}".format(commit['mark'])) LOG.debug("with description: {}".format(commit['data'])) LOG.debug("files affected: {}".format(commit['files'])) # Reject merge commits. Not supported in 2012.1. if 'merge' in commit: self.revert_and_raise(("Merge commit {} not permitted." +" Rebase to create a linear" +" history.").format(commit['sha1'])) # strip any enclosing angle brackets from the email address email = commit['author']['email'].strip('<>') user = self.usermap.lookup_by_email(email) LOG.debug("for email {} found user {}".format(email, user)) if (user is None) or (not self.usermap.p4user_exists(user[0])): # User is not a known and existing Perforce user, and the # unknown_git account is not set up, so reject the commit. self.revert_and_raise("User '{}' not permitted to commit".format(email)) author_p4user = user[0] for blob in commit['files']: err = check_valid_filename(blob['path']) if err: self.revert_and_raise(err) with self.perf.timer[GIT_CHECKOUT]: d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']]) if d['Popen'].returncode: # Sometimes git cannot distinquish the revision from a path... p4gf_util.popen(['git', 'reset', '--hard', commit['sha1'], '--']) with self.perf.timer[CHECK_PROTECTS]: self.check_protects(author_p4user, commit['files']) try: self.copy_blobs(commit['files']) except P4.P4Exception as e: self.revert_and_raise(str(e)) with self.perf.timer[COPY_BLOBS_2]: pusher_p4user = self.ctx.authenticated_p4user LOG.debug("Pusher is: {}, author is: {}".format(pusher_p4user, author_p4user)) desc = change_description(commit, pusher_p4user, author_p4user) try: opened = self.ctx.p4.run('opened') if opened: changenum = p4_submit(self.ctx.p4, desc, author_p4user, commit['author']['date']) LOG.info("Submitted change @{} for commit {}".format(changenum, commit['sha1'])) else: LOG.info("Ignored empty commit {}".format(commit['sha1'])) return None except P4.P4Exception as e: self.revert_and_raise(str(e)) return ":" + str(changenum) + " " + commit['sha1'] def test_block_push(self): """Test hook to temporarily block and let test script introduce conflicting changes. 
""" s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH) if not s: return log = logging.getLogger("test_block_push") block_dict = p4gf_util.test_var_to_dict(s) log.debug(block_dict) # Fetch ALL the submitted changelists as of right now. log.debug("p4 changes {}".format(p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client))) cl_ay = self.ctx.p4.run('changes', '-l', p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)) # Don't block until after something? after = block_dict['after'] if after: if not contains_desc(after, cl_ay): log.debug("Do not block until after: {}".format(after)) return until = block_dict['until'] log.debug("BLOCKING. Seen 'after': {}".format(after)) log.debug("BLOCKING. Waiting for 'until': {}".format(until)) changes_path_at = ("{path}@{change},now" .format(path=p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client), change=cl_ay[-1]['change'])) while not contains_desc(until, cl_ay): time.sleep(1) cl_ay = self.ctx.p4.run('changes', changes_path_at) log.debug("Block released") def copy(self, start_at, end_at): """copy a set of commits from git into perforce""" with self.perf.timer[OVERALL]: with p4gf_util.HeadRestorer(): LOG.debug("begin copying from {} to {}".format(start_at, end_at)) self.attempt_resync() with self.perf.timer[CHECK_CONFLICT]: conflict_checker = G2PConflictChecker(self.ctx) with self.perf.timer[FAST_EXPORT]: fe = p4gf_fastexport.FastExport(start_at, end_at, self.ctx.tempdir.name) fe.run() marks = [] commit_count = 0 for x in fe.commands: if x['command'] == 'commit': commit_count += 1 self.progress.progress_init_determinate(commit_count) try: for command in fe.commands: with self.perf.timer[TEST_BLOCK_PUSH]: self.test_block_push() if command['command'] == 'commit': self.progress.progress_increment("Copying changelists...") self.ctx.heartbeat() with self.perf.timer[COPY]: mark = self.copy_commit(command) if mark is None: continue with self.perf.timer[CHECK_CONFLICT]: (git_commit_sha1, p4_changelist_number) = mark_to_commit_changelist(mark) conflict_checker.record_commit(git_commit_sha1, p4_changelist_number) if conflict_checker.check(): LOG.error("P4 conflict found") break marks.append(mark) elif command['command'] == 'reset': pass else: raise RuntimeError("Unexpected fast-export command: " + command['command']) finally: # we want to write mirror objects for any commits that made it through # any exception will still be alive after this with self.perf.timer[MIRROR]: self.ctx.mirror.add_commits(marks) self.ctx.mirror.add_objects_to_p4(self.ctx) if conflict_checker.has_conflict(): raise RuntimeError("Conflicting change from Perforce caused one" + " or more git commits to fail. Time to" + " pull, rebase, and try again.") LOG.getChild("time").debug("\n" + str(self))
class P2G:
    """class to manage copying from Perforce to git"""
    def __init__(self, ctx):
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)

        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                              (SETUP, OVERALL),
                              (PRINT, OVERALL),
                              (FSTAT, OVERALL),
                              (SYNC, OVERALL),
                              (FAST_IMPORT, OVERALL),
                              (MIRROR, OVERALL),
                              (MERGE, OVERALL),
                              (PACK, OVERALL)
                              ])

        self.rev_range = None      # RevRange instance set in copy().
        self.graft_change = None
        self.changes = None        # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs = None   # RevList produced by PrintHandler.
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join(["\n\nFast Import:\n",
                          str(self.fastimport),
                          "",
                          str(self.perf),
                          ""
                          ])

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug("Revision range to copy to Git: {rr}"
                  .format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the
            # change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                self.ctx.p4,
                self.rev_range.graft_change_num,
                "ignore_depot_files")
            self.graft_change.description += (
                '\n[grafted history before {start_at}]'
                .format(start_at=start_at))

    def _path_range(self):
        """Return the common path...@range string we use frequently."""
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
            args.append("-k")
        self.ctx.p4.run("print", args, self._path_range())
        printhandler.flush()
        printhandler.progress.progress_finish()

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
                args.append("-k")
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)
            printhandler.flush()

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if p4file.change not in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """run fstat to find deleted revs and get client paths"""
        # TODO: for 12.2, print will also report deleted revs, so between
        # that and using MapApi to get client paths we won't need this fstat.
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

        if self.graft_change:
            # Also run 'p4 fstat //<view>/...@change' for the graft
            # change to catch all files as of @change, not just
            # revs changed between begin and end of _path_range().
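            # (Illustrative command shape with hypothetical values:
            #  'p4 fstat -TdepotFile,... //myclient/...@1500'
            #  where 1500 is the graft changelist number.)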
self.ctx.p4.run("fstat", fstat_cols, self._graft_path()) self.ctx.p4.handler = None self._collapse_to_graft_change() self._add_graft_to_changes() # don't need this any more self.printed_revs = None sorted_changes = [str(y) for y in sorted([int(x) for x in self.changes.keys()])] LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes])) return sorted_changes def _sync(self, sorted_changes): """fake sync of last change to make life easier at push time""" self.ctx.p4.handler = SyncHandler() lastchange = self.changes[sorted_changes[-1]] self.ctx.p4.run("sync", "-kf", self.ctx.client_view_path() + "@" + str(lastchange.change)) self.ctx.p4.handler = None def _fast_import(self, sorted_changes, last_commit): """build fast-import script from changes, then run fast-import""" self.progress.progress_init_determinate(len(sorted_changes)) for changenum in sorted_changes: change = self.changes[changenum] self.progress.progress_increment("Copying changelists...") self.ctx.heartbeat() # create commit and trees self.fastimport.add_commit(change, last_commit) last_commit = change.change # run git-fast-import and get list of marks marks = self.fastimport.run_fast_import() # done with these self.changes = None return marks def _mirror(self, marks): """build up list of p4 objects to mirror git repo in perforce then submit them """ self.ctx.mirror.add_commits(marks) self.ctx.mirror.add_objects_to_p4(self.ctx) LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror)) self.ctx.mirror = GitMirror(self.ctx.config.view_name) last_commit = marks[len(marks) - 1] LOG.debug("Last commit created: " + last_commit) # pylint: disable=R0201 # R0201 Method could be a function def _pack(self): """run 'git gc' to pack up the blobs aside from any possible performance benefit, this prevents warnings from git about "unreachable loose objects" """ p4gf_util.popen_no_throw(["git", "gc"]) def _collapse_to_graft_change(self): """Move all of the files from pre-graft changelists into the graft changelist. Remove all pre-graft changelists. NOP if not grafting. 'p4 print //client/...@100' does indeed print all the files that exist @100, but the tag dict that goes with each file includes the changelist in which that file was last added/edited, not 100. So this function gathers up all the file revs with change=1..99 and sticks them under change 100's file list. """ if (not self.graft_change): return graft_num_int = int(self.graft_change.change) LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(graft_num_int)) # Delete all P4Changelist elements from self.changes where they # refer to a change that will be collapsed into the graft change, # including the graft change itself. del_keys = [] for p4changelist in self.changes.values(): if graft_num_int < int(p4changelist.change): LOG.debug("_collapse_to_graft_change() skipping {}".format(p4changelist.change)) continue LOG.debug("_collapse_to_graft_change() deleting {}".format(p4changelist.change)) del_keys.append(p4changelist.change) for key in del_keys: del self.changes[key] # Associate with the graft change all printed P4File results from # graft-change or older for (_key, p4file) in self.printed_revs.revs: if graft_num_int < int(p4file.change): LOG.debug("_collapse_to_graft_change() skipping post-graft {}".format(p4file)) continue old = self.graft_change.file_from_depot_path(p4file.depot_path) # If print picked up multiple revs, keep the newest. 
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                self.graft_change.files.append(p4file)
                LOG.debug("_collapse_to_graft_change() keeping {}"
                          .format(p4file))
            else:
                LOG.debug("_collapse_to_graft_change() skipping, had newer {}"
                          .format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes: it will be
        copied over like any other change.

        NOP if not grafting.
        """
        if not self.graft_change:
            return
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if not self.graft_change:
            return None
        return "{path}@{change}".format(
            path=self.ctx.client_view_path(),
            change=self.graft_change.change)

    def copy(self, start_at, stop_at):
        """copy a set of changelists from perforce into git"""
        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not len(self.changes):
                    LOG.debug("No new changes found to copy")
                    return

                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:
                self._copy_print()

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:
                self._sync(sorted_changes)

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with self.perf.timer[MIRROR]:
                self._mirror(marks)

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                self.fastimport.merge()

            with self.perf.timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))
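
# A minimal usage sketch (hypothetical; assumes an initialized Context in
# `ctx`; the start_at/stop_at values below are illustrative forms for
# RevRange.from_start_stop(), not verified entry-point arguments):
#
#   p2g = P2G(ctx)
#   p2g.copy('@1', '#head')    # copy all changelists into the git repo
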
class PrintHandler(OutputHandler):
    """OutputHandler for p4 print, hashes files into git repo"""
    def __init__(self, need_unexpand, tempdir):
        OutputHandler.__init__(self)
        self.rev = None
        self.revs = RevList()
        self.need_unexpand = need_unexpand
        self.tempfile = None
        self.tempdir = tempdir
        self.progress = ProgressReporter()
        self.progress.progress_init_indeterminate()

    def outputBinary(self, h):
        """assemble file content, then pass it to hasher via queue"""
        self.appendContent(h)
        return OutputHandler.HANDLED

    def outputText(self, h):
        """assemble file content, then pass it to hasher via queue"""
        b = bytes(h, 'UTF-8')
        self.appendContent(b)
        return OutputHandler.HANDLED

    def appendContent(self, h):
        """append a chunk of content to the temp file

        if server is 12.1 or older it may be sending expanded ktext files
        so we need to unexpand them

        It would be nice to incrementally compress and hash the file, but
        that requires knowing the size up front, which p4 print does not
        currently supply. If/when it does, this can be reworked to be
        more efficient with large files. As it is, as long as the
        SpooledTemporaryFile doesn't roll over, it won't make much of a
        difference.

        So, with that limitation, the incoming content is stuffed into
        a SpooledTemporaryFile.
        """
        if not len(h):
            return
        if self.need_unexpand and self.rev.is_k_type():
            h = unexpand(h)
        self.tempfile.write(h)

    def flush(self):
        """compress the last file, hash it and stick it in the repo

        Now that we've got the complete file contents, the header can be
        created and used along with the spooled content to create the
        sha1 and zlib-compressed blob content. Finally that is written
        into the .git/objects dir.
        """
        if not self.rev:
            return
        size = self.tempfile.tell()
        self.tempfile.seek(0)
        compressed = tempfile.NamedTemporaryFile(delete=False,
                                                 dir=self.tempdir)
        compress = zlib.compressobj()

        # pylint doesn't understand dynamic definition of sha1 in hashlib
        # pylint: disable=E1101
        sha1 = hashlib.sha1()

        # pylint:disable=W1401
        # disable complaints about the null. We need that.
        # add header first
        header = ("blob " + str(size) + "\0").encode()
        compressed.write(compress.compress(header))
        sha1.update(header)

        # then actual contents
        chunksize = 4096
        while True:
            chunk = self.tempfile.read(chunksize)
            if chunk:
                compressed.write(compress.compress(chunk))
                sha1.update(chunk)
            else:
                break
        # pylint: enable=E1101

        compressed.write(compress.flush())
        compressed.close()

        digest = sha1.hexdigest()
        self.rev.sha1 = digest
        blob_dir = ".git/objects/" + digest[:2]
        blob_file = digest[2:]
        blob_path = blob_dir + "/" + blob_file
        if not os.path.exists(blob_path):
            if not os.path.exists(blob_dir):
                os.makedirs(blob_dir)
            shutil.move(compressed.name, blob_path)
        self.rev = None

    def outputStat(self, h):
        """save path of current file"""
        self.flush()
        self.rev = P4File.create_from_print(h)
        self.revs.append(self.rev)
        self.progress.progress_increment('Copying files')
        LOG.debug("PrintHandler.outputStat() ch={} {}"
                  .format(h['change'], h["depotFile"] + "#" + h["rev"]))
        if self.tempfile:
            self.tempfile.seek(0)
            self.tempfile.truncate()
        else:
            self.tempfile = tempfile.TemporaryFile(buffering=10000000,
                                                   dir=self.tempdir)
        return OutputHandler.HANDLED

    def outputInfo(self, _h):
        """outputInfo call not expected"""
        return OutputHandler.REPORT

    def outputMessage(self, _h):
        """outputMessage call not expected, indicates an error"""
        return OutputHandler.REPORT
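
# For reference, a minimal standalone sketch (illustrative only, not part of
# the import path) of the loose-object encoding that PrintHandler.flush()
# performs: git names a blob by the sha1 of "blob <size>\0" + content and
# stores the zlib-compressed bytes under .git/objects/<sha1[:2]>/<sha1[2:]>.
#
#   import hashlib, zlib
#   def loose_blob(data):
#       store = "blob {}\0".format(len(data)).encode() + data
#       return hashlib.sha1(store).hexdigest(), zlib.compress(store)
#
# flush() does the same thing incrementally so large spooled files never
# have to be held in memory all at once.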