# Example #1
class G2P:
    """class to handle batching of p4 commands when copying git to p4"""
    # Store the context and create the profiling, user-map and progress
    # helpers used by the other methods of this class.
    # NOTE(review): the indented (name, parent) tuples below are not attached
    # to any call in this listing -- the statement that opened the list and
    # its closing bracket appear to be missing, so the method does not parse
    # as shown.  Restore them from the original file.
    def __init__(self, ctx):
        # ctx is read elsewhere in this class for ctx.p4, ctx.p4gf, ctx.mirror
        # and ctx.contentlocalroot.
        self.ctx = ctx
        # Pending p4 commands: command string -> list of paths.  Reset by
        # _reset_for_new_commit() and copy_blobs().
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
            (FAST_EXPORT, OVERALL),
            (COPY, OVERALL),
            (GIT_CHECKOUT, COPY),
            (CHECK_PROTECTS, COPY),
            (COPY_BLOBS_1, COPY),
            (COPY_BLOBS_2, COPY),
            (MIRROR, OVERALL),
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        # Used by copy_commit() to map a commit author's email to a user.
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([str(self.perf), str(self.ctx.mirror)])

    def revert_and_raise(self, errmsg):
        """Revert files opened in the Git Fusion client, then raise.

        Called when an error occurred while attempting to submit the
        incoming change to Perforce.

        errmsg: text describing the failure.  When empty or None, the
                current call stack is reported instead.

        Always raises RuntimeError with the message "import failed: ...".
        """
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        # NOTE(review): no cleanup call follows this comment in this listing;
        # confirm against the original file whether one belongs here.
        if not errmsg:
            # format_stack() returns a list of strings; join them so the
            # message holds a readable stack instead of a list repr.
            errmsg = "".join(traceback.format_stack())
        raise RuntimeError("import failed: {}".format(errmsg))

    def _p4_message_to_text(self, msg):
        """Convert one P4 message to a single string.

        Annotate some errors with additional context such as P4USER or
        P4CLIENT.

        msg: message object; str(msg) supplies the text and msg.msgid
             selects which annotation, if any, is appended.

        Returns the annotated text.
        """
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # The value appended here is the client name, so label it
            # P4CLIENT rather than P4USER.
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    # Looks up MSGID_CANNOT_OPEN messages on self.ctx.p4 and converts each one
    # to text with _p4_message_to_text().
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, `if not msgs:` has no body, and `lines` is built but never
    # used.  Restore the missing statements from the original file.
    def check_p4_messages(self):
        """If the results indicate a file is locked by another user,
        raise an exception so that the overall commit will fail. The
        changes made so far will be reverted.
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:

        lines = [self._p4_message_to_text(m) for m in msgs]

    # Runs `cmd` (a list of p4 arguments) through self.ctx.p4.run() after
    # logging the space-joined command to the 'p4.cmd' child logger.
    # NOTE(review): this listing looks incomplete -- the docstring line is not
    # quoted, the `if p4.errors:` / `if p4.warnings:` / `for m in ...` bodies
    # are missing, and `results` is never returned.
    def _p4run(self, cmd):
        Run one P4 command, logging cmd and results.
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
        if p4.warnings:
        if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG):
            log = LOG.getChild('p4.msgid')
            for m in p4.messages:


    # Walks self.addeditdelete; each key is split on spaces into a p4 argument
    # list and run against the paths queued under it via _p4run().
    # NOTE(review): this listing looks incomplete -- `if not cmd[0] == 'add':`
    # (first occurrence) has no body, the `move` call is only a fragment, the
    # non-move branch has lost its `else:`, and the final `for path in paths:`
    # has no body.
    def run_p4_commands(self):
        """run all pending p4 commands"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if not cmd[0] == 'add':
            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for pair in paths:
                    (frompath, topath) = pair
                        'move', '-k',
                    LOG.debug("Move from {} to {}".format(frompath, topath))
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    # cmd[2] is the filetype that followed '-t'; it is moved
                    # to the reopen command and dropped from the edit.
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                # NOTE(review): as listed, both calls below run for non-add
                # commands; presumably an `else:` separated them -- confirm.
                if not cmd[0] == 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:

    # Visits every path queued under an 'add' command in self.addeditdelete.
    # NOTE(review): the body of the inner `for path in paths:` loop is missing
    # from this listing, so what is done with each path cannot be seen here.
    def remove_added_files(self):
        """remove added files to restore p4 client after failure of p4 command"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            if cmd[0] == 'add':
                for path in paths:

    def setup_p4_command(self, command, p4path):
        """Add command to list to be run by run_p4_commands. If the command
        is 'move' then the p4path is expected to be a tuple of the frompath
        and topath."""
        if command in self.addeditdelete:
            self.addeditdelete[command] = [p4path]

    # Computes a p4 filetype for p4path: '+x' when isx is true; otherwise the
    # visible code reads 'headType' / 'type' via `p4 fstat` and strips the 'x'
    # modifier with p4gf_p4filetype.remove_mod().
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, and the `else:` / `try:` lines plus the `except` body appear to
    # be missing.
    def _toggle_filetype(self, p4path, isx):
        """Returns the new file type for the named file, switching the
        executable state based on the isx value.

            p4path: Path of the file to modify.
            isx: True if currently executable.

            New type for the file; may be None.
        p4type = None
        if isx:
            p4type = '+x'
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                    p4type = p4gf_util.first_value_for_key(
                        self.ctx.p4.run(['fstat', '-T' + tipe, p4path]), tipe)
                except P4.P4Exception:
                if p4type:
                    p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    # Copies blob['path'] into the p4 client workspace and queues either an
    # "edit" or an "add -f" command (optionally with " -t <filetype>") through
    # setup_p4_command().  An existing workspace file means edit.
    # NOTE(review): this listing looks incomplete -- the directory-creation
    # body, several `else:` lines and the opener of the "set filetype" log
    # call are missing, so the edit and add branches run together as shown.
    def add_or_edit_blob(self, blob):
        """run p4 add or edit for a new or modified file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
                p4type = None
            if p4type:
                    "  set filetype: {ft}  oldx={oldx} newx={newx}".format(
                        ft=p4type, oldx=wasx, newx=isx))
            shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """Stage a p4 move for a renamed/moved file.

        blob: dict with 'path' (the old name) and 'topath' (the new name).

        The new content is copied from blob['topath'] into the p4 client
        workspace and a "move" command is queued with the
        (frompath, topath) pair.  Increments the N_RENAMES counter.
        """
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            # Without this the copy below fails for a rename into a
            # directory that is new to the workspace.
            os.makedirs(dstdir)
        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    # Builds the workspace source and destination paths from blob['path'] and
    # blob['topath'], then copies the source file over the destination.
    # Increments the N_BLOBS counter.
    # NOTE(review): this listing looks incomplete -- only a fragment of the
    # p4 "copy -v" call remains, and the directory-creation body is missing.
    def copy_blob(self, blob):
        """run p4 integ for a copied file"""
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

            ["copy", "-v",

        # make sure dest dir exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """run p4 delete for a deleted file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    # Makes two timed passes over `blobs`, dispatching on blob['action']:
    # 'R' / 'C' in the first pass, 'M' / 'D' in the second.
    # self.addeditdelete is cleared before the second pass.
    # NOTE(review): every branch body is missing from this listing, so the
    # calls made for each action cannot be seen here.
    def copy_blobs(self, blobs):
        """copy git blobs to perforce revs"""
        # first, one pass to do rename/copy
        # these don't batch.  move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                elif blob['action'] == 'C':
        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands.  That means no more than one delete, one add per
        # filetype and one edit per filetype.  Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                elif blob['action'] == 'D':

    # Builds a ProtectsChecker from the context, the authenticated (pushing)
    # user and `p4user`, then tests pc.has_error().
    # NOTE(review): this listing looks incomplete -- `blobs` is never passed
    # to the checker and the `if pc.has_error():` body is missing.
    def check_protects(self, p4user, blobs):
        """check if author is authorized to submit files"""
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        if pc.has_error():

    def _reset_for_new_commit(self):
        Clear out state from previous commit that must not carry over
        into next commit.
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This prevents
        the obscure "file(s) not on client" error.

        Best effort: a P4.P4Exception raised along the way is logged as a
        warning and swallowed.  Nothing is synced when the master ref or
        its matching changelist cannot be found.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            # Logger.warn() is a deprecated alias; warning() is the
            # supported spelling.
            LOG.warning("resync failed with exception", exc_info=True)

    # As far as this listing shows: logs the commit, rejects merge commits via
    # revert_and_raise(), maps the author email to a Perforce user through
    # self.usermap, validates each file name, checks out the commit with git,
    # checks protections, and submits opened files with p4_submit().
    # Returns ":<changenum> <sha1>" on the visible success path and None for
    # an empty commit.
    # NOTE(review): many lines are missing from this listing (call openers,
    # `try:` / `else:` lines and several branch bodies), so the method does
    # not parse as shown.  Restore them from the original file.
    def copy_commit(self, commit):
        """copy a single commit"""


        #OG.debug("dump commit {}".format(commit))
        LOG.debug("for  commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted." +
                                   " Rebase to create a linear" +
                                   " history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
                "User '{}' not permitted to commit".format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinquish the revision from a path...
                    ['git', 'reset', '--hard', commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        except P4.P4Exception as e:

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(
                pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                    LOG.info("Submitted change @{} for commit {}".format(
                        changenum, commit['sha1']))
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
            return ":" + str(changenum) + " " + commit['sha1']

    # Test-only hook.  Reads the P4GF_TEST_BLOCK_PUSH test variable, turns it
    # into a dict with 'after' and 'until' entries, and loops on `p4 changes`
    # until contains_desc(until, ...) is true.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, the early-exit bodies are missing, and the `p4 changes` calls
    # and the changes_path_at format call are cut off.
    def test_block_push(self):
        """Test hook to temporarily block and let test script
        introduce conflicting changes.
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(
        cl_ay = self.ctx.p4.run(
            'changes', '-l',

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))

        until = block_dict['until']
        log.debug("BLOCKING. Seen        'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now".format(

        while not contains_desc(until, cl_ay):
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    # As far as this listing shows: runs a fast-export over start_at..end_at,
    # feeds each 'commit' command to copy_commit(), hands the resulting
    # (sha1, changelist) pair to the conflict checker, and raises RuntimeError
    # when the checker reports a conflict.  Timing is logged at the end.
    # NOTE(review): many lines are missing from this listing (call closers,
    # a `try:` / `finally:` pair, loop exits and branch bodies), so the
    # method does not parse as shown.  Restore them from the original file.
    def copy(self, start_at, end_at):
        """copy a set of commits from git into perforce"""
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(
                    start_at, end_at))
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at,
                marks = []
                commit_count = 0
                for x in fe.commands:
                    if x['command'] == 'commit':
                        commit_count += 1
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                        if command['command'] == 'commit':
                                "Copying changelists...")
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1, p4_changelist_number
                                 ) = mark_to_commit_changelist(mark)
                                    git_commit_sha1, p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                        elif command['command'] == 'reset':
                            raise RuntimeError(
                                "Unexpected fast-export command: " +
                    # we want to write mirror objects for any commits that made it through
                    # any exception will still be alive after this
                    with self.perf.timer[MIRROR]:

                if conflict_checker.has_conflict():
                    raise RuntimeError(
                        "Conflicting change from Perforce caused one" +
                        " or more git commits to fail. Time to" +
                        " pull, rebase, and try again.")

        LOG.getChild("time").debug("\n" + str(self))
class GitMirror:
    """handle git things that get mirrored in perforce"""
    # Create the object list, the profiling helper and the progress reporter,
    # and remember the view name used when tagging mirrored objects.
    # NOTE(review): the indented (name, parent) tuples below are not attached
    # to any call in this listing -- the statement that opened the list and
    # its closing bracket appear to be missing.
    def __init__(self, view_name):
        # Git commit/tree objects collected for mirroring.
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
            (BUILD, OVERALL),
            (CAT_FILE, BUILD),
            (LS_TREE, BUILD),
            (LS_TREE_PROCESS, BUILD),
            (DIFF_TREE, BUILD),
            (ADD_SUBMIT, OVERALL),
            (P4_FSTAT, ADD_SUBMIT),
            (P4_ADD, ADD_SUBMIT),
            (P4_SUBMIT, ADD_SUBMIT),
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        # Passed as the 'views' attribute value and paired with change
        # numbers on commit objects elsewhere in this class.
        self.view_name = view_name

    # Resolves a commit sha1 to an object-type record and returns the
    # changelist recorded for ctx.config.view_name, or None when no record is
    # found.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed and the sha1_to_object_type( call has lost its arguments.  The
    # signature has no `self`, yet the method is called on an instance with
    # two arguments elsewhere in this file; presumably a @staticmethod
    # decorator was dropped -- confirm against the original file.
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.
        object_type = p4gf_object_type.sha1_to_object_type(
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    # For each mark (":changenum sha1") this splits out the change number and
    # sha1, registers a commit GitObject tagged with (change_num, view_name),
    # and collects the commit's trees -- a delta against the previous top
    # tree when one exists, a full snapshot otherwise.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, the call that receives the GitObject is cut off, and the
    # `else:` between the delta and snapshot calls is missing.
    def add_commits(self, marks):
        """build list of commit and tree objects for a set of changelists

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1

        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:

                    #parse perforce change number and SHA1 from marks
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()

                    # add commit object
                        GitObject("commit", sha1,
                                  [(change_num, self.view_name)]))

                    # add all trees referenced by the commit
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(
                            last_top_tree, sha1)
                        last_top_tree = self.__get_snapshot_trees(sha1)

    # Feeds add_files to `p4 add -t binary` in bites of 1000, counts files
    # that were not added, then sets the 'views' attribute to self.view_name
    # on the added files, again in bites.  Returns the not-added count.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, the message-filter comprehension is cut off, and the `else:`
    # lines are missing.  As shown, `added_files` is never appended to, so the
    # attribute loop would not run; confirm against the original file.
    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while len(add_files):
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            for m in [
                    m for m in ctx.p4gf.messages
                    if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate
                        or m.dict['action'] != 'add')
                files_not_added += 1

            for r in [r for r in result if isinstance(r, dict)]:
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees".format(
            commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while len(added_files):
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views", "-v",
                         self.view_name, bite)
        return files_not_added

    # As far as this listing shows: extracts every collected git object into
    # the workspace, uses `p4 fstat` (through FilterAddFstatHandler, in bites
    # of 1000) to split the list into new and existing files, adds or edits
    # them, and submits when at least one file was actually added.
    # NOTE(review): many lines are missing from this listing (the revert of
    # leftover opened files, call closers, the early return for an empty
    # list, and `else:` lines), so the method does not parse as shown.
    def add_objects_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""

        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror operation.
            opened = ctx.p4gf.run('opened')
            if opened:
            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug("adding {0} commits and {1} trees to .git-fusion...".

                # build list of objects to add, extracting them from git
                add_files = [
                    self.__add_object_to_p4(ctx, go)
                    for go in self.git_objects.objects.values()

                # filter out any files that have already been added
                # only do this if the number of files is large enough to justify
                # the cost of the fstat
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for existing
                    # files, which we can't know until we use fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # spoon-feed p4 to avoid blowing out memory
                    while len(add_files):
                        bite = add_files[:bite_size]
                        add_files = add_files[bite_size:]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T",
                                     "depotFile, attr-views", bite)
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list".format(
                        original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(
                        ctx, add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                            "Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        return "\n".join([str(self.git_objects), str(self.perf)])

    def __repr__(self):
        return "\n".join([repr(self.git_objects), str(self.perf)])

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    # Resolves the workspace path for `go`; returns it unchanged when the file
    # already exists.  Otherwise reads the object with `git cat-file`, prefixes
    # a "<type> <length>\0" header, zlib-compresses the result and opens the
    # destination for binary writing.  Returns the destination path.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, the progress call is cut off, and the bodies of the
    # directory-creation `if` and the final `with open(...)` are missing.
    def __add_object_to_p4(self, ctx, go):
        """add a commit or tree to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use with
        p4 add
            "Adding new Git objects to Perforce...")

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:

            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):

            # get contents of commit or tree; can't just copy it because it's
            # probably in a packfile and we don't know which one.  And there's
            # no way to have git give us the compressed commit directly, so we
            # need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:

            return dst

    # Finds the commit's top tree, lists it recursively with
    # `git ls-tree -rt`, and registers a "tree" GitObject for every output
    # line whose type is tree.  Returns the top tree's SHA1.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed and the brackets around the list comprehension are missing.
    def __get_snapshot_trees(self, commit):
        """get all tree objects for a given commit
            commit: SHA1 of commit

        each tree is added to the list to be mirrored

        return the SHA1 of the commit's tree

        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
                self.git_objects.add_object(GitObject("tree", m.group(1)))
                for line in po.splitlines() for m in [pattern.match(line)] if m
            # pylint: enable=W0106
        return top_tree

    # Finds commit2's top tree, diffs it against top_tree1 with
    # `git diff-tree -t`, and registers a "tree" GitObject for every output
    # line whose second mode starts with "04".  Returns commit2's top tree
    # SHA1.
    # NOTE(review): this listing looks incomplete -- the docstring is never
    # closed, and the Popen call, the re.compile call and the list
    # comprehension have all lost their closing brackets.
    def __get_delta_trees(self, top_tree1, commit2):
        """get all tree objects new in one commit vs another commit
            topTree1: SHA1 of first commit's tree
            commit2: SHA1 of second commit

        each tree is added to the list to be mirrored

        return the SHA1 of commit2's tree
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2],
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*"
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
                self.git_objects.add_object(GitObject("tree", m.group(2)))
                for line in po.splitlines() for m in [pattern.match(line)]
                if m and m.group(1) == "04"
            # pylint: enable=W0106
        return top_tree2

    def __get_commit_tree(self, commit):
        """get the one and only tree at the top of commit

            commit: SHA1 of the commit

        add the tree object to the list of objects to be mirrored
        and return its SHA1

        Returns None when the commit text has no line starting with "tree".
        Updates the CAT_FILE timer and the CAT_FILE_COUNT / CAT_FILE_SIZE
        counters.
        """
        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE)
            po = p.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(po)
            for line in po.splitlines():
                # Skip every line before the "tree" line.
                if not line.startswith("tree"):
                    continue
                # line is: tree sha
                parts = line.strip().split(' ')
                sha1 = parts[1]
                self.git_objects.add_object(GitObject("tree", sha1))
                return sha1
# Example #3
class P2G:
    """class to manage copying from Perforce to git"""
    def __init__(self, ctx):
        """Bind this copy operation to ctx and create its helper objects."""
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.perf = p4gf_profiler.TimerCounterSet()

        # State that is filled in while copying:
        #   rev_range    -- RevRange instance set in copy().
        #   graft_change -- P4Changelist of the graft change; set by _setup()
        #                   only when the rev range has a graft_change_num.
        #   changes      -- dict['changelist'] ==> P4Changelist of what to copy()
        #   printed_revs -- RevList produced by PrintHandler
        self.rev_range = self.graft_change = None
        self.changes = self.printed_revs = None
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([
            "\n\nFast Import:\n",
            str(self.fastimport), "",
            str(self.perf), ""

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        # NOTE(review): the line that opens the following call is missing in
        # this copy (presumably a LOG.debug( call) -- restore from upstream.
            "Revision range to copy to Git: {rr}".format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                self.ctx.p4, self.rev_range.graft_change_num,
            # NOTE(review): the final argument(s) and closing parenthesis of
            # create_using_describe() are missing in this copy.
            self.graft_change.description += (
                '\n[grafted history before {start_at}]'.format(
            # NOTE(review): the format() arguments (a start_at= keyword is
            # required by the template) and closing parentheses are missing.

    def _path_range(self):
        """Return the common path...@range string we use frequently.
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        # Servers above level 32 are treated as able to un-expand keywords
        # themselves, so the handler only needs to do it for older servers.
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
        # NOTE(review): the remaining PrintHandler arguments and the closing
        # parenthesis are missing in this copy.
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
        # NOTE(review): the body of the 'if' above is missing; presumably it
        # appends a print option to args -- confirm against upstream.
        self.ctx.p4.run("print", args, self._path_range())

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
            # NOTE(review): body of the 'if' above is missing here as well.
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if not p4file.change in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        # Detach the handler and keep what it collected for _fstat().
        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """run fstat to find deleted revs and get client paths

        Returns the keys of self.changes as strings, sorted numerically
        in ascending order.
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this fstat
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

        if self.graft_change:
            # Also run 'p4 fstat //<view>/...@change' for the graft
            # change to catch all files as of @change, not just
            # revs changed between begin and end of _path_range().
            self.ctx.p4.run("fstat", fstat_cols, self._graft_path())

        self.ctx.p4.handler = None

        # NOTE(review): _collapse_to_graft_change() and
        # _add_graft_to_changes() are not called anywhere in the visible
        # code; presumably they were called here, while printed_revs is
        # still available -- confirm against the upstream source.

        # don't need this any more
        self.printed_revs = None

        # keys are changelist numbers as strings; sort them numerically
        sorted_changes = [str(num)
                          for num in sorted(int(key) for key in self.changes)]

        LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes]))
        return sorted_changes

    def _sync(self, sorted_changes):
        """fake sync of last change to make life easier at push time

        sorted_changes: non-empty list of keys into self.changes, ascending;
                        only the last entry is used.
        """
        self.ctx.p4.handler = SyncHandler()
        lastchange = self.changes[sorted_changes[-1]]
        # -k: update the server's have list without transferring files
        self.ctx.p4.run(
            "sync", "-kf",
            self.ctx.client_view_path() + "@" + str(lastchange.change))
        self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """build fast-import script from changes, then run fast-import"""
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")

            # create commit and trees
            self.fastimport.add_commit(change, last_commit)

            last_commit = change.change

        # run git-fast-import and get list of marks
        marks = self.fastimport.run_fast_import()

        # done with these
        self.changes = None
        return marks

    def _mirror(self, marks):
        """build up list of p4 objects to mirror git repo in perforce
        then submit them

        marks: non-empty list of marks output by git-fast-import; the last
               entry is logged as the last commit created
        """
        # NOTE(review): no call that builds or submits the mirror objects is
        # present in this copy (GitMirror.add_commits / add_objects_to_p4 are
        # the visible candidates) -- confirm against the upstream source.
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror))
        # start over with a fresh mirror for this view
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        last_commit = marks[-1]
        LOG.debug("Last commit created: " + last_commit)

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """run 'git gc' to pack up the blobs

        aside from any possible performance benefit, this prevents warnings
        from git about "unreachable loose objects"
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if (not self.graft_change):
        # NOTE(review): the body of the guard above is missing (the docstring
        # says "NOP if not grafting", so presumably an early return).
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(
        # NOTE(review): format() argument and closing parentheses missing.

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}".format(
                # NOTE(review): format() argument missing; a statement that
                # skips to the next changelist presumably followed.

            LOG.debug("_collapse_to_graft_change() deleting {}".format(
            # NOTE(review): format() argument missing; nothing visible ever
            # appends to del_keys, so that statement is missing too.
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}".
                # NOTE(review): rest of this call is missing; a statement
                # that skips to the next file presumably followed.

            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                # NOTE(review): the lines around the two message strings
                # below are missing: the statement attaching p4file to the
                # graft change, the logging call openers, and the 'else:'
                # that separates "keeping" from "skipping".
                    "_collapse_to_graft_change() keeping {}".format(p4file))
                    "_collapse_to_graft_change() skipping, had newer  {}".

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        if (not self.graft_change):
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        if (not self.graft_change):
        return "{path}@{change}".format(path=self.ctx.client_view_path(),

    def copy(self, start_at, stop_at):
        """copy a set of changelists from perforce into git"""

        # Each phase below runs under its own timer, nested in OVERALL.
        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not len(self.changes):
                    LOG.debug("No new changes found to copy")
                    # NOTE(review): a statement appears to be missing after
                    # the log call above (presumably an early return; _sync()
                    # would fail on an empty change list) -- confirm.

                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:
            # NOTE(review): body missing (presumably self._copy_print()).

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:
            # NOTE(review): body missing (presumably self._sync(sorted_changes)).

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with self.perf.timer[MIRROR]:
            # NOTE(review): body missing (presumably self._mirror(marks)).

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                # NOTE(review): the merge statements are missing in this copy.

            with self.perf.timer[PACK]:
            # NOTE(review): body missing (presumably self._pack()).

        LOG.getChild("time").debug("\n" + str(self))
class GitMirror:
    """handle git things that get mirrored in perforce"""

    def __init__(self, view_name):
        # Objects collected for mirroring, keyed however GitObjectList does it.
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        # NOTE(review): the call that registers the (timer, parent) pairs
        # below is missing its opening line (and its closing bracket) in this
        # copy -- restore from the upstream source.
                             (BUILD, OVERALL),
                             (CAT_FILE, BUILD),
                             (LS_TREE, BUILD),
                             (LS_TREE_PROCESS, BUILD),
                             (DIFF_TREE, BUILD),
                             (DIFF_TREE_PROCESS, BUILD),
                             (ADD_SUBMIT, OVERALL),
                             (EXTRACT_OBJECTS, ADD_SUBMIT),
                             (P4_FSTAT, ADD_SUBMIT),
                             (P4_ADD, ADD_SUBMIT),
                             (P4_SUBMIT, ADD_SUBMIT),
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        # View name recorded with commit objects and in the 'views' attribute.
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.

        commit: SHA1 of the commit to look up
        ctx:    context supplying config.view_name and the p4gf connection

        Returns whatever view_name_to_changelist() yields for this view,
        or None if no object type is found for the commit.
        """
        # Declared static: the method takes no self, yet callers invoke it
        # through an instance (ctx.mirror.get_change_for_commit(sha1, ctx)).
        object_type = p4gf_object_type.sha1_to_object_type(
                              sha1           = commit
                            , view_name      = ctx.config.view_name
                            , p4             = ctx.p4gf
                            , raise_on_error = False)
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    def add_commits(self, marks):
        """build list of commit and tree objects for a set of changelists

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1
        """
        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:
                    # parse perforce change number and SHA1 from marks
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()
                    # add commit object
                    self.git_objects.add_object(
                        GitObject("commit",
                                  sha1,
                                  [(change_num, self.view_name)]))
                    # add all trees referenced by the commit: a full listing
                    # for the first commit, then only the delta against the
                    # previous commit's top tree
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(last_top_tree, sha1)
                    else:
                        last_top_tree = self.__get_snapshot_trees(sha1)

    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.

        ctx:       context whose p4gf connection runs the commands
        add_files: list of paths to open for add

        Files are processed in batches of 1000. Files that were opened for
        add then get their 'views' attribute set to self.view_name.
        """
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while add_files:
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            for m in ctx.p4gf.messages:
                if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate or
                        m.dict['action'] != 'add'):
                    files_not_added += 1

            for r in result:
                if not isinstance(r, dict):
                    continue
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                else:
                    # NOTE(review): this copy never populated added_files, so
                    # the attribute pass below could not run; recording the
                    # depot path here is an assumption -- confirm upstream.
                    added_files.append(r["depotFile"])
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees".format(commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while added_files:
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views", "-v", self.view_name, bite)
        return files_not_added

    def add_objects_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""

        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror operation.
            opened = ctx.p4gf.run('opened')
            if opened:
                ctx.p4gf.run('revert', '//{}/...'.format(ctx.config.p4client_gf))
            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug("adding {0} commits and {1} trees to .git-fusion...".
                # NOTE(review): the format() call and its two arguments are
                # missing in this copy.

                # build list of objects to add, extracting them from git
                add_files = [self.__add_object_to_p4(ctx, go)
                              for go in self.git_objects.objects.values()]

                # filter out any files that have already been added
                # only do this if the number of files is large enough to justify
                # the cost of the fstat
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for existing
                    # files, which we can't know until we use fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # spoon-feed p4 to avoid blowing out memory
                    while len(add_files):
                        bite = add_files[:bite_size]
                        add_files = add_files[bite_size:]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T", "depotFile, attr-views", bite)
                    # The handler has split the input into files still to add
                    # and files that already exist.
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list"
                              .format(original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:
                # NOTE(review): body of the 'if' above is missing (presumably
                # an early return when there is nothing to do).

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(ctx, add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                        # NOTE(review): the view= argument and closing
                        # parenthesis are missing in this copy.
                        self.progress.status("Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                        # NOTE(review): an 'else:' appears to be missing
                        # before the log call below.
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        # Newline-joined str() of the collected git objects and more.
        # NOTE(review): the remaining list element(s) and the closing '])'
        # are missing in this copy -- restore from the upstream source.
        return "\n".join([str(self.git_objects),

    def __repr__(self):
        # Newline-joined repr() of the collected git objects and more.
        # NOTE(review): the remaining list element(s) and the closing '])'
        # are missing in this copy -- restore from the upstream source.
        return "\n".join([repr(self.git_objects),

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash. 
    def __add_object_to_p4(self, ctx, go):
        """add a commit or tree to the git-fusion perforce client workspace

        ctx: context passed to go.git_p4_client_path() to locate the file
        go:  object to extract from git; its type and sha1 are used

        return the path of the client workspace file suitable for use with
        p4 add
        """
        self.progress.progress_increment("Adding new Git objects to Perforce...")

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:

            # make sure dir exists
            os.makedirs(os.path.dirname(dst), exist_ok=True)

            # get contents of commit or tree; can't just copy it because it's
            # probably in a packfile and we don't know which one.  And there's
            # no way to have git give us the compressed commit directly, so we
            # need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            # header is "<type> <size>" followed by a null byte
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:
                f.write(deflated)

            return dst

    def __get_snapshot_trees(self, commit):
        """get all tree objects for a given commit

            commit: SHA1 of commit

        each tree is added to the list to be mirrored

        return the SHA1 of the commit's tree
        """
        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            for line in po.splitlines():
                m = pattern.match(line)
                if m:
                    self.git_objects.add_object(GitObject("tree", m.group(1)))
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """get all tree objects new in one commit vs another commit

            top_tree1: SHA1 of first commit's tree
            commit2: SHA1 of second commit

        each tree is added to the list to be mirrored

        return the SHA1 of commit2's tree
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*")
            for line in po.splitlines():
                m = pattern.match(line)
                if m and m.group(1) == "04":
                    self.git_objects.add_object(GitObject("tree", m.group(2)))
        return top_tree2

    def __get_commit_tree(self, commit):
        """get the one and only tree at the top of commit

            commit: SHA1 of the commit

        add the tree object to the list of objects to be mirrored
        and return its SHA1 (None if the 'git cat-file' output has no
        line starting with "tree")
        """
        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE)
            po = p.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(po)
            for line in po.splitlines():
                if not line.startswith("tree"):
                    continue
                # line is: tree sha
                parts = line.strip().split(' ')
                sha1 = parts[1]
                self.git_objects.add_object(GitObject("tree", sha1))
                return sha1
        return None
class G2P:
    """class to handle batching of p4 commands when copying git to p4"""
    def __init__(self, ctx):
        self.ctx = ctx
        # Pending p4 commands: command string ==> list of paths, filled by
        # setup_p4_command() and consumed by run_p4_commands().
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        # NOTE(review): the call that registers the (timer, parent) pairs
        # below is missing its opening line (and its closing bracket) in this
        # copy -- restore from the upstream source.
                             (FAST_EXPORT, OVERALL),
                             (TEST_BLOCK_PUSH, OVERALL),
                             (CHECK_CONFLICT, OVERALL),
                             (COPY, OVERALL),
                             (GIT_CHECKOUT, COPY),
                             (CHECK_PROTECTS, COPY),
                             (COPY_BLOBS_1, COPY),
                             (COPY_BLOBS_2, COPY),
                             (MIRROR, OVERALL),
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([str(self.perf),

    def revert_and_raise(self, errmsg):
        """An error occurred while attempting to submit the incoming change
        to Perforce. As a result, revert all modifications, log the error,
        and raise an exception.

        errmsg: text for the exception; when empty, the current call stack
                is used instead

        Always raises RuntimeError("import failed: ...").
        """
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        # NOTE(review): no cleanup follows this comment in this copy;
        # remove_added_files() looks like the intended call -- confirm.
        if not errmsg:
            # format_stack() returns a list of strings; join it so the
            # message shows the stack text rather than a list repr
            errmsg = "".join(traceback.format_stack())
        msg = "import failed: {}".format(errmsg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        """Convert one P4 message to a string.

        Annotate some errors with additional context: the P4USER for
        message ids in MSGID_EXPLAIN_P4USER and the P4CLIENT for message
        ids in MSGID_EXPLAIN_P4CLIENT.
        """
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # label the client as P4CLIENT (was mislabelled P4USER)
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt
    def check_p4_messages(self):
        """If the results indicate a file is locked by another user,
        raise an exception so that the overall commit will fail. The
        changes made so far will be reverted.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return

        lines = [self._p4_message_to_text(m) for m in msgs]
        # NOTE(review): the statement that consumed `lines` is missing in
        # this copy; reverting and raising with the joined text matches the
        # docstring -- confirm against the upstream source.
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        """
        Run one P4 command, logging cmd and results.

        cmd: list of strings, the command name followed by its arguments
        """
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
        # NOTE(review): body missing in this copy (presumably logs p4.errors).
        if p4.warnings:
        # NOTE(review): body missing in this copy (presumably logs p4.warnings).
        if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG):
            log = LOG.getChild('p4.msgid')
            for m in p4.messages:
            # NOTE(review): loop body missing in this copy; any statements
            # after the loop (result logging, message checks) are missing too.


    def run_p4_commands(self):
        """run all pending p4 commands"""
        # self.addeditdelete maps a command string (e.g. "edit -t <type>")
        # to the list of paths (or (from, to) pairs for 'move') it applies to.
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if not cmd[0] == 'add':
            # NOTE(review): body of the 'if' above is missing (presumably it
            # adds '-k' to cmd, which would explain cmd[3:] below) -- confirm.
            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for pair in paths:
                    (frompath, topath) = pair
                    self._p4run(['move', '-k', escape_path(frompath), escape_path(topath)])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
                # NOTE(review): an 'else:' appears to be missing here; the
                # lines below handle the non-move commands.
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                if not cmd[0] == 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                    # NOTE(review): an 'else:' appears to be missing before
                    # the unescaped variant below (used for 'add').
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:
                    # NOTE(review): loop body missing in this copy (presumably
                    # removes the local file, since -k leaves it in place).

    def remove_added_files(self):
        """remove added files to restore p4 client after failure of p4 command"""
        # Only the paths queued under an 'add ...' command are considered.
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            if cmd[0] == 'add':
                for path in paths:
                # NOTE(review): loop body missing in this copy (presumably
                # deletes the local file at path) -- restore from upstream.

    def setup_p4_command(self, command, p4path):
        """Add command to list to be run by run_p4_commands. If the command
        is 'move' then the p4path is expected to be a tuple of the frompath
        and topath."""
        if command in self.addeditdelete:
            self.addeditdelete[command] = [p4path]

    def _toggle_filetype(self, p4path, isx):
        """Returns the new file type for the named file, switching the
        executable state based on the isx value.

            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        p4type = None
        if isx:
            p4type = '+x'
            # NOTE(review): an 'else:' appears to be missing here; the lines
            # below handle removal of the executable modifier.
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                # NOTE(review): the 'try:' matching the 'except' below is
                # missing, as is the final argument of first_value_for_key().
                    p4type = p4gf_util.first_value_for_key(
                                self.ctx.p4.run(['fstat', '-T' + tipe, p4path]),
                except P4.P4Exception:
                # NOTE(review): body of the 'except' above is missing.
                if p4type:
                    p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """run p4 add or edit for a new or modified file

        blob: dict with 'path' (path relative to both the git work tree and
              the client root) and, for adds, 'mode'

        The file content is copied into the p4 client workspace and an
        'edit' or 'add -f' command is queued with setup_p4_command().
        """

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        os.makedirs(os.path.dirname(p4path), exist_ok=True)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                # NOTE(review): the format arguments were missing in this
                # copy; these values are inferred from the placeholder names.
                LOG.debug("  set filetype: {ft}  oldx={oldx} newx={newx}"
                          .format(ft=p4type, oldx=wasx, newx=isx))
            shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """run p4 move for a renamed/moved file

        blob: dict with 'path' (old name) and 'topath' (new name), both
              relative to the git work tree and the client root
        """
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        os.makedirs(os.path.dirname(p4topath), exist_ok=True)
        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """run p4 integ for a copied file

        blob: dict with 'path' (source) and 'topath' (destination), both
              relative to the client root
        """
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        self._p4run(["copy", "-v", escape_path(p4frompath), escape_path(p4topath)])

        # make sure dest dir exists
        os.makedirs(os.path.dirname(p4topath), exist_ok=True)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """run p4 delete for a deleted file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """copy git blobs to perforce revs

        blobs: list of dicts; each one's 'action' selects the handler:
               'R' rename, 'C' copy, 'M' add/edit, 'D' delete
        """
        # first, one pass to do rename/copy
        # these don't batch.  move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            # NOTE(review): the branch bodies were missing in this copy and
            # are inferred from the handler names. The queued moves are
            # discarded by the reset below unless run_p4_commands() runs
            # here -- confirm against the upstream source.
        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands.  That means no more than one delete, one add per
        # filetype and one edit per filetype.  Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            # NOTE(review): presumably run_p4_commands() is also called here
            # to execute the batched commands -- confirm.

    def check_protects(self, p4user, blobs):
        """check if author is authorized to submit files"""
        # The checker is built for both the authenticated user and the
        # commit author (p4user).
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        # NOTE(review): `blobs` is never used in this copy; the statement
        # that feeds the paths to the checker appears to be missing.
        if pc.has_error():
        # NOTE(review): body of the 'if' above is missing (presumably it
        # reverts and raises with the checker's error) -- confirm.

    def _reset_for_new_commit(self):
        Clear out state from previous commit that must not carry over
        into next commit.
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This prevents
        the obscure "file(s) not on client" error.

        Best effort: a P4 exception is logged and otherwise ignored.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """Copy a single git commit into Perforce.

        commit -- dict describing one fast-export commit. The keys read in
                  the lines visible here are 'mark', 'data', 'files',
                  'sha1', 'author' (a dict with an 'email' entry) and,
                  for merge commits, 'merge'.

        Merge commits, and authors whose email does not map to an existing
        Perforce user, are rejected through revert_and_raise(), which
        raises RuntimeError.

        The visible return statements yield None next to the "Ignored
        empty commit" log message, and otherwise the mark string
        ":<changenum> <sha1>".
        NOTE(review): the exact branching around the submit is not fully
        visible here -- confirm before relying on it.
        """


        #OG.debug("dump commit {}".format(commit))
        LOG.debug("for  commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted."
                                   +" Rebase to create a linear"
                                   +" history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise("User '{}' not permitted to commit".format(email))
        # first element of the usermap entry is used as the Perforce user
        author_p4user = user[0]

        # check the path of every file touched by the commit
        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinguish the revision from a path...
                p4gf_util.popen(['git', 'reset', '--hard', commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        except P4.P4Exception as e:

        with self.perf.timer[COPY_BLOBS_2]:
            # the pusher is the authenticated user; the author comes from
            # the commit's email address (looked up above)
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                    LOG.info("Submitted change @{} for commit {}".format(changenum, commit['sha1']))
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
            return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script
        introduce conflicting changes.

        Reads the P4GF_TEST_BLOCK_PUSH test variable and converts it to a
        dict whose 'after' and 'until' entries are used: 'after' is
        checked against the changelists fetched up front, and the method
        then re-runs 'p4 changes' until contains_desc() finds 'until'.
        NOTE(review): presumably a no-op when the test variable is unset
        -- confirm.
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)))
        cl_ay = self.ctx.p4.run('changes',

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))

        until = block_dict['until']
        log.debug("BLOCKING. Seen        'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now"

        # poll until a changelist matching 'until' shows up
        while not contains_desc(until, cl_ay):
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")
    def copy(self, start_at, end_at):
        """Copy a set of commits from git into Perforce.

        start_at, end_at -- bounds of the commit range; both are passed
                            straight to p4gf_fastexport.FastExport.

        The whole run is timed under OVERALL and wrapped in
        p4gf_util.HeadRestorer(). Each fast-export 'commit' command is
        handed to copy_commit(), and the conflict checker is consulted
        after each one.

        Raises RuntimeError on an unexpected fast-export command and when
        the conflict checker reports a conflict once the loop is done.
        """
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(start_at, end_at))
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at, self.ctx.tempdir.name)
                marks = []
                # count the commits up front
                commit_count = 0
                for x in fe.commands:
                    if x['command'] == 'commit':
                        commit_count += 1
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                        if command['command'] == 'commit':
                            self.progress.progress_increment("Copying changelists...")
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                            with self.perf.timer[CHECK_CONFLICT]:
                                 p4_changelist_number) = mark_to_commit_changelist(mark)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                        elif command['command'] == 'reset':
                            raise RuntimeError("Unexpected fast-export command: " +
                    # we want to write mirror objects for any commits that made it through
                    # any exception will still be alive after this
                    with self.perf.timer[MIRROR]:

                if conflict_checker.has_conflict():
                    raise RuntimeError("Conflicting change from Perforce caused one"
                                       + " or more git commits to fail. Time to"
                                       + " pull, rebase, and try again.")

        # dump the collected timers to the "time" child logger
        LOG.getChild("time").debug("\n" + str(self))
class P2G:
    """class to manage copying from Perforce to git"""
    def __init__(self, ctx):
        """Bind ctx and create the FastImport, perf TimerCounterSet and
        ProgressReporter helpers; the per-copy state starts out as None.
        """
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.perf = p4gf_profiler.TimerCounterSet()
                            (SETUP, OVERALL),
                            (PRINT, OVERALL),
                            (FSTAT, OVERALL),
                            (SYNC, OVERALL),
                            (FAST_IMPORT, OVERALL),
                            (MIRROR, OVERALL),
                            (MERGE, OVERALL),
                            (PACK, OVERALL)

        self.rev_range      = None  # RevRange instance set in copy().
        self.graft_change   = None  # P4Changelist of the graft change; set in _setup() when grafting.
        self.changes        = None  # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs   = None  # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress       = ProgressReporter()

    def __str__(self):
        """Return a multi-line text report; the newline-joined parts
        start with a "Fast Import:" heading."""
        return "\n".join(["\n\nFast Import:\n",

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy.

        start_at, stop_at -- range bounds, passed with the context to
                             RevRange.from_start_stop().

        Sets self.rev_range and self.changes; when the range carries a
        graft_change_num, also sets self.graft_change and appends a
        "[grafted history before ...]" note to its description.
        """
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug("Revision range to copy to Git: {rr}"

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
            self.graft_change.description += ('\n[grafted history before {start_at}]'

    def _path_range(self):
        """Return the common path...@range string we use frequently.
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo.

        Installs a PrintHandler on the shared p4 connection, runs
        'p4 print -a' over _path_range() and, when grafting, a second
        print over _graft_path(). Afterwards the handler is removed and
        its collected revs are stored in self.printed_revs.
        """
        # NOTE(review): 32 is presumably the lowest server level that can
        # unexpand keywords itself -- confirm.
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
        self.ctx.p4.run("print", args, self._path_range())

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if not p4file.change in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        # detach the handler and keep what it collected
        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """Run fstat to find deleted revs and get client paths.

        An FstatHandler built from self.printed_revs and self.changes is
        installed on the shared p4 connection for the duration of the
        fstat calls and is always removed again, even if a call raises.

        Side effect: self.printed_revs is dropped (set to None).

        Returns the keys of self.changes as a list of strings, ordered by
        their integer value.
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this fstat
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        try:
            self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

            if self.graft_change:
                # Also run 'p4 fstat //<view>/...@change' for the graft
                # change to catch all files as of @change, not just
                # revs changed between begin and end of _path_range().
                self.ctx.p4.run("fstat", fstat_cols, self._graft_path())
        finally:
            # never leave our handler on the shared connection: later
            # commands (including error cleanup) would be routed through it
            self.ctx.p4.handler = None

        # NOTE(review): _collapse_to_graft_change() reads self.printed_revs
        # but nothing in this method calls it before the reference is
        # dropped -- confirm that is intended.
        # don't need this any more
        self.printed_revs = None

        # numeric order, but handed back as strings
        sorted_changes = [str(num) for num in sorted(int(key) for key in self.changes)]

        LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes]))
        return sorted_changes

    def _sync(self, sorted_changes):
        """Fake sync of last change to make life easier at push time.

        sorted_changes -- non-empty sequence of keys into self.changes in
                          ascending order; only the last one is used.

        Runs 'p4 sync -kf <client view>@<last change>' with a SyncHandler
        installed on the shared p4 connection. The handler is always
        removed again, even if the sync raises.
        """
        # look the change up first so a bad key cannot leave a handler behind
        lastchange = self.changes[sorted_changes[-1]]
        self.ctx.p4.handler = SyncHandler()
        try:
            self.ctx.p4.run("sync", "-kf",
                    self.ctx.client_view_path() + "@" + str(lastchange.change))
        finally:
            self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """build fast-import script from changes, then run fast-import"""
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")

            # create commit and trees
            self.fastimport.add_commit(change, last_commit)

            last_commit = change.change

        # run git-fast-import and get list of marks
        marks = self.fastimport.run_fast_import()

        # done with these
        self.changes = None
        return marks

    def _mirror(self, marks):
        """build up list of p4 objects to mirror git repo in perforce
        then submit them

        marks -- non-empty sequence of marks; its last element is logged
                 as the last commit created.
        """
        # log the current mirror, then start over with a fresh GitMirror
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror))
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        last_commit = marks[len(marks) - 1]
        LOG.debug("Last commit created: " + last_commit)

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """run 'git gc' to pack up the blobs

        aside from any possible performance benefit, this prevents warnings
        from git about "unreachable loose objects"

        Uses popen_no_throw, so the command's result is not checked here.
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if (not self.graft_change):
        # change numbers are compared as integers throughout
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        # Keys are collected first and deleted afterwards so the dict is
        # not modified while it is being iterated.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}".format(p4changelist.change))

            LOG.debug("_collapse_to_graft_change() deleting {}".format(p4changelist.change))
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}".format(p4file))

            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                LOG.debug("_collapse_to_graft_change() keeping {}".format(p4file))
                LOG.debug("_collapse_to_graft_change() skipping, had newer  {}".format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        if (not self.graft_change):
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        if (not self.graft_change):
        return "{path}@{change}".format(
                        path = self.ctx.client_view_path(),
                        change = self.graft_change.change)

    def copy(self, start_at, stop_at):
        """Copy a set of changelists from Perforce into git.

        start_at, stop_at -- range bounds, handed to _setup().

        The work is split into stages, each under its own perf timer
        inside OVERALL: SETUP, PRINT, FSTAT, SYNC, FAST_IMPORT, MIRROR,
        MERGE and PACK. When done, the timers are dumped to the "time"
        child logger.
        """

        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not len(self.changes):
                    LOG.debug("No new changes found to copy")

                # parent for the first commit created by _fast_import()
                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                # drop the reference once the import is done
                sorted_changes = None

            with self.perf.timer[MIRROR]:

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it

            with self.perf.timer[PACK]:

        LOG.getChild("time").debug("\n" + str(self))