Example no. 1 (score: 0)
 def __init__(self, need_unexpand, tempdir):
     """Initialize the print-output handler.

     need_unexpand -- True when keyword-expanded content must be unexpanded.
     tempdir       -- directory in which temporary files are created.
     """
     OutputHandler.__init__(self)
     # No revision is in flight yet.
     self.rev = None
     self.revs = RevList()
     self.tempfile = None
     self.need_unexpand = need_unexpand
     self.tempdir = tempdir
     # Total revision count is unknown up front, so report
     # indeterminate progress.
     self.progress = ProgressReporter()
     self.progress.progress_init_indeterminate()
Example no. 2 (score: 0)
    def __init__(self, ctx):
        """Set up fast-import, profiling timers, and copy state."""
        self.ctx = ctx

        # git-fast-import wrapper, rooted at the P4 client workspace.
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)

        # One timer per copy phase, each nested beneath OVERALL.
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                              (SETUP, OVERALL),
                              (PRINT, OVERALL),
                              (FSTAT, OVERALL),
                              (SYNC, OVERALL),
                              (FAST_IMPORT, OVERALL),
                              (MIRROR, OVERALL),
                              (MERGE, OVERALL),
                              (PACK, OVERALL)])

        self.rev_range = None      # RevRange instance set in copy().
        self.graft_change = None
        self.changes = None        # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs = None   # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress = ProgressReporter()
 def __init__(self, need_unexpand, tempdir):
     """Create a handler that collects printed revisions.

     need_unexpand -- whether RCS keyword expansion must be undone.
     tempdir       -- where scratch files live while printing.
     """
     OutputHandler.__init__(self)
     self.need_unexpand = need_unexpand
     self.tempdir = tempdir
     # Per-revision state, populated as output arrives.
     self.rev = None
     self.tempfile = None
     self.revs = RevList()
     # Revision count is not known in advance.
     self.progress = ProgressReporter()
     self.progress.progress_init_indeterminate()
Example no. 4 (score: 0)
 def __init__(self, ctx):
     """Prepare batched-command state and profiling for a git->p4 copy."""
     self.ctx = ctx
     # p4 command string -> list of paths it applies to.
     self.addeditdelete = {}
     # Phase timers, all children of OVERALL.
     self.perf = p4gf_profiler.TimerCounterSet()
     self.perf.add_timers([OVERALL,
                           (FAST_EXPORT, OVERALL),
                           (TEST_BLOCK_PUSH, OVERALL),
                           (CHECK_CONFLICT, OVERALL),
                           (COPY, OVERALL),
                           (GIT_CHECKOUT, COPY),
                           (CHECK_PROTECTS, COPY),
                           (COPY_BLOBS_1, COPY),
                           (COPY_BLOBS_2, COPY),
                           (MIRROR, OVERALL)])
     self.perf.add_counters([N_BLOBS, N_RENAMES])
     self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
     self.progress = ProgressReporter()
 def __init__(self, view_name):
     """Track git objects to be mirrored for the named view."""
     self.view_name = view_name
     self.git_objects = GitObjectList()
     # Timers for the build and add/submit phases, nested under OVERALL.
     self.perf = p4gf_profiler.TimerCounterSet()
     self.perf.add_timers([OVERALL,
                           (BUILD, OVERALL),
                           (CAT_FILE, BUILD),
                           (LS_TREE, BUILD),
                           (LS_TREE_PROCESS, BUILD),
                           (DIFF_TREE, BUILD),
                           (DIFF_TREE_PROCESS, BUILD),
                           (ADD_SUBMIT, OVERALL),
                           (EXTRACT_OBJECTS, ADD_SUBMIT),
                           (P4_FSTAT, ADD_SUBMIT),
                           (P4_ADD, ADD_SUBMIT),
                           (P4_SUBMIT, ADD_SUBMIT)])
     self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                             (CAT_FILE_SIZE, "bytes")])
     self.progress = ProgressReporter()
 def __init__(self, ctx):
     """Initialize batching and profiling state for copying git to p4."""
     self.ctx = ctx
     self.addeditdelete = {}  # command string -> paths to run it on
     self.perf = p4gf_profiler.TimerCounterSet()
     self.perf.add_timers([
         OVERALL,
         (FAST_EXPORT, OVERALL),
         (TEST_BLOCK_PUSH, OVERALL),
         (CHECK_CONFLICT, OVERALL),
         (COPY, OVERALL),
         (GIT_CHECKOUT, COPY),
         (CHECK_PROTECTS, COPY),
         (COPY_BLOBS_1, COPY),
         (COPY_BLOBS_2, COPY),
         (MIRROR, OVERALL),
     ])
     self.perf.add_counters([N_BLOBS, N_RENAMES])
     # Maps git author email addresses to Perforce users.
     self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
     self.progress = ProgressReporter()
 def __init__(self, view_name):
     """Set up object list, timers, and counters for mirroring."""
     self.git_objects = GitObjectList()
     self.perf = p4gf_profiler.TimerCounterSet()
     self.perf.add_timers([
         OVERALL,
         (BUILD, OVERALL),
         (CAT_FILE, BUILD),
         (LS_TREE, BUILD),
         (LS_TREE_PROCESS, BUILD),
         (DIFF_TREE, BUILD),
         (DIFF_TREE_PROCESS, BUILD),
         (ADD_SUBMIT, OVERALL),
         (EXTRACT_OBJECTS, ADD_SUBMIT),
         (P4_FSTAT, ADD_SUBMIT),
         (P4_ADD, ADD_SUBMIT),
         (P4_SUBMIT, ADD_SUBMIT),
     ])
     self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                             (CAT_FILE_SIZE, "bytes")])
     self.progress = ProgressReporter()
     self.view_name = view_name
    def __init__(self, ctx):
        """Wire up fast-import plus profiling for a p4->git copy."""
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)

        # All phase timers hang off OVERALL.
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([
            OVERALL, (SETUP, OVERALL), (PRINT, OVERALL), (FSTAT, OVERALL),
            (SYNC, OVERALL), (FAST_IMPORT, OVERALL), (MIRROR, OVERALL),
            (MERGE, OVERALL), (PACK, OVERALL)
        ])

        self.rev_range = None     # RevRange instance set in copy().
        self.graft_change = None
        self.changes = None       # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs = None  # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress = ProgressReporter()
Example no. 9 (score: 0)
class G2P:
    """class to handle batching of p4 commands when copying git to p4"""
    def __init__(self, ctx):
        """Set up profiling, the user map, and the batched-command dict.

        ctx -- shared context object carrying p4 connections and config.
        """
        self.ctx = ctx
        # Maps a p4 command string (e.g. "add -f -t text") to the list
        # of paths it will run on; flushed by run_p4_commands().
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([
            OVERALL,
            (FAST_EXPORT, OVERALL),
            (TEST_BLOCK_PUSH, OVERALL),
            (CHECK_CONFLICT, OVERALL),
            (COPY, OVERALL),
            (GIT_CHECKOUT, COPY),
            (CHECK_PROTECTS, COPY),
            (COPY_BLOBS_1, COPY),
            (COPY_BLOBS_2, COPY),
            (MIRROR, OVERALL),
        ])
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        """Return the perf report plus the mirror's report."""
        return "\n".join([str(self.perf), str(self.ctx.mirror)])

    def revert_and_raise(self, errmsg):
        """An error occurred while attempting to submit the incoming change
        to Perforce. As a result, revert all modifications, log the error,
        and raise an exception."""
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        self.remove_added_files()
        if not errmsg:
            # format_stack() returns a list of strings; join them so the
            # log shows a readable trace rather than a list repr.
            errmsg = "".join(traceback.format_stack())
        msg = "import failed: {}".format(errmsg)
        LOG.error(msg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        '''
        Convert a single P4 message to text.

        Annotate some errors with additional context such as P4USER
        or P4CLIENT.
        '''
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # Fixed: previously labeled the client value "P4USER=".
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    def check_p4_messages(self):
        """If the results indicate a file is locked by another user,
        raise an exception so that the overall commit will fail. The
        changes made so far will be reverted.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return

        lines = [self._p4_message_to_text(m) for m in msgs]
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        '''
        Run one P4 command, logging cmd and results.

        Raises (via check_p4_messages/revert_and_raise) if the results
        contain a cannot-open message.
        '''
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
            LOG.getChild('p4.err').error("\n".join(p4.errors))
        if p4.warnings:
            LOG.getChild('p4.warn').warning("\n".join(p4.warnings))
        LOG.getChild('p4.out').debug("{}".format(results))
        # msg_repr() is comparatively expensive; only do it when the
        # msgid child logger is actually enabled.
        msgid_log = LOG.getChild('p4.msgid')
        if msgid_log.isEnabledFor(logging.DEBUG):
            for m in p4.messages:
                msgid_log.debug(p4gf_p4msg.msg_repr(m))

        self.check_p4_messages()

    def run_p4_commands(self):
        """run all pending p4 commands"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if not cmd[0] == 'add':
                cmd.append('-k')
            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for pair in paths:
                    (frompath, topath) = pair
                    self._p4run([
                        'move', '-k',
                        escape_path(frompath),
                        escape_path(topath)
                    ])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
            else:
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                if not cmd[0] == 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                else:
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    # delete -k leaves the client file in place; remove it
                    # ourselves so the workspace matches the depot.
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:
                        os.remove(path)

    def remove_added_files(self):
        """remove added files to restore p4 client after failure of p4 command"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            if cmd[0] == 'add':
                for path in paths:
                    os.unlink(path)

    def setup_p4_command(self, command, p4path):
        """Add command to list to be run by run_p4_commands. If the command
        is 'move' then the p4path is expected to be a tuple of the frompath
        and topath."""
        self.addeditdelete.setdefault(command, []).append(p4path)

    def _toggle_filetype(self, p4path, isx):
        """Returns the new file type for the named file, switching the
        executable state based on the isx value.

        Args:
            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        p4type = None
        if isx:
            p4type = '+x'
        else:
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                try:
                    p4type = p4gf_util.first_value_for_key(
                        self.ctx.p4.run(['fstat', '-T' + tipe, p4path]), tipe)
                except P4.P4Exception:
                    pass
                if p4type:
                    p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """run p4 add or edit for a new or modified file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                LOG.debug(
                    "  set filetype: {ft}  oldx={oldx} newx={newx}".format(
                        ft=p4type, oldx=wasx, newx=isx))
            shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """ run p4 move for a renamed/moved file"""
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)
        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """run p4 integ for a copied file"""
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # 'copy -v' opens the target virtually; no client file is written.
        self._p4run(
            ["copy", "-v",
             escape_path(p4frompath),
             escape_path(p4topath)])

        # make sure dest dir exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """run p4 delete for a deleted file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """copy git blobs to perforce revs"""
        # first, one pass to do rename/copy
        # these don't batch.  move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            self.run_p4_commands()
        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands.  That means no more than one delete, one add per
        # filetype and one edit per filetype.  Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            self.run_p4_commands()

    def check_protects(self, p4user, blobs):
        """check if author is authorized to submit files"""
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        pc.filter_paths(blobs)
        if pc.has_error():
            self.revert_and_raise(pc.error_message())

    def _reset_for_new_commit(self):
        """
        Clear out state from previous commit that must not carry over
        into next commit.
        """
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This prevents
        the obscure "file(s) not on client" error.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            # (warning() rather than the deprecated warn() alias)
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """Copy a single git commit into Perforce.

        Returns a git-fast-import-style mark string
        ":<changenum> <sha1>", or None for an empty commit.
        Raises (via revert_and_raise) on any failure.
        """

        self._reset_for_new_commit()

        LOG.debug("for  commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted." +
                                   " Rebase to create a linear" +
                                   " history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise(
                "User '{}' not permitted to commit".format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:
                self.revert_and_raise(err)

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinquish the revision from a path...
                p4gf_util.popen(
                    ['git', 'reset', '--hard', commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        try:
            self.copy_blobs(commit['files'])
        except P4.P4Exception as e:
            self.revert_and_raise(str(e))

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(
                pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

            try:
                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                                          commit['author']['date'])
                    LOG.info("Submitted change @{} for commit {}".format(
                        changenum, commit['sha1']))
                else:
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
                # revert_and_raise() always raises, so changenum below is
                # only reached on success.
                self.revert_and_raise(str(e))
            return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script
        introduce conflicting changes.
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:
            return

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)
        log.debug(block_dict)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)))
        cl_ay = self.ctx.p4.run(
            'changes', '-l',
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client))

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))
                return

        until = block_dict['until']
        log.debug("BLOCKING. Seen        'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now".format(
            path=p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client),
            change=cl_ay[-1]['change']))

        # Poll once a second until a changelist whose description
        # contains 'until' appears.
        while not contains_desc(until, cl_ay):
            time.sleep(1)
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    def copy(self, start_at, end_at):
        """copy a set of commits from git into perforce"""
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(
                    start_at, end_at))
                self.attempt_resync()
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at,
                                                    self.ctx.tempdir.name)
                    fe.run()
                marks = []
                # Count commits up front so progress can be determinate.
                commit_count = 0
                for x in fe.commands:
                    if x['command'] == 'commit':
                        commit_count += 1
                self.progress.progress_init_determinate(commit_count)
                try:
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                            self.test_block_push()
                        if command['command'] == 'commit':
                            self.progress.progress_increment(
                                "Copying changelists...")
                            self.ctx.heartbeat()
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                                    # Empty commit: nothing submitted.
                                    continue
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1, p4_changelist_number
                                 ) = mark_to_commit_changelist(mark)
                                conflict_checker.record_commit(
                                    git_commit_sha1, p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                                    break
                            marks.append(mark)
                        elif command['command'] == 'reset':
                            pass
                        else:
                            raise RuntimeError(
                                "Unexpected fast-export command: " +
                                command['command'])
                finally:
                    # we want to write mirror objects for any commits that made it through
                    # any exception will still be alive after this
                    with self.perf.timer[MIRROR]:
                        self.ctx.mirror.add_commits(marks)
                        self.ctx.mirror.add_objects_to_p4(self.ctx)

                if conflict_checker.has_conflict():
                    raise RuntimeError(
                        "Conflicting change from Perforce caused one" +
                        " or more git commits to fail. Time to" +
                        " pull, rebase, and try again.")

        LOG.getChild("time").debug("\n" + str(self))
class GitMirror:
    """Mirror git metadata objects (commits and trees) into Perforce.

    Collects the commit and tree objects produced while copying changelists,
    then adds and submits them to the Git Fusion object cache depot
    (.git-fusion) so the git repo can later be reconstructed from Perforce.
    """
    def __init__(self, view_name):
        # view_name: Git Fusion view this mirror serves; stored on each
        # mirrored file via the 'views' attribute (see add_objects_with_views).
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        # Timers are (child, parent) pairs: child timers nest under their
        # parent in the performance report.
        self.perf.add_timers([
            OVERALL,
            (BUILD, OVERALL),
            (CAT_FILE, BUILD),
            (LS_TREE, BUILD),
            (LS_TREE_PROCESS, BUILD),
            (DIFF_TREE, BUILD),
            (DIFF_TREE_PROCESS, BUILD),
            (ADD_SUBMIT, OVERALL),
            (EXTRACT_OBJECTS, ADD_SUBMIT),
            (P4_FSTAT, ADD_SUBMIT),
            (P4_ADD, ADD_SUBMIT),
            (P4_SUBMIT, ADD_SUBMIT),
        ])
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.

        commit: SHA1 of the git commit
        ctx:    context supplying the p4gf connection and view name

        Returns None when the commit is not present in the object cache.
        """
        object_type = p4gf_object_type.sha1_to_object_type(
            sha1=commit,
            view_name=ctx.config.view_name,
            p4=ctx.p4gf,
            raise_on_error=False)
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    def add_commits(self, marks):
        """build list of commit and tree objects for a set of changelists

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1
        """

        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:

                    #parse perforce change number and SHA1 from marks
                    # parts[0] is ':changenum'; [1:] drops the leading colon.
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()

                    # add commit object
                    self.git_objects.add_object(
                        GitObject("commit", sha1,
                                  [(change_num, self.view_name)]))

                    # add all trees referenced by the commit
                    # After the first commit, only walk trees that differ
                    # from the previous commit's tree instead of a full
                    # snapshot walk.
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(
                            last_top_tree, sha1)
                    else:
                        last_top_tree = self.__get_snapshot_trees(sha1)

    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.

        ctx:       context supplying the p4gf connection
        add_files: client-workspace paths to 'p4 add' as binary files

        Files are processed in batches of 1000 to bound command-line and
        memory usage; each added file gets a 'views' attribute naming this
        mirror's view.
        """
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while len(add_files):
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            # Count any message that is not a benign "already open for add"
            # as a file that failed to add.
            for m in [
                    m for m in ctx.p4gf.messages
                    if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate
                        or m.dict['action'] != 'add')
            ]:
                files_not_added += 1
                LOG.debug(str(m))

            for r in [r for r in result if isinstance(r, dict)]:
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    LOG.debug(r)
                else:
                    added_files.append(r["depotFile"])
                    # Tree cache files end in "-tree"; everything else here
                    # is a commit object.
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees".format(
            commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while len(added_files):
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views", "-v",
                         self.view_name, bite)
        return files_not_added

    def add_objects_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""

        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror operation.
            opened = ctx.p4gf.run('opened')
            if opened:
                ctx.p4gf.run('revert',
                             '//{}/...'.format(ctx.config.p4client_gf))
            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug("adding {0} commits and {1} trees to .git-fusion...".
                          format(self.git_objects.counts['commit'],
                                 self.git_objects.counts['tree']))

                # build list of objects to add, extracting them from git
                self.progress.progress_init_determinate(
                    len(self.git_objects.objects))
                add_files = [
                    self.__add_object_to_p4(ctx, go)
                    for go in self.git_objects.objects.values()
                ]

                # filter out any files that have already been added
                # only do this if the number of files is large enough to justify
                # the cost of the fstat
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for existing
                    # files, which we can't know until we use fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    # Handler partitions results into new files (.files) and
                    # already-mirrored files (.existing) as fstat streams them.
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # spoon-feed p4 to avoid blowing out memory
                    while len(add_files):
                        bite = add_files[:bite_size]
                        add_files = add_files[bite_size:]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T",
                                     "depotFile, attr-views", bite)
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list".format(
                        original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:
                    return

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(
                        ctx, add_files)
                    # Existing cache files still need their 'views' attribute
                    # updated to include this view.
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    # Only submit if at least one file actually opened.
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                            view=ctx.config.view_name)
                        self.progress.status(
                            "Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                    else:
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        return "\n".join([str(self.git_objects), str(self.perf)])

    def __repr__(self):
        return "\n".join([repr(self.git_objects), str(self.perf)])

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    def __add_object_to_p4(self, ctx, go):
        """add a commit or tree to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use with
        p4 add
        """
        self.progress.progress_increment(
            "Adding new Git objects to Perforce...")
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:

            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                os.makedirs(dstdir)

            # get contents of commit or tree; can't just copy it because it's
            # probably in a packfile and we don't know which one.  And there's
            # no way to have git give us the compressed commit directly, so we
            # need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            # Git loose-object format: "<type> <size>\0" header, then body,
            # all zlib-compressed.
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:
                f.write(deflated)

            return dst

    def __get_snapshot_trees(self, commit):
        """get all tree objects for a given commit
            commit: SHA1 of commit

        each tree is added to the list to be mirrored

        return the SHA1 of the commit's tree
        """

        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            # -rt: recurse and include tree entries themselves in the output.
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
            [
                self.git_objects.add_object(GitObject("tree", m.group(1)))
                for line in po.splitlines() for m in [pattern.match(line)] if m
            ]
            # pylint: enable=W0106
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """get all tree objects new in one commit vs another commit
            topTree1: SHA1 of first commit's tree
            commit2: SHA1 of second commit

        each tree is added to the list to be mirrored

        return the SHA1 of commit2's tree
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            # -t: show tree entries in the diff, not just blobs.
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2],
                      stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            # (mode prefix "04" == directory/tree entry)
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*"
            )
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
            [
                self.git_objects.add_object(GitObject("tree", m.group(2)))
                for line in po.splitlines() for m in [pattern.match(line)]
                if m and m.group(1) == "04"
            ]
            # pylint: enable=W0106
        return top_tree2

    def __get_commit_tree(self, commit):
        """get the one and only tree at the top of commit

            commit: SHA1 of the commit

        add the tree object to the list of objects to be mirrored
        and return its SHA1
        """

        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE)
            po = p.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(po)
            for line in iter(po.splitlines()):
                if not line.startswith("tree"):
                    continue
                # line is: tree sha
                parts = line.strip().split(' ')
                sha1 = parts[1]
                self.git_objects.add_object(GitObject("tree", sha1))
                return sha1
# --- Esempio n. 11 (scraped example-page separator; "0" was a vote-count artifact) ---
class P2G:
    """class to manage copying from Perforce to git

    Orchestrates the full Perforce-to-git copy: select changelists,
    p4 print file content into git blobs, fstat for deleted revs and
    client paths, fake-sync, git-fast-import, then mirror the resulting
    git objects back into Perforce.
    """
    def __init__(self, ctx):
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)
        self.perf = p4gf_profiler.TimerCounterSet()
        # (child, parent) pairs nest child timers under OVERALL for reporting.
        self.perf.add_timers([
            OVERALL, (SETUP, OVERALL), (PRINT, OVERALL), (FSTAT, OVERALL),
            (SYNC, OVERALL), (FAST_IMPORT, OVERALL), (MIRROR, OVERALL),
            (MERGE, OVERALL), (PACK, OVERALL)
        ])

        self.rev_range = None  # RevRange instance set in copy().
        self.graft_change = None  # P4Changelist to graft onto, set in _setup().
        self.changes = None  # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs = None  # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([
            "\n\nFast Import:\n",
            str(self.fastimport), "",
            str(self.perf), ""
        ])

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug(
            "Revision range to copy to Git: {rr}".format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                self.ctx.p4, self.rev_range.graft_change_num,
                "ignore_depot_files")
            self.graft_change.description += (
                '\n[grafted history before {start_at}]'.format(
                    start_at=start_at))

    def _path_range(self):
        """Return the common path...@range string we use frequently.
        """
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        # Servers above level 32 can print without keyword expansion (-k),
        # so no client-side unexpand is needed — TODO confirm level mapping.
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
            args.append("-k")
        self.ctx.p4.run("print", args, self._path_range())
        printhandler.flush()
        printhandler.progress.progress_finish()

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
                args.append("-k")
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)
            printhandler.flush()

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if not p4file.change in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """run fstat to find deleted revs and get client paths

        Returns the list of changelist numbers to copy, sorted ascending
        (as strings).
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this fstat
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

        if self.graft_change:
            # Also run 'p4 fstat //<view>/...@change' for the graft
            # change to catch all files as of @change, not just
            # revs changed between begin and end of _path_range().
            self.ctx.p4.run("fstat", fstat_cols, self._graft_path())

        self.ctx.p4.handler = None

        self._collapse_to_graft_change()
        self._add_graft_to_changes()

        # don't need this any more
        self.printed_revs = None

        # Sort numerically, then convert back to strings for dict lookup.
        sorted_changes = [
            str(y) for y in sorted([int(x) for x in self.changes.keys()])
        ]

        LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes]))
        return sorted_changes

    def _sync(self, sorted_changes):
        """fake sync of last change to make life easier at push time"""
        self.ctx.p4.handler = SyncHandler()
        lastchange = self.changes[sorted_changes[-1]]
        # -k updates the server's have list without transferring file content.
        self.ctx.p4.run(
            "sync", "-kf",
            self.ctx.client_view_path() + "@" + str(lastchange.change))
        self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """build fast-import script from changes, then run fast-import

        Returns the list of marks emitted by git-fast-import.
        """
        self.progress.progress_init_determinate(len(sorted_changes))
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")
            self.ctx.heartbeat()

            # create commit and trees
            self.fastimport.add_commit(change, last_commit)

            last_commit = change.change

        # run git-fast-import and get list of marks
        marks = self.fastimport.run_fast_import()

        # done with these
        self.changes = None
        return marks

    def _mirror(self, marks):
        """build up list of p4 objects to mirror git repo in perforce
        then submit them
        """
        self.ctx.mirror.add_commits(marks)
        self.ctx.mirror.add_objects_to_p4(self.ctx)
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror))
        # Fresh mirror so a later copy starts with an empty object list.
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        last_commit = marks[len(marks) - 1]
        LOG.debug("Last commit created: " + last_commit)

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """run 'git gc' to pack up the blobs

        aside from any possible performance benefit, this prevents warnings
        from git about "unreachable loose objects"
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if (not self.graft_change):
            return
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(
            graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}".format(
                    p4changelist.change))
                continue

            LOG.debug("_collapse_to_graft_change() deleting {}".format(
                p4changelist.change))
            del_keys.append(p4changelist.change)
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}".
                          format(p4file))
                continue

            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                self.graft_change.files.append(p4file)
                LOG.debug(
                    "_collapse_to_graft_change() keeping {}".format(p4file))
            else:
                LOG.debug(
                    "_collapse_to_graft_change() skipping, had newer  {}".
                    format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        """
        if (not self.graft_change):
            return
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if (not self.graft_change):
            return
        return "{path}@{change}".format(path=self.ctx.client_view_path(),
                                        change=self.graft_change.change)

    def copy(self, start_at, stop_at):
        """copy a set of changelists from perforce into git"""

        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not len(self.changes):
                    LOG.debug("No new changes found to copy")
                    return

                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:
                self._copy_print()

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:
                self._sync(sorted_changes)

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with self.perf.timer[MIRROR]:
                self._mirror(marks)

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                self.fastimport.merge()

            with self.perf.timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))
# --- Esempio n. 12 (scraped example-page separator; "0" was a vote-count artifact) ---
class PrintHandler(OutputHandler):
    """OutputHandler for 'p4 print'; hashes printed file revs into the git repo.

    P4Python calls outputStat() once per rev, then outputText()/outputBinary()
    with the content. Content is spooled to a temp file; flush() then writes
    it as a zlib-compressed git blob under .git/objects and records the sha1
    on the rev's P4File.
    """
    def __init__(self, need_unexpand, tempdir):
        """
        need_unexpand: True when the server may send keyword-expanded (+k)
                       text that must be unexpanded before hashing.
        tempdir:       directory used for spool and compression temp files.
        """
        OutputHandler.__init__(self)
        self.rev = None  # P4File for the rev whose content is being received
        self.revs = RevList()  # all revs seen; sha1 filled in by flush()
        self.need_unexpand = need_unexpand
        self.tempfile = None  # spool file for the current rev's content
        self.tempdir = tempdir
        self.progress = ProgressReporter()
        self.progress.progress_init_indeterminate()

    def outputBinary(self, h):
        """assemble file content, then pass it to hasher via queue"""
        self.appendContent(h)
        return OutputHandler.HANDLED

    def outputText(self, h):
        """assemble file content, then pass it to hasher via queue
        """
        # text arrives as str; spool file is binary, so encode first
        b = bytes(h, 'UTF-8')
        self.appendContent(b)
        return OutputHandler.HANDLED

    def appendContent(self, h):
        """append a chunk of content to the temp file

        if server is 12.1 or older it may be sending expanded ktext files
        so we need to unexpand them

        It would be nice to incrementally compress and hash the file
        but that requires knowing the size up front, which p4 print does
        not currently supply.  If/when it does, this can be reworked to
        be more efficient with large files.  As it is, as long as the
        SpooledTemporaryFile doesn't rollover, it won't make much of a
        difference.

        So with that limitation, the incoming content is stuffed into
        a SpooledTemporaryFile.
        """
        if not h:
            return
        if self.need_unexpand and self.rev.is_k_type():
            h = unexpand(h)
        self.tempfile.write(h)

    def flush(self):
        """compress the last file, hash it and stick it in the repo

        Now that we've got the complete file contents, the header can be
        created and used along with the spooled content to create the sha1
        and zlib compressed blob content.  Finally that is written into
        the .git/objects dir.

        No-op when no rev is pending.
        """
        if not self.rev:
            return
        size = self.tempfile.tell()
        self.tempfile.seek(0)
        # delete=False: on success the file is renamed into .git/objects,
        # so it must survive close(); we remove it ourselves otherwise.
        compressed = tempfile.NamedTemporaryFile(delete=False,
                                                 dir=self.tempdir)
        compress = zlib.compressobj()
        # pylint doesn't understand dynamic definition of sha1 in hashlib
        # pylint: disable=E1101
        sha1 = hashlib.sha1()

        # pylint:disable=W1401
        # disable complaints about the null. We need that.
        # git loose-object header: "blob <size>\0" precedes the content
        header = ("blob " + str(size) + "\0").encode()
        compressed.write(compress.compress(header))
        sha1.update(header)

        # then the actual contents, chunked to bound memory use
        chunksize = 4096
        while True:
            chunk = self.tempfile.read(chunksize)
            if not chunk:
                break
            compressed.write(compress.compress(chunk))
            sha1.update(chunk)
        # pylint: enable=E1101
        compressed.write(compress.flush())
        compressed.close()
        digest = sha1.hexdigest()
        self.rev.sha1 = digest
        # loose objects live at .git/objects/<first 2 hex>/<remaining 38>
        blob_dir = ".git/objects/" + digest[:2]
        blob_file = digest[2:]
        blob_path = blob_dir + "/" + blob_file
        if not os.path.exists(blob_path):
            if not os.path.exists(blob_dir):
                os.makedirs(blob_dir)
            shutil.move(compressed.name, blob_path)
        else:
            # Blob already exists (identical content printed more than once).
            # The temp file was created with delete=False, so remove it here
            # or every duplicate rev would leak a file into tempdir.
            os.remove(compressed.name)
        self.rev = None

    def outputStat(self, h):
        """save path of current file

        Called once per rev before its content arrives; flushes any
        previously pending rev first, then resets the spool file.
        """
        self.flush()
        self.rev = P4File.create_from_print(h)
        self.revs.append(self.rev)
        self.progress.progress_increment('Copying files')
        LOG.debug("PrintHandler.outputStat() ch={} {}".format(
            h['change'], h["depotFile"] + "#" + h["rev"]))
        if self.tempfile:
            # reuse the existing spool file, truncated to empty
            self.tempfile.seek(0)
            self.tempfile.truncate()
        else:
            self.tempfile = tempfile.TemporaryFile(buffering=10000000,
                                                   dir=self.tempdir)
        return OutputHandler.HANDLED

    def outputInfo(self, _h):
        """outputInfo call not expected"""
        return OutputHandler.REPORT

    def outputMessage(self, _h):
        """outputMessage call not expected, indicates an error"""
        return OutputHandler.REPORT
class GitMirror:
    """handle git things that get mirrored in perforce

    Collects git commit and tree objects for a set of changelists and
    copies them (zlib-deflated, loose-object format) into the
    .git-fusion object cache in the Perforce depot.
    """

    def __init__(self, view_name):
        """Set up the object list, performance timers, and progress reporter.

        view_name: name of the Git Fusion view whose objects are mirrored.
        """
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                             (BUILD, OVERALL),
                             (CAT_FILE, BUILD),
                             (LS_TREE, BUILD),
                             (LS_TREE_PROCESS, BUILD),
                             (DIFF_TREE, BUILD),
                             (DIFF_TREE_PROCESS, BUILD),
                             (ADD_SUBMIT, OVERALL),
                             (EXTRACT_OBJECTS, ADD_SUBMIT),
                             (P4_FSTAT, ADD_SUBMIT),
                             (P4_ADD, ADD_SUBMIT),
                             (P4_SUBMIT, ADD_SUBMIT),
                             ])
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.

        Returns None when the commit is not present in the object mirror.
        """
        object_type = p4gf_object_type.sha1_to_object_type(
                              sha1           = commit
                            , view_name      = ctx.config.view_name
                            , p4             = ctx.p4gf
                            , raise_on_error = False)
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    def add_commits(self, marks):
        """build list of commit and tree objects for a set of changelists

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1
        """

        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:

                    # parse perforce change number and SHA1 from marks;
                    # [1:] strips the leading ':' from the mark number
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()

                    # add commit object
                    self.git_objects.add_object(
                        GitObject( "commit"
                                 , sha1
                                 , [(change_num, self.view_name)]
                                 ))

                    # add all trees referenced by the commit: a full
                    # snapshot for the first commit, then only the trees
                    # that differ from the previous commit's top tree
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(last_top_tree, sha1)
                    else:
                        last_top_tree = self.__get_snapshot_trees(sha1)

    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.

        Files are fed to 'p4 add' in batches of 1000 to bound memory
        use; the 'views' attribute is then set on each opened file.
        """
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while len(add_files):
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            # count every message except the benign "already open for add"
            for m in [m for m in ctx.p4gf.messages
                      if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate or
                          m.dict['action'] != 'add')]:
                files_not_added += 1
                LOG.debug(str(m))

            for r in [r for r in result if isinstance(r, dict)]:
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    LOG.debug(r)
                else:
                    added_files.append(r["depotFile"])
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees".format(commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while len(added_files):
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views", "-v", self.view_name, bite)
        return files_not_added

    def add_objects_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""

        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror operation.
            opened = ctx.p4gf.run('opened')
            if opened:
                ctx.p4gf.run('revert', '//{}/...'.format(ctx.config.p4client_gf))
            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug("adding {0} commits and {1} trees to .git-fusion...".
                          format(self.git_objects.counts['commit'],
                                 self.git_objects.counts['tree']))

                # build list of objects to add, extracting them from git
                self.progress.progress_init_determinate(len(self.git_objects.objects))
                add_files = [self.__add_object_to_p4(ctx, go)
                              for go in self.git_objects.objects.values()]

                # filter out any files that have already been added
                # NOTE(review): an earlier comment said this fstat pass runs
                # only when the file count justifies its cost, but the code
                # always runs it — confirm which is intended.
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for existing
                    # files, which we can't know until we use fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # spoon-feed p4 to avoid blowing out memory
                    while len(add_files):
                        bite = add_files[:bite_size]
                        add_files = add_files[bite_size:]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T", "depotFile, attr-views", bite)
                    # the handler accumulated the files to add and those
                    # already in the depot whose views need editing
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list"
                              .format(original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:
                    return

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(ctx, add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    # only submit if at least one file was actually opened
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                                view=ctx.config.view_name)
                        self.progress.status("Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                    else:
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        return "\n".join([str(self.git_objects),
                          str(self.perf)
                          ])

    def __repr__(self):
        return "\n".join([repr(self.git_objects),
                          str(self.perf)
                          ])

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    def __add_object_to_p4(self, ctx, go):
        """add a commit or tree to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use with
        p4 add
        """
        self.progress.progress_increment("Adding new Git objects to Perforce...")
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:

            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                os.makedirs(dstdir)

            # get contents of commit or tree; can't just copy it because it's
            # probably in a packfile and we don't know which one.  And there's
            # no way to have git give us the compressed commit directly, so we
            # need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            # rebuild the loose-object layout: "<type> <size>\0<content>"
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:
                f.write(deflated)

            return dst

    def __get_snapshot_trees(self, commit):
        """get all tree objects for a given commit
            commit: SHA1 of commit

        each tree is added to the list to be mirrored

        return the SHA1 of the commit's tree
        """

        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            # -rt: recurse and include tree entries themselves
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
            [self.git_objects.add_object(GitObject("tree", m.group(1)))
                                         for line in po.splitlines()
                                            for m in [pattern.match(line)]
                                                if m]
            # pylint: enable=W0106
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """get all tree objects new in one commit vs another commit
            topTree1: SHA1 of first commit's tree
            commit2: SHA1 of second commit

        each tree is added to the list to be mirrored

        return the SHA1 of commit2's tree
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            # -t: include tree entries in the diff output
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir ("04"
            # prefix, i.e. mode 040000)
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*")
            # yes, we're doing nothing with the result of this list comprehension
            # pylint: disable=W0106
            [self.git_objects.add_object(GitObject("tree", m.group(2)))
                             for line in po.splitlines()
                                for m in [pattern.match(line)]
                                    if m and m.group(1) == "04"]
            # pylint: enable=W0106
        return top_tree2

    def __get_commit_tree(self, commit):
        """get the one and only tree at the top of commit

            commit: SHA1 of the commit

        add the tree object to the list of objects to be mirrored
        and return its SHA1

        NOTE(review): implicitly returns None if cat-file output has no
        "tree" line — callers do not appear to guard against that.
        """

        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE)
            po = p.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(po)
            for line in iter(po.splitlines()):
                if not line.startswith("tree"):
                    continue
                # line is: tree sha
                parts = line.strip().split(' ')
                sha1 = parts[1]
                self.git_objects.add_object(GitObject("tree", sha1))
                return sha1
class G2P:
    """class to handle batching of p4 commands when copying git to p4

    One instance copies a range of git commits into Perforce via
    git-fast-export, opening/submitting one Perforce changelist per
    commit and mirroring the git objects afterwards.
    """
    def __init__(self, ctx):
        self.ctx = ctx
        # maps a p4 operation string ('add -f', 'edit', 'delete', 'move',
        # possibly with ' -t <type>') to the list of client paths (or
        # (frompath, topath) tuples for 'move') to run it on
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                             (FAST_EXPORT, OVERALL),
                             (TEST_BLOCK_PUSH, OVERALL),
                             (CHECK_CONFLICT, OVERALL),
                             (COPY, OVERALL),
                             (GIT_CHECKOUT, COPY),
                             (CHECK_PROTECTS, COPY),
                             (COPY_BLOBS_1, COPY),
                             (COPY_BLOBS_2, COPY),
                             (MIRROR, OVERALL),
                             ])
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([str(self.perf),
                          str(self.ctx.mirror)
                         ])

    def revert_and_raise(self, errmsg):
        """An error occurred while attempting to submit the incoming change
        to Perforce. As a result, revert all modifications, log the error,
        and raise an exception."""
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        self.remove_added_files()
        if not errmsg:
            errmsg = traceback.format_stack()
        msg = "import failed: {}".format(errmsg)
        LOG.error(msg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        '''
        Convert a single P4 message to a string.

        Annotate some errors with additional context such as P4USER or
        P4CLIENT.
        '''
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # Bug fix: this annotation previously mislabeled the client
            # value as "P4USER".
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    def check_p4_messages(self):
        """If the results indicate a file is locked by another user,
        raise an exception so that the overall commit will fail. The
        changes made so far will be reverted.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return

        lines = [self._p4_message_to_text(m) for m in msgs]
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        '''
        Run one P4 command, logging cmd and results.

        Raises (via check_p4_messages/revert_and_raise) if the results
        contain a "cannot open file" class of message.
        '''
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
            LOG.getChild('p4.err').error("\n".join(p4.errors))
        if p4.warnings:
            LOG.getChild('p4.warn').warning("\n".join(p4.warnings))
        LOG.getChild('p4.out').debug("{}".format(results))
        if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG):
            log = LOG.getChild('p4.msgid')
            for m in p4.messages:
                log.debug(p4gf_p4msg.msg_repr(m))

        self.check_p4_messages()

    def run_p4_commands(self):
        """run all pending p4 commands

        Commands were batched by setup_p4_command(); 'move' entries are
        (frompath, topath) tuples, all others are plain client paths.
        """
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if not cmd[0] == 'add':
                cmd.append('-k')
            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for pair in paths:
                    (frompath, topath) = pair
                    self._p4run(['move', '-k', escape_path(frompath), escape_path(topath)])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
            else:
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                if not cmd[0] == 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                else:
                    # 'add' takes unescaped paths (p4 add escapes itself)
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    # deleted files were opened with -k; remove the local
                    # copies ourselves
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:
                        os.remove(path)

    def remove_added_files(self):
        """remove added files to restore p4 client after failure of p4 command"""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            if cmd[0] == 'add':
                for path in paths:
                    os.unlink(path)

    def setup_p4_command(self, command, p4path):
        """Add command to list to be run by run_p4_commands. If the command
        is 'move' then the p4path is expected to be a tuple of the frompath
        and topath."""
        if command in self.addeditdelete:
            self.addeditdelete[command].append(p4path)
        else:
            self.addeditdelete[command] = [p4path]

    def _toggle_filetype(self, p4path, isx):
        """Returns the new file type for the named file, switching the
        executable state based on the isx value.

        Args:
            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        p4type = None
        if isx:
            # partial filetype modifier; p4 merges it with the base type
            p4type = '+x'
        else:
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                try:
                    p4type = p4gf_util.first_value_for_key(
                                self.ctx.p4.run(['fstat', '-T' + tipe, p4path]),
                                tipe)
                except P4.P4Exception:
                    pass
                if p4type:
                    p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """run p4 add or edit for a new or modified file

        blob: fast-export file dict with 'path' (and 'mode' for adds).
        """

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                LOG.debug("  set filetype: {ft}  oldx={oldx} newx={newx}"
                          .format(ft=p4type,
                                  oldx=wasx,
                                  newx=isx))
            # copy permissions/times first, then content
            shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """ run p4 move for a renamed/moved file"""
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)
        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """run p4 integ for a copied file"""
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # 'copy -v' opens the target without touching the local file, so
        # it can run before the destination directory exists
        self._p4run(["copy", "-v", escape_path(p4frompath), escape_path(p4topath)])

        # make sure dest dir exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """run p4 delete for a deleted file"""

        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """copy git blobs to perforce revs"""
        # first, one pass to do rename/copy
        # these don't batch.  move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            self.run_p4_commands()
        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands.  That means no more than one delete, one add per
        # filetype and one edit per filetype.  Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            self.run_p4_commands()

    def check_protects(self, p4user, blobs):
        """check if author is authorized to submit files

        Reverts and raises if any path is outside the author's protections.
        """
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        pc.filter_paths(blobs)
        if pc.has_error():
            self.revert_and_raise(pc.error_message())

    def _reset_for_new_commit(self):
        """
        Clear out state from previous commit that must not carry over
        into next commit.
        """
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This prevents
        the obscure "file(s) not on client" error.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            # (Logger.warn is a deprecated alias; use warning)
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """copy a single commit

        Returns a git-fast-import-style mark ":changenum sha1", or None
        if the commit produced an empty changelist. Reverts and raises on
        merge commits, unknown authors, invalid filenames, or p4 errors.
        """

        self._reset_for_new_commit()

        LOG.debug("for  commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted."
                                   +" Rebase to create a linear"
                                   +" history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise("User '{}' not permitted to commit".format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:
                self.revert_and_raise(err)

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinquish the revision from a path...
                p4gf_util.popen(['git', 'reset', '--hard', commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        try:
            self.copy_blobs(commit['files'])
        except P4.P4Exception as e:
            self.revert_and_raise(str(e))

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

            try:
                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                                          commit['author']['date'])
                    LOG.info("Submitted change @{} for commit {}".format(changenum, commit['sha1']))
                else:
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
                # revert_and_raise always raises, so changenum below is
                # only reached on success
                self.revert_and_raise(str(e))
            return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script
        introduce conflicting changes.
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:
            return

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)
        log.debug(block_dict)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)))
        cl_ay = self.ctx.p4.run('changes',
                                '-l',
                                p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client))

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))
                return

        until = block_dict['until']
        log.debug("BLOCKING. Seen        'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now"
                           .format(path=p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client),
                                   change=cl_ay[-1]['change']))

        # poll once a second until a changelist containing 'until' appears
        while not contains_desc(until, cl_ay):
            time.sleep(1)
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    def copy(self, start_at, end_at):
        """copy a set of commits from git into perforce

        start_at/end_at: git revision bounds passed to git-fast-export.
        Raises RuntimeError on conflicting Perforce changes or on any
        commit that fails to copy.
        """
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(start_at, end_at))
                self.attempt_resync()
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at, self.ctx.tempdir.name)
                    fe.run()
                marks = []
                commit_count = 0
                for x in fe.commands:
                    if x['command'] == 'commit':
                        commit_count += 1
                self.progress.progress_init_determinate(commit_count)
                try:
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                            self.test_block_push()
                        if command['command'] == 'commit':
                            self.progress.progress_increment("Copying changelists...")
                            self.ctx.heartbeat()
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                                    # empty commit: nothing submitted
                                    continue
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1,
                                 p4_changelist_number) = mark_to_commit_changelist(mark)
                                conflict_checker.record_commit(git_commit_sha1,
                                                               p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                                    break
                            marks.append(mark)
                        elif command['command'] == 'reset':
                            pass
                        else:
                            raise RuntimeError("Unexpected fast-export command: " +
                                               command['command'])
                finally:
                    # we want to write mirror objects for any commits that made it through
                    # any exception will still be alive after this
                    with self.perf.timer[MIRROR]:
                        self.ctx.mirror.add_commits(marks)
                        self.ctx.mirror.add_objects_to_p4(self.ctx)

                if conflict_checker.has_conflict():
                    raise RuntimeError("Conflicting change from Perforce caused one"
                                       + " or more git commits to fail. Time to"
                                       + " pull, rebase, and try again.")

        LOG.getChild("time").debug("\n" + str(self))
class P2G:
    """Manage copying a range of changelists from Perforce into git.

    copy() drives the pipeline: determine the changelist range (_setup),
    'p4 print' file revisions into the git object store (_copy_print),
    'p4 fstat' for deleted revs and client paths (_fstat), fake-sync the
    client (_sync), build and run git-fast-import (_fast_import), mirror
    the new git objects back into Perforce (_mirror), merge the temporary
    import branch, and finally repack the repo (_pack).
    """
    def __init__(self, ctx):
        """Create the fast-import helper and performance timers.

        ctx: p4gf context carrying the p4 connection, git mirror,
             timezone, content root, view config and temp directory.
        """
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([OVERALL,
                            (SETUP, OVERALL),
                            (PRINT, OVERALL),
                            (FSTAT, OVERALL),
                            (SYNC, OVERALL),
                            (FAST_IMPORT, OVERALL),
                            (MIRROR, OVERALL),
                            (MERGE, OVERALL),
                            (PACK, OVERALL)
                            ])

        self.rev_range      = None  # RevRange instance set in copy().
        self.graft_change   = None  # P4Changelist holding grafted history, if any.
        self.changes        = None  # dict['changelist'] ==> P4Changelist of what to copy()
        self.printed_revs   = None  # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress       = ProgressReporter()

    def __str__(self):
        return "\n".join(["\n\nFast Import:\n",
                          str(self.fastimport),
                          "",
                          str(self.perf),
                          ""
                          ])

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug("Revision range to copy to Git: {rr}"
                  .format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
                            self.ctx.p4,
                            self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                                    self.ctx.p4,
                                    self.rev_range.graft_change_num,
                                    "ignore_depot_files")
            self.graft_change.description += ('\n[grafted history before {start_at}]'
                                              .format(start_at=start_at))

    def _path_range(self):
        """Return the common path...@range string we use frequently.
        """
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        # Server level > 32 (2012.1+) can send ktext files unexpanded (-k);
        # older servers require unexpanding client-side.
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler
        args = ["-a"]
        if server_can_unexpand:
            args.append("-k")
        self.ctx.p4.run("print", args, self._path_range())
        printhandler.flush()
        printhandler.progress.progress_finish()

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            args = []
            if server_can_unexpand:
                args.append("-k")
            path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(path))
            self.ctx.p4.run("print", args, path)
            printhandler.flush()

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if p4file.change not in self.changes:
                    cl = P4Changelist()
                    cl.change = p4file.change
                    self.changes[p4file.change] = cl

        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """run fstat to find deleted revs and get client paths

        Returns the list of changelist numbers, as strings, in ascending
        numeric order.
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this fstat
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

        if self.graft_change:
            # Also run 'p4 fstat //<view>/...@change' for the graft
            # change to catch all files as of @change, not just
            # revs changed between begin and end of _path_range().
            self.ctx.p4.run("fstat", fstat_cols, self._graft_path())

        self.ctx.p4.handler = None

        self._collapse_to_graft_change()
        self._add_graft_to_changes()

        # don't need this any more
        self.printed_revs = None

        # Sort numerically but keep the strings for dict lookup.
        sorted_changes = [str(y) for y in sorted(int(x) for x in self.changes)]

        LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes]))
        return sorted_changes

    def _sync(self, sorted_changes):
        """fake sync of last change to make life easier at push time"""
        self.ctx.p4.handler = SyncHandler()
        lastchange = self.changes[sorted_changes[-1]]
        # 'sync -kf' updates the server's have list without touching the
        # client workspace files.
        self.ctx.p4.run("sync", "-kf",
                self.ctx.client_view_path() + "@" + str(lastchange.change))
        self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """build fast-import script from changes, then run fast-import

        Returns the list of marks produced by git-fast-import.
        """
        self.progress.progress_init_determinate(len(sorted_changes))
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")
            self.ctx.heartbeat()

            # create commit and trees
            self.fastimport.add_commit(change, last_commit)

            last_commit = change.change

        # run git-fast-import and get list of marks
        marks = self.fastimport.run_fast_import()

        # done with these
        self.changes = None
        return marks

    def _mirror(self, marks):
        """build up list of p4 objects to mirror git repo in perforce
        then submit them
        """
        self.ctx.mirror.add_commits(marks)
        self.ctx.mirror.add_objects_to_p4(self.ctx)
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror))
        # Replace the (now submitted) mirror with a fresh one for any
        # subsequent work on this view.
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        last_commit = marks[-1]
        LOG.debug("Last commit created: " + last_commit)

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """run 'git gc' to pack up the blobs

        aside from any possible performance benefit, this prevents warnings
        from git about "unreachable loose objects"
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if not self.graft_change:
            return
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}".format(p4changelist.change))
                continue

            LOG.debug("_collapse_to_graft_change() deleting {}".format(p4changelist.change))
            del_keys.append(p4changelist.change)
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}".format(p4file))
                continue

            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                self.graft_change.files.append(p4file)
                LOG.debug("_collapse_to_graft_change() keeping {}".format(p4file))
            else:
                LOG.debug("_collapse_to_graft_change() skipping, had newer  {}".format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        """
        if not self.graft_change:
            return
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if not self.graft_change:
            return None
        return "{path}@{change}".format(
                        path = self.ctx.client_view_path(),
                        change = self.graft_change.change)

    def copy(self, start_at, stop_at):
        """copy a set of changelists from perforce into git"""

        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not self.changes:
                    LOG.debug("No new changes found to copy")
                    return

                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:
                self._copy_print()

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:
                self._sync(sorted_changes)

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with self.perf.timer[MIRROR]:
                self._mirror(marks)

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                self.fastimport.merge()

            with self.perf.timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))
class PrintHandler(OutputHandler):
    """OutputHandler for p4 print, hashes files into git repo"""
    def __init__(self, need_unexpand, tempdir):
        """Init handler.

        need_unexpand: True if the server may send keyword-expanded
                       ktext content that must be unexpanded before
                       hashing.
        tempdir:       directory used for spooled and temporary files.
        """
        OutputHandler.__init__(self)
        self.rev = None        # P4File whose content is being assembled
        self.revs = RevList()  # all file revs seen so far
        self.need_unexpand = need_unexpand
        self.tempfile = None   # spool for the current rev's raw content
        self.tempdir = tempdir
        self.progress = ProgressReporter()
        self.progress.progress_init_indeterminate()

    def outputBinary(self, h):
        """assemble file content, then pass it to hasher via queue"""
        self.appendContent(h)
        return OutputHandler.HANDLED

    def outputText(self, h):
        """assemble file content, then pass it to hasher via queue
        """
        b = bytes(h, 'UTF-8')
        self.appendContent(b)
        return OutputHandler.HANDLED

    def appendContent(self, h):
        """append a chunk of content to the temp file

        if server is 12.1 or older it may be sending expanded ktext files
        so we need to unexpand them

        It would be nice to incrementally compress and hash the file
        but that requires knowing the size up front, which p4 print does
        not currently supply.  If/when it does, this can be reworked to
        be more efficient with large files.  As it is, as long as the
        SpooledTemporaryFile doesn't rollover, it won't make much of a
        difference.

        So with that limitation, the incoming content is stuffed into
        a SpooledTemporaryFile.
        """
        if not h:
            return
        if self.need_unexpand and self.rev.is_k_type():
            h = unexpand(h)
        self.tempfile.write(h)

    def flush(self):
        """compress the last file, hash it and stick it in the repo

        Now that we've got the complete file contents, the header can be
        created and used along with the spooled content to create the sha1
        and zlib compressed blob content.  Finally that is written into
        the .git/objects dir.
        """
        if not self.rev:
            return
        size = self.tempfile.tell()
        self.tempfile.seek(0)
        compressed = tempfile.NamedTemporaryFile(delete=False, dir=self.tempdir)
        compress = zlib.compressobj()
        # pylint doesn't understand dynamic definition of sha1 in hashlib
        # pylint: disable=E1101
        sha1 = hashlib.sha1()

        # pylint:disable=W1401
        # disable complaints about the null. We need that.
        # add header first: git loose objects are "blob <size>\0<content>"
        header = ("blob " + str(size) + "\0").encode()
        compressed.write(compress.compress(header))
        sha1.update(header)

        # then actual contents
        chunksize = 4096
        while True:
            chunk = self.tempfile.read(chunksize)
            if chunk:
                compressed.write(compress.compress(chunk))
                sha1.update(chunk)
            else:
                break
        # pylint: enable=E1101
        compressed.write(compress.flush())
        compressed.close()
        digest = sha1.hexdigest()
        self.rev.sha1 = digest
        blob_dir = ".git/objects/"+digest[:2]
        blob_file = digest[2:]
        blob_path = blob_dir+"/"+blob_file
        if not os.path.exists(blob_path):
            if not os.path.exists(blob_dir):
                os.makedirs(blob_dir)
            shutil.move(compressed.name, blob_path)
        else:
            # Identical blob already in the repo: discard the temp file.
            # It was created with delete=False, so without this it would
            # be leaked on disk in tempdir.
            os.remove(compressed.name)
        self.rev = None

    def outputStat(self, h):
        """save path of current file"""
        # Finish the previous rev before starting to collect this one.
        self.flush()
        self.rev = P4File.create_from_print(h)
        self.revs.append(self.rev)
        self.progress.progress_increment('Copying files')
        LOG.debug("PrintHandler.outputStat() ch={} {}"
                  .format(h['change'], h["depotFile"] + "#" + h["rev"]))
        if self.tempfile:
            # Reuse the spool file: rewind and drop the old content.
            self.tempfile.seek(0)
            self.tempfile.truncate()
        else:
            self.tempfile = tempfile.TemporaryFile(buffering=10000000, dir=self.tempdir)
        return OutputHandler.HANDLED

    def outputInfo(self, _h):
        """outputInfo call not expected"""
        return OutputHandler.REPORT

    def outputMessage(self, _h):
        """outputMessage call not expected, indicates an error"""
        return OutputHandler.REPORT