Example #1
0
 def outputStat(self, h):
     """Supress output to prevent memory exhaustion."""
     try:
         self.gc.eta.increment()
         if not self.gc.quiet:
             ProgressReporter.increment(self.message.format(
                 otype = self.object_type, et = self.gc.eta.eta_str(),
                 ed = self.gc.eta.eta_delta_str()))
         self.gc.sync_counts[self.object_type] += 1
     except Exception:  # pylint: disable=broad-except
         pass
     return OutputHandler.HANDLED
Example #2
0
 def outputStat(self, h):
     """Save path of current file."""
     self.flush()
     self.rev = P4File.create_from_print(h)
     self.change_set.add(self.rev.change)
     ProgressReporter.increment(_('Copying files'))
     LOG.debug2("PrintHandler.outputStat() ch={} {}#{}".format(
         self.rev.change, self.rev.depot_path, self.rev.revision))
     # use the git working tree so we can use create_blob_fromfile()
     tmpdir = os.getcwd()
     self.tempfile = tempfile.NamedTemporaryFile(
         buffering=10000000, prefix='p2g-print-', dir=tmpdir, delete=False)
     return OutputHandler.HANDLED
Example #3
0
 def outputStat(self, h):
     """Used to insert DELETED trees and commits into the database."""
     try:
         if not self.gc.quiet:
             ProgressReporter.increment(self.message)
         depot_path = h['depotFile']
         if self.object_type == 'tree':
             #sha1 = depot_path.replace(self.depot_path_prefix,'')
             #sha1 = sha1.replace('/','')
             self.gc.sql_insert_object(DELETED_TREES, depot_path)
         elif self.object_type == 'commit':
             self.gc.sql_insert_object(DELETED_COMMITS, depot_path)
     except Exception as e:  # pylint: disable=broad-except
         LOG.exception("FstatHandler:outputStat {}".format(str(e)))
     return OutputHandler.HANDLED
Example #4
0
 def sql_report_table(self, statement, value=None):
     """Select and print the rows of a table."""
     rows_printed = 0
     if value is not None:
         cursor = self.db.execute(statement, (value,))
     else:
         cursor = self.db.execute(statement)
     while True:
         row = cursor.fetchone()
         if row is None:
             break
         print("%s" % (row[0],), file=self.fd)
         rows_printed += 1
         if self.report_file != 'stdout' and not self.quiet:
             ProgressReporter.increment(self.report_progress_msg)
     return rows_printed
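
For reference only: a minimal standalone sketch of the same select-and-print loop using sqlite3 cursor iteration instead of a fetchone() loop (the connection and names here are illustrative, not Git Fusion's):

    import sqlite3

    def report_table(db, statement, params=None, fd=None):
        """Print the first column of each selected row; return the row count."""
        cursor = db.execute(statement, params) if params is not None else db.execute(statement)
        rows_printed = 0
        for row in cursor:  # sqlite3 cursors are iterable, so no explicit fetchone() loop is needed
            print(row[0], file=fd)
            rows_printed += 1
        return rows_printed

    # Illustrative usage:
    # db = sqlite3.connect(':memory:')
    # report_table(db, "SELECT name FROM sqlite_master WHERE type = ?", ('table',))
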
Example #5
0
    def _really_add_commits_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""
        desc = _("Git Fusion '{view}' copied to Git.").format(
            view=ctx.config.view_name)
        with p4gf_util.NumberedChangelist(gfctx=ctx, description=desc) as nc:
            with Timer(ADD_SUBMIT):
                LOG.debug("adding {0} commits to .git-fusion...".format(
                    len(self.commits.commits)))

                # build list of objects to add, extracting them from git
                add_files = [
                    self.__add_object_to_p4(ctx, go)
                    for go in self.commits.commits.values()
                ]
                add_files = GitMirror.optimize_objects_to_add_to_p4(
                    ctx, add_files)

                if not (len(add_files) or self.depot_branch_info_list
                        or self.branch_list):
                    # Avoid a blank line in output by printing something
                    ProgressReporter.write(
                        _('No Git objects to submit to Perforce'))
                    LOG.debug("_really_add_objects_to_p4() nothing to add...")
                    return

                with Timer(P4_ADD):
                    files_added = self.add_objects_to_p4_2(ctx, add_files)

                    depot_branch_infos_added = \
                                    self._add_depot_branch_infos_to_p4(ctx)

                    config2_added = self._add_branch_defs_to_p4(ctx)

                    cldfs_added = self._add_cldfs_to_p4(ctx)

                with Timer(P4_SUBMIT):
                    if (files_added or depot_branch_infos_added
                            or config2_added or cldfs_added):
                        ProgressReporter.increment(
                            _('Submitting new Git commit objects to Perforce'))
                        r = nc.submit()
                        ObjectType.update_indexes(ctx, r)
                    else:
                        ProgressReporter.write(
                            _('No new Git objects to submit to Perforce'))
                        LOG.debug("ignoring empty change list...")
Example #6
0
 def outputStat(self, h):
     """Used to store all blob depot_paths into the database."""
     try:
         if not self.gc.quiet:
             self.gc.eta.increment()
             ProgressReporter.increment(self.message.format(
                 et = self.gc.eta.eta_str() , ed = self.gc.eta.eta_delta_str()))
         depot_path = h['depotFile']
         deleted = 1 if h['action'] in ['delete','move/delete'] else 0
         sha1 = depot_path.replace(self.blobs_root,'')
         sha1 = sha1.replace('/','')
         if deleted:
             self.gc.sql_insert_object(DELETED_BLOBS, depot_path)
         else:
             self.gc.sql_insert_object(BLOBS, sha1, depot_path)
     except Exception as e:  # pylint: disable=broad-except
         LOG.exception("FilesHandler:outputStat {}".format(str(e)))
     return OutputHandler.HANDLED
Example #7
0
    def _really_add_commits_to_p4(self, ctx):
        """actually run p4 add, submit to create mirror files in .git-fusion"""
        desc = _("Git Fusion '{view}' copied to Git.").format(view=ctx.config.view_name)
        with p4gf_util.NumberedChangelist(gfctx=ctx, description=desc) as nc:
            with Timer(ADD_SUBMIT):
                LOG.debug("adding {0} commits to .git-fusion...".
                          format(len(self.commits.commits)))

                # build list of objects to add, extracting them from git
                add_files = [self.__add_object_to_p4(ctx, go)
                             for go in self.commits.commits.values()]
                add_files = GitMirror.optimize_objects_to_add_to_p4(ctx, add_files)

                if not (   len(add_files)
                        or self.depot_branch_info_list
                        or self.branch_list ):
                    # Avoid a blank line in output by printing something
                    ProgressReporter.write(_('No Git objects to submit to Perforce'))
                    LOG.debug("_really_add_objects_to_p4() nothing to add...")
                    return

                with Timer(P4_ADD):
                    files_added = self.add_objects_to_p4_2(ctx, add_files)

                    depot_branch_infos_added = \
                                    self._add_depot_branch_infos_to_p4(ctx)

                    config2_added = self._add_branch_defs_to_p4(ctx)

                    cldfs_added = self._add_cldfs_to_p4(ctx)

                with Timer(P4_SUBMIT):
                    if (   files_added
                        or depot_branch_infos_added
                        or config2_added
                        or cldfs_added ):
                        ProgressReporter.increment(
                               _('Submitting new Git commit objects to Perforce'))
                        r = nc.submit()
                        ObjectType.update_indexes(ctx, r)
                    else:
                        ProgressReporter.write(
                               _('No new Git objects to submit to Perforce'))
                        LOG.debug("ignoring empty change list...")
Example #8
0
    def __add_object_to_p4(ctx, go):
        """add a commit to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use with
        p4 add
        """
        ProgressReporter.increment(
            _('Adding new Git commit objects to Perforce...'))
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = os.path.join(ctx.gitlocalroot, go.to_p4_client_path())

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with Timer(EXTRACT_OBJECTS):

            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                try:
                    os.makedirs(dstdir)
                #pylint:disable=E0602
                # pylint running on python 3.2 does not know about 3.3 features
                except FileExistsError:
                    #pylint:enable=E0602
                    # For file exists error, just ignore it, probably another
                    # process creating the same thing simultaneously.
                    pass
                except OSError as e:
                    raise e

            # Hardlink the Git object into the Perforce workspace
            op = p4gf_git.object_path(go.sha1)
            os.link(op, dst)
            LOG.debug2("adding new object: " + dst)

            return dst
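
Side note: on Python 3.3+ the FileExistsError handling above can be collapsed into os.makedirs(..., exist_ok=True). A minimal sketch of the equivalent directory-plus-hardlink step, with illustrative helper and path names (not Git Fusion's):

    import os

    def link_object_into_workspace(obj_path, dst):
        """Hard-link a Git object file into a workspace path, tolerating directory races."""
        os.makedirs(os.path.dirname(dst), exist_ok=True)  # no error if another process created it first
        if not os.path.exists(dst):
            os.link(obj_path, dst)  # same hard-link step as in __add_object_to_p4()
        return dst
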
Example #9
0
 def outputStat(self, h):
     """Save path of current file."""
     try:
         self.flush()
         self.rev = P4File.create_from_print(h)
         self.change_set.add(self.rev.change)
         ProgressReporter.increment(_('Copying files'))
         LOG.debug2("PrintHandler.outputStat() ch={} {}#{}".format(
             self.rev.change, self.rev.depot_path, self.rev.revision))
         # use the git working tree so we can use create_blob_fromfile()
         tmpdir = os.getcwd()
         self.temp_file = tempfile.NamedTemporaryFile(buffering=10000000,
                                                      prefix='p2g-print-',
                                                      dir=tmpdir,
                                                      delete=False)
         LOG.debug3('outputStat() temporary file created: {}'.format(
             self.temp_file.name))
     except Exception:  # pylint: disable=broad-except
         LOG.exception("outputStat")
     return OutputHandler.HANDLED
Example #10
0
    def __add_object_to_p4(ctx, go):
        """add a commit to the git-fusion perforce client workspace

        return the path of the client workspace file suitable for use with
        p4 add
        """
        ProgressReporter.increment(_('Adding new Git commit objects to Perforce...'))
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = os.path.join(ctx.gitlocalroot, go.to_p4_client_path())

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it.  We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with Timer(EXTRACT_OBJECTS):

            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                try:
                    os.makedirs(dstdir)
                #pylint:disable=E0602
                # pylint running on python 3.2 does not know about 3.3 features
                except FileExistsError:
                #pylint:enable=E0602
                    # For file exists error, just ignore it, probably another
                    # process creating the same thing simultaneously.
                    pass
                except OSError as e:
                    raise e

            # Hardlink the Git object into the Perforce workspace
            op = p4gf_git.object_path(go.sha1)
            os.link(op, dst)
            LOG.debug2("adding new object: " + dst)

            return dst
Example #11
0
 def find_reachable(self):
     """Mark every tree and blob reachable in the database.
     The trees in these tables are already marked reachable.
     Use recursion into the trees and mark trees/blobs reachable."""
     tree_count = self.table_type_counts[TREES_FROM_COMMITS]
     self.eta = p4gf_eta.ETA(total_ct = tree_count)
     with ProgressReporter.Determinate(tree_count):
         # This first table set contains the trees extracted from known commits.
         for table in self.table_names[TREES_FROM_COMMITS]:
             cursor = self.db.execute("SELECT * from {}".format(table))
             self.git_dir = "--git-dir={}".format(self.git_dir_abspath)
             while True:
                 row = cursor.fetchone()
                 if row is None:
                     break
                 if not self.quiet:
                     self.eta.increment()
                     ProgressReporter.increment(
                             _("Traversing commit trees to find "
                               " cached reachable trees and blobs ... {et} {ed}").
                         format( et = self.eta.eta_str() , ed = self.eta.eta_delta_str()))
                 tree = row[0]
                 # this method recurses for any tree entry within this top-level tree
                 self.mark_tree_contents_reachable(str(tree))
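
The recursion mentioned in the docstring walks Git tree objects. A minimal standalone sketch of such a traversal using `git cat-file -p` (helper names are illustrative; Git Fusion's own mark_tree_contents_reachable() is not reproduced here):

    import subprocess

    def walk_tree(tree_sha1, git_dir, seen=None):
        """Yield (object_type, sha1) for every object reachable from tree_sha1."""
        seen = set() if seen is None else seen
        out = subprocess.run(
            ['git', '--git-dir={}'.format(git_dir), 'cat-file', '-p', tree_sha1],
            capture_output=True, text=True, check=True).stdout
        for line in out.splitlines():
            # each tree entry looks like: "<mode> <type> <sha1>\t<name>"
            meta, _name = line.split('\t', 1)
            _mode, otype, sha1 = meta.split()
            if sha1 in seen:
                continue
            seen.add(sha1)
            yield otype, sha1
            if otype == 'tree':
                yield from walk_tree(sha1, git_dir, seen)
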
Example #12
0
    def copy(self, start_at, stop_at, new_git_branches):
        """copy a set of changelists from perforce into git"""

        LOG.debug('copy() start={} stop={} new_git_branches={}'.format(
            start_at, stop_at, new_git_branches))

        with self.p2g.perf.timer[OVERALL]:
            self.p2g._log_memory('start')

            # Stop early if nothing to copy.
            # 'p4 changes -m1 //client/...'
            repo_empty = p4gf_util.git_empty()
            if not self._requires_copy(new_git_branches, repo_empty):
                self.p2g.fastimport.cleanup()
                LOG.debug("No changes since last copy.")
                return

            ### Fake start. Needs proper setup with graft support.
            self.p2g.rev_range = self.p2g.mc_rev_range()

            self.ctx.view_repo = pygit2.Repository(self.ctx.view_dirs.GIT_DIR)
            self.p2g._log_memory('pygit2')

            # O(Bfp + 1) p4 changes: one for each Branch fully populated,
            #                        + 1 for //.git-fusion/branches/repo/...
            #
            with self.p2g.perf.timer[CHANGES]:
                self.change_num_on_branch_list \
                                        = deque(self._p4_changes_each_branch())
            LOG.debug('Found ChangeNumOnBranch count: {}'.format(
                len(self.change_num_on_branch_list)))
            self.p2g._log_memory('O(Bfp + 1) p4 changes')
            #p4gf_gc.report_growth ('aftr O(Bfp + 1) p4 changes')
            #p4gf_gc.report_objects('aftr O(Bfp + 1) p4 changes')
            if not self.change_num_on_branch_list:
                LOG.debug("No new changes found to copy")
                return

            # Prepare for the loop over each changelist x branch.
            p4gf_util.ensure_dir(self.symlink_dir)
            self.p2g._fill_head_marks_from_current_heads()
            self.mark_to_branch_id = {}
            self.branch_id_to_temp_name \
                                = self.p2g._create_branch_id_to_temp_name_dict()

            self.known_dbi_set = {
                branch.depot_branch
                for branch in self.ctx.branch_dict().values()
                if branch.depot_branch
            }
            # All string placeholders should have been replaced with
            # pointers to full DepotBranchInfo instances before
            # calling copy().
            for dbi in self.known_dbi_set:
                assert not isinstance(dbi, str)
                LOG.error('known: {}'.format(dbi))  ##ZZ

            # O(C) 'p4 changes -m1 + filelog + print'
            #
            # Print each file revision to its blob in the .git/objects/
            # Write each changelist to git-fast-import script.
            #
            with ProgressReporter.Indeterminate():
                while self.change_num_on_branch_list:
                    ProgressReporter.increment("MC Copying changelists...")
                    cnob = self.change_num_on_branch_list.pop()
                    self._copy_one(cnob)

            # Explicitly delete the PrintHandler now so that it
            # won't show up in any leak reports between now and
            # P2GMemCapped's end-of-life.
            if self.print_handler:
                self.printed_byte_count = self.print_handler.total_byte_count
                self.printed_rev_count = self.print_handler.printed_rev_count
                self.print_handler = None

            self.p2g._log_memory('P2G_MC.copy() loop')
            p4gf_gc.report_growth('after P2G_MC.copy() loop')
            p4gf_gc.report_objects('after P2G_MC.copy() loop')
            #p4gf_gc.backref_objects_by_type(dict().__class__)

            # Run git-fast-import to add everything to Git.
            with self.p2g.perf.timer[FAST_IMPORT]:
                LOG.info('Running git-fast-import')
                marks = self.p2g.fastimport.run_fast_import()

            # Remove all temporary Git branch refs.
            # After git-fast-import, we no longer need them.
            self._delete_temp_git_branch_refs()

            # Record how much we've copied in a p4 counter so that
            # future calls to _any_changes_since_last_copy() can
            # tell if there's anything new to copy.
            self.ctx.write_last_copied_change(self.highest_copied_change_num)

            if repo_empty:
                # If we are just now rebuilding the Git repository, also
                # grab all of the tags that have been pushed in the past.
                p4gf_tag.generate_tags(self.ctx)
                self.p2g._log_memory('_generate_tags')

            with self.p2g.perf.timer[MIRROR]:
                self.p2g._mirror(marks, self.mark_to_branch_id)
                self.p2g._log_memory('_mirror')

            with self.p2g.perf.timer[BRANCH_REF]:
                self.p2g._set_branch_refs(marks)
                self.p2g._log_memory('_set_branch_refs')

            with self.p2g.perf.timer[PACK]:
                self.p2g._pack()
                self.p2g._log_memory('_pack')

        LOG.getChild("time").debug("\n" + str(self))
        LOG.info('MC Done. Commits: {cnob_ct:,d}  File Revisions: {rev_ct:,d}'
                 '  Bytes: {byte_ct:,d}  Seconds: {sec:,d}'.format(
                     cnob_ct=self.cnob_count,
                     rev_ct=self.printed_rev_count,
                     byte_ct=self.printed_byte_count,
                     sec=int(self.p2g.perf.timer[OVERALL].time)))
        p4gf_gc.report_objects('after P2G MC copy()')
        self.p2g._log_memory('copy() done')
Example #13
0
    def _load_commit_dag(self):
        '''
        Load the Git commit tree into memory. We just need the
        parent/child relationships.
        '''
        # A single call to git-rev-list produces both the commit sha1 list
        # that we need AND the child->parent associations that we need. It's
        # screaming fast: 32,000 commit lines in <1 second.
        with Timer(TIMER_RUN_REV_LIST):
            range_list = [prt.to_range() for prt in self.pre_receive_list]
            cmd        = [ 'git', 'rev-list'
                         , '--date-order', '--parents'] + range_list
            LOG.debug2("DAG: {}".format(' '.join(cmd)))
            d = p4gf_proc.popen(cmd)

        seen_parents = set()

        # Pass 1: Build up a dict of sha1->Assign objects, one per commit.
        with Timer(TIMER_CONSUME_REV_LIST):
            lines = d['out'].splitlines()
            with ProgressReporter.Determinate(len(lines)):
                for line in lines:
                    ProgressReporter.increment(_('Loading commit tree into memory...'))
                    sha1s = line.split()
                    curr_sha1 = sha1s.pop(0)
                    self.rev_list.append(curr_sha1)
                    if LOG.isEnabledFor(logging.DEBUG3):
                        LOG.debug3('DAG: rev_list {} {}'
                                   .format( p4gf_util.abbrev(curr_sha1)
                                          , ' '.join(p4gf_util.abbrev(sha1s))))
                    self.assign_dict[curr_sha1] = Assign(curr_sha1, sha1s)
                    seen_parents.update(sha1s)

        # git-rev-list is awesome in that it gives us only as much as we need
        # for self.rev_list, but unawesome in that this optimization tends to
        # omit paths to branch refs' OLD heads if the old heads are 2+ commits
        # back in time, and that time is ALREADY covered by some OTHER branch.
        # Re-run each pushed branch separately to add enough Assign() nodes
        # to form a full path to its old ref.
        if 2 <= len(self.pre_receive_list):
            for prt in self.pre_receive_list:
                # Skip NEW branch refs: those don't have
                # to connect up to anything.
                if prt.old_sha1 == p4gf_const.NULL_COMMIT_SHA1:
                    continue
                with Timer(TIMER_RUN_REV_LIST):
                    cmd  = [ 'git', 'rev-list'
                           , '--date-order', '--parents', '--reverse', prt.to_range()]
                    LOG.debug2("DAG: {}".format(' '.join(cmd)))
                    d = p4gf_proc.popen(cmd)

                with Timer(TIMER_CONSUME_REV_LIST):
                    for line in d['out'].splitlines():
                        sha1s = line.split()
                        curr_sha1 = sha1s.pop(0)
                        if curr_sha1 in self.assign_dict:
                            break
                        LOG.debug3('DAG: path     {} {}'
                                   .format( p4gf_util.abbrev(curr_sha1)
                                          , ' '.join(p4gf_util.abbrev(sha1s))))
                        self.assign_dict[curr_sha1] = Assign(curr_sha1, sha1s)
                        seen_parents.update(sha1s)

        # Create acting-as-parent-only nodes in dict, too. We don't process
        # these as part of iterating over revs, but we need them when
        # tree walking.
        with Timer(TIMER_CONSUME_REV_LIST):
            parent_only = seen_parents - set(self.assign_dict.keys())
            for curr_sha1 in parent_only:
                if curr_sha1 in self.assign_dict:
                    break
                LOG.debug3('DAG: par only {}'.format( p4gf_util.abbrev(curr_sha1)))
                self.assign_dict[curr_sha1] = Assign(curr_sha1, [])

        # Pass 2: Fill in Assign.children list
        with Timer(TIMER_ASSIGN_CHILDREN):
            with ProgressReporter.Determinate(len(self.assign_dict)):
                for assign in self.assign_dict.values():
                    ProgressReporter.increment(_('Finding child commits...'))
                    for par_sha1 in assign.parents:
                        par_assign = self.assign_dict.get(par_sha1)
                        if par_assign:
                            par_assign.children.add(assign.sha1)
                        else:
                            # Expected and okay: some parents already exist and
                            # are not part of our push/fast-export list.
                            LOG.debug2(
                                "DAG: child {child} -> parent {parent}: parent not part of push"
                                .format(child=assign.sha1[:7], parent=par_sha1[:7]))
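
The `git rev-list --parents` output consumed above is plain text: each line is a commit sha1 followed by the sha1s of its parents. A minimal standalone sketch of parsing it into a parent map (names are illustrative, not part of Git Fusion):

    import subprocess

    def load_parent_map(rev_range, git_dir='.'):
        """Return {commit_sha1: [parent_sha1, ...]} for the given rev range."""
        out = subprocess.run(
            ['git', 'rev-list', '--date-order', '--parents', rev_range],
            cwd=git_dir, capture_output=True, text=True, check=True).stdout
        parent_map = {}
        for line in out.splitlines():
            sha1s = line.split()          # first token is the commit, the rest are its parents
            parent_map[sha1s[0]] = sha1s[1:]
        return parent_map

    # Illustrative usage: load_parent_map('master~20..master')
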
Example #14
0
    def check_commits(self, commits):
        """Ensure the entire sequence of commits will (likely) go through
        without any errors related to permissions or locks. Raises an
        exception if anything goes wrong.

        Arguments:
            commits -- commits from FastExport class
        """
        LOG.info('Checking Perforce permissions and locks')
        self.ctx.checkpoint("copy_to_p4._preflight_check")

        # Stop if files are opened in our repo client
        # We expect this to be none, since we have the view lock
        opened = self.ctx.p4.run(['opened', '-m1'])
        if opened:
            raise PreflightException(
                _('There are files opened by Git Fusion for this repo.'))

        # fetch the repo setting only, without cascading to global config
        is_read_only = self.ctx.repo_config.getboolean(
            p4gf_config.SECTION_REPO,
            p4gf_config.KEY_READ_ONLY,
            fallback=False)
        if is_read_only:
            raise PreflightException(
                _("Push to repo {repo_name} prohibited.").format(
                    repo_name=self.ctx.config.repo_name))

        # get a list of stream depots for later checks for read-only paths
        depots = self.ctx.p4.run(['depots'])
        self.stream_depots = set(
            [d['name'] for d in depots if d['type'] == 'stream'])
        any_locked_files = self._find_locked_by()
        LOG.debug("any_locked_files {0}".format(any_locked_files))
        case_conflict_checker = None
        if not self.ctx.server_is_case_sensitive:
            case_conflict_checker = CaseConflictChecker(self.ctx)
            case_conflict_checker.read_perforce_paths()

        ui_name = self._curr_ref_ui_name()
        if ui_name:
            progress_msg = _('Checking commits for {ref}...').format(
                ref=ui_name)
        else:
            progress_msg = _('Checking commits...')

        with ProgressReporter.Determinate(len(commits)):
            for commit in commits:
                ProgressReporter.increment(progress_msg)

                self.g2p_user.get_author_pusher_owner(commit)

                rev = commit['sha1']
                if not self.assigner.is_assigned(commit['sha1']):
                    continue

                self.check_commit(commit)

                for branch_id in self.assigner.branch_id_list(rev):
                    self.check_commit_for_branch(commit, branch_id,
                                                 any_locked_files,
                                                 case_conflict_checker)

        if case_conflict_checker:
            cc_text = case_conflict_checker.conflict_text()
            if cc_text:
                raise PreflightException(cc_text)
Example #15
0
    def move_objects(self, walk_root, top_level_regex, subdir_regex, trim_hyphenated_suffix=False):
        """Move the object cache objects to '.git/objects'."""
        # pylint: disable=too-many-branches,too-many-statements
        # Because git paths are distributed over directory names taken
        # from the first 2 chars of the sha1, and Git Fusion cache
        # paths use xx/xx (trees and commits)
        # there is a boatload of data munging going on here.
        doing_trees = TREES in walk_root
        doing_commits = 'repos' in walk_root
        if doing_trees:
            progress_msg = _("Moving cached trees to local git ... {et} {ed}")
            object_count = self.sync_counts[TREES]
        else:
            progress_msg = _("Moving cached commits to local git ... {et} {ed}")
            object_count = self.sync_counts[COMMITS]
        self.eta = p4gf_eta.ETA(total_ct = object_count)
        with ProgressReporter.Determinate(object_count):
            for walk_root, _dirs, files in os.walk(walk_root):
                # For top level dirs, create the same dir under '.git/objects'
                m = top_level_regex.match(walk_root)
                if m:
                    for d in _dirs:
                        obj_dir = os.path.join(self.git_dir_objects, d)
                        p4gf_ensure_dir.ensure_dir(obj_dir)

                # If we have files we need to move them to 'git/objects'
                if files:
                    if not self.quiet:
                        self.eta.increment()
                        ProgressReporter.increment(progress_msg.
                            format( et = self.eta.eta_str()
                                  , ed = self.eta.eta_delta_str()))
                    sub1 = sub2 = None
                    m = subdir_regex.match(walk_root)
                    if m:
                        sub1 = m.group('sub1')
                        sub2 = m.group('sub2')
                    else:
                        LOG.error("regex failed to match as expected on {}.\nStopping.".format
                                (walk_root))
                        print("regex failed to match as expected on {}.\nStopping.".
                                format(walk_root))
                        sys.exit(1)
                    if doing_trees:
                        depot_path_prefix = NTR("{}/trees/{}/{}/".format(
                             p4gf_const.objects_root(), sub1, sub2))
                    for name in files:
                        git_file = sub2 + name
                        if trim_hyphenated_suffix:
                            git_file = re.sub(r'-.*$','',git_file)
                        git_sha1 = sub1 + git_file
                        if doing_trees:
                            depot_path = depot_path_prefix + name
                            self.sql_insert_object(TREES, git_sha1, depot_path)

                        git_path = os.path.join(self.git_dir_objects, sub1)
                        git_path = os.path.join(git_path,git_file)
                        p4_path = os.path.join(walk_root, name)
                        # Finally , move the p4 path to the git path
                        try:
                            os.rename(p4_path,git_path)
                        except OSError as e:
                            LOG.error("exception {}".format(str(e)))
                            sys.exit(1)
                        if doing_commits:
                            self.add_tree_from_commit_to_table(git_sha1)
                            # now that the commit's tree sha1 is in the db,
                            # the commit object is no longer needed
                            try:
                                os.unlink(git_path)
                            except OSError as e:
                                LOG.error("exception {}".format(str(e)))
                                sys.exit(1)
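
The sub1/sub2 munging above reflects how Git names loose objects: a 40-character sha1 maps to .git/objects/<first two characters>/<remaining 38 characters>. A minimal illustrative helper (not Git Fusion code):

    import os

    def loose_object_path(git_dir_objects, sha1):
        """Return the loose-object path for a 40-character sha1 under .git/objects."""
        return os.path.join(git_dir_objects, sha1[:2], sha1[2:])

    # e.g. loose_object_path('.git/objects', 'a94a8fe5ccb19ba61c4c0873d391e987982fbbd3')
    #      -> '.git/objects/a9/4a8fe5ccb19ba61c4c0873d391e987982fbbd3'
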
Example #16
0
    def copy(self, start_at, stop_at, new_git_branches):
        """copy a set of changelists from perforce into git"""

        LOG.debug('copy() start={} stop={} new_git_branches={}'
                  .format(start_at, stop_at, new_git_branches))

        with self.p2g.perf.timer[OVERALL]:
            self.p2g._log_memory('start')

            # Stop early if nothing to copy.
            # 'p4 changes -m1 //client/...'
            repo_empty = p4gf_util.git_empty()
            if not self._requires_copy(new_git_branches, repo_empty):
                self.p2g.fastimport.cleanup()
                LOG.debug("No changes since last copy.")
                return

            ### Fake start. Needs proper setup with graft support.
            self.p2g.rev_range = self.p2g.mc_rev_range()

            self.ctx.view_repo = pygit2.Repository(self.ctx.view_dirs.GIT_DIR)
            self.p2g._log_memory('pygit2')

            # O(Bfp + 1) p4 changes: one for each Branch fully populated,
            #                        + 1 for //.git-fusion/branches/repo/...
            #
            with self.p2g.perf.timer[CHANGES]:
                self.change_num_on_branch_list \
                                        = deque(self._p4_changes_each_branch())
            LOG.debug('Found ChangeNumOnBranch count: {}'
                      .format(len(self.change_num_on_branch_list)))
            self.p2g._log_memory(        'O(Bfp + 1) p4 changes')
            #p4gf_gc.report_growth ('aftr O(Bfp + 1) p4 changes')
            #p4gf_gc.report_objects('aftr O(Bfp + 1) p4 changes')
            if not self.change_num_on_branch_list:
                LOG.debug("No new changes found to copy")
                return

                        # Prepare for the loop over each changelist x branch.
            p4gf_util.ensure_dir(self.symlink_dir)
            self.p2g._fill_head_marks_from_current_heads()
            self.mark_to_branch_id = {}
            self.branch_id_to_temp_name \
                                = self.p2g._create_branch_id_to_temp_name_dict()

            self.known_dbi_set = { branch.depot_branch
                               for branch in self.ctx.branch_dict().values()
                               if branch.depot_branch }
                        # All string placeholders should have been replaced with
                        # pointers to full DepotBranchInfo instances before
                        # calling copy().
            for dbi in self.known_dbi_set:
                assert not isinstance(dbi, str)
                LOG.error('known: {}'.format(dbi))  ##ZZ

            # O(C) 'p4 changes -m1 + filelog + print'
            #
            # Print each file revision to its blob in the .git/objects/
            # Write each changelist to git-fast-import script.
            #
            with ProgressReporter.Indeterminate():
                while self.change_num_on_branch_list:
                    ProgressReporter.increment("MC Copying changelists...")
                    cnob = self.change_num_on_branch_list.pop()
                    self._copy_one(cnob)

                        # Explicitly delete the PrintHandler now so that it
                        # won't show up in any leak reports between now and
                        # P2GMemCapped's end-of-life.
            if self.print_handler:
                self.printed_byte_count = self.print_handler.total_byte_count
                self.printed_rev_count  = self.print_handler.printed_rev_count
                self.print_handler      = None

            self.p2g._log_memory(        'P2G_MC.copy() loop')
            p4gf_gc.report_growth ('after P2G_MC.copy() loop')
            p4gf_gc.report_objects('after P2G_MC.copy() loop')
            #p4gf_gc.backref_objects_by_type(dict().__class__)

            # Run git-fast-import to add everything to Git.
            with self.p2g.perf.timer[FAST_IMPORT]:
                LOG.info('Running git-fast-import')
                marks = self.p2g.fastimport.run_fast_import()

                        # Remove all temporary Git branch refs.
                        # After git-fast-import, we no longer need them.
            self._delete_temp_git_branch_refs()

                        # Record how much we've copied in a p4 counter so that
                        # future calls to _any_changes_since_last_copy() can
                        # tell if there's anything new to copy.
            self.ctx.write_last_copied_change(self.highest_copied_change_num)

            if repo_empty:
                # If we are just now rebuilding the Git repository, also
                # grab all of the tags that have been pushed in the past.
                p4gf_tag.generate_tags(self.ctx)
                self.p2g._log_memory('_generate_tags')

            with self.p2g.perf.timer[MIRROR]:
                self.p2g._mirror(marks, self.mark_to_branch_id)
                self.p2g._log_memory('_mirror')

            with self.p2g.perf.timer[BRANCH_REF]:
                self.p2g._set_branch_refs(marks)
                self.p2g._log_memory('_set_branch_refs')

            with self.p2g.perf.timer[PACK]:
                self.p2g._pack()
                self.p2g._log_memory('_pack')

        LOG.getChild("time").debug("\n" + str(self))
        LOG.info('MC Done. Commits: {cnob_ct:,d}  File Revisions: {rev_ct:,d}'
                 '  Bytes: {byte_ct:,d}  Seconds: {sec:,d}'
                 .format( cnob_ct = self.cnob_count
                        , rev_ct  = self.printed_rev_count
                        , byte_ct = self.printed_byte_count
                        , sec     = int(self.p2g.perf.timer[OVERALL].time)
                        ))
        p4gf_gc.report_objects('after P2G MC copy()')
        self.p2g._log_memory('copy() done')