def sync(self):
    """Sync cached tree and commit objects into the local p4 directory.

    The naming convention of /git-fusion/objects/repos/commits/...
    prevents using a single 'sync' straight into 'git/objects/...'
    (it would require two lhs '...' against one rhs '...').
    Additionally blobs use a tree depth two deeper than used by commits
    and trees.  To make the later git-conformant renaming of these objects
    into .git/objects/... easier, each kind is first synced into its own
    directory: trees/... and repos/...

    Two passes are made, trees first and then commits.  Each pass ensures
    the client spec carries the Root and View for that pass, counts the
    files to be synced, replaces self.eta with a fresh ETA sized to that
    count, and runs 'sync -p ...' with a SyncHandler installed.

    A P4 OutputHandler is used to avoid memory overruns and to perform
    the db inserts.
    """
    depot = p4gf_const.P4GF_DEPOT

    def run_sync(total, object_kind):
        """Run 'sync -p ...' under a progress reporter sized to total."""
        with ProgressReporter.Determinate(total):
            output_handler = SyncHandler(self, object_kind)
            with self.p4.using_handler(output_handler):
                self.p4.run('sync', '-p', '...')

    # --- Pass 1: trees, mapped into <client>/trees/... ---
    tree_view = ['//{depot}/objects/trees/... //{client}/trees/...'.format(
        depot=depot, client=self.p4.client)]
    tree_depot_path = '//{depot}/objects/trees/...'.format(depot=depot)
    # Number of trees (the original comment says this uses 'p4 sizes').
    tree_total = self.get_num_files(tree_depot_path)
    self.eta = p4gf_eta.ETA(total_ct=tree_total)
    p4gf_p4spec.ensure_spec_values(
        self.p4, 'client', self.p4.client,
        {'Root': self.p4_dir_abspath, 'View': tree_view})
    run_sync(tree_total, TREES)

    # --- Pass 2: commits, mapped into <client>/repos/.../commits/... ---
    # Note the client spec is updated *before* counting in this pass.
    commit_view = [
        '//{depot}/objects/repos/.../commits/... //{client}/repos/.../commits/...'.format(
            depot=depot, client=self.p4.client)]
    p4gf_p4spec.ensure_spec_values(
        self.p4, 'client', self.p4.client,
        {'Root': self.p4_dir_abspath, 'View': commit_view})
    commit_depot_path = '//{depot}/objects/repos/.../commits/...'.format(
        depot=depot)
    # Number of commits (the original comment says this uses 'p4 sizes').
    commit_total = self.get_num_files(commit_depot_path)
    self.eta = p4gf_eta.ETA(total_ct=commit_total)
    run_sync(commit_total, COMMITS)
def add_blobs_to_db(self):
    """Populate the database with blob sha1s by running 'p4 files'.

    The blob depot prefix is '<objects_root>/blobs/'.  The files beneath
    it are counted to size both self.eta and the progress reporter, then
    'files' is run over the same wildcard path with a FilesHandler
    installed.  The FilesHandler does the inserts and avoids memory
    overruns.
    """
    blob_prefix = NTR('{objects_root}/blobs/').format(
        objects_root=p4gf_const.objects_root())
    every_blob = blob_prefix + '...'

    blob_total = self.get_num_files(every_blob)
    self.eta = p4gf_eta.ETA(total_ct=blob_total)

    with ProgressReporter.Determinate(blob_total):
        output_handler = FilesHandler(self, blob_prefix)
        with self.p4.using_handler(output_handler):
            self.p4.run('files', every_blob)
def __init__(self):
    """Open the git repository in the current directory with empty tallies.

    All collections start out empty; the ETA starts with no total.
    """
    # pygit2 repository located at the current working directory.
    self.repo = pygit2.Repository(".")

    # Set-valued attributes.
    # NOTE(review): presumably identifiers/paths already seen -- confirm
    # against the code that fills them.
    (self.trees,
     self.blobs,
     self.commits,
     self.gwt_paths) = set(), set(), set(), set()

    # List-valued attributes.
    # NOTE(review): names suggest per-object byte sizes and counts --
    # confirm against the code that fills them.
    (self.commit_byte_ct,
     self.blob_byte_ct,
     self.tree_byte_ct,
     self.gwt_ct,
     self.file_action_ct) = [], [], [], [], []

    self.eta = p4gf_eta.ETA()
def find_reachable(self):
    """Mark every tree and blob reachable in the database.

    Reads every row of each table listed under TREES_FROM_COMMITS and
    passes the first column (as a str) to mark_tree_contents_reachable().
    The trees in these tables are already marked reachable; that method
    recurses into the tree entries to mark nested trees/blobs reachable.

    Side effects:
        * self.eta is replaced with an ETA sized to the
          TREES_FROM_COMMITS row count.
        * self.git_dir is overwritten with a '--git-dir=<abspath>' string
          (once per table, as in the original).
        * Unless self.quiet is set, the ETA and the progress reporter are
          advanced once per row.
    """
    tree_count = self.table_type_counts[TREES_FROM_COMMITS]
    self.eta = p4gf_eta.ETA(total_ct=tree_count)
    with ProgressReporter.Determinate(tree_count):
        # This first table set contains the trees extracted from known commits.
        for table in self.table_names[TREES_FROM_COMMITS]:
            # Table names cannot be bound as SQL parameters, hence format().
            # NOTE(review): presumably these names are generated internally
            # and never come from user input -- confirm.
            cursor = self.db.execute("SELECT * from {}".format(table))
            # Deliberately left inside the loop: hoisting it would set
            # self.git_dir even when there are no tables to process.
            self.git_dir = "--git-dir={}".format(self.git_dir_abspath)
            while True:
                row = cursor.fetchone()
                # Identity test: None is a singleton (was 'row == None').
                if row is None:
                    break
                if not self.quiet:
                    self.eta.increment()
                    ProgressReporter.increment(
                        _("Traversing commit trees to find "
                          " cached reachable trees and blobs ... {et} {ed}").
                        format(et=self.eta.eta_str(),
                               ed=self.eta.eta_delta_str()))
                tree = row[0]
                # this method recurses for any tree entry within this
                # top-level tree
                self.mark_tree_contents_reachable(str(tree))
def move_objects(self, walk_root, top_level_regex, subdir_regex,
                 trim_hyphenated_suffix=False):
    """Move the object cache objects to '.git/objects'.

    Git distributes objects over directories named after the first two
    characters of the sha1, while the Git Fusion cache uses xx/xx
    (trees and commits), so each file name has to be re-assembled as
    <sub1>/<sub2 + file name> under self.git_dir_objects.

    Args:
        walk_root: directory to walk.  Tree handling is enabled when it
            contains TREES, commit handling when it contains 'repos'.
        top_level_regex: compiled pattern; for every walked directory it
            matches, each immediate subdirectory name is created under
            self.git_dir_objects.
        subdir_regex: compiled pattern with named groups 'sub1' and
            'sub2'; it must match every walked directory that holds files.
        trim_hyphenated_suffix: when true, strip everything from the first
            '-' onward from the assembled git file name.

    For trees, each object is recorded via sql_insert_object() before the
    move.  For commits, add_tree_from_commit_to_table() is called after
    the move and the moved commit object is then deleted.

    Exits the process with status 1 if subdir_regex does not match a
    directory holding files, or if a rename/unlink raises OSError.
    """
    # pylint: disable=too-many-branches,too-many-statements
    trees_mode = TREES in walk_root
    commits_mode = 'repos' in walk_root

    if trees_mode:
        progress_template = _("Moving cached trees to local git ... {et} {ed}")
        total = self.sync_counts[TREES]
    else:
        progress_template = _("Moving cached commits to local git ... {et} {ed}")
        total = self.sync_counts[COMMITS]

    def abort_on(os_error):
        """Log the OS error and terminate the process."""
        LOG.error("exception {}".format(str(os_error)))
        sys.exit(1)

    self.eta = p4gf_eta.ETA(total_ct=total)
    with ProgressReporter.Determinate(total):
        for current_dir, subdirs, filenames in os.walk(walk_root):
            # For top level dirs, create the same dir under '.git/objects'
            if top_level_regex.match(current_dir):
                for subdir in subdirs:
                    p4gf_ensure_dir.ensure_dir(
                        os.path.join(self.git_dir_objects, subdir))

            # Only directories that hold files have anything to move.
            if not filenames:
                continue

            # NOTE(review): progress advances once per directory, while
            # the total above is an object count -- confirm this is intended.
            if not self.quiet:
                self.eta.increment()
                ProgressReporter.increment(progress_template.format(
                    et=self.eta.eta_str(),
                    ed=self.eta.eta_delta_str()))

            located = subdir_regex.match(current_dir)
            if not located:
                complaint = (
                    "regex failed to match as expected on {}.\nStopping."
                    .format(current_dir))
                LOG.error(complaint)
                print(complaint)
                sys.exit(1)
            sub1 = located.group('sub1')
            sub2 = located.group('sub2')

            # Depot prefix is only needed (and only computed) for trees.
            depot_path_prefix = None
            if trees_mode:
                depot_path_prefix = NTR("{}/trees/{}/{}/".format(
                    p4gf_const.objects_root(), sub1, sub2))

            for name in filenames:
                git_file = sub2 + name
                if trim_hyphenated_suffix:
                    git_file = re.sub(r'-.*$', '', git_file)
                git_sha1 = sub1 + git_file

                if trees_mode:
                    self.sql_insert_object(
                        TREES, git_sha1, depot_path_prefix + name)

                destination = os.path.join(
                    self.git_dir_objects, sub1, git_file)
                origin = os.path.join(current_dir, name)

                # Finally, move the p4 path to the git path
                try:
                    os.rename(origin, destination)
                except OSError as rename_error:
                    abort_on(rename_error)

                if commits_mode:
                    self.add_tree_from_commit_to_table(git_sha1)
                    # now that the commit's tree sha1 is in the db,
                    # the commit object is no longer needed
                    try:
                        os.unlink(destination)
                    except OSError as unlink_error:
                        abort_on(unlink_error)