def update_tree(
    repo: git.Repository, tree: git.Oid, path: List[str], content: str
) -> git.Oid:
    """
    adds a blob with `content` at `path` to `tree` in `repo`

    `path` is a list of entry names: every element but the last names a
    subtree, the last one names the blob. Subtrees that do not exist yet are
    created. Returns the id of the newly written tree; `tree` itself is not
    modified.

    Raises `Exception("invalid path: ...")` when `path` is empty, which also
    happens when the last component of `path` names an existing subtree
    (the recursion is then left with no name for the blob).

    >>> repo = create_repo()
    >>> tree = repo.TreeBuilder().write()
    >>> for i in range(10):
    ...     path = store_hash(f"{i}")
    ...     content = nar_hash(path)
    ...     tree = update_tree(repo, tree, common.shards(path, depth=5), content)
    >>> print(tree)
    00f68bdb866b654d4ce3da90609b74137605bd90
    """
    # Reject an empty path before touching path[0]; otherwise a non-empty
    # tree would surface a bare IndexError from the loop below instead of the
    # intended "invalid path" error.
    if not path:
        raise Exception(f"invalid path: {path}")

    head, rest = path[0], path[1:]
    # Look the tree object up once; it is reused for iteration and as the
    # base of whichever builder ends up writing the result.
    root = repo.get(tree)

    for entry in root:
        # subdir exists: recurse into it and swap in the updated subtree
        if entry.name == head and entry.type == "tree":
            sub = update_tree(repo, entry.id, rest, content)
            builder = repo.TreeBuilder(root)
            builder.remove(head)
            builder.insert(head, sub, git.GIT_FILEMODE_TREE)
            return builder.write()

    # subdir does not exist: create required objects
    if rest:
        # write leaf node: a fresh tree holding only the blob
        sub = update_tree(repo, repo.TreeBuilder().write(), [path[-1]], content)
        # build intermediate nodes, innermost first
        for d in reversed(path[1:-1]):
            builder = repo.TreeBuilder()
            builder.insert(d, sub, git.GIT_FILEMODE_TREE)
            sub = builder.write()
        # attach to `tree`
        builder = repo.TreeBuilder(root)
        builder.insert(head, sub, git.GIT_FILEMODE_TREE)
        return builder.write()

    # path[0] is not a subdir and is the last component: write blob
    blob = repo.write(git.GIT_OBJ_BLOB, content)
    builder = repo.TreeBuilder(root)
    builder.insert(head, blob, git.GIT_FILEMODE_BLOB)
    return builder.write()
class GitDict:
    '''
    A python dict, stored in git so it can be larger than memory and yet
    accessed for reading and writing efficiently. Keys and values are git
    objects. The collection of key-value pairs is stored in a one level
    hierarchy of pages (git objects) that are indexed by a page table (also
    a git object).

    Two references under ``refs/<refs_ns>/`` hold the state: ``<name>.size``
    points at a blob with the element count and ``<name>.items`` points at a
    blob with the serialized page table.
    '''

    def __init__(self, dir_, name, log=None, do_create=False, refs_ns='tags',
                 h_order=3):
        '''
        Open the repository at `dir_` and set up the dict called `name`.

        If the repository cannot be opened and `do_create` is true, a bare
        repository is initialized at `dir_`; otherwise the GitError is
        re-raised. `log` is the callable used by `report` (defaults to
        `print`).
        '''
        self.dir_ = dir_
        self.name = name
        self.name_size = name + '.size'
        self.name_items = name + '.items'
        self.log = log or print
        self.refs_ns = refs_ns
        self.h_order = h_order
        # Number of hex digits of a key oid used as hash key: enough whole
        # bytes to cover 7 bits (one page-table index) per level.
        self.h_key_len = ((7 * h_order // 8) + 1) * 2
        try:
            self.repo = Repository(dir_)
        except GitError:
            if not do_create:
                raise
            self.repo = init_repository(dir_, bare=True)
        self.none = self.repo.write(GIT_OBJ_BLOB, '')
        # NOTE(review): _init() runs on every construction and re-points both
        # references at a size of '0' and a default PageTable(), so opening
        # an existing dict under the same name resets it. Confirm intended.
        self._init()

    def __repr__(self):
        return f'GitDict("{self.dir_}", "{self.name}")'

    def _lookup_reference(self, name):
        '''Return the reference `name` inside this dict's ref namespace.'''
        return self.repo.lookup_reference(f'refs/{self.refs_ns}/{name}')

    def _set_reference(self, name, target):
        '''Point reference `name` at `target`, creating it if the lookup
        raises KeyError.'''
        try:
            self._lookup_reference(name).set_target(target)
        except KeyError:
            self.repo.references.create(f'refs/{self.refs_ns}/{name}', target)

    def _init(self):
        '''Write the initial state: size '0' and a default page table.'''
        self._set_reference(self.name_size, self.repo.write(GIT_OBJ_BLOB, '0'))
        self.items_table = PageTable()

    @property
    def items_table(self):
        '''The page table, deserialized from the blob the items ref targets.

        Assigning a table writes its `data` as a new blob and moves the ref.
        '''
        return PageTable(self.repo[self._lookup_reference(
            self.name_items).target].data)

    @items_table.setter
    def items_table(self, table):
        self._set_reference(self.name_items,
                            self.repo.write(GIT_OBJ_BLOB, table.data))

    def __len__(self):
        # Parse the blob's payload explicitly, like every other blob read in
        # this class, rather than handing the Blob object itself to int().
        return int(self.repo[self._lookup_reference(self.name_size).target].data)

    def _inc_size(self):
        '''Store len(self) + 1 as the new size blob.'''
        new_size = self.repo.write(GIT_OBJ_BLOB, str(len(self) + 1))
        self._set_reference(self.name_size, new_size)

    def __contains__(self, key):
        return self.get(key) is not None

    def get(self, key, default=None):
        '''Return the value stored for `key`, or `default` on KeyError.'''
        try:
            return self[key]
        except KeyError:
            return default

    def _key_oid_and_h_key(self, oid):
        '''Return (raw oid bytes, integer made of the first `h_key_len` hex
        digits of the oid).'''
        return oid.raw, int(oid.hex[:self.h_key_len], 16)

    @staticmethod
    def _entry_no(h_key, level):
        '''Return the 7-bit page-table index of `h_key` for `level`.'''
        return (h_key >> (7 * level)) & 127

    def _get_page(self, h_key, table=None):
        '''
        Return the ItemPage that `h_key` maps to at level 0.

        `table` defaults to the stored page table. A TypeError raised while
        loading the page is treated as "no page yet" and yields a
        default-constructed ItemPage.
        '''
        # Only fall back to the stored table when none was passed; a plain
        # truthiness test would also discard a caller's table that happens
        # to evaluate as false.
        if table is None:
            table = self.items_table
        entry_no = self._entry_no(h_key, 0)
        try:
            return ItemPage(self.repo[Oid(table[entry_no])].data)
        except TypeError:
            return ItemPage()

    def __getitem__(self, key):
        # pyghash presumably yields the oid the key would get as a blob,
        # without writing it (verify against its import).
        key_oid, h_key = self._key_oid_and_h_key(pyghash(key))
        page = self._get_page(h_key)
        value_oid = page[key_oid]
        return self.repo[Oid(value_oid)].data

    def __setitem__(self, key, value):
        '''
        Store `value` under `key` and bump the size.

        NOTE(review): if `key` is already present this returns without
        storing `value`, so existing entries are never overwritten. Confirm
        that this write-once behavior is intended.
        '''
        key_oid, h_key = self._key_oid_and_h_key(
            self.repo.write(GIT_OBJ_BLOB, key))
        value_oid = self.repo.write(GIT_OBJ_BLOB, value).raw
        table = self.items_table
        page = self._get_page(h_key, table)
        if key_oid in page:
            return
        page[key_oid] = value_oid
        page_oid = self.repo.write(GIT_OBJ_BLOB, page.data).raw
        entry_no = self._entry_no(h_key, 0)
        table[entry_no] = page_oid
        self.items_table = table
        self._inc_size()

    def report(self):
        '''Log a one-line summary through `self.log`.'''
        self.log(f'{repr(self)}: contains {len(self)} elements.')

    def keys(self):
        '''Return a list with the data of every key blob, page by page.'''
        table = self.items_table
        keys = []
        for k in range(table.TABLE_SIZE):
            if table[k] == table.EMPTY_PAGE_ID:
                continue
            page = ItemPage(self.repo[Oid(table[k])].data)
            keys.extend(self.repo[Oid(key)].data for key in page.keys())
        return keys
class DictRepository(object):
    """The :class:`DictRepository <DictRepository>` object.

    :param repo_or_path:
        The path to a repository, or an existing pygit2.Repository object.

        If it is a path that does not exist, a new bare git repository will
        be initialized there.  If it is a path that does exist, then the
        directory will be used as a bare git repository.
    :type repo_or_path: string or pygit2.Repository
    """

    def __init__(self, repo_or_path=None):
        self._default_author = get_default_author()
        if isinstance(repo_or_path, Repository):
            self._repo = repo_or_path
        elif os.path.isdir(repo_or_path):
            self._repo = Repository(repo_or_path)
        else:
            self._repo = init_repository(repo_or_path, True)  # bare repo

    def _key_to_ref(self, key):
        """Return the name of the reference that tracks `key`."""
        return "refs/%s/HEAD" % key

    def get_commit_oid_for_key(self, key):
        """Return the oid of the commit that the reference for `key` targets.

        A missing reference surfaces as the lookup's KeyError (the same
        error :meth:`has` relies on).
        """
        return self._repo[self._repo.lookup_reference(self._key_to_ref(key)).oid].oid

    def get_raw_dict_for_commit_oid(self, commit_oid):
        """Return the dict decoded from the JSON blob stored under `DATA`
        in the tree of the commit `commit_oid`."""
        return json.loads(self._repo[self._repo[commit_oid].tree[DATA].oid].data)

    def get_parent_oids_for_commit_oid(self, commit_oid):
        """Return the oids of the parents of the commit `commit_oid`."""
        return [parent.oid for parent in self._repo[commit_oid].parents]

    def raw_commit(self, key, raw_dict, author, committer, message, parents):
        """Commit a dict to this :class:`DictRepository <DictRepository>`.
        It is recommended that you use the :class:`GitDict <GitDict>` commit
        method instead.

        :param raw_dict: the data to commit.
        :type raw_dict: dict
        :param author:
            The author of the commit.  If None, will be replaced with default.
        :type author: pygit2.Signature
        :param committer:
            The committer of this commit. If None, will be replaced with author.
        :type committer: pygit2.Signature
        :param message: The commit message.
        :type message: string
        :param parents:
            A list of 20-byte object IDs of parent commits.  An empty list
            means this is the first commit.

        :return: The oid of the new commit.
        :rtype: 20 bytes
        :raises: ValueError if raw_dict is not a dict.
        """
        if not isinstance(raw_dict, dict):
            # Wrap in a 1-tuple so a tuple argument is formatted as a value
            # instead of being unpacked as the format arguments (which would
            # raise TypeError rather than the intended ValueError).
            raise ValueError("%s is not a dict" % (raw_dict,))

        author = author or self._default_author.signature()
        committer = committer or author

        blob_id = self._repo.write(GIT_OBJ_BLOB, json.dumps(raw_dict))

        # TreeBuilder doesn't support inserting into trees, so we roll our own
        # NOTE(review): this hand-built tree entry presumably expects blob_id
        # to format as the raw object id -- confirm against the pygit2
        # version in use.
        tree_id = self._repo.write(GIT_OBJ_TREE, "100644 %s\x00%s" % (DATA, blob_id))

        return self._repo.create_commit(self._key_to_ref(key),
                                        author, committer, message, tree_id, parents)

    def create(self, key, dict=None, autocommit=False, message="first commit",
               author=None, committer=None):
        """Create a new :class:`GitDict <GitDict>`

        :param key: The key of the new :class:`GitDict <GitDict>`
        :type key: string
        :param dict: (optional) The value of the dict.  Defaults to empty.
        :type dict: dict
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean
        :param message:
            (optional) Message for first commit.  Defaults to "first commit".
        :type message: string
        :param author:
            (optional) The signature for the author of the first commit.
            Defaults to global author.
        :type author: pygit2.Signature
        :param committer:
            (optional) The signature for the committer of the first commit.
            Defaults to author.
        :type committer: pygit2.Signature

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        """
        # A fresh dict per call instead of one shared mutable default.
        raw_dict = {} if dict is None else dict
        self.raw_commit(key, raw_dict, author, committer, message, [])
        return self.get(key, autocommit=autocommit)

    def has(self, key):
        """Determine whether there is an entry for key in this repository.

        :param key: The key to check
        :type key: string

        :returns: whether there is an entry
        :rtype: boolean
        """
        try:
            self._repo.lookup_reference(self._key_to_ref(key))
        except KeyError:
            return False
        else:
            return True

    def get(self, key, autocommit=False):
        """Obtain the :class:`GitDict <GitDict>` for a key.

        :param key: The key to look up.
        :type key: string
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        :raises: KeyError if there is no entry for key
        """
        # NOTE(review): this passes (repository, key, autocommit=...); make
        # sure the GitDict in scope here accepts that signature.
        return GitDict(self, key, autocommit=autocommit)

    def fast_forward(self, from_dict, to_dict):
        """Fast forward a :class:`GitDict <GitDict>`.

        :param from_dict: the :class:`GitDict <GitDict>` to fast forward.
        :type from_dict: :class:`GitDict <GitDict>`
        :param to_dict: the :class:`GitDict <GitDict>` to fast forward to.
        :type to_dict: :class:`GitDict <GitDict>`
        """
        from_ref = self._key_to_ref(from_dict.key)
        # Resolve the target before deleting anything: if `to_dict` has no
        # reference (or is the same key as `from_dict`) the lookup must not
        # happen after the source reference is already gone.
        target_oid = self.get_commit_oid_for_key(to_dict.key)
        self._repo.lookup_reference(from_ref).delete()
        self._repo.create_reference(from_ref, target_oid)

    def clone(self, original, key):
        """Clone a :class:`GitDict <GitDict>`.

        :param original: the :class:`GitDict <GitDict>` to clone
        :type original: :class:`GitDict <GitDict>`
        :param key: where to clone to
        :type key: string
        :raises: ValueError if to_key already exists.
        """
        try:
            self._repo.create_reference(self._key_to_ref(key),
                                        self.get_commit_oid_for_key(original.key))
            return self.get(key, autocommit=original.autocommit)
        except GitError:
            raise ValueError("Cannot clone to %s, there is already a dict there." % key)