Beispiel #1
0
def update_tree(
    repo: git.Repository, tree: git.Oid, path: List[str], content: str
) -> git.Oid:
    """
    adds a blob with `content` at `path` to `tree` in `repo`

    `path` is a list of tree-entry names: every element but the last names a
    subtree and the last one names the blob.  Subtrees that do not exist yet
    are created.  The result is written through a TreeBuilder seeded from
    `tree`, and the id of that newly written tree is returned.

    :param repo: repository the objects are read from and written to
    :param tree: id of the tree to start from
    :param path: non-empty list of entry names leading to the blob
    :param content: content of the blob to write
    :return: id of the newly written tree
    :raises Exception: if `path` is empty.  This is also what happens when the
        last element of `path` names an existing subtree, because the
        recursion into that subtree is then left with no path elements.

    >>> repo = create_repo()
    >>> tree = repo.TreeBuilder().write()
    >>> for i in range(10):
    ...    path = store_hash(f"{i}")
    ...    content = nar_hash(path)
    ...    tree = update_tree(repo, tree, common.shards(path, depth=5), content)
    >>> print(tree)
    00f68bdb866b654d4ce3da90609b74137605bd90
    """
    # Reject an empty path before touching path[0]; otherwise a non-empty tree
    # would fail with an IndexError instead of the "invalid path" error.
    if not path:
        raise Exception(f"invalid path: {path}")

    name, rest = path[0], path[1:]
    root = repo.get(tree)

    for entry in root:
        # subdir exists: recurse into it and re-attach the updated subtree
        if entry.name == name and entry.type == "tree":
            sub = update_tree(repo, entry.id, rest, content)
            builder = repo.TreeBuilder(root)
            builder.remove(name)
            builder.insert(name, sub, git.GIT_FILEMODE_TREE)
            return builder.write()

    # subdir does not exist: create required objects
    if rest:
        # write leaf node: a fresh tree holding only the blob
        sub = update_tree(repo, repo.TreeBuilder().write(), [path[-1]], content)
        # build intermediate nodes, innermost first
        for d in reversed(path[1:-1]):
            inner = repo.TreeBuilder()
            inner.insert(d, sub, git.GIT_FILEMODE_TREE)
            sub = inner.write()

        # attach to `tree`
        builder = repo.TreeBuilder(root)
        builder.insert(name, sub, git.GIT_FILEMODE_TREE)
        return builder.write()

    # path[0] is not a subdir and is the last element: write blob
    blob = repo.write(git.GIT_OBJ_BLOB, content)
    builder = repo.TreeBuilder(root)
    builder.insert(name, blob, git.GIT_FILEMODE_BLOB)
    return builder.write()
Beispiel #2
0
class GitDict():
    '''
    A python dict, stored in git so it can be larger than memory and yet accessed for
    reading and writing efficiently.  Keys and values are git objects.  The collection of
    key-value pairs is stored in a one level hierarchy of pages (git objects) that are
    indexed by a page table (also a git object).

    Two references under ``refs/<refs_ns>/`` hold the state: ``<name>.size`` points to a
    blob containing the element count as decimal text, and ``<name>.items`` points to the
    blob holding the page table.
    '''
    def __init__(self,
                 dir_,
                 name,
                 log=None,
                 do_create=False,
                 refs_ns='tags',
                 h_order=3):
        '''
        Open the repository at `dir_` and initialise the dict named `name`.

        :param dir_: path of the git repository
        :param name: base name of the references used by this dict
        :param log: callable used by `report`; defaults to `print`
        :param do_create: when the repository cannot be opened, create a bare one
            at `dir_` instead of re-raising the GitError
        :param refs_ns: namespace below ``refs/`` in which the references live
        :param h_order: used to derive `h_key_len`, the number of leading hex digits
            of a key's oid that form its hash key
        :raises GitError: if the repository cannot be opened and `do_create` is false
        '''
        self.dir_ = dir_
        self.name = name
        self.name_size = name + '.size'
        self.name_items = name + '.items'
        self.log = log or print
        self.refs_ns = refs_ns
        self.h_order = h_order
        # presumably sized so the hex prefix covers at least 7 * h_order bits
        # (7 bits per level, see _entry_no) -- TODO confirm
        self.h_key_len = ((7 * h_order // 8) + 1) * 2
        try:
            self.repo = Repository(dir_)
        except GitError:
            if do_create:
                self.repo = init_repository(dir_, bare=True)
            else:
                # bare raise keeps the original traceback
                raise
        # id of an empty blob
        self.none = self.repo.write(GIT_OBJ_BLOB, '')
        # NOTE(review): _init() runs on every construction and resets the size and
        # the page table, so opening an existing dict appears to discard its
        # contents -- confirm this is intended.
        self._init()

    def __repr__(self):
        return f'GitDict("{self.dir_}", "{self.name}")'

    def _lookup_reference(self, name):
        '''Return the reference ``refs/<refs_ns>/<name>``.'''
        return self.repo.lookup_reference(f'refs/{self.refs_ns}/{name}')

    def _set_reference(self, name, target):
        '''Point ``refs/<refs_ns>/<name>`` at `target`, creating it on KeyError.'''
        try:
            self._lookup_reference(name).set_target(target)
        except KeyError:
            self.repo.references.create(f'refs/{self.refs_ns}/{name}', target)

    def _init(self):
        '''Reset the stored size to 0 and the page table to a fresh PageTable.'''
        self._set_reference(self.name_size, self.repo.write(GIT_OBJ_BLOB, '0'))
        self.items_table = PageTable()

    @property
    def items_table(self):
        '''The page table, rebuilt from the blob behind the items reference.'''
        return PageTable(self.repo[self._lookup_reference(
            self.name_items).target].data)

    @items_table.setter
    def items_table(self, table):
        # Persist the table as a new blob and move the items reference to it.
        self._set_reference(self.name_items,
                            self.repo.write(GIT_OBJ_BLOB, table.data))

    def __len__(self):
        # Read the blob's bytes explicitly (as the rest of the class does) rather
        # than relying on int() accepting the Blob object itself.
        size_blob = self.repo[self._lookup_reference(self.name_size).target]
        return int(size_blob.data)

    def _inc_size(self):
        '''Store ``len(self) + 1`` as the new size.'''
        new_size = self.repo.write(GIT_OBJ_BLOB, str(len(self) + 1))
        self._set_reference(self.name_size, new_size)

    def __contains__(self, key):
        return self.get(key) is not None

    def get(self, key, default=None):
        '''Return the value stored for `key`, or `default` on KeyError.'''
        try:
            return self[key]
        except KeyError:
            return default

    def _key_oid_and_h_key(self, oid):
        '''Return ``(oid.raw, hash key)``; the hash key is the integer value of
        the first `h_key_len` hex digits of `oid`.'''
        return oid.raw, int(oid.hex[:self.h_key_len], 16)

    @staticmethod
    def _entry_no(h_key, level):
        '''Return the 7-bit group number `level` of `h_key` (a value in 0..127).'''
        return (h_key >> (7 * level)) & 127

    def _get_page(self, h_key, table=None):
        '''
        Return the ItemPage that `h_key` maps to, using `table` when given and
        the stored page table otherwise.  Only level 0 of the hash key is used.
        '''
        # Test for None explicitly so the choice does not depend on the
        # truthiness of a PageTable instance.
        if table is None:
            table = self.items_table
        entry_no = self._entry_no(h_key, 0)
        try:
            return ItemPage(self.repo[Oid(table[entry_no])].data)
        except TypeError:
            # presumably raised for a table slot that holds no page id yet
            # -- TODO confirm against PageTable
            return ItemPage()

    def __getitem__(self, key):
        # pyghash is defined elsewhere; presumably it yields the oid of `key`
        # without writing it to the repository -- TODO confirm
        key_oid, h_key = self._key_oid_and_h_key(pyghash(key))
        page = self._get_page(h_key)
        value_oid = page[key_oid]
        return self.repo[Oid(value_oid)].data

    def __setitem__(self, key, value):
        key_oid, h_key = self._key_oid_and_h_key(
            self.repo.write(GIT_OBJ_BLOB, key))
        value_oid = self.repo.write(GIT_OBJ_BLOB, value).raw

        table = self.items_table
        page = self._get_page(h_key, table)
        if key_oid in page:
            # NOTE(review): an existing key keeps its old value; the new value
            # is written as a blob but never linked -- confirm this write-once
            # behaviour is intended.
            return

        page[key_oid] = value_oid
        page_oid = self.repo.write(GIT_OBJ_BLOB, page.data).raw
        entry_no = self._entry_no(h_key, 0)
        table[entry_no] = page_oid
        self.items_table = table
        self._inc_size()

    def report(self):
        '''Log the repr of this dict and its element count.'''
        self.log(f'{repr(self)}: contains {len(self)} elements.')

    def keys(self):
        '''Return a list with the blob data of every key in every non-empty page.'''
        table = self.items_table
        keys = []
        for k in range(table.TABLE_SIZE):
            if table[k] != table.EMPTY_PAGE_ID:
                page = ItemPage(self.repo[Oid(table[k])].data)
                for key in page.keys():
                    keys.append(self.repo[Oid(key)].data)
        return keys
Beispiel #3
0
class DictRepository(object):
    """The :class:`DictRepository <DictRepository>` object.

    Each key is stored under the reference ``refs/<key>/HEAD``.

    :param repo_or_path:
        The path to a repository, or an existing pygit2.Repository object.
        If it is a path that does not exist, a new bare git repository will
        be initialized there.  If it is a path that does exist, then the
        directory will be used as a bare git repository.
    :type repo_or_path: string or pygit2.Repository
    :raises: TypeError if repo_or_path is None.
    """

    def __init__(self, repo_or_path=None):

        self._default_author = get_default_author()
        if isinstance(repo_or_path, Repository):
            self._repo = repo_or_path
        elif repo_or_path is None:
            # The None default is not usable: os.path.isdir(None) would fail
            # with an obscure TypeError.  Raise the same type, with a message.
            raise TypeError("repo_or_path must be a path or a pygit2.Repository, not None")
        elif os.path.isdir(repo_or_path):
            self._repo = Repository(repo_or_path)
        else:
            self._repo = init_repository(repo_or_path, True)  # bare repo

    def _key_to_ref(self, key):
        """Return the name of the reference that stores `key`."""
        return "refs/%s/HEAD" % key

    def get_commit_oid_for_key(self, key):
        """Return the oid of the commit that the reference for `key` points to."""
        return self._repo[self._repo.lookup_reference(self._key_to_ref(key)).oid].oid

    def get_raw_dict_for_commit_oid(self, commit_oid):
        """Return the JSON-decoded content of the DATA entry in the commit's tree."""
        return json.loads(self._repo[self._repo[commit_oid].tree[DATA].oid].data)

    def get_parent_oids_for_commit_oid(self, commit_oid):
        """Return the oids of the parents of the given commit."""
        return [parent.oid for parent in self._repo[commit_oid].parents]

    def raw_commit(self, key, raw_dict, author, committer, message, parents):
        """Commit a dict to this :class:`DictRepository <DictRepository>`.
        It is recommended that you use the :class:`GitDict <GitDict>` commit
        method instead.

        :param key: The key whose reference is updated by the commit.
        :type key: string
        :param raw_dict: the data to commit.
        :type raw_dict: dict
        :param author:
            The author of the commit.  If None, will be replaced with default.
        :type author: pygit2.Signature
        :param committer:
            The committer of this commit. If None, will be replaced with author.
        :type committer: pygit2.Signature
        :param message: The commit message.
        :type message: string
        :param parents:
            A list of 20-byte object IDs of parent commits.  An empty list
            means this is the first commit.

        :return: The oid of the new commit.
        :rtype: 20 bytes
        :raises: ValueError if raw_dict is not a dict.
        """
        if not isinstance(raw_dict, dict):
            # Wrap in a 1-tuple: a bare tuple on the right of % would be
            # unpacked as format arguments and raise TypeError instead.
            raise ValueError("%s is not a dict" % (raw_dict,))

        author = author or self._default_author.signature()
        committer = committer or author

        blob_id = self._repo.write(GIT_OBJ_BLOB, json.dumps(raw_dict))

        # TreeBuilder doesn't support inserting into trees, so we roll our own
        # NOTE(review): blob_id is interpolated with %s into the raw tree
        # payload; presumably this relies on repo.write returning the raw
        # 20-byte id as a str -- confirm against the pygit2 version in use.
        tree_id = self._repo.write(GIT_OBJ_TREE, "100644 %s\x00%s" % (DATA, blob_id))

        return self._repo.create_commit(self._key_to_ref(key), author, committer, message, tree_id, parents)

    def create(self, key, dict=None, autocommit=False, message="first commit", author=None, committer=None):
        """Create a new :class:`GitDict <GitDict>`

        :param key: The key of the new :class:`GitDict <GitDict>`
        :type key: string
        :param dict: (optional) The value of the dict.  Defaults to empty.
        :type dict: dict
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean
        :param message:
            (optional) Message for first commit.  Defaults to "first commit".
        :type message: string
        :param author:
            (optional) The signature for the author of the first commit.
            Defaults to global author.
        :type author: pygit2.Signature
        :param committer:
            (optional) The signature for the committer of the first commit.
            Defaults to author.
        :type committer: pygit2.Signature

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        """
        # A fresh dict per call instead of a shared mutable default.  The
        # parameter keeps the name `dict` so keyword callers still work.
        raw_dict = {} if dict is None else dict
        self.raw_commit(key, raw_dict, author, committer, message, [])
        return self.get(key, autocommit=autocommit)

    def has(self, key):
        """Determine whether there is an entry for key in this repository.

        :param key: The key to check
        :type key: string

        :returns: whether there is an entry
        :rtype: boolean
        """
        try:
            self._repo.lookup_reference(self._key_to_ref(key))
            return True
        except KeyError:
            return False

    def get(self, key, autocommit=False):
        """Obtain the :class:`GitDict <GitDict>` for a key.

        :param key: The key to look up.
        :type key: string
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        :raises: KeyError if there is no entry for key
        """
        return GitDict(self, key, autocommit=autocommit)

    def fast_forward(self, from_dict, to_dict):
        """Fast forward a :class:`GitDict <GitDict>`.

        The reference of `from_dict` is deleted and re-created pointing at
        the commit of `to_dict`; no ancestry check is performed here.

        :param from_dict: the :class:`GitDict <GitDict>` to fast forward.
        :type from_dict: :class:`GitDict <GitDict>`
        :param to_dict: the :class:`GitDict <GitDict>` to fast forward to.
        :type to_dict: :class:`GitDict <GitDict>`
        """
        from_ref = self._key_to_ref(from_dict.key)
        self._repo.lookup_reference(from_ref).delete()
        self._repo.create_reference(from_ref, self.get_commit_oid_for_key(to_dict.key))

    def clone(self, original, key):
        """Clone a :class:`GitDict <GitDict>`.

        :param original: the :class:`GitDict <GitDict>` to clone
        :type original: :class:`GitDict <GitDict>`
        :param key: where to clone to
        :type key: string

        :returns: the GitDict stored at `key`, with the autocommit setting of `original`
        :rtype: :class:`GitDict <GitDict>`
        :raises: ValueError if key already exists.
        """
        try:
            self._repo.create_reference(self._key_to_ref(key), self.get_commit_oid_for_key(original.key))
            return self.get(key, autocommit=original.autocommit)
        except GitError:
            raise ValueError("Cannot clone to %s, there is already a dict there." % key)