def commit_from(src, allow_just_repo=False):
    """
    Coerces `src` into a `Commit` protobuf object.

    Params:

    * `src`: The value to convert. Accepted forms:
        * a `Commit` object, which is returned unchanged;
        * a 2-item tuple or list of `(repo_name, commit_id)`;
        * a string of the form `"repo_name/commit_id"`. Only the first `/`
          is used as the separator, so the commit ID part may itself
          contain slashes;
        * when `allow_just_repo` is true, any other value (typically a
          string without a `/`), which is used as a bare repo name.
    * `allow_just_repo`: If true, a `src` that matches none of the commit
      forms above is treated as a repo name, and a `Commit` with only its
      `repo` field set is returned.

    Raises `ValueError` if `src` is not a recognized commit form and
    `allow_just_repo` is false.
    """
    if isinstance(src, pfs_proto.Commit):
        return src

    if isinstance(src, (tuple, list)) and len(src) == 2:
        repo_name, commit_id = src
        return pfs_proto.Commit(repo=pfs_proto.Repo(name=repo_name), id=commit_id)

    # Only take the "repo/commit" path when a separator is actually present.
    # Unconditionally unpacking `src.split("/", 1)` fails on a plain repo name
    # before `allow_just_repo` is ever consulted, which made the repo-only
    # form unusable for strings.
    if isinstance(src, str) and "/" in src:
        repo_name, commit_id = src.split("/", 1)
        return pfs_proto.Commit(repo=pfs_proto.Repo(name=repo_name), id=commit_id)

    if not allow_just_repo:
        raise ValueError("Invalid commit type")
    return pfs_proto.Commit(repo=pfs_proto.Repo(name=src))
def create_branch(self, repo_name, branch_name, commit=None, provenance=None, trigger=None):
    """
    Creates a new branch by issuing a `CreateBranch` request.

    Params:

    * `repo_name`: A string specifying the name of the repo.
    * `branch_name`: A string specifying the new branch name.
    * `commit`: An optional tuple, string, or `Commit` object representing
      the head commit of the branch. It is normalized with `commit_from`;
      when omitted, no head is sent.
    * `provenance`: An optional iterable of `Branch` objects representing
      the branch provenance.
    * `trigger`: An optional `Trigger` object controlling when the head of
      `branch_name` is moved.
    """
    target_repo = pfs_proto.Repo(name=repo_name)
    new_branch = pfs_proto.Branch(repo=target_repo, name=branch_name)

    head_commit = None
    if commit is not None:
        head_commit = commit_from(commit)

    return self._req(
        Service.PFS,
        "CreateBranch",
        branch=new_branch,
        head=head_commit,
        provenance=provenance,
        trigger=trigger,
    )
def flush_commit(self, commits, repos=None):
    """
    Blocks until every commit that has all of the given commits as
    provenance has finished. In effect, this waits for all of the jobs
    triggered by a set of commits to complete.

    An error is returned if any of the awaited commits is cancelled because
    one of the jobs hit an error at runtime. Calling FlushCommit is never
    required to make jobs run; they run regardless. FlushCommit only lets
    you wait for them to complete and see their output once they do.

    Yields `CommitInfo` objects.

    Params:

    * `commits`: A list of tuples, strings, or `Commit` objects representing
      the commits to flush. Each entry is normalized with `commit_from`.
    * `repos`: An optional list of strings specifying repo names. If
      specified, only commits within these repos will be flushed.
    """
    flushed = list(map(commit_from, commits))

    target_repos = None
    if repos is not None:
        target_repos = [pfs_proto.Repo(name=name) for name in repos]

    return self._req(
        Service.PFS,
        "FlushCommit",
        commits=flushed,
        to_repos=target_repos,
    )
def subscribe_commit(self, repo_name, branch, from_commit_id=None, state=None, prov=None):
    """
    Subscribes to commits on a branch. Yields `CommitInfo` objects as
    commits occur.

    Params:

    * `repo_name`: A string specifying the name of the repo.
    * `branch`: A string specifying the branch to subscribe to.
    * `from_commit_id`: An optional string specifying a commit ID. Only
      commits created since this commit are returned.
    * `state`: The commit state to filter on.
    * `prov`: An optional `CommitProvenance` object.
    """
    source_repo = pfs_proto.Repo(name=repo_name)
    request = pfs_proto.SubscribeCommitRequest(
        repo=source_repo,
        branch=branch,
        state=state,
        prov=prov,
    )

    if from_commit_id is not None:
        # `from` is a Python keyword, so the field cannot be reached with
        # plain attribute syntax and has to be looked up by name.
        from_field = getattr(request, "from")
        from_field.CopyFrom(pfs_proto.Commit(repo=source_repo, id=from_commit_id))

    return self._req(Service.PFS, "SubscribeCommit", req=request)
def list_commit(self, repo_name, to_commit=None, from_commit=None, number=None, reverse=None):
    """
    Lists commits via a `ListCommitStream` request. Yields `CommitInfo`
    objects.

    Params:

    * `repo_name`: If only `repo_name` is given, all commits in the repo are
      returned.
    * `to_commit`: Optional. Only the ancestors of `to`, including `to`
      itself, are considered.
    * `from_commit`: Optional. Only the descendants of `from`, including
      `from` itself, are considered.
    * `number`: Optional. Determines how many commits are returned. If
      `number` is 0, all commits that match the aforementioned criteria are
      returned.
    * `reverse`: Optional. Forwarded unchanged as the request's `reverse`
      field.
    """
    request = pfs_proto.ListCommitRequest(
        repo=pfs_proto.Repo(name=repo_name),
        number=number,
        reverse=reverse,
    )

    # Both bounds are optional sub-messages. `from` is a Python keyword, so
    # the fields are addressed by name for both bounds.
    for field_name, bound in (("to", to_commit), ("from", from_commit)):
        if bound is not None:
            getattr(request, field_name).CopyFrom(commit_from(bound))

    return self._req(Service.PFS, "ListCommitStream", req=request)
def inspect_branch(self, repo_name, branch_name):
    """
    Inspects a branch via an `InspectBranch` request. Returns a
    `BranchInfo` object.

    Params:

    * `repo_name`: The name of the repo the branch is looked up in.
    * `branch_name`: The name of the branch to inspect.
    """
    owning_repo = pfs_proto.Repo(name=repo_name)
    target_branch = pfs_proto.Branch(repo=owning_repo, name=branch_name)
    return self._req(Service.PFS, "InspectBranch", branch=target_branch)
def inspect_repo(self, repo_name):
    """
    Fetches info about a specific repo via an `InspectRepo` request.
    Returns a `RepoInfo` object.

    Params:

    * `repo_name`: Name of the repo.
    """
    target_repo = pfs_proto.Repo(name=repo_name)
    return self._req(Service.PFS, "InspectRepo", repo=target_repo)
def list_branch(self, repo_name, reverse=None):
    """
    Lists the active branch objects on a repo. Returns a list of
    `BranchInfo` objects, taken from the `branch_info` field of the
    `ListBranch` response.

    Params:

    * `repo_name`: A string specifying the repo name.
    * `reverse`: Optional. Forwarded unchanged as the request's `reverse`
      field.
    """
    target_repo = pfs_proto.Repo(name=repo_name)
    response = self._req(Service.PFS, "ListBranch", repo=target_repo, reverse=reverse)
    return response.branch_info
# NOTE(review): another `delete_repo` definition (taking an extra
# `split_transaction` parameter) appears later in this source. If both live
# in the same class, the later definition replaces this one -- confirm which
# is intended.
def delete_repo(self, repo_name, force=None):
    """
    Deletes a repo and reclaims the storage space it was using.

    Params:

    * `repo_name`: The name of the repo.
    * `force`: If set to true, the repo will be removed regardless of
      errors. This argument should be used with care.
    """
    doomed_repo = pfs_proto.Repo(name=repo_name)
    # `all` is always sent as False here: only the named repo is targeted.
    return self._req(
        Service.PFS,
        "DeleteRepo",
        repo=doomed_repo,
        force=force,
        all=False,
    )
def delete_branch(self, repo_name, branch_name, force=None):
    """
    Deletes a branch, but leaves the commits themselves intact. Those
    commits can still be accessed via their commit IDs and via any other
    branches they happen to be on.

    Params:

    * `repo_name`: A string specifying the repo name.
    * `branch_name`: A string specifying the name of the branch to delete.
    * `force`: A bool specifying whether to force the branch deletion.
    """
    owning_repo = pfs_proto.Repo(name=repo_name)
    doomed_branch = pfs_proto.Branch(repo=owning_repo, name=branch_name)
    return self._req(
        Service.PFS,
        "DeleteBranch",
        branch=doomed_branch,
        force=force,
    )
def create_repo(self, repo_name, description=None, update=None):
    """
    Creates a new `Repo` object in PFS with the given name.

    Repos are the top level data object in PFS and should be used to store
    data of a similar type. For example, rather than having a single `Repo`
    for an entire project you might have separate `Repo`s for logs,
    metrics, database dumps etc.

    Params:

    * `repo_name`: Name of the repo.
    * `description`: An optional string describing the repo.
    * `update`: Whether to update if the repo already exists.
    """
    new_repo = pfs_proto.Repo(name=repo_name)
    return self._req(
        Service.PFS,
        "CreateRepo",
        repo=new_repo,
        description=description,
        update=update,
    )
def start_commit(self, repo_name, branch=None, parent=None, description=None, provenance=None):
    """
    Begins the process of committing data to a Repo. Once started you can
    write to the Commit with PutFile, and when all the data has been
    written you must finish the Commit with FinishCommit. NOTE, data is not
    persisted until FinishCommit is called. A Commit object is returned.

    Params:

    * `repo_name`: A string specifying the name of the repo.
    * `branch`: A string specifying the branch name. This is a more
      convenient way to build linear chains of commits. When a commit is
      started with a non-empty branch, the value of branch becomes an alias
      for the created Commit, which enables a more intuitive access
      pattern. When the commit is started on a branch, the previous head of
      the branch is used as the parent of the commit.
    * `parent`: An optional value identifying the parent commit. It is used
      as the `id` of a `Commit` that is always placed in `repo_name`
      (presumably a commit ID string -- confirm against callers). Upon
      creation the new commit will appear identical to the parent commit;
      data can safely be added to the new commit without affecting the
      contents of the parent commit.
    * `description`: An optional string describing the commit.
    * `provenance`: An optional iterable of `CommitProvenance` objects
      specifying the commit provenance.
    """
    target_repo = pfs_proto.Repo(name=repo_name)
    # The parent message is always sent, even when `parent` is None; in that
    # case it carries only the repo.
    parent_commit = pfs_proto.Commit(repo=target_repo, id=parent)
    return self._req(
        Service.PFS,
        "StartCommit",
        parent=parent_commit,
        branch=branch,
        description=description,
        provenance=provenance,
    )
def delete_repo(self, repo_name, force=None, split_transaction=None):
    """
    Deletes a repo and reclaims the storage space it was using.

    Params:

    * `repo_name`: The name of the repo.
    * `force`: If set to true, the repo will be removed regardless of
      errors. This argument should be used with care.
    * `split_transaction`: An optional bool that controls whether Pachyderm
      attempts to delete the entire repo in a single database transaction.
      Setting this to `True` can work around certain Pachyderm errors, but,
      if set, the `delete_repo` call may need to be retried.
    """
    doomed_repo = pfs_proto.Repo(name=repo_name)
    # `all` is always sent as False here: only the named repo is targeted.
    return self._req(
        Service.PFS,
        "DeleteRepo",
        repo=doomed_repo,
        force=force,
        all=False,
        split_transaction=split_transaction,
    )