def push_dataset(self,
                     repo_name,
                     branch_name,
                     file_path_list,
                     push_description=None):
        """
        pushes a dataset into pachyderm cluster

        starts a new commit and pushes the file provided at the `path`
        to the cluster

        :param repo_name:
            name of the repo
        :param branch_name:
            name of the branch
        :param file_path_list:
            list of paths of all the files
        :param push_description:
            description for this push
        :return:
            status of the push
        """
        # TODO: add FileNotFound & byte encoding exceptions
        try:
            # opens a new commit
            new_commit = self.client.start_commit(repo_name,
                                                  branch_name,
                                                  parent=None,
                                                  description=push_description)
        except PachClientException as err:
            # returns in case of opening a new commit
            raise PachydermOperationException(err.details())

        try:
            for (local_path, pachyderm_path) in file_path_list:
                # fetching file extension before reading
                file_extension = os.path.splitext(local_path)[1]

                with open(local_path, "rb") as dataset:
                    if file_extension == ".pkl":
                        # if pickle file is provided
                        dataset_object = pickle.load(dataset)
                        byte_data = pickle.dumps(dataset_object)
                    else:
                        byte_data = dataset.read()
                        # byte_data = data.encode('utf-8')
                self.client.put_file_bytes((repo_name, new_commit.id),
                                           pachyderm_path, byte_data)

            self.client.finish_commit((repo_name, new_commit.id))
            return new_commit.id
        except PachClientException as err:
            # removes the above commit
            self.client.delete_commit((repo_name, new_commit))
            raise PachydermOperationException(err.details())
        except UnicodeError as err:
            raise PachydermOperationException("File encoding failure")
    def list_commit(self,
                    repo_name,
                    upper_commit=None,
                    lower_commit=None,
                    count=None):
        """
        lists commits in a repo

        :param repo_name:
            name of the commit
        :param upper_commit:
            id of the last commit that needs to be shown
        :param lower_commit:
            id of the commit from which list starts
        :param count:
            number of commits to be shown
        :return:
            A in-flight _Rendezvous object
        """
        if upper_commit:
            upper_commit = (repo_name, upper_commit)
        if lower_commit:
            lower_commit = (repo_name, lower_commit)
        commits = self.client.list_commit(repo_name, upper_commit,
                                          lower_commit, count)
        if commits.done():
            raise PachydermOperationException(commits.details())
        return commits
Exemple #3
0
    def list_dataset(self,
                     repo_name,
                     commit_id,
                     path="/",
                     history=None,
                     include_contents=None):
        """
        list the dataset in a branch or provided commit

        :param repo_name:
            name of the repo
        :param commit_id:
            id of the commit
        :param path:
            path to the dataset file/folder
        :param history:
            (Optional) retrieves previous versions of file
        :param include_contents:
            includes file contents in response
        :return:

        """
        files = self.client.list_file((repo_name, commit_id), path, history,
                                      include_contents)
        if files.done():
            raise PachydermOperationException(files.details())
        file_list = []
        sub_dir_path_list = []
        try:
            for file_item in files:
                file_info = self.fetch_dataset_info(file_item)
                file_list.append(file_info)
                # if file type is a directory
                if file_item.file_type == 2:
                    sub_dir_path_list.append(file_item.file.path)
        except PachClientException as err:
            raise PachydermOperationException(err.details())

        for sub_dir_path in sub_dir_path_list:
            file_list += self.list_dataset(repo_name, commit_id, sub_dir_path,
                                           history, include_contents)

        return file_list
Exemple #4
0
    def check_server(host, port):
        """

        :param host:
        :param port:
        :return:
        """
        try:
            pachyderm.get_remote_version(host, port)
        except PachClientException:
            raise PachydermOperationException("Invalid server details")
    def delete_commit(self, repo_name, commit_id):
        """
        deletes a commit and its contents

        :param repo_name:
            name of the repo
        :param commit_id:
            id of the commit that needs to be deleted
        """
        try:
            self.client.delete_commit((repo_name, commit_id))
        except PachClientException as err:
            raise PachydermOperationException(err.details())
    def create_new_branch(self, repo_name, branch_name):
        """
        create a new branch in specified repo

        :param repo_name:
            name of the repo
        :param branch_name:
            name of the branch
        :return:
        """
        try:
            self.client.create_branch(repo_name, branch_name)
        except PachClientException as err:
            raise PachydermOperationException(err.details())
Exemple #7
0
 def create_new_repo(self, repo_name, description=None, update=None):
     """
     :param repo_name:
         name of the repo
     :param description:
         description on the repo(Optional)
     :param update:
         update flag to overwrite if repo already exists
     :return:
     """
     try:
         self.client.create_repo(repo_name, description, update)
     except PachClientException as err:
         raise PachydermOperationException(
             f"Repo Creation failed: {err.details()}")
    def inspect_branch(self, repo_name, branch_name):
        """
        provides information on a branch

        :param repo_name:
            name of the repo where branch resides
        :param branch_name:
            name of the branch
        :return:
            branch information object
        """
        try:
            branch_info = self.client.inspect_branch(repo_name, branch_name)
            return branch_info
        except PachClientException as err:
            raise PachydermOperationException(err.details())
    def delete_dataset(self, repo_name, commit_id, file_path):
        """
        deletes a dataset from the cluster

        :param repo_name:
            name of the repo dataset is in
        :param commit_id:
            id of the commit to find the dataset
        :param file_path:
            path of the dataset on the pachyderm cluster
        """
        try:
            if not file_path or file_path == "/":
                self.delete_commit(repo_name, commit_id)
            else:
                self.client.delete_file((repo_name, commit_id), file_path)
        except PachClientException as err:
            raise PachydermOperationException(err.details())
    def inspect_commit(self, repo_name, commit_id, block_state=None):
        """
        provides information on a commit

        :param repo_name:
            name of the repo
        :param commit_id:
            commit id
        :param block_state:
        :return:
            returns CommitInfo Object
        """
        try:
            commit_info = self.client.inspect_commit((repo_name, commit_id),
                                                     block_state)
            return commit_info
        except PachClientException as err:
            raise PachydermOperationException(err.details())
    def pull_dataset(self, repo_name, commit_id, path):
        """
        Pulls dataset/file at the specified path of the commit

        :param repo_name:
            name of the repo where the dataset is saved
        :param commit_id:
            id of the commit when the dataset is pushed
        :param path:
            path of the file in the pachyderm cluster
        :return:
            returns the fileInfo object if success
        """
        commit_tuple = (repo_name, commit_id)
        files = self.client.get_file(commit_tuple, path)
        try:
            for file in files:
                return file
        except PachClientException as err:
            raise PachydermOperationException(err.details())