Exemple #1
0
    def copy_file(
        self,
        source_commit,
        source_path,
        dest_commit,
        dest_path,
        overwrite=None,
    ):
        """
        Copies a file that is already stored in PFS to another PFS location
        by issuing a `CopyFile` request. Note that the destination repo
        cannot be an output repo, or the copy operation will (as of 1.9.0)
        silently fail.

        Params:

        * `source_commit`: A tuple, string, or `Commit` object identifying the
        commit that holds the source file.
        * `source_path`: A string giving the path of the source file.
        * `dest_commit`: A tuple, string, or `Commit` object identifying the
        commit that will receive the copy.
        * `dest_path`: A string giving the path of the destination file.
        * `overwrite`: An optional bool specifying whether to overwrite the
        destination file if it already exists.

        Returns the result of the underlying `CopyFile` request.
        """
        origin = pfs_proto.File(
            commit=commit_from(source_commit),
            path=source_path,
        )
        target = pfs_proto.File(
            commit=commit_from(dest_commit),
            path=dest_path,
        )
        return self._req(
            Service.PFS,
            "CopyFile",
            src=origin,
            dst=target,
            overwrite=overwrite,
        )
Exemple #2
0
    def list_file(self, commit, path, history=None, include_contents=None):
        """
        Lists the files in a directory by issuing a `ListFileStream` request.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: The path to the directory.
        * `history`: An optional int, forwarded unchanged as the request's
        `history` field. NOTE(review): the previous wording of this entry
        described jobs and pipeline versions and looks copied from a
        job-listing method; presumably the value selects how many historical
        versions of each file are returned (0 for the current version only,
        -1 for all of them) - confirm against the PFS API documentation.
        * `include_contents`: An optional bool. If `True`, file contents are
        included. It is sent as the request's `full` field.

        Returns the result of the underlying `ListFileStream` request.
        """
        directory = pfs_proto.File(commit=commit_from(commit), path=path)
        return self._req(
            Service.PFS,
            "ListFileStream",
            file=directory,
            history=history,
            full=include_contents,
        )
Exemple #3
0
    def get_file(self, commit, path, offset_bytes=None, size_bytes=None):
        """
        Fetches the contents of a file stored in PFS via a `GetFile` request
        and returns them wrapped in a `PFSFile` object.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: A string specifying the path of the file.
        * `offset_bytes`: An optional int. The number of bytes to skip at the
        beginning of the file.
        * `size_bytes`: An optional int. Limits the total amount of data
        returned. Note that you will get fewer bytes than requested if the
        value is larger than the size of the file. If set to 0, all of the
        data will be returned.
        """
        target = pfs_proto.File(commit=commit_from(commit), path=path)
        stream = self._req(
            Service.PFS,
            "GetFile",
            file=target,
            offset_bytes=offset_bytes,
            size_bytes=size_bytes,
        )
        # The raw response is handed to PFSFile untouched.
        return PFSFile(stream)
Exemple #4
0
    def inspect_file(self, commit, path):
        """
        Inspects a single file by issuing an `InspectFile` request. Returns a
        `FileInfo` object.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: A string specifying the path to the file.
        """
        target = pfs_proto.File(commit=commit_from(commit), path=path)
        return self._req(Service.PFS, "InspectFile", file=target)
Exemple #5
0
    def diff_file(
        self,
        new_commit,
        new_path,
        old_commit=None,
        old_path=None,
        shallow=None,
    ):
        """
        Diffs two files by issuing a `DiffFile` request. If `old_commit` or
        `old_path` are not specified, the same path in the parent of the file
        specified by `new_commit` and `new_path` will be used.

        Params:

        * `new_commit`: A tuple, string, or `Commit` object representing the
        commit for the new file.
        * `new_path`: A string specifying the path of the new file.
        * `old_commit`: A tuple, string, or `Commit` object representing the
        commit for the old file.
        * `old_path`: A string specifying the path of the old file.
        * `shallow`: An optional bool specifying whether to do a shallow diff.

        Returns the result of the underlying `DiffFile` request.
        """
        # An explicit old file is only sent when BOTH halves of it were given;
        # a lone `old_commit` or `old_path` is ignored.
        if old_commit is None or old_path is None:
            previous = None
        else:
            previous = pfs_proto.File(
                commit=commit_from(old_commit),
                path=old_path,
            )

        current = pfs_proto.File(commit=commit_from(new_commit), path=new_path)
        return self._req(
            Service.PFS,
            "DiffFile",
            new_file=current,
            old_file=previous,
            shallow=shallow,
        )
Exemple #6
0
    def walk_file(self, commit, path):
        """
        Walks over every descendant file of a directory by issuing a
        `WalkFile` request. Returns a generator of `FileInfo` objects.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: The path to the directory.
        """
        root = pfs_proto.File(commit=commit_from(commit), path=path)
        return self._req(Service.PFS, "WalkFile", file=root)
Exemple #7
0
    def delete_file(self, commit, path):
        """
        Deletes a file from a commit by issuing a `DeleteFile` request.

        The deletion leaves a tombstone in the commit: provided the file is
        not written to again afterwards, trying to get it from the finished
        commit results in a not-found error. The file remains intact in the
        commit's parent.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: The path to the file.

        Returns the result of the underlying `DeleteFile` request.
        """
        doomed = pfs_proto.File(commit=commit_from(commit), path=path)
        return self._req(Service.PFS, "DeleteFile", file=doomed)
Exemple #8
0
    def put_file_bytes(
        self,
        commit,
        path,
        value,
        delimiter=None,
        target_file_datums=None,
        target_file_bytes=None,
        overwrite_index=None,
        header_records=None,
    ):
        """
        Uploads a PFS file from a file-like object, bytestring, or iterator
        of bytestrings.

        Dispatch order:

        1. Anything exposing a `read` attribute is treated as a file-like
           object and uploaded through the put-file client.
        2. Any other iterable that is not a `str`/`bytes` takes the
           deprecated iterable path (a `DeprecationWarning` is emitted).
        3. Everything else is uploaded as a bytestring through the put-file
           client.

        Params:

        * `commit`: A tuple, string, or `Commit` object representing the
        commit.
        * `path`: A string specifying the path in the repo the file(s) will be
        written to.
        * `value`: The file contents as bytes, represented as a file-like
        object, bytestring, or iterator of bytestrings.
        * `delimiter`: An optional int. causes data to be broken up into
        separate files by the delimiter. e.g. if you used
        `Delimiter.CSV.value`, a separate PFS file will be created for each
        row in the input CSV file, rather than one large CSV file.
        * `target_file_datums`: An optional int. Specifies the target number of
        datums in each written file. It may be lower if data does not split
        evenly, but will never be higher, unless the value is 0.
        * `target_file_bytes`: An optional int. Specifies the target number of
        bytes in each written file, files may have more or fewer bytes than
        the target.
        * `overwrite_index`: An optional int. This is the object index where
        the write starts from.  All existing objects starting from the index
        are deleted.
        * `header_records`: An optional int for splitting data when `delimiter`
        is not `NONE` (or `SQL`). It specifies the number of records that are
        converted to a header and applied to all file shards.
        """
        # The same splitting options are forwarded on every upload path.
        split_options = dict(
            delimiter=delimiter,
            target_file_datums=target_file_datums,
            target_file_bytes=target_file_bytes,
            overwrite_index=overwrite_index,
            header_records=header_records,
        )

        # Standard file objects are themselves iterable (they yield lines), so
        # the `read` check has to win; otherwise file-like values would be
        # misrouted to the deprecated iterable path and trigger a warning that
        # tells the caller to use... file-like objects.
        is_file_like = hasattr(value, "read")

        if (not is_file_like
                and isinstance(value, collections.abc.Iterable)
                and not isinstance(value, (str, bytes))):
            warnings.warn(
                "'put_file_bytes' with an iterable 'value' is deprecated, use file-like objects or bytestrings instead",
                DeprecationWarning,
            )
            reqs = put_file_from_iterable_reqs(
                value,
                file=pfs_proto.File(commit=commit_from(commit), path=path),
                **split_options
            )
            return self._req(Service.PFS, "PutFile", req=reqs)

        with self.put_file_client() as pfc:
            if is_file_like:
                return pfc.put_file_from_fileobj(commit, path, value,
                                                 **split_options)
            return pfc.put_file_from_bytes(commit, path, value,
                                           **split_options)
Exemple #9
0
 def __init__(self, commit, path, **kwargs):
     """
     Stores the given keyword arguments on the instance as `self.kwargs`,
     with a `file` entry built from `commit` and `path`.

     Params:

     * `commit`: Passed to `commit_from` to obtain the commit for the file.
     * `path`: The path of the file.
     * `kwargs`: Any further keyword arguments to keep. A caller-supplied
     `file` entry is replaced by the one built here.
     """
     target = pfs_proto.File(commit=commit_from(commit), path=path)
     kwargs.update(file=target)
     self.kwargs = kwargs