def copy_file(self, source_commit, source_path, dest_commit, dest_path, overwrite=None):
    """
    Efficiently copies a file that already lives in PFS to another location
    in PFS.

    Note that the destination repo cannot be an output repo, or the copy
    operation will (as of 1.9.0) silently fail.

    Params:

    * `source_commit`: A tuple, string, or `Commit` object representing the
    commit that holds the source file.
    * `source_path`: A string specifying the path of the source file.
    * `dest_commit`: A tuple, string, or `Commit` object representing the
    commit that will receive the copy.
    * `dest_path`: A string specifying the path of the destination file.
    * `overwrite`: An optional bool specifying whether to overwrite the
    destination file if it already exists.
    """
    src_file = pfs_proto.File(commit=commit_from(source_commit), path=source_path)
    dst_file = pfs_proto.File(commit=commit_from(dest_commit), path=dest_path)
    return self._req(
        Service.PFS,
        "CopyFile",
        src=src_file,
        dst=dst_file,
        overwrite=overwrite,
    )
def list_file(self, commit, path, history=None, include_contents=None):
    """
    Lists the files in a directory.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: The path to the directory.
    * `history`: An optional int that indicates how many historical versions
    of the files to return. Semantics are:
        0: Return the files as they are at `commit`.
        1: Return the files as they are in the last commit in which they
           were modified.
        2: Return the above and the files as they are in the next-to-last
           commit in which they were modified.
        3: etc.
        -1: Return all historical versions.
    * `include_contents`: An optional bool. If `True`, file contents are
    included.
    """
    directory = pfs_proto.File(commit=commit_from(commit), path=path)
    return self._req(
        Service.PFS,
        "ListFileStream",
        file=directory,
        history=history,
        # The public `include_contents` flag is sent as the request's
        # `full` field.
        full=include_contents,
    )
def get_file(self, commit, path, offset_bytes=None, size_bytes=None):
    """
    Fetches the contents of a file stored in PFS and returns them wrapped in
    a `PFSFile` object.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: A string specifying the path of the file.
    * `offset_bytes`: An optional int. The number of bytes to skip at the
    beginning of the file.
    * `size_bytes`: An optional int. Limits the total amount of data
    returned; note that you will get fewer bytes than requested if the value
    is larger than the size of the file. If set to 0, all of the data is
    returned.
    """
    target = pfs_proto.File(commit=commit_from(commit), path=path)
    response = self._req(
        Service.PFS,
        "GetFile",
        file=target,
        offset_bytes=offset_bytes,
        size_bytes=size_bytes,
    )
    return PFSFile(response)
def inspect_file(self, commit, path):
    """
    Inspects a single file and returns its `FileInfo` object.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: A string specifying the path to the file.
    """
    target = pfs_proto.File(commit=commit_from(commit), path=path)
    return self._req(Service.PFS, "InspectFile", file=target)
def diff_file(self, new_commit, new_path, old_commit=None, old_path=None, shallow=None):
    """
    Diffs two files. Unless both `old_commit` and `old_path` are given, the
    same path in the parent of the file specified by `new_commit` and
    `new_path` is used as the old side of the diff.

    Params:

    * `new_commit`: A tuple, string, or `Commit` object representing the
    commit for the new file.
    * `new_path`: A string specifying the path of the new file.
    * `old_commit`: A tuple, string, or `Commit` object representing the
    commit for the old file.
    * `old_path`: A string specifying the path of the old file.
    * `shallow`: An optional bool specifying whether to do a shallow diff.
    """
    # An explicit old file is only sent when it is fully specified; a lone
    # `old_commit` or `old_path` is ignored.
    old_file = None
    if not (old_commit is None or old_path is None):
        old_file = pfs_proto.File(commit=commit_from(old_commit), path=old_path)

    new_file = pfs_proto.File(commit=commit_from(new_commit), path=new_path)
    return self._req(
        Service.PFS,
        "DiffFile",
        new_file=new_file,
        old_file=old_file,
        shallow=shallow,
    )
def walk_file(self, commit, path):
    """
    Walks over every descendant file of a directory. Returns a generator of
    `FileInfo` objects.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: The path to the directory.
    """
    root = pfs_proto.File(commit=commit_from(commit), path=path)
    return self._req(Service.PFS, "WalkFile", file=root)
def delete_file(self, commit, path):
    """
    Deletes a file from a commit.

    The deletion leaves a tombstone in the commit: assuming the file isn't
    written to again later, attempting to get it from the finished commit
    results in a not-found error. The file remains intact in the commit's
    parent.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: The path to the file.
    """
    doomed = pfs_proto.File(commit=commit_from(commit), path=path)
    return self._req(Service.PFS, "DeleteFile", file=doomed)
def put_file_bytes(
    self,
    commit,
    path,
    value,
    delimiter=None,
    target_file_datums=None,
    target_file_bytes=None,
    overwrite_index=None,
    header_records=None,
):
    """
    Uploads a PFS file from a file-like object, bytestring, or (deprecated)
    iterator of bytestrings.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: A string specifying the path in the repo the file(s) will be
    written to.
    * `value`: The file contents as bytes, represented as a file-like
    object, bytestring, or iterator of bytestrings. Anything exposing a
    `read` attribute is treated as a file-like object. Passing any other
    non-string iterable is deprecated and emits a `DeprecationWarning`.
    * `delimiter`: An optional int. Causes data to be broken up into
    separate files by the delimiter. e.g. if you used `Delimiter.CSV.value`,
    a separate PFS file will be created for each row in the input CSV file,
    rather than one large CSV file.
    * `target_file_datums`: An optional int. Specifies the target number of
    datums in each written file. It may be lower if data does not split
    evenly, but will never be higher, unless the value is 0.
    * `target_file_bytes`: An optional int. Specifies the target number of
    bytes in each written file, files may have more or fewer bytes than the
    target.
    * `overwrite_index`: An optional int. This is the object index where the
    write starts from. All existing objects starting from the index are
    deleted.
    * `header_records`: An optional int for splitting data when `delimiter`
    is not `NONE` (or `SQL`). It specifies the number of records that are
    converted to a header and applied to all file shards.
    """
    # Options forwarded unchanged to whichever upload path is selected.
    options = dict(
        delimiter=delimiter,
        target_file_datums=target_file_datums,
        target_file_bytes=target_file_bytes,
        overwrite_index=overwrite_index,
        header_records=header_records,
    )

    # File-like objects (`io.BytesIO`, open files, ...) are themselves
    # iterable, so they must be recognised *before* the iterable check.
    # Otherwise they would be misrouted to the deprecated path below and
    # trigger a spurious deprecation warning.
    is_fileobj = hasattr(value, "read")

    if (
        not is_fileobj
        and isinstance(value, collections.abc.Iterable)
        and not isinstance(value, (str, bytes))
    ):
        warnings.warn(
            "'put_file_bytes' with an iterable 'value' is deprecated, use file-like objects or bytestrings instead",
            DeprecationWarning,
            # Attribute the warning to the caller rather than to this method.
            stacklevel=2,
        )
        reqs = put_file_from_iterable_reqs(
            value,
            file=pfs_proto.File(commit=commit_from(commit), path=path),
            **options,
        )
        return self._req(Service.PFS, "PutFile", req=reqs)

    with self.put_file_client() as pfc:
        if is_fileobj:
            return pfc.put_file_from_fileobj(commit, path, value, **options)
        return pfc.put_file_from_bytes(commit, path, value, **options)
def __init__(self, commit, path, **kwargs):
    """
    Stores the keyword arguments for an operation on the file at `path` in
    `commit`.

    Params:

    * `commit`: A tuple, string, or `Commit` object representing the commit.
    * `path`: A string specifying the path of the file.
    * `kwargs`: Additional keyword arguments, kept alongside the resolved
    `file` entry in `self.kwargs`.
    """
    target = pfs_proto.File(commit=commit_from(commit), path=path)
    # Any caller-supplied "file" entry is replaced by the resolved file.
    kwargs["file"] = target
    self.kwargs = kwargs