Ejemplo n.º 1
0
def download_stream(object_client, project_id, datasets_path):
    """Lazily yield the bytes of one object held in the object store.

    A presigned download URL is requested from ``object_client`` and the
    object is then fetched over HTTP in streaming mode, so the content is
    never held in memory as a whole. Nothing is requested until the returned
    generator is first advanced.

    Parameters
    ----------
    object_client : faculty.clients.object.ObjectClient
        Client used to presign the download URL.
    project_id : uuid.UUID
        The project that owns the object.
    datasets_path : str
        The path of the object to download from the object store.

    Returns
    -------
    Iterable[bytes]
        The content of the object, in chunks of at most ``KILOBYTE`` bytes.

    Raises
    ------
    DatasetsError
        If the download URL answers with HTTP status 404.

    Any other error status is reported by ``response.raise_for_status()``.
    """

    presigned_url = object_client.presign_download(project_id, datasets_path)

    with requests.get(presigned_url, stream=True) as http_response:

        object_missing = http_response.status_code == 404
        if object_missing:
            message = "No such object {} in project {}".format(
                datasets_path, project_id)
            raise DatasetsError(message)

        http_response.raise_for_status()

        for piece in http_response.iter_content(chunk_size=KILOBYTE):
            if not piece:
                # Empty chunks are keep-alives and carry no content.
                continue
            yield piece
Ejemplo n.º 2
0
def _get_file(project_path, local_path, project_id, object_client):
    """Download a single datasets file to a local file path.

    The download itself is delegated to ``transfer.download_file``.

    Raises
    ------
    DatasetsError
        If ``local_path`` ends with "/", i.e. it names a directory rather
        than the full destination file path.
    """

    destination_is_directory = local_path.endswith("/")

    if not destination_is_directory:
        transfer.download_file(
            object_client, project_id, project_path, local_path
        )
        return

    template = ("the source path {} is a normal file but the destination "
                "path {} indicates a directory - please provide a "
                "full destination path")
    raise DatasetsError(
        template.format(repr(project_path), repr(local_path))
    )
Ejemplo n.º 3
0
def rmdir(project_path, project_id=None, object_client=None):
    """Remove an empty directory from the project datasets.

    Parameters
    ----------
    project_path : str
        The path of the directory to remove.
    project_id : str, optional
        The project to get files from. You need to have access to this project
        for it to work. Defaults to the project set by FACULTY_PROJECT_ID in
        your environment.
    object_client : faculty.clients.object.ObjectClient, optional
        Advanced - can be used to benefit from caching in chain interactions
        with datasets.

    Raises
    ------
    DatasetsError
        If the path is a file rather than a directory, if it does not exist,
        or if the directory is not empty.
    """

    contents = ls(
        prefix=project_path,
        project_id=project_id,
        show_hidden=True,
        object_client=object_client,
    )

    rationalised_path = _rationalise_path(project_path)
    project_path_as_file = rationalised_path.rstrip("/")
    project_path_as_dir = project_path_as_file + "/"

    # The listing is requested by prefix, so it may also contain siblings
    # that merely share the leading characters (e.g. "/foobar" when asked for
    # "/foo"). Only the path itself and entries beneath it are relevant when
    # deciding whether the directory exists and is empty.
    relevant = [
        path
        for path in contents
        if path == project_path_as_file
        or path.startswith(project_path_as_dir)
    ]

    if relevant == [project_path_as_dir]:
        # Only the directory marker is present: the directory is empty.
        rm(
            project_path_as_dir,
            project_id=project_id,
            object_client=object_client,
            recursive=True,
        )
    elif relevant == [project_path_as_file]:
        raise DatasetsError("'{}' Not a directory".format(project_path))
    elif project_path_as_dir not in relevant:
        raise DatasetsError(
            "'{}' No such file or directory".format(project_path)
        )
    else:
        raise DatasetsError("'{}' Directory is not empty".format(project_path))
Ejemplo n.º 4
0
def open(project_path, mode="r", temp_dir=None, project_id=None, **kwargs):
    """Open a file from a project's datasets for reading.

    This downloads the file into a temporary directory before opening it, so if
    your files are very large, this function can take a long time. The
    temporary copy and its directory are removed once the caller is done with
    the file object.

    NOTE(review): the body is a generator that yields the file object once;
    it is presumably wrapped by a context-manager decorator outside this
    block - confirm at the definition site.

    Parameters
    ----------
    project_path : str
        The path of the file in the project's datasets to open.
    mode : str
        The opening mode, either 'r' or 'rb'. This is passed down to the
        standard python open function. Writing is currently not supported, so
        any mode containing 'w', 'a', 'x' or '+' is rejected.
    temp_dir : str
        A directory on the local filesystem where you would like the file to be
        saved into temporarily. Note that on Faculty servers, the default
        temporary directory can break with large files, so if your file is
        upwards of 2GB, it is recommended to specify temp_dir='/project'.
    project_id : str, optional
        The project to get files from. You need to have access to this project
        for it to work. Defaults to the project set by FACULTY_PROJECT_ID in
        your environment.
    **kwargs
        Extra keyword arguments passed on to ``io.open``.

    Yields
    ------
    file object
        The downloaded copy of the file, opened with ``io.open``.

    Raises
    ------
    NotImplementedError
        If ``mode`` requests any form of writing.
    DatasetsError
        If ``project_path`` is a directory.
    """

    # Validate the mode before querying the datasets so that invalid
    # arguments fail fast. "+" is rejected as well: "r+" would make the
    # temporary copy writable and any writes would be silently thrown away
    # when the copy is deleted below.
    if any(char in mode for char in ("w", "a", "x", "+")):
        raise NotImplementedError("Currently, only reading is implemented.")

    if _isdir(project_path, project_id=project_id):
        raise DatasetsError("Can't open directories.")

    # A dedicated (hidden) directory keeps the original file name without
    # clashing with anything already present in temp_dir.
    tmpdir = tempfile.mkdtemp(prefix=".", dir=temp_dir)
    local_path = os.path.join(tmpdir, os.path.basename(project_path))

    try:
        get(project_path, local_path, project_id=project_id)
        with io.open(local_path, mode, **kwargs) as file_object:
            yield file_object
    finally:
        # Clean up whether the download, the open or the caller's code failed.
        if os.path.isfile(local_path):
            os.remove(local_path)
        if os.path.isdir(tmpdir):
            os.rmdir(tmpdir)