Ejemplo n.º 1
0
    async def download(
            self,  # type: ignore
            path: WaterButlerPath,
            revision: str = None,
            range: Tuple[int, int] = None,
            **kwargs) -> streams.ResponseStreamReader:
        """Request the content of the file at ``path`` and return it as a stream.

        :param path: path of the file to download; its ``identifier`` must be set
        :param revision: optional version; sent as the ``version`` query parameter only when
            it is truthy and differs from ``path.identifier``
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` wrapping the response
        :raises: ``exceptions.DownloadError`` (code 404) when ``path.identifier`` is ``None``
        """
        if path.identifier is None:
            not_found_msg = '"{}" not found'.format(str(path))
            raise exceptions.DownloadError(not_found_msg, code=404)

        pin_version = bool(revision) and revision != path.identifier
        url_query = {'version': revision} if pin_version else {}

        logger.debug('request-range:: {}'.format(range))
        content_url = self.build_url('files', path.identifier, 'content', **url_query)
        resp = await self.make_request(
            'GET',
            content_url,
            # an empty Accept-Encoding value is sent with the request
            headers={'Accept-Encoding': ''},
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )
        header_pairs = [(name, resp.headers[name]) for name in resp.headers]
        logger.debug('download-headers:: {}'.format(header_pairs))

        return streams.ResponseStreamReader(resp)
Ejemplo n.º 2
0
 def download(self):
     """Issue a GET for ``self.url`` and wrap the response in a stream.

     :returns: a ``ResponseStreamReader`` around the response
     :raises: ``exceptions.ProviderError`` carrying the response status when it is >= 400
     """
     resp = yield from aiohttp.request('GET', self.url)
     if resp.status < 400:
         return streams.ResponseStreamReader(resp)

     raise exceptions.ProviderError(
         'Unable to download the requested file, please try again later.',
         code=resp.status,
     )
Ejemplo n.º 3
0
    async def download(self, path, revision=None, range=None, **kwargs):
        """Download the file at ``path``, optionally at a given ``revision``.

        The revision is forwarded to ``self.metadata`` unless it is empty or ends with
        ``settings.DRIVE_IGNORE_VERSION``.  When the metadata reports no size, the whole
        response body is read into memory and returned as a ``StringStream``.

        :param path: the file to download
        :param revision: optional revision identifier
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` when the size is known, else a ``StringStream``
        """
        honor_revision = bool(revision) and not revision.endswith(settings.DRIVE_IGNORE_VERSION)
        metadata_kwargs = {'revision': revision} if honor_revision else {}
        metadata = await self.metadata(path, **metadata_kwargs)

        # fall back to an export link when the metadata has no direct download URL
        source_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw)
        download_resp = await self.make_request(
            'GET',
            source_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        if metadata.size is None:
            # google docs, not drive files, have no way to get the file size
            # must buffer the entire file into memory
            buffered = await download_resp.read()
            stream = streams.StringStream(buffered)
            content_type = download_resp.headers.get('Content-Type')
            if content_type:
                stream.content_type = content_type
            stream.name = metadata.export_name
            return stream

        return streams.ResponseStreamReader(download_resp, size=metadata.size)
Ejemplo n.º 4
0
    async def download(self):
        """Download file from WaterButler, returning stream.

        The request is made with redirects disabled; a 301/302 answer is followed manually
        with a plain ``aiohttp`` request, and whether that happened is recorded in the
        ``download.saw_redirect`` metric.

        :returns: a ``ResponseStreamReader`` created with ``unsizable=True``
        :raises: ``exceptions.DownloadError`` when the first response status is >= 400
        """
        download_url = await self._fetch_download_url()
        response = await self._make_request(
            'GET',
            download_url,
            allow_redirects=False,
            headers={settings.MFR_IDENTIFYING_HEADER: '1'},
        )

        if response.status >= 400:
            resp_text = await response.text()
            error_line = 'Unable to download file: ({}) {}'.format(response.status, resp_text)
            logger.error(error_line)
            raise exceptions.DownloadError(
                'Unable to download the requested file, please try again later.',
                download_url=download_url,
                response=resp_text,
                provider=self.NAME,
            )

        # the metric is first recorded as False and overwritten only after a redirect is followed
        self.metrics.add('download.saw_redirect', False)
        was_redirected = response.status in (301, 302)
        if was_redirected:
            await response.release()
            response = await aiohttp.request('GET', response.headers['location'])
            self.metrics.add('download.saw_redirect', True)

        return streams.ResponseStreamReader(response, unsizable=True)
Ejemplo n.º 5
0
    async def download(self, path: BitbucketPath,  # type: ignore
                       range: Tuple[int, int]=None, **kwargs) -> streams.ResponseStreamReader:
        """Return a stream for the file at ``path`` on Bitbucket.

        Uses the BB API 2.0 ``repo/username/repo_slug/src/node/path`` endpoint.  That endpoint
        behaves differently depending on the path type and on the ``format=meta`` query param:

        1) file, without ``format=meta``: file download (the case used here)
        2) file, with ``format=meta``: file metadata
        3) folder, without ``format=meta``: folder contents
        4) folder, with ``format=meta``: folder metadata

        API Doc: https://developer.atlassian.com/bitbucket/api/2/reference/resource/repositories/%7Busername%7D/%7Brepo_slug%7D/src/%7Bnode%7D/%7Bpath%7D

        :param path: the BitbucketPath object of the file to be downloaded
        :param range: the range header, passed through to ``make_request``
        :returns: a ``ResponseStreamReader`` sized from the file's metadata
        """
        # metadata is fetched first; its size is what the returned stream reports
        metadata = await self.metadata(path)

        logger.debug('requested-range:: {}'.format(range))
        src_url = self._build_v2_repo_url('src', path.commit_sha, *path.path_tuple())
        resp = await self.make_request(
            'GET',
            src_url,
            range=range,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        header_pairs = [(name, resp.headers[name]) for name in resp.headers]
        logger.debug('download-headers:: {}'.format(header_pairs))

        return streams.ResponseStreamReader(resp, size=metadata.size)
Ejemplo n.º 6
0
    def download(self, path, revision=None, **kwargs):
        """Download the file at ``path`` (generator-based coroutine).

        When ``revision`` is set and does not end with ``settings.DRIVE_IGNORE_VERSION``, the
        revision record is fetched to find its download URL; otherwise the raw metadata of
        ``path`` is used.  Responses without a ``fileSize`` entry are buffered in memory.

        :param path: the file to download
        :param revision: optional revision identifier
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` when ``fileSize`` is known, else a ``StringStream``
        """
        if not revision or revision.endswith(settings.DRIVE_IGNORE_VERSION):
            data = yield from self.metadata(path, raw=True)
        else:
            # Must make additional request to look up download URL for revision
            revision_url = self.build_url(
                'files', path.identifier, 'revisions', revision, alt='json')
            response = yield from self.make_request(
                'GET',
                revision_url,
                expects=(200, ),
                throws=exceptions.MetadataError,
            )
            data = yield from response.json()

        source_url = data.get('downloadUrl') or drive_utils.get_export_link(data['exportLinks'])
        download_resp = yield from self.make_request(
            'GET',
            source_url,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )

        if 'fileSize' not in data:
            # google docs, not drive files, have no way to get the file size
            # must buffer the entire file into memory
            buffered = yield from download_resp.read()
            stream = streams.StringStream(buffered)
            content_type = download_resp.headers.get('Content-Type')
            if content_type:
                stream.content_type = content_type
            return stream

        return streams.ResponseStreamReader(download_resp, size=data['fileSize'])
Ejemplo n.º 7
0
 async def download(
         self,  # type: ignore
         path: WaterButlerPath,
         revision: str = None,
         range: typing.Tuple[int, int] = None,
         **kwargs) -> streams.ResponseStreamReader:
     """POST to the Dropbox ``files/download`` content endpoint and stream the result.

     :param path: the file to download; ``path.full_path`` is used when no revision is given
     :param revision: optional revision, sent as ``rev:<revision>`` in place of the path
     :param range: optional byte range, passed through to ``make_request``
     :param kwargs: not used by this method
     :returns: a ``ResponseStreamReader``; its size is taken from the ``dropbox-api-result``
         header when the response has no ``Content-Length``
     """
     if revision:
         target = "rev:" + revision
     else:
         target = path.full_path

     content_url = self._build_content_url('files', 'download')
     request_headers = {
         'Dropbox-API-Arg': json.dumps({"path": target}),
         'Content-Type': '',
     }
     resp = await self.make_request(
         'POST',
         content_url,
         headers=request_headers,
         range=range,
         expects=(200, 206, 409),
         throws=exceptions.DownloadError,
     )

     if resp.status == 409:
         # NOTE(review): the handler presumably raises for conflict payloads -- confirm
         conflict = await resp.json()
         self.dropbox_conflict_error_handler(conflict)

     size = None  # ResponseStreamReader will extract it from the resp
     if 'Content-Length' not in resp.headers:
         size = json.loads(resp.headers['dropbox-api-result'])['size']
     return streams.ResponseStreamReader(resp, size=size)
Ejemplo n.º 8
0
    async def download(self, path, accept_url=False, range=None, **kwargs):
        """Create a stream for downloading a file from the remote WebDAV host.

        :param waterbutler.core.path.WaterButlerPath path: user-supplied path to download
        :param bool accept_url: only recorded in metrics; no URL is ever returned here
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` wrapping the response
        :raises: `waterbutler.core.exceptions.DownloadError`
        """
        # NOTE(review): 'got_accept_url' is True when accept_url is False, which looks
        # inverted relative to its name -- confirm with whoever consumes this metric.
        download_metrics = {
            'got_accept_url': accept_url is False,
            'got_range': range is not None,
        }
        self.metrics.add('download', download_metrics)

        file_url = self._webdav_url_ + path.full_path
        download_resp = await self.make_request(
            'GET',
            file_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
            auth=self._auth,
            connector=self.connector(),
        )
        return streams.ResponseStreamReader(download_resp)
Ejemplo n.º 9
0
    async def download(self, path: WaterButlerPath, revision: str=None,  # type: ignore
                       range: Tuple[int, int] = None, **kwargs) -> streams.ResponseStreamReader:
        r"""Return a stream for the file at ``path`` from Dataverse.

        :param WaterButlerPath path: Path to the file you want to download
        :param str revision: accepted for interface compatibility; not read by this method
        :param Tuple[int, int] range: the range header
        :param dict \*\*kwargs: Additional arguments that are ignored
        :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
        :raises: :class:`waterbutler.core.exceptions.NotFoundError` when the path has no
            identifier
        :raises: :class:`waterbutler.core.exceptions.DownloadError` when the response status
            is not 200 or 206
        """
        file_id = path.identifier
        if file_id is None:
            raise exceptions.NotFoundError(str(path))

        logger.debug('request-range:: {}'.format(range))
        # the token is sent as the ``key`` query parameter
        download_url = self.build_url(pd_settings.DOWN_BASE_URL, file_id, key=self.token)
        resp = await self.make_request(
            'GET',
            download_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp)
Ejemplo n.º 10
0
    async def download(
            self,
            path: FigsharePath,  # type: ignore
            range: Tuple[int, int] = None,
            **kwargs) -> streams.ResponseStreamReader:
        """Download the file identified by ``path`` from this project.

        :param FigsharePath path: FigsharePath to file you want to download
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :rtype streams.ResponseStreamReader:
        :raises: ``exceptions.NotFoundError`` when ``path`` is not a file
        :raises: ``exceptions.DownloadError`` (FORBIDDEN) when the metadata has no download
            URL or the download request answers 404
        """
        if not path.is_file:
            raise exceptions.NotFoundError(str(path))

        file_metadata = await self.metadata(path)
        download_url = file_metadata.extra['downloadUrl']  # type: ignore
        if download_url is None:
            raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)

        logger.debug('requested-range:: {}'.format(range))
        # the token is only sent for files that are not public
        params = {} if file_metadata.is_public else {'token': self.token}  # type: ignore
        resp = await self.make_request('GET', download_url, range=range, params=params)

        if resp.status != 404:
            return streams.ResponseStreamReader(resp)

        # a 404 on the download URL is reported to the caller as FORBIDDEN
        await resp.release()
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)
Ejemplo n.º 11
0
    async def download(self,
                       path,
                       accept_url=False,
                       revision=None,
                       range=None,
                       **kwargs):
        r"""Return a stream (or, with ``accept_url``, a signed URL) for the S3 key at ``path``.

        :param str path: Path to the key you want to download
        :param bool accept_url: when truthy, return the generated URL instead of requesting it
        :param str revision: optional version id; empty or ``'latest'`` (any case) means no
            ``versionId`` query parameter
        :param range: optional byte range, passed through to ``make_request``
        :param dict \*\*kwargs: only ``displayName`` is read, to build the content disposition
        :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
        :raises: :class:`waterbutler.core.exceptions.DownloadError`
        """
        await self._check_region()

        if not path.is_file:
            raise exceptions.DownloadError('No file specified for download', code=400)

        wants_latest = not revision or revision.lower() == 'latest'
        query_parameters = None if wants_latest else {'versionId': revision}

        display_name = kwargs.get('displayName')
        if display_name:
            disposition = 'attachment; filename*=UTF-8\'\'{}'.format(parse.quote(display_name))
        else:
            disposition = 'attachment'
        response_headers = {'response-content-disposition': disposition}

        # URL generation is deferred: the partial is either called right here (accept_url)
        # or handed to make_request uncalled
        url = functools.partial(
            self.bucket.new_key(path.path).generate_url,
            settings.TEMP_URL_SECS,
            query_parameters=query_parameters,
            response_headers=response_headers,
        )

        if accept_url:
            return url()

        resp = await self.make_request(
            'GET',
            url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp)
Ejemplo n.º 12
0
    async def download(
            self,  # type: ignore
            path: GitLabPath,
            **kwargs):
        r"""Return a stream to the specified file on GitLab.

        The raw-file endpoint is not used because GitLab requires periods in the file path
        to be encoded, which Python and aiohttp make difficult.  See
        https://gitlab.com/gitlab-org/gitlab-ce/issues/31470 for details. (Update: this is due
        to be fixed in the GL 10.0 release)

        API docs: https://docs.gitlab.com/ce/api/repository_files.html#get-file-from-repository

        This uses the same endpoint as `_fetch_file_contents`, but relies on the response
        headers, which are not returned by that method.  The base64 ``content`` field of the
        payload is decoded in memory and attached to the response as its new content.

        :param str path: The path to the file on GitLab
        :param dict \*\*kwargs: Ignored
        :raises: :class:`waterbutler.core.exceptions.DownloadError`
        """
        resp = await self.make_request(
            'GET',
            self._build_file_url(path),
            expects=(200, ),
            throws=exceptions.DownloadError,
        )

        body_bytes = await resp.read()
        raw_data = body_bytes.decode("utf-8")
        try:
            data = json.loads(raw_data)
        except json.decoder.JSONDecodeError:
            # GitLab API sometimes returns ruby hashes instead of json
            # see: https://gitlab.com/gitlab-org/gitlab-ce/issues/31790
            # fixed in GL v9.5
            data = self._convert_ruby_hash_to_dict(raw_data)

        raw = base64.b64decode(data['content'])

        # override the content type with one guessed from the file name, when there is one
        mimetype = mimetypes.guess_type(path.full_path)[0]
        header_overrides = {} if mimetype is None else {'CONTENT-TYPE': mimetype}

        mdict = aiohttp.multidict.MultiDict(resp.headers)
        mdict.update(header_overrides)
        resp.headers = mdict
        resp.content = streams.StringStream(raw)

        return streams.ResponseStreamReader(resp, len(raw))
Ejemplo n.º 13
0
 def download(self):
     """Fetch the download URL and stream its content, following one redirect manually.

     :returns: a ``ResponseStreamReader`` created with ``unsizable=True``
     :raises: ``exceptions.ProviderError`` carrying the status when the first response is >= 400
     """
     download_url = yield from self._fetch_download_url()
     first_resp = yield from self._make_request(
         'GET', download_url, allow_redirects=False)

     status = first_resp.status
     if status >= 400:
         raise exceptions.ProviderError(
             'Unable to download the requested file, please try again later.',
             code=status,
         )

     if status not in (301, 302):
         return streams.ResponseStreamReader(first_resp, unsizable=True)

     # redirects were disabled on the first request, so follow the location by hand
     followed = yield from aiohttp.request('GET', first_resp.headers['location'])
     return streams.ResponseStreamReader(followed, unsizable=True)
Ejemplo n.º 14
0
 async def download(self):
     """Issue a GET for ``self.url`` and wrap the response in a stream.

     :returns: a ``ResponseStreamReader`` around the response
     :raises: ``exceptions.DownloadError`` when the response status is >= 400; the response
         body is logged and attached to the error
     """
     response = await aiohttp.request('GET', self.url)
     if response.status < 400:
         return streams.ResponseStreamReader(response)

     err_resp = await response.read()
     logged_body = err_resp.decode('utf-8')
     logger.error('Unable to download file: ({}) {}'.format(response.status, logged_body))
     raise exceptions.DownloadError(
         'Unable to download the requested file, please try again later.',
         download_url=self.url,
         response=await response.text(),
         code=response.status,
         provider='http',
     )
Ejemplo n.º 15
0
    async def download(self, path: BitbucketPath, **kwargs):  # type: ignore
        """Get the stream to the specified file on bitbucket.

        :param str path: The path to the file on bitbucket
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` sized from the file's metadata
        """
        # metadata is fetched first; its size is what the returned stream reports
        metadata = await self.metadata(path)

        raw_url = self._build_v1_repo_url('raw', path.commit_sha, *path.path_tuple())
        resp = await self.make_request(
            'GET',
            raw_url,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp, size=metadata.size)
Ejemplo n.º 16
0
    async def download(
            self,  # type: ignore
            path: GoogleDrivePath,
            revision: str = None,
            range: Tuple[int, int] = None,
            **kwargs) -> streams.BaseStream:
        """Download the file at `path`, at `revision` when one is given.

        The revision is simply forwarded to ``self.metadata``.

        Quirks:

        Google docs don't have a size until they're exported, so WB must download them, then
        re-stream them as a StringStream.

        :param GoogleDrivePath path: the file to download
        :param str revision: the id of a particular version to download
        :param tuple(int, int) range: range of bytes to download in this request
        :rtype: streams.ResponseStreamReader or streams.StringStream
        :returns: For GDocs, a StringStream.  All others, a ResponseStreamReader.
        """
        metadata = await self.metadata(path, revision=revision)

        # fall back to an export link when the metadata has no direct download URL
        source_url = (metadata.raw.get('downloadUrl') or
                      utils.get_export_link(metadata.raw))  # type: ignore
        download_resp = await self.make_request(
            'GET',
            source_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        if metadata.size is None:  # type: ignore
            # google docs, not drive files, have no way to get the file size
            # must buffer the entire file into memory
            buffered = await download_resp.read()
            stream = streams.StringStream(buffered)
            content_type = download_resp.headers.get('Content-Type')
            if content_type:
                # TODO: Add these properties to base class officially, instead of as one-off
                stream.content_type = content_type  # type: ignore
            stream.name = metadata.export_name  # type: ignore
            return stream

        return streams.ResponseStreamReader(
            download_resp, size=metadata.size_as_int)  # type: ignore
Ejemplo n.º 17
0
    def download(self, path, revision=None, range=None, **kwargs):
        """Request the content of the file at ``path`` (generator-based coroutine).

        :param path: path of the file to download; its ``identifier`` must be set
        :param revision: optional version; sent as the ``version`` query parameter only when
            it is truthy and differs from ``path.identifier``
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` wrapping the response
        :raises: ``exceptions.DownloadError`` (code 404) when ``path.identifier`` is ``None``
        """
        if path.identifier is None:
            not_found_msg = '"{}" not found'.format(str(path))
            raise exceptions.DownloadError(not_found_msg, code=404)

        pin_version = bool(revision) and revision != path.identifier
        url_query = {'version': revision} if pin_version else {}

        content_url = self.build_url('files', path.identifier, 'content', **url_query)
        resp = yield from self.make_request(
            'GET',
            content_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp)
Ejemplo n.º 18
0
    async def download(
            self,  # type: ignore
            path: WaterButlerPath,
            revision: str = None,
            range: typing.Tuple[int, int] = None,
            **kwargs) -> streams.ResponseStreamReader:
        """Download a file through the Dropbox V2 ``files/download`` endpoint.

        Dropbox V2 API Files Download
        https://www.dropbox.com/developers/documentation/http/documentation#files-download

        Request and Response Format: Content-download endpoints
        https://www.dropbox.com/developers/documentation/http/documentation#formats

        Per those docs, the file content is the response body and the result (metadata about
        the file) is JSON in the "Dropbox-API-Result" response header.  This method reads the
        file size from that header whenever "Content-Length" is absent, because
        ``ResponseStreamReader`` needs a size.

        :param path: the file to download; ``path.full_path`` is used when no revision is given
        :param revision: optional revision, sent as ``rev:<revision>`` in place of the path
        :param range: optional byte range, passed through to ``make_request``
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader`` wrapping the response
        """
        if revision:
            target = "rev:" + revision
        else:
            target = path.full_path

        content_url = self._build_content_url('files', 'download')
        request_headers = {
            'Dropbox-API-Arg': json.dumps({"path": target}),
            'Content-Type': '',
        }
        resp = await self.make_request(
            'POST',
            content_url,
            headers=request_headers,
            range=range,
            expects=(200, 206, 409),
            throws=core_exceptions.DownloadError,
        )

        if resp.status == 409:
            # NOTE(review): the handler presumably raises for conflict payloads -- confirm
            conflict = await resp.json()
            self.dropbox_conflict_error_handler(conflict)

        size = None  # ResponseStreamReader will extract it from the resp
        if 'Content-Length' not in resp.headers:
            size = json.loads(resp.headers['dropbox-api-result'])['size']
        return streams.ResponseStreamReader(resp, size=size)
Ejemplo n.º 19
0
    async def download(self, path, revision=None, **kwargs):
        """Get the stream to the specified file on github.

        The blob is looked up by sha: ``path.file_sha`` when it is truthy, otherwise the
        ``fileSha`` recorded in the file's metadata.

        :param path: The path to the file on github
        :param revision: forwarded to ``self.metadata``
        :param dict kwargs: Ignored
        :returns: a ``ResponseStreamReader`` sized from the file's metadata
        """
        data = await self.metadata(path, revision=revision)
        file_sha = path.file_sha or data.extra['fileSha']

        blob_url = self.build_repo_url('git', 'blobs', file_sha)
        resp = await self.make_request(
            'GET',
            blob_url,
            headers={'Accept': 'application/vnd.github.v3.raw'},
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp, size=data.size)
Ejemplo n.º 20
0
    def download(self, path, revision=None, **kwargs):
        """Download the file at ``path`` (generator-based coroutine).

        :param path: the file to download; ``path.full_path`` is placed in the content URL
        :param revision: optional revision, added as the ``rev`` query parameter when truthy
        :param kwargs: not used by this method
        :returns: a ``ResponseStreamReader``; its size is taken from the
            ``X-DROPBOX-METADATA`` header when the response has no ``Content-Length``
        """
        # Dont add unused query parameters
        url_query = {'rev': revision} if revision else {}
        url = self._build_content_url('files', 'auto', path.full_path, **url_query)

        resp = yield from self.make_request(
            'GET',
            url,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )

        size = None
        if 'Content-Length' not in resp.headers:
            size = json.loads(resp.headers['X-DROPBOX-METADATA'])['bytes']

        return streams.ResponseStreamReader(resp, size=size)
Ejemplo n.º 21
0
    def download(self, path, **kwargs):
        """Download a file. Note: Although Figshare may return a download URL,
        the `accept_url` parameter is ignored here, since Figshare does not
        support HTTPS for downloads.

        :param str path: Path to the key you want to download
        :rtype ResponseWrapper:
        :raises: ``exceptions.NotFoundError`` when ``path.identifier`` is ``None``
        :raises: ``exceptions.DownloadError`` (FORBIDDEN) when the metadata has no download URL
        """
        if path.identifier is None:
            raise exceptions.NotFoundError(str(path))

        file_metadata = yield from self.metadata(path)
        download_url = file_metadata.extra['downloadUrl']
        if download_url is not None:
            resp = yield from aiohttp.request('GET', download_url)
            return streams.ResponseStreamReader(resp)

        raise exceptions.DownloadError(
            'Cannot download private files',
            code=http.client.FORBIDDEN,
        )
Ejemplo n.º 22
0
    async def download(self, path: BitbucketPath,  # type: ignore
                       range: Tuple[int, int]=None, **kwargs) -> streams.ResponseStreamReader:
        """Get the stream to the specified file on Bitbucket.

        :param path: The path to the file on Bitbucket
        :param range: the range header, passed through to ``make_request``
        :returns: a ``ResponseStreamReader`` sized from the file's metadata
        """
        # metadata is fetched first; its size is what the returned stream reports
        metadata = await self.metadata(path)

        logger.debug('requested-range:: {}'.format(range))
        raw_url = self._build_v1_repo_url('raw', path.commit_sha, *path.path_tuple())
        resp = await self.make_request(
            'GET',
            raw_url,
            range=range,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        header_pairs = [(name, resp.headers[name]) for name in resp.headers]
        logger.debug('download-headers:: {}'.format(header_pairs))

        return streams.ResponseStreamReader(resp, size=metadata.size)
Ejemplo n.º 23
0
    def download(self, path, accept_url=False, **kwargs):
        """Return a stream, or a signed URL, for the object at ``path``.

        :param str path: Path to the object you want to download
        :param bool accept_url: when truthy, return a URL signed against the public endpoint
            (with a ``filename`` argument) instead of performing the download
        :param dict **kwargs: only ``displayName`` is read, as the preferred ``filename``
        :rtype str:
        :rtype ResponseStreamReader:
        :raises: exceptions.DownloadError
        """
        if not accept_url:
            resp = yield from self.make_request(
                'GET',
                self.sign_url(path),
                expects=(200, ),
                throws=exceptions.DownloadError,
            )
            return streams.ResponseStreamReader(resp)

        signed = self.sign_url(path, endpoint=self.public_endpoint)
        parsed_url = furl.furl(signed)
        # fall back to the path's own name when no display name was supplied
        parsed_url.args['filename'] = kwargs.get('displayName') or path.name
        return parsed_url.url
Ejemplo n.º 24
0
    async def download(self, path, **kwargs):
        """Download the file identified by ``path`` from this project.

        :param FigsharePath path: FigsharePath to file you want to download
        :param kwargs: not used by this method
        :rtype ResponseStreamReader:
        :raises: ``exceptions.NotFoundError`` when ``path`` is not a file
        :raises: ``exceptions.DownloadError`` (FORBIDDEN) when the metadata has no download
            URL or the download request answers 404
        """
        if not path.is_file:
            raise exceptions.NotFoundError(str(path))

        file_metadata = await self.metadata(path)
        download_url = file_metadata.extra['downloadUrl']
        if download_url is None:
            raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)

        # the token is only sent for files that are not public
        if file_metadata.is_public:
            params = {}
        else:
            params = {'token': self.token}

        resp = await aiohttp.request('GET', download_url, params=params)
        if resp.status != 404:
            return streams.ResponseStreamReader(resp)

        # a 404 on the download URL is reported to the caller as FORBIDDEN
        await resp.release()
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)
Ejemplo n.º 25
0
    async def download(self, path: GitLabPath, **kwargs):  # type: ignore
        r"""Return a stream to the specified file on GitLab.

        API Docs: https://docs.gitlab.com/ce/api/repository_files.html#get-raw-file-from-repository

        Historically this method was implemented using a different endpoint which returned the file
        data as a base-64 encoded string.  We used this endpoint because the one listed above was
        buggy (see: https://gitlab.com/gitlab-org/gitlab-ce/issues/31470).  That issue has since
        been fixed in GL.  We removed the workaround since it required slurping the file contents
        into memory.  As a side-effect, the Gitlab download() method no longer supports the Range
        header.  It had been manually implemented by array slicing the slurped data.  The raw file
        endpoint does not currently respect it.

        :param str path: The path to the file on GitLab
        :param dict \*\*kwargs: Ignored, except that a ``range`` entry is reported in the debug
            log; it is never forwarded to the request
        :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
        :raises: :class:`waterbutler.core.exceptions.DownloadError`
        """

        # ``range`` is not a parameter of this method, so the bare name would resolve to the
        # builtin ``range`` type.  Log what the caller actually passed (None when absent).
        logger.debug('requested-range:: {}'.format(kwargs.get('range')))

        url = self._build_file_url(path, raw=True)
        resp = await self.make_request(
            'GET',
            url,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        logger.debug('download-headers:: {}'.format([(x, resp.headers[x])
                                                     for x in resp.headers]))

        # get size from X-Gitlab-Size header, since some responses don't set Content-Length
        return streams.ResponseStreamReader(resp, size=int(resp.headers['X-Gitlab-Size']))
Ejemplo n.º 26
0
    async def download(self, path: GitHubPath, range: Tuple[int, int]=None,  # type: ignore
                       revision=None, **kwargs) -> streams.ResponseStreamReader:
        """Get the stream to the specified file on github.

        The blob is looked up by sha: ``path.file_sha`` when it is truthy, otherwise the
        ``fileSha`` recorded in the file's metadata.

        :param GitHubPath path: The path to the file on github
        :param range: The range header, passed through to ``make_request``
        :param revision: forwarded to ``self.metadata``
        :param dict kwargs: Additional kwargs are ignored
        :returns: a ``ResponseStreamReader`` sized from the file's metadata
        """
        data = await self.metadata(path, revision=revision)
        file_sha = path.file_sha or data.extra['fileSha']

        logger.debug('requested-range:: {}'.format(range))
        blob_url = self.build_repo_url('git', 'blobs', file_sha)
        resp = await self.make_request(
            'GET',
            blob_url,
            headers={'Accept': 'application/vnd.github.v3.raw'},
            range=range,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp, size=data.size)
Ejemplo n.º 27
0
    def download(self, path, revision=None, **kwargs):
        r"""Return a stream for the file at ``path`` from Dataverse (generator-based coroutine).

        :param str path: Path to the file you want to download
        :param str revision: accepted for interface compatibility; not read by this method
        :param dict \*\*kwargs: Additional arguments that are ignored
        :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
        :raises: :class:`waterbutler.core.exceptions.NotFoundError` when the path has no
            identifier
        :raises: :class:`waterbutler.core.exceptions.DownloadError` when the response status
            is not 200
        """
        file_id = path.identifier
        if file_id is None:
            raise exceptions.NotFoundError(str(path))

        # the token is sent as the ``key`` query parameter
        download_url = self.build_url(settings.DOWN_BASE_URL, file_id, key=self.token)
        resp = yield from self.make_request(
            'GET',
            download_url,
            expects=(200, ),
            throws=exceptions.DownloadError,
        )
        return streams.ResponseStreamReader(resp)
Ejemplo n.º 28
0
    async def download(self, path, accept_url=False, range=None, **kwargs):
        r"""Return a stream, or a signed URL, for the object at ``path``.

        :param str path: Path to the object you want to download
        :param bool accept_url: when truthy, return a URL signed against the public endpoint
            (with a ``filename`` argument) instead of performing the download
        :param range: optional byte range, passed through to ``make_request``
        :param dict \*\*kwargs: only ``displayName`` is read, as the preferred ``filename``
        :rtype str:
        :rtype ResponseStreamReader:
        :raises: exceptions.DownloadError
        """
        self.metrics.add('download.accept_url', accept_url)

        if not accept_url:
            # the signing call is handed over as a partial and is not invoked here
            resp = await self.make_request(
                'GET',
                functools.partial(self.sign_url, path),
                range=range,
                expects=(200, 206),
                throws=exceptions.DownloadError,
            )
            return streams.ResponseStreamReader(resp)

        signed = self.sign_url(path, endpoint=self.public_endpoint)
        parsed_url = furl.furl(signed)
        # fall back to the path's own name when no display name was supplied
        parsed_url.args['filename'] = kwargs.get('displayName') or path.name
        return parsed_url.url
Ejemplo n.º 29
0
    async def download(
            self,  # type: ignore
            path: OneDrivePath,
            revision: str = None,
            range: typing.Tuple[int, int] = None,
            **kwargs) -> streams.ResponseStreamReader:
        r"""Download the file identified by ``path``, at ``revision`` when one is given.

        With a revision, the download URL is taken from the matching entry returned by
        ``_revisions_json``; without one, it is taken from the item's metadata.

        API docs: https://dev.onedrive.com/items/download.htm

        :param str path: The path to the file on OneDrive
        :param str revision: The revision of the file to download. If ``None``, download latest.
        :param range: optional byte range, passed through to ``make_request``
        :param dict \*\*kwargs: Ignored (only logged)
        :raises: :class:`waterbutler.core.exceptions.DownloadError` (code 404) when the path
            has no identifier
        :raises: :class:`waterbutler.core.exceptions.UnexportableFileTypeError` when the matched
            revision has no download URL or the item is a ``oneNote`` package
        :raises: :class:`waterbutler.core.exceptions.NotFoundError` when no download URL is found
        :rtype: waterbutler.core.streams.ResponseStreamReader
        :return: a stream of the contents of the file
        """
        logger.debug(
            'download path::{} path.identifier::{} revision::{} range::{} kwargs::{}'.format(
                path, path.identifier, revision, range, kwargs))

        if path.identifier is None:
            not_found_msg = '"{}" not found'.format(str(path))
            raise exceptions.DownloadError(not_found_msg, code=404)

        download_url = None
        if revision:
            items = await self._revisions_json(path)
            # only the first entry whose id equals the requested revision is considered
            matched = next((entry for entry in items['value'] if entry['id'] == revision), None)
            if matched is not None:
                try:
                    download_url = matched['@content.downloadUrl']
                except KeyError:
                    raise exceptions.UnexportableFileTypeError(str(path))
        else:
            # TODO: we should be able to get the download url from validate_v1_path
            metadata_resp = await self.make_request(
                'GET',
                self._build_drive_url(*path.api_identifier),
                expects=(200, ),
                throws=exceptions.MetadataError,
            )
            logger.debug('download metadata_resp::{}'.format(repr(metadata_resp)))
            metadata = await metadata_resp.json()
            logger.debug('download metadata::{}'.format(json.dumps(metadata)))

            # items without a package type are ordinary files
            try:
                package_type = metadata['package']['type']
            except KeyError:
                package_type = None
            if package_type == 'oneNote':
                raise exceptions.UnexportableFileTypeError(str(path))

            download_url = metadata.get('@content.downloadUrl')

        if download_url is None:
            raise exceptions.NotFoundError(str(path))

        logger.debug('download download_url::{}'.format(download_url))
        download_resp = await self.make_request(
            'GET',
            download_url,
            range=range,
            expects=(200, 206),
            headers={'accept-encoding': ''},
            throws=exceptions.DownloadError,
        )
        logger.debug('download download_resp::{}'.format(repr(download_resp)))

        return streams.ResponseStreamReader(download_resp)