def metadata(self, path, **kwargs):
    if path.is_dir:
        if not os.path.exists(path.full_path) or not os.path.isdir(path.full_path):
            raise exceptions.MetadataError(
                'Could not retrieve folder \'{0}\''.format(path),
                code=404,
            )
        ret = []
        for item in os.listdir(path.full_path):
            if os.path.isdir(os.path.join(path.full_path, item)):
                metadata = self._metadata_folder(path, item)
                ret.append(FileSystemFolderMetadata(metadata, self.folder).serialized())
            else:
                metadata = self._metadata_file(path, item)
                ret.append(FileSystemFileMetadata(metadata, self.folder).serialized())
        return ret
    else:
        if not os.path.exists(path.full_path) or os.path.isdir(path.full_path):
            raise exceptions.MetadataError(
                'Could not retrieve file \'{0}\''.format(path),
                code=404,
            )
        metadata = self._metadata_file(path)
        return FileSystemFileMetadata(metadata, self.folder).serialized()
def metadata(self, path, **kwargs):
    resp = yield from self.make_request(
        'GET',
        self.build_url('metadata', 'auto', path.full_path),
        expects=(200, ),
        throws=exceptions.MetadataError
    )
    data = yield from resp.json()

    # Dropbox will match a file or folder by name within the requested path
    if path.is_file and data['is_dir']:
        raise exceptions.MetadataError(
            "Could not retrieve file '{}'".format(path),
            code=http.client.NOT_FOUND,
        )

    if data.get('is_deleted'):
        raise exceptions.MetadataError(
            "Could not retrieve {kind} '{path}'".format(
                kind='folder' if data['is_dir'] else 'file',
                path=path,
            ),
            code=http.client.NOT_FOUND,
        )

    if data['is_dir']:
        ret = []
        for item in data['contents']:
            if item['is_dir']:
                ret.append(DropboxFolderMetadata(item, self.folder).serialized())
            else:
                ret.append(DropboxFileMetadata(item, self.folder).serialized())
        return ret

    return DropboxFileMetadata(data, self.folder).serialized()
async def metadata(self,  # type: ignore
                   path: WaterButlerPath,
                   revision: str=None,
                   **kwargs) \
        -> typing.Union[BaseDropboxMetadata, typing.List[BaseDropboxMetadata]]:
    full_path = path.full_path.rstrip('/')
    url = self.build_url('files', 'get_metadata')
    body = {'path': full_path}
    if revision:
        body = {'path': 'rev:' + revision}
    elif path.is_folder:
        url = self.build_url('files', 'list_folder')

    if path.is_folder:
        ret = []  # type: typing.List[BaseDropboxMetadata]
        has_more = True
        page_count = 0
        while has_more:
            page_count += 1
            data = await self.dropbox_request(url, body, throws=exceptions.MetadataError)
            for entry in data['entries']:
                if entry['.tag'] == 'folder':
                    ret.append(DropboxFolderMetadata(entry, self.folder))
                else:
                    ret.append(DropboxFileMetadata(entry, self.folder))
            if not data['has_more']:
                has_more = False
            else:
                url = self.build_url('files', 'list_folder', 'continue')
                body = {'cursor': data['cursor']}
        self.metrics.add('metadata.folder.pages', page_count)
        return ret

    data = await self.dropbox_request(url, body, throws=exceptions.MetadataError)
    # Dropbox v2 API will not indicate file/folder if path "deleted"
    if data['.tag'] == 'deleted':
        raise exceptions.MetadataError(
            "Could not retrieve '{}'".format(path),
            code=HTTPStatus.NOT_FOUND,
        )
    # Dropbox will match a file or folder by name within the requested path
    if path.is_file and data['.tag'] == 'folder':
        raise exceptions.MetadataError(
            "Could not retrieve file '{}'".format(path),
            code=HTTPStatus.NOT_FOUND,
        )
    return DropboxFileMetadata(data, self.folder)
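# A minimal, self-contained sketch of the cursor pagination loop used above,
# run against an in-memory fake instead of the Dropbox v2 API. The page
# dicts and names here are hypothetical; only the has_more/cursor shape
# mirrors the provider code.
def iter_pages(pages):
    """Yield entries from fake list_folder responses until has_more is False."""
    cursor = 0
    has_more = True
    while has_more:
        data = pages[cursor]  # stands in for POST files/list_folder[/continue]
        yield from data['entries']
        has_more = data['has_more']
        cursor = data.get('cursor', cursor)

pages = [
    {'entries': [{'.tag': 'folder', 'name': 'a'}], 'has_more': True, 'cursor': 1},
    {'entries': [{'.tag': 'file', 'name': 'b.txt'}], 'has_more': False},
]
assert [e['name'] for e in iter_pages(pages)] == ['a', 'b.txt']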
async def metadata(self, path, revision=None, **kwargs):
    if revision:
        url = self.build_url('revisions', 'auto', path.full_path, rev_limit=250)
    else:
        url = self.build_url('metadata', 'auto', path.full_path)

    resp = await self.make_request('GET', url,
                                   expects=(200, ),
                                   throws=exceptions.MetadataError)
    data = await resp.json()

    if revision:
        try:
            data = next(v for v in data if v['rev'] == revision)
        except StopIteration:
            raise exceptions.NotFoundError(str(path))

    # Dropbox will match a file or folder by name within the requested path
    if path.is_file and data['is_dir']:
        raise exceptions.MetadataError(
            "Could not retrieve file '{}'".format(path),
            code=http.client.NOT_FOUND,
        )

    if data.get('is_deleted'):
        raise exceptions.MetadataError(
            "Could not retrieve {kind} '{path}'".format(
                kind='folder' if data['is_dir'] else 'file',
                path=path,
            ),
            code=http.client.NOT_FOUND,
        )

    if data['is_dir']:
        ret = []
        for item in data['contents']:
            if item['is_dir']:
                ret.append(DropboxFolderMetadata(item, self.folder))
            else:
                ret.append(DropboxFileMetadata(item, self.folder))
        return ret

    return DropboxFileMetadata(data, self.folder)
async def test_upload_existing(self, monkeypatch, provider_and_mock, file_stream,
                               upload_path, upload_response, mock_time):
    self.patch_tasks(monkeypatch)
    provider, inner_provider = provider_and_mock
    url = 'https://waterbutler.io/{}/children/'.format(upload_path.parent.identifier)
    inner_provider.move.return_value = (utils.MockFileMetadata(), True)
    inner_provider.metadata.side_effect = exceptions.MetadataError('Boom!', code=404)
    aiohttpretty.register_json_uri('POST', url, status=200, body=upload_response)

    res, created = await provider.upload(file_stream, upload_path)

    assert created is False
    assert res.name == '[TEST]'
    assert res.extra['version'] == 8
    assert res.provider == 'osfstorage'
    assert res.extra['downloads'] == 0
    assert res.extra['checkout'] is None
    assert upload_path.identifier_path == res.path

    expected_path = WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest)
    inner_provider.metadata.assert_called_once_with(expected_path)
    inner_provider.upload.assert_called_once_with(file_stream,
                                                  WaterButlerPath('/patched_path'),
                                                  check_created=False,
                                                  fetch_metadata=False)
    inner_provider.move.assert_called_once_with(inner_provider,
                                                WaterButlerPath('/patched_path'),
                                                expected_path)
async def _metadata_file(self, path, revision=None, **kwargs):
    resp = await self.make_request(
        'GET',
        self.build_repo_url('commits', path=path.path, sha=revision or path.branch_ref),
        expects=(200, ),
        throws=exceptions.MetadataError,
    )

    commits = await resp.json()
    if not commits:
        raise exceptions.NotFoundError(str(path))

    latest = commits[0]
    tree = await self._fetch_tree(latest['commit']['tree']['sha'], recursive=True)

    try:
        data = next(x for x in tree['tree'] if x['path'] == path.path)
    except StopIteration:
        raise exceptions.NotFoundError(str(path))

    if isinstance(data, list):
        raise exceptions.MetadataError(
            'Could not retrieve file "{0}"'.format(str(path)),
            code=404,
        )

    return GitHubFileTreeMetadata(data,
                                  commit=latest['commit'],
                                  web_view=self._web_view(path),
                                  ref=path.branch_ref)
async def _resolve_id_to_parts(self, _id, accum=None):
    if _id == self.folder['id']:
        return [{
            'title': '',
            'mimeType': 'folder',
            'id': self.folder['id'],
        }] + (accum or [])

    if accum is None:
        async with self.request(
            'GET',
            self.build_url('files', _id, fields='id,title,mimeType'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as resp:
            accum = [await resp.json()]

    for parent in await self._get_parent_ids(_id):
        if self.folder['id'] == parent['id']:
            return [parent] + (accum or [])
        try:
            return await self._resolve_id_to_parts(parent['id'], [parent] + (accum or []))
        except exceptions.MetadataError:
            pass

    # TODO Custom exception here
    raise exceptions.MetadataError('ID is out of scope')
async def _metadata_folder(self, path, recursive=False, **kwargs):
    # if we have a sha or recursive lookup specified we'll need to perform
    # the operation using the git/trees api which requires a sha.
    if not (self._is_sha(path.identifier[0]) or recursive):
        try:
            data = await self._fetch_contents(path, ref=path.identifier[0])
        except exceptions.MetadataError as e:
            if e.data.get('message') == 'This repository is empty.':
                data = []
            else:
                raise

        if isinstance(data, dict):
            raise exceptions.MetadataError(
                'Could not retrieve folder "{0}"'.format(str(path)),
                code=404,
            )

        ret = []
        for item in data:
            if item['type'] == 'dir':
                ret.append(GitHubFolderContentMetadata(item))
            else:
                ret.append(GitHubFileContentMetadata(item, web_view=item['html_url']))
        return ret
def metadata(self, path, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('{} not found'.format(str(path)), code=404)

    if not path.is_dir:
        return (yield from self._item_metadata(path))
    return (yield from self._children_metadata(path))
def test_upload_existing(self, monkeypatch, provider_and_mock, file_stream, mock_time):
    self.patch_tasks(monkeypatch)
    provider, inner_provider = provider_and_mock
    path = WaterButlerPath('/foopath', _ids=('Test', 'OtherTest'))
    url = 'https://waterbutler.io/{}/children/'.format(path.parent.identifier)
    inner_provider.move.return_value = (utils.MockFileMetadata(), True)
    inner_provider.metadata.side_effect = exceptions.MetadataError('Boom!', code=404)
    aiohttpretty.register_json_uri('POST', url, status=200, body={
        'data': {
            'downloads': 10,
            'version': 8,
            'path': '/24601',
            'checkout': 'hmoco',
            'md5': '1234',
            'sha256': '2345',
        }
    })

    res, created = yield from provider.upload(file_stream, path)

    assert created is False
    assert res.name == 'foopath'
    assert res.path == '/24601'
    assert res.extra['version'] == 8
    assert res.provider == 'osfstorage'
    assert res.extra['downloads'] == 10
    assert res.extra['checkout'] == 'hmoco'

    inner_provider.metadata.assert_called_once_with(
        WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest))
    inner_provider.upload.assert_called_once_with(
        file_stream, WaterButlerPath('/uniquepath'),
        check_created=False, fetch_metadata=False)
    inner_provider.move.assert_called_once_with(
        inner_provider,
        WaterButlerPath('/uniquepath'),
        WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest))
async def metadata(self, path, version=None, **kwargs):
    """
    :param str version:
        - 'latest' for draft files
        - 'latest-published' for published files
        - None for all data
    """
    version = version or path.revision

    if path.is_root:
        return (await self._maybe_fetch_metadata(version=version))

    try:
        return next(
            item
            for item in (await self._maybe_fetch_metadata(version=version))
            if item.extra['fileId'] == path.identifier
        )
    except StopIteration:
        raise exceptions.MetadataError(
            "Could not retrieve file '{}'".format(path),
            code=HTTPStatus.NOT_FOUND,
        )
async def _delete_folder_contents(self, path: wb_path.WaterButlerPath) -> None:
    """Given a WaterButlerPath, delete all contents of folder

    :param WaterButlerPath path: Folder to be emptied
    :rtype: None
    :raises: :class:`waterbutler.core.exceptions.NotFoundError`
    :raises: :class:`waterbutler.core.exceptions.MetadataError`
    :raises: :class:`waterbutler.core.exceptions.DeleteError`
    """
    file_id = path.identifier
    if not file_id:
        raise exceptions.NotFoundError(str(path))

    resp = await self.make_request(
        'GET',
        self.build_url('files', q="'{}' in parents".format(file_id), fields='items(id)'),
        expects=(200, ),
        throws=exceptions.MetadataError)
    try:
        child_ids = (await resp.json())['items']
    except (KeyError, IndexError):
        raise exceptions.MetadataError('{} not found'.format(str(path)),
                                       code=HTTPStatus.NOT_FOUND)

    for child in child_ids:
        await self.make_request(
            'PUT',
            self.build_url('files', child['id']),
            data=json.dumps({'labels': {'trashed': 'true'}}),
            headers={'Content-Type': 'application/json'},
            expects=(200, ),
            throws=exceptions.DeleteError)
async def _delete_file(self, path, message=None, **kwargs):
    if path.file_sha:
        sha = path.file_sha
    else:
        sha = (await self.metadata(path)).extra['fileSha']
    if not sha:
        raise exceptions.MetadataError('A sha is required for deleting')

    data = {
        'sha': sha,
        'branch': path.branch_ref,
        'committer': self.committer,
        'message': message or settings.DELETE_FILE_MESSAGE,
    }

    resp = await self.make_request(
        'DELETE',
        self.build_repo_url('contents', path.path),
        headers={'Content-Type': 'application/json'},
        data=json.dumps(data),
        expects=(200, ),
        throws=exceptions.DeleteError,
    )
    await resp.release()
def test_upload_and_tasks(self, monkeypatch, provider_and_mock, file_stream,
                          credentials, settings):
    provider, inner_provider = provider_and_mock
    basepath = 'waterbutler.providers.osfstorage.provider.{}'
    path = WaterButlerPath('/foopath', _ids=('Test', 'OtherTest'))
    url = 'https://waterbutler.io/{}/children/'.format(path.parent.identifier)
    mock_parity = mock.Mock()
    mock_backup = mock.Mock()
    inner_provider.move.return_value = ({}, True)
    inner_provider.metadata.side_effect = exceptions.MetadataError('Boom!', code=404)
    aiohttpretty.register_json_uri('POST', url, status=201, body={
        'version': 'versionpk',
        'data': {
            'version': 42,
            'downloads': 30,
            'path': '/alkjdaslke09',
        }
    })
    monkeypatch.setattr(basepath.format('backup.main'), mock_backup)
    monkeypatch.setattr(basepath.format('parity.main'), mock_parity)
    monkeypatch.setattr(basepath.format('settings.RUN_TASKS'), True)
    monkeypatch.setattr(basepath.format('os.rename'), lambda *_: None)
    monkeypatch.setattr(basepath.format('uuid.uuid4'), lambda: 'uniquepath')

    res, created = yield from provider.upload(file_stream, path)

    assert created is True
    assert res['name'] == 'foopath'
    assert res['extra']['version'] == 42
    assert res['provider'] == 'osfstorage'
    assert res['extra']['downloads'] == 30

    inner_provider.upload.assert_called_once_with(
        file_stream, WaterButlerPath('/uniquepath'),
        check_created=False, fetch_metadata=False)

    complete_path = os.path.join(FILE_PATH_COMPLETE,
                                 file_stream.writers['sha256'].hexdigest)
    mock_parity.assert_called_once_with(complete_path,
                                        credentials['parity'],
                                        settings['parity'])
    mock_backup.assert_called_once_with(complete_path,
                                        'versionpk',
                                        'https://waterbutler.io/hooks/metadata/',
                                        credentials['archive'],
                                        settings['parity'])
    inner_provider.metadata.assert_called_once_with(
        WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest))
    inner_provider.move.assert_called_once_with(
        inner_provider,
        WaterButlerPath('/uniquepath'),
        WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest))
async def _metadata_folder(self, path, **kwargs):
    ref = path.branch_ref
    try:
        # it's cool to use the contents API here because we know path is a dir and won't hit
        # the 1mb size limit
        data = await self._fetch_contents(path, ref=ref)
    except exceptions.MetadataError as e:
        if e.data.get('message') == 'This repository is empty.':
            data = []
        else:
            raise

    if isinstance(data, dict):
        raise exceptions.MetadataError(
            'Could not retrieve folder "{0}"'.format(str(path)),
            code=404,
        )

    ret = []
    for item in data:
        if item['type'] == 'dir':
            ret.append(GitHubFolderContentMetadata(item, ref=ref))
        else:
            ret.append(GitHubFileContentMetadata(item, ref=ref, web_view=item['html_url']))
    return ret
def test_exits_raises_non_404(self, provider1):
    with pytest.raises(exceptions.MetadataError) as e:
        yield from provider1.exists(
            'somepath',
            throw=exceptions.MetadataError('', code=422))

    assert e.value.code == 422
def validate_path(self, path, **kwargs):
    if path == '/':
        return WaterButlerPath('/', _ids=[self.folder])

    try:
        obj_id, new_name = path.strip('/').split('/')
    except ValueError:
        obj_id, new_name = path.strip('/'), None

    if path.endswith('/') or new_name is not None:
        files_or_folders = 'folders'
    else:
        files_or_folders = 'files'

    # Box file ids must be a valid base10 number
    if obj_id.isdecimal():
        response = yield from self.make_request(
            'get',
            self.build_url(files_or_folders, obj_id, fields='id,name,path_collection'),
            expects=(200, 404, 405),
            throws=exceptions.MetadataError,
        )
    else:
        response = None  # Ugly but easiest

    if response is None or response.status in (404, 405):
        if new_name is not None:
            raise exceptions.MetadataError('Could not find {}'.format(path), code=404)

        return (yield from self.revalidate_path(
            WaterButlerPath('/', _ids=[self.folder]),
            obj_id,
            folder=path.endswith('/')
        ))
    else:
        data = yield from response.json()
        names, ids = zip(*[
            (x['name'], x['id'])
            for x in data['path_collection']['entries'] + [data]
        ])
        try:
            names, ids = ('', ) + names[ids.index(self.folder) + 1:], ids[ids.index(self.folder):]
        except ValueError:
            raise Exception  # TODO

    is_folder = path.endswith('/')

    ret = WaterButlerPath('/'.join(names), _ids=ids, folder=is_folder)

    if new_name is not None:
        return (yield from self.revalidate_path(ret, new_name, folder=is_folder))

    return ret
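# Self-contained sketch of the zip(*...) trick above: build parallel
# name/id tuples from a Box-style path_collection. The entries and ids
# here are hypothetical sample values.
entries = [{'name': 'root', 'id': '0'}, {'name': 'proj', 'id': '11'}]
data = {'name': 'file.txt', 'id': '42'}
names, ids = zip(*[(x['name'], x['id']) for x in entries + [data]])
assert names == ('root', 'proj', 'file.txt')
assert ids == ('0', '11', '42')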
async def metadata(self, path, raw=False, revision=None, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('{} not found'.format(str(path)), code=404)

    if path.is_dir:
        return await self._folder_metadata(path, raw=raw)

    return await self._file_metadata(path, revision=revision, raw=raw)
def metadata(self, path, raw=False, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('{} not found'.format(str(path)), code=404)

    if path.is_dir:
        return (yield from self._folder_metadata(path, raw=raw))

    return (yield from self._file_metadata(path, raw=raw))
async def test_upload_and_tasks(self, monkeypatch, provider_and_mock, file_stream,
                                upload_response, credentials, settings, mock_time):
    provider, inner_provider = provider_and_mock
    basepath = 'waterbutler.providers.osfstorage.provider.{}'
    path = WaterButlerPath('/' + upload_response['data']['name'],
                           _ids=('Test', upload_response['data']['id']))
    url = 'https://waterbutler.io/{}/children/'.format(path.parent.identifier)
    mock_parity = mock.Mock()
    mock_backup = mock.Mock()
    inner_provider.move.return_value = (utils.MockFileMetadata(), True)
    inner_provider.metadata.side_effect = exceptions.MetadataError('Boom!', code=404)
    aiohttpretty.register_json_uri('POST', url, status=201, body=upload_response)
    monkeypatch.setattr(basepath.format('backup.main'), mock_backup)
    monkeypatch.setattr(basepath.format('parity.main'), mock_parity)
    monkeypatch.setattr(basepath.format('settings.RUN_TASKS'), True)
    monkeypatch.setattr(basepath.format('os.rename'), lambda *_: None)
    monkeypatch.setattr(basepath.format('uuid.uuid4'), lambda: 'uniquepath')

    res, created = await provider.upload(file_stream, path)

    assert created is True
    assert res.name == '[TEST]'
    assert res.extra['version'] == 8
    assert res.provider == 'osfstorage'
    assert res.extra['downloads'] == 0
    assert res.extra['checkout'] is None

    inner_provider.upload.assert_called_once_with(
        file_stream, WaterButlerPath('/uniquepath'),
        check_created=False, fetch_metadata=False)

    complete_path = os.path.join(FILE_PATH_COMPLETE,
                                 file_stream.writers['sha256'].hexdigest)
    mock_parity.assert_called_once_with(complete_path,
                                        upload_response['version'],
                                        'https://waterbutler.io/hooks/metadata/',
                                        credentials['parity'],
                                        settings['parity'])
    mock_backup.assert_called_once_with(complete_path,
                                        upload_response['version'],
                                        'https://waterbutler.io/hooks/metadata/',
                                        credentials['archive'],
                                        settings['archive'])

    expected_path = WaterButlerPath('/' + file_stream.writers['sha256'].hexdigest)
    inner_provider.metadata.assert_called_once_with(expected_path)
    inner_provider.move.assert_called_once_with(
        inner_provider, WaterButlerPath('/uniquepath'), expected_path)
def _assert_child(self, paths, target=None):
    if self.folder == 0:
        return True
    if target == self.folder:
        return True
    if not paths:
        raise exceptions.MetadataError('Not found', code=http.client.NOT_FOUND)
    if paths[0]['id'] == self.folder:
        return True
    return self._assert_child(paths[1:])
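# Minimal standalone sketch of the recursive ancestor check above, with
# hypothetical path_collection entries; `folder` plays the role of
# self.folder, and LookupError stands in for MetadataError(404).
def assert_child(paths, folder, target=None):
    if folder == 0 or target == folder:
        return True
    if not paths:
        raise LookupError('Not found')
    if paths[0]['id'] == folder:
        return True
    return assert_child(paths[1:], folder)

assert assert_child([{'id': '1'}, {'id': '7'}], folder='7') is True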
def _metadata_file(self, path, ref=None, **kwargs):
    if not GitHubProvider.is_sha(path.identifier[0]):
        latest = yield from self._get_latest_sha(ref=path.identifier[0])
    else:
        latest = path.identifier[0]

    tree = yield from self._fetch_tree(latest, recursive=True)

    try:
        data = next(x for x in tree['tree'] if x['path'] == path.path)
    except StopIteration:
        raise exceptions.MetadataError(
            'Could not retrieve file "{0}"'.format(str(path)),
            code=404,
        )

    if isinstance(data, list):
        raise exceptions.MetadataError(
            'Could not retrieve file "{0}"'.format(str(path)),
            code=404,
        )

    return GitHubFileTreeMetadata(data).serialized()
def file_or_error(article, file_id):
    try:
        return next(
            each for each in article['files']
            if each['id'] == int(file_id)
        )
    except StopIteration:
        raise exceptions.MetadataError(
            'Could not resolve file with ID {0}'.format(file_id),
            code=http.client.NOT_FOUND,
        )
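# Usage sketch for file_or_error with a hypothetical figshare-style article
# dict. Note the int() coercion: the lookup succeeds even when file_id
# arrives as a string; a missing id raises MetadataError(404) as above.
article = {'files': [{'id': 101, 'name': 'data.csv'}]}
assert file_or_error(article, '101')['name'] == 'data.csv'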
async def _resolve_path_to_ids(self, path, start_at=None):
    ret = start_at or [{
        'title': '',
        'mimeType': 'folder',
        'id': self.folder['id'],
    }]
    item_id = ret[0]['id']

    # parts is list of [path_part_name, is_folder]
    parts = [[parse.unquote(x), True] for x in path.strip('/').split('/')]
    if not path.endswith('/'):
        parts[-1][1] = False

    while parts:
        current_part = parts.pop(0)
        query = "title = '{}' " \
                "and trashed = false " \
                "and mimeType != 'application/vnd.google-apps.form' " \
                "and mimeType {} '{}'".format(
                    clean_query(current_part[0]),
                    '=' if current_part[1] else '!=',
                    self.FOLDER_MIME_TYPE
                )
        async with self.request(
            'GET',
            self.build_url('files', item_id, 'children', q=query, fields='items(id)'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as resp:
            data = await resp.json()

        try:
            item_id = data['items'][0]['id']
        except (KeyError, IndexError):
            if parts:
                raise exceptions.MetadataError('{} not found'.format(str(path)),
                                               code=http.client.NOT_FOUND)

            name, ext = os.path.splitext(current_part[0])
            if ext not in ('.gdoc', '.gdraw', '.gslides', '.gsheet'):
                return ret + [{
                    'id': None,
                    'title': current_part[0],
                    'mimeType': 'folder' if path.endswith('/') else '',
                }]

            parts.append([name, current_part[1]])

        async with self.request(
            'GET',
            self.build_url('files', item_id, fields='id,title,mimeType'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as resp:
            ret.append(await resp.json())

    return ret
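# Sketch of how the parts list above encodes "is this segment a folder?":
# every segment but the last is a folder, and the last is a folder only when
# the path ends with '/'. The sample paths are hypothetical.
from urllib import parse

def split_parts(path):
    parts = [[parse.unquote(x), True] for x in path.strip('/').split('/')]
    if not path.endswith('/'):
        parts[-1][1] = False
    return parts

assert split_parts('/a/b/c.txt') == [['a', True], ['b', True], ['c.txt', False]]
assert split_parts('/a/b/') == [['a', True], ['b', True]]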
async def metadata(self,  # type: ignore
                   path: GoogleDrivePath,
                   raw: bool = False,
                   revision=None,
                   **kwargs) -> typing.Union[dict,
                                             BaseGoogleDriveMetadata,
                                             typing.List[typing.Union[BaseGoogleDriveMetadata, dict]]]:
    if path.identifier is None:
        raise exceptions.MetadataError('{} not found'.format(str(path)), code=404)

    if path.is_dir:
        return await self._folder_metadata(path, raw=raw)

    return await self._file_metadata(path, revision=revision, raw=raw)
async def test_upload_catch_non_404_errors(self, monkeypatch, provider_and_mock,
                                           file_stream, upload_path, mock_time):
    self.patch_tasks(monkeypatch)
    provider, inner_provider = provider_and_mock
    url = 'https://waterbutler.io/{}/children/'.format(upload_path.parent.identifier)
    inner_provider.metadata.side_effect = exceptions.MetadataError('Boom!', code=500)
    aiohttpretty.register_json_uri('POST', url, status=500)

    with pytest.raises(exceptions.MetadataError):
        await provider.upload(file_stream, upload_path)
def _resolve_path_to_ids(self, path, start_at=None):
    ret = start_at or [{
        'title': '',
        'mimeType': 'folder',
        'id': self.folder['id'],
    }]
    item_id = ret[0]['id']
    parts = [parse.unquote(x) for x in path.strip('/').split('/')]

    while parts:
        current_part = parts.pop(0)
        resp = yield from self.make_request(
            'GET',
            self.build_url('files', item_id, 'children',
                           q="title = '{}'".format(clean_query(current_part)),
                           fields='items(id)'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        )

        try:
            item_id = (yield from resp.json())['items'][0]['id']
        except (KeyError, IndexError):
            if parts:
                raise exceptions.MetadataError('{} not found'.format(str(path)),
                                               code=http.client.NOT_FOUND)

            name, ext = os.path.splitext(current_part)
            if ext not in ('.gdoc', '.gdraw', '.gslides', '.gsheet'):
                return ret + [{
                    'id': None,
                    'title': current_part,
                    'mimeType': 'folder' if path.endswith('/') else '',
                }]

            parts.append(name)

        resp = yield from self.make_request(
            'GET',
            self.build_url('files', item_id, fields='id,title,mimeType'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        ret.append((yield from resp.json()))

    return ret
async def revisions(self, path, view_only=None, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('File not found', code=404)

    async with self.signed_request(
        'GET',
        self.build_url(path.identifier, 'revisions', view_only=view_only),
        expects=(200, )
    ) as resp:
        return [
            OsfStorageRevisionMetadata(item)
            for item in (await resp.json())['revisions']
        ]
def revisions(self, path, view_only=None, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('File not found', code=404)

    resp = yield from self.make_signed_request(
        'GET',
        self.build_url(path.identifier, 'revisions', view_only=view_only),
        expects=(200, )
    )

    return [
        OsfStorageRevisionMetadata(item).serialized()
        for item in (yield from resp.json())['revisions']
    ]
async def revisions(self, path, view_only=None, **kwargs):
    if path.identifier is None:
        raise exceptions.MetadataError('File not found', code=404)

    self.metrics.add('revisions', {'got_view_only': view_only is not None})

    resp = await self.make_signed_request(
        'GET',
        self.build_url(path.identifier, 'revisions', view_only=view_only),
        expects=(200, )
    )

    return [
        OsfStorageRevisionMetadata(item)
        for item in (await resp.json())['revisions']
    ]