def create_folder(self, path, **kwargs):
    """Create a folder at ``path`` on Box (old ``yield from`` coroutine style).

    :param path: WaterButlerPath for the new folder; its parent must already
        have an identifier
    :raises FolderNamingConflict: if ``path`` already has an id, or Box
        answers 409
    :return: serialized BoxFolderMetadata for the new folder
    """
    WaterButlerPath.validate_folder(path)
    # A non-None identifier means Box already knows this path -> conflict.
    if path.identifier is not None:
        raise exceptions.FolderNamingConflict(str(path))
    resp = yield from self.make_request(
        'POST',
        self.build_url('folders'),
        data={'name': path.name, 'parent': {'id': path.parent.identifier}},
        expects=(201, 409),
        throws=exceptions.CreateFolderError,
    )
    # Catch 409s to avoid race conditions
    if resp.status == 409:
        raise exceptions.FolderNamingConflict(str(path))
    return BoxFolderMetadata((yield from resp.json()), path).serialized()
def validate_path(self, path, **kwargs):
    """Resolve ``path`` to a WaterButlerPath within a figshare project.

    The first path segment may be an article name or id; any remaining
    segments are delegated to that article's own provider.  Segments that
    are unknown or inaccessible (404/401) fall back to an id-less file
    path directly under the project root.
    """
    split = path.rstrip('/').split('/')[1:]
    wbpath = WaterButlerPath('/', _ids=(self.settings['project_id'], ), folder=True)
    if split:
        name_or_id = split.pop(0)
        try:
            article = yield from self._assert_contains_article(name_or_id)
        except ValueError:
            # Segment is not a valid article id -> treat as a plain file name.
            return wbpath.child(name_or_id, folder=False)
        except exceptions.ProviderError as e:
            if e.code not in (404, 401):
                raise
            # Article missing or forbidden -> fall back to a file child.
            return wbpath.child(name_or_id, folder=False)
        wbpath = wbpath.child(article['title'], article['id'], folder=True)
        if split:
            # Remaining segments belong to the article's own provider.
            provider = yield from self._make_article_provider(article['id'], check_parent=False)
            try:
                return (yield from provider.validate_path('/'.join([''] + split), parent=wbpath))
            except exceptions.ProviderError as e:
                if e.code not in (404, 401):
                    raise
                return wbpath.child(split.pop(0), folder=False)
    return wbpath
async def handle_name_conflict(self,
                               path: wb_path.WaterButlerPath,
                               conflict: str='replace',
                               **kwargs) -> typing.Tuple[wb_path.WaterButlerPath, bool]:
    """Check WaterButlerPath and resolve conflicts

    Given a WaterButlerPath and a conflict resolution pattern determine
    the correct file path to upload to and indicate if that file exists or not

    :param path: ( :class:`.WaterButlerPath` ) Desired path to check for conflict
    :param conflict: ( :class:`str` ) replace, keep, warn
    :rtype: (:class:`.WaterButlerPath` or False)
    :raises: :class:`.NamingConflict`
    """
    exists = await self.exists(path, **kwargs)
    # `exists` may be metadata (truthy), False, or [] — the empty list is a
    # provider sentinel that must still count as "exists" here.
    if (not exists and not exists == []) or conflict == 'replace':
        return path, exists  # type: ignore
    if conflict == 'warn':
        raise exceptions.NamingConflict(path.name)

    # conflict == 'keep': keep incrementing "name (n)" until a free slot.
    while True:
        path.increment_name()
        test_path = await self.revalidate_path(
            path.parent,
            path.name,
            folder=path.is_dir
        )
        exists = await self.exists(test_path, **kwargs)
        if not (exists or exists == []):
            break

    return path, False
def create_folder(self, path, **kwargs):
    """Create a folder at ``path`` on Box and record its new id.

    :param path: WaterButlerPath for the new folder; parent must have an id
    :raises FolderNamingConflict: if ``path`` already has an id, or Box
        answers 409
    :return: BoxFolderMetadata for the newly created folder
    """
    WaterButlerPath.validate_folder(path)
    # A non-None identifier means Box already knows this path -> conflict.
    if path.identifier is not None:
        raise exceptions.FolderNamingConflict(str(path))
    resp = yield from self.make_request(
        'POST',
        self.build_url('folders'),
        data={'name': path.name, 'parent': {'id': path.parent.identifier}},
        expects=(201, 409),
        throws=exceptions.CreateFolderError,
    )
    # Catch 409s to avoid race conditions
    if resp.status == 409:
        raise exceptions.FolderNamingConflict(str(path))
    resp_json = yield from resp.json()
    # save new folder's id into the WaterButlerPath object. logs will need it later.
    path._parts[-1]._id = resp_json['id']
    return BoxFolderMetadata(resp_json, path)
async def test_intra_move_folder_replace(self, provider, intra_fixtures, root_provider_fixtures):
    """intra_move over an existing folder deletes the destination first and
    reports (metadata, False) because the destination was replaced."""
    item = intra_fixtures['intra_folder_metadata']
    list_metadata = root_provider_fixtures['folder_list_metadata']

    src_path = WaterButlerPath('/name/', _ids=(provider, item['id']))
    dest_path = WaterButlerPath('/charmander/name/', _ids=(provider, item['id'], item['id']))

    file_url = provider.build_url('folders', src_path.identifier)
    delete_url = provider.build_url('folders', dest_path.identifier, recursive=True)
    list_url = provider.build_url('folders', item['id'], 'items',
                                  fields='id,name,size,modified_at,etag,total_count',
                                  offset=0, limit=1000)
    aiohttpretty.register_json_uri('PUT', file_url, body=item)
    aiohttpretty.register_uri('DELETE', delete_url, status=204)
    aiohttpretty.register_json_uri('GET', list_url, body=list_metadata)

    # Build the expected folder metadata, including serialized children.
    expected_folder = BoxFolderMetadata(item, dest_path)
    expected_folder._children = []
    for child_item in list_metadata['entries']:
        child_path = dest_path.child(child_item['name'], folder=(child_item['type'] == 'folder'))
        serialized_child = provider._serialize_item(child_item, child_path)
        expected_folder._children.append(serialized_child)
    expected = (expected_folder, False)

    result = await provider.intra_move(provider, src_path, dest_path)

    assert result == expected
    assert aiohttpretty.has_call(method='DELETE', uri=delete_url)
async def test_intra_copy_folder(self, provider, intra_fixtures, root_provider_fixtures):
    """intra_copy of a folder returns (metadata, True) for a fresh copy."""
    item = intra_fixtures['intra_folder_metadata']
    list_metadata = root_provider_fixtures['folder_list_metadata']

    src_path = WaterButlerPath('/name/', _ids=(provider, item['id']))
    dest_path = WaterButlerPath('/charmander/name/', _ids=(provider, item['id']))

    file_url = provider.build_url('folders', src_path.identifier, 'copy')
    list_url = provider.build_url('folders', item['id'], 'items',
                                  fields='id,name,size,modified_at,etag,total_count',
                                  offset=0, limit=1000)
    aiohttpretty.register_json_uri('GET', list_url, body=list_metadata)
    aiohttpretty.register_json_uri('POST', file_url, body=item)

    # Build the expected folder metadata, including serialized children.
    expected_folder = BoxFolderMetadata(item, dest_path)
    expected_folder._children = []
    for child_item in list_metadata['entries']:
        child_path = dest_path.child(child_item['name'], folder=(child_item['type'] == 'folder'))
        serialized_child = provider._serialize_item(child_item, child_path)
        expected_folder._children.append(serialized_child)
    expected = (expected_folder, True)

    result = await provider.intra_copy(provider, src_path, dest_path)

    assert result == expected
async def create_folder(self, path: WaterButlerPath, folder_precheck: bool=True,
                        **kwargs) -> BoxFolderMetadata:
    """Create a folder at ``path`` on Box.

    :param path: WaterButlerPath for the new folder; parent must have an id
    :param folder_precheck: when True, fail fast if ``path`` already has an id
    :raises FolderNamingConflict: on precheck failure or a 409 from Box
    :return: BoxFolderMetadata for the newly created folder
    """
    WaterButlerPath.validate_folder(path)

    if folder_precheck:
        if path.identifier is not None:
            raise exceptions.FolderNamingConflict(path.name)

    async with self.request(
        'POST',
        self.build_url('folders'),
        data={'name': path.name, 'parent': {'id': path.parent.identifier}},
        expects=(201, 409),
        throws=exceptions.CreateFolderError,
    ) as resp:
        # Catch 409s to avoid race conditions
        if resp.status == 409:
            raise exceptions.FolderNamingConflict(path.name)
        resp_json = await resp.json()
        # save new folder's id into the WaterButlerPath object. logs will need it later.
        path._parts[-1]._id = resp_json['id']
        return BoxFolderMetadata(resp_json, path)
async def create_folder(self, path, **kwargs):
    """Create a folder at ``path`` via the Dropbox v1 fileops endpoint.

    :param path: WaterButlerPath to create a folder at
    :raises FolderNamingConflict: if something already exists at ``path``
    :raises CreateFolderError: for any other 403 response
    :return: DropboxFolderMetadata for the new folder
    """
    WaterButlerPath.validate_folder(path)

    response = await self.make_request(
        'POST',
        self.build_url('fileops', 'create_folder'),
        params={'root': 'auto', 'path': path.full_path},
        expects=(200, 403),
        throws=exceptions.CreateFolderError
    )

    data = await response.json()

    if response.status == 403:
        # Default to '' so a 403 payload without an 'error' key falls
        # through to CreateFolderError instead of raising a TypeError
        # from `'...' in None`.
        if 'because a file or folder already exists at path' in data.get('error', ''):
            raise exceptions.FolderNamingConflict(str(path))
        raise exceptions.CreateFolderError(data, code=403)

    return DropboxFolderMetadata(data, self.folder)
def test_rename(self):
    """Renaming the final path segment updates ``name`` in place."""
    wb_path = WaterButlerPath('/this/is/a/long/path')
    assert wb_path.name == 'path'

    wb_path.rename('journey')

    assert wb_path.name == 'journey'
async def create_folder(self, path, **kwargs):
    """Create a folder at ``path`` via the Dropbox API.

    :param path: WaterButlerPath at which to create the folder
    :return: DropboxFolderMetadata describing the new folder
    """
    WaterButlerPath.validate_folder(path)

    # Dropbox rejects trailing slashes on folder paths.
    folder_path = path.full_path.rstrip('/')
    resp_data = await self.dropbox_request(
        self.build_url('files', 'create_folder'),
        {'path': folder_path},
        throws=exceptions.CreateFolderError,
    )
    return DropboxFolderMetadata(resp_data, self.folder)
async def move(src_bundle, dest_bundle, start_time=None, **kwargs):
    """Celery task body: move a file/folder between two provider bundles.

    Pops the path and provider out of each bundle, performs the move, and
    always emits a 'move' log callback (success or failure) from the
    ``finally`` block before any exception propagates.

    :param src_bundle: dict with 'path', 'provider', and 'nid' entries
    :param dest_bundle: dict with 'path', 'provider', and 'nid' entries
    :param start_time: task start timestamp; defaults to now
    :return: (metadata, created) from the provider move
    """
    start_time = start_time or time.time()
    src_path, src_provider = src_bundle.pop('path'), utils.make_provider(**src_bundle.pop('provider'))
    dest_path, dest_provider = dest_bundle.pop('path'), utils.make_provider(**dest_bundle.pop('provider'))

    logger.info('Starting moving {!r}, {!r} to {!r}, {!r}'.format(src_path, src_provider, dest_path, dest_provider))

    metadata, errors = None, []
    try:
        metadata, created = await src_provider.move(dest_provider, src_path, dest_path, **kwargs)
    except Exception as e:
        logger.error('Move failed with error {!r}'.format(e))
        errors = [e.__repr__()]
        raise  # Ensure sentry sees this
    else:
        logger.info('Move succeeded')
        # Log the actual destination, which may differ after rename/conflict.
        dest_path = WaterButlerPath.from_metadata(metadata)
    finally:
        source = LogPayload(src_bundle['nid'], src_provider, path=src_path)
        destination = LogPayload(
            dest_bundle['nid'], dest_provider, path=dest_path, metadata=metadata
        )
        await utils.log_to_callback('move', source=source, destination=destination,
                                    start_time=start_time, errors=errors)

    return metadata, created
async def copy(src_bundle, dest_bundle, request=None, start_time=None, **kwargs):
    """Celery task body: copy a file/folder between two provider bundles.

    Pops the path and provider out of each bundle, performs the copy, and
    always emits a 'copy' remote log (success or failure) from the
    ``finally`` block before any exception propagates.

    :param src_bundle: dict with 'path', 'provider', and 'nid' entries
    :param dest_bundle: dict with 'path', 'provider', and 'nid' entries
    :param request: request context forwarded to remote logging
    :param start_time: task start timestamp; defaults to now
    :return: (metadata, created) from the provider copy
    """
    # `request` previously defaulted to a mutable `{}` shared across all
    # calls; use None as the sentinel and create a fresh dict per call.
    request = {} if request is None else request
    start_time = start_time or time.time()
    src_path, src_provider = src_bundle.pop('path'), utils.make_provider(**src_bundle.pop('provider'))
    dest_path, dest_provider = dest_bundle.pop('path'), utils.make_provider(**dest_bundle.pop('provider'))

    logger.info('Starting copying {!r}, {!r} to {!r}, {!r}'
                .format(src_path, src_provider, dest_path, dest_provider))

    metadata, errors = None, []
    try:
        metadata, created = await src_provider.copy(dest_provider, src_path, dest_path, **kwargs)
    except Exception as e:
        logger.error('Copy failed with error {!r}'.format(e))
        errors = [e.__repr__()]
        raise  # Ensure sentry sees this
    else:
        logger.info('Copy succeeded')
        # Log the actual destination, which may differ after rename/conflict.
        dest_path = WaterButlerPath.from_metadata(metadata)
    finally:
        source = LogPayload(src_bundle['nid'], src_provider, path=src_path)
        destination = LogPayload(
            dest_bundle['nid'], dest_provider, path=dest_path, metadata=metadata
        )
        await remote_logging.wait_for_log_futures(
            'copy', source=source, destination=destination, start_time=start_time,
            errors=errors, request=request, api_version='celery',
        )

    return metadata, created
async def revalidate_path(self, base: WaterButlerPath, path: str,
                          folder: bool=None) -> WaterButlerPath:
    """Resolve ``path`` as a child of ``base`` by listing the folder on Box.

    Matches entries case-insensitively; if nothing matches, the child is
    returned with no id (i.e. a not-yet-existing file).
    """
    # TODO Research the search api endpoint
    async with self.request(
        'GET',
        self.build_url('folders', base.identifier, 'items', fields='id,name,type', limit=1000),
        expects=(200,),
        throws=exceptions.ProviderError
    ) as resp:
        data = await resp.json()

    lower_name = path.lower()
    try:
        item = next(
            x for x in data['entries']
            if x['name'].lower() == lower_name and (
                folder is None or (x['type'] == 'folder') == folder
            )
        )
        name = path  # Use path over x['name'] because of casing issues
        _id = item['id']
        folder = item['type'] == 'folder'
    except StopIteration:
        # No match: child has no id yet.
        _id = None
        name = path
    return base.child(name, _id=_id, folder=folder)
def create_folder(self, path, **kwargs):
    """Create a folder at ``path`` in S3 by PUTting a zero-byte keyed object.

    :param path: WaterButlerPath to create a folder at
    :raises FolderNamingConflict: if anything already exists at ``path``
    :return: S3FolderMetadata for the new folder prefix
    """
    WaterButlerPath.validate_folder(path)
    if (yield from self.exists(path)):
        raise exceptions.FolderNamingConflict(str(path))
    yield from self.make_request(
        'PUT',
        # S3 folders are just zero-byte objects whose key ends in '/'.
        self.bucket.new_key(path.path).generate_url(settings.TEMP_URL_SECS, 'PUT'),
        expects=(200, 201),
        throws=exceptions.CreateFolderError
    )
    return S3FolderMetadata({'Prefix': path.path})
def __init__(self, resource, provider, metadata=None, path=None):
    """Bundle the data needed to emit a provider action log entry.

    At least one of ``metadata`` or ``path`` must be supplied; when
    ``path`` is absent it is derived from ``metadata``.
    """
    if metadata is None and path is None:
        raise Exception("Log payload needs either a path or metadata.")

    self.resource = resource
    self.provider = provider
    self.metadata = metadata
    self.path = path if path else WaterButlerPath.from_metadata(metadata)
async def copy(self,
               dest_provider: 'BaseProvider',
               src_path: wb_path.WaterButlerPath,
               dest_path: wb_path.WaterButlerPath,
               rename: str=None,
               conflict: str='replace',
               handle_naming: bool=True) \
        -> typing.Tuple[wb_metadata.BaseMetadata, bool]:
    """Copy ``src_path`` to ``dest_path`` on ``dest_provider``.

    Optionally resolves naming conflicts first, then picks a strategy:
    provider-internal copy when available, recursive folder copy for
    directories, or download-then-upload otherwise.

    :param dest_provider: provider to copy into
    :param src_path: source path on this provider
    :param dest_path: destination path on ``dest_provider``
    :param rename: optional new name for the destination
    :param conflict: 'replace', 'keep', or 'warn'
    :param handle_naming: whether to run conflict/rename resolution
    :raises OverwriteSelfError: if source and destination are the same path
        on shared storage
    :return: (metadata, created) tuple
    """
    args = (dest_provider, src_path, dest_path)
    kwargs = {'rename': rename, 'conflict': conflict, 'handle_naming': handle_naming}

    self.provider_metrics.add('copy', {
        'got_handle_naming': handle_naming,
        'conflict': conflict,
        'got_rename': rename is not None,
    })
    if handle_naming:
        dest_path = await dest_provider.handle_naming(
            src_path,
            dest_path,
            rename=rename,
            conflict=conflict,
        )
        args = (dest_provider, src_path, dest_path)
        # Naming is resolved; don't pass the naming kwargs down again.
        kwargs = {}

    # files and folders shouldn't overwrite themselves
    if (
        self.shares_storage_root(dest_provider) and
        src_path.materialized_path == dest_path.materialized_path
    ):
        raise exceptions.OverwriteSelfError(src_path)

    self.provider_metrics.add('copy.can_intra_copy', False)
    if self.can_intra_copy(dest_provider, src_path):
        self.provider_metrics.add('copy.can_intra_copy', True)
        return await self.intra_copy(*args)

    if src_path.is_dir:
        return await self._folder_file_op(self.copy, *args, **kwargs)  # type: ignore

    download_stream = await self.download(src_path)

    # Some providers report a canonical filename on the stream; honor it.
    if getattr(download_stream, 'name', None):
        dest_path.rename(download_stream.name)

    return await dest_provider.upload(download_stream, dest_path)
def test_metadata(self, provider, folder_object_metadata, folder_list_metadata):
    """Folder listing maps each Box entry to the matching metadata class."""
    path = WaterButlerPath('/', _ids=(provider.folder, ))
    list_url = provider.build_url('folders', provider.folder, 'items',
                                  fields='id,name,size,modified_at,etag')
    aiohttpretty.register_json_uri('GET', list_url, body=folder_list_metadata)

    result = yield from provider.metadata(path)

    expected = []
    for x in folder_list_metadata['entries']:
        if x['type'] == 'file':
            expected.append(BoxFileMetadata(x, path.child(x['name'])))
        else:
            expected.append(BoxFolderMetadata(x, path.child(x['name'])))
    assert result == expected
def test_metadata(self, provider, folder_object_metadata, folder_list_metadata):
    """Folder listing maps each Box entry to the matching metadata class."""
    root_path = WaterButlerPath("/", _ids=(provider.folder,))
    items_url = provider.build_url("folders", provider.folder, "items",
                                   fields="id,name,size,modified_at,etag")
    aiohttpretty.register_json_uri("GET", items_url, body=folder_list_metadata)

    result = yield from provider.metadata(root_path)

    expected = []
    for entry in folder_list_metadata["entries"]:
        metadata_cls = BoxFileMetadata if entry["type"] == "file" else BoxFolderMetadata
        expected.append(metadata_cls(entry, root_path.child(entry["name"])))
    assert result == expected
async def create_folder(self, path, folder_precheck=True, **kwargs):
    """Create a folder at ``path`` in S3 by PUTting a zero-byte keyed object.

    :param path: WaterButlerPath to create a folder at
    :param folder_precheck: when True, fail fast if ``path`` already exists
    :raises FolderNamingConflict: when the precheck finds an existing object
    :return: S3FolderMetadata for the new folder prefix
    """
    await self._check_region()

    WaterButlerPath.validate_folder(path)

    if folder_precheck:
        if (await self.exists(path)):
            raise exceptions.FolderNamingConflict(str(path))

    async with self.request(
        'PUT',
        # Defer URL signing until request time via functools.partial.
        functools.partial(self.bucket.new_key(path.path).generate_url, settings.TEMP_URL_SECS, 'PUT'),
        skip_auto_headers={'CONTENT-TYPE'},
        expects=(200, 201),
        throws=exceptions.CreateFolderError
    ):
        return S3FolderMetadata({'Prefix': path.path})
async def validate_path(self, path, revision=None, **kwargs):
    """Ensure path is in configured dataset

    :param str path: The path to a file (Dataverse paths address files by
        fileId, not by name)
    :param revision: dataset version used to fetch file metadata
    :return: WaterButlerPath with ``revision`` attached
    """
    if path == '/':
        wbpath = WaterButlerPath('/')
        wbpath.revision = revision
        return wbpath

    path = path.strip('/')
    wbpath = None
    # Match the stripped path against each file's fileId.
    for item in (await self._maybe_fetch_metadata(version=revision)):
        if path == item.extra['fileId']:
            wbpath = WaterButlerPath('/' + item.name, _ids=(None, item.extra['fileId']))
    # Unknown id: fall back to an id-less path for the raw segment.
    wbpath = wbpath or WaterButlerPath('/' + path)

    wbpath.revision = revision
    return wbpath
async def test_metadata(self, provider, root_provider_fixtures):
    """Root folder listing maps entries to file/folder metadata objects."""
    path = WaterButlerPath('/', _ids=(provider.folder, ))
    list_url = provider.build_url('folders', provider.folder, 'items',
                                  fields='id,name,size,modified_at,etag,total_count',
                                  offset=0, limit=1000)

    list_metadata = root_provider_fixtures['folder_list_metadata']
    aiohttpretty.register_json_uri('GET', list_url, body=list_metadata)

    result = await provider.metadata(path)

    expected = []
    for x in list_metadata['entries']:
        if x['type'] == 'file':
            expected.append(BoxFileMetadata(x, path.child(x['name'])))
        else:
            expected.append(BoxFolderMetadata(x, path.child(x['name'], folder=True)))
    assert result == expected
async def revalidate_path(self, base: wb_path.WaterButlerPath, path: str,
                          folder: bool=False) -> wb_path.WaterButlerPath:
    """Build a WaterButlerPath for ``path`` as a child of ``base``.

    Default implementation for name-based providers: no id lookup is
    required, so the child path is appended directly.  Id-based providers
    override this to resolve the child's identifier.

    :param base: ( :class:`.WaterButlerPath` ) The base folder to look under
    :param path: ( :class:`str`) the path of a child of `base`, relative to `base`
    :param folder: ( :class:`bool` ) whether the returned WaterButlerPath
        should represent a folder
    :rtype: :class:`.WaterButlerPath`
    """
    child_path = base.child(path, folder=folder)
    return child_path
async def _folder_metadata(self,
                           path: WaterButlerPath,
                           raw: bool=False) -> List[Union[BaseGoogleDriveMetadata, dict]]:
    """List the children of ``path`` on Google Drive, following pagination.

    :param path: folder to list; must have an identifier
    :param raw: when True, return raw API dicts instead of metadata objects
    """
    query = self._build_query(path.identifier)
    built_url = self.build_url('files', q=query, alt='json', maxResults=1000)
    full_resp = []
    # Follow `nextLink` until the listing is exhausted.
    while built_url:
        async with self.request(
            'GET',
            built_url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as resp:
            resp_json = await resp.json()
            full_resp.extend([
                self._serialize_item(path.child(item['title']), item, raw=raw)
                for item in resp_json['items']
            ])
            built_url = resp_json.get('nextLink', None)
    return full_resp
async def revalidate_path(self, base: WaterButlerPath, name: str,
                          folder: bool=None) -> WaterButlerPath:
    """Resolve ``name`` to a child of ``base`` via Google Drive id lookup.

    :param base: folder to look under
    :param name: child name; may arrive unquoted and containing '/'
    :param folder: expected kind; also forces a trailing slash on the name
    """
    # TODO Redo the logic here folders names ending in /s
    # Will probably break
    if '/' in name.lstrip('/') and '%' not in name:
        # DAZ and MnC may pass unquoted names which break
        # if the name contains a / in it
        name = parse.quote(name.lstrip('/'), safe='')

    if not name.endswith('/') and folder:
        name += '/'

    parts = await self._resolve_path_to_ids(name, start_at=[{
        'title': base.name,
        'mimeType': 'folder',
        'id': base.identifier,
    }])
    # The last resolved part describes the requested child.
    _id, name, mime = list(map(parts[-1].__getitem__, ('id', 'title', 'mimeType')))
    return base.child(name, _id=_id, folder='folder' in mime)
async def test_upload_conflict_keep(self, provider, root_provider_fixtures, file_stream):
    """Uploading with conflict='keep' still creates the file (renamed)."""
    upload_metadata = root_provider_fixtures['upload_metadata']
    item = upload_metadata['entries'][0]
    path = WaterButlerPath('/newfile', _ids=(provider.folder, item['id']))

    upload_url = provider._build_upload_url('files', 'content')
    aiohttpretty.register_json_uri('POST', upload_url, status=201, body=upload_metadata)

    metadata_url = provider.build_url('files', path.identifier)
    aiohttpretty.register_json_uri('GET', metadata_url, body=upload_metadata)

    # Folder listing used by conflict resolution to find a free name.
    list_url = provider.build_url(
        'folders', item['path_collection']['entries'][1]['id'],
        'items', fields='id,name,type', limit=1000)
    aiohttpretty.register_json_uri(
        'GET', list_url, body=root_provider_fixtures['folder_list_metadata'])

    metadata, created = await provider.upload(file_stream, path, conflict='keep')
    expected = BoxFileMetadata(item, path).serialized()

    # since the metadata for the renamed conflict file isn't actually saved, this one is odd to
    # test.
    assert metadata.serialized() == expected
    assert created is True
    assert path.identifier_path == metadata.path
    assert aiohttpretty.has_call(method='POST', uri=upload_url)
async def test_download_revision(self, provider, root_provider_fixtures):
    """Downloading with a revision hits the versioned content URL."""
    revision = '21753842'
    item = root_provider_fixtures['file_metadata']['entries'][0]
    file_path = WaterButlerPath('/triangles.txt', _ids=(provider.folder, item['id']))

    metadata_url = provider.build_url('files', item['id'])
    content_url = provider.build_url('files', item['id'], 'content', version=revision)
    aiohttpretty.register_json_uri('GET', metadata_url, body=item)
    aiohttpretty.register_uri('GET', content_url, body=b'better', auto_length=True)

    stream = await provider.download(file_path, revision)

    assert (await stream.read()) == b'better'
async def test_upload_update(self, provider, root_provider_fixtures, file_stream):
    """Uploading over an existing file reports created=False."""
    upload_metadata = root_provider_fixtures['upload_metadata']
    # Overwrite the first entry from the folder listing fixture.
    item_to_overwrite = root_provider_fixtures['folder_list_metadata']['entries'][0]
    path = WaterButlerPath('/newfile', _ids=(provider.folder, item_to_overwrite['id']))
    upload_url = provider._build_upload_url('files', item_to_overwrite['id'], 'content')
    aiohttpretty.register_json_uri('POST', upload_url, status=201, body=upload_metadata)

    metadata, created = await provider.upload(file_stream, path)
    expected = BoxFileMetadata(upload_metadata['entries'][0], path).serialized()

    assert metadata.serialized() == expected
    assert created is False
    assert aiohttpretty.has_call(method='POST', uri=upload_url)
async def test_validate_path_file(self, provider, file_lineage, mock_time):
    """validate_path/validate_v1_path resolve a file id to its name; the
    v1 variant rejects a file id given with a trailing (folder) slash."""
    file_id = file_lineage['data'][0]['id']

    url, params = build_signed_url_without_auth(provider, 'GET', file_id, 'lineage')
    aiohttpretty.register_json_uri('GET', url, params=params, status=200, body=file_lineage)

    # v1 treats a trailing slash as "folder" and must 404 for a file id.
    with pytest.raises(exceptions.NotFoundError) as exc:
        await provider.validate_v1_path('/' + file_id + '/')
    assert exc.value.code == client.NOT_FOUND

    wb_path_v0 = await provider.validate_path('/' + file_id)
    wb_path_v1 = await provider.validate_v1_path('/' + file_id)

    expected = WaterButlerPath('/doc.rst')
    assert wb_path_v0 == expected
    assert wb_path_v1 == expected
async def test_single_version_metadata(self, provider, single_version_metadata, mock_time):
    """revisions() returns one revision entry for a single-version key."""
    path = WaterButlerPath('/single-version.file')
    url = provider.bucket.generate_url(100, 'GET', query_parameters={'versions': ''})
    params = build_folder_params(path)
    aiohttpretty.register_uri('GET', url, params=params,
                              status=200, body=single_version_metadata)

    data = await provider.revisions(path)

    assert isinstance(data, list)
    assert len(data) == 1
    # Each revision entry exposes the standard revision attributes.
    for item in data:
        assert hasattr(item, 'extra')
        assert hasattr(item, 'version')
        assert hasattr(item, 'version_identifier')
    assert aiohttpretty.has_call(method='GET', uri=url, params=params)
async def test_metadata_root_file(self, provider, provider_fixtures):
    """metadata() for a file path returns file metadata with hashes/extra."""
    path = WaterButlerPath('/pfile', prepend=provider.folder)
    url = provider.build_url('files', 'get_metadata')
    data = {'path': path.full_path}
    aiohttpretty.register_json_uri('POST', url, data=data,
                                   body=provider_fixtures['file_metadata'])

    result = await provider.metadata(path)

    assert isinstance(result, core_metadata.BaseMetadata)
    assert result.kind == 'file'
    assert result.name == 'Getting_Started.pdf'
    assert result.path == '/Getting_Started.pdf'
    assert result.extra == {
        'revisionId': '2ba1017a0c1e',
        'id': 'id:8y8sAJlrhuAAAAAAAAAAAQ',
        'hashes': {
            'dropbox': 'meow'
        },
    }
def validate_path(self, path, revision=None, **kwargs):
    """Ensure path is in configured dataset

    :param str path: The path to a file (Dataverse paths address files by
        fileId, not by name)
    :param revision: dataset version used to fetch file metadata
    :return: WaterButlerPath with ``revision`` attached
    """
    if path == '/':
        wbpath = WaterButlerPath('/')
        wbpath.revision = revision
        return wbpath

    path = path.strip('/')
    wbpath = None
    # Match the stripped path against each file's fileId.
    for item in (yield from self._maybe_fetch_metadata(version=revision)):
        if path == item['extra']['fileId']:
            wbpath = WaterButlerPath('/' + item['name'], _ids=(None, item['extra']['fileId']))
    # Unknown id: fall back to an id-less path for the raw segment.
    wbpath = wbpath or WaterButlerPath('/' + path)

    wbpath.revision = revision
    return wbpath
async def test_complete_session(self, provider, file_stream, provider_fixtures):
    """_complete_session finishes a chunked upload and returns metadata."""
    assert file_stream.size == 38
    # Shrink the chunk size so the fixture stream spans multiple chunks.
    provider.CHUNK_SIZE = 4

    path = WaterButlerPath('/foobah')
    session_id = provider_fixtures['session_metadata']['session_id']
    complete_part_url = provider._build_content_url('files', 'upload_session', 'finish')
    aiohttpretty.register_json_uri('POST', complete_part_url, status=200,
                                   body=provider_fixtures.get('file_metadata', None))

    metadata = await provider._complete_session(file_stream, session_id, path)

    assert metadata == provider_fixtures['file_metadata']
    assert aiohttpretty.has_call(method='POST', uri=complete_part_url)
    # Restore the default so other tests are unaffected.
    provider.CHUNK_SIZE = CHUNK_SIZE
async def test_create_upload_session_new_file(self, provider, root_provider_fixtures,
                                              file_stream):
    """Check that the chunked upload session creation makes a request to
    the correct url when creating a new file.
    """
    # No second id -> the file does not exist yet.
    path = WaterButlerPath('/newfile', _ids=(provider.folder, None))
    session_url = provider._build_upload_url('files', 'upload_sessions')
    create_session_metadata = root_provider_fixtures['create_session_metadata']
    aiohttpretty.register_json_uri('POST', session_url, status=201,
                                   body=create_session_metadata)

    session_data = await provider._create_chunked_upload_session(path, file_stream)

    assert root_provider_fixtures['create_session_metadata'] == session_data
    assert aiohttpretty.has_call(method='POST', uri=session_url)
def validate_path(self, path, parent=None, **kwargs):
    """Resolve ``path`` to a WaterButlerPath within a figshare article.

    The single path segment may be a numeric file id or a name; a file in
    the article with a matching id contributes its id and name, otherwise
    the segment becomes an id-less child (a not-yet-existing file).
    """
    split = path.rstrip('/').split('/')[1:]
    wbpath = parent or WaterButlerPath('/', _ids=(self.article_id, ), folder=True)
    if split:
        name = split.pop(0)
        try:
            fid = int(name)
        except ValueError:
            # Non-numeric segment: the id comparison below simply won't match.
            fid = name
        article_json = yield from self._get_article_json()
        try:
            wbpath = wbpath.child(**next({
                '_id': x['id'],
                'name': x['name'],
            } for x in article_json['files'] if x['id'] == fid))
        except StopIteration:
            wbpath = wbpath.child(name)
    return wbpath
async def test_upload_checksum_mismatch(self, provider, file_stream,
                                        file_header_metadata, mock_time):
    """Upload raises UploadChecksumMismatchError when S3's returned ETag
    doesn't match the stream's computed hash."""
    path = WaterButlerPath('/foobah')
    url = provider.bucket.new_key(path.path).generate_url(100, 'PUT')
    metadata_url = provider.bucket.new_key(path.path).generate_url(100, 'HEAD')
    # First HEAD 404s (file doesn't exist yet); second returns metadata.
    aiohttpretty.register_uri(
        'HEAD',
        metadata_url,
        responses=[
            {'status': 404},
            {'headers': file_header_metadata},
        ],
    )
    aiohttpretty.register_uri('PUT', url, status=200, headers={'ETag': '"bad hash"'})

    with pytest.raises(exceptions.UploadChecksumMismatchError):
        await provider.upload(file_stream, path)

    assert aiohttpretty.has_call(method='PUT', uri=url)
    assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
async def test_create_upload_session_existing_file(self, provider, root_provider_fixtures,
                                                   file_stream):
    """Check that the chunked upload session creation makes a request to
    the correct url when updating an existing file.
    """
    # Existing file id '2345' -> session URL targets that file directly.
    path = WaterButlerPath('/newfile', _ids=(provider.folder, '2345'))
    session_url = 'https://upload.box.com/api/2.0/files/2345/upload_sessions'
    create_session_metadata = root_provider_fixtures['create_session_metadata']
    aiohttpretty.register_json_uri('POST', session_url, status=201,
                                   body=create_session_metadata)

    session_data = await provider._create_chunked_upload_session(path, file_stream)

    assert root_provider_fixtures['create_session_metadata'] == session_data
    assert aiohttpretty.has_call(method='POST', uri=session_url)
async def test_download_drive_revision(self, provider):
    """Downloading a Drive file with a revision fetches that revision and
    streams its download URL's content."""
    revision = 'oldest'
    body = b'we love you conrad'
    item = fixtures.list_file['items'][0]
    path = WaterButlerPath('/birdie.jpg', _ids=(provider.folder['id'], item['id']))

    download_file_url = item['downloadUrl']
    metadata_url = provider.build_url('files', path.identifier)
    revision_url = provider.build_url('files', item['id'], 'revisions', revision)

    aiohttpretty.register_json_uri('GET', revision_url, body=item)
    aiohttpretty.register_json_uri('GET', metadata_url, body=item)
    aiohttpretty.register_uri('GET', download_file_url, body=body, auto_length=True)

    result = await provider.download(path, revision=revision)
    content = await result.read()

    assert content == body
async def test_empty_metadata_folder(self, provider, folder_empty_metadata, mock_time):
    """metadata() on an empty folder returns an empty list."""
    path = WaterButlerPath('/this-is-not-the-root/')
    metadata_url = provider.bucket.new_key(path.path).generate_url(100, 'HEAD')
    url = provider.bucket.generate_url(100)
    params = build_folder_params(path)
    aiohttpretty.register_uri('GET', url, params=params, body=folder_empty_metadata,
                              headers={'Content-Type': 'application/xml'})
    # NOTE(review): `header=` (singular) is not the keyword used elsewhere
    # (`headers=`) — presumably ignored by aiohttpretty; verify intent.
    aiohttpretty.register_uri('HEAD', metadata_url, header=folder_empty_metadata,
                              headers={'Content-Type': 'application/xml'})

    result = await provider.metadata(path)

    assert isinstance(result, list)
    assert len(result) == 0
async def test_metadata_404(self, provider, native_dataset_metadata):
    """metadata() raises MetadataError when the file id isn't in the dataset."""
    # Published version 404s; draft ('latest') responds with the dataset.
    url = provider.build_url(dvs.JSON_BASE_URL.format(provider._id, 'latest-published'),
                             key=provider.token)
    aiohttpretty.register_json_uri('GET', url, status=404, body=native_dataset_metadata)

    url = provider.build_url(dvs.JSON_BASE_URL.format(provider._id, 'latest'),
                             key=provider.token)
    aiohttpretty.register_json_uri('GET', url, status=200, body=native_dataset_metadata)

    path = WaterButlerPath('/thefilenotfound.txt', _ids=('?', 'nobody has this fileId'))

    with pytest.raises(exceptions.MetadataError):
        await provider.metadata(path, version='latest')
async def test_revisions(self, provider, native_dataset_metadata):
    """revisions() yields DataverseRevision entries for the file."""
    url = provider.build_url(dvs.JSON_BASE_URL.format(provider._id, 'latest-published'),
                             key=provider.token)
    aiohttpretty.register_json_uri('GET', url, status=200, body=native_dataset_metadata)

    url = provider.build_url(dvs.JSON_BASE_URL.format(provider._id, 'latest'),
                             key=provider.token)
    aiohttpretty.register_json_uri('GET', url, status=200, body=native_dataset_metadata)

    path = WaterButlerPath('/thefile.txt', _ids=('?', '19'))

    result = await provider.revisions(path, version='latest')
    # The original `isinstance(result, DataverseRevision)` was a bare
    # no-op expression (and checked the list, not its elements); assert
    # the element type for real.
    assert isinstance(result[0], DataverseRevision)
    assert result[0].raw == 'latest-published'
def test_already_exists(self, provider):
    """create_folder surfaces a Box 409 as a FolderNamingConflict with a
    human-readable message."""
    url = provider.build_url('folders')
    data_url = provider.build_url('folders', provider.folder)
    path = WaterButlerPath('/50 shades of nope/', _ids=(provider.folder, None))

    aiohttpretty.register_json_uri('POST', url, status=409)
    # Parent folder metadata fetched while building the error message.
    aiohttpretty.register_json_uri('GET', data_url, body={
        'id': provider.folder,
        'type': 'folder',
        'name': 'All Files',
        'path_collection': {'entries': []}
    })

    with pytest.raises(exceptions.FolderNamingConflict) as e:
        yield from provider.create_folder(path)

    assert e.value.code == 409
    assert e.value.message == 'Cannot create folder "50 shades of nope" because a file or folder already exists at path "/50 shades of nope/"'
async def test_validate_v1_path_file(self, provider, file_header_metadata, mock_time):
    """validate_v1_path accepts a file path without a trailing slash and
    404s when the same name is requested as a folder."""
    file_path = 'foobah'
    params = {'prefix': '/' + file_path + '/', 'delimiter': '/'}
    good_metadata_url = provider.bucket.new_key('/' + file_path).generate_url(100, 'HEAD')
    bad_metadata_url = provider.bucket.generate_url(100)
    aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata)
    aiohttpretty.register_uri('GET', bad_metadata_url, params=params, status=404)

    assert WaterButlerPath('/') == await provider.validate_v1_path('/')

    try:
        wb_path_v1 = await provider.validate_v1_path('/' + file_path)
    except Exception as exc:
        pytest.fail(str(exc))

    # Trailing slash means "folder", which doesn't exist here.
    with pytest.raises(exceptions.NotFoundError) as exc:
        await provider.validate_v1_path('/' + file_path + '/')

    assert exc.value.code == client.NOT_FOUND

    wb_path_v0 = await provider.validate_path('/' + file_path)

    assert wb_path_v1 == wb_path_v0
async def test_get_revisions_no_revisions(self, provider):
    """With no Drive revisions, a synthetic revision is built from the
    listing's etag plus the ignore-version suffix."""
    item = fixtures.list_file['items'][0]
    metadata_url = provider.build_url('files', item['id'])
    revisions_url = provider.build_url('files', item['id'], 'revisions')
    path = WaterButlerPath('/birdie.jpg', _ids=('doesntmatter', item['id']))

    aiohttpretty.register_json_uri('GET', metadata_url, body=item)
    aiohttpretty.register_json_uri('GET', revisions_url, body=fixtures.revisions_list_empty)

    result = await provider.revisions(path)
    expected = [
        GoogleDriveRevision({
            'modifiedDate': item['modifiedDate'],
            'id': fixtures.revisions_list_empty['etag'] + ds.DRIVE_IGNORE_VERSION,
        })
    ]
    assert result == expected
async def _get_folder_meta(self, path: WaterButlerPath, raw: bool=False,
                           folder: bool=False) -> Union[dict, List[BoxFolderMetadata]]:
    """Fetch metadata for a Box folder.

    :param path: folder to inspect; must have an identifier
    :param raw: return raw API dicts instead of metadata objects
    :param folder: when True, return the folder's own metadata; otherwise
        return its (paginated) children
    """
    if folder:
        async with self.request(
            'GET',
            self.build_url('folders', path.identifier),
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as resp:
            data = await resp.json()
            return data if raw else self._serialize_item(data, path)

    # Box maximum limit is 1000
    page_count, page_total, limit = 0, None, 1000
    full_resp = {} if raw else []  # type: ignore
    # Page through the folder listing; total page count is derived from
    # the first response's total_count.
    while page_total is None or page_count < page_total:
        url = self.build_url('folders', path.identifier, 'items',
                             fields='id,name,size,modified_at,etag,total_count',
                             offset=(page_count * limit), limit=limit)
        async with self.request('GET', url,
                                expects=(200, ),
                                throws=exceptions.MetadataError) as response:
            resp_json = await response.json()
            if raw:
                full_resp.update(resp_json)  # type: ignore
            else:
                full_resp.extend([  # type: ignore
                    self._serialize_item(
                        each, path.child(each['name'], folder=(each['type'] == 'folder'))
                    ) for each in resp_json['entries']
                ])
            page_count += 1
            if page_total is None:
                page_total = ((resp_json['total_count'] - 1) // limit) + 1  # ceiling div
    self.metrics.add('metadata.folder.pages', page_total)
    return full_resp
async def test_download_range(self, provider):
    """download() with a byte range sends a Range header and yields a
    partial (206) response."""
    path = WaterButlerPath('/triangles.txt', prepend=provider.folder)
    url = provider._build_content_url('files', 'download')
    aiohttpretty.register_uri('POST', url, body=b'be', auto_length=True, status=206)

    result = await provider.download(path, range=(0, 1))
    assert result.partial

    content = await result.response.read()
    assert content == b'be'
    assert aiohttpretty.has_call(method='POST', uri=url, headers={
        'Authorization': 'Bearer wrote harry potter',
        'Range': 'bytes=0-1',
        'Dropbox-API-Arg': '{"path": "/Photos/triangles.txt"}',
        'Content-Type': ''
    })
async def test_download_without_auth(monkeypatch, provider_and_mock, osf_response,
                                     mock_path, mock_time):
    """Download still signs its request and delegates when no auth dict is present."""
    provider, inner_provider = provider_and_mock
    provider.auth = {}  # Remove auth for test

    unsigned_url = provider.build_url(mock_path.identifier, 'download',
                                      version=None, mode=None)
    signed_url, _, signed_params = provider.build_signed_url('GET', unsigned_url,
                                                             params={})
    aiohttpretty.register_json_uri('GET', signed_url, params=signed_params,
                                   body=osf_response)

    await provider.download(mock_path)

    assert provider.make_provider.called
    assert inner_provider.download.called
    assert aiohttpretty.has_call(method='GET', uri=signed_url, params=signed_params)
    provider.make_provider.assert_called_once_with(osf_response['settings'])
    inner_provider.download.assert_called_once_with(
        path=WaterButlerPath('/test/path'), displayName='unrelatedpath')
async def _folder_metadata(
        self,
        path: WaterButlerPath,
        raw: bool = False) -> List[Union[BaseGoogleDriveMetadata, dict]]:
    """List the children of the folder at ``path``, following pagination links.

    :param path: folder to list; its identifier seeds the Drive query
    :param raw: if ``True`` collect raw item dicts instead of metadata objects
    """
    page_url = self.build_url('files', q=self._build_query(path.identifier),
                              alt='json', maxResults=1000)
    children = []  # type: list
    # Drive paginates results; 'nextLink' is absent on the final page.
    while page_url:
        resp = await self.make_request(
            'GET',
            page_url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        page = await resp.json()
        for item in page['items']:
            children.append(
                self._serialize_item(path.child(item['title']), item, raw=raw))
        page_url = page.get('nextLink', None)
    return children
async def test_upload_encrypted(self, provider, file_content, file_stream,
                                file_metadata, mock_time):
    """Uploading with encrypt_uploads set requests server-side AES256 encryption."""
    provider.encrypt_uploads = True  # triggers encrypt_key=True in s3.provider.upload
    path = WaterButlerPath('/foobah')
    expected_md5 = hashlib.md5(file_content).hexdigest()

    upload_url = provider.bucket.new_key(path.path).generate_url(
        100, 'PUT', encrypt_key=True)
    head_url = provider.bucket.new_key(path.path).generate_url(100, 'HEAD')

    # First HEAD misses (file doesn't exist yet); second returns the metadata.
    aiohttpretty.register_uri('HEAD', head_url, responses=[
        {'status': 404},
        {'headers': file_metadata},
    ])
    aiohttpretty.register_uri('PUT', upload_url, status=200,
                              headers={'ETag': '"{}"'.format(expected_md5)})

    metadata, created = await provider.upload(file_stream, path)

    assert created
    assert metadata.kind == 'file'
    assert metadata.extra['encryption'] == 'AES256'
    assert aiohttpretty.has_call(method='PUT', uri=upload_url)
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
def test_child(self):
    """child() appends a new final component without disturbing the parent."""
    parent = WaterButlerPath('/this/is/a/long/')
    child = parent.child('path')
    assert parent.name == 'long'
    assert child.name == 'path'
def dest_path():
    """Fixture: destination-side file path."""
    destination = WaterButlerPath('/usr/bin/golang')
    return destination
def src_path():
    """Fixture: source-side file path."""
    source = WaterButlerPath('/user/bin/python')
    return source
async def copy(self,
               dest_provider: provider.BaseProvider,
               src_path: WaterButlerPath,
               dest_path: WaterButlerPath,
               rename: str=None,
               conflict: str='replace',
               handle_naming: bool=True) -> typing.Tuple[BaseMetadata, bool]:
    """Override parent's copy to support cross-region osfstorage copies.

    Delegates to :meth:`.BaseProvider.copy` when destination is not osfstorage.  If both
    providers are in the same region (i.e. `.can_intra_copy` is true), call `.intra_copy`.
    Otherwise, grab a download stream from the source region, send it to the destination
    region, *then* execute an `.intra_copy` to make new file metadata entries in the OSF.

    This is needed because a same-region osfstorage copy will duplicate *all* the versions
    of the file, but `.BaseProvider.copy` will only copy the most recent version.

    :param dest_provider: provider of the copy destination
    :param src_path: path of the file/folder being copied
    :param dest_path: path to copy to
    :param rename: new name for the copied entity, if any
    :param conflict: conflict resolution strategy ('replace', 'keep', 'warn')
    :param handle_naming: whether to resolve naming conflicts before copying
    :return: tuple of (metadata of the new entity, True if newly created)
    :raises: :class:`.OverwriteSelfError` when source and destination are the same entity
    """
    # when moving to non-osfstorage, default move is fine
    if dest_provider.NAME != 'osfstorage':
        return await super().copy(dest_provider, src_path, dest_path, rename=rename,
                                  conflict=conflict, handle_naming=handle_naming)

    args = (dest_provider, src_path, dest_path)
    kwargs = {'rename': rename, 'conflict': conflict}
    self.provider_metrics.add('copy', {
        'got_handle_naming': handle_naming,
        'conflict': conflict,
        'got_rename': rename is not None,
    })
    if handle_naming:
        dest_path = await dest_provider.handle_naming(
            src_path,
            dest_path,
            rename=rename,
            conflict=conflict,
        )
        # Naming was resolved here, so downstream recursive calls get no
        # rename/conflict kwargs; args is rebuilt to carry the updated dest_path.
        args = (dest_provider, src_path, dest_path)
        kwargs = {}

    # files and folders shouldn't overwrite themselves
    if (
            self.shares_storage_root(dest_provider) and
            src_path.materialized_path == dest_path.materialized_path
    ):
        raise exceptions.OverwriteSelfError(src_path)

    self.provider_metrics.add('copy.can_intra_copy', False)
    if self.can_intra_copy(dest_provider, src_path):
        # Same region: one server-side copy preserves all versions.
        self.provider_metrics.add('copy.can_intra_copy', True)
        return await self.intra_copy(*args)

    if src_path.is_dir:
        # Recurse over the folder's children using this same copy method.
        meta_data, created = await self._folder_file_op(self.copy, *args, **kwargs)  # type: ignore
    else:
        # Cross-region: stream the file over, then intra_copy to create the
        # OSF-side metadata entries pointing at the newly stored data.
        download_stream = await self.download(src_path)
        if getattr(download_stream, 'name', None):
            dest_path.rename(download_stream.name)
        await dest_provider._send_to_storage_provider(download_stream,  # type: ignore
                                                      dest_path, **kwargs)
        meta_data, created = await self.intra_copy(dest_provider, src_path, dest_path)

    return meta_data, created
async def validate_path(self, path: str, **kwargs) -> WaterButlerPath:
    """Build a WaterButlerPath for ``path`` rooted at the provider's configured folder."""
    validated = WaterButlerPath(path, prepend=self.folder)
    return validated
async def validate_path(self, path, **kwargs):
    """Wrap the raw string path in a WaterButlerPath; no remote lookup is performed."""
    validated = WaterButlerPath(path)
    return validated
async def move(self,
               dest_provider: provider.BaseProvider,
               src_path: WaterButlerPath,
               dest_path: WaterButlerPath,
               rename: str=None,
               conflict: str='replace',
               handle_naming: bool=True) -> typing.Tuple[BaseMetadata, bool]:
    """Override parent's move to support cross-region osfstorage moves while preserving
    guids and versions.

    Delegates to :meth:`.BaseProvider.move` when destination is not osfstorage.  If both
    providers are in the same region (i.e. `.can_intra_move` is true), then calls that.
    Otherwise, will grab a download stream from the source region, send it to the
    destination region, *then* execute an `.intra_move` to update the file metadata
    in-place.

    :param dest_provider: provider of the move destination
    :param src_path: path of the file/folder being moved
    :param dest_path: path to move to
    :param rename: new name for the moved entity, if any
    :param conflict: conflict resolution strategy ('replace', 'keep', 'warn')
    :param handle_naming: whether to resolve naming conflicts before moving
    :return: tuple of (metadata of the moved entity, True if newly created)
    :raises: :class:`.OverwriteSelfError` when source and destination are the same entity
    """
    # when moving to non-osfstorage, default move is fine
    if dest_provider.NAME != 'osfstorage':
        return await super().move(dest_provider, src_path, dest_path, rename=rename,
                                  conflict=conflict, handle_naming=handle_naming)

    args = (dest_provider, src_path, dest_path)
    kwargs = {'rename': rename, 'conflict': conflict}
    self.provider_metrics.add('move', {
        'got_handle_naming': handle_naming,
        'conflict': conflict,
        'got_rename': rename is not None,
    })
    if handle_naming:
        dest_path = await dest_provider.handle_naming(
            src_path,
            dest_path,
            rename=rename,
            conflict=conflict,
        )
        # Naming resolved; downstream recursive calls get no rename/conflict
        # kwargs, and args is rebuilt to carry the updated dest_path.
        args = (dest_provider, src_path, dest_path)
        kwargs = {}

    # files and folders shouldn't overwrite themselves
    if (
            self.shares_storage_root(dest_provider) and
            src_path.materialized_path == dest_path.materialized_path
    ):
        raise exceptions.OverwriteSelfError(src_path)

    self.provider_metrics.add('move.can_intra_move', False)
    if self.can_intra_move(dest_provider, src_path):
        # Same region: server-side move keeps guids/versions intact in one call.
        self.provider_metrics.add('move.can_intra_move', True)
        return await self.intra_move(*args)

    if src_path.is_dir:
        # Recurse over children with this same move, then remove the now-empty source.
        meta_data, created = await self._folder_file_op(self.move, *args, **kwargs)  # type: ignore
        await self.delete(src_path)
    else:
        # Cross-region: stream the data over, then intra_move to update the
        # OSF-side metadata in place (preserving guid and version history).
        download_stream = await self.download(src_path)
        if getattr(download_stream, 'name', None):
            dest_path.rename(download_stream.name)
        await dest_provider._send_to_storage_provider(download_stream,  # type: ignore
                                                      dest_path, **kwargs)
        meta_data, created = await self.intra_move(dest_provider, src_path, dest_path)

    return meta_data, created
def folder_wb_path():
    """Fixture: a folder path (note trailing slash) under /xml-api."""
    folder = WaterButlerPath('/xml-api/folder-1/')
    return folder
def path_from_metadata(self,
                       parent_path: wb_path.WaterButlerPath,
                       meta_data: wb_metadata.BaseMetadata) -> wb_path.WaterButlerPath:
    """Derive a child WaterButlerPath under ``parent_path`` from a metadata object."""
    child_id = meta_data.path.strip('/')
    return parent_path.child(meta_data.name, _id=child_id, folder=meta_data.is_folder)
async def test_metadata_file_does_not_exist(self, connected_provider):
    """A 404 on the HEAD request surfaces as a MetadataError."""
    missing = WaterButlerPath('/does_not.exist')
    aiohttpretty.register_uri('HEAD', connected_provider.build_url(missing.path),
                              status=404)
    with pytest.raises(exceptions.MetadataError):
        await connected_provider.metadata(missing)
def test_metadata_missing(self, provider):
    # Path resolves under the provider folder but carries no identifier,
    # i.e. the file does not exist remotely, so metadata() should 404.
    # NOTE(review): this is an old-style generator-based coroutine test
    # (`yield from`) while sibling tests use async/await — presumably it
    # relies on a gen-test-capable decorator; confirm before modernizing.
    path = WaterButlerPath('/Something', _ids=(provider.folder, None))
    with pytest.raises(exceptions.NotFoundError):
        yield from provider.metadata(path)