Esempio n. 1
0
    def create_folder(self, path, **kwargs):
        """Create the folder described by ``path`` and return its serialized metadata.

        Generator-based coroutine: POSTs the folder name and the parent's identifier
        to the ``folders`` endpoint.

        :param path: path of the folder to create; must pass ``WaterButlerPath.validate_folder``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``BoxFolderMetadata(...).serialized()`` built from the response JSON
        :raises: ``exceptions.FolderNamingConflict`` when ``path`` already carries an
            identifier or the request is answered with a 409
        """
        WaterButlerPath.validate_folder(path)

        # A path that already has an identifier is reported as a naming conflict.
        if path.identifier is not None:
            raise exceptions.FolderNamingConflict(str(path))

        url = self.build_url('folders')
        payload = {'name': path.name, 'parent': {'id': path.parent.identifier}}
        response = yield from self.make_request(
            'POST',
            url,
            data=payload,
            expects=(201, 409),
            throws=exceptions.CreateFolderError,
        )

        # 409 is an expected status so that a conflict appearing between the identifier
        # check above and this request (race condition) is still reported as a conflict.
        if response.status == 409:
            raise exceptions.FolderNamingConflict(str(path))

        folder_json = yield from response.json()
        return BoxFolderMetadata(folder_json, path).serialized()
Esempio n. 2
0
    def validate_path(self, path, **kwargs):
        """Resolve a string path into a ``WaterButlerPath`` rooted at the configured project.

        Generator-based coroutine. The first path segment is looked up with
        ``_assert_contains_article``; any further segments are delegated to the
        provider returned by ``_make_article_provider``.

        :param str path: slash-separated path starting with ``/``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: the resolved ``WaterButlerPath``; when a lookup fails with
            ``ValueError`` or a 404/401 ``ProviderError``, an id-less file child is returned
        :raises: ``exceptions.ProviderError`` for any code other than 404 or 401
        """
        segments = path.rstrip('/').split('/')[1:]
        root = WaterButlerPath('/', _ids=(self.settings['project_id'], ), folder=True)

        if not segments:
            return root

        article_ref, remainder = segments[0], segments[1:]
        try:
            article = yield from self._assert_contains_article(article_ref)
        except ValueError:
            return root.child(article_ref, folder=False)
        except exceptions.ProviderError as err:
            if err.code in (404, 401):
                return root.child(article_ref, folder=False)
            raise

        article_path = root.child(article['title'], article['id'], folder=True)
        if not remainder:
            return article_path

        provider = yield from self._make_article_provider(article['id'], check_parent=False)
        try:
            return (yield from provider.validate_path('/' + '/'.join(remainder), parent=article_path))
        except exceptions.ProviderError as err:
            if err.code in (404, 401):
                return article_path.child(remainder[0], folder=False)
            raise
    async def handle_name_conflict(self,
                                   path: wb_path.WaterButlerPath,
                                   conflict: str='replace',
                                   **kwargs) -> typing.Tuple[wb_path.WaterButlerPath, bool]:
        """Decide which path to write to when ``path`` may already be taken.

        :param path: ( :class:`.WaterButlerPath` ) desired path to check for a conflict
        :param conflict: ( :class:`str` ) ``replace``, ``warn``, or anything else to
            keep both by incrementing the name
        :param kwargs: forwarded to ``self.exists``
        :rtype: (:class:`.WaterButlerPath`, result of ``self.exists``) when there is no
            conflict or ``conflict`` is ``replace``; otherwise (:class:`.WaterButlerPath`, False)
        :raises: :class:`.NamingConflict` when the path is taken and ``conflict`` is ``warn``

        Note: in the keep-both case ``path`` itself is renamed in place via
        ``path.increment_name()`` and that same object is returned.
        """
        found = await self.exists(path, **kwargs)

        # An empty list is deliberately treated as "exists", unlike other falsy results.
        missing = not found and not (found == [])
        if missing or conflict == 'replace':
            return path, found  # type: ignore

        if conflict == 'warn':
            raise exceptions.NamingConflict(path.name)

        # Keep incrementing the name until the revalidated candidate is not taken.
        taken = True
        while taken:
            path.increment_name()
            candidate = await self.revalidate_path(
                path.parent,
                path.name,
                folder=path.is_dir
            )
            found = await self.exists(candidate, **kwargs)
            taken = bool(found or found == [])

        return path, False
Esempio n. 4
0
    def create_folder(self, path, **kwargs):
        """Create the folder described by ``path`` and return its metadata object.

        Generator-based coroutine: POSTs the folder name and the parent's identifier
        to the ``folders`` endpoint, then records the new folder's id on ``path``.

        :param path: path of the folder to create; must pass ``WaterButlerPath.validate_folder``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``BoxFolderMetadata`` built from the response JSON and ``path``
        :raises: ``exceptions.FolderNamingConflict`` when ``path`` already carries an
            identifier or the request is answered with a 409
        """
        WaterButlerPath.validate_folder(path)

        # A path that already has an identifier is reported as a naming conflict.
        if path.identifier is not None:
            raise exceptions.FolderNamingConflict(str(path))

        url = self.build_url('folders')
        payload = {'name': path.name, 'parent': {'id': path.parent.identifier}}
        response = yield from self.make_request(
            'POST',
            url,
            data=payload,
            expects=(201, 409),
            throws=exceptions.CreateFolderError,
        )

        # 409 is an expected status so that a conflict appearing between the identifier
        # check above and this request (race condition) is still reported as a conflict.
        if response.status == 409:
            raise exceptions.FolderNamingConflict(str(path))

        folder_json = yield from response.json()

        # Store the new folder's id on the last part of the path; logging needs it later.
        last_part = path._parts[-1]
        last_part._id = folder_json['id']
        return BoxFolderMetadata(folder_json, path)
    async def test_intra_move_folder_replace(self, provider, intra_fixtures, root_provider_fixtures):
        """intra_move of a folder onto a destination with an id returns (folder, False)
        and issues a DELETE for the destination."""
        folder_item = intra_fixtures['intra_folder_metadata']
        listing = root_provider_fixtures['folder_list_metadata']

        src_path = WaterButlerPath('/name/', _ids=(provider, folder_item['id']))
        dest_path = WaterButlerPath(
            '/charmander/name/',
            _ids=(provider, folder_item['id'], folder_item['id']),
        )

        move_url = provider.build_url('folders', src_path.identifier)
        delete_url = provider.build_url('folders', dest_path.identifier, recursive=True)
        list_url = provider.build_url(
            'folders', folder_item['id'], 'items',
            fields='id,name,size,modified_at,etag,total_count',
            offset=0, limit=1000,
        )

        aiohttpretty.register_json_uri('PUT', move_url, body=folder_item)
        aiohttpretty.register_uri('DELETE', delete_url, status=204)
        aiohttpretty.register_json_uri('GET', list_url, body=listing)

        # Expected folder metadata, with every listed entry serialized as a child.
        expected_folder = BoxFolderMetadata(folder_item, dest_path)
        expected_folder._children = [
            provider._serialize_item(
                entry,
                dest_path.child(entry['name'], folder=(entry['type'] == 'folder')),
            )
            for entry in listing['entries']
        ]

        result = await provider.intra_move(provider, src_path, dest_path)

        assert result == (expected_folder, False)
        assert aiohttpretty.has_call(method='DELETE', uri=delete_url)
    async def test_intra_copy_folder(self, provider, intra_fixtures, root_provider_fixtures):
        """intra_copy of a folder returns (folder metadata with children, True)."""
        folder_item = intra_fixtures['intra_folder_metadata']
        listing = root_provider_fixtures['folder_list_metadata']

        src_path = WaterButlerPath('/name/', _ids=(provider, folder_item['id']))
        dest_path = WaterButlerPath('/charmander/name/', _ids=(provider, folder_item['id']))

        copy_url = provider.build_url('folders', src_path.identifier, 'copy')
        list_url = provider.build_url(
            'folders', folder_item['id'], 'items',
            fields='id,name,size,modified_at,etag,total_count',
            offset=0, limit=1000,
        )

        aiohttpretty.register_json_uri('GET', list_url, body=listing)
        aiohttpretty.register_json_uri('POST', copy_url, body=folder_item)

        # Expected folder metadata, with every listed entry serialized as a child.
        expected_folder = BoxFolderMetadata(folder_item, dest_path)
        expected_folder._children = [
            provider._serialize_item(
                entry,
                dest_path.child(entry['name'], folder=(entry['type'] == 'folder')),
            )
            for entry in listing['entries']
        ]

        result = await provider.intra_copy(provider, src_path, dest_path)

        assert result == (expected_folder, True)
    async def create_folder(self, path: WaterButlerPath, folder_precheck: bool=True,
                            **kwargs) -> BoxFolderMetadata:
        """Create the folder described by ``path`` and return its metadata object.

        POSTs the folder name and the parent's identifier to the ``folders`` endpoint,
        then records the new folder's id on ``path``.

        :param path: path of the folder to create; must pass ``WaterButlerPath.validate_folder``
        :param folder_precheck: when true, a path that already has an identifier is
            rejected before any request is made
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``BoxFolderMetadata`` built from the response JSON and ``path``
        :raises: ``exceptions.FolderNamingConflict`` on the precheck or on a 409 response
        """
        WaterButlerPath.validate_folder(path)

        if folder_precheck and path.identifier is not None:
            raise exceptions.FolderNamingConflict(path.name)

        url = self.build_url('folders')
        payload = {'name': path.name, 'parent': {'id': path.parent.identifier}}
        async with self.request(
            'POST',
            url,
            data=payload,
            expects=(201, 409),
            throws=exceptions.CreateFolderError,
        ) as response:
            # 409 is an expected status so that a conflict created concurrently
            # (race condition) is still reported as a naming conflict.
            if response.status == 409:
                raise exceptions.FolderNamingConflict(path.name)
            folder_json = await response.json()

        # Store the new folder's id on the last part of the path; logging needs it later.
        last_part = path._parts[-1]
        last_part._id = folder_json['id']
        return BoxFolderMetadata(folder_json, path)
Esempio n. 8
0
    async def create_folder(self, path, **kwargs):
        """Create a folder at ``path`` through the ``fileops/create_folder`` endpoint.

        :param path: path object of the folder to create; must pass
            ``WaterButlerPath.validate_folder`` and expose ``full_path``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``DropboxFolderMetadata`` built from the response JSON and ``self.folder``
        :raises: ``exceptions.FolderNamingConflict`` when a 403 response says something
            already exists at the path
        :raises: ``exceptions.CreateFolderError`` for any other 403 response
        """
        WaterButlerPath.validate_folder(path)

        response = await self.make_request(
            'POST',
            self.build_url('fileops', 'create_folder'),
            params={
                'root': 'auto',
                'path': path.full_path
            },
            expects=(200, 403),
            throws=exceptions.CreateFolderError
        )

        data = await response.json()

        if response.status == 403:
            # The 'error' field may be absent or null; fall back to an empty string so
            # the membership test cannot raise TypeError and hide the real failure.
            error_message = data.get('error') or ''
            if 'because a file or folder already exists at path' in error_message:
                raise exceptions.FolderNamingConflict(str(path))
            raise exceptions.CreateFolderError(data, code=403)

        return DropboxFolderMetadata(data, self.folder)
Esempio n. 9
0
    def test_rename(self):
        """``rename`` changes the value reported by ``name``."""
        subject = WaterButlerPath('/this/is/a/long/path')
        assert subject.name == 'path'

        subject.rename('journey')
        assert subject.name == 'journey'
Esempio n. 10
0
 async def create_folder(self, path, **kwargs):
     """Create a folder at ``path`` through the ``files/create_folder`` endpoint.

     :param path: path object of the folder to create; must pass
         ``WaterButlerPath.validate_folder`` and expose ``full_path``
     :param kwargs: accepted for interface compatibility; not used here
     :returns: ``DropboxFolderMetadata`` built from the response data and ``self.folder``
     """
     WaterButlerPath.validate_folder(path)

     url = self.build_url('files', 'create_folder')
     # Trailing slashes are stripped from the path before it is sent.
     body = {'path': path.full_path.rstrip('/')}
     folder_json = await self.dropbox_request(
         url,
         body,
         throws=exceptions.CreateFolderError,
     )
     return DropboxFolderMetadata(folder_json, self.folder)
Esempio n. 11
0
async def move(src_bundle, dest_bundle, start_time=None, **kwargs):
    """Move between two providers and report the outcome through ``utils.log_to_callback``.

    :param dict src_bundle: must contain ``path``, ``provider`` (keyword arguments for
        ``utils.make_provider``) and ``nid``; ``path`` and ``provider`` are popped from it
    :param dict dest_bundle: same layout as ``src_bundle``, for the destination
    :param start_time: start timestamp; defaults to ``time.time()`` when falsy
    :param kwargs: forwarded to ``src_provider.move``
    :returns: the ``(metadata, created)`` pair returned by ``src_provider.move``
    :raises: whatever ``src_provider.move`` raises, after the failure has been logged
    """
    start_time = start_time or time.time()

    src_path = src_bundle.pop('path')
    src_provider = utils.make_provider(**src_bundle.pop('provider'))
    dest_path = dest_bundle.pop('path')
    dest_provider = utils.make_provider(**dest_bundle.pop('provider'))

    logger.info(
        'Starting moving {!r}, {!r} to {!r}, {!r}'.format(
            src_path, src_provider, dest_path, dest_provider
        )
    )

    metadata = None
    errors = []
    try:
        metadata, created = await src_provider.move(dest_provider, src_path, dest_path, **kwargs)
    except Exception as exc:
        logger.error('Move failed with error {!r}'.format(exc))
        errors = [repr(exc)]
        raise  # Ensure sentry sees this
    else:
        logger.info('Move succeeded')
        # On success the destination path is rebuilt from the returned metadata.
        dest_path = WaterButlerPath.from_metadata(metadata)
    finally:
        # The callback is sent on success and on failure alike.
        await utils.log_to_callback(
            'move',
            source=LogPayload(src_bundle['nid'], src_provider, path=src_path),
            destination=LogPayload(
                dest_bundle['nid'], dest_provider, path=dest_path, metadata=metadata
            ),
            start_time=start_time,
            errors=errors
        )

    return metadata, created
Esempio n. 12
0
async def copy(src_bundle, dest_bundle, request=None, start_time=None, **kwargs):
    """Copy between two providers and report the outcome through remote logging.

    :param dict src_bundle: must contain ``path``, ``provider`` (keyword arguments for
        ``utils.make_provider``) and ``nid``; ``path`` and ``provider`` are popped from it
    :param dict dest_bundle: same layout as ``src_bundle``, for the destination
    :param request: request data forwarded to ``remote_logging.wait_for_log_futures``;
        ``None`` (the default) is replaced by a fresh empty dict
    :param start_time: start timestamp; defaults to ``time.time()`` when falsy
    :param kwargs: forwarded to ``src_provider.copy``
    :returns: the ``(metadata, created)`` pair returned by ``src_provider.copy``
    :raises: whatever ``src_provider.copy`` raises, after the failure has been logged
    """
    # A ``{}`` default would be one dict shared by every call; build a new one per call
    # so nothing downstream can leak state between invocations.
    if request is None:
        request = {}
    start_time = start_time or time.time()

    src_path, src_provider = src_bundle.pop('path'), utils.make_provider(**src_bundle.pop('provider'))
    dest_path, dest_provider = dest_bundle.pop('path'), utils.make_provider(**dest_bundle.pop('provider'))

    logger.info('Starting copying {!r}, {!r} to {!r}, {!r}'
                .format(src_path, src_provider, dest_path, dest_provider))

    metadata, errors = None, []
    try:
        metadata, created = await src_provider.copy(dest_provider, src_path, dest_path, **kwargs)
    except Exception as e:
        logger.error('Copy failed with error {!r}'.format(e))
        errors = [repr(e)]
        raise  # Ensure sentry sees this
    else:
        logger.info('Copy succeeded')
        # On success the destination path is rebuilt from the returned metadata.
        dest_path = WaterButlerPath.from_metadata(metadata)
    finally:
        # The log entry is sent on success and on failure alike.
        source = LogPayload(src_bundle['nid'], src_provider, path=src_path)
        destination = LogPayload(
            dest_bundle['nid'], dest_provider, path=dest_path, metadata=metadata
        )

        await remote_logging.wait_for_log_futures(
            'copy', source=source, destination=destination, start_time=start_time,
            errors=errors, request=request, api_version='celery',
        )

    return metadata, created
Esempio n. 13
0
    async def revalidate_path(self, base: WaterButlerPath, path: str,
                              folder: bool=None) -> WaterButlerPath:
        """Build the child of ``base`` named ``path``, attaching its id when one is found.

        Lists the items of the ``base`` folder in a single request (``limit=1000``) and
        looks for the first entry whose name matches ``path`` case-insensitively.

        :param base: parent folder path; its ``identifier`` is used in the listing URL
        :param path: name of the child to look up
        :param folder: ``None`` accepts either type; otherwise the entry must be a
            folder (``True``) or a non-folder (``False``)
        :returns: ``base.child(path, ...)``; ``_id`` is ``None`` when no entry matched
        """
        # TODO Research the search api endpoint
        async with self.request(
            'GET',
            self.build_url('folders', base.identifier, 'items',
                           fields='id,name,type', limit=1000),
            expects=(200,),
            throws=exceptions.ProviderError
        ) as resp:
            listing = await resp.json()

        wanted = path.lower()
        match = None
        for entry in listing['entries']:
            if entry['name'].lower() != wanted:
                continue
            if folder is None or (entry['type'] == 'folder') == folder:
                match = entry
                break

        if match is None:
            found_id = None
        else:
            found_id = match['id']
            folder = match['type'] == 'folder'

        # The caller's spelling is used rather than the listed name because of casing issues.
        return base.child(path, _id=found_id, folder=folder)
Esempio n. 14
0
    def create_folder(self, path, **kwargs):
        """Create a folder at ``path`` by PUTting to a signed URL for its key.

        Generator-based coroutine.

        :param path: path object of the folder to create; must pass
            ``WaterButlerPath.validate_folder`` and expose ``path``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``S3FolderMetadata`` whose ``Prefix`` is ``path.path``
        :raises: ``exceptions.FolderNamingConflict`` when ``self.exists(path)`` is truthy
        """
        WaterButlerPath.validate_folder(path)

        already_there = yield from self.exists(path)
        if already_there:
            raise exceptions.FolderNamingConflict(str(path))

        key = self.bucket.new_key(path.path)
        signed_url = key.generate_url(settings.TEMP_URL_SECS, 'PUT')
        yield from self.make_request(
            'PUT',
            signed_url,
            expects=(200, 201),
            throws=exceptions.CreateFolderError
        )

        return S3FolderMetadata({'Prefix': path.path})
Esempio n. 15
0
    def __init__(self, resource, provider, metadata=None, path=None):
        """Store the pieces of a log payload.

        :param resource: stored as ``self.resource``
        :param provider: stored as ``self.provider``
        :param metadata: stored as ``self.metadata``; also the source of the path
            when ``path`` is falsy
        :param path: stored as ``self.path`` when truthy
        :raises Exception: when both ``path`` and ``metadata`` are ``None``
        """
        if metadata is None and path is None:
            raise Exception("Log payload needs either a path or metadata.")

        self.resource = resource
        self.provider = provider
        self.metadata = metadata
        # A falsy path is replaced by one derived from the metadata.
        self.path = path if path else WaterButlerPath.from_metadata(metadata)
Esempio n. 16
0
    async def copy(self,
                   dest_provider: 'BaseProvider',
                   src_path: wb_path.WaterButlerPath,
                   dest_path: wb_path.WaterButlerPath,
                   rename: str=None, conflict: str='replace',
                   handle_naming: bool=True) \
            -> typing.Tuple[wb_metadata.BaseMetadata, bool]:
        """Copy ``src_path`` on this provider to ``dest_path`` on ``dest_provider``.

        Strategy, in order: an intra-provider copy when ``can_intra_copy`` allows it,
        a recursive folder operation when the source is a directory, otherwise a
        download from the source followed by an upload to the destination.

        :param dest_provider: provider that receives the copy
        :param src_path: path to copy from
        :param dest_path: path to copy to
        :param rename: forwarded to ``dest_provider.handle_naming``
        :param conflict: forwarded to ``dest_provider.handle_naming``
        :param handle_naming: when true, ``dest_path`` is first resolved through
            ``dest_provider.handle_naming``
        :returns: the result of ``intra_copy``, ``_folder_file_op`` or ``upload``
        :raises: ``exceptions.OverwriteSelfError`` when both providers share a storage
            root and the source and destination materialized paths are equal
        """
        self.provider_metrics.add('copy', {
            'got_handle_naming': handle_naming,
            'conflict': conflict,
            'got_rename': rename is not None,
        })

        if handle_naming:
            dest_path = await dest_provider.handle_naming(
                src_path,
                dest_path,
                rename=rename,
                conflict=conflict,
            )
            # Naming is settled, so nested calls receive no naming options.
            passthrough = {}
        else:
            passthrough = {'rename': rename, 'conflict': conflict, 'handle_naming': handle_naming}

        # files and folders shouldn't overwrite themselves
        same_root = self.shares_storage_root(dest_provider)
        if same_root and src_path.materialized_path == dest_path.materialized_path:
            raise exceptions.OverwriteSelfError(src_path)

        self.provider_metrics.add('copy.can_intra_copy', False)
        if self.can_intra_copy(dest_provider, src_path):
            self.provider_metrics.add('copy.can_intra_copy', True)
            return await self.intra_copy(dest_provider, src_path, dest_path)

        if src_path.is_dir:
            return await self._folder_file_op(  # type: ignore
                self.copy, dest_provider, src_path, dest_path, **passthrough
            )

        download_stream = await self.download(src_path)

        # A stream that reports a name dictates the destination name.
        if getattr(download_stream, 'name', None):
            dest_path.rename(download_stream.name)

        return await dest_provider.upload(download_stream, dest_path)
Esempio n. 17
0
    def test_metadata(self, provider, folder_object_metadata, folder_list_metadata):
        """metadata('/') yields one file/folder metadata object per listed entry."""
        root = WaterButlerPath('/', _ids=(provider.folder, ))

        list_url = provider.build_url(
            'folders', provider.folder, 'items',
            fields='id,name,size,modified_at,etag',
        )
        aiohttpretty.register_json_uri('GET', list_url, body=folder_list_metadata)

        result = yield from provider.metadata(root)

        expected = [
            (BoxFileMetadata if entry['type'] == 'file' else BoxFolderMetadata)(
                entry, root.child(entry['name'])
            )
            for entry in folder_list_metadata['entries']
        ]

        assert result == expected
Esempio n. 18
0
    def test_metadata(self, provider, folder_object_metadata, folder_list_metadata):
        """metadata("/") yields one file/folder metadata object per listed entry."""
        root = WaterButlerPath("/", _ids=(provider.folder,))

        list_url = provider.build_url(
            "folders",
            provider.folder,
            "items",
            fields="id,name,size,modified_at,etag",
        )
        aiohttpretty.register_json_uri("GET", list_url, body=folder_list_metadata)

        result = yield from provider.metadata(root)

        expected = []
        for entry in folder_list_metadata["entries"]:
            # Entries typed "file" become file metadata; everything else is a folder.
            metadata_cls = BoxFileMetadata if entry["type"] == "file" else BoxFolderMetadata
            expected.append(metadata_cls(entry, root.child(entry["name"])))

        assert result == expected
Esempio n. 19
0
    async def create_folder(self, path, folder_precheck=True, **kwargs):
        """Create a folder at ``path`` by PUTting to a signed URL for its key.

        :param path: path object of the folder to create; must pass
            ``WaterButlerPath.validate_folder`` and expose ``path``
        :param folder_precheck: when true, ``self.exists(path)`` is consulted first
        :param kwargs: accepted for interface compatibility; not used here
        :returns: ``S3FolderMetadata`` whose ``Prefix`` is ``path.path``
        :raises: ``exceptions.FolderNamingConflict`` when the precheck finds the path
        """
        await self._check_region()

        WaterButlerPath.validate_folder(path)

        if folder_precheck and (await self.exists(path)):
            raise exceptions.FolderNamingConflict(str(path))

        # The URL is passed as a callable rather than a string, so it is generated by
        # whoever invokes it instead of here.
        key = self.bucket.new_key(path.path)
        sign_put = functools.partial(key.generate_url, settings.TEMP_URL_SECS, 'PUT')
        async with self.request(
            'PUT',
            sign_put,
            skip_auto_headers={'CONTENT-TYPE'},
            expects=(200, 201),
            throws=exceptions.CreateFolderError
        ):
            return S3FolderMetadata({'Prefix': path.path})
Esempio n. 20
0
    async def validate_path(self, path, revision=None, **kwargs):
        """Turn a string path into a ``WaterButlerPath`` tagged with ``revision``.

        The stripped path is compared against the ``fileId`` of every item returned by
        ``_maybe_fetch_metadata``; on a match the result is built from the item's name
        and id, otherwise from the raw path.

        :param str path: ``/`` for the root, otherwise a file id wrapped in slashes
        :param revision: stored on the returned path and passed as ``version`` to
            ``_maybe_fetch_metadata``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: the resulting ``WaterButlerPath``
        """
        if path == '/':
            root = WaterButlerPath('/')
            root.revision = revision
            return root

        path = path.strip('/')

        resolved = None
        # No early exit: if several items share the id, the last one wins.
        for item in (await self._maybe_fetch_metadata(version=revision)):
            if path == item.extra['fileId']:
                resolved = WaterButlerPath('/' + item.name, _ids=(None, item.extra['fileId']))

        if not resolved:
            resolved = WaterButlerPath('/' + path)

        resolved.revision = revision
        return resolved
    async def test_metadata(self, provider, root_provider_fixtures):
        """metadata('/') yields one file/folder metadata object per listed entry."""
        root = WaterButlerPath('/', _ids=(provider.folder, ))
        listing = root_provider_fixtures['folder_list_metadata']

        list_url = provider.build_url(
            'folders', provider.folder, 'items',
            fields='id,name,size,modified_at,etag,total_count',
            offset=0, limit=1000,
        )
        aiohttpretty.register_json_uri('GET', list_url, body=listing)

        result = await provider.metadata(root)

        expected = []
        for entry in listing['entries']:
            if entry['type'] == 'file':
                expected_item = BoxFileMetadata(entry, root.child(entry['name']))
            else:
                expected_item = BoxFolderMetadata(entry, root.child(entry['name'], folder=True))
            expected.append(expected_item)

        assert result == expected
Esempio n. 22
0
    async def revalidate_path(self,
                              base: wb_path.WaterButlerPath,
                              path: str,
                              folder: bool=False) -> wb_path.WaterButlerPath:
        """Build a WaterButlerPath for the child ``path`` of ``base``.

        This implementation only appends the child to ``base``. Providers that address
        objects by id will need to look up the id of the new child object.

        :param  base: ( :class:`.WaterButlerPath` ) The base folder to look under
        :param path: ( :class:`str`) the path of a child of `base`, relative to `base`
        :param folder: ( :class:`bool` ) whether the returned WaterButlerPath should
            represent a folder
        :rtype: :class:`.WaterButlerPath`
        """
        child_path = base.child(path, folder=folder)
        return child_path
Esempio n. 23
0
 async def _folder_metadata(self,
                            path: WaterButlerPath,
                            raw: bool=False) -> List[Union[BaseGoogleDriveMetadata, dict]]:
     """Collect the serialized children of the folder at ``path``, following pagination.

     :param path: folder path; its ``identifier`` is used to build the listing query
     :param raw: forwarded to ``_serialize_item`` for every child
     :returns: list of ``_serialize_item`` results, one per listed item, in page order
     """
     query = self._build_query(path.identifier)
     next_url = self.build_url('files', q=query, alt='json', maxResults=1000)

     children = []
     # Each page names the next one in 'nextLink'; stop when it is missing or empty.
     while next_url:
         async with self.request(
             'GET',
             next_url,
             expects=(200, ),
             throws=exceptions.MetadataError,
         ) as resp:
             page = await resp.json()
             for item in page['items']:
                 children.append(self._serialize_item(path.child(item['title']), item, raw=raw))
             next_url = page.get('nextLink')
     return children
Esempio n. 24
0
    async def revalidate_path(self,
                              base: WaterButlerPath,
                              name: str,
                              folder: bool=None) -> WaterButlerPath:
        """Resolve ``name`` under ``base`` into a child path carrying the resolved id.

        :param base: parent path; its ``name`` and ``identifier`` seed the resolution
        :param name: child name; quoted first when it contains an inner ``/`` and no ``%``
        :param folder: when truthy, a trailing ``/`` is appended to ``name`` if missing
        :returns: ``base.child(...)`` built from the last part returned by
            ``_resolve_path_to_ids``; it is a folder when that part's ``mimeType``
            contains ``folder``
        """
        # TODO Redo the logic here folders names ending in /s
        # Will probably break
        stripped = name.lstrip('/')
        if '/' in stripped and '%' not in name:
            # DAZ and MnC may pass unquoted names which break
            # if the name contains a / in it
            name = parse.quote(stripped, safe='')

        if folder and not name.endswith('/'):
            name += '/'

        start = {
            'title': base.name,
            'mimeType': 'folder',
            'id': base.identifier,
        }
        parts = await self._resolve_path_to_ids(name, start_at=[start])

        last = parts[-1]
        _id, name, mime = last['id'], last['title'], last['mimeType']
        return base.child(name, _id=_id, folder='folder' in mime)
Esempio n. 25
0
    async def test_upload_conflict_keep(self, provider, root_provider_fixtures,
                                        file_stream):
        """Uploading with conflict='keep' onto a path that has an id reports created=True."""
        upload_metadata = root_provider_fixtures['upload_metadata']
        item = upload_metadata['entries'][0]
        path = WaterButlerPath('/newfile', _ids=(provider.folder, item['id']))

        upload_url = provider._build_upload_url('files', 'content')
        aiohttpretty.register_json_uri('POST', upload_url, status=201, body=upload_metadata)

        metadata_url = provider.build_url('files', path.identifier)
        aiohttpretty.register_json_uri('GET', metadata_url, body=upload_metadata)

        parent_id = item['path_collection']['entries'][1]['id']
        list_url = provider.build_url('folders', parent_id, 'items',
                                      fields='id,name,type', limit=1000)
        aiohttpretty.register_json_uri('GET', list_url,
                                       body=root_provider_fixtures['folder_list_metadata'])

        metadata, created = await provider.upload(file_stream, path, conflict='keep')
        expected = BoxFileMetadata(item, path).serialized()

        # since the metadata for the renamed conflict file isn't actually saved, this one is odd to
        # test.
        assert metadata.serialized() == expected
        assert created is True
        assert path.identifier_path == metadata.path
        assert aiohttpretty.has_call(method='POST', uri=upload_url)
Esempio n. 26
0
    async def test_download_revision(self, provider, root_provider_fixtures):
        """Downloading with a revision requests the content URL carrying that version."""
        revision = '21753842'
        item = root_provider_fixtures['file_metadata']['entries'][0]
        file_id = item['id']
        path = WaterButlerPath('/triangles.txt', _ids=(provider.folder, file_id))

        metadata_url = provider.build_url('files', file_id)
        content_url = provider.build_url('files', file_id, 'content', version=revision)

        aiohttpretty.register_json_uri('GET', metadata_url, body=item)
        aiohttpretty.register_uri('GET', content_url, body=b'better', auto_length=True)

        stream = await provider.download(path, revision)
        content = await stream.read()

        assert content == b'better'
Esempio n. 27
0
    async def test_upload_update(self, provider, root_provider_fixtures,
                                 file_stream):
        """Uploading onto a path that has an id posts to that file's content URL and
        reports created=False."""
        upload_metadata = root_provider_fixtures['upload_metadata']
        existing = root_provider_fixtures['folder_list_metadata']['entries'][0]
        existing_id = existing['id']

        path = WaterButlerPath('/newfile', _ids=(provider.folder, existing_id))
        upload_url = provider._build_upload_url('files', existing_id, 'content')
        aiohttpretty.register_json_uri('POST', upload_url, status=201, body=upload_metadata)

        metadata, created = await provider.upload(file_stream, path)
        expected = BoxFileMetadata(upload_metadata['entries'][0], path).serialized()

        assert metadata.serialized() == expected
        assert created is False
        assert aiohttpretty.has_call(method='POST', uri=upload_url)
Esempio n. 28
0
    async def test_validate_path_file(self, provider, file_lineage, mock_time):
        """A file id validates to '/doc.rst' under v0 and v1; with a trailing slash,
        v1 validation raises NotFoundError."""
        file_id = file_lineage['data'][0]['id']
        file_route = '/' + file_id

        url, params = build_signed_url_without_auth(provider, 'GET', file_id, 'lineage')
        aiohttpretty.register_json_uri('GET', url, params=params, status=200,
                                       body=file_lineage)

        # The same id addressed as a folder must be rejected.
        with pytest.raises(exceptions.NotFoundError) as exc:
            await provider.validate_v1_path(file_route + '/')

        assert exc.value.code == client.NOT_FOUND

        wb_path_v0 = await provider.validate_path(file_route)
        wb_path_v1 = await provider.validate_v1_path(file_route)

        expected = WaterButlerPath('/doc.rst')
        assert wb_path_v0 == expected
        assert wb_path_v1 == expected
Esempio n. 29
0
    async def test_single_version_metadata(self, provider, single_version_metadata, mock_time):
        """revisions() returns a one-element list whose item exposes the revision attributes."""
        path = WaterButlerPath('/single-version.file')
        url = provider.bucket.generate_url(100, 'GET', query_parameters={'versions': ''})
        params = build_folder_params(path)

        aiohttpretty.register_uri('GET', url, params=params, status=200,
                                  body=single_version_metadata)

        revisions = await provider.revisions(path)

        assert isinstance(revisions, list)
        assert len(revisions) == 1

        for revision in revisions:
            for attribute in ('extra', 'version', 'version_identifier'):
                assert hasattr(revision, attribute)

        assert aiohttpretty.has_call(method='GET', uri=url, params=params)
Esempio n. 30
0
    async def test_metadata_root_file(self, provider, provider_fixtures):
        """metadata() for a file returns file metadata built from the fixture response."""
        path = WaterButlerPath('/pfile', prepend=provider.folder)
        url = provider.build_url('files', 'get_metadata')
        request_body = {'path': path.full_path}
        aiohttpretty.register_json_uri('POST', url, data=request_body,
                                       body=provider_fixtures['file_metadata'])

        result = await provider.metadata(path)

        expected_extra = {
            'revisionId': '2ba1017a0c1e',
            'id': 'id:8y8sAJlrhuAAAAAAAAAAAQ',
            'hashes': {
                'dropbox': 'meow'
            },
        }

        assert isinstance(result, core_metadata.BaseMetadata)
        assert result.kind == 'file'
        assert result.name == 'Getting_Started.pdf'
        assert result.path == '/Getting_Started.pdf'
        assert result.extra == expected_extra
Esempio n. 31
0
    def validate_path(self, path, revision=None, **kwargs):
        """Turn a string path into a ``WaterButlerPath`` tagged with ``revision``.

        Generator-based coroutine. The stripped path is compared against the
        ``fileId`` of every item returned by ``_maybe_fetch_metadata``; on a match the
        result is built from the item's name and id, otherwise from the raw path.

        :param str path: ``/`` for the root, otherwise a file id wrapped in slashes
        :param revision: stored on the returned path and passed as ``version`` to
            ``_maybe_fetch_metadata``
        :param kwargs: accepted for interface compatibility; not used here
        :returns: the resulting ``WaterButlerPath``
        """
        if path == '/':
            root = WaterButlerPath('/')
            root.revision = revision
            return root

        path = path.strip('/')

        resolved = None
        # No early exit: if several items share the id, the last one wins.
        for item in (yield from self._maybe_fetch_metadata(version=revision)):
            if path == item['extra']['fileId']:
                resolved = WaterButlerPath('/' + item['name'], _ids=(None, item['extra']['fileId']))

        if not resolved:
            resolved = WaterButlerPath('/' + path)

        resolved.revision = revision
        return resolved
Esempio n. 32
0
    async def test_complete_session(self, provider, file_stream,
                                    provider_fixtures):
        """_complete_session posts to the upload-session finish URL and returns the
        response body."""
        assert file_stream.size == 38
        # Shrink the chunk size for this test; it is set back at the end.
        provider.CHUNK_SIZE = 4

        path = WaterButlerPath('/foobah')
        session_id = provider_fixtures['session_metadata']['session_id']

        finish_url = provider._build_content_url('files', 'upload_session', 'finish')
        aiohttpretty.register_json_uri('POST', finish_url, status=200,
                                       body=provider_fixtures.get('file_metadata'))

        metadata = await provider._complete_session(file_stream, session_id, path)

        assert metadata == provider_fixtures['file_metadata']
        assert aiohttpretty.has_call(method='POST', uri=finish_url)

        provider.CHUNK_SIZE = CHUNK_SIZE
Esempio n. 33
0
    async def test_create_upload_session_new_file(self, provider,
                                                  root_provider_fixtures,
                                                  file_stream):
        """Check that the chunked upload session creation makes a request to the correct url when
        creating a new file.
        """
        path = WaterButlerPath('/newfile', _ids=(provider.folder, None))
        session_url = provider._build_upload_url('files', 'upload_sessions')

        expected_session = root_provider_fixtures['create_session_metadata']
        aiohttpretty.register_json_uri('POST', session_url, status=201,
                                       body=expected_session)

        session_data = await provider._create_chunked_upload_session(path, file_stream)

        assert root_provider_fixtures['create_session_metadata'] == session_data
        assert aiohttpretty.has_call(method='POST', uri=session_url)
Esempio n. 34
0
    def validate_path(self, path, parent=None, **kwargs):
        """Resolve ``path`` to a path object underneath ``parent``.

        Only the first segment of ``path`` is inspected.  It is compared with the ``id`` of each
        entry in the article JSON's ``files`` list; on a match the child is built from that
        entry's id and name, otherwise the child is simply named after the segment.  When
        ``path`` has no segments, ``parent`` (or a new root path carrying ``self.article_id``)
        is returned as is.
        """
        segments = path.rstrip('/').split('/')[1:]
        if parent:
            wbpath = parent
        else:
            wbpath = WaterButlerPath('/', _ids=(self.article_id, ), folder=True)

        if not segments:
            return wbpath

        name = segments[0]
        # segments that parse as integers are compared as ints, all others as the raw string
        try:
            fid = int(name)
        except ValueError:
            fid = name

        article_json = yield from self._get_article_json()
        try:
            found = next(entry for entry in article_json['files'] if entry['id'] == fid)
            return wbpath.child(_id=found['id'], name=found['name'])
        except StopIteration:
            # no file in the article carries this id
            return wbpath.child(name)
Esempio n. 35
0
    async def test_upload_checksum_mismatch(self,
                                            provider,
                                            file_stream,
                                            file_header_metadata,
                                            mock_time):
        """Uploading must raise UploadChecksumMismatchError when the PUT response carries the
        ETag ``"bad hash"``.
        """
        target = WaterButlerPath('/foobah')
        put_url = provider.bucket.new_key(target.path).generate_url(100, 'PUT')
        head_url = provider.bucket.new_key(target.path).generate_url(100, 'HEAD')

        # first HEAD answers 404, the second one returns the file headers
        head_responses = [
            {'status': 404},
            {'headers': file_header_metadata},
        ]
        aiohttpretty.register_uri('HEAD', head_url, responses=head_responses)
        aiohttpretty.register_uri('PUT', put_url, status=200, headers={'ETag': '"bad hash"'})

        with pytest.raises(exceptions.UploadChecksumMismatchError):
            await provider.upload(file_stream, target)

        assert aiohttpretty.has_call(method='PUT', uri=put_url)
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
Esempio n. 36
0
    async def test_create_upload_session_existing_file(self, provider,
                                                       root_provider_fixtures,
                                                       file_stream):
        """A chunked upload session for a path that already has a file id must be created by
        POSTing to that file's own ``upload_sessions`` url.
        """

        existing_file = WaterButlerPath('/newfile', _ids=(provider.folder, '2345'))

        file_sessions_url = 'https://upload.box.com/api/2.0/files/2345/upload_sessions'
        canned_session = root_provider_fixtures['create_session_metadata']
        aiohttpretty.register_json_uri(
            'POST', file_sessions_url, status=201, body=canned_session)

        created_session = await provider._create_chunked_upload_session(existing_file,
                                                                        file_stream)

        assert root_provider_fixtures['create_session_metadata'] == created_session
        assert aiohttpretty.has_call(method='POST', uri=file_sessions_url)
Esempio n. 37
0
    async def test_download_drive_revision(self, provider):
        """Downloading with an explicit ``revision`` must return the registered file body."""
        revision = 'oldest'
        expected_bytes = b'we love you conrad'
        file_item = fixtures.list_file['items'][0]
        path = WaterButlerPath(
            '/birdie.jpg', _ids=(provider.folder['id'], file_item['id']))

        content_url = file_item['downloadUrl']
        file_meta_url = provider.build_url('files', path.identifier)
        file_revision_url = provider.build_url(
            'files', file_item['id'], 'revisions', revision)

        aiohttpretty.register_json_uri('GET', file_revision_url, body=file_item)
        aiohttpretty.register_json_uri('GET', file_meta_url, body=file_item)
        aiohttpretty.register_uri(
            'GET', content_url, body=expected_bytes, auto_length=True)

        stream = await provider.download(path, revision=revision)
        downloaded = await stream.read()

        assert downloaded == expected_bytes
Esempio n. 38
0
    async def test_empty_metadata_folder(self, provider, folder_empty_metadata,
                                         mock_time):
        """Metadata for a non-root folder with an empty listing must be an empty list."""
        folder = WaterButlerPath('/this-is-not-the-root/')
        head_url = provider.bucket.new_key(folder.path).generate_url(100, 'HEAD')

        listing_url = provider.bucket.generate_url(100)
        listing_params = build_folder_params(folder)
        xml_headers = {'Content-Type': 'application/xml'}
        aiohttpretty.register_uri(
            'GET',
            listing_url,
            params=listing_params,
            body=folder_empty_metadata,
            headers=xml_headers,
        )

        # NOTE(review): `header=` (singular) is passed through unchanged from the original
        # test; it may have been meant as `body=` -- confirm against aiohttpretty.
        aiohttpretty.register_uri(
            'HEAD',
            head_url,
            header=folder_empty_metadata,
            headers={'Content-Type': 'application/xml'},
        )

        listing = await provider.metadata(folder)

        assert isinstance(listing, list)
        assert len(listing) == 0
    async def test_metadata_404(self, provider, native_dataset_metadata):
        """Metadata for a file id that the dataset does not contain must raise MetadataError."""

        published_url = provider.build_url(
            dvs.JSON_BASE_URL.format(provider._id, 'latest-published'),
            key=provider.token,
        )
        aiohttpretty.register_json_uri(
            'GET', published_url, status=404, body=native_dataset_metadata)

        latest_url = provider.build_url(
            dvs.JSON_BASE_URL.format(provider._id, 'latest'),
            key=provider.token,
        )
        aiohttpretty.register_json_uri(
            'GET', latest_url, status=200, body=native_dataset_metadata)

        missing_file = WaterButlerPath(
            '/thefilenotfound.txt', _ids=('?', 'nobody has this fileId'))

        with pytest.raises(exceptions.MetadataError):
            await provider.metadata(missing_file, version='latest')
    async def test_revisions(self, provider, native_dataset_metadata):
        """Revisions of a known file must come back as DataverseRevision objects, the first of
        which has ``raw == 'latest-published'``.
        """

        url = provider.build_url(dvs.JSON_BASE_URL.format(
            provider._id, 'latest-published'),
                                 key=provider.token)
        aiohttpretty.register_json_uri('GET',
                                       url,
                                       status=200,
                                       body=native_dataset_metadata)

        url = provider.build_url(dvs.JSON_BASE_URL.format(
            provider._id, 'latest'),
                                 key=provider.token)
        aiohttpretty.register_json_uri('GET',
                                       url,
                                       status=200,
                                       body=native_dataset_metadata)

        path = WaterButlerPath('/thefile.txt', _ids=('?', '19'))
        result = await provider.revisions(path, version='latest')

        # The original line was a bare `isinstance(...)` expression whose result was thrown
        # away, so the type was never checked.  `result` is indexed below, so the type check
        # is applied to its first element.
        assert isinstance(result[0], DataverseRevision)
        assert result[0].raw == 'latest-published'
Esempio n. 41
0
    def test_already_exists(self, provider):
        """Creating a folder must raise FolderNamingConflict when the POST answers 409."""
        create_url = provider.build_url('folders')
        parent_url = provider.build_url('folders', provider.folder)
        new_folder = WaterButlerPath(
            '/50 shades of nope/', _ids=(provider.folder, None))

        parent_folder_json = {
            'id': provider.folder,
            'type': 'folder',
            'name': 'All Files',
            'path_collection': {
                'entries': []
            }
        }
        aiohttpretty.register_json_uri('POST', create_url, status=409)
        aiohttpretty.register_json_uri('GET', parent_url, body=parent_folder_json)

        with pytest.raises(exceptions.FolderNamingConflict) as conflict:
            yield from provider.create_folder(new_folder)

        assert conflict.value.code == 409
        assert conflict.value.message == 'Cannot create folder "50 shades of nope" because a file or folder already exists at path "/50 shades of nope/"'
Esempio n. 42
0
    async def test_validate_v1_path_file(self, provider, file_header_metadata, mock_time):
        """v1 validation must accept the file path, reject the same name with a trailing slash
        with NOT_FOUND, and agree with ``validate_path`` for the file.
        """
        file_path = 'foobah'
        as_file = '/' + file_path
        as_folder = '/' + file_path + '/'

        listing_params = {'prefix': as_folder, 'delimiter': '/'}
        good_metadata_url = provider.bucket.new_key(as_file).generate_url(100, 'HEAD')
        bad_metadata_url = provider.bucket.generate_url(100)
        aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata)
        aiohttpretty.register_uri('GET', bad_metadata_url, params=listing_params, status=404)

        assert WaterButlerPath('/') == await provider.validate_v1_path('/')

        try:
            wb_path_v1 = await provider.validate_v1_path(as_file)
        except Exception as error:
            pytest.fail(str(error))

        with pytest.raises(exceptions.NotFoundError) as not_found:
            await provider.validate_v1_path(as_folder)

        assert not_found.value.code == client.NOT_FOUND

        wb_path_v0 = await provider.validate_path(as_file)

        assert wb_path_v1 == wb_path_v0
Esempio n. 43
0
    async def test_get_revisions_no_revisions(self, provider):
        """With an empty revisions list, a single revision built from the file's modified date
        and the list's etag plus ``DRIVE_IGNORE_VERSION`` is expected.
        """
        file_item = fixtures.list_file['items'][0]
        file_id = file_item['id']
        file_meta_url = provider.build_url('files', file_id)
        file_revisions_url = provider.build_url('files', file_item['id'], 'revisions')
        path = WaterButlerPath('/birdie.jpg', _ids=('doesntmatter', file_item['id']))

        aiohttpretty.register_json_uri('GET', file_meta_url, body=file_item)
        aiohttpretty.register_json_uri(
            'GET', file_revisions_url, body=fixtures.revisions_list_empty)

        revisions = await provider.revisions(path)

        fallback_revision = GoogleDriveRevision({
            'modifiedDate': file_item['modifiedDate'],
            'id': fixtures.revisions_list_empty['etag'] + ds.DRIVE_IGNORE_VERSION,
        })
        assert revisions == [fallback_revision]
Esempio n. 44
0
    async def _get_folder_meta(self, path: WaterButlerPath, raw: bool=False,
                               folder: bool=False) -> Union[dict, List[BoxFolderMetadata]]:
        """Fetch metadata for the folder at ``path`` or for the items it contains.

        :param path: folder whose ``identifier`` is placed in the request urls
        :param raw: hand back decoded JSON instead of serialized items
        :param folder: if true, describe the folder itself with one request instead of paging
            through its items
        :return: for ``folder=True`` the folder JSON or its serialized form; otherwise a list of
            serialized items, or (``raw=True``) a dict updated with each page's JSON, so keys
            repeated across pages keep the value of the last page
        """
        if folder:
            folder_url = self.build_url('folders', path.identifier)
            async with self.request('GET', folder_url, expects=(200, ),
                                    throws=exceptions.MetadataError) as resp:
                folder_json = await resp.json()
                if raw:
                    return folder_json
                return self._serialize_item(folder_json, path)

        # Box maximum limit is 1000
        limit = 1000
        pages_fetched = 0
        page_total = None  # learned from `total_count` on the first page
        full_resp = {} if raw else []  # type: ignore
        while page_total is None or pages_fetched < page_total:
            items_url = self.build_url(
                'folders', path.identifier, 'items',
                fields='id,name,size,modified_at,etag,total_count',
                offset=(pages_fetched * limit),
                limit=limit,
            )
            async with self.request('GET', items_url, expects=(200, ),
                                    throws=exceptions.MetadataError) as response:
                page = await response.json()
                if raw:
                    full_resp.update(page)  # type: ignore
                else:
                    for entry in page['entries']:
                        child = path.child(entry['name'], folder=(entry['type'] == 'folder'))
                        full_resp.append(self._serialize_item(entry, child))  # type: ignore

                pages_fetched += 1
                if page_total is None:
                    page_total = ((page['total_count'] - 1) // limit) + 1  # ceiling div
        self.metrics.add('metadata.folder.pages', page_total)
        return full_resp
Esempio n. 45
0
    async def test_download_range(self, provider):
        """A ranged download must be flagged partial, return the registered bytes and send the
        expected headers (including ``Range``) with the POST.
        """
        path = WaterButlerPath('/triangles.txt', prepend=provider.folder)
        download_url = provider._build_content_url('files', 'download')
        aiohttpretty.register_uri(
            'POST', download_url, body=b'be', auto_length=True, status=206)

        stream = await provider.download(path, range=(0, 1))
        assert stream.partial
        received = await stream.response.read()

        assert received == b'be'

        expected_headers = {
            'Authorization': 'Bearer wrote harry potter',
            'Range': 'bytes=0-1',
            'Dropbox-API-Arg': '{"path": "/Photos/triangles.txt"}',
            'Content-Type': ''
        }
        assert aiohttpretty.has_call(
            method='POST', uri=download_url, headers=expected_headers)
Esempio n. 46
0
async def test_download_without_auth(monkeypatch, provider_and_mock,
                                     osf_response, mock_path, mock_time):
    """With ``provider.auth`` emptied, a download must still hit the signed url, build the
    inner provider from the response settings and delegate the download to it.
    """
    provider, inner_provider = provider_and_mock
    provider.auth = {}  # Remove auth for test

    unsigned_url = provider.build_url(
        mock_path.identifier, 'download', version=None, mode=None)
    signed_url, _, signed_params = provider.build_signed_url(
        'GET', unsigned_url, params={})

    aiohttpretty.register_json_uri(
        'GET', signed_url, params=signed_params, body=osf_response)

    await provider.download(mock_path)

    assert provider.make_provider.called
    assert inner_provider.download.called

    assert aiohttpretty.has_call(method='GET', uri=signed_url, params=signed_params)
    provider.make_provider.assert_called_once_with(osf_response['settings'])
    inner_provider.download.assert_called_once_with(
        path=WaterButlerPath('/test/path'),
        displayName='unrelatedpath',
    )
Esempio n. 47
0
 async def _folder_metadata(
         self,
         path: WaterButlerPath,
         raw: bool = False) -> List[Union[BaseGoogleDriveMetadata, dict]]:
     """List the items found by the query built from ``path.identifier``.

     Requests are repeated for as long as the response JSON carries a ``nextLink``; every
     entry of each page's ``items`` is passed through ``_serialize_item`` with ``raw``.
     """
     query = self._build_query(path.identifier)
     next_url = self.build_url('files', q=query, alt='json', maxResults=1000)

     collected = []
     while next_url:
         resp = await self.make_request(
             'GET', next_url, expects=(200, ), throws=exceptions.MetadataError)
         page = await resp.json()
         for item in page['items']:
             collected.append(
                 self._serialize_item(path.child(item['title']), item, raw=raw))
         # a missing `nextLink` yields None and ends the loop
         next_url = page.get('nextLink')
     return collected
Esempio n. 48
0
    async def test_upload_encrypted(self, provider, file_content, file_stream, file_metadata, mock_time):
        """With ``encrypt_uploads`` switched on, an upload must report a newly created file
        whose ``extra['encryption']`` is ``'AES256'``.
        """
        # Set trigger for encrypt_key=True in s3.provider.upload
        provider.encrypt_uploads = True
        target = WaterButlerPath('/foobah')
        md5_hex = hashlib.md5(file_content).hexdigest()
        put_url = provider.bucket.new_key(target.path).generate_url(100, 'PUT', encrypt_key=True)
        head_url = provider.bucket.new_key(target.path).generate_url(100, 'HEAD')

        # first HEAD answers 404, the second one returns the file headers
        head_responses = [
            {'status': 404},
            {'headers': file_metadata},
        ]
        aiohttpretty.register_uri('HEAD', head_url, responses=head_responses)
        etag_header = {'ETag': '"{}"'.format(md5_hex)}
        aiohttpretty.register_uri('PUT', put_url, status=200, headers=etag_header)

        metadata, created = await provider.upload(file_stream, target)

        assert metadata.kind == 'file'
        assert metadata.extra['encryption'] == 'AES256'
        assert created
        assert aiohttpretty.has_call(method='PUT', uri=put_url)
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
Esempio n. 49
0
    def test_child(self):
        """A folder path reports its last segment as name, and so does a child made from it."""
        parent = WaterButlerPath('/this/is/a/long/')
        assert parent.name == 'long'

        descendant = parent.child('path')
        assert descendant.name == 'path'
Esempio n. 50
0
def dest_path():
    """Return a WaterButlerPath for ``/usr/bin/golang``."""
    destination = WaterButlerPath('/usr/bin/golang')
    return destination
Esempio n. 51
0
def src_path():
    """Return a WaterButlerPath for ``/user/bin/python``."""
    source = WaterButlerPath('/user/bin/python')
    return source
Esempio n. 52
0
    async def copy(self,
                   dest_provider: provider.BaseProvider,
                   src_path: WaterButlerPath,
                   dest_path: WaterButlerPath,
                   rename: str=None,
                   conflict: str='replace',
                   handle_naming: bool=True) -> typing.Tuple[BaseMetadata, bool]:
        """Override parent's copy to support cross-region osfstorage copies. Delegates to
        :meth:`.BaseProvider.copy` when destination is not osfstorage. If both providers are in the
        same region (i.e. `.can_intra_copy` is true), call `.intra_copy`. Otherwise, grab a
        download stream from the source region, send it to the destination region, *then* execute
        an `.intra_copy` to make new file metadata entries in the OSF.

        This is needed because a same-region osfstorage copy will duplicate *all* the versions of
        the file, but `.BaseProvider.copy` will only copy the most recent version.

        :raises: `exceptions.OverwriteSelfError` when both providers share a storage root and the
            source and destination materialized paths are equal
        """

        # when copying to non-osfstorage, the default copy is fine
        if dest_provider.NAME != 'osfstorage':
            return await super().copy(dest_provider, src_path, dest_path, rename=rename,
                                      conflict=conflict, handle_naming=handle_naming)

        args = (dest_provider, src_path, dest_path)
        kwargs = {'rename': rename, 'conflict': conflict}

        self.provider_metrics.add('copy', {
            'got_handle_naming': handle_naming,
            'conflict': conflict,
            'got_rename': rename is not None,
        })

        if handle_naming:
            dest_path = await dest_provider.handle_naming(
                src_path,
                dest_path,
                rename=rename,
                conflict=conflict,
            )
            # rebuild args around the resolved dest_path; rename/conflict were handed to
            # handle_naming above, so they are not forwarded a second time
            args = (dest_provider, src_path, dest_path)
            kwargs = {}

        # files and folders shouldn't overwrite themselves
        if (
            self.shares_storage_root(dest_provider) and
            src_path.materialized_path == dest_path.materialized_path
        ):
            raise exceptions.OverwriteSelfError(src_path)

        # record False first; it is set to True only if the intra-copy branch is taken
        self.provider_metrics.add('copy.can_intra_copy', False)
        if self.can_intra_copy(dest_provider, src_path):
            self.provider_metrics.add('copy.can_intra_copy', True)
            return await self.intra_copy(*args)

        if src_path.is_dir:
            # folders go through _folder_file_op with this method as the per-item operation
            meta_data, created = await self._folder_file_op(self.copy, *args, **kwargs)  # type: ignore
        else:
            download_stream = await self.download(src_path)
            # if the stream carries a non-empty name, the destination takes it over
            if getattr(download_stream, 'name', None):
                dest_path.rename(download_stream.name)

            # push the bytes to the destination first, then create the metadata entries
            await dest_provider._send_to_storage_provider(download_stream,  # type: ignore
                                                          dest_path, **kwargs)
            meta_data, created = await self.intra_copy(dest_provider, src_path, dest_path)

        return meta_data, created
Esempio n. 53
0
 async def validate_path(self, path: str, **kwargs) -> WaterButlerPath:
     """Wrap ``path`` in a WaterButlerPath that is prepended with ``self.folder``."""
     validated = WaterButlerPath(path, prepend=self.folder)
     return validated
Esempio n. 54
0
 async def validate_path(self, path, **kwargs):
     """Wrap ``path`` in a WaterButlerPath; extra keyword arguments are ignored."""
     validated = WaterButlerPath(path)
     return validated
Esempio n. 55
0
    async def move(self,
                   dest_provider: provider.BaseProvider,
                   src_path: WaterButlerPath,
                   dest_path: WaterButlerPath,
                   rename: str=None,
                   conflict: str='replace',
                   handle_naming: bool=True) -> typing.Tuple[BaseMetadata, bool]:
        """Override parent's move to support cross-region osfstorage moves while preserving guids
        and versions. Delegates to :meth:`.BaseProvider.move` when destination is not osfstorage.
        If both providers are in the same region (i.e. `.can_intra_move` is true), then calls that.
        Otherwise, will grab a download stream from the source region, send it to the destination
        region, *then* execute an `.intra_move` to update the file metadata in-place.

        :raises: `exceptions.OverwriteSelfError` when both providers share a storage root and the
            source and destination materialized paths are equal
        """

        # when moving to non-osfstorage, default move is fine
        if dest_provider.NAME != 'osfstorage':
            return await super().move(dest_provider, src_path, dest_path, rename=rename,
                                      conflict=conflict, handle_naming=handle_naming)

        args = (dest_provider, src_path, dest_path)
        kwargs = {'rename': rename, 'conflict': conflict}

        self.provider_metrics.add('move', {
            'got_handle_naming': handle_naming,
            'conflict': conflict,
            'got_rename': rename is not None,
        })

        if handle_naming:
            dest_path = await dest_provider.handle_naming(
                src_path,
                dest_path,
                rename=rename,
                conflict=conflict,
            )
            # rebuild args around the resolved dest_path; rename/conflict were handed to
            # handle_naming above, so they are not forwarded a second time
            args = (dest_provider, src_path, dest_path)
            kwargs = {}

        # files and folders shouldn't overwrite themselves
        if (
            self.shares_storage_root(dest_provider) and
            src_path.materialized_path == dest_path.materialized_path
        ):
            raise exceptions.OverwriteSelfError(src_path)

        # record False first; it is set to True only if the intra-move branch is taken
        self.provider_metrics.add('move.can_intra_move', False)
        if self.can_intra_move(dest_provider, src_path):
            self.provider_metrics.add('move.can_intra_move', True)
            return await self.intra_move(*args)

        if src_path.is_dir:
            # folders go through _folder_file_op with this method as the per-item operation,
            # after which the source folder itself is deleted
            meta_data, created = await self._folder_file_op(self.move, *args, **kwargs)  # type: ignore
            await self.delete(src_path)
        else:
            download_stream = await self.download(src_path)
            # if the stream carries a non-empty name, the destination takes it over
            if getattr(download_stream, 'name', None):
                dest_path.rename(download_stream.name)

            # push the bytes to the destination first, then update the metadata
            await dest_provider._send_to_storage_provider(download_stream,  # type: ignore
                                                          dest_path, **kwargs)
            meta_data, created = await self.intra_move(dest_provider, src_path, dest_path)

        return meta_data, created
Esempio n. 56
0
def folder_wb_path():
    """Return a WaterButlerPath for the folder ``/xml-api/folder-1/``."""
    folder = WaterButlerPath('/xml-api/folder-1/')
    return folder
Esempio n. 57
0
 def path_from_metadata(self,
                        parent_path: wb_path.WaterButlerPath,
                        meta_data: wb_metadata.BaseMetadata) -> wb_path.WaterButlerPath:
     """Build the child of ``parent_path`` that ``meta_data`` describes.

     The child is named ``meta_data.name``, is a folder when ``meta_data.is_folder`` is set,
     and uses ``meta_data.path`` with leading and trailing slashes stripped as its id.
     """
     child_id = meta_data.path.strip('/')
     return parent_path.child(
         meta_data.name,
         _id=child_id,
         folder=meta_data.is_folder,
     )
Esempio n. 58
0
 async def test_metadata_file_does_not_exist(self, connected_provider):
     """Metadata for a path whose HEAD request answers 404 must raise MetadataError."""
     missing = WaterButlerPath('/does_not.exist')
     head_url = connected_provider.build_url(missing.path)
     aiohttpretty.register_uri('HEAD', head_url, status=404)

     with pytest.raises(exceptions.MetadataError):
         await connected_provider.metadata(missing)
Esempio n. 59
0
    def test_metadata_missing(self, provider):
        """Metadata for a path whose last id is ``None`` must raise NotFoundError."""
        # parent id is the provider's folder; the file's own id is None
        path = WaterButlerPath('/Something', _ids=(provider.folder, None))

        with pytest.raises(exceptions.NotFoundError):
            yield from provider.metadata(path)