Example #1
    async def intra_move(self,  # type: ignore
                         dest_provider: provider.BaseProvider,
                         src_path: wb_path.WaterButlerPath,
                         dest_path: wb_path.WaterButlerPath) \
                         -> typing.Tuple[BaseGoogleDriveMetadata, bool]:
        self.metrics.add('intra_move.destination_exists', dest_path.identifier is not None)
        if dest_path.identifier:
            await dest_provider.delete(dest_path)

        async with self.request(
            'PATCH',
            self.build_url('files', src_path.identifier),
            headers={
                'Content-Type': 'application/json'
            },
            data=json.dumps({
                'parents': [{
                    'id': dest_path.parent.identifier
                }],
                'title': dest_path.name
            }),
            expects=(200, ),
            throws=exceptions.IntraMoveError,
        ) as resp:
            data = await resp.json()

        created = dest_path.identifier is None
        dest_path.parts[-1]._id = data['id']

        if dest_path.is_dir:
            metadata = GoogleDriveFolderMetadata(data, dest_path)
            metadata._children = await self._folder_metadata(dest_path)
            return metadata, created
        else:
            return GoogleDriveFileMetadata(data, dest_path), created  # type: ignore
Example #2
    def test_upload_doesnt_unquote(self, provider, file_stream):
        upload_id = '7'
        item = fixtures.list_file['items'][0]
        path = GoogleDrivePath('/birdie%2F %20".jpg',
                               _ids=(provider.folder['id'], None))

        start_upload_url = provider._build_upload_url('files',
                                                      uploadType='resumable')
        finish_upload_url = provider._build_upload_url('files',
                                                       uploadType='resumable',
                                                       upload_id=upload_id)

        aiohttpretty.register_json_uri('PUT', finish_upload_url, body=item)
        aiohttpretty.register_uri(
            'POST',
            start_upload_url,
            headers={
                'LOCATION':
                'http://waterbutler.io?upload_id={}'.format(upload_id)
            })

        result, created = yield from provider.upload(file_stream, path)

        expected = GoogleDriveFileMetadata(item, path).serialized()

        assert created is True
        assert result == expected
        assert aiohttpretty.has_call(method='POST', uri=start_upload_url)
        assert aiohttpretty.has_call(method='PUT', uri=finish_upload_url)
Example #3
    async def test_upload_update(self, provider, file_stream):
        upload_id = '7'
        item = fixtures.list_file['items'][0]
        path = WaterButlerPath('/birdie.jpg',
                               _ids=(provider.folder['id'], item['id']))

        start_upload_url = provider._build_upload_url('files',
                                                      path.identifier,
                                                      uploadType='resumable')
        finish_upload_url = provider._build_upload_url('files',
                                                       path.identifier,
                                                       uploadType='resumable',
                                                       upload_id=upload_id)

        aiohttpretty.register_json_uri('PUT', finish_upload_url, body=item)
        aiohttpretty.register_uri(
            'PUT',
            start_upload_url,
            headers={
                'LOCATION':
                'http://waterbutler.io?upload_id={}'.format(upload_id)
            })
        result, created = await provider.upload(file_stream, path)

        assert aiohttpretty.has_call(method='PUT', uri=start_upload_url)
        assert aiohttpretty.has_call(method='PUT', uri=finish_upload_url)
        assert created is False
        expected = GoogleDriveFileMetadata(item, path)
        assert result == expected
Example #4
    async def test_upload_create_nested(self, provider, file_stream):
        upload_id = '7'
        item = fixtures.list_file['items'][0]
        path = WaterButlerPath('/ed/sullivan/show.mp3',
                               _ids=[str(x) for x in range(3)])

        start_upload_url = provider._build_upload_url('files',
                                                      uploadType='resumable')
        finish_upload_url = provider._build_upload_url('files',
                                                       uploadType='resumable',
                                                       upload_id=upload_id)
        aiohttpretty.register_uri(
            'POST',
            start_upload_url,
            headers={
                'LOCATION':
                'http://waterbutler.io?upload_id={}'.format(upload_id)
            })
        aiohttpretty.register_json_uri('PUT', finish_upload_url, body=item)
        result, created = await provider.upload(file_stream, path)

        assert aiohttpretty.has_call(method='POST', uri=start_upload_url)
        assert aiohttpretty.has_call(method='PUT', uri=finish_upload_url)
        assert created is True
        expected = GoogleDriveFileMetadata(item, path)
        assert result == expected
Example #5
    def test_file_metadata_drive_slashes(self, basepath,
                                         root_provider_fixtures):
        item = root_provider_fixtures['file_forward_slash']
        path = basepath.child(item['title'])
        parsed = GoogleDriveFileMetadata(item, path)

        assert parsed.provider == 'googledrive'
        assert parsed.id == item['id']
        assert parsed.name == item['title']
        assert parsed.name == path.name
        assert parsed.size == item['fileSize']
        assert parsed.size_as_int == 918668
        assert type(parsed.size_as_int) == int
        assert parsed.modified == item['modifiedDate']
        assert parsed.content_type == item['mimeType']
        assert parsed.extra == {
            'revisionId': item['version'],
            'webView': item['alternateLink'],
            'hashes': {
                'md5': item['md5Checksum']
            },
        }
        assert parsed.path == '/' + os.path.join(*[x.raw for x in path.parts])
        assert parsed.materialized_path == str(path)
        assert parsed.is_google_doc is False
        assert parsed.export_name == item['title']
Example #6
    async def intra_copy(self,
                         dest_provider: provider.BaseProvider,
                         src_path: wb_path.WaterButlerPath,
                         dest_path: wb_path.WaterButlerPath) \
                         -> typing.Tuple[GoogleDriveFileMetadata, bool]:
        self.metrics.add('intra_copy.destination_exists', dest_path.identifier is not None)
        if dest_path.identifier:
            await dest_provider.delete(dest_path)

        async with self.request(
            'POST',
            self.build_url('files', src_path.identifier, 'copy'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'parents': [{
                    'id': dest_path.parent.identifier
                }],
                'title': dest_path.name
            }),
            expects=(200, ),
            throws=exceptions.IntraMoveError,
        ) as resp:
            data = await resp.json()

        # GoogleDrive doesn't support intra-copy for folders, so dest_path will always
        # be a file.  See can_intra_copy() for type check.
        return GoogleDriveFileMetadata(data, dest_path), dest_path.identifier is None
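
The comment above defers the file-vs-folder check to can_intra_copy(). As a rough sketch of what such a guard could look like (the function name and exact logic below are assumptions for illustration, not the provider's actual code):

# Illustration only (not WaterButler's actual implementation): a hypothetical
# guard that limits in-provider copies to files on the same provider, matching
# the comment in intra_copy() above.
def can_intra_copy_sketch(src_provider, dest_provider, path=None):
    # Use Drive's "copy" endpoint only when source and destination are the
    # same provider and the target is a file; folder copies fall back to a
    # recursive cross-provider copy.
    return src_provider is dest_provider and path is not None and path.is_file
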
Example #7
    def _serialize_item(self,
                        path: wb_path.WaterButlerPath,
                        item: dict,
                        raw: bool=False) -> typing.Union[BaseGoogleDriveMetadata, dict]:
        if raw:
            return item
        if item['mimeType'] == self.FOLDER_MIME_TYPE:
            return GoogleDriveFolderMetadata(item, path)
        return GoogleDriveFileMetadata(item, path)
Example #8
    def test_metadata_file_root(self, provider):
        path = WaterButlerPath('/birdie.jpg', _ids=(provider.folder['id'], fixtures.list_file['items'][0]['id']))

        list_file_url = provider.build_url('files', path.identifier)
        aiohttpretty.register_json_uri('GET', list_file_url, body=fixtures.list_file['items'][0])

        result = yield from provider.metadata(path)

        expected = GoogleDriveFileMetadata(fixtures.list_file['items'][0], path)
        assert result == expected
Example #9
def test_file_metadata_docs(basepath):
    item = fixtures.docs_file_metadata
    path = basepath.child(item['title'])
    parsed = GoogleDriveFileMetadata(item, path)

    assert parsed.name == item['title'] + '.gdoc'
    assert parsed.extra == {
        'revisionId': item['version'],
        'downloadExt': '.docx'
    }
Example #10
def test_file_metadata_docs(basepath):
    item = fixtures.docs_file_metadata
    path = basepath.child(item['title'])
    parsed = GoogleDriveFileMetadata(item, path)

    assert parsed.name == item['title'] + '.gdoc'
    assert parsed.extra == {
        'revisionId': item['version'],
        'downloadExt': '.docx',
        'webView': item['alternateLink']
    }
    assert parsed.is_google_doc is True
    assert parsed.export_name == item['title'] + '.docx'
Example #11
    def test_file_metadata_docs(self, basepath, root_provider_fixtures):
        item = root_provider_fixtures['docs_file_metadata']
        path = basepath.child(item['title'])
        parsed = GoogleDriveFileMetadata(item, path)

        assert parsed.name == item['title'] + '.gdoc'
        assert parsed.extra == {
            'revisionId': item['version'],
            'downloadExt': '.docx',
            'webView': item['alternateLink'],
        }
        assert parsed.is_google_doc is True
        assert parsed.export_name == item['title'] + '.docx'
Example #12
    def test_metadata_root_folder(self, provider):
        path = yield from provider.validate_path('/')
        query = provider._build_query(provider.folder['id'])
        list_file_url = provider.build_url('files', q=query, alt='json')
        aiohttpretty.register_json_uri('GET', list_file_url, body=fixtures.list_file)

        result = yield from provider.metadata(path)

        expected = GoogleDriveFileMetadata(
            fixtures.list_file['items'][0],
            path.child(fixtures.list_file['items'][0]['title'])
        )
        assert result == [expected]
Example #13
    async def upload(self, stream, path, **kwargs):
        assert path.is_file

        if path.identifier:
            segments = (path.identifier, )
        else:
            segments = ()

        upload_metadata = self._build_upload_metadata(path.parent.identifier, path.name)
        upload_id = await self._start_resumable_upload(not path.identifier, segments, stream.size, upload_metadata)
        data = await self._finish_resumable_upload(segments, stream, upload_id)

        return GoogleDriveFileMetadata(data, path), path.identifier is None
Example #14
    async def test_metadata_file_nested(self, provider):
        path = GoogleDrivePath('/hugo/kim/pins',
                               _ids=[str(x) for x in range(4)])

        item = fixtures.generate_list(3)['items'][0]
        url = provider.build_url('files', path.identifier)

        aiohttpretty.register_json_uri('GET', url, body=item)

        result = await provider.metadata(path)

        expected = GoogleDriveFileMetadata(item, path)
        assert result == expected
        assert aiohttpretty.has_call(method='GET', uri=url)
Example #15
def test_file_metadata_drive_slashes(basepath):
    item = fixtures.file_forward_slash
    path = basepath.child(item['title'])
    parsed = GoogleDriveFileMetadata(item, path)

    assert parsed.provider == 'googledrive'
    assert parsed.id == item['id']
    assert parsed.name == item['title']
    assert parsed.name == path.name
    assert parsed.size == item['fileSize']
    assert parsed.modified == item['modifiedDate']
    assert parsed.content_type == item['mimeType']
    assert parsed.extra == {'revisionId': item['version']}
    assert parsed.path == '/' + os.path.join(*[x.raw for x in path.parts])
    assert parsed.materialized_path == str(path)
Example #16
    async def test_metadata_folder_nested(self, provider):
        path = GoogleDrivePath('/hugo/kim/pins/',
                               _ids=[str(x) for x in range(4)])

        body = fixtures.generate_list(3)
        item = body['items'][0]

        query = provider._build_query(path.identifier)
        url = provider.build_url('files', q=query, alt='json', maxResults=1000)

        aiohttpretty.register_json_uri('GET', url, body=body)

        result = await provider.metadata(path)

        expected = GoogleDriveFileMetadata(item, path.child(item['title']))

        assert result == [expected]
        assert aiohttpretty.has_call(method='GET', uri=url)
Example #17
    async def intra_copy(self, dest_provider, src_path, dest_path):
        if dest_path.identifier:
            await dest_provider.delete(dest_path)

        async with self.request(
            'POST',
            self.build_url('files', src_path.identifier, 'copy'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'parents': [{
                    'id': dest_path.parent.identifier
                }],
                'title': dest_path.name
            }),
            expects=(200, ),
            throws=exceptions.IntraMoveError,
        ) as resp:
            data = await resp.json()
        return GoogleDriveFileMetadata(data, dest_path), dest_path.identifier is None
Example #18
def test_file_metadata_drive(basepath):
    item = fixtures.list_file['items'][0]
    path = basepath.child(item['title'])
    parsed = GoogleDriveFileMetadata(item, path)

    assert parsed.provider == 'googledrive'
    assert parsed.id == item['id']
    assert path.name == item['title']
    assert parsed.name == item['title']
    assert parsed.size == item['fileSize']
    assert parsed.modified == item['modifiedDate']
    assert parsed.content_type == item['mimeType']
    assert parsed.extra == {
        'revisionId': item['version'],
        'webView': item['alternateLink']
    }
    assert parsed.path == '/' + os.path.join(*[x.raw for x in path.parts])
    assert parsed.materialized_path == str(path)
    assert parsed.is_google_doc is False
    assert parsed.export_name == item['title']
Example #19
    async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \
            -> typing.Tuple[GoogleDriveFileMetadata, bool]:
        assert path.is_file

        if path.identifier:
            segments = [path.identifier]
        else:
            segments = []

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        upload_metadata = self._build_upload_metadata(path.parent.identifier, path.name)
        upload_id = await self._start_resumable_upload(not path.identifier, segments, stream.size,
                                                       upload_metadata)
        data = await self._finish_resumable_upload(segments, stream, upload_id)

        if data['md5Checksum'] != stream.writers['md5'].hexdigest:
            raise exceptions.UploadChecksumMismatchError()

        return GoogleDriveFileMetadata(data, path), path.identifier is None
Example #20
    def intra_move(self, dest_provider, src_path, dest_path):
        if dest_path.identifier:
            yield from dest_provider.delete(dest_path)

        resp = yield from self.make_request(
            'PATCH',
            self.build_url('files', src_path.identifier),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'parents': [{
                    'id': dest_path.parent.identifier
                }],
                'title': dest_path.name
            }),
            expects=(200, ),
            throws=exceptions.IntraMoveError,
        )

        data = yield from resp.json()
        return GoogleDriveFileMetadata(data,
                                       dest_path), dest_path.identifier is None
Example #21
    def _serialize_item(self, path, item, raw=False):
        if raw:
            return item
        if item['mimeType'] == self.FOLDER_MIME_TYPE:
            return GoogleDriveFolderMetadata(item, path)
        return GoogleDriveFileMetadata(item, path)
Example #22
    def _serialize_item(self, path, item, raw=False):
        if raw:
            return item
        if item['mimeType'] == 'application/vnd.google-apps.folder':
            return GoogleDriveFolderMetadata(item, path)
        return GoogleDriveFileMetadata(item, path)
Example #23
    async def _file_metadata(self,
                             path: GoogleDrivePath,
                             revision: str=None,
                             raw: bool=False):
        """ Returns metadata for the file identified by `path`.  If the `revision` arg is set,
        will attempt to return metadata for the given revision of the file.  If the revision does
        not exist, ``_file_metadata`` will throw a 404.

        This method used to error with a 500 when metadata was requested for a file that the
        authorizing user only had view or commenting permissions for.  The GDrive revisions
        endpoint returns a 403, which was not being handled.  WB appends a sentinel value to the
        revisions for these files.  If a revision ending with this sentinel value is detected, this
        method will return metadata for the latest revision of the file.  If a revision NOT ending
        in the sentinel value is requested for a read-only file, this method will return a 404 Not
        Found instead.

        Metrics:

        ``_file_metadata.got_revision``: did this request include a revision parameter?

        ``_file_metadata.revision_is_valid``: if a revision was given, was it valid? A revision is
        "valid" if it doesn't end with our sentinal string (`settings.DRIVE_IGNORE_VERSION`).

        ``_file_metadata.user_role``: What role did the user possess? Helps identify other roles
        for which revision information isn't available.

        :param GoogleDrivePath path: the path of the file whose metadata is being requested
        :param str revision: a string representing the ID of the revision (default: `None`)
        :param bool raw: should we return the raw response object from the GDrive API?
        :rtype: GoogleDriveFileMetadata
        :rtype: dict
        :return: metadata for the file or the raw response object from the GDrive API
        """

        self.metrics.add('_file_metadata.got_revision', revision is not None)

        valid_revision = revision and not revision.endswith(settings.DRIVE_IGNORE_VERSION)
        if revision:
            self.metrics.add('_file_metadata.revision_is_valid', valid_revision)

        if revision and valid_revision:
            url = self.build_url('files', path.identifier, 'revisions', revision)
        else:
            url = self.build_url('files', path.identifier)

        async with self.request(
            'GET', url,
            expects=(200, 403, 404, ),
            throws=exceptions.MetadataError,
        ) as resp:
            try:
                data = await resp.json()
            except:  # some 404s return a string instead of json
                data = await resp.read()

        if resp.status != 200:
            raise exceptions.NotFoundError(path)

        if revision and valid_revision:
            return GoogleDriveFileRevisionMetadata(data, path)

        user_role = data['userPermission']['role']
        self.metrics.add('_file_metadata.user_role', user_role)
        can_access_revisions = user_role in self.ROLES_ALLOWING_REVISIONS
        if drive_utils.is_docs_file(data):
            if can_access_revisions:
                return await self._handle_docs_versioning(path, data, raw=raw)
            else:
                # Revisions are not available for some sharing configurations. If revisions list is
                # empty, use the etag of the file plus a sentinel string as a dummy revision ID.
                data['version'] = data['etag'] + settings.DRIVE_IGNORE_VERSION

        return data if raw else GoogleDriveFileMetadata(data, path)
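
To make the sentinel-revision convention from the docstring above concrete, here is a small standalone sketch. The sentinel value below is a placeholder, not the real settings.DRIVE_IGNORE_VERSION, and the etag/revision strings are made up for illustration.

# Standalone sketch of the sentinel-revision convention described in
# _file_metadata()'s docstring.  '(no-revisions)' is only a placeholder for
# settings.DRIVE_IGNORE_VERSION.
DRIVE_IGNORE_VERSION = '(no-revisions)'


def dummy_revision_for(etag):
    # For read-only files whose revisions endpoint returns a 403, the file's
    # etag plus the sentinel stands in for a real revision ID.
    return etag + DRIVE_IGNORE_VERSION


def is_valid_revision(revision):
    # A revision ending with the sentinel is not treated as a real revision:
    # metadata for the latest version of the file is returned instead.
    return not revision.endswith(DRIVE_IGNORE_VERSION)


assert is_valid_revision('0B1e3ExampleRevisionId')
assert not is_valid_revision(dummy_revision_for('"etag-abc123"'))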