async def upload(self, stream, path, conflict='replace', **kwargs):
    """Upload a file to provider root or to an article whose defined_type is
    configured to represent a folder.

    :param asyncio.StreamReader stream: stream to upload
    :param FigsharePath path: FigsharePath to upload the file to.
    :param dict \*\*kwargs: Will be passed to returned metadata object
    """
    path, exists = await self.handle_name_conflict(path, conflict=conflict)
    if not path.parent.is_root:
        parent_resp = await self.make_request(
            'GET',
            self.build_url(False, *self.root_path_parts, 'articles', path.parent.identifier),
            expects=(200, ),
        )
        parent_json = await parent_resp.json()
        if parent_json['defined_type'] not in settings.FOLDER_TYPES:
            del path._parts[1]

    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
    file_id = await self._upload_file(self.container_id, path.name, stream)

    # Build new file path and return metadata
    path = FigsharePath('/' + file_id, _ids=('', file_id), folder=False, is_public=False)
    metadata = await self.metadata(path, **kwargs)
    if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
        raise exceptions.UploadChecksumMismatchError()

    return metadata, True
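# A minimal, illustrative sketch (not WaterButler's implementation) of the
# "tee into a hasher while streaming" pattern that the
# `stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))` calls in
# this module rely on: every chunk read for the upload also feeds the hash,
# so the digest is ready the moment the request body has been sent.
import hashlib


class HashingReader:
    """Wrap an iterable of byte chunks and hash everything read through it."""

    def __init__(self, chunks, algo=hashlib.md5):
        self._chunks = iter(chunks)
        self.hasher = algo()

    def __iter__(self):
        for chunk in self._chunks:
            self.hasher.update(chunk)
            yield chunk


# Usage: join what "the HTTP client" would read, then compare digests.
reader = HashingReader([b'hello ', b'world'])
body = b''.join(reader)
assert reader.hasher.hexdigest() == hashlib.md5(body).hexdigest()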
async def _create_blob(self, stream):
    blob_stream = streams.JSONStream({
        'encoding': 'base64',
        'content': streams.Base64EncodeStream(stream),
    })

    sha1_calculator = streams.HashStreamWriter(hashlib.sha1)
    stream.add_writer('sha1', sha1_calculator)

    # Git hashes the object header along with the content, so seed the
    # hasher with "blob {size}\0" before the stream's bytes pass through.
    git_blob_header = 'blob {}\0'.format(str(stream.size))
    sha1_calculator.write(git_blob_header.encode('utf-8'))

    resp = await self.make_request(
        'POST',
        self.build_repo_url('git', 'blobs'),
        data=blob_stream,
        headers={
            'Content-Type': 'application/json',
            'Content-Length': str(blob_stream.size),
        },
        expects=(201, ),
        throws=exceptions.UploadError,
    )
    blob_metadata = await resp.json()

    if stream.writers['sha1'].hexdigest != blob_metadata['sha']:
        raise exceptions.UploadChecksumMismatchError()

    return blob_metadata
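# A self-contained sketch of the checksum `_create_blob` verifies: Git's blob
# SHA-1 covers a "blob {size}\0" header plus the raw content, which is why the
# code above seeds the hasher with the header before the stream's bytes pass
# through. This helper is illustrative, not part of the provider.
import hashlib


def git_blob_sha1(content: bytes) -> str:
    """Compute the SHA-1 Git would assign to ``content`` as a blob object."""
    header = 'blob {}\0'.format(len(content)).encode('utf-8')
    return hashlib.sha1(header + content).hexdigest()


# The empty blob hashes to Git's well-known e69de29b... object id.
assert git_blob_sha1(b'') == 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'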
async def _contiguous_upload(self, stream, path):
    """Upload the given stream in a single request."""
    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

    headers = {'Content-Length': str(stream.size)}
    # this is usually set in boto.s3.key.generate_url, but do it here
    # to be explicit about our header payloads for signing purposes
    if self.encrypt_uploads:
        headers['x-amz-server-side-encryption'] = 'AES256'

    upload_url = functools.partial(
        self.bucket.new_key(path.path).generate_url,
        settings.TEMP_URL_SECS,
        'PUT',
        headers=headers,
    )
    resp = await self.make_request(
        'PUT',
        upload_url,
        data=stream,
        skip_auto_headers={'CONTENT-TYPE'},
        headers=headers,
        expects=(200, 201, ),
        throws=exceptions.UploadError,
    )
    await resp.release()

    # md5 is returned as the ETag header as long as server-side encryption is not used.
    if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
        raise exceptions.UploadChecksumMismatchError()
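# The `upload_url` partial above defers boto2's `generate_url` so that each
# retry gets a freshly signed URL. For reference, a hedged boto3 equivalent of
# producing such a presigned PUT URL (bucket and key are placeholders; the
# provider itself uses boto2, not this call):
import boto3


def presigned_put_url(bucket: str, key: str, expires: int = 3600) -> str:
    s3 = boto3.client('s3')
    return s3.generate_presigned_url(
        'put_object',
        Params={'Bucket': bucket, 'Key': key},
        ExpiresIn=expires,  # plays the role of settings.TEMP_URL_SECS
    )
# The caller then PUTs the raw bytes to the returned URL with matching headers.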
async def upload(self, stream, path, **kwargs):
    """Zips the given stream, then uploads it to Dataverse. Deletes any
    existing draft files with the same name first.

    :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
    :param str path: The filename prepended with '/'
    :rtype: dict, bool
    """
    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

    zip_stream = streams.ZipStreamReader(AsyncIterator([(path.name, stream)]))

    # Write the zip stream to disk (necessary to learn the zip file's size)
    f = tempfile.TemporaryFile()
    chunk = await zip_stream.read()
    while chunk:
        f.write(chunk)
        chunk = await zip_stream.read()
    file_stream = streams.FileStreamReader(f)

    dv_headers = {
        'Content-Disposition': 'filename=temp.zip',
        'Content-Type': 'application/zip',
        'Packaging': 'http://purl.org/net/sword/package/SimpleZip',
        'Content-Length': str(file_stream.size),
    }

    # Delete the old file if it exists
    if path.identifier:
        await self.delete(path)

    resp = await self.make_request(
        'POST',
        self.build_url(settings.EDIT_MEDIA_BASE_URL, 'study', self.doi),
        headers=dv_headers,
        auth=(self.token, ),
        data=file_stream,
        expects=(201, ),
        throws=exceptions.UploadError,
    )
    await resp.release()

    # Find the appropriate version of the file's metadata
    metadata = await self._get_data('latest')
    files = metadata if isinstance(metadata, list) else []
    file_metadata = next(file for file in files if file.name == path.name)

    if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
        raise exceptions.UploadChecksumMismatchError()

    return file_metadata, path.identifier is None
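# A synchronous sketch of the "spool the zip to disk to learn its size" step
# above, using plain bytes instead of WaterButler streams. SWORD requires a
# Content-Length up front, and a zip's size is only known once it is written.
import tempfile
import zipfile


def spooled_zip_size(name: str, payload: bytes) -> int:
    """Zip ``payload`` into a temp file and return the finished archive's size."""
    with tempfile.TemporaryFile() as f:
        with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(name, payload)
        return f.tell()  # file position equals the archive size once the zip closes


assert spooled_zip_size('example.txt', b'hello') > 0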
async def upload(self, stream, path, conflict='replace', **kwargs):
    """Upload a file to provider root or to an article whose defined_type is
    configured to represent a folder.

    :param asyncio.StreamReader stream: stream to upload
    :param FigsharePath path: FigsharePath to upload the file to.
    :param dict \*\*kwargs: Will be passed to returned metadata object
    """
    if path.identifier and conflict == 'replace':
        raise exceptions.UnsupportedOperationError('Files in Figshare cannot be updated')

    path, exists = await self.handle_name_conflict(path, conflict=conflict)
    if not path.parent.is_root:
        parent_resp = await self.make_request(
            'GET',
            self.build_url(False, *self.root_path_parts, 'articles', path.parent.identifier),
            expects=(200, ),
        )
        parent_json = await parent_resp.json()
        if parent_json['defined_type'] not in settings.FOLDER_TYPES:
            del path._parts[1]

    # Create an article or retrieve the article_id from an existing article
    if not path.parent.is_root:
        article_id = path.parent.identifier
    else:
        article_name = json.dumps({'title': path.name})
        if self.container_type == 'project':
            article_id = await self._create_article(article_name)
        elif self.container_type == 'collection':
            # TODO don't think this is correct. Probably should POST to /accounts/articles
            article_id = await self._create_article(article_name)
            article_list = json.dumps({'articles': [article_id]})
            await self.make_request(
                'POST',
                self.build_url(False, *self.root_path_parts, 'articles'),
                data=article_list,
                expects=(201, ),
            )

    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
    file_id = await self._upload_file(article_id, path.name, stream)

    # Build new file path and return metadata
    path = FigsharePath('/' + article_id + '/' + file_id,
                        _ids=(self.container_id, article_id, file_id),
                        folder=False,
                        is_public=False)
    metadata = await self.metadata(path, **kwargs)
    if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
        raise exceptions.UploadChecksumMismatchError()

    return metadata, True
async def upload(self, stream, path, conflict='replace', **kwargs):
    """Uploads the given stream to S3

    :param waterbutler.core.streams.RequestWrapper stream: The stream to put to S3
    :param str path: The full path of the key to upload to/into
    :rtype: dict, bool
    """
    await self._check_region()

    path, exists = await self.handle_name_conflict(path, conflict=conflict)
    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

    headers = {'Content-Length': str(stream.size)}
    # this is usually set in boto.s3.key.generate_url, but do it here
    # to be explicit about our header payloads for signing purposes
    if self.encrypt_uploads:
        headers['x-amz-server-side-encryption'] = 'AES256'

    upload_url = functools.partial(
        self.bucket.new_key(path.path).generate_url,
        settings.TEMP_URL_SECS,
        'PUT',
        headers=headers,
    )
    resp = await self.make_request(
        'PUT',
        upload_url,
        data=stream,
        skip_auto_headers={'CONTENT-TYPE'},
        headers=headers,
        expects=(200, 201, ),
        throws=exceptions.UploadError,
    )

    # md5 is returned as the ETag header as long as server-side encryption is not used.
    if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
        raise exceptions.UploadChecksumMismatchError()
    await resp.release()

    return (await self.metadata(path, **kwargs)), not exists
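# The ETag comparison above holds only for single-request (non-multipart)
# PUTs without SSE-KMS: in that case S3 returns the object's MD5 hex digest
# as a quoted ETag. A minimal illustration of the check:
import hashlib


def etag_matches(local_md5_hex: str, etag_header: str) -> bool:
    """True when a non-multipart S3 ETag agrees with a locally computed MD5."""
    return local_md5_hex == etag_header.strip('"')


digest = hashlib.md5(b'payload').hexdigest()
assert etag_matches(digest, '"{}"'.format(digest))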
async def upload(self,  # type: ignore
                 stream: streams.BaseStream,
                 path: WaterButlerPath,
                 conflict: str = 'replace',
                 **kwargs) -> Tuple[BoxFileMetadata, bool]:
    if path.identifier and conflict == 'keep':
        path, _ = await self.handle_name_conflict(path, conflict=conflict, kind='folder')
        path._parts[-1]._id = None

    stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

    data_stream = streams.FormDataStream(
        attributes=json.dumps({'name': path.name, 'parent': {'id': path.parent.identifier}})
    )
    data_stream.add_file('file', stream, path.name, disposition='form-data')

    async with self.request(
        'POST',
        self._build_upload_url(*filter(lambda x: x is not None,
                                       ('files', path.identifier, 'content'))),
        data=data_stream,
        headers=data_stream.headers,
        expects=(201, ),
        throws=exceptions.UploadError,
    ) as resp:
        data = await resp.json()

    entry = data['entries'][0]
    if stream.writers['sha1'].hexdigest != entry['sha1']:
        raise exceptions.UploadChecksumMismatchError()

    created = path.identifier is None
    path._parts[-1]._id = entry['id']
    return BoxFileMetadata(entry, path), created
async def _contiguous_upload(self, path: WaterButlerPath,
                             stream: streams.BaseStream) -> dict:
    """Upload a file to Box using a single request. This will only be called
    if the file is smaller than the ``NONCHUNKED_UPLOAD_LIMIT``.

    API Docs: https://developer.box.com/reference#upload-a-file
    """
    assert stream.size <= self.NONCHUNKED_UPLOAD_LIMIT

    stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

    data_stream = streams.FormDataStream(
        attributes=json.dumps({'name': path.name, 'parent': {'id': path.parent.identifier}})
    )
    data_stream.add_file('file', stream, path.name, disposition='form-data')

    if path.identifier is not None:
        segments = ['files', path.identifier, 'content']
    else:
        segments = ['files', 'content']
    response = await self.make_request(
        'POST',
        self._build_upload_url(*segments),
        data=data_stream,
        headers=data_stream.headers,
        expects=(201, ),
        throws=exceptions.UploadError,
    )
    data = await response.json()

    entry = data['entries'][0]
    if stream.writers['sha1'].hexdigest != entry['sha1']:
        raise exceptions.UploadChecksumMismatchError()

    return entry
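# A hedged sketch of the multipart body Box's upload endpoint expects: a JSON
# `attributes` part followed by the `file` part, which is what
# `streams.FormDataStream` assembles above. The aiohttp usage here is an
# illustrative assumption, not the provider's own stream machinery.
import json

import aiohttp


def box_style_form(name: str, parent_id: str, payload: bytes) -> aiohttp.FormData:
    form = aiohttp.FormData()
    form.add_field('attributes', json.dumps({'name': name, 'parent': {'id': parent_id}}))
    form.add_field('file', payload, filename=name, content_type='application/octet-stream')
    return form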
async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \
        -> typing.Tuple[GoogleDriveFileMetadata, bool]:
    assert path.is_file

    if path.identifier:
        segments = [path.identifier]
    else:
        segments = []

    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

    upload_metadata = self._build_upload_metadata(path.parent.identifier, path.name)
    upload_id = await self._start_resumable_upload(not path.identifier, segments,
                                                   stream.size, upload_metadata)
    data = await self._finish_resumable_upload(segments, stream, upload_id)

    if data['md5Checksum'] != stream.writers['md5'].hexdigest:
        raise exceptions.UploadChecksumMismatchError()

    return GoogleDriveFileMetadata(data, path), path.identifier is None
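# A hedged, self-contained sketch of the two-step resumable protocol that
# `_start_resumable_upload` / `_finish_resumable_upload` wrap: open a session
# to obtain an upload URL from the Location header, then PUT the bytes to it.
# The endpoint, token handling, and single-request body are assumptions for
# illustration; WaterButler's helpers differ in detail.
import json

import aiohttp

RESUMABLE_URL = 'https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable'


async def resumable_upload(token: str, name: str, payload: bytes) -> dict:
    headers = {
        'Authorization': 'Bearer ' + token,
        'Content-Type': 'application/json; charset=UTF-8',
    }
    async with aiohttp.ClientSession() as session:
        # Step 1: create the upload session; the target URL comes back in Location.
        async with session.post(RESUMABLE_URL, headers=headers,
                                data=json.dumps({'name': name})) as resp:
            session_url = resp.headers['Location']
        # Step 2: send the bytes (one request here; chunked PUTs are optional).
        async with session.put(session_url, data=payload) as resp:
            return await resp.json()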
async def upload(self, stream, path, check_created=True, fetch_metadata=True, **kwargs):
    """Uploads the given stream to CloudFiles

    :param ResponseStreamReader stream: The stream to put to CloudFiles
    :param str path: The full path of the object to upload to/into
    :rtype: dict, bool
    """
    if check_created:
        created = not (await self.exists(path))
    else:
        created = None
    self.metrics.add('upload.check_created', check_created)

    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
    resp = await self.make_request(
        'PUT',
        functools.partial(self.sign_url, path, 'PUT'),
        data=stream,
        headers={'Content-Length': str(stream.size)},
        expects=(200, 201),
        throws=exceptions.UploadError,
    )
    await resp.release()

    # md5 is returned as the ETag header as long as server-side encryption is not used.
    if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
        raise exceptions.UploadChecksumMismatchError()

    if fetch_metadata:
        metadata = await self.metadata(path)
    else:
        metadata = None
    self.metrics.add('upload.fetch_metadata', fetch_metadata)

    return metadata, created