def test_get_target_url_works(self):
    url = 'http://localhost:9000'
    eq_(get_target_url(url, 'bucket-name'),
        'http://localhost:9000/bucket-name/')
    eq_(get_target_url(url, 'bucket-name', 'objectName'),
        'http://localhost:9000/bucket-name/objectName')
    eq_(get_target_url(url, 'bucket-name', 'objectName', None),
        'http://localhost:9000/bucket-name/objectName')
    eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                       {'foo': 'bar'}),
        'http://localhost:9000/bucket-name/objectName?foo=bar')
    eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                       {'foo': 'bar', 'b': 'c', 'a': 'b'}),
        'http://localhost:9000/bucket-name/objectName?a=b&b=c&foo=bar')

    # S3 urls.
    s3_url = 'https://s3.amazonaws.com'
    eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
    eq_(get_target_url(s3_url, 'my.bucket.name'),
        'https://s3.amazonaws.com/my.bucket.name/')
    eq_(get_target_url(s3_url, 'bucket-name', 'objectName', 'us-west-2', None),
        'https://bucket-name.s3-us-west-2.amazonaws.com/objectName')
    eq_(get_target_url('http://localhost:9000', 'bucket-name', 'objectName',
                       'us-east-1', {'versionId': 'uuid'}),
        'http://localhost:9000/bucket-name/objectName?versionId=uuid')
def test_get_target_url_works(self):
    url = 'http://localhost:9000'
    eq_(get_target_url(url, 'bucket-name'),
        'http://localhost:9000/bucket-name/')
    eq_(get_target_url(url, 'bucket-name', 'objectName'),
        'http://localhost:9000/bucket-name/objectName')
    eq_(get_target_url(url, 'bucket-name', 'objectName', None),
        'http://localhost:9000/bucket-name/objectName')
    eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                       {'foo': 'bar'}),
        'http://localhost:9000/bucket-name/objectName?foo=bar')
    eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                       {'foo': 'bar', 'b': 'c', 'a': 'b'}),
        'http://localhost:9000/bucket-name/objectName?a=b&b=c&foo=bar')

    # S3 urls.
    s3_url = 'https://s3.amazonaws.com'
    eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
    eq_(get_target_url(s3_url, 'my.bucket.name'),
        'https://s3.amazonaws.com/my.bucket.name/')
    eq_(get_target_url(s3_url, 'bucket-name', 'objectName', 'us-west-2', None),
        'https://bucket-name.s3-us-west-2.amazonaws.com/objectName')
def url(self, name: str, *args, max_age: T.Optional[datetime.timedelta] = None) -> str:
    if self.presign_urls:
        url = self._presigned_url(name, max_age=max_age)
    else:
        if self.base_url is not None:
            def strip_beg(path):
                while path.startswith("/"):
                    path = path[1:]
                return path

            def strip_end(path):
                while path.endswith("/"):
                    path = path[:-1]
                return path

            url = "{}/{}".format(strip_end(self.base_url),
                                 urllib.parse.quote(strip_beg(name)))
        else:
            url = get_target_url(
                self.client._endpoint_url,
                bucket_name=self.bucket_name,
                object_name=name,
                # bucket_region=region,
            )
    return url
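The presign branch above delegates to a private `_presigned_url` helper that is not shown here. Judging by the fuller variant later in this section, which forwards `expires` straight to minio's `presigned_get_object`, the helper presumably looks roughly like this (a sketch under that assumption, not the project's actual code):

def _presigned_url(self, name, max_age=None):
    # Only forward `expires` when a max_age was requested, so the minio
    # client's default expiry applies otherwise.
    kwargs = {}
    if max_age is not None:
        kwargs["expires"] = max_age
    return self.client.presigned_get_object(self.bucket_name, name, **kwargs)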
async def _get_object(self, bucket: str, object_name: str, destination: BinaryIO) -> None:
    """
    Fetch a remote object into a binary file/stream.

    :param bucket: the bucket where the object is stored
    :param object_name: the path to the object
    """
    url = get_target_url(self._config.url, bucket_name=bucket, object_name=object_name)
    headers = self._ensure_auth_headers("GET", url)

    try:
        async with self._session.get(url, headers=headers) as response:
            if response.status != 200:
                raise StorageError(
                    f"Could not fetch `{bucket}/{object_name}` from minio")

            while True:
                chunk = await response.content.read(128 * 1024)
                if not chunk:
                    break
                destination.write(chunk)
    except AioHTTPClientError as ce:
        raise StorageInaccessibleError() from ce
def test_signv4(self):
    # Construct target url.
    credentials = Credentials(
        provider=Static(
            access_key='minio',
            secret_key='minio123',
        )
    )
    url = get_target_url('http://localhost:9000',
                         bucket_name='testbucket',
                         object_name='~testobject',
                         bucket_region='us-east-1',
                         query={'partID': '1', 'uploadID': '~abcd'})
    # `dt` is a fixed request datetime defined elsewhere in the test module
    # (the credential scope in the expected signature shows 2015-06-20).
    hdrs = sign_v4('PUT', url, 'us-east-1',
                   credentials=credentials,
                   request_datetime=dt)
    eq_(hdrs['Authorization'],
        'AWS4-HMAC-SHA256 Credential=minio/20150620/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=a2f4546f647981732bd90dfa5a7599c44dca92f44bea48ecc7565df06032c25b')
async def is_accessible(self) -> bool:
    """
    Implementation of :py:meth:`shepherd.storage.Storage.is_accessible`.
    """
    url = get_target_url(self._config.url, "does-not-matter")
    headers = self._ensure_auth_headers('HEAD', url)

    try:
        await self._session.head(url, headers=headers)
        return True
    except AioHTTPClientError:
        return False
def url(self, name: str, *args, max_age: T.Optional[datetime.timedelta] = None) -> str:
    kwargs = {}
    if max_age is not None:
        kwargs["expires"] = max_age

    # NOTE: Here be dragons. When an external base_url is used, the code
    # below is both using "internal" minio client APIs and somewhat
    # subverting how minio/S3 expects urls to be generated in the first
    # place.
    if self.presign_urls:
        url = self.client.presigned_get_object(self.bucket_name, name, **kwargs)
        if self.base_url is not None:
            parsed_url = urlparse(url)
            path = parsed_url.path.split(self.bucket_name, 1)[1]
            url = "{}{}?{}{}{}".format(
                self.base_url,
                path,
                parsed_url.params,
                parsed_url.query,
                parsed_url.fragment,
            )
    else:
        if self.base_url is not None:
            def strip_beg(path):
                while path.startswith("/"):
                    path = path[1:]
                return path

            def strip_end(path):
                while path.endswith("/"):
                    path = path[:-1]
                return path

            url = "{}/{}".format(strip_end(self.base_url),
                                 urllib.parse.quote(strip_beg(name)))
        else:
            url = get_target_url(
                self.client._endpoint_url,
                bucket_name=self.bucket_name,
                object_name=name,
                # bucket_region=region,
            )
    return url
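The `base_url` rewriting in the presign branch is easiest to see with concrete values. A minimal, self-contained illustration (the host, bucket name and signature below are made up; real values come from the minio client):

from urllib.parse import urlparse

presigned = "http://minio:9000/media-bucket/report.pdf?X-Amz-Signature=abc123"
base_url = "https://cdn.example.com/media"

parsed = urlparse(presigned)
path = parsed.path.split("media-bucket", 1)[1]  # "/report.pdf"
url = "{}{}?{}{}{}".format(base_url, path,
                           parsed.params, parsed.query, parsed.fragment)
# url == "https://cdn.example.com/media/report.pdf?X-Amz-Signature=abc123"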
async def job_dir_exists(self, job_id: str) -> bool:
    """
    Implementation of :py:meth:`shepherd.storage.Storage.job_data_exists`.
    """
    url = get_target_url(self._config.url, job_id)
    headers = self._ensure_auth_headers('HEAD', url)

    try:
        response = await self._session.head(url, headers=headers)
    except AioHTTPClientError as error:
        raise StorageInaccessibleError(
            f"Failed to check minio bucket `{job_id}`") from error

    return response.status == 200
async def init_job(self, job_id: str):
    """
    Implementation of :py:meth:`shepherd.storage.Storage.init_job`.
    """
    url = get_target_url(self._config.url, bucket_name=job_id)
    headers = self._ensure_auth_headers("PUT", url)

    try:
        response = await self._session.put(url, headers=headers)
    except AioHTTPClientError as he:
        raise StorageInaccessibleError() from he

    if response.status == 409:
        raise NameConflictError("A job with this ID was already submitted")

    if response.status != 200:
        raise StorageError(f"Failed to create minio bucket `{job_id}`")
async def _put_object(self, bucket: str, object_name: str, content: BinaryIO, length: int) -> None:
    """
    Store data from a file/stream object as a remote object.

    :param bucket: the bucket where the object should be stored
    :param object_name: the name of the new object
    :param content: a stream containing the object data
    :param length: the length of the data
    """
    url = get_target_url(self._config.url, bucket_name=bucket, object_name=object_name)
    headers = self._ensure_user_agent_header({
        "Content-Length": str(length),
        "Content-Type": "application/octet-stream"
    })

    data = content.read()
    content_sha256 = get_sha256_hexdigest(data)

    if self._config.secure:
        headers["Content-Md5"] = get_md5_base64digest(data)
        content_sha256 = "UNSIGNED-PAYLOAD"

    headers = self._ensure_auth_headers("PUT", url, headers, content_sha256=content_sha256)

    try:
        response = await self._session.put(url, data=data, headers=headers)
    except AioHTTPClientError as ce:
        raise StorageInaccessibleError() from ce

    if response.status != 200:
        raise StorageError(
            f"Failed to upload object `{bucket}/{object_name}`")
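The upload path relies on two digest helpers, `get_sha256_hexdigest` and `get_md5_base64digest`, whose bodies are not part of this section. Assuming they are thin wrappers around the standard library, they might look like this (a sketch, not the project's code):

import base64
import hashlib


def get_sha256_hexdigest(data: bytes) -> str:
    # Hex digest used for the x-amz-content-sha256 signing header.
    return hashlib.sha256(data).hexdigest()


def get_md5_base64digest(data: bytes) -> str:
    # Base64-encoded MD5 digest used for the Content-Md5 header.
    return base64.b64encode(hashlib.md5(data).digest()).decode("ascii")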
async def get_file(self, job_id: str, file_path: str) -> Optional[StreamReader]:
    """
    Implementation of :py:meth:`shepherd.storage.Storage.get_file`.
    """
    url = get_target_url(self._config.url, bucket_name=job_id, object_name=file_path)
    headers = self._ensure_auth_headers("GET", url)

    try:
        if not await self._object_exists(job_id, file_path):
            return None

        response = await self._session.get(url, headers=headers)
        return response.content
    except AioHTTPClientError as he:
        raise StorageInaccessibleError() from he
async def _object_exists(self, bucket: str, object_name: str) -> bool:
    """
    Check if an object exists in the remote storage.

    :param bucket: the bucket to search for the object
    :param object_name: name of the object
    :return: True if the object exists, False otherwise
    """
    url = get_target_url(self._config.url, bucket_name=bucket, object_name=object_name)
    headers = self._ensure_auth_headers("HEAD", url)

    try:
        response = await self._session.head(url, headers=headers)
        return response.status == 200
    except AioHTTPClientError as ce:
        raise StorageInaccessibleError() from ce
def url(self, name):
    # type: (str) -> str
    # NOTE: Here be dragons. When an external base_url is used, the code
    # below is both using "internal" minio client APIs and somewhat
    # subverting how minio/S3 expects urls to be generated in the first
    # place.
    if self.presign_urls:
        url = self.client.presigned_get_object(self.bucket_name, name)
        if self.base_url is not None:
            parsed_url = urlparse(url)
            path = parsed_url.path.split(self.bucket_name, 1)[1]
            url = '{0}{1}?{2}{3}{4}'.format(self.base_url,
                                            path,
                                            parsed_url.params,
                                            parsed_url.query,
                                            parsed_url.fragment)
    else:
        if self.base_url is not None:
            def strip_beg(path):
                while path.startswith('/'):
                    path = path[1:]
                return path

            def strip_end(path):
                while path.endswith('/'):
                    path = path[:-1]
                return path

            url = "{}/{}".format(strip_end(self.base_url), strip_beg(name))
        else:
            url = get_target_url(
                self.client._endpoint_url,
                bucket_name=self.bucket_name,
                object_name=name,
                # bucket_region=region,
            )
    return url
def test_get_target_url_works(self):
    url = 'http://localhost:9000'
    eq_(get_target_url(url, 'bucket'), 'http://localhost:9000/bucket')
    eq_(get_target_url(url, 'bucket', 'key'),
        'http://localhost:9000/bucket/key')
    eq_(get_target_url(url, 'bucket', 'key', None),
        'http://localhost:9000/bucket/key')
    eq_(get_target_url(url, 'bucket', 'key', {'foo': 'bar'}),
        'http://localhost:9000/bucket/key?foo=bar')
    eq_(get_target_url(url, 'bucket', 'key',
                       {'foo': 'bar', 'b': 'c', 'a': 'b'}),
        'http://localhost:9000/bucket/key?a=b&b=c&foo=bar')

    s3_url = 'https://s3.amazonaws.com'
    eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
async def _list_bucket(self, bucket: str) -> AsyncIterable[str]:
    """
    List the names of all files in a bucket.

    :param bucket: the bucket to list
    :return: a generator of file names
    """
    continuation_token = None
    truncated = True

    while truncated:
        query = {}
        if continuation_token is not None:
            query["continuation-token"] = continuation_token

        url = get_target_url(self._config.url, bucket_name=bucket, query=query)
        headers = self._ensure_auth_headers("GET", url)

        try:
            response = await self._session.get(url, headers=headers)
        except AioHTTPClientError as ce:
            raise StorageInaccessibleError() from ce

        if response.status != 200:
            raise StorageError(f"Listing minio bucket `{bucket}` failed")

        tree = ElementTree.fromstring(await response.text())

        for key in map(lambda el: el.text, tree.findall(".//s3:Key", self._NS)):
            yield key

        truncated = tree.find("s3:IsTruncated", self._NS).text != "false"
        # Use the token *text* for the next request, not the XML element itself;
        # the element may be absent on the final (non-truncated) page.
        next_token = tree.find("s3:NextContinuationToken", self._NS)
        continuation_token = next_token.text if next_token is not None else None
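The listing code resolves the `s3:`-prefixed element names through `self._NS`, which is not shown in this section. It is presumably the standard S3 XML namespace map used with `xml.etree.ElementTree` (an assumption based on the ListObjectsV2 response format):

from xml.etree import ElementTree

# Namespace map for ListObjectsV2 responses; the URI is the standard S3 schema.
_NS = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}

# Example: extract object keys from a response body `xml_text`.
# tree = ElementTree.fromstring(xml_text)
# keys = [el.text for el in tree.findall(".//s3:Key", _NS)]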