Example 1
 def test_get_target_url_works(self):
     url = 'http://localhost:9000'
     eq_(get_target_url(url, 'bucket-name'),
         'http://localhost:9000/bucket-name/')
     eq_(get_target_url(url, 'bucket-name', 'objectName'),
         'http://localhost:9000/bucket-name/objectName')
     eq_(get_target_url(url, 'bucket-name', 'objectName', None),
         'http://localhost:9000/bucket-name/objectName')
     eq_(
         get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                        {'foo': 'bar'}),
         'http://localhost:9000/bucket-name/objectName?foo=bar')
     eq_(
         get_target_url(url, 'bucket-name', 'objectName', 'us-east-1', {
             'foo': 'bar',
             'b': 'c',
             'a': 'b'
         }), 'http://localhost:9000/bucket-name/objectName?a=b&b=c&foo=bar')
     # S3 urls.
     s3_url = 'https://s3.amazonaws.com'
     eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
     eq_(get_target_url(s3_url, 'my.bucket.name'),
         'https://s3.amazonaws.com/my.bucket.name/')
     eq_(
         get_target_url(s3_url, 'bucket-name', 'objectName', 'us-west-2',
                        None),
         'https://bucket-name.s3-us-west-2.amazonaws.com/objectName')
     eq_(
         get_target_url('http://localhost:9000', 'bucket-name',
                        'objectName', 'us-east-1', {'versionId': 'uuid'}),
         'http://localhost:9000/bucket-name/objectName?versionId=uuid')
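Taken together, the assertions above pin down the shape of get_target_url: path-style URLs (<endpoint>/<bucket>/<object>) for generic endpoints and for dotted bucket names, virtual-host-style URLs (<bucket>.s3-<region>.amazonaws.com/<object>) for amazonaws.com endpoints when a region is supplied, and query parameters encoded in alphabetical order. A minimal sketch of that behaviour, under the hypothetical name build_target_url (this is not minio's implementation):

    from urllib.parse import quote, urlsplit


    def build_target_url(endpoint, bucket_name=None, object_name=None,
                         bucket_region=None, query=None):
        # Hypothetical re-implementation of the behaviour exercised above.
        scheme, netloc = urlsplit(endpoint)[:2]
        if (bucket_name and bucket_region and "." not in bucket_name
                and netloc.endswith("amazonaws.com")):
            # Virtual-host style: <bucket>.s3-<region>.amazonaws.com/<object>
            netloc = "{}.s3-{}.amazonaws.com".format(bucket_name, bucket_region)
            path = "/"
        else:
            # Path style: <endpoint>/<bucket>/<object>
            path = "/{}/".format(bucket_name) if bucket_name else "/"
        if object_name:
            path = path.rstrip("/") + "/" + quote(object_name)
        url = "{}://{}{}".format(scheme, netloc, path)
        if query:
            url += "?" + "&".join("{}={}".format(key, quote(str(query[key])))
                                  for key in sorted(query))
        return url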
Example 2
 def test_get_target_url_works(self):
     url = 'http://localhost:9000'
     eq_(get_target_url(url, 'bucket-name'),
         'http://localhost:9000/bucket-name/')
     eq_(get_target_url(url, 'bucket-name', 'objectName'),
         'http://localhost:9000/bucket-name/objectName')
     eq_(get_target_url(url, 'bucket-name', 'objectName', None),
         'http://localhost:9000/bucket-name/objectName')
     eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                        {'foo': 'bar'}),
         'http://localhost:9000/bucket-name/objectName?foo=bar')
     eq_(get_target_url(url, 'bucket-name', 'objectName', 'us-east-1',
                        {'foo': 'bar',
                         'b': 'c',
                         'a': 'b'}),
         'http://localhost:9000/bucket-name/objectName?a=b&b=c&foo=bar')
     # S3 urls.
     s3_url = 'https://s3.amazonaws.com'
     eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
     eq_(get_target_url(s3_url, 'my.bucket.name'),
         'https://s3.amazonaws.com/my.bucket.name/')
     eq_(get_target_url(s3_url,
                        'bucket-name',
                        'objectName',
                        'us-west-2', None),
         'https://bucket-name.s3-us-west-2.amazonaws.com/objectName')
Example 3
    def url(self,
            name: str,
            *args,
            max_age: T.Optional[datetime.timedelta] = None) -> str:
        if self.presign_urls:
            url = self._presigned_url(name, max_age=max_age)
        else:
            if self.base_url is not None:

                def strip_beg(path):
                    while path.startswith("/"):
                        path = path[1:]
                    return path

                def strip_end(path):
                    while path.endswith("/"):
                        path = path[:-1]
                    return path

                url = "{}/{}".format(strip_end(self.base_url),
                                     urllib.parse.quote(strip_beg(name)))
            else:
                url = get_target_url(
                    self.client._endpoint_url,
                    bucket_name=self.bucket_name,
                    object_name=name,
                    # bucket_region=region,
                )
        return url
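The base_url branch above strips slashes by hand before joining. The same join can be written more compactly; a sketch (not part of the original class):

    import urllib.parse


    def join_base_url(base_url: str, name: str) -> str:
        # Equivalent to the strip_end/strip_beg helpers above: drop all
        # trailing slashes from the base URL, all leading slashes from the
        # object name, and percent-encode the name.
        return "{}/{}".format(base_url.rstrip("/"),
                              urllib.parse.quote(name.lstrip("/")))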
Example 4
    async def _get_object(self, bucket: str, object_name: str,
                          destination: BinaryIO) -> None:
        """
        Fetch a remote object into a binary file/stream.

        :param bucket: the bucket where the object is stored
        :param object_name: the path to the object
        :param destination: a writable binary stream the object is copied into
        """

        url = get_target_url(self._config.url,
                             bucket_name=bucket,
                             object_name=object_name)
        headers = self._ensure_auth_headers("GET", url)

        try:
            async with self._session.get(url, headers=headers) as response:
                if response.status != 200:
                    raise StorageError(
                        f"Could not fetch `{bucket}/{object_name}` from minio")

                while True:
                    chunk = await response.content.read(128 * 1024)

                    if not chunk:
                        break

                    destination.write(chunk)
        except AioHTTPClientError as ce:
            raise StorageInaccessibleError() from ce
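The loop above streams the response body in 128 KiB chunks rather than buffering the whole object in memory. A standalone sketch of the same pattern with plain aiohttp (the URL and file path are placeholders and authentication headers are omitted):

    import asyncio

    import aiohttp


    async def download(url: str, path: str, chunk_size: int = 128 * 1024) -> None:
        # Stream a GET response to disk chunk by chunk.
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                response.raise_for_status()
                with open(path, "wb") as destination:
                    while True:
                        chunk = await response.content.read(chunk_size)
                        if not chunk:
                            break
                        destination.write(chunk)

    # e.g. asyncio.run(download("http://localhost:9000/bucket/key", "key.bin"))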
Example 5
 def test_signv4(self):
     # Construct target url.
     credentials = Credentials(
         provider=Static(
             access_key='minio',
             secret_key='minio123'
         )
     )
     url = get_target_url('http://localhost:9000',
                          bucket_name='testbucket',
                          object_name='~testobject',
                          bucket_region='us-east-1',
                          query={'partID': '1', 'uploadID': '~abcd'})
     hdrs = sign_v4('PUT', url, 'us-east-1',
                    credentials=credentials,
                    request_datetime=dt)
     eq_(hdrs['Authorization'],
         'AWS4-HMAC-SHA256 Credential=minio/20150620/us-east-1/s3/aws4_request, '
         'SignedHeaders=host;x-amz-content-sha256;x-amz-date, '
         'Signature=a2f4546f647981732bd90dfa5a7599c44dca92f44bea48ecc7565df06032c25b')
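The expected Authorization header follows the AWS Signature Version 4 layout: the algorithm, a Credential value of the form <access key>/<date>/<region>/<service>/aws4_request, the list of signed headers, and the hex-encoded signature. dt is a fixed request datetime defined elsewhere in the test module; its date portion (20150620) is visible in the credential scope. Purely as an illustration of that layout:

    import datetime

    access_key = 'minio'
    request_date = datetime.date(2015, 6, 20)  # date portion of `dt` above
    credential = '/'.join([access_key,
                           request_date.strftime('%Y%m%d'),
                           'us-east-1', 's3', 'aws4_request'])
    assert credential == 'minio/20150620/us-east-1/s3/aws4_request'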
Example 6
    async def is_accessible(self) -> bool:
        """
        Implementation of :py:meth:`shepherd.storage.Storage.is_accessible`.
        """

        url = get_target_url(self._config.url, "does-not-matter")
        headers = self._ensure_auth_headers('HEAD', url)

        try:
            await self._session.head(url, headers=headers)
            return True
        except AioHTTPClientError:
            return False
Example 7
    def url(self,
            name: str,
            *args,
            max_age: T.Optional[datetime.timedelta] = None) -> str:
        kwargs = {}
        if max_age is not None:
            kwargs["expires"] = max_age

        # NOTE: Here be dragons. When an external base_url is used, the code
        # below both uses "internal" minio client APIs and somewhat subverts
        # how minio/S3 expects URLs to be generated in the first place.

        if self.presign_urls:
            url = self.client.presigned_get_object(self.bucket_name, name,
                                                   **kwargs)
            if self.base_url is not None:
                parsed_url = urlparse(url)
                path = parsed_url.path.split(self.bucket_name, 1)[1]
                url = "{}{}?{}{}{}".format(
                    self.base_url,
                    path,
                    parsed_url.params,
                    parsed_url.query,
                    parsed_url.fragment,
                )

        else:
            if self.base_url is not None:

                def strip_beg(path):
                    while path.startswith("/"):
                        path = path[1:]
                    return path

                def strip_end(path):
                    while path.endswith("/"):
                        path = path[:-1]
                    return path

                url = "{}/{}".format(strip_end(self.base_url),
                                     urllib.parse.quote(strip_beg(name)))
            else:
                url = get_target_url(
                    self.client._endpoint_url,
                    bucket_name=self.bucket_name,
                    object_name=name,
                    # bucket_region=region,
                )
        return url
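In the presign branch, the re-rooted URL is assembled with a single format string; this works because a presigned URL carries everything relevant in its query string, so the empty params and fragment simply collapse away. A more defensive alternative (a sketch, not the original code) rebuilds the URL with urllib.parse:

    from urllib.parse import urlparse, urlunparse


    def reroot_presigned_url(presigned_url: str, base_url: str,
                             bucket_name: str) -> str:
        # Keep the presigned query (signature, expiry, ...) but swap the
        # scheme/host for the external base_url and drop the bucket prefix.
        presigned = urlparse(presigned_url)
        base = urlparse(base_url)
        path = presigned.path.split(bucket_name, 1)[1]
        return urlunparse((base.scheme, base.netloc, base.path + path,
                           presigned.params, presigned.query,
                           presigned.fragment))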
Example 8
    async def job_dir_exists(self, job_id: str) -> bool:
        """
        Implementation of :py:meth:`shepherd.storage.Storage.job_data_exists`.
        """

        url = get_target_url(self._config.url, job_id)
        headers = self._ensure_auth_headers('HEAD', url)

        try:
            response = await self._session.head(url, headers=headers)
        except AioHTTPClientError as error:
            raise StorageInaccessibleError(
                f"Failed to check minio bucket `{job_id}`") from error

        return response.status == 200
Example 9
    async def init_job(self, job_id: str):
        """
        Implementation of :py:meth:`shepherd.storage.Storage.init_job`.
        """

        url = get_target_url(self._config.url, bucket_name=job_id)
        headers = self._ensure_auth_headers("PUT", url)

        try:
            response = await self._session.put(url, headers=headers)
        except AioHTTPClientError as he:
            raise StorageInaccessibleError() from he

        if response.status == 409:
            raise NameConflictError("A job with this ID was already submitted")

        if response.status != 200:
            raise StorageError(f"Failed to create minio bucket `{job_id}`")
Example 10
    async def _put_object(self, bucket: str, object_name: str,
                          content: BinaryIO, length: int) -> None:
        """
        Store data from a file/stream object as a remote object.

        :param bucket: the bucket where the object should be stored
        :param object_name: the name of the new object
        :param content: a stream containing the object data
        :param length: the length of the data
        """

        url = get_target_url(self._config.url,
                             bucket_name=bucket,
                             object_name=object_name)

        headers = self._ensure_user_agent_header({
            "Content-Length": str(length),
            "Content-Type": "application/octet-stream",
        })

        data = content.read()
        content_sha256 = get_sha256_hexdigest(data)

        if self._config.secure:
            headers["Content-Md5"] = get_md5_base64digest(data)
            content_sha256 = "UNSIGNED-PAYLOAD"

        headers = self._ensure_auth_headers("PUT",
                                            url,
                                            headers,
                                            content_sha256=content_sha256)

        try:
            response = await self._session.put(url, data=data, headers=headers)
        except AioHTTPClientError as ce:
            raise StorageInaccessibleError() from ce

        if response.status != 200:
            raise StorageError(
                f"Failed to upload object `{bucket}/{object_name}`")
Example 11
    async def get_file(self, job_id: str,
                       file_path: str) -> Optional[StreamReader]:
        """
        Implementation of :py:meth:`shepherd.storage.Storage.get_file`.
        """

        url = get_target_url(self._config.url,
                             bucket_name=job_id,
                             object_name=file_path)
        headers = self._ensure_auth_headers("GET", url)

        try:
            if not await self._object_exists(job_id, file_path):
                return None

            response = await self._session.get(url, headers=headers)

            return response.content
        except AioHTTPClientError as he:
            raise StorageInaccessibleError() from he
Example 12
    async def _object_exists(self, bucket: str, object_name: str) -> bool:
        """
        Check if an object exists in the remote storage.

        :param bucket: the bucket to search for the object
        :param object_name: name of the object
        :return: True if the object exists, False otherwise
        """

        url = get_target_url(self._config.url,
                             bucket_name=bucket,
                             object_name=object_name)
        headers = self._ensure_auth_headers("HEAD", url)

        try:
            response = await self._session.head(url, headers=headers)

            return response.status == 200
        except AioHTTPClientError as ce:
            raise StorageInaccessibleError() from ce
Example 13
    def url(self, name):
        # type: (str) -> str

        # NOTE: Here be dragons. When an external base_url is used, the code
        # below both uses "internal" minio client APIs and somewhat subverts
        # how minio/S3 expects URLs to be generated in the first place.
        if self.presign_urls:
            url = self.client.presigned_get_object(self.bucket_name, name)
            if self.base_url is not None:
                parsed_url = urlparse(url)
                path = parsed_url.path.split(self.bucket_name, 1)[1]
                url = '{0}{1}?{2}{3}{4}'.format(self.base_url, path,
                                                parsed_url.params,
                                                parsed_url.query,
                                                parsed_url.fragment)

        else:
            if self.base_url is not None:

                def strip_beg(path):
                    while path.startswith('/'):
                        path = path[1:]
                    return path

                def strip_end(path):
                    while path.endswith('/'):
                        path = path[:-1]
                    return path

                url = "{}/{}".format(strip_end(self.base_url), strip_beg(name))
            else:
                url = get_target_url(
                    self.client._endpoint_url,
                    bucket_name=self.bucket_name,
                    object_name=name,
                    # bucket_region=region,
                )
        return url
Example 14
 def test_get_target_url_works(self):
     url = 'http://localhost:9000'
     eq_(get_target_url(url, 'bucket'),
         'http://localhost:9000/bucket')
     eq_(get_target_url(url, 'bucket', 'key'),
         'http://localhost:9000/bucket/key')
     eq_(get_target_url(url, 'bucket', 'key', None),
         'http://localhost:9000/bucket/key')
     eq_(get_target_url(url, 'bucket', 'key', {'foo': 'bar'}),
         'http://localhost:9000/bucket/key?foo=bar')
     eq_(get_target_url(url, 'bucket', 'key',
                        {'foo': 'bar',
                         'b': 'c',
                         'a': 'b'}),
         'http://localhost:9000/bucket/key?a=b&b=c&foo=bar')
     s3_url = 'https://s3.amazonaws.com'
     eq_(get_target_url(s3_url), 'https://s3.amazonaws.com/')
Example 15
    async def _list_bucket(self, bucket: str) -> AsyncIterable[str]:
        """
        List the names of all files in a bucket.

        :param bucket: the bucket to list
        :return: a generator of file names
        """

        continuation_token = None
        truncated = True

        while truncated:
            query = {}
            if continuation_token is not None:
                query["continuation-token"] = continuation_token

            url = get_target_url(self._config.url,
                                 bucket_name=bucket,
                                 query=query)
            headers = self._ensure_auth_headers("GET", url)

            try:
                response = await self._session.get(url, headers=headers)
            except AioHTTPClientError as ce:
                raise StorageInaccessibleError() from ce

            if response.status != 200:
                raise StorageError(f"Listing minio bucket `{bucket}` failed")

            tree = ElementTree.fromstring(await response.text())

            for key in map(lambda el: el.text,
                           tree.findall(".//s3:Key", self._NS)):
                yield key

            truncated = tree.find("s3:IsTruncated", self._NS).text != "false"
            # find() returns an Element (or None); the token string for the
            # next request is its text.
            token_element = tree.find("s3:NextContinuationToken", self._NS)
            continuation_token = (token_element.text
                                  if token_element is not None else None)
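The listing follows the S3 ListObjectsV2 pagination protocol: keep requesting with the returned NextContinuationToken until IsTruncated comes back "false". The responses are namespaced XML, so self._NS presumably maps the "s3" prefix to the S3 document namespace. A sketch of parsing a single response page under that assumption:

    from xml.etree import ElementTree

    # Assumed content of self._NS: the standard S3 XML namespace.
    S3_NS = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}


    def parse_listing_page(xml_text: str):
        tree = ElementTree.fromstring(xml_text)
        keys = [el.text for el in tree.findall(".//s3:Key", S3_NS)]
        truncated = tree.find("s3:IsTruncated", S3_NS).text == "true"
        token_element = tree.find("s3:NextContinuationToken", S3_NS)
        next_token = token_element.text if token_element is not None else None
        return keys, truncated, next_token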
Example 16
    def url(self, name):
        # type: (str) -> str

        # NOTE: Here be dragons. When an external base_url is used, the code
        # below both uses "internal" minio client APIs and somewhat subverts
        # how minio/S3 expects URLs to be generated in the first place.
        if self.presign_urls:
            url = self.client.presigned_get_object(self.bucket_name, name)
            if self.base_url is not None:
                parsed_url = urlparse(url)
                path = parsed_url.path.split(self.bucket_name, 1)[1]
                url = '{0}{1}?{2}{3}{4}'.format(
                    self.base_url, path, parsed_url.params,
                    parsed_url.query, parsed_url.fragment)

        else:
            if self.base_url is not None:
                def strip_beg(path):
                    while path.startswith('/'):
                        path = path[1:]
                    return path

                def strip_end(path):
                    while path.endswith('/'):
                        path = path[:-1]
                    return path
                url = "{}/{}".format(strip_end(self.base_url),
                                     strip_beg(name))
            else:
                url = get_target_url(self.client._endpoint_url,
                                     bucket_name=self.bucket_name,
                                     object_name=name,
                                     # bucket_region=region,
                                     )
        return url