Code example #1
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, bucket_name, prefix=None, azure_cloud=None):
        prefix = "{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = bucket_name
        try:
            endpoint_suffix = ENDPOINT_SUFFIXES[azure_cloud]
        except KeyError:
            raise InvalidConfigurationError("Unknown azure cloud {!r}".format(azure_cloud))

        self.conn = BlockBlobService(
            account_name=self.account_name, account_key=self.account_key, endpoint_suffix=endpoint_suffix
        )
        self.conn.socket_timeout = 120  # Default Azure socket timeout 20s is a bit short
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized, %r", self.container_name)

    def copy_file(self, *, source_key, destination_key, metadata=None, **kwargs):
        timeout = kwargs.get("timeout") or 15
        source_path = self.format_key_for_backend(source_key, remove_slash_prefix=True, trailing_slash=False)
        destination_path = self.format_key_for_backend(destination_key, remove_slash_prefix=True, trailing_slash=False)
        source_url = self.conn.make_blob_url(self.container_name, source_path)
        start = time.monotonic()
        self.conn.copy_blob(self.container_name, destination_path, source_url, metadata=metadata, timeout=timeout)
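        # copy_blob only starts a server-side copy; poll the destination blob's copy
        # status until it completes, fails, or exceeds the timeout.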
        while True:
            blob_properties = self.conn.get_blob_properties(self.container_name, destination_path, timeout=timeout)
            copy_props = blob_properties.properties.copy
            if copy_props.status == "success":
                return
            elif copy_props.status == "pending":
                if time.monotonic() - start < timeout:
                    time.sleep(0.1)
                else:
                    self.conn.abort_copy_blob(self.container_name, destination_path, copy_props.id, timeout=timeout)
                    raise StorageError(
                        "Copying {!r} to {!r} did not complete in {} seconds".format(source_key, destination_key, timeout)
                    )
            elif copy_props.status == "failed":
                raise StorageError(
                    "Copying {!r} to {!r} failed: {!r}".format(source_key, destination_key, copy_props.status_description)
                )
            else:
                raise StorageError(
                    "Copying {!r} to {!r} failed, unexpected status: {!r}".format(
                        source_key, destination_key, copy_props.status
                    )
                )

    def get_metadata_for_key(self, key):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=False)
        items = list(self._iter_key(path=path, with_metadata=True, deep=False))
        if not items:
            raise FileNotFoundFromStorageError(key)
        item, = items
        if item.type != KEY_TYPE_OBJECT:
            raise FileNotFoundFromStorageError(key)  # it's a prefix
        return item.value["metadata"]

    def _metadata_for_key(self, path):
        return list(self._iter_key(path=path, with_metadata=True, deep=False))[0].value["metadata"]

    def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=not include_key)
        self.log.debug("Listing path %r", path)
        yield from self._iter_key(path=path, with_metadata=with_metadata, deep=deep)

    def _iter_key(self, *, path, with_metadata, deep):
        include = "metadata" if with_metadata else None
        kwargs = {}
        if path:
            # If you give Azure an empty path, it gives you an authentication error
            kwargs["prefix"] = path
        if not deep:
            kwargs["delimiter"] = "/"
        items = self.conn.list_blobs(self.container_name, include=include, **kwargs)
        for item in items:
            if isinstance(item, BlobPrefix):
                yield IterKeyItem(type=KEY_TYPE_PREFIX, value=self.format_key_from_backend(item.name).rstrip("/"))
            else:
                if with_metadata:
                    # Azure Storage cannot handle '-' so we turn them into underscores and back again
                    metadata = {k.replace("_", "-"): v for k, v in item.metadata.items()}
                else:
                    metadata = None
                yield IterKeyItem(
                    type=KEY_TYPE_OBJECT,
                    value={
                        "last_modified": item.properties.last_modified,
                        "metadata": metadata,
                        "name": self.format_key_from_backend(item.name),
                        "size": item.properties.content_length,
                    },
                )

    def delete_key(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Deleting key: %r", key)
        try:
            return self.conn.delete_blob(self.container_name, key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_contents_to_file(self, key, filepath_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        try:
            self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)
        return self._metadata_for_key(key)

    @classmethod
    def _parse_length_from_content_range(cls, content_range):
        """Parses the blob length from the content range header: bytes 1-3/65537"""
        if not content_range:
            raise ValueError("File size unavailable")

        return int(content_range.split(" ", 1)[1].split("/", 1)[1])

    def _stream_blob(self, key, fileobj, progress_callback):
        """Streams contents of given key to given fileobj. Data is read sequentially in chunks
        without any seeks. This requires duplicating some functionality of the Azure SDK, which only
        allows reading entire blob into memory at once or returning data from random offsets"""
        file_size = None
        start_range = 0
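        # MAX_CHUNK_GET_SIZE is the SDK's configured download chunk size (4 MiB by
        # default in the legacy azure-storage SDK), so each ranged GET below fetches
        # at most that many bytes.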
        chunk_size = self.conn.MAX_CHUNK_GET_SIZE
        end_range = chunk_size - 1
        while True:
            try:
                # pylint: disable=protected-access
                blob = self.conn._get_blob(self.container_name, key, start_range=start_range, end_range=end_range)
                if file_size is None:
                    file_size = self._parse_length_from_content_range(blob.properties.content_range)
                fileobj.write(blob.content)
                start_range += blob.properties.content_length
                if start_range == file_size:
                    break
                if blob.properties.content_length == 0:
                    raise StorageError("Empty response received for {}, range {}-{}".format(key, start_range, end_range))
                end_range += blob.properties.content_length
                if end_range >= file_size:
                    end_range = file_size - 1
                if progress_callback:
                    progress_callback(start_range, file_size)
            except azure.common.AzureHttpError as ex:  # pylint: disable=no-member
                if ex.status_code == 416:  # Empty file
                    return
                raise

    def get_contents_to_fileobj(self, key, fileobj_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            self._stream_blob(key, fileobj_to_store_to, progress_callback)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)

        return self._metadata_for_key(key)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            blob = self.conn.get_blob_to_bytes(self.container_name, key)
            return blob.content, self._metadata_for_key(key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_file_size(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        try:
            blob = self.conn.get_blob_properties(self.container_name, key)
            return blob.properties.content_length
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def store_file_from_memory(self, key, memstring, metadata=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_bytes(
            self.container_name,
            key,
            bytes(memstring),  # azure would work with memoryview, but validates it's bytes
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_path(
            self.container_name,
            key,
            filepath,
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_object(self, key, fd, *, cache_control=None, metadata=None, mimetype=None, upload_progress_fn=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)

        def progress_callback(bytes_sent, _):
            if upload_progress_fn:
                upload_progress_fn(bytes_sent)

        # Azure _BlobChunkUploader calls `tell()` on the stream even though it doesn't use the result.
        # The input stream may not support `tell()`, so we substitute a dummy implementation for it.
        original_tell = getattr(fd, "tell", None)
        fd.tell = lambda: None
        try:
            self.conn.create_blob_from_stream(
                self.container_name,
                key,
                fd,
                content_settings=content_settings,
                metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_"),
                progress_callback=progress_callback
            )
        finally:
            if original_tell:
                fd.tell = original_tell
            else:
                delattr(fd, "tell")

    def get_or_create_container(self, container_name):
        start_time = time.monotonic()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.monotonic() - start_time)
        return container_name
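
A minimal usage sketch for the class above, assuming the surrounding module provides ENDPOINT_SUFFIXES with a None key for the default public cloud; the account name, key, container, and object keys below are placeholders, not values from the original project.

# Hypothetical values, for illustration only
transfer = AzureTransfer(
    account_name="exampleaccount",
    account_key="base64-account-key==",
    bucket_name="backups",
    prefix="site1",
    azure_cloud=None,  # assumed to map to the default endpoint suffix
)
transfer.store_file_from_memory("state/example.txt", b"hello", metadata={"some-key": "1"})
print(transfer.get_metadata_for_key("state/example.txt"))
transfer.copy_file(source_key="state/example.txt", destination_key="state/example-copy.txt")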
Code example #2
File: AzureStorage.py  Project: beebeeep/cacus
class AzureStorage(plugins.IStoragePlugin):

    def configure(self, config):
        self.storage = BlockBlobService(account_name=config['account_name'], account_key=config['account_key'])
        self.container = config['container']
        try:
            self.storage.get_container_properties(self.container)
            log.info("Configuring Azure blob storage %s/%s", self.storage.account_name, self.container)
        except AzureMissingResourceHttpError:
            log.warning("Container '%s' is missing in account '%s', trying to create new", self.container, self.storage.account_name)
            try:
                self.storage.create_container(self.container)
                self.storage.set_container_acl(self.container, public_access=PublicAccess.Container)
            except Exception as e:
                log.critical("Cannot create new container: %s", e)
                raise plugins.PluginInitException("Cannot create new container")
        except Exception as e:
            log.critical("Cannot access container '%s' in account '%s': %s", self.container, self.storage.account_name, e)
            raise plugins.PluginInitException("Cannot access container")

    def delete(self, key):
        log.info("Deleting file '%s' from %s/%s", key, self.storage.account_name, self.container)
        try:
            self.storage.delete_blob(self.container, key)
        except AzureMissingResourceHttpError:
            log.error("File '%s' was not found in %s/%s", key, self.storage.account_name, self.container)
            raise common.NotFound('File not found')
        except Exception as e:
            log.error("Cannot delete '%s' from %s/%s: %s", key, self.storage.account_name, self.container, e)
            raise common.FatalError(e)

    def put(self, key, filename=None, file=None):
        storage_key = key
        try:
            if filename:
                log.debug("Uploading %s to %s", filename, self.storage.make_blob_url(self.container, storage_key))
                self.storage.create_blob_from_path(self.container, storage_key, filename, content_settings=ContentSettings(content_type='application/octet-stream'))
            elif file:
                old_pos = file.tell()
                file.seek(0)
                log.debug("Uploading from stream to %s", self.storage.make_blob_url(self.container, storage_key))
                self.storage.create_blob_from_stream(self.container, storage_key, file, content_settings=ContentSettings(content_type='application/octet-stream'))
                file.seek(old_pos)
        except Exception as e:
            # TODO: more detailed error inspection
            log.critical("Error uploading to %s/%s: %s", self.storage.account_name, self.container, e)
            raise common.FatalError(e)
        return storage_key


    def get(self, key, stream):
        # The current Azure Python SDK can barely work with non-seekable streams,
        # so we have to implement chunking on our own.
        # TODO: proper ranging? RFC 7233 says the server SHOULD return 416 when a range
        # is unsatisfiable, but Azure accepts an end position past the blob length as
        # long as the blob is not empty.
        chunk_size = 4*1024*1024
        chunk_start = 0
        chunk_end = chunk_size - 1
        while True:
            try:
                chunk = self.storage._get_blob(self.container, key, start_range=chunk_start, end_range=chunk_end)
                log.debug("Writing %s bytes from %s", len(chunk.content), chunk_start)
                stream.write(chunk.content)
            except IOError:
                # remote side closed connection
                return
            except AzureMissingResourceHttpError as e:
                raise common.NotFound(e)
            except (AzureHttpError, AzureException) as e:
                raise common.TemporaryError('Error while downloading {}: {}'.format(key, e))

            # Parse "bytes <start>-<end>/<total>" from the Content-Range header
            chunk_start, chunk_end, blob_size = map(int, re.match(r'^bytes\s+(\d+)-(\d+)/(\d+)$', chunk.properties.content_range).groups())
            if chunk_end == blob_size - 1:
                # no more data to stream
                break
            else:
                chunk_start = chunk_end + 1
                chunk_end += chunk_size
        return 0
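
A sketch of how this plugin might be exercised, assuming the plugins and common modules behave as in the original cacus project; the account values and file paths are placeholders.

# Hypothetical configuration, for illustration only
storage = AzureStorage()
storage.configure({
    "account_name": "exampleaccount",
    "account_key": "base64-account-key==",
    "container": "repo",
})
key = storage.put("pool/p/pkg_1.0.deb", filename="/tmp/pkg_1.0.deb")
with open("/tmp/pkg_copy.deb", "wb") as out:
    storage.get(key, out)
storage.delete(key)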
Code example #3
class AzureTransfer(BaseTransfer):
    def __init__(self,
                 account_name,
                 account_key,
                 bucket_name,
                 prefix=None,
                 azure_cloud=None):
        prefix = "{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = bucket_name
        try:
            endpoint_suffix = ENDPOINT_SUFFIXES[azure_cloud]
        except KeyError:
            raise InvalidConfigurationError(
                "Unknown azure cloud {!r}".format(azure_cloud))

        self.conn = BlockBlobService(account_name=self.account_name,
                                     account_key=self.account_key,
                                     endpoint_suffix=endpoint_suffix)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized, %r", self.container_name)

    def get_metadata_for_key(self, key):
        path = self.format_key_for_backend(key,
                                           remove_slash_prefix=True,
                                           trailing_slash=False)
        results = list(self._list_iter(path))
        if not results:
            raise FileNotFoundFromStorageError(key)
        return results[0]["metadata"]

    def _metadata_for_key(self, key):
        return list(self._list_iter(key))[0]["metadata"]

    def list_path(self, key, *, with_metadata=True):
        # Trailing slash needed when listing directories, without when listing individual files
        path = self.format_key_for_backend(key,
                                           remove_slash_prefix=True,
                                           trailing_slash=True)
        return list(self._list_iter(path, with_metadata=with_metadata))

    def list_iter(self, key, *, with_metadata=True):
        # Trailing slash needed when listing directories, without when listing individual files
        path = self.format_key_for_backend(key,
                                           remove_slash_prefix=True,
                                           trailing_slash=True)
        yield from self._list_iter(path, with_metadata=with_metadata)

    def _list_iter(self, path, *, with_metadata=True):
        self.log.debug("Listing path %r", path)
        include = "metadata" if with_metadata else None
        if path:
            items = self.conn.list_blobs(self.container_name,
                                         prefix=path,
                                         delimiter="/",
                                         include=include)
        else:  # If you give Azure an empty path, it gives you an authentication error
            items = self.conn.list_blobs(self.container_name,
                                         delimiter="/",
                                         include=include)
        for item in items:
            if not isinstance(item, BlobPrefix):
                if with_metadata:
                    # Azure Storage cannot handle '-' so we turn them into underscores and back again
                    metadata = {k.replace("_", "-"): v for k, v in item.metadata.items()}
                else:
                    metadata = None
                yield {
                    "last_modified": item.properties.last_modified,
                    "metadata": metadata,
                    "name": self.format_key_from_backend(item.name),
                    "size": item.properties.content_length,
                }

    def delete_key(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Deleting key: %r", key)
        try:
            return self.conn.delete_blob(self.container_name, key)
        except azure.common.AzureMissingResourceHttpError as ex:
            raise FileNotFoundFromStorageError(key) from ex

    def get_contents_to_file(self,
                             key,
                             filepath_to_store_to,
                             *,
                             progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r to: %r", key,
                       filepath_to_store_to)
        try:
            self.conn.get_blob_to_path(self.container_name, key,
                                       filepath_to_store_to)
        except azure.common.AzureMissingResourceHttpError as ex:
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)
        return self._metadata_for_key(key)

    @classmethod
    def _parse_length_from_content_range(cls, content_range):
        """Parses the blob length from the content range header: bytes 1-3/65537"""
        if not content_range:
            raise ValueError("File size unavailable")

        return int(content_range.split(" ", 1)[1].split("/", 1)[1])

    def _stream_blob(self, key, fileobj, progress_callback):
        """Streams contents of given key to given fileobj. Data is read sequentially in chunks
        without any seeks. This requires duplicating some functionality of the Azure SDK, which only
        allows reading entire blob into memory at once or returning data from random offsets"""
        file_size = None
        start_range = 0
        chunk_size = self.conn.MAX_CHUNK_GET_SIZE
        end_range = chunk_size - 1
        while True:
            try:
                # pylint: disable=protected-access
                blob = self.conn._get_blob(self.container_name,
                                           key,
                                           start_range=start_range,
                                           end_range=end_range)
                if file_size is None:
                    file_size = self._parse_length_from_content_range(
                        blob.properties.content_range)
                fileobj.write(blob.content)
                start_range += blob.properties.content_length
                if start_range == file_size:
                    break
                if blob.properties.content_length == 0:
                    raise StorageError(
                        "Empty response received for {}, range {}-{}".format(
                            key, start_range, end_range))
                end_range += blob.properties.content_length
                if end_range >= file_size:
                    end_range = file_size - 1
                if progress_callback:
                    progress_callback(start_range, file_size)
            except azure.common.AzureHttpError as ex:
                if ex.status_code == 416:  # Empty file
                    return
                raise

    def get_contents_to_fileobj(self,
                                key,
                                fileobj_to_store_to,
                                *,
                                progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            self._stream_blob(key, fileobj_to_store_to, progress_callback)
        except azure.common.AzureMissingResourceHttpError as ex:
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)

        return self._metadata_for_key(key)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            blob = self.conn.get_blob_to_bytes(self.container_name, key)
            return blob.content, self._metadata_for_key(key)
        except azure.common.AzureMissingResourceHttpError as ex:
            raise FileNotFoundFromStorageError(key) from ex

    def get_file_size(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        try:
            blob = self.conn.get_blob_properties(self.container_name, key)
            return blob.properties.content_length
        except azure.common.AzureMissingResourceHttpError as ex:
            raise FileNotFoundFromStorageError(key) from ex

    def store_file_from_memory(self,
                               key,
                               memstring,
                               metadata=None,
                               cache_control=None):
        if cache_control is not None:
            raise NotImplementedError(
                "AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.conn.create_blob_from_bytes(
            self.container_name,
            key,
            memstring,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_"))

    def store_file_from_disk(self,
                             key,
                             filepath,
                             metadata=None,
                             multipart=None,
                             cache_control=None):
        if cache_control is not None:
            raise NotImplementedError(
                "AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.conn.create_blob_from_path(self.container_name,
                                        key,
                                        filepath,
                                        metadata=self.sanitize_metadata(
                                            metadata, replace_hyphen_with="_"))

    def get_or_create_container(self, container_name):
        start_time = time.monotonic()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name,
                       time.monotonic() - start_time)
        return container_name
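
Examples #1 and #3 both derive the total blob size from the Content-Range header returned by the first ranged GET. A standalone sketch of that parsing step, using the header format quoted in the docstring:

def parse_length_from_content_range(content_range):
    # "bytes 1-3/65537" -> 65537; the total size follows the slash
    if not content_range:
        raise ValueError("File size unavailable")
    return int(content_range.split(" ", 1)[1].split("/", 1)[1])

assert parse_length_from_content_range("bytes 1-3/65537") == 65537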