Code example #1
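The AzureTransfer class below is an excerpt: its imports and several module-level names (ENDPOINT_SUFFIXES, the error classes, BaseTransfer and the key-type constants) are defined elsewhere in the surrounding package. A minimal preamble, assuming the legacy azure-storage-blob 2.x SDK, might look like this sketch:

# Assumed preamble for this excerpt -- not part of the original listing.
import time

import azure.common
from azure.storage.blob import BlockBlobService, ContentSettings
from azure.storage.blob.models import BlobPrefix

# BaseTransfer, IterKeyItem, KEY_TYPE_OBJECT, KEY_TYPE_PREFIX and the error classes
# (InvalidConfigurationError, FileNotFoundFromStorageError, StorageError) are assumed
# to come from the surrounding storage package.

# Assumed mapping from the azure_cloud argument to the storage endpoint suffix.
ENDPOINT_SUFFIXES = {
    None: "core.windows.net",          # public Azure cloud
    "germany": "core.cloudapi.de",     # Azure Germany
    "china": "core.chinacloudapi.cn",  # Azure China
}
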
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, bucket_name, prefix=None, azure_cloud=None):
        prefix = "{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = bucket_name
        try:
            endpoint_suffix = ENDPOINT_SUFFIXES[azure_cloud]
        except KeyError:
            raise InvalidConfigurationError("Unknown azure cloud {!r}".format(azure_cloud))

        self.conn = BlockBlobService(
            account_name=self.account_name, account_key=self.account_key, endpoint_suffix=endpoint_suffix
        )
        self.conn.socket_timeout = 120  # Default Azure socket timeout 20s is a bit short
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized, %r", self.container_name)

    def copy_file(self, *, source_key, destination_key, metadata=None, **kwargs):
        timeout = kwargs.get("timeout") or 15
        source_path = self.format_key_for_backend(source_key, remove_slash_prefix=True, trailing_slash=False)
        destination_path = self.format_key_for_backend(destination_key, remove_slash_prefix=True, trailing_slash=False)
        source_url = self.conn.make_blob_url(self.container_name, source_path)
        start = time.monotonic()
        self.conn.copy_blob(self.container_name, destination_path, source_url, metadata=metadata, timeout=timeout)
        while True:
            blob_properties = self.conn.get_blob_properties(self.container_name, destination_path, timeout=timeout)
            copy_props = blob_properties.properties.copy
            if copy_props.status == "success":
                return
            elif copy_props.status == "pending":
                if time.monotonic() - start < timeout:
                    time.sleep(0.1)
                else:
                    self.conn.abort_copy_blob(self.container_name, destination_path, copy_props.id, timeout=timeout)
                    raise StorageError(
                        "Copying {!r} to {!r} did not complete in {} seconds".format(source_key, destination_key, timeout)
                    )
            elif copy_props.status == "failed":
                raise StorageError(
                    "Copying {!r} to {!r} failed: {!r}".format(source_key, destination_key, copy_props.status_description)
                )
            else:
                raise StorageError(
                    "Copying {!r} to {!r} failed, unexpected status: {!r}".format(
                        source_key, destination_key, copy_props.status
                    )
                )

    def get_metadata_for_key(self, key):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=False)
        items = list(self._iter_key(path=path, with_metadata=True, deep=False))
        if not items:
            raise FileNotFoundFromStorageError(key)
        item, = items
        if item.type != KEY_TYPE_OBJECT:
            raise FileNotFoundFromStorageError(key)  # it's a prefix
        return item.value["metadata"]

    def _metadata_for_key(self, path):
        return list(self._iter_key(path=path, with_metadata=True, deep=False))[0].value["metadata"]

    def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=not include_key)
        self.log.debug("Listing path %r", path)
        yield from self._iter_key(path=path, with_metadata=with_metadata, deep=deep)

    def _iter_key(self, *, path, with_metadata, deep):
        include = "metadata" if with_metadata else None
        kwargs = {}
        if path:
            # If you give Azure an empty path, it gives you an authentication error
            kwargs["prefix"] = path
        if not deep:
            kwargs["delimiter"] = "/"
        items = self.conn.list_blobs(self.container_name, include=include, **kwargs)
        for item in items:
            if isinstance(item, BlobPrefix):
                yield IterKeyItem(type=KEY_TYPE_PREFIX, value=self.format_key_from_backend(item.name).rstrip("/"))
            else:
                if with_metadata:
                    # Azure Storage metadata keys cannot contain '-', so hyphens are stored as underscores and mapped back here
                    metadata = {k.replace("_", "-"): v for k, v in item.metadata.items()}
                else:
                    metadata = None
                yield IterKeyItem(
                    type=KEY_TYPE_OBJECT,
                    value={
                        "last_modified": item.properties.last_modified,
                        "metadata": metadata,
                        "name": self.format_key_from_backend(item.name),
                        "size": item.properties.content_length,
                    },
                )

    def delete_key(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Deleting key: %r", key)
        try:
            return self.conn.delete_blob(self.container_name, key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_contents_to_file(self, key, filepath_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        try:
            self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)
        return self._metadata_for_key(key)

    @classmethod
    def _parse_length_from_content_range(cls, content_range):
        """Parses the blob length from the content range header: bytes 1-3/65537"""
        if not content_range:
            raise ValueError("File size unavailable")

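        # Worked example using the header format from the docstring, "bytes 1-3/65537":
        #   content_range.split(" ", 1)[1]  -> "1-3/65537"
        #   ...split("/", 1)[1]             -> "65537"
        #   int("65537")                    -> 65537  (the total blob length)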
        return int(content_range.split(" ", 1)[1].split("/", 1)[1])

    def _stream_blob(self, key, fileobj, progress_callback):
        """Streams contents of given key to given fileobj. Data is read sequentially in chunks
        without any seeks. This requires duplicating some functionality of the Azure SDK, which only
        allows reading entire blob into memory at once or returning data from random offsets"""
        file_size = None
        start_range = 0
        chunk_size = self.conn.MAX_CHUNK_GET_SIZE
        end_range = chunk_size - 1
        while True:
            try:
                # pylint: disable=protected-access
                blob = self.conn._get_blob(self.container_name, key, start_range=start_range, end_range=end_range)
                if file_size is None:
                    file_size = self._parse_length_from_content_range(blob.properties.content_range)
                fileobj.write(blob.content)
                start_range += blob.properties.content_length
                if start_range == file_size:
                    break
                if blob.properties.content_length == 0:
                    raise StorageError("Empty response received for {}, range {}-{}".format(key, start_range, end_range))
                end_range += blob.properties.content_length
                if end_range >= file_size:
                    end_range = file_size - 1
                if progress_callback:
                    progress_callback(start_range, file_size)
            except azure.common.AzureHttpError as ex:  # pylint: disable=no-member
                if ex.status_code == 416:  # Empty file
                    return
                raise

    def get_contents_to_fileobj(self, key, fileobj_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            self._stream_blob(key, fileobj_to_store_to, progress_callback)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)

        return self._metadata_for_key(key)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            blob = self.conn.get_blob_to_bytes(self.container_name, key)
            return blob.content, self._metadata_for_key(key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_file_size(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        try:
            blob = self.conn.get_blob_properties(self.container_name, key)
            return blob.properties.content_length
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def store_file_from_memory(self, key, memstring, metadata=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_bytes(
            self.container_name,
            key,
            bytes(memstring),  # azure would work with memoryview, but validates it's bytes
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_path(
            self.container_name,
            key,
            filepath,
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_object(self, key, fd, *, cache_control=None, metadata=None, mimetype=None, upload_progress_fn=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)

        def progress_callback(bytes_sent, _):
            if upload_progress_fn:
                upload_progress_fn(bytes_sent)

        # Azure _BlobChunkUploader calls `tell()` on the stream even though it doesn't use the result.
        # We expect the input stream not to support `tell()`, so we install a dummy implementation for it.
        original_tell = getattr(fd, "tell", None)
        fd.tell = lambda: None
        try:
            self.conn.create_blob_from_stream(
                self.container_name,
                key,
                fd,
                content_settings=content_settings,
                metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_"),
                progress_callback=progress_callback
            )
        finally:
            if original_tell:
                fd.tell = original_tell
            else:
                delattr(fd, "tell")

    def get_or_create_container(self, container_name):
        start_time = time.monotonic()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.monotonic() - start_time)
        return container_name
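
A rough usage sketch for the class above; the account credentials, container name and object keys are placeholders, and only methods shown in the excerpt are called:

# Hypothetical usage of AzureTransfer -- all names and credentials are placeholders.
transfer = AzureTransfer(
    account_name="myaccount",
    account_key="base64-account-key",
    bucket_name="backups",
    prefix="cluster-a/",
)

# Upload a small object with metadata, then inspect it.
transfer.store_file_from_memory("basebackup/0001", b"example-content", metadata={"x-type": "test"})
print(transfer.get_file_size("basebackup/0001"))
print(transfer.get_metadata_for_key("basebackup/0001"))

# Server-side copy and cleanup.
transfer.copy_file(source_key="basebackup/0001", destination_key="basebackup/0002")
transfer.delete_key("basebackup/0001")
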
Code example #2
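As with the first example, the WABS class below is an excerpt; a minimal preamble, again assuming the legacy azure-storage-blob 2.x SDK, might be:

# Assumed preamble for this excerpt -- not part of the original listing.
import logging
import time

from azure.common import AzureConflictHttpError, AzureException
from azure.storage.blob import BlockBlobService
from azure.storage.blob.models import Include

logger = logging.getLogger(__name__)

# The Storage base class is assumed to come from the surrounding package.
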
class WABS(Storage):
    """
    A class for managing objects on Windows Azure Blob Storage. It implements
    the interface of the Storage base class.
    """
    def __init__(self, account_name, container_name, sas_token):
        """Setup a Windows azure blob storage client object

        :param str account_name: Azure blob storage account name for connection
        :param str container_name: Name of container to be accessed in the account
        :param str sas_token: Shared access signature token for access

        """
        self.sas_token = sas_token
        self.container_name = container_name

        # The socket_timeout is passed on to the requests session
        # which executes the HTTP call; both the connect and read
        # timeouts are set to 60s.
        self.client = BlockBlobService(account_name=account_name,
                                       sas_token=self.sas_token,
                                       socket_timeout=60)
        logger.debug("Created wabs client object: {0}".format(self.client))

    @classmethod
    def get_retriable_exceptions(cls, method_name=None):
        """Return exceptions that should be retried for specified method of class

        :param str method_name: A method of class for which retriable exceptions should be searched
        :returns: A tuple of exception class to be retried
        :rtype: tuple

        """
        if method_name == 'delete_key':
            return ()
        return (AzureException, )

    def get_url_prefix(self):
        """Returns a connection string for the client object

        :returns: Connection string for the client object
        :rtype: str

        """
        return '{}://{}/{}/'.format(self.client.protocol,
                                    self.client.primary_endpoint,
                                    self.container_name)

    def list_object_keys(self, prefix='', metadata=False, pagesize=1000):
        """List object keys matching a prefix for the WABS client

        :param str prefix: A prefix string to list objects
        :param bool metadata: If set to True, object metadata is fetched along with each object. Default is False
        :param int pagesize: Maximum number of objects fetched in a single WABS API call; WABS limits this to at most 5000 objects
        :returns: A generator of object dictionaries with key, size and last_modified keys; metadata is included only when metadata is True
        :rtype: Iterator[dict]

        """

        logger.debug("Listing files for prefix: {0}".format(prefix))
        include = Include(metadata=metadata)
        marker = None
        while True:
            if marker:
                logger.debug("Paging objects "
                             "from marker '{0}'".format(marker))
            objects = self.client.list_blobs(self.container_name,
                                             prefix=prefix,
                                             num_results=pagesize,
                                             include=include,
                                             marker=marker)
            for obj in objects:
                yield {
                    'key': obj.name,
                    'last_modified': obj.properties.last_modified,
                    'size': obj.properties.content_length,
                    'metadata': obj.metadata
                }

            if objects.next_marker:
                marker = objects.next_marker
            else:
                break

    def download_file(self, source_key, destination_file):
        """Download a object from WABS container to local filesystem

        :param str source_key: Key for object to be downloaded
        :param str destination_file: Path on local filesystem to download file
        :returns: Nothing
        :rtype: None

        """
        self.client.get_blob_to_path(self.container_name, source_key,
                                     destination_file)

    def upload_file(self, destination_key, source_file, metadata=None):
        """Upload a file from local filesystem to WABS

        :param str destination_key: Key where to store object
        :param str source_file: Path on local file system for file to be uploaded
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        logger.debug("Uploading file {0} to prefix {1}".format(
            source_file, destination_key))
        self.client.create_blob_from_path(self.container_name,
                                          destination_key,
                                          source_file,
                                          metadata=metadata)

    def upload_file_obj(self, destination_key, source_fd, metadata=None):
        """Upload a file from file object to WABS

        :param str destination_key: Key where to store object
        :param file source_fd: A file object to be uploaded
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        self.client.create_blob_from_stream(self.container_name,
                                            destination_key,
                                            source_fd,
                                            metadata=metadata)

    # FIXME: This function needs fixing: if another copy is already in
    # progress it should abort, or it should follow the EC2 behaviour
    def copy_from_key(self, source_key, destination_key, metadata=None):
        """Copy a WABS object from one key to another key on server side

        :param str source_key: Source key for the object to be copied
        :param str destination_key: Destination key to store object
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        logger.debug("Copying key {0} -> {1}".format(source_key,
                                                     destination_key))

        # If a previous copy was pending cancel it before
        # starting another copy
        for blob in self.client.list_blobs(self.container_name,
                                           prefix=destination_key):
            # There should only be one blob with the given key.
            # However, list_blobs is the only exposed API to check
            # existence of a blob without failures.
            # AzureBlobStorage doesn't allow more than one pending
            # copy to the destination key
            try:
                self.client.abort_copy_blob(self.container_name,
                                            destination_key,
                                            blob.properties.copy.id)
            except AzureConflictHttpError:
                logger.info("No copy in progress, ignoring AzureConflictHttpError")
        source_uri = self.client.make_blob_url(self.container_name,
                                               source_key,
                                               sas_token=self.sas_token)
        copy_properties = self.client.copy_blob(self.container_name,
                                                destination_key,
                                                source_uri,
                                                metadata=metadata)
        # Wait for the copy to be a success
        while copy_properties.status == 'pending':
            # Wait a second before retrying
            time.sleep(1)
            properties = self.client.get_blob_properties(
                self.container_name, destination_key)
            copy_properties = properties.properties.copy
            # TODO(vin): Raise Error if copy_properties errors out

    def delete_key(self, destination_key):
        """Delete an object from WABS

        :param str destination_key: Destination key for the object to be deleted
        :returns: Nothing
        :rtype: None

        """
        logger.debug("Deleting key {0}".format(destination_key))
        return self.client.delete_blob(self.container_name, destination_key)
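
A rough usage sketch for the WABS class; the account, container, SAS token and file paths are placeholders:

# Hypothetical usage of WABS -- all names and the token are placeholders.
storage = WABS(account_name="myaccount",
               container_name="backups",
               sas_token="<sas-token>")

storage.upload_file("dumps/db.dump", "/tmp/db.dump", metadata={"origin": "nightly"})

for obj in storage.list_object_keys(prefix="dumps/", metadata=True):
    print(obj["key"], obj["size"], obj["last_modified"])

storage.copy_from_key("dumps/db.dump", "dumps/db-copy.dump")
storage.download_file("dumps/db-copy.dump", "/tmp/db-copy.dump")
storage.delete_key("dumps/db.dump")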