def get_blobs(self, container: Container) -> Iterable[Blob]:
        azure_container = self._get_azure_container(container.name)

        azure_blobs = self.service.list_blobs(azure_container.name,
                                              include=Include(metadata=True))
        for azure_blob in azure_blobs:
            yield self._convert_azure_blob(container, azure_blob)
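For context, a minimal usage sketch of the same Include(metadata=True) pattern against the legacy azure-storage SDK directly; the account name, key and container below are placeholders, not part of the original snippet.

from azure.storage.blob import BlockBlobService
from azure.storage.blob.models import Include

# Placeholder credentials and container name, assumed for illustration
service = BlockBlobService(account_name='myaccount', account_key='<key>')
for blob in service.list_blobs('mycontainer', include=Include(metadata=True)):
    # blob.metadata is populated only because metadata was requested
    print(blob.name, blob.metadata)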
Example 2
    def list_object_keys(self, prefix='', metadata=False, pagesize=1000):
        """List object keys matching a prefix for the WABS client

        :param str prefix: A prefix string used to filter object keys
        :param bool metadata: If True, object metadata is fetched along with each object. Defaults to False
        :param int pagesize: Maximum number of objects fetched per WABS API call. WABS caps this at 5000 objects
        :returns: A generator of dictionaries with key, size, last_modified and metadata keys; metadata is populated only when requested
        :rtype: Iterator[dict]

        """

        logger.debug("Listing files for prefix: {0}".format(prefix))
        include = Include(metadata=metadata)
        marker = None
        while True:
            if marker:
                logger.debug("Paging objects "
                             "from marker '{0}'".format(marker))
            objects = self.client.list_blobs(self.container_name,
                                             prefix=prefix,
                                             num_results=pagesize,
                                             include=include,
                                             marker=marker)
            for obj in objects:
                yield {
                    'key': obj.name,
                    'last_modified': obj.properties.last_modified,
                    'size': obj.properties.content_length,
                    'metadata': obj.metadata
                }

            if objects.next_marker:
                marker = objects.next_marker
            else:
                break
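A short usage sketch for the pager above; wabs_client stands in for an instance of the surrounding class, which this snippet does not show, and the prefix and page size are illustrative values.

# Assumes `wabs_client` is an instance of the class that defines
# list_object_keys; prefix and pagesize are placeholders
for obj in wabs_client.list_object_keys(prefix='backups/', metadata=True,
                                        pagesize=500):
    print(obj['key'], obj['size'], obj['last_modified'])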
Example 3
    def get_blobs(self,
                  container: Container,
                  prefix: str = '',
                  delimiter: str = '') -> Iterable[Blob]:
        azure_container = self._get_azure_container(container.name)

        azure_blobs = self.service.list_blobs(azure_container.name,
                                              prefix=prefix,
                                              delimiter=delimiter,
                                              include=Include(metadata=True))
        for azure_blob in azure_blobs:
            yield self._convert_azure_blob(container, azure_blob)
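As a hedged aside on the delimiter parameter: in the legacy azure-storage SDK, listing with a delimiter interleaves BlobPrefix entries (virtual directories) with Blob entries, so a caller can distinguish them roughly as below; the account name, key and container are placeholder assumptions.

from azure.storage.blob import BlockBlobService
from azure.storage.blob.models import BlobPrefix

service = BlockBlobService(account_name='myaccount', account_key='<key>')
for item in service.list_blobs('mycontainer', prefix='logs/', delimiter='/'):
    if isinstance(item, BlobPrefix):
        print('virtual dir:', item.name)   # common prefix, not a real blob
    else:
        print('blob:', item.name)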
Example 4
    def list_from_group(self, file_group, remote_path=None, num_results=None):
        """List the files in the file group.

        :param str file_group: The file group from which to list the files.
        :param str remote_path: The remote file prefix by which to filter results.
        :param int num_results: The maximum number of files to return.
        :returns: A generator of files. Each file is represented as a dictionary with
            the following keys:
            'name': The full remote file path.
            'last_modified': The time stamp for when the file was last modified locally.
            'size': The content length of the file.
            'uploaded': The time stamp for when the file was last modified remotely.
        """
        storage_client = self.get_storage_client()
        container = file_utils.get_container_name(file_group)
        properties = Include(metadata=True)
        return ({
            'name': b.name,
            'last_modified': b.metadata.get('lastmodified'),
            'size': b.properties.content_length,
            'uploaded': b.properties.last_modified
        } for b in storage_client.list_blobs(container,
                                             prefix=remote_path,
                                             num_results=num_results,
                                             include=properties))
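A usage sketch under the assumption that batch_client is an instance of the class this method belongs to (not shown here); the file group and prefix are placeholders.

# `batch_client` and the names below are assumptions for illustration
for f in batch_client.list_from_group('render-assets', remote_path='frames/'):
    print(f['name'], f['size'], f['uploaded'])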
Example 5
def storage_blob_copy_batch(client,
                            source_account,
                            source_container,
                            destination_container,
                            source_sas=None,
                            prefix=None,
                            recursive=False,
                            snapshots=False,
                            exclude_old=False,
                            exclude_new=False):
    """
    Copy blobs between containers and storage accounts. This is a server-side copy
    operation, so the command is asynchronous.

    :param str source_account:
        The account name of the source storage account

    :param str source_container:
        The source blob container

    :param str source_sas:
        The shared access signature used to access the source container. It is not required
        if a connection string is given or the source container does not require a SAS.

    :param str destination_container:
        The destination blob container

    :param str prefix:
        If option --recursive is specified, this command interprets the given pattern as a
        blob prefix. If option --recursive is not specified, the pattern is matched against
        exact blob names.

    :param bool recursive:
        Copy all the files to the given container and maintain the folder structure.

    :param bool snapshots:
        Copy both the blobs and their snapshots.

    :param bool exclude_old:
        Exclude older source resources. A resource is not copied if the last modified time
        of the source is the same as or older than the destination's.

    :param bool exclude_new:
        Exclude newer source resources. A resource is not copied if the last modified time
        of the source is the same as or newer than the destination's.

    :return: A list of BlobCopyResult instances summarizing the started copy operations
    """

    # TODO:
    # 1. Page the result list (using num_result and marker)
    # 2. Support connection string for source
    # 3. stop using 'baseblobservice.exists' function. it doesn't provide a performance
    #    gain since it invokes get_blob_properties anyway.

    # Question:
    # 1. Performance of creating a source blob service
    src_client = BlockBlobService(account_name=source_account,
                                  sas_token=source_sas)

    def _get_blob_name(source_blob):
        name = source_blob.name
        if source_blob.snapshot is not None:
            # the snapshot time string carries seven digits of fractional seconds, which
            # makes strptime nearly unusable, so the characters after the dot are discarded
            time_string = source_blob.snapshot[:source_blob.snapshot.rfind('.')]
            snapshot_time = datetime.strptime(time_string, '%Y-%m-%dT%H:%M:%S')

            # insert the date time string before the file extension
            dot = name.rfind('.')
            dot = len(name) if dot == -1 else dot
            name = '{0}({1}){2}'.format(
                name[0:dot], snapshot_time.strftime('%Y-%m-%d %H%M%S'),
                name[dot:])
        return name

    def _get_blob_url(source_blob):
        # to be removed once this issue is fixed:
        # https://github.com/Azure/azure-storage-python/issues/233
        src_url = src_client.make_blob_url(source_container,
                                           source_blob.name,
                                           sas_token=source_sas)

        if source_blob.snapshot is not None:
            # this is a blob snapshot
            if '?' in src_url:
                src_url += '&snapshot=' + str(source_blob.snapshot)
            else:
                src_url += '?snapshot=' + str(source_blob.snapshot)

        return src_url

    def _copy_single_blob(source_blob):
        kwargs = {
            "container_name": destination_container,
            "blob_name": _get_blob_name(source_blob),
            "copy_source": _get_blob_url(source_blob)
        }

        if (exclude_new or exclude_old) and client.exists(
                destination_container, source_blob.name):
            if exclude_old:
                destination_blob = client.get_blob_properties(
                    destination_container, source_blob.name)
                kwargs["source_if_modified_since"] = \
                    destination_blob.properties.last_modified
            if exclude_new:
                kwargs["destination_if_modified_since"] = \
                    source_blob.properties.last_modified

        return client.copy_blob(**kwargs)

    if recursive:
        source_blobs = src_client.list_blobs(
            source_container,
            prefix=prefix,
            include=Include(snapshots=True) if snapshots else None)
    elif src_client.exists(source_container, prefix):
        source_blobs = [
            src_client.get_blob_properties(source_container, prefix)
        ]
    else:
        source_blobs = []

    return [BlobCopyResult(b.name, _copy_single_blob(b).id)
            for b in source_blobs]
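A hedged invocation sketch; the destination client, account names, containers and SAS token are all placeholders. The first positional argument is the blob service bound to the destination account, mirroring how the CLI passes client.

from azure.storage.blob import BlockBlobService

# All names below are placeholder assumptions
dest_client = BlockBlobService(account_name='dstaccount', account_key='<key>')
results = storage_blob_copy_batch(dest_client,
                                  source_account='srcaccount',
                                  source_container='src',
                                  destination_container='dst',
                                  source_sas='<sas-token>',
                                  prefix='reports/',
                                  recursive=True,
                                  snapshots=True)
print('{0} server-side copies started'.format(len(results)))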