Example no. 1
import datetime
import os

import pandas
import requests
from azure.storage.blob import BlobServiceClient

# Setup pandas display properties
pandas.set_option('display.max_rows', None)
pandas.set_option('display.max_columns', None)
pandas.set_option('display.width', None)
pandas.set_option('display.max_colwidth', None)

# Constants
LATEST_COVERAGE_URL = "https://azuresdkartifacts.blob.core.windows.net/azure-sdk-for-java/test-coverage/jacoco.csv"
BLOB_CONTAINER_NAME = os.getenv('BLOB_CONTAINER_NAME')
STORAGE_CONNECTION_STRING = os.getenv('STORAGE_CONNECTION_STRING')
AGGREGATE_REPORT_NAME = 'aggregate/jacoco_aggregate.csv'

# Instantiate a new BlobServiceClient using a connection string
blob_service_client = BlobServiceClient.from_connection_string(
    STORAGE_CONNECTION_STRING)

# Instantiate a new ContainerClient
container_client = blob_service_client.get_container_client(
    BLOB_CONTAINER_NAME)


def download_latest_coverage():
    jacoco_artifact_csv = requests.get(LATEST_COVERAGE_URL)
    print("Latest Jacoco report was generated on " +
          jacoco_artifact_csv.headers['Last-Modified'])
    last_modified_time = datetime.datetime.strptime(
        jacoco_artifact_csv.headers['Last-Modified'],
        "%a, %d %b %Y %H:%M:%S %Z")

    with open('jacoco.csv', 'wb') as coverage_report:
        # Assumed completion of the truncated sample: persist the downloaded report.
        coverage_report.write(jacoco_artifact_csv.content)
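
For context, a hedged usage sketch (not part of the original sample) that runs the helper above and pushes the downloaded report back through the container client created earlier; the blob name is an assumption.

if __name__ == "__main__":
    download_latest_coverage()
    # Hedged sketch: 'coverage/jacoco.csv' is an assumed blob name.
    with open("jacoco.csv", "rb") as data:
        container_client.upload_blob(name="coverage/jacoco.csv",
                                     data=data,
                                     overwrite=True)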
Example no. 2
def _refresh_containers_cache_file(connection_string,
                                   container,
                                   cache_file,
                                   multiple_env=False,
                                   environment="base"):
    """
    .. versionadded:: 3001

    Downloads the entire contents of an Azure storage container to the local filesystem.

    :param connection_string: The connection string to use to access the specified Azure Blob Container.

    :param container: The name of the target Azure Blob Container.

    :param cache_file: The path where the file will be cached.

    :param multiple_env: Specifies whether the pillar should interpret top level folders as pillar environments.

    :param environment: Specifies which environment the container represents when in single environment mode. This is
        ignored if multiple_env is set to True.

    """
    try:
        # Create the BlobServiceClient object which will be used to create a container client
        blob_service_client = BlobServiceClient.from_connection_string(
            connection_string)

        # Create the ContainerClient object
        container_client = blob_service_client.get_container_client(container)
    except Exception as exc:  # pylint: disable=broad-except
        log.error("Exception: %s", exc)
        return False

    metadata = {}

    def _walk_blobs(saltenv="base", prefix=None):
        # Walk the blobs in the container with a generator
        blob_list = container_client.walk_blobs(name_starts_with=prefix)

        # Iterate over the generator
        while True:
            try:
                blob = next(blob_list)
            except StopIteration:
                break

            log.debug("Raw blob attributes: %s", blob)

            # Directories end with "/".
            if blob.name.endswith("/"):
                # Recurse into the directory, preserving the current saltenv
                _walk_blobs(saltenv=saltenv, prefix=blob.name)
                continue

            if multiple_env:
                saltenv = "base" if (not prefix
                                     or prefix == ".") else prefix[:-1]

            if saltenv not in metadata:
                metadata[saltenv] = {}

            if container not in metadata[saltenv]:
                metadata[saltenv][container] = []

            metadata[saltenv][container].append(blob)

    _walk_blobs(saltenv=environment)

    # write the metadata to disk
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    log.debug("Writing Azure blobs pillar cache file")

    with salt.utils.files.fopen(cache_file, "wb") as fp_:
        pickle.dump(metadata, fp_)

    return metadata
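
A hedged companion sketch (assuming the same salt.utils.files and pickle imports as the function above): reading the cached metadata back, roughly as the pillar would consume it.

def _read_containers_cache_file(cache_file):
    """
    Return the pickled metadata previously written by
    _refresh_containers_cache_file (sketch; not part of the original snippet).
    """
    log.debug("Reading Azure blobs pillar cache file")
    with salt.utils.files.fopen(cache_file, "rb") as fp_:
        return pickle.load(fp_)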
Example no. 3
AUTHORITY_HOST_URI = 'https://login.microsoftonline.com'
AUTHORITY_URI = AUTHORITY_HOST_URI + '/' + TENANT_ID
RESOURCE_URI = 'https://management.core.windows.net/'
# Create token to authenticate to storage account
context = adal.AuthenticationContext(AUTHORITY_URI, api_version=None)
mgmt_token = context.acquire_token_with_client_credentials(
    RESOURCE_URI, CLIENT_ID, CLIENT_SECRET)
credentials = AADTokenCredentials(mgmt_token, CLIENT_ID)
token_credential = ClientSecretCredential(TENANT_ID, CLIENT_ID, CLIENT_SECRET)
# Create global handlers and variables
adls_service_client = DataLakeServiceClient(
    account_url="{}://{}.dfs.core.windows.net".format("https",
                                                      STORAGE_ACCOUNT_NAME),
    credential=token_credential)
blob_service_client = BlobServiceClient(
    account_url="{}://{}.blob.core.windows.net".format("https",
                                                       STORAGE_ACCOUNT_NAME),
    credential=token_credential)
currentTime = datetime.now()
container_client = blob_service_client.get_container_client(CONTAINER_NAME)


def list_snapshots_blob(name=""):

    blob_list = container_client.list_blobs(name_starts_with=name,
                                            include=['snapshots', 'metadata'])
    for snapshot in blob_list:
        #print(str(snapshot))
        print("{}, {}, {}".format(snapshot.name, snapshot.snapshot,
                                  snapshot.size))
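
A hedged companion sketch that creates a snapshot so the listing above has something to show; the helper and the blob name argument are placeholders, not part of the original snippet.

def create_snapshot_blob(name):
    # Create a snapshot of an existing blob using the module-level
    # container_client defined above.
    blob_client = container_client.get_blob_client(name)
    snapshot_props = blob_client.create_snapshot()
    print(name + ' snapshot: ' + str(snapshot_props['snapshot']))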
Example no. 4
def init_azure_storage() -> BlobServiceClient:
    connect_str = os.getenv('AZURE_BLOB_CONNECT_STR')
    return BlobServiceClient.from_connection_string(connect_str)
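
A minimal, hedged usage sketch; the container name is a placeholder, the AZURE_BLOB_CONNECT_STR environment variable is assumed to be set, and os/BlobServiceClient are assumed to be imported as in the other examples.

if __name__ == "__main__":
    service = init_azure_storage()
    container = service.get_container_client("my-container")  # assumed name
    for blob in container.list_blobs():
        print(blob.name)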
Example no. 5
def speech_recognize_once_from_file():
    """performs one-shot speech recognition with input from an audio file"""
    # <SpeechRecognitionWithFile>
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=inputfilename)
    # Creates a speech recognizer using a file as audio input, also specifying the speech language
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   language="en-US",
                                                   audio_config=audio_config)

    # Starts speech recognition, and returns after a single utterance is recognized. The end of a
    # single utterance is determined by listening for silence at the end or until a maximum of 15
    # seconds of audio is processed. It returns the recognition text as result.
    # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
    # shot recognition like command or query.
    # For long-running multi-utterance recognition, use start_continuous_recognition() instead.
    result = speech_recognizer.recognize_once()

    # Check the result
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(
            result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(
            cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(
                cancellation_details.error_details))

    # Create a dataframe and save the recognized text in CSV format
    df = pd.DataFrame()
    df['comment'] = [str(result.text)]
    df.to_csv('speech2text.csv')

    # </SpeechRecognitionWithFile>

    try:
        # Read the connection string from the environment so the KeyError
        # handler below can report a missing setting.
        CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']

    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)
    blob_service_client = BlobServiceClient.from_connection_string(
        CONNECTION_STRING)

    # Instantiate a new ContainerClient
    container_client = blob_service_client.get_container_client(container_name)

    try:
        # Create new Container in the service
        container_client.create_container()

        # Instantiate a new BlobClient
        blob_client = container_client.get_blob_client(outputfilename)
        print("")
        print("")
        print("Output file is uploaded to Blob storage")

        # [START upload_a_blob]
        # Upload content to block blob
        with open(outputfilename, "rb") as data:
            blob_client.upload_blob(data)  #, blob_type="BlockBlob"
        # [END upload_a_blob]

        # [START delete_blob]
        #blob_client.delete_blob()
        # [END delete_blob]

    finally:
        # Delete the container
        #container_client.delete_container()
        pass
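
The function above relies on several module-level names that the snippet does not show; a hedged sketch of that assumed configuration follows (every value here is a placeholder).

import os
import sys

import pandas as pd
import azure.cognitiveservices.speech as speechsdk
from azure.storage.blob import BlobServiceClient

# Placeholder configuration; the connection string itself is read from the
# AZURE_STORAGE_CONNECTION_STRING environment variable inside the function.
speech_key = os.getenv("SPEECH_KEY")
service_region = os.getenv("SPEECH_REGION", "westus")
inputfilename = "input.wav"
outputfilename = "speech2text.csv"
container_name = "speech-output"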
Example no. 6
class AzureStorage(BaseStorage):
    def __init__(
        self,
        context,
        azure_container,
        storage_path,
        azure_account_name,
        azure_account_key=None,
        sas_token=None,
        connection_string=None,
    ):
        super(AzureStorage, self).__init__()
        self._context = context
        self._storage_path = storage_path.lstrip("/")

        self._azure_account_name = azure_account_name
        self._azure_account_key = azure_account_key
        self._azure_sas_token = sas_token
        self._azure_container = azure_container
        self._azure_connection_string = connection_string

        self._blob_service_client = BlobServiceClient(
            AZURE_STORAGE_URL_STRING.format(self._azure_account_name),
            credential=self._azure_account_key,
        )

        # https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
        api_version = self._blob_service_client.api_version
        api_version_dt = datetime.strptime(api_version, "%Y-%m-%d")
        if api_version_dt < _API_VERSION_LIMITS["2016-05-31"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2016-05-31"][1]
        elif api_version_dt <= _API_VERSION_LIMITS["2019-07-07"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-07-07"][1]
        elif api_version_dt >= _API_VERSION_LIMITS["2019-12-12"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-12-12"][1]
        else:
            raise Exception("Unknown Azure api version %s" % api_version)

    def _blob_name_from_path(self, object_path):
        if ".." in object_path:
            raise Exception("Relative paths are not allowed; found %s" %
                            object_path)

        return os.path.join(self._storage_path, object_path).rstrip("/")

    def _upload_blob_path_from_uuid(self, uuid):
        return self._blob_name_from_path(
            self._upload_blob_name_from_uuid(uuid))

    def _upload_blob_name_from_uuid(self, uuid):
        return "uploads/{0}".format(uuid)

    def _blob(self, blob_name):
        return self._blob_service_client.get_blob_client(
            self._azure_container, blob_name)

    @property
    def _container(self):
        return self._blob_service_client.get_container_client(
            self._azure_container)

    def get_direct_download_url(self,
                                object_path,
                                request_ip=None,
                                expires_in=60,
                                requires_cors=False,
                                head=False):
        blob_name = self._blob_name_from_path(object_path)

        try:
            sas_token = generate_blob_sas(
                self._azure_account_name,
                self._azure_container,
                blob_name,
                account_key=self._azure_account_key,
                permission=ContainerSasPermissions.from_string("r"),
                expiry=datetime.utcnow() + timedelta(seconds=expires_in),
            )

            blob_url = "{}?{}".format(self._blob(blob_name).url, sas_token)

        except AzureError:
            logger.exception(
                "Exception when trying to get direct download for path %s",
                object_path)
            raise IOError("Exception when trying to get direct download")

        return blob_url

    def validate(self, client):
        super(AzureStorage, self).validate(client)

    def get_content(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob_stream = self._blob(blob_name).download_blob()
        except AzureError:
            logger.exception("Exception when trying to get path %s", path)
            raise IOError("Exception when trying to get path")

        return blob_stream.content_as_bytes()

    def put_content(self, path, content):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).upload_blob(content,
                                              blob_type=BlobType.BlockBlob,
                                              overwrite=True)
        except AzureError:
            logger.exception("Exception when trying to put path %s", path)
            raise IOError("Exception when trying to put path")

    def stream_read(self, path):
        with self.stream_read_file(path) as f:
            while True:
                buf = f.read(self.buffer_size)
                if not buf:
                    break
                yield buf

    def stream_read_file(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
            output_stream = io.BytesIO()
            self._blob(blob_name).download_blob().download_to_stream(
                output_stream)
            output_stream.seek(0)
        except AzureError:
            logger.exception(
                "Exception when trying to stream_file_read path %s", path)
            raise IOError("Exception when trying to stream_file_read path")

        return output_stream

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        blob_name = self._blob_name_from_path(path)
        content_settings = ContentSettings(
            content_type=content_type,
            content_encoding=content_encoding,
        )

        try:
            self._blob(blob_name).upload_blob(
                fp, content_settings=content_settings, overwrite=True)
        except AzureError as ae:
            logger.exception("Exception when trying to stream_write path %s",
                             path)
            raise IOError("Exception when trying to stream_write path", ae)

    def exists(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
            self._blob(blob_name).get_blob_properties()
        except ResourceNotFoundError:
            return False
        except AzureError:
            logger.exception("Exception when trying to check exists path %s",
                             path)
            raise IOError("Exception when trying to check exists path")

        return True

    def remove(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).delete_blob()
        except AzureError:
            logger.exception("Exception when trying to remove path %s", path)
            raise IOError("Exception when trying to remove path")

    def get_checksum(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob_properties = self._blob(blob_name).get_blob_properties()
        except AzureError:
            logger.exception(
                "Exception when trying to get_checksum for path %s", path)
            raise IOError("Exception when trying to get_checksum path")
        return blob_properties.etag

    def initiate_chunked_upload(self):
        random_uuid = str(uuid.uuid4())
        metadata = {
            _BLOCKS_KEY: [],
            _CONTENT_TYPE_KEY: None,
        }
        return random_uuid, metadata

    def stream_upload_chunk(self,
                            uuid,
                            offset,
                            length,
                            in_fp,
                            storage_metadata,
                            content_type=None):
        if length == 0:
            return 0, storage_metadata, None

        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        new_metadata = copy.deepcopy(storage_metadata)

        total_bytes_written = 0

        while True:
            current_length = length - total_bytes_written
            max_length = (min(current_length, self._max_block_size) if
                          length != READ_UNTIL_END else self._max_block_size)
            if max_length <= 0:
                break

            limited = LimitingStream(in_fp, max_length, seekable=False)

            # Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
            # and, if there is none, terminate early.
            block_data = b""
            for chunk in iter(lambda: limited.read(31457280), b""):
                block_data += chunk

            if len(block_data) == 0:
                break

            block_index = len(new_metadata[_BLOCKS_KEY])
            block_id = format(block_index, "05")
            new_metadata[_BLOCKS_KEY].append(block_id)

            try:
                self._blob(upload_blob_path).stage_block(block_id,
                                                         block_data,
                                                         validate_content=True)
            except AzureError as ae:
                logger.exception(
                    "Exception when trying to stream_upload_chunk block %s for %s",
                    block_id, uuid)
                return total_bytes_written, new_metadata, ae

            bytes_written = len(block_data)
            total_bytes_written += bytes_written
            if bytes_written == 0 or bytes_written < max_length:
                break

        if content_type is not None:
            new_metadata[_CONTENT_TYPE_KEY] = content_type

        return total_bytes_written, new_metadata, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
        """
        Complete the chunked upload and store the final results in the path indicated.

        Returns nothing.
        """
        # Commit the blob's blocks.
        upload_blob_name = self._upload_blob_name_from_uuid(
            uuid)  # uploads/<uuid>
        upload_blob_path = self._upload_blob_path_from_uuid(
            uuid)  # <storage_path>/uploads/<uuid>
        block_list = [
            BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]
        ]

        try:
            if storage_metadata[_CONTENT_TYPE_KEY] is not None:
                content_settings = ContentSettings(
                    content_type=storage_metadata[_CONTENT_TYPE_KEY])
                self._blob(upload_blob_path).commit_block_list(
                    block_list, content_settings=content_settings)
            else:
                self._blob(upload_blob_path).commit_block_list(block_list)
        except AzureError:
            logger.exception(
                "Exception when trying to put block list for path %s from upload %s",
                final_path,
                uuid,
            )
            raise IOError("Exception when trying to put block list")

        # Copy the blob to its final location.
        upload_blob_name = self._upload_blob_name_from_uuid(uuid)
        copy_source_url = self.get_direct_download_url(upload_blob_name,
                                                       expires_in=300)

        try:
            final_blob_name = self._blob_name_from_path(final_path)
            cp = self._blob(final_blob_name).start_copy_from_url(
                copy_source_url)
        except AzureError:
            logger.exception(
                "Exception when trying to set copy uploaded blob %s to path %s",
                uuid, final_path)
            raise IOError("Exception when trying to copy uploaded blob")

        self._await_copy(final_blob_name)

        # Delete the original blob.
        logger.debug("Deleting chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except AzureError:
            logger.exception(
                "Exception when trying to set delete uploaded blob %s", uuid)
            raise IOError("Exception when trying to delete uploaded blob")

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """
        Cancel the chunked upload and clean up any outstanding partially uploaded data.

        Returns nothing.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug("Canceling chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except ResourceNotFoundError:
            pass

    def _await_copy(self, blob_name):
        # Poll for copy completion.
        blob = self._blob(blob_name)
        copy_prop = blob.get_blob_properties().copy

        count = 0
        while copy_prop.status == "pending":
            props = blob.get_blob_properties()
            copy_prop = props.copy

            if copy_prop.status == "success":
                return

            if copy_prop.status == "failed" or copy_prop.status == "aborted":
                raise IOError("Copy of blob %s failed with status %s" %
                              (blob_name, copy_prop.status))

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError("Timed out waiting for copy to complete")

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        if self.__class__ == destination.__class__:
            logger.debug(
                "Starting copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            copy_source_url = self.get_direct_download_url(path)
            blob_name = destination._blob_name_from_path(path)
            dest_blob = destination._blob(blob_name)

            dest_blob.start_copy_from_url(copy_source_url)
            destination._await_copy(blob_name)
            logger.debug(
                "Finished copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            return

        # Fallback to a slower, default copy.
        logger.debug(
            "Copying file from Azure container %s to %s via a streamed copy",
            self._azure_container,
            destination,
        )
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def setup(self):
        # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
        cors = [
            CorsRule(
                allowed_origins="*",
                allowed_methods=["GET", "PUT"],
                max_age_in_seconds=3000,
                exposed_headers=["x-ms-meta-*"],
                allowed_headers=[
                    "x-ms-meta-data*",
                    "x-ms-meta-target*",
                    "x-ms-meta-abc",
                    "Content-Type",
                ],
            )
        ]

        self._blob_service_client.set_service_properties(cors=cors)
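
A hedged usage sketch of the class above; the constructor arguments are placeholders, and module-level constants such as AZURE_STORAGE_URL_STRING and _API_VERSION_LIMITS (referenced in __init__ but not shown in the snippet) are assumed to be defined.

storage = AzureStorage(
    context=None,                       # placeholder context object
    azure_container="my-container",
    storage_path="/registry",
    azure_account_name="myaccount",
    azure_account_key="<account-key>",
)
storage.put_content("hello.txt", b"hello world")
print(storage.get_content("hello.txt"))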
Example no. 7
class AzureBlobFileSystem(AbstractFileSystem):
    """
    Access Azure Data Lake Gen2 and Azure Storage as if it were a file system, using Multiprotocol Access

    Parameters
    ----------
    account_name: str
        The storage account name. This is used to authenticate requests
        signed with an account key and to construct the storage endpoint. It
        is required unless a connection string is given, or if a custom
        domain is used with anonymous authentication.
    account_key: str
        The storage account key. This is used for shared key authentication.
        If none of account key, sas token or client_id is specified, anonymous access
        will be used.
    sas_token: str
        A shared access signature token to use to authenticate requests
        instead of the account key. If account key and sas token are both
        specified, account key will be used to sign. If none of account key, sas token
        or client_id is specified, anonymous access will be used.
    request_session: Session
        The session object to use for http requests.
    connection_string: str
        If specified, this will override all other parameters besides
        request session. See
        http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
        for the connection string format.
    socket_timeout: int
        If specified, this will override the default socket timeout. The timeout specified is in
        seconds.
        See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
    credential: TokenCredential or SAS token
        The credentials with which to authenticate.  Optional if the account URL already has a SAS token.
        Can include an instance of TokenCredential class from azure.identity
    blocksize: int
        The block size to use for download/upload operations. Defaults to the value of
        ``BlockBlobService.MAX_BLOCK_SIZE``
    client_id: str
        Client ID to use when authenticating using an AD Service Principal client/secret.
    client_secret: str
        Client secret to use when authenticating using an AD Service Principal client/secret.
    tenant_id: str
        Tenant ID to use when authenticating using an AD Service Principal client/secret.

    Examples
    --------
    Authentication with an account_key
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX", container_name="XXXX")
    >>> abfs.ls('')

    **  Sharded Parquet & csv files can be read as: **
        ------------------------------------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY})

        ddf = dd.read_parquet('abfs://container_name/folder.parquet', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY,})

    Authentication with an Azure ServicePrincipal
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", tenant_id=TENANT_ID,
        ...    client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    >>> abfs.ls('')

    **  Read files as: **
        -------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
            'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET})
    """

    protocol = "abfs"

    def __init__(
        self,
        account_name: str,
        account_key: str = None,
        connection_string: str = None,
        credential: str = None,
        sas_token: str = None,
        request_session=None,
        socket_timeout: int = None,
        blocksize: int = create_configuration(
            storage_sdk="blob").max_block_size,
        client_id: str = None,
        client_secret: str = None,
        tenant_id: str = None,
    ):
        AbstractFileSystem.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.connection_string = connection_string
        self.credential = credential
        self.sas_token = sas_token
        self.request_session = request_session
        self.socket_timeout = socket_timeout
        self.blocksize = blocksize
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id

        if (self.credential is None and self.account_key is None
                and self.sas_token is None and self.client_id is not None):
            self.credential = self._get_credential_from_service_principal()
        self.do_connect()

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Remove the protocol from the input path

        Parameters
        ----------
        path: str
            Path to remove the protocol from

        Returns
        -------
        str
            Returns a path without the protocol
        """
        logging.debug(f"_strip_protocol for {path}")
        ops = infer_storage_options(path)

        # we need to make sure that the path retains
        # the format {host}/{path}
        # here host is the container_name
        if ops.get("host", None):
            ops["path"] = ops["host"] + ops["path"]
        ops["path"] = ops["path"].lstrip("/")

        logging.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"]

    def _get_credential_from_service_principal(self):
        """
        Create a Credential for authentication.  This can include a TokenCredential
        client_id, client_secret and tenant_id

        Returns
        -------
        Credential
        """
        from azure.identity import ClientSecretCredential

        sp_token = ClientSecretCredential(
            tenant_id=self.tenant_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
        )
        return sp_token

    def do_connect(self):
        """Connect to the BlobServiceClient, using user-specified connection details.
        Tries credentials first, then connection string and finally account key

        Raises
        ------
        ValueError if none of the connection details are available
        """
        self.account_url: str = f"https://{self.account_name}.blob.core.windows.net"
        if self.credential is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.credential)
        elif self.connection_string is not None:
            self.service_client = BlobServiceClient.from_connection_string(
                conn_str=self.connection_string)
        elif self.account_key is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.account_key)
        else:
            raise ValueError("unable to connect with provided params!!")

    def split_path(self,
                   path,
                   delimiter="/",
                   return_container: bool = False,
                   **kwargs):
        """
        Normalize ABFS path string into bucket and key.

        Parameters
        ----------
        path : string
            Input path, like `abfs://my_container/path/to/file`

        delimiter: string
            Delimiter used to split the path

        return_container: bool

        Examples
        --------
        >>> split_path("abfs://my_container/path/to/file")
        ['my_container', 'path/to/file']
        """

        if path in ["", delimiter]:
            return "", ""

        path = self._strip_protocol(path)
        path = path.lstrip(delimiter)
        if "/" not in path:
            # this means path is the container_name
            return path, ""
        else:
            return path.split(delimiter, 1)

    # def _generate_blobs(self, *args, **kwargs):
    #     """Follow next_marker to get ALL results."""
    #     logging.debug("running _generate_blobs...")
    #     blobs = self.blob_fs.list_blobs(*args, **kwargs)
    #     yield from blobs
    #     while blobs.next_marker:
    #         logging.debug(f"following next_marker {blobs.next_marker}")
    #         kwargs["marker"] = blobs.next_marker
    #         blobs = self.blob_fs.list_blobs(*args, **kwargs)
    #         yield from blobs

    # def _matches(
    #     self, container_name, path, as_directory=False, delimiter="/", **kwargs
    # ):
    #     """check if the path returns an exact match"""

    #     path = path.rstrip(delimiter)
    #     gen = self.blob_fs.list_blob_names(
    #         container_name=container_name,
    #         prefix=path,
    #         delimiter=delimiter,
    #         num_results=None,
    #     )

    #     contents = list(gen)
    #     if not contents:
    #         return False

    #     if as_directory:
    #         return contents[0] == path + delimiter
    #     else:
    #         return contents[0] == path

    def ls(
        self,
        path: str,
        detail: bool = False,
        invalidate_cache: bool = True,
        delimiter: str = "/",
        return_glob: bool = False,
        **kwargs,
    ):
        """
        Create a list of blob names from a blob container

        Parameters
        ----------
        path: str
            Path to an Azure Blob with its container name

        detail: bool
            If False, return a list of blob names, else a list of dictionaries with blob details

        invalidate_cache:  bool
            If True, do not use the cache

        delimiter: str
            Delimiter used to split paths

        return_glob: bool

        """

        logging.debug(f"abfs.ls() is searching for {path}")

        container, path = self.split_path(path)
        if (container in ["", delimiter]) and (path in ["", delimiter]):
            # This is the case where only the containers are being returned
            logging.info(
                "Returning a list of containers in the azure blob storage account"
            )
            if detail:
                contents = self.service_client.list_containers(
                    include_metadata=True)
                return self._details(contents)
            else:
                contents = self.service_client.list_containers()
                return [f"{c.name}{delimiter}" for c in contents]

        else:
            if container not in ["", delimiter]:
                # This is the case where the container name is passed
                container_client = self.service_client.get_container_client(
                    container=container)
                blobs = container_client.walk_blobs(name_starts_with=path)
                try:
                    blobs = [blob for blob in blobs]
                except Exception:
                    raise FileNotFoundError
                if len(blobs) > 1:
                    if return_glob:
                        return self._details(blobs, return_glob=True)
                    if detail:
                        return self._details(blobs)
                    else:
                        return [
                            f"{blob.container}{delimiter}{blob.name}"
                            for blob in blobs
                        ]
                elif len(blobs) == 1:
                    if (blobs[0].name.rstrip(delimiter)
                            == path) and not blobs[0].has_key(  # NOQA
                                "blob_type"):

                        path = blobs[0].name
                        blobs = container_client.walk_blobs(
                            name_starts_with=path)
                        if return_glob:
                            return self._details(blobs, return_glob=True)
                        if detail:
                            return self._details(blobs)
                        else:
                            return [
                                f"{blob.container}{delimiter}{blob.name}"
                                for blob in blobs
                            ]
                    elif isinstance(blobs[0], BlobPrefix):
                        if detail:
                            for blob_page in blobs:
                                return self._details(blob_page)
                        else:
                            outblobs = []
                            for blob_page in blobs:
                                for blob in blob_page:
                                    outblobs.append(
                                        f"{blob.container}{delimiter}{blob.name}"
                                    )
                            return outblobs
                    elif blobs[0]["blob_type"] == "BlockBlob":
                        if detail:
                            return self._details(blobs)
                        else:
                            return [
                                f"{blob.container}{delimiter}{blob.name}"
                                for blob in blobs
                            ]
                    elif isinstance(blobs[0], ItemPaged):
                        outblobs = []
                        for page in blobs:
                            for b in page:
                                outblobs.append(b)
                        return outblobs
                    else:
                        raise FileNotFoundError(
                            f"Unable to identify blobs in {path} for {blobs[0].name}"
                        )
                elif len(blobs) == 0:
                    if return_glob or (path in ["", delimiter]):
                        return []
                    else:
                        raise FileNotFoundError
                else:
                    raise FileNotFoundError

    def _details(self,
                 contents,
                 delimiter="/",
                 return_glob: bool = False,
                 **kwargs):
        """
        Return a list of dictionaries of specifying details about the contents

        Parameters
        ----------
        contents

        delimiter: str
            Delimiter used to separate containers and files

        return_glob: bool


        Returns
        -------
        List of dicts
            Returns details about the contents, such as name, size and type
        """
        pathlist = []
        for c in contents:
            data = {}
            if c.has_key("container"):  # NOQA
                data["name"] = f"{c.container}{delimiter}{c.name}"
                if c.has_key("size"):  # NOQA
                    data["size"] = c.size
                else:
                    data["size"] = 0
                if data["size"] == 0:
                    data["type"] = "directory"
                else:
                    data["type"] = "file"
            else:
                data["name"] = f"{c.name}{delimiter}"
                data["size"] = 0
                data["type"] = "directory"
            if return_glob:
                data["name"] = data["name"].rstrip("/")

            pathlist.append(data)
        return pathlist

    def walk(self, path: str, maxdepth=None, **kwargs):
        """ Return all files belows path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into

        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.

        **kwargs are passed to ``ls``
        """
        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, return_glob=True, **kwargs)
        except (FileNotFoundError, IOError):
            return [], [], []

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[pathname] = info
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in full_dirs:
            yield from self.walk(d, maxdepth=maxdepth, detail=detail, **kwargs)

    def mkdir(self, path, delimiter="/", exists_ok=False, **kwargs):
        """
        Create directory entry at path

        Parameters
        ----------
        path: str
            The path to create

        delimiter: str
            Delimiter to use when splitting the path

        exists_ok: bool
            If False, raise an exception if the directory already exists. Defaults to False
        """
        container_name, path = self.split_path(path, delimiter=delimiter)
        if not exists_ok:
            if (container_name not in self.ls("")) and (not path):
                # create new container
                self.service_client.create_container(name=container_name)
            elif (container_name in [
                    container_path.split("/")[0]
                    for container_path in self.ls("")
            ]) and path:
                ## attempt to create prefix
                container_client = self.service_client.get_container_client(
                    container=container_name)
                container_client.upload_blob(name=path, data="")
            else:
                ## everything else
                raise RuntimeError(
                    f"Cannot create {container_name}{delimiter}{path}.")
        else:
            if container_name in self.ls("") and path:
                container_client = self.service_client.get_container_client(
                    container=container_name)
                container_client.upload_blob(name=path, data="")

    def rmdir(self, path: str, delimiter="/", **kwargs):
        """
        Remove a directory, if empty

        Parameters
        ----------
        path: str
            Path of directory to remove

        delimiter: str
            Delimiter to use when splitting the path

        """

        container_name, path = self.split_path(path, delimiter=delimiter)
        if (container_name + delimiter in self.ls("")) and (not path):
            # delete container
            self.service_client.delete_container(container_name)

    def _rm(self, path, delimiter="/", **kwargs):
        """
        Delete a given file

        Parameters
        ----------
        path: str
            Path to file to delete

        delimiter: str
            Delimiter to use when splitting the path
        """
        if self.isfile(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name)
            logging.debug(f"Delete blob {path} in {container_name}")
            container_client.delete_blob(path)
        elif self.isdir(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name)
            if (container_name + delimiter in self.ls("")) and (not path):
                logging.debug(f"Delete container {container_name}")
                container_client.delete_container(container_name)
        else:
            raise RuntimeError(f"cannot delete {path}")

    def _open(
        self,
        path: str,
        mode: str = "rb",
        block_size: int = None,
        autocommit: bool = True,
        cache_options=None,
        **kwargs,
    ):
        """Open a file on the datalake, or a block blob

        Parameters
        ----------
        path: str
            Path to file to open

        mode: str
            What mode to open the file in - defaults to "rb"

        block_size: int
            Size per block for multi-part downloads.

        autocommit: bool
            Whether or not to write to the destination directly

        cache_type: str
            One of "readahead", "none", "mmap", "bytes", defaults to "readahead"
            Caching policy in read mode.
            See the definitions here:
            https://filesystem-spec.readthedocs.io/en/latest/api.html#readbuffering
        """
        logging.debug(f"_open:  {path}")
        return AzureBlobFile(
            fs=self,
            path=path,
            mode=mode,
            block_size=block_size or self.blocksize,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
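
A hedged usage sketch for the filesystem above; the account credentials and paths are placeholders, and the AzureBlobFile class returned by _open is assumed to be defined elsewhere in the module.

fs = AzureBlobFileSystem(account_name="myaccount", account_key="<key>")
print(fs.ls("my-container"))                     # list a container
with fs.open("my-container/folder/data.csv", "rb") as f:
    print(f.read(100))                           # read the first 100 bytes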
Example no. 8
def enumerate_prefix(prefix, sas_url, output_folder):

    account_name = sas_blob_utils.get_account_from_uri(sas_url)
    container_name = sas_blob_utils.get_container_from_uri(sas_url)
    ro_sas_token = sas_blob_utils.get_sas_token_from_uri(sas_url)
    assert not ro_sas_token.startswith('?')
    ro_sas_token = '?' + ro_sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    # prefix = prefixes[0]; print(prefix)

    print('Starting enumeration for prefix {}'.format(prefix))

    # Open the output file
    fn = path_utils.clean_filename(prefix)
    output_file = os.path.join(output_folder, fn)

    # Create the blob service client and the container client
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob, credential=ro_sas_token)

    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    with open(output_file, 'w') as output_f:

        continuation_token = ''
        hit_debug_limit = False
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                    continuation_token=continuation_token)
            blobs = next(blobs_iter)

            n_blobs_this_page = 0

            for blob in blobs:
                i_blob += 1
                n_blobs_this_page += 1
                if (debug_max_files > 0) and (i_blob > debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    i_blob -= 1
                    hit_debug_limit = True
                    break
                else:
                    output_f.write(blob.name + '\n')

            # print('Enumerated {} blobs'.format(n_blobs_this_page))
            cnt.increment(n=n_blobs_this_page)

            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for prefix {}'.format(i_blob, prefix))
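
A hedged driver sketch for the function above; the SAS URL and prefixes are placeholders, and the module-level helpers and knobs it uses (sas_blob_utils, path_utils, n_blobs_per_page, debug_max_files, sleep_time_per_page, cnt) are assumed to be defined elsewhere in the original script.

if __name__ == '__main__':
    sas_url = 'https://myaccount.blob.core.windows.net/my-container?<sas-token>'
    output_folder = 'enumeration_output'
    os.makedirs(output_folder, exist_ok=True)
    for prefix in ['images/', 'metadata/']:
        enumerate_prefix(prefix, sas_url, output_folder)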
Example no. 9
def connect_blob(connect_str):
    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    return blob_service_client
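
A hedged usage sketch; the connection string is read from an assumed environment variable.

import os

blob_service_client = connect_blob(os.getenv("AZURE_STORAGE_CONNECTION_STRING"))
for container in blob_service_client.list_containers():
    print(container.name)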
Example no. 10
class Connector:
    def __init__(self, path=None, storage_account=None, container=None):

        logging.basicConfig(level=logging.INFO)

        self.storage_account = storage_account
        self.container = container

        if path:
            parsed_path = self.parse_azure_path(path)
            self.storage_account = parsed_path["storage_account"]
            self.container = parsed_path["container"]

        # Get credentials (DefaultAzureCredential tries several methods,
        # including the Azure CLI)
        self.credential = DefaultAzureCredential()

        # Create class-wide storage account and container clients if names are passed
        if self.storage_account:
            blob_storage_url = self.get_blob_storage_url(
                storage_account=self.storage_account
            )
            self.blob_service_client = BlobServiceClient(
                credential=self.credential, account_url=blob_storage_url
            )
            if self.container:
                container_names = [
                    container.name
                    for container in self.blob_service_client.list_containers()
                ]
                if self.container in container_names:
                    self.container_client = (
                        self.blob_service_client.get_container_client(
                            container=self.container
                        )
                    )
                else:
                    raise ValueError(
                        f"The container: {self.container} is not in the storage account: {self.storage_account}"
                    )

    @arguments_decorator()
    def get_blob_storage_url(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> str:
        """
        Returns the storage account url for the path or storage_account name passed

        :param path: str: optional An azure path. Defaults to None.
        :param storage_account: str: optional Storage account name. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.

        :return str: The storage account url in the form: https://{storage_account}.blob.core.windows.net/
        """
        return f"https://{storage_account}.blob.core.windows.net/"

    def parse_azure_path(self, path: str) -> dict:
        """
        Parse an Azure URL into storage_account, container and filepath.
        If passing a URL of the form azure://container/filepath, the storage account is
        taken from the class instance. If no storage account was passed to the class,
        the storage account will be None.

        :param path: str: The azure blob path
        :return: dict: A dictionary containing the storage account, container name and filepath
        """
        storage_account = self.storage_account
        container = self.container

        if path.startswith("https://"):
            storage_account = re.findall(
                r"https://(.*)\.blob\.core\.windows\.net", path
            )[0]
            path = path.replace(f"https://{storage_account}.blob.core.windows.net/", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)

        elif path.startswith("azure://"):
            path = path.replace("azure://", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)

        else:
            filepath = path
        return {
            "storage_account": storage_account,
            "container": container,
            "file_path": filepath,
        }

    def is_azure_path(self, path: str) -> bool:
        """
        Returns true if the path is of a recognised azure path format

        :param path: str: The path to test

        :return bool: True if path is of an accepted azure path format
        """
        patterns = [r"https://.*\.blob.core.windows.net", r"azure://"]
        return any([bool(re.match(p, path)) for p in patterns])

    @arguments_decorator()
    def get_blob_service_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> BlobServiceClient:
        """
        Returns a blob service client for the specified storage account. If no parameters are passed the class values are used

        :param path: str: optional An azure path, the storage account will be used to create a client. Defaults to None.
        :param storage_account: str: optional The name of the storage account to create a client for. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.

        :return BlobServiceClient: An azure blobserviceclient for the specified storage account
        """
        if storage_account == self.storage_account:
            return self.blob_service_client
        else:
            blob_storage_url = self.get_blob_storage_url(
                storage_account=storage_account
            )
            return BlobServiceClient(
                credential=self.credential, account_url=blob_storage_url
            )

    @arguments_decorator()
    def get_container_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> ContainerClient:
        """
        Returns a container client when a container name in the storage account is passed. If no params are passed the class values will be used

        :param path: str: optional An Azure path, the container in the path will be used. Defaults to None.
        :param storage_account: str: optional A storage account name containing the container. Defaults to None.
        :param container: str: optional The name of the container to create a client for. Defaults to None.
        :param file_path: str: optional The file path will ultimately be ignored. Defaults to None.

        :exception ValueError: Raised if the container does not exist in the storage account

        :return ContainerClient: An Azure client for the container
        """
        if storage_account == self.storage_account and container == self.container:
            return self.container_client
        else:
            client = self.get_blob_service_client(storage_account=storage_account)
            container_names = [container.name for container in client.list_containers()]
            if container in container_names:
                return client.get_container_client(container=container)
            else:
                raise ValueError(
                    f"The container: {container} is not in the storage account: {storage_account}"
                )

    @arguments_decorator()
    def list_blobs(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> list:
        """
        Returns a list of blobs, with paths that match the path passed

        :param path: str: optional An azure path to search for blobs. Defaults to None.
        :param storage_account: str: optional storage account name. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional the prefix file path. Defaults to None.

        :return list: Blobs in the path passed
        """
        container_client = self.get_container_client(
            storage_account=storage_account, container=container
        )
        if file_path:
            blob_iter = container_client.list_blobs(name_starts_with=file_path)
            return [blob.name.replace(file_path, "") for blob in blob_iter]
        else:
            blob_iter = container_client.list_blobs()
            return [blob.name for blob in blob_iter]

    @multi_arguments_decorator(local_support=True)
    def download_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Copy a folder from azure to a local path

        :param source_path: str: optional An Azure path to the folder to download. Defaults to None.
        :param source_storage_account: str: optional The storage account name. Defaults to None.
        :param source_container: str: optional The container name. Defaults to None.
        :param source_file_path: str: optional The path to the folder to download. Defaults to None.
        :param dest_path: str: optional The local path to download the folder to. Defaults to None.
        :param dest_storage_account: str: optional Ignored. Defaults to None.
        :param dest_container: str: optional Ignored. Defaults to None.
        :param dest_file_path: str: optional Ignored. Defaults to None.

        :exception ValueError: Raised when destination path is an azure path
        """
        container_client = self.get_container_client(
            storage_account=source_storage_account, container=source_container
        )

        if self.is_azure_path(dest_path):
            raise ValueError(
                f"Expected destination to be local path got azure path: {dest_path}"
            )
        os.makedirs(dest_path, exist_ok=True)

        for blob in container_client.list_blobs(source_file_path):
            file_name = os.path.basename(blob.name)
            local_path = os.path.join(dest_path, file_name)
            with open(local_path, "wb") as f:
                logging.info(f"Downloading {blob.name} to {local_path}")
                blob_data = container_client.download_blob(blob.name)
                blob_data.readinto(f)
            logging.info("Completed Download")

    @arguments_decorator()
    def blob_exists(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ):
        """
        Checks whether a file exists in azure and returns a bool

        :param path: str: optional Azure path to file to check. Defaults to None.
        :param storage_account: str: optional Storage account. Defaults to None.
        :param container: str: optional Container. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.

        :return bool: True if the file exists
        """

        client = self.get_blob_service_client(storage_account=storage_account)
        blob_client = client.get_blob_client(container, file_path)
        return blob_client.exists()

    @multi_arguments_decorator(local_support=True)
    def upload_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Upload a directory to an azure location. Subdirectories are not currently supported

        :param source_path: str: optional Local path to folder to upload. Defaults to None.
        :param source_storage_account: str: optional Ignored. Defaults to None.
        :param source_container: str: optional Ignored. Defaults to None.
        :param source_file_path: str: optional Ignored. Defaults to None.
        :param dest_path: str: optional Azure path to upload to. Defaults to None.
        :param dest_storage_account: str: optional Storage account. Defaults to None.
        :param dest_container: str: optional Container name. Defaults to None.
        :param dest_file_path: str: optional Path to folder. Defaults to None.

        :exception ValueError: Raised if source is an Azure path
        """
        if self.is_azure_path(source_path):
            raise ValueError(
                f"Expected destination to be local path got azure path: {source_path}"
            )

        container_client = self.get_container_client(
            storage_account=dest_storage_account, container=dest_container
        )

        for root, dirs, files in os.walk(source_path):
            logging.warning(
                "upload folder does not support sub-directories only files will be uploaded"
            )
            for file in files:
                file_path = os.path.join(root, file)
                blob_path = dest_file_path + file

                logging.info(f"Uploading {file_path} to {blob_path}")
                with open(file_path, "rb") as data:
                    container_client.upload_blob(name=blob_path, data=data)
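    # Note (comment added for clarity, not in the original): blob_path is built
    # as dest_file_path + file, so dest_file_path is expected to already end
    # with a trailing "/" for the uploaded blobs to land under a folder-like
    # prefix.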

    @arguments_decorator(local_support=True)
    def open(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
        mode="r",
        *args,
        **kwargs,
    ):
        """
        Wrapper around smart_open so we don't have to pass a blob client everywhere.

        :param path: str: optional Local or azure path. Defaults to None.
        :param storage_account: str: optional name of storage account. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.
        :param mode: str: optional open mode. Defaults to "r".

        :return: A smart_open.open file object; works for both local and azure paths
        """
        if path and not self.is_azure_path(path) and "w" in mode:
            # if it is local write mode, check the path and create folder if needed
            subdir = os.path.dirname(path)
            if subdir:
                os.makedirs(subdir, exist_ok=True)
        if storage_account:
            transport_params = {
                "client": self.get_blob_service_client(storage_account=storage_account)
            }
        else:
            transport_params = {"client": None}
        if "transport_params" not in kwargs:
            kwargs["transport_params"] = transport_params
        path = path if path else f"azure://{container}/{file_path}"
        return smart_open.open(path, mode, *args, **kwargs)
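
# A minimal standalone sketch (added for illustration, not part of the original
# example; the account URL, key, container and blob names are placeholders) of
# what the wrapper above delegates to: smart_open opens "azure://<container>/<blob>"
# paths when a BlobServiceClient is supplied via transport_params, as the code
# above also does.
from azure.storage.blob import BlobServiceClient
import smart_open

service = BlobServiceClient(
    account_url="https://myaccount.blob.core.windows.net",
    credential="<account-key>")
with smart_open.open("azure://mycontainer/path/to/file.txt", "r",
                     transport_params={"client": service}) as handle:
    print(handle.read())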
Esempio n. 11
0
# Set TimeZone (Change from pacific if you live in another timezone)
TIME_ZONE = timezone("US/Pacific")

# This boolean marks whether to use cloud storage.
# Change to True and follow the tutorial instructions if you would like to use it.
CLOUD_STORAGE = False
print("CLOUD STORAGE STATUS:", CLOUD_STORAGE)

CONTAINER_NAME = "storagetest"
ts = datetime.now(TIME_ZONE)
timestring = ts.strftime("%Y-%m-%d %H:%M:%S")
current_date = timestring.split()[0]
DAILY_STRING = "timestamp,location," + labels_string + "\n"
if CLOUD_STORAGE:
    block_blob_service = BlobServiceClient.from_connection_string(
        BLOB_STORAGE_CONNECTION_STRING)
    ts = datetime.now(TIME_ZONE)
    DAILY_CSV_NAME = "objectcount" + current_date + ".csv"
    try:
        container_client = block_blob_service.create_container(CONTAINER_NAME)
    except ResourceExistsError:
        pass
    blob_client = block_blob_service.get_blob_client(container=CONTAINER_NAME,
                                                     blob=DAILY_CSV_NAME)
    blob_client.upload_blob(DAILY_STRING, overwrite=True)
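    # Note (comment added for clarity, not in the original): upload_blob with
    # overwrite=True rewrites the day's CSV with only the header row each time
    # this block runs, replacing any rows uploaded earlier the same day.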


class HubManager(object):
    def __init__(self):

        self.client = IoTHubDeviceClient.create_from_connection_string(
class StorageBlockBlobTest(StorageTestCase):

    def _setup(self, storage_account, key):
        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=key,
            connection_data_block_size=4 * 1024,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if self.is_live:
            self.bsc.create_container(self.container_name)

    def _teardown(self, FILE_PATH):
        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(actual_data.readall(), expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------

    @GlobalStorageAccountPreparer()
    def test_put_block(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            headers = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8'))
            self.assertIn('content_crc64', headers)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_with_response(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        def return_response(resp, _, headers):
            return (resp, headers)

        # Act
        resp, headers = blob.stage_block(0, 'block 0', cls=return_response)

        # Assert
        self.assertEqual(201, resp.status_code)
        self.assertIn('x-ms-content-crc64', headers)

    @GlobalStorageAccountPreparer()
    def test_put_block_unicode(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        headers = blob.stage_block('1', u'啊齄丂狛狜')
        self.assertIn('content_crc64', headers)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_with_md5(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        blob.stage_block(1, b'block', validate_content=True)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')]
        put_block_list_resp = blob.commit_block_list(block_list)

        # Assert
        content = blob.download_blob()
        self.assertEqual(content.readall(), b'AAABBBCCC')
        self.assertEqual(content.properties.etag, put_block_list_resp.get('etag'))
        self.assertEqual(content.properties.last_modified, put_block_list_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_put_block_list_invalid_block_id(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        try:
            block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='4')]
            blob.commit_block_list(block_list)
            self.fail()
        except HttpResponseError as e:
            self.assertGreaterEqual(str(e).find('specified block list is invalid'), 0)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list_with_md5(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')]
        blob.commit_block_list(block_list, validate_content=True)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list_with_blob_tier_specified(self, resource_group, location, storage_account, storage_account_key):

        # Arrange
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        blob_client.stage_block('1', b'AAA')
        blob_client.stage_block('2', b'BBB')
        blob_client.stage_block('3', b'CCC')
        blob_tier = StandardBlobTier.Cool

        # Act
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')]
        blob_client.commit_block_list(block_list,
                                      standard_blob_tier=blob_tier)

        # Assert
        blob_properties = blob_client.get_blob_properties()
        self.assertEqual(blob_properties.blob_tier, blob_tier)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_no_blocks(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        block_list = blob.get_block_list('all')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 0)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_uncommitted_blocks(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = blob.get_block_list('uncommitted')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 3)
        self.assertEqual(len(block_list[0]), 0)
        self.assertEqual(block_list[1][0].id, '1')
        self.assertEqual(block_list[1][0].size, 3)
        self.assertEqual(block_list[1][1].id, '2')
        self.assertEqual(block_list[1][1].size, 3)
        self.assertEqual(block_list[1][2].id, '3')
        self.assertEqual(block_list[1][2].size, 3)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_committed_blocks(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')]
        blob.commit_block_list(block_list)

        # Act
        block_list = blob.get_block_list('committed')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 3)
        self.assertEqual(block_list[0][0].id, '1')
        self.assertEqual(block_list[0][0].size, 3)
        self.assertEqual(block_list[0][1].id, '2')
        self.assertEqual(block_list[0][1].size, 3)
        self.assertEqual(block_list[0][2].id, '3')
        self.assertEqual(block_list[0][2].size, 3)

    @GlobalStorageAccountPreparer()
    def test_create_small_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_create_small_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)
        update_resp = blob.upload_blob(data2, overwrite=True)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_create_large_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False, metadata={'blobdata': 'data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'blobdata': 'data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)

    @GlobalStorageAccountPreparer()
    def test_create_large_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'})
        update_resp = blob.upload_blob(data2, overwrite=True, metadata={'blobdata': 'data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'blobdata': 'data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_single_put(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_0_bytes(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b''

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_from_bytes_blob_unicode(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = u'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_from_bytes_blob_unicode(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        data = u'hello world'
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data.encode('utf-8'))
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_bytes_blob_with_lease_id(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        lease = blob.acquire_lease()

        # Act
        create_resp = blob.upload_blob(data, lease=lease)

        # Assert
        output = blob.download_blob(lease=lease)
        self.assertEqual(output.readall(), data)
        self.assertEqual(output.properties.etag, create_resp.get('etag'))
        self.assertEqual(output.properties.last_modified, create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_metadata(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        blob.upload_blob(data, metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_properties(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(
                content_type='image/png',
                content_language='spanish')
        blob.upload_blob(data, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_progress(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(data, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:])

        # Assert
        self.assertEqual(data[3:], blob.download_blob().readall())

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index_and_count(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:], length=5)

        # Assert
        self.assertEqual(data[3:8], blob.download_blob().readall())

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index_and_count_and_properties(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(
                content_type='image/png',
                content_language='spanish')
        blob.upload_blob(data[3:], length=5, content_settings=content_settings)

        # Assert
        self.assertEqual(data[3:8], blob.download_blob().readall())
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_non_parallel(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_blob_tier_specified(self, resource_group, location, storage_account, storage_account_key):
        # Arrange
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'
        blob_tier = StandardBlobTier.Cool

        # Act
        blob_client.upload_blob(data, standard_blob_tier=blob_tier)
        blob_properties = blob_client.get_blob_properties()

        # Assert
        self.assertEqual(blob_properties.blob_tier, blob_tier)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'create_blob_from_input.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_non_parallel(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        FILE_PATH = 'create_blob_from_path_non_par.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream, length=100, max_concurrency=1)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_upload_blob_from_path_non_parallel_with_standard_blob_tier(self, resource_group, location, storage_account, storage_account_key):
        # Arrange
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        FILE_PATH = '_path_non_parallel_with_standard_blob.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_tier = StandardBlobTier.Cool
        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=100, max_concurrency=1, standard_blob_tier=blob_tier)
        props = blob.get_blob_properties()

        # Assert
        self.assertEqual(props.blob_tier, blob_tier)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_with_progress(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'create_blob_from_path_with_progr.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_with_properties(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_path_with_properties.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_stream_chunked_up.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_frm_stream_nonseek_chunk_upld_knwn_size(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        blob_size = len(data) - 66
        FILE_PATH = 'stream_nonseek_chunk_upld_knwn_size.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, length=blob_size, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_stream_nonseek_chunk_upld_unkwn_size(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'stream_nonseek_chunk_upld.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_with_progress_chunked_upload(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'stream_with_progress_chunked.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload_with_count(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'chunked_upload_with_count.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            resp = blob.upload_blob(stream, length=blob_size)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_stream_chunk_upload_with_cntandrops(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'from_stream_chunk_upload_with_cntandrops.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chnked_upload_with_properties(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'chnked_upload_with_properti.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload_with_properties(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        # Arrange
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_stream_chunked_upload.temp.{}.dat'.format(str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_tier = StandardBlobTier.Cool

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2, standard_blob_tier=blob_tier)

        properties = blob.get_blob_properties()

        # Assert
        self.assertEqual(properties.blob_tier, blob_tier)
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        create_resp = blob.upload_blob(text)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_with_encoding(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16')

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_with_encoding_and_progress(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_chunked_upload(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, encoded_data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_with_md5(self, resource_group, location, storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        blob.upload_blob(data, validate_content=True)

        # Assert

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_with_md5_chunked(self, resource_group, location, storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True)

input_asset = client.assets.create_or_update(resource_group, account_name,
                                             in_asset_name, in_asset)

# An AMS asset is a container whose name is the asset GUID with "asset-" prepended.
# So you need to build that asset id to identify the container
# where Storage is to upload the video (as a block blob).
in_container = 'asset-' + input_asset.asset_id

# create an output Asset
print(f"Creating output asset {out_asset_name}")
output_asset = client.assets.create_or_update(resource_group, account_name,
                                              out_asset_name, out_asset)

### Use the Storage SDK to upload the video ###
print(f"Uploading the file {source_file}")

blob_service_client = BlobServiceClient.from_connection_string(
    os.getenv('STORAGEACCOUNTCONNECTION'))
blob_client = blob_service_client.get_blob_client(in_container, source_file)
working_dir = os.getcwd()
print(f"Current working directory: {working_dir}")
upload_file_path = os.path.join(working_dir, source_file)

# WARNING: Depending on where you are launching the sample from, the path here could be off, and not include the BasicEncoding folder.
# Adjust the path as needed depending on how you are launching this python sample file.

# Upload the video to storage as a block blob
with open(upload_file_path, "rb") as data:
    blob_client.upload_blob(data)

audio_transform_name = 'AudioAnalyzerTransform'
video_transform_name = 'VideoAnalyzerTransform'
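
# A hedged sketch (added for illustration, not part of the original example) of
# how transform names like the two above are typically registered with the
# azure-mgmt-media client; the model and keyword names below are assumptions
# and can differ between SDK versions.
from azure.mgmt.media.models import (Transform, TransformOutput,
                                     AudioAnalyzerPreset, VideoAnalyzerPreset)

client.transforms.create_or_update(
    resource_group, account_name, audio_transform_name,
    Transform(outputs=[TransformOutput(preset=AudioAnalyzerPreset())]))
client.transforms.create_or_update(
    resource_group, account_name, video_transform_name,
    Transform(outputs=[TransformOutput(preset=VideoAnalyzerPreset())]))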
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.blob_service_client = BlobServiceClient.from_connection_string(self.connection_string)
Esempio n. 15
0
def __init__(self, container='ackbarstorage'):
    self.container = container
    self.conn_str = os.environ['AZURE_STORAGE_CONNECTION_STRING']
    self.blob_service_client = BlobServiceClient.from_connection_string(
        self.conn_str)
Esempio n. 16
0
class AzureBlobStorage(IStorage):
    """ Storage backend that uses Azure Blob Storage """

    test = False

    def __init__(
        self,
        request,
        expire_after=None,
        path_prefix=None,
        redirect_urls=None,
        storage_account_name=None,
        storage_account_key=None,
        storage_container_name=None,
    ):
        super(AzureBlobStorage, self).__init__(request)

        self.expire_after = expire_after
        self.path_prefix = path_prefix
        self.redirect_urls = redirect_urls
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.storage_container_name = storage_container_name
        self.azure_storage_account_url = "https://{}.blob.core.windows.net".format(
            storage_account_name)
        self.blob_service_client = BlobServiceClient(
            account_url=self.azure_storage_account_url,
            credential=self.storage_account_key,
        )
        self.container_client = self.blob_service_client.get_container_client(
            self.storage_container_name)

    @classmethod
    def configure(cls, settings):
        kwargs = super(AzureBlobStorage, cls).configure(settings)
        kwargs["expire_after"] = int(
            settings.get("storage.expire_after", 60 * 60 * 24))
        kwargs["path_prefix"] = settings.get("storage.prefix", "")
        kwargs["redirect_urls"] = asbool(
            settings.get("storage.redirect_urls", True))
        kwargs["storage_account_name"] = settings.get(
            "storage.storage_account_name")
        if kwargs["storage_account_name"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_account_name'")

        kwargs["storage_account_key"] = settings.get(
            "storage.storage_account_key")
        if kwargs["storage_account_key"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_account_key'")

        kwargs["storage_container_name"] = settings.get(
            "storage.storage_container_name")
        if kwargs["storage_container_name"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_container_name'")

        return kwargs

    def _generate_url(self, package: Package) -> str:
        path = self.get_path(package)

        url_params = generate_blob_sas(
            account_name=self.storage_account_name,
            container_name=self.storage_container_name,
            blob_name=path,
            account_key=self.storage_account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.now() + timedelta(seconds=self.expire_after),
            protocol="https",
        )

        url = "{}/{}/{}?{}".format(
            self.azure_storage_account_url,
            self.storage_container_name,
            path,
            url_params,
        )
        return url
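        # For reference (comment added for clarity, not in the original): the
        # returned value has the shape
        #   https://<account>.blob.core.windows.net/<container>/<blob-path>?<sas-token>
        # i.e. the read-only SAS produced by generate_blob_sas is appended as the
        # query string of the plain blob URL.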

    def download_response(self, package):
        return HTTPFound(location=self._generate_url(package))

    def list(self, factory=Package):
        # List does not return metadata :(
        for blob_properties in self.container_client.list_blobs(
                name_starts_with=self.path_prefix):
            blob_client = self.container_client.get_blob_client(
                blob=blob_properties.name)
            metadata = blob_client.get_blob_properties()

            yield factory(metadata.metadata["name"],
                          metadata.metadata["version"],
                          posixpath.basename(blob_properties.name),
                          blob_properties.last_modified,
                          path=blob_properties.name,
                          **Package.read_metadata(metadata.metadata))

    def get_path(self, package):
        """ Get the fully-qualified bucket path for a package """
        if "path" not in package.data:
            package.data["path"] = (self.path_prefix + package.name + "/" +
                                    package.filename)
        return package.data["path"]

    def upload(self, package, datastream):
        path = self.get_path(package)

        metadata = package.get_metadata()
        metadata["name"] = package.name
        metadata["version"] = package.version
        normalize_metadata(metadata)

        blob_client = self.container_client.get_blob_client(blob=path)
        blob_client.upload_blob(data=datastream, metadata=metadata)

    def delete(self, package):
        path = self.get_path(package)
        blob_client = self.container_client.get_blob_client(blob=path)
        blob_client.delete_blob()

    def check_health(self):
        try:
            self.container_client.get_blob_client(
                blob="__notexist").get_blob_properties()
        except ResourceNotFoundError:
            pass
        except Exception as e:
            return False, str(e)
        return True, ""

    @contextmanager
    def open(self, package):
        url = self._generate_url(package)
        handle = urlopen(url)
        try:
            yield BytesIO(handle.read())
        finally:
            handle.close()
class AzureBlobArtifactRepository(ArtifactRepository):
    """
    Stores artifacts on Azure Blob Storage.

    This repository is used with URIs of the form
    ``wasbs://<container-name>@<storage-account-name>.blob.core.windows.net/<path>``,
    following the same URI scheme as Hadoop on Azure blob storage. It requires that your Azure
    storage credentials be available as ``AZURE_STORAGE_CONNECTION_STRING`` or ``AZURE_STORAGE_ACCESS_KEY``.
    """
    def __init__(self, artifact_uri, client=None):
        super(AzureBlobArtifactRepository, self).__init__(artifact_uri)

        # Allow override for testing
        if client:
            self.client = client
            return

        from azure.storage.blob import BlobServiceClient
        (_, account,
         _) = AzureBlobArtifactRepository.parse_wasbs_uri(artifact_uri)
        if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
            self.client = BlobServiceClient.from_connection_string(
                conn_str=os.environ.get("AZURE_STORAGE_CONNECTION_STRING"))
        elif "AZURE_STORAGE_ACCESS_KEY" in os.environ:
            account_url = "https://{account}.blob.core.windows.net".format(
                account=account)
            self.client = BlobServiceClient(
                account_url=account_url,
                credential=os.environ.get("AZURE_STORAGE_ACCESS_KEY"))
        else:
            raise Exception(
                "You need to set one of AZURE_STORAGE_CONNECTION_STRING or "
                "AZURE_STORAGE_ACCESS_KEY to access Azure storage.")

    @staticmethod
    def parse_wasbs_uri(uri):
        """Parse a wasbs:// URI, returning (container, storage_account, path)."""
        parsed = urllib.parse.urlparse(uri)
        if parsed.scheme != "wasbs":
            raise Exception("Not a WASBS URI: %s" % uri)
        match = re.match("([^@]+)@([^.]+)\\.blob\\.core\\.windows\\.net",
                         parsed.netloc)
        if match is None:
            raise Exception("WASBS URI must be of the form "
                            "<container>@<account>.blob.core.windows.net")
        container = match.group(1)
        storage_account = match.group(2)
        path = parsed.path
        if path.startswith('/'):
            path = path[1:]
        return container, storage_account, path

    def log_artifact(self, local_file, artifact_path=None):
        (container, _, dest_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))
        with open(local_file, "rb") as file:
            container_client.upload_blob(dest_path, file)

    def log_artifacts(self, local_dir, artifact_path=None):
        (container, _, dest_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        local_dir = os.path.abspath(local_dir)
        for (root, _, filenames) in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                upload_path = posixpath.join(dest_path, rel_path)
            for f in filenames:
                remote_file_path = posixpath.join(upload_path, f)
                local_file_path = os.path.join(root, f)
                with open(local_file_path, "rb") as file:
                    container_client.upload_blob(remote_file_path, file)

    def list_artifacts(self, path=None):
        from azure.storage.blob._models import BlobPrefix
        (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        dest_path = artifact_path
        if path:
            dest_path = posixpath.join(dest_path, path)
        infos = []
        prefix = dest_path + "/"
        results = container_client.walk_blobs(name_starts_with=prefix)
        for r in results:
            if not r.name.startswith(artifact_path):
                raise MlflowException(
                    "The name of the listed Azure blob does not begin with the specified"
                    " artifact path. Artifact path: {artifact_path}. Blob name:"
                    " {blob_name}".format(artifact_path=artifact_path,
                                          blob_name=r.name))
            if isinstance(r, BlobPrefix):
                # This is a prefix for items in a subdirectory
                subdir = posixpath.relpath(path=r.name, start=artifact_path)
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:  # Just a plain old blob
                file_name = posixpath.relpath(path=r.name, start=artifact_path)
                infos.append(FileInfo(file_name, False, r.size))
        return sorted(infos, key=lambda f: f.path)

    def _download_file(self, remote_file_path, local_path):
        (container, _,
         remote_root_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        remote_full_path = posixpath.join(remote_root_path, remote_file_path)
        with open(local_path, "wb") as file:
            container_client.download_blob(remote_full_path).readinto(file)

    def delete_artifacts(self, artifact_path=None):
        raise MlflowException('Not implemented yet')
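
A hedged usage sketch of the repository above, assuming the class is in scope and that AZURE_STORAGE_CONNECTION_STRING (or AZURE_STORAGE_ACCESS_KEY) is exported as its docstring requires; the account, container and file names are placeholders.

repo = AzureBlobArtifactRepository(
    "wasbs://artifacts@myaccount.blob.core.windows.net/experiments/1")
repo.log_artifact("model.pkl")                        # single file
repo.log_artifacts("plots", artifact_path="figures")  # whole directory
for info in repo.list_artifacts():
    print(info.path, info.is_dir, info.file_size)
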
Example no. 18
def connect_service(url: str, creds: str) -> BlobServiceClient:
    '''Connect to the main service, maybe write new ways to connect later'''
    return BlobServiceClient(account_url=url, credential=creds)
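
For example (account URL and credential below are placeholders):

service = connect_service("https://myaccount.blob.core.windows.net",
                          "<storage-account-key>")
print([c.name for c in service.list_containers()])
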
Example no. 19
class Connector():
    """
    Azure blobs access
    """

    def __init__(self, **kwargs):
        self.account_url = kwargs["account_url"]
        self.account_key = kwargs["account_key"]
        self.client = BlobServiceClient(
            account_url=self.account_url,
            credential=self.account_key
            )


    def _get_container_client(self, folder):
        return self.client.get_container_client(container=folder)


    def _get_blob_client(self, folder, filename):
        return self.client.get_blob_client(container=folder, blob=filename)


    def list_files(self, folder, resource_id=None, prefix='', sufix='', full_path=False):
        container_client = self._get_container_client(folder)
        file_list = []
        for blob in container_client.list_blobs(name_starts_with=resource_id if resource_id is not None else ''):
            if full_path:
                blob_name = blob['name']
            else:
                blob_name = blob['name'].split('/').pop()

            if blob_name.startswith(prefix) and blob_name.endswith(sufix):
                file_list.append(blob_name)

        return file_list


    def list_files_info(self, folder, resource_id=None, prefix='', sufix='', full_path=False):
        """
        Returns a list with info of all files from folder
        [{"filename": string,
          "creation_date": timestamp,
          "modified_date": timestamp,
          "file_size": int}
        ]
        """
        container_client = self._get_container_client(folder)
        file_list = []
        for blob in container_client.list_blobs(name_starts_with=resource_id + '/' if resource_id is not None else ''):
            if full_path:
                blob_name = blob['name']
            elif resource_id is not None:
                blob_name = blob['name'].replace(resource_id + '/', '', 1)
            else:
                blob_name = blob['name']

            if blob_name.startswith(prefix) and blob_name.endswith(sufix):
                file_info = {"filename": blob_name,
                             "creation_date": blob["creation_time"],
                             "modified_date": blob["last_modified"],
                             "file_size": blob["size"]
                            }

                file_list.append(file_info)

        return file_list


    def is_file(self, folder, filename, resource_id=None):
        """
        Returns true if file exists
        """
        if resource_id is not None:
            blob_name = f"{resource_id}/{filename}"
        else:
            blob_name = filename

        try:
            blob_client = self._get_blob_client(folder, blob_name)
            blob_client.get_blob_properties()
            return True
        except ResourceNotFoundError:
            return False


    def get_file_info(self, folder, filename, resource_id=None):
        """
        Returns file info in a dict
        {"filename": string,
         "creation_date": timestamp,
         "modified_date": timestamp,
         "file_size": int
        }
        """
        if resource_id is not None:
            blob_name = f"{resource_id}/{filename}"
        else:
            blob_name = filename

        blob_client = self._get_blob_client(folder, blob_name)
        blob = blob_client.get_blob_properties()
        blob_name = blob['name'].split('/').pop()

        blob_info = {"filename": blob_name,
                     "creation_date": blob["creation_time"],
                     "modified_date": blob["last_modified"],
                     "file_size": blob["size"]
                    }

        return blob_info


    def download_file(self, folder, filename, resource_id=None):
        if resource_id is not None:
            blob_name = f"{resource_id}/{filename}"
        else:
            blob_name = filename

        blob_client = self._get_blob_client(folder, blob_name)
        try:
            download_stream = blob_client.download_blob()
        except Exception as err:
            raise Exception(f'Error downloading {filename}: {err}')
        else:
            data = download_stream.readall()
            return data


    def upload_file(self, folder, filename, data, resource_id=None):
        if not isinstance(data, bytes):
            raise Exception(f'Data should be of type bytes, {type(data)} detected.')

        if resource_id is not None:
            blob_name = f"{resource_id}/{filename}"
        else:
            blob_name = filename

        blob_client = self._get_blob_client(folder, blob_name)
        try:
            blob_client.upload_blob(data=data)
            blob_client.get_blob_properties()
        except Exception as err:
            raise Exception(f'Error uploading {filename}: {err}')
        else:
            # print(f'Successfully uploaded {filename}')
            pass


    def delete_file(self, folder, filename, resource_id=None):
        if resource_id is not None:
            blob_name = f"{resource_id}/{filename}"
        else:
            blob_name = filename

        blob_client = self._get_blob_client(folder, blob_name)
        try:
            blob_client.delete_blob()
        except Exception as err:
            raise Exception(f'Error deleting {filename}: {err}')
        else:
            # print(f'Successfully deleted {blob_name}')
            pass
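
A usage sketch for the Connector above; the account URL, key, container ("folder") and resource id are placeholders.

conn = Connector(account_url="https://myaccount.blob.core.windows.net",
                 account_key="<storage-account-key>")

conn.upload_file("reports", "summary.csv", b"a,b\n1,2\n", resource_id="job-42")
print(conn.is_file("reports", "summary.csv", resource_id="job-42"))     # True
print(conn.list_files_info("reports", resource_id="job-42"))
data = conn.download_file("reports", "summary.csv", resource_id="job-42")
conn.delete_file("reports", "summary.csv", resource_id="job-42")
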
Example no. 20
class StorageBlobEncryptionTest(StorageTestCase):
    # --Helpers-----------------------------------------------------------------
    def _setup(self, name, key):
        self.bsc = BlobServiceClient(self.account_url(name, "blob"),
                                     credential=key,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024,
                                     max_page_size=4 * 1024,
                                     max_single_get_size=1024,
                                     max_chunk_get_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.blob_types = (BlobType.BlockBlob, BlobType.PageBlob,
                           BlobType.AppendBlob)
        self.bytes = b'Foo'

        if self.is_live:
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()

    def _teardown(self, file_name):
        if path.isfile(file_name):
            try:
                remove(file_name)
            except:
                pass

    def _get_container_reference(self):
        return self.get_resource_name(TEST_CONTAINER_PREFIX)

    def _get_blob_reference(self, blob_type):
        return self.get_resource_name(TEST_BLOB_PREFIXES[blob_type.value])

    def _create_small_blob(self, blob_type):
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(self.bytes, blob_type=blob_type)
        return blob

    # --Test cases for blob encryption ----------------------------------------

    @GlobalStorageAccountPreparer()
    def test_missing_attribute_kek_wrap(self, resource_group, location,
                                        storage_account, storage_account_key):
        # In the shared method _generate_blob_encryption_key
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')

        # Act
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute wrap_key
        self.bsc.key_encryption_key = invalid_key_1
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.wrap_key = valid_key.wrap_key
        invalid_key_2.get_kid = valid_key.get_kid
        # No attribute get_key_wrap_algorithm
        self.bsc.key_encryption_key = invalid_key_2
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_3 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_3.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_3.wrap_key = valid_key.wrap_key
        # No attribute get_kid
        self.bsc.key_encryption_key = invalid_key_3
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_invalid_value_kek_wrap(self, resource_group, location,
                                    storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')

        self.bsc.key_encryption_key.get_key_wrap_algorithm = None
        try:
            self._create_small_blob(BlobType.BlockBlob)
            self.fail()
        except AttributeError as e:
            self.assertEqual(
                str(e),
                _ERROR_OBJECT_INVALID.format('key encryption key',
                                             'get_key_wrap_algorithm'))

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.get_kid = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.wrap_key = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_missing_attribute_kek_unwrap(self, resource_group, location,
                                          storage_account,
                                          storage_account_key):
        # Shared between all services in decrypt_blob
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')
        self.bsc.key_encryption_key = valid_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        # Note that KeyWrapper has a default value for key_id, so these Exceptions
        # are not due to non_matching kids.
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute unwrap_key
        blob.key_encryption_key = invalid_key_1
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.unwrap_key = valid_key.unwrap_key
        blob.key_encryption_key = invalid_key_2
        # No attribute get_kid
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_invalid_value_kek_unwrap(self, resource_group, location,
                                      storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = KeyWrapper('key1')
        blob.key_encryption_key.unwrap_key = None

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @GlobalStorageAccountPreparer()
    def test_get_blob_kek(self, resource_group, location, storage_account,
                          storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content.chunks())), self.bytes)

    @GlobalStorageAccountPreparer()
    def test_get_blob_resolver(self, resource_group, location, storage_account,
                               storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        key_resolver = KeyResolver()
        key_resolver.put_key(self.bsc.key_encryption_key)
        self.bsc.key_resolver_function = key_resolver.resolve_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key = None
        content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, self.bytes)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_get_blob_kek_RSA(self, resource_group, location, storage_account,
                              storage_account_key):
        # We can only generate random RSA keys, so this must be run live or
        # the playback test will fail due to a change in kek values.
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = RSAKeyWrapper('key2')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content.chunks())), self.bytes)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_get_blob_nonmatching_kid(self, resource_group, location,
                                      storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key.kid = 'Invalid'

        # Assert
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @GlobalStorageAccountPreparer()
    def test_put_blob_invalid_stream_type(self, resource_group, location,
                                          storage_account,
                                          storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        small_stream = StringIO(u'small')
        large_stream = StringIO(u'large' * self.config.max_single_put_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        # Block blob specific single shot
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(small_stream, length=5)
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

        # Generic blob chunked
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(large_stream)
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_mult_of_block_size(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size +
                                        self.config.max_block_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_non_mult_of_block_size(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = urandom(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_range_specified(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size * 2)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content,
                         length=self.config.max_single_put_size + 53,
                         max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content[:self.config.max_single_put_size + 53],
                         blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_block_blob_single_shot(self, resource_group, location,
                                        storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b'small'
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_range(self, resource_group, location, storage_account,
                            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        content = b'Random repeats' * self.config.max_single_put_size * 5

        # All page blob uploads call _upload_chunks, so this will test the ability
        # of that function to handle ranges even though it's a small blob
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content[2:],
                         length=self.config.max_single_put_size + 5,
                         max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[2:2 + self.config.max_single_put_size + 5],
                         blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_empty(self, resource_group, location, storage_account,
                            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b''
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=2)

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_serial_upload_chunking(self, resource_group, location,
                                             storage_account,
                                             storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_beginning_to_middle(self, resource_group, location,
                                                storage_account,
                                                storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(
            offset=0, length=50).content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[:50], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_middle_to_end(self, resource_group, location,
                                          storage_account,
                                          storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(offset=100,
                                          length=28).content_as_bytes()
        blob_content2 = blob.download_blob(offset=100).content_as_bytes()

        # Assert
        self.assertEqual(content[100:], blob_content)
        self.assertEqual(content[100:], blob_content2)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_middle_to_middle(self, resource_group, location,
                                             storage_account,
                                             storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5,
                                          length=93).content_as_bytes()

        # Assert
        self.assertEqual(content[5:98], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_aligns_on_16_byte_block(self, resource_group,
                                                    location, storage_account,
                                                    storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=48,
                                          length=16).content_as_bytes()

        # Assert
        self.assertEqual(content[48:64], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_expanded_to_beginning_block_align(
            self, resource_group, location, storage_account,
            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5,
                                          length=50).content_as_bytes()

        # Assert
        self.assertEqual(content[5:55], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_expanded_to_beginning_iv(self, resource_group,
                                                     location, storage_account,
                                                     storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=22,
                                          length=20).content_as_bytes()

        # Assert
        self.assertEqual(content[22:42], blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_strict_mode(self, resource_group, location,
                                  storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        content = urandom(512)

        # Assert
        for service in self.blob_types:
            blob_name = self._get_blob_reference(service)
            blob = self.bsc.get_blob_client(self.container_name, blob_name)

            with self.assertRaises(ValueError):
                blob.upload_blob(content, blob_type=service)

            stream = BytesIO(content)
            with self.assertRaises(ValueError):
                blob.upload_blob(stream, length=512, blob_type=service)

            file_name = 'blob_strict_mode.temp.dat'
            with open(file_name, 'wb') as stream:
                stream.write(content)
            with open(file_name, 'rb') as stream:
                with self.assertRaises(ValueError):
                    blob.upload_blob(stream, blob_type=service)

            with self.assertRaises(ValueError):
                blob.upload_blob('To encrypt', blob_type=service)
        self._teardown(file_name)

    @GlobalStorageAccountPreparer()
    def test_get_blob_strict_mode_no_policy(self, resource_group, location,
                                            storage_account,
                                            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = None

        # Assert
        with self.assertRaises(ValueError):
            blob.download_blob().content_as_bytes()

    @GlobalStorageAccountPreparer()
    def test_get_blob_strict_mode_unencrypted_blob(self, resource_group,
                                                   location, storage_account,
                                                   storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = True
        blob.key_encryption_key = KeyWrapper('key1')

        # Assert
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_block(self, resource_group, location,
                                        storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.stage_block('block1', urandom(32))
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.commit_block_list(['block1'])
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_append(self, resource_group, location,
                                         storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.AppendBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.append_block(urandom(32))
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_append_blob()
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        # All append_from operations funnel into append_from_stream, so testing one is sufficient
        with self.assertRaises(ValueError) as e:
            blob.upload_blob(b'To encrypt', blob_type=BlobType.AppendBlob)
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_page(self, resource_group, location,
                                       storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.PageBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.upload_page(urandom(512), offset=0, length=512)
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_page_blob(512)
        self.assertEqual(str(e.exception),
                         _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_validate_encryption(self, resource_group, location,
                                 storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        kek = KeyWrapper('key1')
        self.bsc.key_encryption_key = kek
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = False
        blob.key_encryption_key = None
        content = blob.download_blob()
        data = content.content_as_bytes()

        encryption_data = _dict_to_encryption_data(
            loads(content.properties.metadata['encryptiondata']))
        iv = encryption_data.content_encryption_IV
        content_encryption_key = _validate_and_unwrap_cek(
            encryption_data, kek, None)
        cipher = _generate_AES_CBC_cipher(content_encryption_key, iv)
        decryptor = cipher.decryptor()
        unpadder = PKCS7(128).unpadder()

        content = decryptor.update(data) + decryptor.finalize()
        content = unpadder.update(content) + unpadder.finalize()

        self.assertEqual(self.bytes, content)

    @GlobalStorageAccountPreparer()
    def test_create_block_blob_from_star(self, resource_group, location,
                                         storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, self.bytes)

        stream = BytesIO(self.bytes)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        file_name = 'block_blob_from_star.temp.dat'
        with open(file_name, 'wb') as stream:
            stream.write(self.bytes)
        with open(file_name, 'rb') as stream:
            self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        self._create_blob_from_star(BlobType.BlockBlob, b'To encrypt',
                                    'To encrypt')
        self._teardown(file_name)

    @GlobalStorageAccountPreparer()
    def test_create_page_blob_from_star(self, resource_group, location,
                                        storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        content = self.get_random_bytes(512)
        self._create_blob_from_star(BlobType.PageBlob, content, content)

        stream = BytesIO(content)
        self._create_blob_from_star(BlobType.PageBlob,
                                    content,
                                    stream,
                                    length=512)

        file_name = 'page_blob_from_star.temp.dat'
        with open(file_name, 'wb') as stream:
            stream.write(content)

        with open(file_name, 'rb') as stream:
            self._create_blob_from_star(BlobType.PageBlob, content, stream)
        self._teardown(file_name)

    def _create_blob_from_star(self, blob_type, content, data, **kwargs):
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.key_encryption_key = KeyWrapper('key1')
        blob.require_encryption = True
        blob.upload_blob(data, blob_type=blob_type, **kwargs)

        blob_content = blob.download_blob().content_as_bytes()
        self.assertEqual(content, blob_content)
        blob.delete_blob()

    @GlobalStorageAccountPreparer()
    def test_get_blob_to_star(self, resource_group, location, storage_account,
                              storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        iter_blob = b"".join(list(blob.download_blob().chunks()))
        bytes_blob = blob.download_blob().content_as_bytes()
        stream_blob = BytesIO()
        blob.download_blob().download_to_stream(stream_blob)
        stream_blob.seek(0)
        text_blob = blob.download_blob(encoding='UTF-8').readall()

        # Assert
        self.assertEqual(self.bytes, iter_blob)
        self.assertEqual(self.bytes, bytes_blob)
        self.assertEqual(self.bytes, stream_blob.read())
        self.assertEqual(self.bytes.decode(), text_blob)
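
The tests above exercise client-side encryption, which only needs require_encryption and key_encryption_key set on the client, where the key object exposes wrap_key, unwrap_key, get_key_wrap_algorithm and get_kid. A minimal sketch of that interface follows; the identity "wrapping" is purely illustrative (use a real key-encryption key, e.g. from Key Vault, in practice), the account details are placeholders, and the container is assumed to exist.

from azure.storage.blob import BlobServiceClient

class LocalKeyWrapper:
    """Toy key-encryption key: wraps the content key with the identity
    function. Illustrative only -- not secure."""
    def __init__(self, kid):
        self.kid = kid

    def wrap_key(self, key, algorithm=None):
        return key

    def unwrap_key(self, key, algorithm):
        return key

    def get_key_wrap_algorithm(self):
        return "none"

    def get_kid(self):
        return self.kid

service = BlobServiceClient("https://myaccount.blob.core.windows.net",
                            credential="<storage-account-key>")
service.require_encryption = True
service.key_encryption_key = LocalKeyWrapper("local-key-1")

blob = service.get_blob_client("utcontainer", "encrypted.bin")
blob.upload_blob(b"secret bytes")                # encrypted client-side
print(blob.download_blob().readall())            # transparently decrypted
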
Example no. 21
import datetime
import json
import logging
import os

import fsspec
from azure.storage.blob import BlobServiceClient

from newstory_scraper.config import config
from newstory_scraper.pub_sub import utils
from newstory_scraper.scraper import get_profile

logger = logging.getLogger(__name__)

try:
    blob_service_client = BlobServiceClient.from_connection_string(
        os.environ["AZURE_STORAGE_CONNECTION_STRING"])
except KeyError:
    blob_service_client = None
    # The original constructed a UserWarning without emitting it; log instead.
    logger.warning("Unable to set blob_service_client: "
                   "AZURE_STORAGE_CONNECTION_STRING is not set")


def echo(event, registry):
    print(event.get("message"))


def scrape_profile(event, registry):
    user = event["user"]
    prefix = event.get("prefix", ".scraper")
    maximum = int(event.get("maximum", "0"))
    path = "/".join([
        prefix,
        datetime.datetime.utcnow().strftime("%Y-%m-%d"),
Example no. 22
from FlaskExercise import app, db
from flask import flash
from werkzeug.utils import secure_filename
from azure.storage.blob import BlobServiceClient
import uuid

blob_container = app.config['BLOB_CONTAINER']
storage_url = "https://{}.blob.core.windows.net/".format(
    app.config['BLOB_ACCOUNT'])
blob_service = BlobServiceClient(account_url=storage_url,
                                 credential=app.config['BLOB_STORAGE_KEY'])


class Animal(db.Model):
    __tablename__ = 'animals'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(75))
    scientific_name = db.Column(db.String(75))
    description = db.Column(db.String(800))
    image_path = db.Column(db.String(100))

    def __repr__(self):
        return '<Animal {}>'.format(self.name)

    def save_changes(self, file):
        if file:
            filename = secure_filename(file.filename)
            fileExtension = filename.rsplit('.', 1)[1]
            randomFilename = str(uuid.uuid1())
            filename = randomFilename + '.' + fileExtension
            try:
Example no. 23
from django.shortcuts import render
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials
import cv2
from PIL import Image, ImageDraw, ImageFont
import urllib
from azure.storage.blob import BlobServiceClient,BlobClient

connection_string = "<Blob Storage connection string>"
service = BlobServiceClient.from_connection_string(conn_str=connection_string)
def home(request):
    return render(request,"home.html")

def resultado(request):
    font                   = cv2.FONT_HERSHEY_SIMPLEX
    fontScale              = 0.7
    fontColor              = (0,0,255)
    lineType               = 2
    name=request.GET["namefile"]
    credentials = ApiKeyCredentials(in_headers={"Prediction-key": "<Prediction Key here>"})
    predictor = CustomVisionPredictionClient("<Regional endpoint here>", credentials)
    blob = BlobClient.from_connection_string(conn_str=connection_string, container_name="images", blob_name=f"{name} training.png")
    url = request.GET["link"] 
    urllib.request.urlretrieve(url, "python.png")
    imagen=cv2.imread("python.png")
    height, width, channels = imagen.shape
    Resultado = predictor.detect_image_url("<Project ID>", "<Published iteration name>", url)
    for prediction in Resultado.predictions:
        if prediction.probability > 0.4:
            bbox = prediction.bounding_box
            tag = prediction.tag_name
Example no. 24
def upload_command(coreclr_args):
    """ Upload the JIT

    Args:
        coreclr_args (CoreclrArguments): parsed args
    """

    print("JIT upload")

    def upload_blob(file, blob_name):
        blob_client = blob_service_client.get_blob_client(
            container=az_container_name, blob=blob_name)

        # Check if the blob already exists, and delete it if it does, before uploading / replacing it.
        try:
            blob_client.get_blob_properties()
            # If no exception, then the blob already exists. Delete it!
            print("Warning: replacing existing blob!")
            blob_client.delete_blob()
        except Exception:
            # Blob doesn't exist already; that's good
            pass

        with open(file, "rb") as data:
            blob_client.upload_blob(data)

    # 1. Find all the JIT builds in the product directory
    # 2. Upload them
    #
    # We could also upload debug info, but it's not clear it's needed for most purposes, and it is very big:
    # it increases the upload size from about 190MB to over 900MB for each roll.
    #
    # For reference, the JIT debug info is found:
    #    a. For Windows, in the PDB subdirectory, e.g. PDB\clrjit.pdb
    #    b. For Linux .dbg files, and Mac .dwarf files, in the same directory as the jit, e.g., libcoreclr.so.dbg

    # Target directory: <root>/git-hash/OS/architecture/build-flavor/
    # Note that build-flavor will probably always be Checked.

    files = []

    # First, find the primary JIT that we expect to find.
    jit_name = determine_jit_name(coreclr_args)
    jit_path = os.path.join(coreclr_args.product_location, jit_name)
    if not os.path.isfile(jit_path):
        print("Error: Couldn't find JIT at {}".format(jit_path))
        raise RuntimeError("Missing JIT")

    files.append(jit_path)

    # Next, look for any and all cross-compilation JITs. These are named, e.g.:
    #   clrjit_unix_x64_x64.dll
    #   clrjit_universal_arm_x64.dll
    #   clrjit_universal_arm64_x64.dll
    # and so on, and live in the same product directory as the primary JIT.
    #
    # Note that the expression below explicitly filters out the primary JIT since we added that above.
    # We handle the primary JIT specially so we can error if it is missing. For the cross-compilation
    # JITs, we don't bother trying to ensure that all the ones we might expect are actually there.
    #
    # We don't do a recursive walk because the JIT is also copied to the "sharedFramework" subdirectory,
    # so we don't want to pick that up.

    if coreclr_args.host_os == "OSX":
        allowed_extensions = [".dylib"]
        # Add .dwarf for debug info
    elif coreclr_args.host_os == "Linux":
        allowed_extensions = [".so"]
        # Add .dbg for debug info
    elif coreclr_args.host_os == "windows":
        allowed_extensions = [".dll"]
    else:
        raise RuntimeError("Unknown OS.")

    cross_jit_paths = [
        os.path.join(coreclr_args.product_location, item)
        for item in os.listdir(coreclr_args.product_location)
        if re.match(r'.*clrjit.*', item) and item != jit_name and any(
            item.endswith(extension) for extension in allowed_extensions)
    ]
    files += cross_jit_paths

    # On Windows, grab the PDB files from a sub-directory.
    # if coreclr_args.host_os == "windows":
    #    pdb_dir = os.path.join(coreclr_args.product_location, "PDB")
    #    if os.path.isdir(pdb_dir):
    #        pdb_paths = [os.path.join(pdb_dir, item) for item in os.listdir(pdb_dir) if re.match(r'.*clrjit.*', item)]
    #        files += pdb_paths

    print("Uploading:")
    for item in files:
        print("  {}".format(item))

    try:
        from azure.storage.blob import BlobServiceClient

    except:
        print("Please install:")
        print("  pip install azure-storage-blob")
        print(
            "See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python"
        )
        raise RuntimeError("Missing azure storage package.")

    blob_service_client = BlobServiceClient(
        account_url=az_blob_storage_account_uri,
        credential=coreclr_args.az_storage_key)
    blob_folder_name = "{}/{}/{}/{}/{}".format(az_builds_root_folder,
                                               coreclr_args.git_hash,
                                               coreclr_args.host_os,
                                               coreclr_args.arch,
                                               coreclr_args.build_type)

    total_bytes_uploaded = 0

    # Should we compress the JIT on upload? It would save space, but it makes it slightly more complicated to use
    # because you can't just "wget" or otherwise download the file and use it immediately -- you need to unzip first.
    # So for now, don't compress it.
    compress_jit = False

    with TempDir() as temp_location:
        for file in files:
            if compress_jit:
                # Zip compress the file we will upload
                zip_name = os.path.basename(file) + ".zip"
                zip_path = os.path.join(temp_location, zip_name)
                print("Compress {} -> {}".format(file, zip_path))
                with zipfile.ZipFile(zip_path, 'w',
                                     zipfile.ZIP_DEFLATED) as zip_file:
                    zip_file.write(file, os.path.basename(file))

                file_stat_result = os.stat(file)
                zip_stat_result = os.stat(zip_path)
                print("Compressed {:n} to {:n} bytes".format(
                    file_stat_result.st_size, zip_stat_result.st_size))
                total_bytes_uploaded += zip_stat_result.st_size

                blob_name = "{}/{}".format(blob_folder_name, zip_name)
                print("Uploading: {} ({}) -> {}".format(
                    file, zip_path,
                    az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(zip_path, blob_name)
            else:
                file_stat_result = os.stat(file)
                total_bytes_uploaded += file_stat_result.st_size
                file_name = os.path.basename(file)
                blob_name = "{}/{}".format(blob_folder_name, file_name)
                print("Uploading: {} -> {}".format(
                    file, az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(file, blob_name)

    print("Uploaded {:n} bytes".format(total_bytes_uploaded))
    print("Finished JIT upload")
Example no. 25
class AzureClient(CloudClient):
    """
    Implementation of a Azure Client using the Azure API

    """
    def __init__(self,
                 account_name=None,
                 credential=None,
                 auth_dict=None,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        if auth_dict:
            account_name = auth_dict.get("STORAGE_ACCOUNT_NAME")
            credential = auth_dict.get("STORAGE_ACCOUNT_KEY")
        if account_name and credential:
            self.account_name = account_name
            self.credential = credential
            self.secret = self.create_azure_secret()

            account_url = constants.AZURE_BLOB_ENDPOINT_TEMPLATE.format(
                account_name)
            self.blob_service_client = BlobServiceClient(
                account_url=account_url, credential=credential)

    def internal_create_uls(self, name, region):
        """
        Creates the Underlying Storage using the Azure API

        Args:
           name (str): The Underlying Storage name to be created

        """
        self.blob_service_client.get_container_client(name).create_container()

    def internal_delete_uls(self, name):
        """
        Deletes the Underlying Storage using the Azure API

        Args:
           name (str): The Underlying Storage name to be deleted

        """
        self.blob_service_client.get_container_client(name).delete_container()

    def get_all_uls_names(self):
        """
        Returns a set containing all the container names that the client has access to

        """
        return {
            container["name"]
            for container in self.blob_service_client.list_containers()
        }

    def verify_uls_exists(self, uls_name):
        """
        Verifies whether a Underlying Storage with the given uls_name exists

        Args:
           uls_name (str): The Underlying Storage name to be verified

        Returns:
             bool: True if Underlying Storage exists, False otherwise

        """
        try:
            self.blob_service_client.get_container_client(
                uls_name).get_container_properties()
            return True
        except ResourceNotFoundError:
            return False

    def create_azure_secret(self):
        """
        Create a Kubernetes secret to allow NooBaa to create Azure-based backingstores

        """
        bs_secret_data = templating.load_yaml(
            constants.MCG_BACKINGSTORE_SECRET_YAML)
        bs_secret_data["metadata"]["name"] = create_unique_resource_name(
            "cldmgr-azure", "secret")
        bs_secret_data["metadata"]["namespace"] = config.ENV_DATA[
            "cluster_namespace"]
        bs_secret_data["data"]["AccountKey"] = base64.urlsafe_b64encode(
            self.credential.encode("UTF-8")).decode("ascii")
        bs_secret_data["data"]["AccountName"] = base64.urlsafe_b64encode(
            self.account_name.encode("UTF-8")).decode("ascii")

        return create_resource(**bs_secret_data)
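
The ULS helpers above are thin wrappers over container operations; a stand-alone sketch of the same create/verify/delete lifecycle using the SDK directly (account URL, key and container name are placeholders):

from azure.core.exceptions import ResourceNotFoundError
from azure.storage.blob import BlobServiceClient

client = BlobServiceClient(
    account_url="https://myaccount.blob.core.windows.net",
    credential="<storage-account-key>")

client.get_container_client("my-uls").create_container()

try:
    client.get_container_client("my-uls").get_container_properties()
    print("container exists")
except ResourceNotFoundError:
    print("container missing")

client.get_container_client("my-uls").delete_container()
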
Example no. 26
class StorageBlockBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageBlockBlobTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=self.settings.STORAGE_ACCOUNT_KEY,
            connection_data_block_size=4 * 1024,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

        return super(StorageBlockBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------

    @record
    def test_put_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8'))
            self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.stage_block('1', u'啊齄丂狛狜')
        self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        blob.stage_block(1, b'block', validate_content=True)

        # Assert

    @record
    def test_put_block_list(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        put_block_list_resp = blob.commit_block_list(block_list)

        # Assert
        content = blob.download_blob()
        self.assertEqual(b"".join(list(content)), b'AAABBBCCC')
        self.assertEqual(content.properties.etag,
                         put_block_list_resp.get('etag'))
        self.assertEqual(content.properties.last_modified,
                         put_block_list_resp.get('last_modified'))

    @record
    def test_put_block_list_invalid_block_id(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        try:
            block_list = [
                BlobBlock(block_id='1'),
                BlobBlock(block_id='2'),
                BlobBlock(block_id='4')
            ]
            blob.commit_block_list(block_list)
            self.fail()
        except HttpResponseError as e:
            self.assertGreaterEqual(
                str(e).find('specified block list is invalid'), 0)

        # Assert

    @record
    def test_put_block_list_with_md5(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list, validate_content=True)

        # Assert

    @record
    def test_get_block_list_no_blocks(self):
        # Arrange
        blob = self._create_blob()

        # Act
        block_list = blob.get_block_list('all')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 0)

    @record
    def test_get_block_list_uncommitted_blocks(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = blob.get_block_list('uncommitted')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 3)
        self.assertEqual(len(block_list[0]), 0)
        self.assertEqual(block_list[1][0].id, '1')
        self.assertEqual(block_list[1][0].size, 3)
        self.assertEqual(block_list[1][1].id, '2')
        self.assertEqual(block_list[1][1].size, 3)
        self.assertEqual(block_list[1][2].id, '3')
        self.assertEqual(block_list[1][2].size, 3)

    @record
    def test_get_block_list_committed_blocks(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list)

        # Act
        block_list = blob.get_block_list('committed')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 3)
        self.assertEqual(block_list[0][0].id, '1')
        self.assertEqual(block_list[0][0].size, 3)
        self.assertEqual(block_list[0][1].id, '2')
        self.assertEqual(block_list[0][1].size, 3)
        self.assertEqual(block_list[0][2].id, '3')
        self.assertEqual(block_list[0][2].size, 3)

    @record
    def test_create_small_block_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_small_block_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)
        update_resp = blob.upload_blob(data2, overwrite=True)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_large_block_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2,
                             overwrite=False,
                             metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)

    @record
    def test_create_large_block_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(data2,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_create_blob_from_bytes_single_put(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_0_bytes(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b''

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_from_bytes_blob_unicode(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        data = u'hello world'
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name,
                             data.encode('utf-8'))
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_from_bytes_blob_with_lease_id(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        lease = blob.acquire_lease()

        # Act
        create_resp = blob.upload_blob(data, lease=lease)

        # Assert
        output = blob.download_blob(lease=lease)
        self.assertEqual(b"".join(list(output)), data)
        self.assertEqual(output.properties.etag, create_resp.get('etag'))
        self.assertEqual(output.properties.last_modified,
                         create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_metadata(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        blob.upload_blob(data, metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    def test_create_blob_from_bytes_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_bytes_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(data, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_index(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:])

        # Assert
        self.assertEqual(data[3:], b"".join(list(blob.download_blob())))

    @record
    def test_create_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:], length=5)

        # Assert
        self.assertEqual(data[3:8], b"".join(list(blob.download_blob())))

    @record
    def test_create_blob_from_bytes_with_index_and_count_and_properties(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data[3:], length=5, content_settings=content_settings)

        # Assert
        self.assertEqual(data[3:8], b"".join(list(blob.download_blob())))
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_create_blob_from_bytes_non_parallel(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    def test_create_blob_from_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_path_non_parallel(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream,
                                           length=100,
                                           max_connections=1)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_path_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_stream_non_seekable_chunked_upload_known_size(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        blob_size = len(data) - 66
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file,
                             length=blob_size,
                             max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_non_seekable_chunked_upload_unknown_size(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_blob_from_stream_with_progress_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_stream_chunked_upload_with_count(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            resp = blob.upload_blob(stream, length=blob_size)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_chunked_upload_with_count_and_properties(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_create_blob_from_text(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        create_resp = blob.upload_blob(text)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_text_with_encoding(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16')

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    @record
    def test_create_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_text_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, encoded_data)

    @record
    def test_create_blob_with_md5(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        blob.upload_blob(data, validate_content=True)

        # Assert

    def test_create_blob_with_md5_chunked(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True)
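
The setUp above deliberately shrinks the client's transfer thresholds so that even small test payloads take the chunked, multi-block upload path. Outside the test harness the same knobs can be set on an ordinary client; the connection string, container, blob name, and sizes below are illustrative assumptions, not values from the original:

import os
from azure.storage.blob import BlobServiceClient

# Any payload larger than max_single_put_size is uploaded as multiple staged
# blocks of at most max_block_size bytes instead of a single request.
service = BlobServiceClient.from_connection_string(
    os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
    max_single_put_size=32 * 1024,  # single-request uploads up to 32 KiB
    max_block_size=4 * 1024,        # larger payloads are chunked into 4 KiB blocks
)
blob = service.get_blob_client("mycontainer", "chunked-example.bin")  # container must already exist
blob.upload_blob(os.urandom(64 * 1024), overwrite=True)  # 64 KiB, so the chunked path is used
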
Esempio n. 27
    def list_blobs(self):
        with BlobServiceClient.from_connection_string(self._connection_string) as client:
            container: ContainerClient = client.get_container_client(self._container_name)
            for blob in container.list_blobs(name_starts_with=self.path):
                yield blob
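
A hedged usage sketch for the generator above; the wrapping class, container name, and path prefix are assumptions added for illustration:

import os
from azure.storage.blob import BlobServiceClient, ContainerClient

class BlobLister:
    # Hypothetical host class; attribute names mirror the snippet above.
    def __init__(self, connection_string, container_name, path):
        self._connection_string = connection_string
        self._container_name = container_name
        self.path = path

    def list_blobs(self):
        with BlobServiceClient.from_connection_string(self._connection_string) as client:
            container: ContainerClient = client.get_container_client(self._container_name)
            for blob in container.list_blobs(name_starts_with=self.path):
                yield blob

# Placeholder container and prefix; each yielded item is a BlobProperties object.
lister = BlobLister(os.getenv("AZURE_STORAGE_CONNECTION_STRING"), "mycontainer", "reports/")
for props in lister.list_blobs():
    print(props.name, props.size)
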
Esempio n. 28
    def __init__(self, client=None):
        connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
        if client is None:
            self.client = BlobServiceClient.from_connection_string(connect_str)
        else:
            self.client = client
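
The optional client argument lets callers inject a pre-built (or fake) client instead of reading the connection string from the environment, which keeps unit tests free of real credentials. A minimal sketch, with a hypothetical class name and stub:

import os
from azure.storage.blob import BlobServiceClient

class BlobStore:
    # Hypothetical name; the body mirrors the snippet above.
    def __init__(self, client=None):
        connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
        if client is None:
            self.client = BlobServiceClient.from_connection_string(connect_str)
        else:
            self.client = client

class FakeBlobServiceClient:
    # Stand-in used only to demonstrate injection; makes no network calls.
    pass

store = BlobStore(client=FakeBlobServiceClient())  # no connection string required
assert isinstance(store.client, FakeBlobServiceClient)
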
Esempio n. 29
from __future__ import print_function
import os
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

try:

    # get the connection string
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')

    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # main menu loop
    while True:
        request = input(
            "Do you wish to [(s)earch] or [(d)ownload] or [exit]? > ")
        request = request.strip().lower()
        print("")

        # exit the loop
        if request == "exit":
            break

        # search for records
        elif request == "search" or request == "s":
            choice = input(
                "SEARCH> All containers [all], a container [con], or an object [obj]? > "
            )
            choice = choice.strip().lower()
            print("")
            # list all containers and blobs
    def __init__(self, azure_storage_conn_string):
        self._logger = Logger().logger
        self.blob_service_client = BlobServiceClient.from_connection_string(azure_storage_conn_string)