Example #1
def generate_token():
    blob_service_client = BlobServiceClient(account_url=config.URL,
                                            credential=config.SHARED_KEY)

    try:
        # Listing containers forces a request, which validates the account URL and shared key.
        for _ in blob_service_client.list_containers():
            continue
    except Exception:
        return 'cannot generate the sas token'
    container_client = blob_service_client.get_container_client("mycontainer")

    # container_token = generate_container_sas(
    #             container_client.account_name,
    #             container_client.container_name,
    #             account_key=container_client.credential.account_key,
    #             policy_id='my-access-policy-id'
    #         )

    sas_token = generate_account_sas(
        blob_service_client.account_name,
        account_key=blob_service_client.credential.account_key,
        resource_types=ResourceTypes(object=True),
        permission=AccountSasPermissions(read=True,
                                         write=True,
                                         add=True,
                                         create=True),
        expiry=datetime.utcnow() + timedelta(hours=1))
    return sas_token
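For context, a minimal usage sketch (not part of the original snippet) of how the account-level SAS returned above might be consumed. It assumes config.URL is the blob account endpoint and generate_token is in scope; the blob name is hypothetical. Since the SAS is generated with ResourceTypes(object=True), it only covers object-level operations, so it is used here for a blob read rather than a service-level listing.

# Hypothetical usage of the SAS returned by generate_token(); config.URL is assumed
# to be the blob account endpoint and "example.txt" is a placeholder blob name.
sas_token = generate_token()
sas_client = BlobServiceClient(account_url=config.URL, credential=sas_token)
blob_client = sas_client.get_blob_client("mycontainer", "example.txt")
data = blob_client.download_blob().readall()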
Example #2
def add_sanitizers(test_proxy):
    add_remove_header_sanitizer(headers="Ocp-Apim-Subscription-Key")
    add_remove_header_sanitizer(headers="Retry-After")
    add_general_regex_sanitizer(
        value="fakeendpoint",
        regex="(?<=\\/\\/)[a-z-]+(?=\\.cognitiveservices\\.azure\\.com)"
    )
    add_general_regex_sanitizer(
        regex="(?<=\\/\\/)[a-z]+(?=(?:|-secondary)\\.(?:table|blob|queue)\\.core\\.windows\\.net)",
        value="fakeendpoint",
    )
    add_oauth_response_sanitizer()

    # run tests
    yield

    # Dogfood env uses a static storage account so we clean up the blob resources
    # This is unnecessary for AzureCloud where each storage account is deleted at the end of testing
    if is_live() and os.getenv("TRANSLATION_ENVIRONMENT") == "Dogfood":
        client = BlobServiceClient(
            "https://" + os.getenv("TRANSLATION_DOCUMENT_STORAGE_NAME") + ".blob.core.windows.net/",
            os.getenv("TRANSLATION_DOCUMENT_STORAGE_KEY")
        )
        for container in client.list_containers():
            client.delete_container(container)
Example #3
def azblob_file(azblob_credentials,
                cloud_bucket_name,
                download_gcs_public_data,
                public=False):
    acc_url = f"https://{azblob_credentials['storage_account']}.blob.core.windows.net"
    azblob_client = BlobServiceClient(
        account_url=acc_url, credential=azblob_credentials["shared_key"])
    container_name = cloud_bucket_name + random_char(3).lower()
    if public:
        container_name += "public"
    print(f"\nUpload dataset to private azure blob container {container_name}")
    if container_name not in [
            cntr["name"] for cntr in azblob_client.list_containers()
    ]:
        if public:
            azblob_client.create_container(name=container_name,
                                           metadata=None,
                                           public_access="container")
        else:
            azblob_client.create_container(name=container_name,
                                           metadata=None,
                                           public_access=None)
    blob_client = azblob_client.get_blob_client(container_name, "myfile.csv")
    with open(download_gcs_public_data, "r") as f:
        blob_client.upload_blob(f.read(),
                                blob_type="BlockBlob",
                                overwrite=True)

    yield f"{container_name}/myfile.csv"

    azblob_client.delete_container(container_name)
    print(
        f"\nAzure Blob Container {container_name} is now marked for deletion")
Example #4
def list_containers(account_name, sas_token, required_string=None):

    # Break URL into URL and token
    if not sas_token.startswith('?'):
        sas_token = '?' + sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'
    
    blob_service_client = BlobServiceClient(account_url=storage_account_url_blob, 
                                            credential=sas_token)

    container_iter = blob_service_client.list_containers(include_metadata=False)
    containers = []
    
    for container in container_iter:
        name = container['name']
        if required_string is None or required_string in name:
            containers.append(name)
        else:
            print('Skipping container {}'.format(name))
    
    print('Enumerated {} containers:'.format(len(containers)))
    
    print(containers)

    return containers
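A hedged usage sketch for the helper above; the account name and SAS token below are placeholders rather than values from the original.

# Hypothetical call: list only the containers whose names contain "backup".
my_sas_token = "<sas-token-for-the-account>"
backup_containers = list_containers("mystorageaccount", my_sas_token, required_string="backup")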
Example #5
    def upload_third_party(self):
        logger.info("uploading third-party tools from %s", self.third_party)
        account_name = self.results["deploy"]["fuzz-name"]["value"]
        key = self.results["deploy"]["fuzz-key"]["value"]
        account_url = "https://%s.blob.core.windows.net" % account_name

        client = BlobServiceClient(account_url, credential=key)
        containers = [x["name"] for x in client.list_containers()]

        for name in os.listdir(self.third_party):
            path = os.path.join(self.third_party, name)
            if not os.path.isdir(path):
                continue
            if name not in containers:
                client.create_container(name)

            expiry = datetime.utcnow() + timedelta(minutes=30)
            sas = generate_container_sas(
                account_name,
                name,
                account_key=key,
                permission=ContainerSasPermissions(
                    read=True, write=True, delete=True, list=True
                ),
                expiry=expiry,
            )
            url = "%s/%s?%s" % (account_url, name, sas)

            subprocess.check_output(
                [self.azcopy, "sync", path, url, "--delete-destination", "true"]
            )
Example #6
    def upload_instance_setup(self):
        logger.info("uploading instance-specific-setup from %s", self.instance_specific)
        account_name = self.results["deploy"]["func-name"]["value"]
        key = self.results["deploy"]["func-key"]["value"]
        account_url = "https://%s.blob.core.windows.net" % account_name
        client = BlobServiceClient(account_url, credential=key)
        if "instance-specific-setup" not in [
            x["name"] for x in client.list_containers()
        ]:
            client.create_container("instance-specific-setup")

        expiry = datetime.utcnow() + timedelta(minutes=30)

        sas = generate_container_sas(
            account_name,
            "instance-specific-setup",
            account_key=key,
            permission=ContainerSasPermissions(
                read=True, write=True, delete=True, list=True
            ),
            expiry=expiry,
        )
        url = "%s/%s?%s" % (account_url, "instance-specific-setup", sas)

        subprocess.check_output(
            [
                self.azcopy,
                "sync",
                self.instance_specific,
                url,
                "--delete-destination",
                "true",
            ]
        )
Example #7
    def add_log_export(self) -> None:
        if not self.export_appinsights:
            logger.info("not exporting appinsights")
            return

        container_name = "app-insights"

        logger.info("adding appinsight log export")
        account_name = self.results["deploy"]["func_name"]["value"]
        key = self.results["deploy"]["func_key"]["value"]
        account_url = "https://%s.blob.core.windows.net" % account_name
        client = BlobServiceClient(account_url, credential=key)
        if container_name not in [x["name"] for x in client.list_containers()]:
            client.create_container(container_name)

        expiry = datetime.utcnow() + timedelta(days=2 * 365)

        # NOTE: as this is a long-lived SAS url, it should not be logged and only
        # used in the later export_configurations.create() call
        sas = generate_container_sas(
            account_name,
            container_name,
            account_key=key,
            permission=ContainerSasPermissions(write=True),
            expiry=expiry,
        )
        url = "%s/%s?%s" % (account_url, container_name, sas)

        record_types = (
            "Requests, Event, Exceptions, Metrics, PageViews, "
            "PageViewPerformance, Rdd, PerformanceCounters, Availability")

        req = ApplicationInsightsComponentExportRequest(
            record_types=record_types,
            destination_type="Blob",
            is_enabled="true",
            destination_address=url,
        )

        credential = AzureCliCredential()
        app_insight_client = ApplicationInsightsManagementClient(
            credential,
            subscription_id=self.get_subscription_id(),
        )

        to_delete = []
        for entry in app_insight_client.export_configurations.list(
                self.resource_group, self.application_name):
            if (entry.storage_name == account_name
                    and entry.container_name == container_name):
                to_delete.append(entry.export_id)

        for export_id in to_delete:
            logger.info("replacing existing export: %s", export_id)
            app_insight_client.export_configurations.delete(
                self.resource_group, self.application_name, export_id)

        app_insight_client.export_configurations.create(
            self.resource_group, self.application_name, req)
Example #8
    def get_blob_containers_by_storage_account(self, storage_account_name, account_key):

        blob_service = BlobServiceClient(
            account_url=f'{storage_account_name}.blob.core.windows.net',
            credential=account_key)
        containers = blob_service.list_containers()

        return containers
Example #9
def check_storage_account(account_name, key):
    blob_service_client = BlobServiceClient(ENDPOINT_URL.format(account_name),
                                            credential=key)
    containers = blob_service_client.list_containers(timeout=15)

    public_containers = list()
    for cont in containers:
        if cont.public_access is not None:
            public_containers.append(cont)

    return public_containers
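Note that ENDPOINT_URL is defined outside this snippet (it is also used by the variants in later examples); presumably it is a format string for the blob endpoint, along the lines of:

# Assumed definition of ENDPOINT_URL (not shown in the original snippet).
ENDPOINT_URL = "https://{}.blob.core.windows.net"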
Example #10
    def check_if_container_exists(self, blob_service_client, containername):
        """Check whether the given container exists and return a boolean flag."""

        flag = False
        # get the list of containers using the service client
        containers = blob_service_client.list_containers()
        for container in containers:
            if container.name == containername:
                flag = True
                break

        return flag
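As an aside, a hedged alternative sketch: newer azure-storage-blob releases expose ContainerClient.exists(), which avoids enumerating every container (assuming an SDK version that includes it).

    def check_if_container_exists_via_exists(self, blob_service_client, containername):
        """Alternative existence check; ContainerClient.exists() is available in newer SDK versions."""
        return blob_service_client.get_container_client(containername).exists()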
Example #11
def get_container(blob_service_client: BlobServiceClient,
                  container_name: str) -> ContainerClient:
    """Get a container client, creating the container if it does not exist yet."""
    logging.info('blob_service_client.list_containers()')
    logging.info(list(blob_service_client.list_containers()))

    try:
        # create_container raises ResourceExistsError (azure.core.exceptions)
        # when the container is already there.
        container_client = blob_service_client.create_container(container_name)
    except ResourceExistsError:
        container_client = blob_service_client.get_container_client(container_name)
    return container_client
Example #12
def list_blobs():
    # Build the URL of the Blob account to connect to
    blob_url = "https://{}.blob.core.windows.net".format(
        os.getenv("AZURE_STORAGE_ACCOUNT_NAME"))

    # Use DefaultAzureCredential to automatically obtain a credential for connecting to Blob storage.
    # DefaultAzureCredential tries to obtain a credential in the following order, so a Managed Identity
    # credential is used on Azure and the VS Code credential is used in a local development
    # environment, all automatically.
    #  1. EnvironmentCredential
    #     Uses the credential set in environment variables
    #     https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.environmentcredential?view=azure-python
    #  2. ManagedIdentityCredential
    #     Uses an Azure Managed Identity
    #     https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.managedidentitycredential?view=azure-python
    #  3. SharedTokenCacheCredential
    #     Uses the credential from signing in with Visual Studio and similar tools on Windows
    #     https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.sharedtokencachecredential?view=azure-python
    #  4. VisualStudioCodeCredential
    #     Uses the credential from signing in with the Azure Account extension for Visual Studio Code.
    #     Supported on Windows, macOS and Linux.
    #     https://marketplace.visualstudio.com/items?itemName=ms-vscode.azure-account
    #     https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.visualstudiocodecredential?view=azure-python
    #  5. AzureCliCredential
    #     Uses the credential from signing in with the Azure CLI.
    #     https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.azureclicredential?view=azure-python
    cred = DefaultAzureCredential()

    # When connecting to Blob storage, use an ExponentialRetry with explicit parameters.
    # By default ExponentialRetry is used, but its default parameters are
    #   initial_backoff=15, increment_base=3, retry_total=3, random_jitter_range=3
    #
    # which means up to 4 connection attempts including retries, with retry intervals of
    #   (15+3^1) = 18±3 s, (15+3^2) = 24±3 s, (15+3^3) = 42±3 s
    # so the HTTP connection of the client calling this Flask app would be held open for a long time.
    #
    # If that is not acceptable, set the parameters explicitly so retries against Blob storage happen sooner.
    # With the values in this example the retry intervals are
    #   (0.5+1.2^1) = 1.7±0.2 s, (0.5+1.2^2) = 1.94±0.2 s, (0.5+1.2^3) = 2.228±0.2 s
    retry = ExponentialRetry(initial_backoff=0.5,
                             increment_base=1.2,
                             random_jitter_range=0.2)
    client = BlobServiceClient(blob_url, cred, retry_policy=retry)
    containers = client.list_containers()
    container_names = [
        container.get("name", "unknown") for container in containers
    ]

    return ", ".join(container_names)
Example #13
def check_storage_account(account_name, key):
    blob_service_client = BlobServiceClient(ENDPOINT_URL.format(account_name),
                                            credential=key)
    containers = blob_service_client.list_containers(timeout=15)

    public_containers = list()
    try:
        for cont in containers:
            if cont.public_access is not None:
                public_containers.append(cont)
    except azure.core.exceptions.HttpResponseError:
        print(
            "\t\t[-] Could not scan account {}, skipping".format(account_name),
            flush=True)

    return public_containers
Example #14
    def test_create_container_with_default_cpk_n_deny_override(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key,
                                connection_data_block_size=1024,
                                max_single_put_size=1024,
                                min_large_block_upload_threshold=1024,
                                max_block_size=1024,
                                max_page_size=1024)
        container_client = bsc.create_container(
            'denyoverridecpkcontainer',
            container_encryption_scope=
            TEST_CONTAINER_ENCRYPTION_KEY_SCOPE_DENY_OVERRIDE)
        container_props = container_client.get_container_properties()
        self.assertEqual(
            container_props.encryption_scope.default_encryption_scope,
            TEST_CONTAINER_ENCRYPTION_KEY_SCOPE.default_encryption_scope)
        self.assertEqual(
            container_props.encryption_scope.prevent_encryption_scope_override,
            True)
        for container in bsc.list_containers(
                name_starts_with='denyoverridecpkcontainer'):
            self.assertEqual(
                container_props.encryption_scope.default_encryption_scope,
                TEST_CONTAINER_ENCRYPTION_KEY_SCOPE.default_encryption_scope)
            self.assertEqual(
                container_props.encryption_scope.
                prevent_encryption_scope_override, True)

        blob_client = container_client.get_blob_client("appendblob")

        # It's not allowed to set encryption scope on the blob when the container denies encryption scope override.
        with self.assertRaises(HttpResponseError):
            blob_client.upload_blob(b'aaaa',
                                    BlobType.AppendBlob,
                                    encryption_scope=TEST_ENCRYPTION_KEY_SCOPE)

        resp = blob_client.upload_blob(b'aaaa', BlobType.AppendBlob)

        self.assertEqual(
            resp['encryption_scope'],
            TEST_CONTAINER_ENCRYPTION_KEY_SCOPE.default_encryption_scope)

        container_client.delete_container()
Example #15
def check_storage_account(account_name, key):
    blob_service_client = BlobServiceClient(ENDPOINT_URL.format(account_name), credential=key)
    containers = blob_service_client.list_containers(timeout=15)
    public_containers = list() 

    for cont, e in iterator_wrapper(containers):
        if cont == STOP_SCAN_FLAG:
            break
        if e:
            if type(e) is not StopIteration:
                print("\t\t[-] Could not scan a container of account {} due to error {}, skipping".format(account_name, e), flush=True)
                continue
            else:
                break
        if cont.public_access is not None:
            public_containers.append(cont)

    return public_containers
Example #16
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    #demo starts here

    new_module.new_func()  #shows custom module import

    action = req.params["action"]
    if action == "SQL":
        server = "<SECRET>"
        database = "<SECRET>"
        username = "******"
        password = "******"
        cnxn = pyodbc.connect(
            'DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + server +
            ';DATABASE=' + database + ';UID=' + username + ';PWD=' + password)
        cursor = cnxn.cursor()
        #logging.info(cursor)
        cursor.execute("select * from testTable")
        row = cursor.fetchone()
        json_array = []
        for i in row:
            json_array.append(i)
        json_result = json.dumps({"data": json_array})
        return func.HttpResponse(json_result)
    elif action == "blob":
        #blob test
        logging.info("blob action")
        credential = "<SECRET>"
        service = BlobServiceClient(account_url="<SECRET>",
                                    credential=credential)
        container_names = next(service.list_containers())
        #logging.info(container_names)
        container = service.get_container_client("<SECRET>")
        blob_names = next(container.list_blobs())
        #logging.info(blob_names)
        blob = container.get_blob_client("<SECRET>")
        dl_stream = blob.download_blob()
        #logging.info(dl_stream.content_as_text())
        return func.HttpResponse(dl_stream.content_as_text())
    else:
        return func.HttpResponse("### WRONG ACTION, CHECK API. ###")
Example #17
    def test_create_container_with_default_cpk_n(self, resource_group,
                                                 location, storage_account,
                                                 storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key,
                                connection_data_block_size=1024,
                                max_single_put_size=1024,
                                min_large_block_upload_threshold=1024,
                                max_block_size=1024,
                                max_page_size=1024)
        container_client = bsc.create_container(
            'cpkcontainer',
            container_encryption_scope=TEST_CONTAINER_ENCRYPTION_KEY_SCOPE)
        container_props = container_client.get_container_properties()
        self.assertEqual(
            container_props.encryption_scope.default_encryption_scope,
            TEST_CONTAINER_ENCRYPTION_KEY_SCOPE.default_encryption_scope)
        self.assertEqual(
            container_props.encryption_scope.prevent_encryption_scope_override,
            False)
        for container in bsc.list_containers(name_starts_with='cpkcontainer'):
            self.assertEqual(
                container_props.encryption_scope.default_encryption_scope,
                TEST_CONTAINER_ENCRYPTION_KEY_SCOPE.default_encryption_scope)
            self.assertEqual(
                container_props.encryption_scope.
                prevent_encryption_scope_override, False)

        blob_client = container_client.get_blob_client("appendblob")

        # providing encryption scope when upload the blob
        resp = blob_client.upload_blob(
            b'aaaa',
            BlobType.AppendBlob,
            encryption_scope=TEST_ENCRYPTION_KEY_SCOPE)
        # Use the provided encryption scope on the blob
        self.assertEqual(resp['encryption_scope'], TEST_ENCRYPTION_KEY_SCOPE)

        container_client.delete_container()
Example #18
    def add_instance_id(self) -> None:
        logger.info("setting instance_id log export")

        container_name = "base-config"
        blob_name = "instance_id"
        account_name = self.results["deploy"]["func-name"]["value"]
        key = self.results["deploy"]["func-key"]["value"]
        account_url = "https://%s.blob.core.windows.net" % account_name
        client = BlobServiceClient(account_url, credential=key)
        if container_name not in [x["name"] for x in client.list_containers()]:
            client.create_container(container_name)

        blob_client = client.get_blob_client(container_name, blob_name)
        if blob_client.exists():
            logger.debug("instance_id already exists")
            instance_id = uuid.UUID(blob_client.download_blob().readall().decode())
        else:
            logger.debug("creating new instance_id")
            instance_id = uuid.uuid4()
            blob_client.upload_blob(str(instance_id))

        logger.info("instance_id: %s", instance_id)
Example #19
def connect_container(service: BlobServiceClient, container: str, create=True) -> ContainerClient:
    '''
    Parse options for container name to upload,
    compare to list to see if container needs to be created
    '''
    container_list = [x for x in service.list_containers()]
    container_names = [x['name'] for x in container_list]

    container_client = service.get_container_client(container)

    if container not in container_names and create: # Meaning no container setup yet.
        operation = container_client.create_container()
        if operation['error_code'] is not None:
            raise Exception(operation['error_code'])
        else:
            log.info(f"Created container {container}, request_id: {operation['request_id']}.")
    elif container not in container_names and not create:
        log.error(f'Container {container} not found.')
        exit(1)


    return container_client
Example #20
    def upload_tools(self) -> None:
        logger.info("uploading tools from %s", self.tools)
        account_name = self.results["deploy"]["func-name"]["value"]
        key = self.results["deploy"]["func-key"]["value"]
        account_url = "https://%s.blob.core.windows.net" % account_name
        client = BlobServiceClient(account_url, credential=key)
        if "tools" not in [x["name"] for x in client.list_containers()]:
            client.create_container("tools")

        expiry = datetime.utcnow() + timedelta(minutes=30)

        sas = generate_container_sas(
            account_name,
            "tools",
            account_key=key,
            permission=ContainerSasPermissions(read=True,
                                               write=True,
                                               delete=True,
                                               list=True),
            expiry=expiry,
        )
        url = "%s/%s?%s" % (account_url, "tools", sas)

        subprocess.check_output([
            self.azcopy,
            "copy",
            os.path.join(self.tools, "*"),
            url,
            "--overwrite=true",
            "--recursive=true",
        ])

        subprocess.check_output([
            self.azcopy, "sync", self.tools, url, "--delete-destination",
            "true"
        ])
Example #21
class DataLakeServiceClient(StorageAccountHostsMixin):
    """A client to interact with the DataLake Service at the account level.

    This client provides operations to retrieve and configure the account properties
    as well as list, create and delete file systems within the account.
    For operations relating to a specific file system, directory or file, clients for those entities
    can also be retrieved using the `get_client` functions.

    :ivar str url:
        The full endpoint URL to the datalake service endpoint.
    :ivar str primary_endpoint:
        The full primary endpoint URL.
    :ivar str primary_hostname:
        The hostname of the primary endpoint.
    :param str account_url:
        The URL to the DataLake storage account. Any other entities included
        in the URL path (e.g. file system or file) will be discarded. This URL can be optionally
        authenticated with a SAS token.
    :param credential:
        The credentials with which to authenticate. This is optional if the
        account URL already has a SAS token. The value can be a SAS token string,
        an instance of a AzureSasCredential from azure.core.credentials, an account
        shared access key, or an instance of a TokenCredentials class from azure.identity.
        If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential
        - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError.

    .. admonition:: Example:

        .. literalinclude:: ../samples/datalake_samples_service.py
            :start-after: [START create_datalake_service_client]
            :end-before: [END create_datalake_service_client]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient from connection string.

        .. literalinclude:: ../samples/datalake_samples_service.py
            :start-after: [START create_datalake_service_client_oauth]
            :end-before: [END create_datalake_service_client_oauth]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient with Azure Identity credentials.
    """
    def __init__(
            self,
            account_url,  # type: str
            credential=None,  # type: Optional[Any]
            **kwargs  # type: Any
    ):
        # type: (...) -> None
        try:
            if not account_url.lower().startswith('http'):
                account_url = "https://" + account_url
        except AttributeError:
            raise ValueError("Account URL must be a string.")
        parsed_url = urlparse(account_url.rstrip('/'))
        if not parsed_url.netloc:
            raise ValueError("Invalid URL: {}".format(account_url))

        blob_account_url = convert_dfs_url_to_blob_url(account_url)
        self._blob_account_url = blob_account_url
        self._blob_service_client = BlobServiceClient(blob_account_url,
                                                      credential, **kwargs)
        self._blob_service_client._hosts[LocationMode.SECONDARY] = ""  #pylint: disable=protected-access

        _, sas_token = parse_query(parsed_url.query)
        self._query_str, self._raw_credential = self._format_query_string(
            sas_token, credential)

        super(DataLakeServiceClient,
              self).__init__(parsed_url,
                             service='dfs',
                             credential=self._raw_credential,
                             **kwargs)
        # ADLS doesn't support secondary endpoint, make sure it's empty
        self._hosts[LocationMode.SECONDARY] = ""

    def __enter__(self):
        self._blob_service_client.__enter__()
        return self

    def __exit__(self, *args):
        self._blob_service_client.close()

    def close(self):
        # type: () -> None
        """ This method is to close the sockets opened by the client.
        It need not be used when the client is used as a context manager.
        """
        self._blob_service_client.close()

    def _format_url(self, hostname):
        """Format the endpoint URL according to hostname
        """
        formatted_url = "{}://{}/{}".format(self.scheme, hostname,
                                            self._query_str)
        return formatted_url

    @classmethod
    def from_connection_string(
        cls,
        conn_str,  # type: str
        credential=None,  # type: Optional[Any]
        **kwargs  # type: Any
    ):  # type: (...) -> DataLakeServiceClient
        """
        Create DataLakeServiceClient from a Connection String.

        :param str conn_str:
            A connection string to an Azure Storage account.
        :param credential:
            The credentials with which to authenticate. This is optional if the
            account URL already has a SAS token, or the connection string already has shared
            access key values. The value can be a SAS token string,
            an instance of a AzureSasCredential from azure.core.credentials, an account shared access
            key, or an instance of a TokenCredentials class from azure.identity.
            Credentials provided here will take precedence over those in the connection string.
        :returns: A DataLakeServiceClient.
        :rtype: ~azure.storage.filedatalake.DataLakeServiceClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_file_system.py
                :start-after: [START create_data_lake_service_client_from_conn_str]
                :end-before: [END create_data_lake_service_client_from_conn_str]
                :language: python
                :dedent: 8
                :caption: Creating the DataLakeServiceClient from a connection string.
        """
        account_url, _, credential = parse_connection_str(
            conn_str, credential, 'dfs')
        return cls(account_url, credential=credential, **kwargs)

    def get_user_delegation_key(
            self,
            key_start_time,  # type: datetime
            key_expiry_time,  # type: datetime
            **kwargs  # type: Any
    ):
        # type: (...) -> UserDelegationKey
        """
        Obtain a user delegation key for the purpose of signing SAS tokens.
        A token credential must be present on the service object for this request to succeed.

        :param ~datetime.datetime key_start_time:
            A DateTime value. Indicates when the key becomes valid.
        :param ~datetime.datetime key_expiry_time:
            A DateTime value. Indicates when the key stops being valid.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: The user delegation key.
        :rtype: ~azure.storage.filedatalake.UserDelegationKey

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_user_delegation_key]
                :end-before: [END get_user_delegation_key]
                :language: python
                :dedent: 8
                :caption: Get user delegation key from datalake service client.
        """
        delegation_key = self._blob_service_client.get_user_delegation_key(
            key_start_time=key_start_time,
            key_expiry_time=key_expiry_time,
            **kwargs)  # pylint: disable=protected-access
        return UserDelegationKey._from_generated(delegation_key)  # pylint: disable=protected-access

    def list_file_systems(
            self,
            name_starts_with=None,  # type: Optional[str]
            include_metadata=None,  # type: Optional[bool]
            **kwargs):
        # type: (...) -> ItemPaged[FileSystemProperties]
        """Returns a generator to list the file systems under the specified account.

        The generator will lazily follow the continuation tokens returned by
        the service and stop when all file systems have been returned.

        :param str name_starts_with:
            Filters the results to return only file systems whose names
            begin with the specified prefix.
        :param bool include_metadata:
            Specifies that file system metadata be returned in the response.
            The default value is `False`.
        :keyword int results_per_page:
            The maximum number of file system names to retrieve per API
            call. If the request does not specify the server will return up to 5,000 items per page.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :keyword bool include_deleted:
            Specifies that deleted file systems are to be returned in the response. This is only for
            accounts with file system restore enabled. The default value is `False`.
            .. versionadded:: 12.3.0
        :returns: An iterable (auto-paging) of FileSystemProperties.
        :rtype: ~azure.core.paging.ItemPaged[~azure.storage.filedatalake.FileSystemProperties]

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START list_file_systems]
                :end-before: [END list_file_systems]
                :language: python
                :dedent: 8
                :caption: Listing the file systems in the datalake service.
        """
        item_paged = self._blob_service_client.list_containers(
            name_starts_with=name_starts_with,
            include_metadata=include_metadata,
            **kwargs)  # pylint: disable=protected-access
        item_paged._page_iterator_class = FileSystemPropertiesPaged  # pylint: disable=protected-access
        return item_paged

    def create_file_system(
            self,
            file_system,  # type: Union[FileSystemProperties, str]
            metadata=None,  # type: Optional[Dict[str, str]]
            public_access=None,  # type: Optional[PublicAccess]
            **kwargs):
        # type: (...) -> FileSystemClient
        """Creates a new file system under the specified account.

        If the file system with the same name already exists, a ResourceExistsError will
        be raised. This method returns a client with which to interact with the newly
        created file system.

        :param str file_system:
            The name of the file system to create.
        :param metadata:
            A dict with name-value pairs to associate with the
            file system as metadata. Example: `{'Category':'test'}`
        :type metadata: dict(str, str)
        :param public_access:
            Possible values include: file system, file.
        :type public_access: ~azure.storage.filedatalake.PublicAccess
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START create_file_system_from_service_client]
                :end-before: [END create_file_system_from_service_client]
                :language: python
                :dedent: 8
                :caption: Creating a file system in the datalake service.
        """
        file_system_client = self.get_file_system_client(file_system)
        file_system_client.create_file_system(metadata=metadata,
                                              public_access=public_access,
                                              **kwargs)
        return file_system_client

    def _rename_file_system(self, name, new_name, **kwargs):
        # type: (str, str, **Any) -> FileSystemClient
        """Renames a filesystem.

        Operation is successful only if the source filesystem exists.

        :param str name:
            The name of the filesystem to rename.
        :param str new_name:
            The new filesystem name the user wants to rename to.
        :keyword lease:
            Specify this to perform only if the lease ID given
            matches the active lease ID of the source filesystem.
        :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient
        """
        self._blob_service_client._rename_container(name, new_name, **kwargs)  # pylint: disable=protected-access
        renamed_file_system = self.get_file_system_client(new_name)
        return renamed_file_system

    def undelete_file_system(self, name, deleted_version, **kwargs):
        # type: (str, str, **Any) -> FileSystemClient
        """Restores soft-deleted filesystem.

        Operation will only be successful if used within the specified number of days
        set in the delete retention policy.

        .. versionadded:: 12.3.0
            This operation was introduced in API version '2019-12-12'.

        :param str name:
            Specifies the name of the deleted filesystem to restore.
        :param str deleted_version:
            Specifies the version of the deleted filesystem to restore.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient
        """
        new_name = kwargs.pop('new_name', None)
        file_system = self.get_file_system_client(new_name or name)
        self._blob_service_client.undelete_container(name,
                                                     deleted_version,
                                                     new_name=new_name,
                                                     **kwargs)  # pylint: disable=protected-access
        return file_system

    def delete_file_system(
            self,
            file_system,  # type: Union[FileSystemProperties, str]
            **kwargs):
        # type: (...) -> FileSystemClient
        """Marks the specified file system for deletion.

        The file system and any files contained within it are later deleted during garbage collection.
        If the file system is not found, a ResourceNotFoundError will be raised.

        :param file_system:
            The file system to delete. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :keyword lease:
            If specified, delete_file_system only succeeds if the
            file system's lease is active and matches this ID.
            Required if the file system has an active lease.
        :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: None

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START delete_file_system_from_service_client]
                :end-before: [END delete_file_system_from_service_client]
                :language: python
                :dedent: 8
                :caption: Deleting a file system in the datalake service.
        """
        file_system_client = self.get_file_system_client(file_system)
        file_system_client.delete_file_system(**kwargs)
        return file_system_client

    def get_file_system_client(
            self,
            file_system  # type: Union[FileSystemProperties, str]
    ):
        # type: (...) -> FileSystemClient
        """Get a client to interact with the specified file system.

        The file system need not already exist.

        :param file_system:
            The file system. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :returns: A FileSystemClient.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_file_system.py
                :start-after: [START create_file_system_client_from_service]
                :end-before: [END create_file_system_client_from_service]
                :language: python
                :dedent: 8
                :caption: Getting the file system client to interact with a specific file system.
        """
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system

        _pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport
                                       ),  # pylint: disable = protected-access
            policies=self._pipeline.
            _impl_policies  # pylint: disable = protected-access
        )
        return FileSystemClient(
            self.url,
            file_system_name,
            credential=self._raw_credential,
            _configuration=self._config,
            _pipeline=_pipeline,
            _hosts=self._hosts,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def get_directory_client(
        self,
        file_system,  # type: Union[FileSystemProperties, str]
        directory  # type: Union[DirectoryProperties, str]
    ):
        # type: (...) -> DataLakeDirectoryClient
        """Get a client to interact with the specified directory.

        The directory need not already exist.

        :param file_system:
            The file system that the directory is in. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param directory:
            The directory with which to interact. This can either be the name of the directory,
            or an instance of DirectoryProperties.
        :type directory: str or ~azure.storage.filedatalake.DirectoryProperties
        :returns: A DataLakeDirectoryClient.
        :rtype: ~azure.storage.filedatalake.DataLakeDirectoryClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_directory_client_from_service_client]
                :end-before: [END get_directory_client_from_service_client]
                :language: python
                :dedent: 8
                :caption: Getting the directory client to interact with a specific directory.
        """
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system
        try:
            directory_name = directory.name
        except AttributeError:
            directory_name = directory

        _pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport
                                       ),  # pylint: disable = protected-access
            policies=self._pipeline.
            _impl_policies  # pylint: disable = protected-access
        )
        return DataLakeDirectoryClient(
            self.url,
            file_system_name,
            directory_name=directory_name,
            credential=self._raw_credential,
            _configuration=self._config,
            _pipeline=_pipeline,
            _hosts=self._hosts,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def get_file_client(
        self,
        file_system,  # type: Union[FileSystemProperties, str]
        file_path  # type: Union[FileProperties, str]
    ):
        # type: (...) -> DataLakeFileClient
        """Get a client to interact with the specified file.

        The file need not already exist.

        :param file_system:
            The file system that the file is in. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param file_path:
            The file with which to interact. This can either be the full path of the file (from the root directory),
            or an instance of FileProperties, e.g. directory/subdirectory/file
        :type file_path: str or ~azure.storage.filedatalake.FileProperties
        :returns: A DataLakeFileClient.
        :rtype: ~azure.storage.filedatalake.DataLakeFileClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_file_client_from_service_client]
                :end-before: [END get_file_client_from_service_client]
                :language: python
                :dedent: 8
                :caption: Getting the file client to interact with a specific file.
        """
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system
        try:
            file_path = file_path.name
        except AttributeError:
            pass

        _pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport
                                       ),  # pylint: disable = protected-access
            policies=self._pipeline.
            _impl_policies  # pylint: disable = protected-access
        )
        return DataLakeFileClient(
            self.url,
            file_system_name,
            file_path=file_path,
            credential=self._raw_credential,
            _hosts=self._hosts,
            _configuration=self._config,
            _pipeline=_pipeline,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def set_service_properties(self, **kwargs):
        # type: (**Any) -> None
        """Sets the properties of a storage account's Datalake service, including
        Azure Storage Analytics.

        .. versionadded:: 12.4.0
            This operation was introduced in API version '2020-06-12'.

        If an element (e.g. analytics_logging) is left as None, the
        existing settings on the service for that functionality are preserved.

        :keyword analytics_logging:
            Groups the Azure Analytics Logging settings.
        :type analytics_logging: ~azure.storage.filedatalake.AnalyticsLogging
        :keyword hour_metrics:
            The hour metrics settings provide a summary of request
            statistics grouped by API in hourly aggregates.
        :type hour_metrics: ~azure.storage.filedatalake.Metrics
        :keyword minute_metrics:
            The minute metrics settings provide request statistics
            for each minute.
        :type minute_metrics: ~azure.storage.filedatalake.Metrics
        :keyword cors:
            You can include up to five CorsRule elements in the
            list. If an empty list is specified, all CORS rules will be deleted,
            and CORS will be disabled for the service.
        :type cors: list[~azure.storage.filedatalake.CorsRule]
        :keyword str target_version:
            Indicates the default version to use for requests if an incoming
            request's version is not specified.
        :keyword delete_retention_policy:
            The delete retention policy specifies whether to retain deleted files/directories.
            It also specifies the number of days and versions of file/directory to keep.
        :type delete_retention_policy: ~azure.storage.filedatalake.RetentionPolicy
        :keyword static_website:
            Specifies whether the static website feature is enabled,
            and if yes, indicates the index document and 404 error document to use.
        :type static_website: ~azure.storage.filedatalake.StaticWebsite
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: None
        """
        return self._blob_service_client.set_service_properties(**kwargs)  # pylint: disable=protected-access

    def get_service_properties(self, **kwargs):
        # type: (**Any) -> Dict[str, Any]
        """Gets the properties of a storage account's datalake service, including
        Azure Storage Analytics.

        .. versionadded:: 12.4.0
            This operation was introduced in API version '2020-06-12'.

        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :returns: An object containing datalake service properties such as
            analytics logging, hour/minute metrics, cors rules, etc.
        :rtype: Dict[str, Any]
        """
        props = self._blob_service_client.get_service_properties(**kwargs)  # pylint: disable=protected-access
        return get_datalake_service_properties(props)
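For reference, a minimal hedged sketch of constructing this client and listing file systems; the account URL is a placeholder and DefaultAzureCredential is only one of the credential types the constructor accepts.

# Hypothetical usage of the DataLakeServiceClient defined above.
from azure.identity import DefaultAzureCredential

service = DataLakeServiceClient("https://<account>.dfs.core.windows.net",
                                credential=DefaultAzureCredential())
for fs in service.list_file_systems():
    print(fs.name)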
Example #22
class AzureBlobFileSystem(AbstractFileSystem):
    """
    Access Azure Data Lake Gen2 and Azure Storage as if they were a file system, using Multiprotocol Access

    Parameters
    ----------
    account_name: str
        The storage account name. This is used to authenticate requests
        signed with an account key and to construct the storage endpoint. It
        is required unless a connection string is given, or if a custom
        domain is used with anonymous authentication.
    account_key: str
        The storage account key. This is used for shared key authentication.
        If none of account key, sas token or client_id is specified, anonymous access
        will be used.
    sas_token: str
        A shared access signature token to use to authenticate requests
        instead of the account key. If account key and sas token are both
        specified, account key will be used to sign. If none of account key, sas token
        or client_id are specified, anonymous access will be used.
    request_session: Session
        The session object to use for http requests.
    connection_string: str
        If specified, this will override all other parameters besides
        request session. See
        http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
        for the connection string format.
    socket_timeout: int
        If specified, this will override the default socket timeout. The timeout specified is in
        seconds.
        See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
    token_credential: TokenCredential
        A token credential used to authenticate HTTPS requests. The token value
        should be updated before its expiration.
    blocksize: int
        The block size to use for download/upload operations. Defaults to the value of
        ``BlockBlobService.MAX_BLOCK_SIZE``
    client_id: str
        Client ID to use when authenticating using an AD Service Principal client/secret.
    client_secret: str
        Client secret to use when authenticating using an AD Service Principal client/secret.
    tenant_id: str
        Tenant ID to use when authenticating using an AD Service Principal client/secret.

    Examples
    --------
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX", container_name="XXXX")
    >>> abfs.ls('')

    **  Sharded Parquet & csv files can be read as: **
        ------------------------------------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY})

        ddf = dd.read_parquet('abfs://container_name/folder.parquet', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY,})
    """

    protocol = "abfs"

    def __init__(
        self,
        account_name: str,
        account_key: str = None,
        connection_string: str = None,
        credential: str = None,
        sas_token: str = None,
        request_session=None,
        socket_timeout: int = None,
        token_credential=None,
        blocksize: int = create_configuration(storage_sdk="blob").max_block_size,
        client_id: str = None,
        client_secret: str = None,
        tenant_id: str = None,
    ):
        AbstractFileSystem.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.connection_string = connection_string
        self.credential = credential
        self.sas_token = sas_token
        self.request_session = request_session
        self.socket_timeout = socket_timeout
        self.token_credential = token_credential
        self.blocksize = blocksize
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id

        if (
            self.token_credential is None
            and self.account_key is None
            and self.sas_token is None
            and self.client_id is not None
        ):
            self.token_credential = self._get_token_from_service_principal()
        self.do_connect()

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Remove the protocol from the input path

        Parameters
        ----------
        path: str
            Path to remove the protocol from

        Returns
        -------
        str
            Returns a path without the protocol
        """
        logging.debug(f"_strip_protocol for {path}")
        ops = infer_storage_options(path)

        # we need to make sure that the path retains
        # the format {host}/{path}
        # here host is the container_name
        if ops.get("host", None):
            ops["path"] = ops["host"] + ops["path"]
        ops["path"] = ops["path"].lstrip("/")

        logging.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"]

    def _get_token_from_service_principal(self):
        """
        Create a TokenCredential given a client_id, client_secret and tenant_id

        Returns
        -------
        TokenCredential
        """
        from azure.common.credentials import ServicePrincipalCredentials
        from azure.storage.common import TokenCredential

        sp_cred = ServicePrincipalCredentials(
            client_id=self.client_id,
            secret=self.client_secret,
            tenant=self.tenant_id,
            resource="https://storage.azure.com/",
        )

        token_cred = TokenCredential(sp_cred.token["access_token"])
        return token_cred

    def do_connect(self):
        """Connect to the BlobServiceClient, using user-specified connection details.
        Tries credentials first, then connection string and finally account key

        Raises
        ------
        ValueError if none of the connection details are available
        """
        self.account_url: str = f"https://{self.account_name}.blob.core.windows.net"
        if self.credential is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.credential
            )
        elif self.connection_string is not None:
            self.service_client = BlobServiceClient.from_connection_string(
                conn_str=self.connection_string
            )
        elif self.account_key is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.account_key
            )
        else:
            raise ValueError("unable to connect with provided params!!")
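
    # Note: ``sas_token`` is accepted by __init__ but never consulted in
    # do_connect above. A minimal, hypothetical extension (not in the original)
    # would pass it straight through as the credential, e.g.:
    #
    #     elif self.sas_token is not None:
    #         self.service_client = BlobServiceClient(
    #             account_url=self.account_url, credential=self.sas_token
    #         )
    #
    # azure-storage-blob v12 accepts a SAS token string as ``credential``.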

    def split_path(self, path, delimiter="/", return_container: bool = False, **kwargs):
        """
        Normalize ABFS path string into bucket and key.

        Parameters
        ----------
        path : string
            Input path, like `abfs://my_container/path/to/file`

        delimiter: string
            Delimiter used to split the path

        return_container: bool

        Examples
        --------
        >>> split_path("abfs://my_container/path/to/file")
        ['my_container', 'path/to/file']
        """

        if path in ["", delimiter]:
            return "", ""

        path = self._strip_protocol(path)
        path = path.lstrip(delimiter)
        if "/" not in path:
            # this means path is the container_name
            return path, ""
        else:
            return path.split(delimiter, 1)
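
    # Usage sketch (hypothetical paths): note the mixed return types -- the
    # empty and container-only cases return a tuple, while paths with a key
    # return the list produced by str.split:
    #
    #     fs.split_path("abfs://my_container")          # -> ("my_container", "")
    #     fs.split_path("abfs://my_container/a/b.csv")  # -> ["my_container", "a/b.csv"]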

    # def _generate_blobs(self, *args, **kwargs):
    #     """Follow next_marker to get ALL results."""
    #     logging.debug("running _generate_blobs...")
    #     blobs = self.blob_fs.list_blobs(*args, **kwargs)
    #     yield from blobs
    #     while blobs.next_marker:
    #         logging.debug(f"following next_marker {blobs.next_marker}")
    #         kwargs["marker"] = blobs.next_marker
    #         blobs = self.blob_fs.list_blobs(*args, **kwargs)
    #         yield from blobs

    # def _matches(
    #     self, container_name, path, as_directory=False, delimiter="/", **kwargs
    # ):
    #     """check if the path returns an exact match"""

    #     path = path.rstrip(delimiter)
    #     gen = self.blob_fs.list_blob_names(
    #         container_name=container_name,
    #         prefix=path,
    #         delimiter=delimiter,
    #         num_results=None,
    #     )

    #     contents = list(gen)
    #     if not contents:
    #         return False

    #     if as_directory:
    #         return contents[0] == path + delimiter
    #     else:
    #         return contents[0] == path

    def ls(
        self,
        path: str,
        detail: bool = False,
        invalidate_cache: bool = True,
        delimiter: str = "/",
        return_glob: bool = False,
        **kwargs,
    ):
        """
        Create a list of blob names from a blob container

        Parameters
        ----------
        path: str
            Path to an Azure Blob with its container name

        detail: bool
            If False, return a list of blob names, else a list of dictionaries with blob details

        invalidate_cache:  bool
            If True, do not use the cache

        delimiter: str
            Delimiter used to split paths

        return_glob: bool

        """

        logging.debug(f"abfs.ls() is searching for {path}")

        container, path = self.split_path(path)
        if (container in ["", delimiter]) and (path in ["", delimiter]):
            # This is the case where only the containers are being returned
            logging.info(
                "Returning a list of containers in the azure blob storage account"
            )
            if detail:
                contents = self.service_client.list_containers(include_metadata=True)
                return self._details(contents)
            else:
                contents = self.service_client.list_containers()
                return [f"{c.name}{delimiter}" for c in contents]

        else:
            if container not in ["", delimiter]:
                # This is the case where the container name is passed
                container_client = self.service_client.get_container_client(
                    container=container
                )
                blobs = container_client.walk_blobs(name_starts_with=path)
                try:
                    blobs = [blob for blob in blobs]
                except Exception:
                    raise FileNotFoundError
                if len(blobs) > 1:
                    if return_glob:
                        return self._details(blobs, return_glob=True)
                    if detail:
                        return self._details(blobs)
                    else:
                        return [
                            f"{blob.container}{delimiter}{blob.name}" for blob in blobs
                        ]
                elif len(blobs) == 1:
                    if (blobs[0].name.rstrip(delimiter) == path) and not blobs[
                        0
                    ].has_key(  # NOQA
                        "blob_type"
                    ):

                        path = blobs[0].name
                        blobs = container_client.walk_blobs(name_starts_with=path)
                        if return_glob:
                            return self._details(blobs, return_glob=True)
                        if detail:
                            return self._details(blobs)
                        else:
                            return [
                                f"{blob.container}{delimiter}{blob.name}"
                                for blob in blobs
                            ]
                    elif isinstance(blobs[0], BlobPrefix):
                        if detail:
                            for blob_page in blobs:
                                return self._details(blob_page)
                        else:
                            outblobs = []
                            for blob_page in blobs:
                                for blob in blob_page:
                                    outblobs.append(
                                        f"{blob.container}{delimiter}{blob.name}"
                                    )
                            return outblobs
                    elif blobs[0]["blob_type"] == "BlockBlob":
                        if detail:
                            return self._details(blobs)
                        else:
                            return [
                                f"{blob.container}{delimiter}{blob.name}"
                                for blob in blobs
                            ]
                    elif isinstance(blobs[0], ItemPaged):
                        outblobs = []
                        for page in blobs:
                            for b in page:
                                outblobs.append(b)
                        # without this return, the branch would silently return None
                        return outblobs
                    else:
                        raise FileNotFoundError(
                            f"Unable to identify blobs in {path} for {blobs[0].name}"
                        )
                elif len(blobs) == 0:
                    if return_glob or (path in ["", delimiter]):
                        return []
                    else:
                        raise FileNotFoundError
                else:
                    raise FileNotFoundError

    def _details(self, contents, delimiter="/", return_glob: bool = False, **kwargs):
        """
        Return a list of dictionaries of specifying details about the contents

        Parameters
        ----------
        contents

        delimiter: str
            Delimiter used to separate containers and files

        return_glob: bool


        Returns
        -------
        List of dicts
            Returns details about the contents, such as name, size and type
        """
        pathlist = []
        for c in contents:
            data = {}
            if c.has_key("container"):  # NOQA
                data["name"] = f"{c.container}{delimiter}{c.name}"
                if c.has_key("size"):  # NOQA
                    data["size"] = c.size
                else:
                    data["size"] = 0
                if data["size"] == 0:
                    data["type"] = "directory"
                else:
                    data["type"] = "file"
            else:
                data["name"] = f"{c.name}{delimiter}"
                data["size"] = 0
                data["type"] = "directory"
            if return_glob:
                data["name"] = data["name"].rstrip("/")

            pathlist.append(data)
        return pathlist

    def walk(self, path: str, maxdepth=None, **kwargs):
        """ Return all files belows path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into

        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.

        **kwargs are passed to ``ls``
        """
        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, return_glob=True, **kwargs)
        except (FileNotFoundError, IOError):
            return [], [], []

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[pathname] = info
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in full_dirs:
            yield from self.walk(d, maxdepth=maxdepth, detail=detail, **kwargs)
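
    # Usage sketch (hypothetical container and prefix): iterate like os.walk()
    #
    #     for root, dirs, files in fs.walk("my_container/data", maxdepth=2):
    #         print(root, dirs, files)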

    def mkdir(self, path, delimiter="/", exists_ok=False, **kwargs):
        """
        Create directory entry at path

        Parameters
        ----------
        path: str
            The path to create

        delimiter: str
            Delimiter to use when splitting the path

        exists_ok: bool
            If True, do not raise an exception if the directory already exists. Defaults to False
        """
        container_name, path = self.split_path(path, delimiter=delimiter)
        if not exists_ok:
            if (container_name not in self.ls("")) and (not path):
                # create new container
                self.service_client.create_container(name=container_name)
            elif (
                container_name
                in [container_path.split("/")[0] for container_path in self.ls("")]
            ) and path:
                ## attempt to create prefix
                container_client = self.service_client.get_container_client(
                    container=container_name
                )
                container_client.upload_blob(name=path, data="")
            else:
                ## everything else
                raise RuntimeError(f"Cannot create {container_name}{delimiter}{path}.")
        else:
            if container_name in self.ls("") and path:
                container_client = self.service_client.get_container_client(
                    container=container_name
                )
                container_client.upload_blob(name=path, data="")

    def rmdir(self, path: str, delimiter="/", **kwargs):
        """
        Remove a directory, if empty

        Parameters
        ----------
        path: str
            Path of directory to remove

        delimiter: str
            Delimiter to use when splitting the path

        """

        container_name, path = self.split_path(path, delimiter=delimiter)
        if (container_name + delimiter in self.ls("")) and (not path):
            # delete container
            self.service_client.delete_container(container_name)

    def _rm(self, path, delimiter="/", **kwargs):
        """
        Delete a given file

        Parameters
        ----------
        path: str
            Path to file to delete

        delimiter: str
            Delimiter to use when splitting the path
        """
        if self.isfile(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name
            )
            logging.debug(f"Delete blob {path} in {container_name}")
            container_client.delete_blob(path)
        elif self.isdir(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name
            )
            if (container_name + delimiter in self.ls("")) and (not path):
                logging.debug(f"Delete container {container_name}")
                container_client.delete_container(container_name)
        else:
            raise RuntimeError(f"cannot delete {path}")

    def _open(
        self,
        path: str,
        mode: str = "rb",
        block_size: int = None,
        autocommit: bool = True,
        cache_options=None,
        **kwargs,
    ):
        """Open a file on the datalake, or a block blob

        Parameters
        ----------
        path: str
            Path to file to open

        mode: str
            What mode to open the file in - defaults to "rb"

        block_size: int
            Size per block for multi-part downloads.

        autocommit: bool
            Whether or not to write to the destination directly

        cache_type: str
            One of "readahead", "none", "mmap", "bytes", defaults to "readahead"
            Caching policy in read mode.
            See the definitions here:
            https://filesystem-spec.readthedocs.io/en/latest/api.html#readbuffering
        """
        logging.debug(f"_open:  {path}")
        return AzureBlobFile(
            fs=self,
            path=path,
            mode=mode,
            block_size=block_size or self.blocksize,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
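
A minimal usage sketch for the class above (the account name, key, container and file paths are placeholders, not from the original snippet):

fs = AzureBlobFileSystem(account_name="myaccount", account_key="<key>")
print(fs.ls(""))  # list containers in the storage account
with fs.open("my_container/folder/file.csv", "rb") as f:
    header = f.readline()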
Example No. 23
0
account_name = 'lilablobssc'
storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

# Read-only
storage_account_sas_token = ''
storage_account_key = ''

output_file = r'd:\temp\lila_sas_urls.txt'

#%% Enumerate containers

blob_service_client = BlobServiceClient(account_url=storage_account_url_blob,
                                        credential=storage_account_sas_token)

container_iter = blob_service_client.list_containers(include_metadata=False)
containers = []

for container in container_iter:
    containers.append(container)
containers = [c['name'] for c in containers]

#%% Generate SAS tokens

permissions = ContainerSasPermissions(read=True,
                                      write=False,
                                      delete=False,
                                      list=True)
expiry_time = datetime(year=2034, month=1, day=1)
start_time = datetime(year=2020, month=1, day=1)
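
The snippet ends before any SAS is actually generated. A hedged sketch of the likely next step, using generate_container_sas from azure.storage.blob (the loop body, dictionary name and URL format below are assumptions, not part of the original):

from azure.storage.blob import generate_container_sas

container_to_sas_url = {}
for container_name in containers:
    sas = generate_container_sas(account_name,
                                 container_name,
                                 account_key=storage_account_key,
                                 permission=permissions,
                                 start=start_time,
                                 expiry=expiry_time)
    container_to_sas_url[container_name] = \
        f'{storage_account_url_blob}/{container_name}?{sas}'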
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # DefaultAzureCredential supports managed identity or environment configuration (see docs)
    credential = DefaultAzureCredential()

    # parse parameters
    storage_account_source = os.environ["par_storage_account_name_source"]
    storage_account_source_url = "https://" + storage_account_source + ".blob.core.windows.net"
    storage_account_backup = os.environ["par_storage_account_name_backup"]
    storage_account_backup_url = "https://" + storage_account_backup + ".blob.core.windows.net"

    # create blob clients for backup and source, re-using the credential created above
    client_source = BlobServiceClient(account_url=storage_account_source_url,
                                      credential=credential)
    client_backup = BlobServiceClient(account_url=storage_account_backup_url,
                                      credential=credential)

    # Create queue clients
    queue_service = QueueService(
        account_name=os.environ['par_storage_account_name_queue'],
        account_key=os.environ['par_storage_account_key_queue'])
    queue_service.encode_function = QueueMessageFormat.text_base64encode

    # Get all blobs in sourcecontainer
    container_source_list = client_source.list_containers()
    for container in container_source_list:
        # Log container name
        logging.info(container.name)
        container_source = client_source.get_container_client(container.name)

        # Get all blobs in container
        prev_blob_name = ""
        prev_blob_etag = ""
        blob_source_list = container_source.list_blobs(include=['snapshots'])
        for blob in blob_source_list:

            if blob.snapshot is None:
                # Blob that is not snapshot.
                # 1. Check if snapshot needs to be created
                if prev_blob_name != blob.name:
                    # New blob without snapshot, create snapshot/backup
                    logging.info("new blob" + blob.name +
                                 ", create snapshot/backup")
                    create_snapshot(client_source, queue_service,
                                    container.name, blob.name, blob.etag)
                elif prev_blob_etag != blob.etag:
                    # Existing blob that has changed, create snapshot/backup
                    logging.info(blob.name +
                                 "has changed, create snapshot/backup")
                    create_snapshot(client_source, queue_service,
                                    container.name, blob.name, blob.etag)

                # 2. Check if incremental backup needs to be created
                # get blob backup and source properties
                blob_source = client_source.get_blob_client(
                    container=container.name, blob=blob.name)
                source_last_modified = blob_source.get_blob_properties(
                )['last_modified']
                source_etag = str(
                    blob_source.get_blob_properties()['etag']).replace(
                        "\"", "")
                blob_name_backup = append_timestamp_etag(
                    blob.name, source_last_modified, source_etag)
                blob_backup = client_backup.get_blob_client(
                    container=container.name + "bak", blob=blob_name_backup)
                blob_exists = check_blob_exists(blob_backup)
                # Check if blob exists
                if not blob_exists:
                    # Latest blob does not yet exist in backup, create message on queue to update
                    queue_json = "{" + "\"container\":\"{}\", \"blob_name\":\"{}\", \"etag\":\"{}\"".format(
                        container.name, blob.name, source_etag) + "}"
                    logging.info("backup needed for: " + queue_json)
                    queue_service.put_message(os.environ['par_queue_name'],
                                              queue_json)
                    #asyncio.run(copy_adf_blob_source_backup(blob_source, blob_backup))

            prev_blob_name = blob.name
            prev_blob_etag = blob.etag

    result = {"status": "ok"}
    return func.HttpResponse(str(result))
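
The helper check_blob_exists referenced above is not included in the snippet. A minimal sketch of how such a helper is commonly written, assuming a v12 BlobClient is passed in (this is a guess at the author's helper, not their code):

def check_blob_exists(blob_client):
    """Return True if the blob behind blob_client exists, False otherwise."""
    from azure.core.exceptions import ResourceNotFoundError
    try:
        # probing the properties raises ResourceNotFoundError for missing blobs
        blob_client.get_blob_properties()
        return True
    except ResourceNotFoundError:
        return False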
Example No. 25
0
class AzureStorageHelper(object):
    def __init__(self, *args, **kwargs):
        if "stay_on_remote" in kwargs:
            del kwargs["stay_on_remote"]

        # if not handed down explicitely, try to read credentials from
        # environment variables.
        for (csavar, envvar) in [
            ("account_url", "AZ_BLOB_ACCOUNT_URL"),
            ("credential", "AZ_BLOB_CREDENTIAL"),
        ]:
            if csavar not in kwargs and envvar in os.environ:
                kwargs[csavar] = os.environ.get(envvar)
        assert (
            "account_url" in kwargs
        ), "Missing AZ_BLOB_ACCOUNT_URL env var (and possibly AZ_BLOB_CREDENTIAL)"
        # remove leading '?' from SAS if needed
        # if kwargs.get("sas_token", "").startswith("?"):
        #    kwargs["sas_token"] = kwargs["sas_token"][1:]

        # by right only account_key or sas_token should be set, but we let
        # BlobServiceClient deal with the ambiguity
        self.blob_service_client = BlobServiceClient(**kwargs)

    def container_exists(self, container_name):
        return any(
            True for _ in self.blob_service_client.list_containers(container_name)
        )
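
    # Note: the first positional argument of BlobServiceClient.list_containers()
    # is ``name_starts_with``, so the check above is a prefix match and can
    # report True for "data" when only "data-archive" exists. A stricter,
    # hypothetical variant (not in the original) compares the names exactly:
    #
    #     def container_exists_exact(self, container_name):
    #         return any(
    #             c["name"] == container_name
    #             for c in self.blob_service_client.list_containers(
    #                 name_starts_with=container_name)
    #         )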

    def upload_to_azure_storage(
        self,
        container_name,
        file_path,
        blob_name=None,
        use_relative_path_for_blob_name=True,
        relative_start_dir=None,
        extra_args=None,
    ):
        """ Upload a file to Azure Storage
            This function uploads a file to an Azure Storage Container as a blob.
            Args:
                container_name: the name of the Azure container to use
                file_path: The path to the file to upload.
                blob_name: The name to set for the blob on Azure. If not specified, this will default to the
                    name of the file.
            Returns: The blob_name of the file on Azure if written, None otherwise
        """
        file_path = os.path.realpath(os.path.expanduser(file_path))

        assert container_name, "container_name must be specified"
        assert os.path.exists(file_path), (
            "The file path specified does not exist: %s" % file_path
        )
        assert os.path.isfile(file_path), (
            "The file path specified does not appear to be a file: %s" % file_path
        )

        container_client = self.blob_service_client.get_container_client(container_name)
        try:
            container_client.create_container()
        except azure.core.exceptions.ResourceExistsError:
            pass

        if not blob_name:
            if use_relative_path_for_blob_name:
                if relative_start_dir:
                    path_blob_name = os.path.relpath(file_path, relative_start_dir)
                else:
                    path_blob_name = os.path.relpath(file_path)
            else:
                path_blob_name = os.path.basename(file_path)
            blob_name = path_blob_name
        blob_client = container_client.get_blob_client(blob_name)

        # upload_blob fails, if blob exists
        if self.exists_in_container(container_name, blob_name):
            blob_client.delete_blob()
        try:
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, blob_type="BlockBlob")
            return blob_client.get_blob_properties().name
        except Exception as e:
            raise WorkflowError("Error in creating blob. %s" % str(e))
            # return None

    def download_from_azure_storage(
        self,
        container_name,
        blob_name,
        destination_path=None,
        expandBlobNameIntoDirs=True,
        make_dest_dirs=True,
        create_stub_only=False,
    ):
        """ Download a file from Azure Storage
            This function downloads an object from a specified Azure Storage container.
            Args:
                container_name: the name of the Azure Storage container to use (container name only)
                destination_path: If specified, the file will be saved to this path, otherwise cwd.
                expandBlobNameIntoDirs: Since Azure blob names can include slashes, if this is True (default)
                    then Azure blob names with slashes are expanded into directories on the receiving end.
                    If it is False, the blob name is passed to os.path.basename() to get the substring
                    following the last slash.
                make_dest_dirs: If this is True (default) and the destination path includes directories
                    that do not exist, they will be created.
            Returns:
                The destination path of the downloaded file on the receiving end, or None if the destination_path
                could not be downloaded
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        if destination_path:
            destination_path = os.path.realpath(os.path.expanduser(destination_path))
        else:
            if expandBlobNameIntoDirs:
                destination_path = os.path.join(os.getcwd(), blob_name)
            else:
                destination_path = os.path.join(
                    os.getcwd(), os.path.basename(blob_name)
                )
        # if the destination path does not exist
        if make_dest_dirs:
            os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        if not create_stub_only:
            with open(destination_path, "wb") as my_blob:
                blob_data = b.download_blob()
                blob_data.readinto(my_blob)
        else:
            # just create an empty file with the right timestamps
            ts = b.get_blob_properties().last_modified.timestamp()
            with open(destination_path, "wb") as fp:
                os.utime(fp.name, (ts, ts))
        return destination_path

    def delete_from_container(self, container_name, blob_name):
        """ Delete a file from Azure Storage container

            This function deletes an object from a specified Azure Storage container.

            Args:
                container_name: the name of the Azure Storage container to use (container name only, not endpoint)
                blob_name: the name of the blob to delete from the container

            Returns:
                nothing
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        b.delete_blob()

    def exists_in_container(self, container_name, blob_name):
        """ Returns whether the blob exists in the container

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to delete from the container

            Returns:
                True | False
        """

        assert (
            container_name
        ), 'container_name must be specified (did you try to write to "root" or forgot to set --default-remote-prefix?)'
        assert blob_name, "blob_name must be specified"
        cc = self.blob_service_client.get_container_client(container_name)
        return any(True for _ in cc.list_blobs(name_starts_with=blob_name))

    def blob_size(self, container_name, blob_name):
        """ Returns the size of a blob

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to delete from the container

            Returns:
                Size in kb
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"

        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        return b.get_blob_properties().size // 1024

    def blob_last_modified(self, container_name, blob_name):
        """ Returns a timestamp of a blob

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to delete from the container

            Returns:
                timestamp
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        return b.get_blob_properties().last_modified.timestamp()

    def list_blobs(self, container_name):
        """ Returns a list of blobs from the container

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)

            Returns:
                list of blobs
        """
        assert container_name, "container_name must be specified"
        c = self.blob_service_client.get_container_client(container_name)
        return [b.name for b in c.list_blobs()]
Example No. 26
0
    answer = ""
    while answer not in ["y", "n"]:
        answer = input("OK to continue [Y/N]? ").lower()
    return answer == "y"


#%% Create the clients

source_blob_service_client = BlobServiceClient(
    account_url=source_account_url_blob, credential=source_sas_token)
target_blob_service_client = BlobServiceClient(
    account_url=target_account_url_blob, credential=target_sas_token)

#%% List source and destination containers

source_container_iter = source_blob_service_client.list_containers(
    include_metadata=True)
target_container_iter = target_blob_service_client.list_containers(
    include_metadata=True)

source_containers = []
target_containers = []

print('Source containers:')
for container in source_container_iter:
    source_containers.append(container)
    print(container['name'], container['metadata'])

print('\nTarget containers:')
for container in target_container_iter:
    target_containers.append(container)
    print(container['name'], container['metadata'])
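
The snippet ends after listing the containers. A hedged sketch of a likely follow-up step, copying a single blob between the two accounts with BlobClient.start_copy_from_url (the container name, blob name and SAS handling below are assumptions, not from the original):

src_container = 'images'
blob_name = 'example.jpg'

# the source account is accessed via SAS, so append the token to the source URL
src_blob_url = (source_account_url_blob + '/' + src_container + '/' + blob_name
                + '?' + source_sas_token.lstrip('?'))

dest_blob = target_blob_service_client.get_blob_client(src_container, blob_name)
copy_props = dest_blob.start_copy_from_url(src_blob_url)
print(copy_props['copy_status'])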
Example No. 27
0
class AzureClient(CloudClient):
    """
    Implementation of an Azure Client using the Azure API

    """
    def __init__(self,
                 account_name=None,
                 credential=None,
                 auth_dict=None,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        if auth_dict:
            account_name = auth_dict.get("STORAGE_ACCOUNT_NAME")
            credential = auth_dict.get("STORAGE_ACCOUNT_KEY")
        if account_name and credential:
            self.account_name = account_name
            self.credential = credential
            self.secret = self.create_azure_secret()

            account_url = constants.AZURE_BLOB_ENDPOINT_TEMPLATE.format(
                account_name)
            self.blob_service_client = BlobServiceClient(
                account_url=account_url, credential=credential)

    def internal_create_uls(self, name, region):
        """
        Creates the Underlying Storage using the Azure API

        Args:
           name (str): The Underlying Storage name to be created

        """
        self.blob_service_client.get_container_client(name).create_container()

    def internal_delete_uls(self, name):
        """
        Deletes the Underlying Storage using the Azure API

        Args:
           name (str): The Underlying Storage name to be deleted

        """
        self.blob_service_client.get_container_client(name).delete_container()

    def get_all_uls_names(self):
        """
        Returns a set containing all the container names that the client has access to

        """
        return {
            container["name"]
            for container in self.blob_service_client.list_containers()
        }

    def verify_uls_exists(self, uls_name):
        """
        Verifies whether an Underlying Storage with the given uls_name exists

        Args:
           uls_name (str): The Underlying Storage name to be verified

        Returns:
             bool: True if Underlying Storage exists, False otherwise

        """
        try:
            self.blob_service_client.get_container_client(
                uls_name).get_container_properties()
            return True
        except ResourceNotFoundError:
            return False

    def create_azure_secret(self):
        """
        Create a Kubernetes secret to allow NooBaa to create Azure-based backingstores

        """
        bs_secret_data = templating.load_yaml(
            constants.MCG_BACKINGSTORE_SECRET_YAML)
        bs_secret_data["metadata"]["name"] = create_unique_resource_name(
            "cldmgr-azure", "secret")
        bs_secret_data["metadata"]["namespace"] = config.ENV_DATA[
            "cluster_namespace"]
        bs_secret_data["data"]["AccountKey"] = base64.urlsafe_b64encode(
            self.credential.encode("UTF-8")).decode("ascii")
        bs_secret_data["data"]["AccountName"] = base64.urlsafe_b64encode(
            self.account_name.encode("UTF-8")).decode("ascii")

        return create_resource(**bs_secret_data)
Example No. 28
0
class AzBlobManagerSync:
    """A utility class to help working with Azure Storage.
        This class implements synchronous methods based on the
        Microsoft Python SDK azure.storage.blob
    See:
        https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python

    Available:
        - Basic methods to work with containers and blobs

    """

    def __init__(self, connection_string=None, account_url=None, credential=None):
        """Instantiate an asynchronous AzBlobManagerSync object.

        Args:
            connection_string (str): A connection string to an Azure Storage account.
            account_url (str): The URL to the blob storage account. Any other entities included in
                 the URL path (e.g. container or blob) will be discarded. This URL can be
                 optionally authenticated with a SAS token.
            credential (str):  The credentials with which to authenticate. This is optional
                 if the account URL already has a SAS token, or the connection string already
                 has shared access key values. The value can be a SAS token string, an account
                 shared access key, or an instance of a TokenCredentials class from azure.identity.
                 Credentials provided here will take precedence over those in the connection string.

        Examples:
            Creating the AzBlobManagerSync with an account url and a shared access key:
             azStorageManager = AzBlobManagerSync(account_url=self.url, credential=self.shared_access_key)

            Creating the AzBlobManagerSync with a connection string that has the shared access key:
             azStorageManager = AzBlobManagerSync(connection_string='DefaultEndpointsProtocol=http;...')

         """

        self.connection_string = connection_string
        self.account_url = account_url
        self.credential = credential

        try:
            from azure.storage.blob import BlobServiceClient
            if (self.connection_string is not None):
                # Create BlobServiceClient from a Connection String
                self.blob_service_client = BlobServiceClient.from_connection_string(
                    conn_str=self.connection_string, credential=self.credential)
            else:
                # Create the BlobServiceClient with account url and credential.
                self.blob_service_client = BlobServiceClient(
                    account_url=self.account_url, credential=self.credential)
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')

    def _logAzureError(self, err=AzureError):
        msg = err.message.split('\n')[0]
        logger.error(f'AzureError error: {msg}')

    def create_container(self, container_name):
        """Creates a new container.

        Args:
            container_name (str): The name of the container.
            See https://docs.microsoft.com/en-us/rest/api/storageservices/naming-and-referencing-containers--blobs--and-metadata
                 for naming convention

        Returns:
         bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            new_container = self.blob_service_client.create_container(
                container_name)
            properties = new_container.get_container_properties()
            success = properties is not None and properties.name == container_name
        except ResourceExistsError:
            logger.info(f'Container \"{container_name}\" already exists.')
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def delete_container(self, container_name):
        """Deletes a container.

        Args:
            container_name (str): The name of the container.

        Returns:
         bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            self.blob_service_client.delete_container(container_name)
            success = True
        except ResourceNotFoundError:
            logger.info(f'Container \"{container_name}\" does not exist.')
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def _list_containers(self, name_starts_with=None, include_metadata=False):
        """Lists containers.

        Args:
            name_starts_with (str): Filters the results to return only containers whose names
                begin with the specified prefix.
            include_metadata (bool): Specifies that container metadata be returned in the response.

        Returns:
            list[ContainerProperties]: A list of ContainerProperties, or None if an error occurred.
        """

        try:
            containers = []
            for container in self.blob_service_client.list_containers(
                    name_starts_with=name_starts_with, include_metadata=include_metadata):
                containers.append(container)
            return containers
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return None

    def list_containers_name(self, name_starts_with=None):
        """Lists containers' name.

        Args:
           name_starts_with (str): Filters the results to return only containers whose names
               begin with the specified prefix.

        Returns:
           list: A list of strings representing the container names.
        """

        containers_list = []
        containers = self._list_containers(
            name_starts_with=name_starts_with, include_metadata=False)
        if (containers is None):
            return containers_list
        for container in containers:
            containers_list.append(container['name'])
        return containers_list

    def create_append_blob(self, container_name, blob_name, replace_blob=False):
        """Creates an append blob in an existing container.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            replace_blob (bool): If True, deletes existing blob with same name

        Returns:
         bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            # raise ResourceNotFoundError if blob does not exist
            blob_client.get_blob_properties()
            # blob exists already
            if (replace_blob is True):
                blob_client.create_append_blob()
            success = True
        except ResourceNotFoundError:
            blob_client.create_append_blob()
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def create_page_blob(self,
                         container_name, blob_name, size=1024, content_settings=None,
                         metadata=None, premium_page_blob_tier=None):
        """Creates a page blob in an existing container.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            size (int): This specifies the maximum size for the page blob, up to 1 TB.
                The page blob size must be aligned to a 512-byte boundary
            content_settings (ContentSettings): ContentSettings object used to set blob properties.
                Used to set content type, encoding, language, disposition, md5, and cache control.
            metadata (dict(str, str)): Name-value pairs associated with the blob as metadata
            premium_page_blob_tier (PremiumPageBlobTier): A page blob tier value to set the blob to
        Returns:
         bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.create_page_blob(
                size, content_settings, metadata, premium_page_blob_tier)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def delete_blob(self, container_name, blob_name):
        """Deletes a blob.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.

        Returns:
         bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.delete_blob()
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def list_blobs(self, container_name):
        """Lists the blobs in the specified container.

        Args:
            container_name (str): The name of the container.

        Returns:
            list: A list of BlobProperties objects for the blobs in the container.
        """

        blobs_list = []
        try:
            container_client = self.blob_service_client.get_container_client(
                container_name)
            for blob in container_client.list_blobs():
                blobs_list.append(blob)
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('Fatal error')
        return blobs_list

    def upload_data(self, data, container_name, blob_name, blob_type='BlockBlob'):
        """Creates a new blob from a data source with automatic chunking.

        Args:
            data: The blob data to upload.
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            blob_type (str): The type of the blob. This can be either BlockBlob, PageBlob or AppendBlob.

        Returns:
            bool: The return value. True for success, False otherwise.
        """

        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.upload_blob(data)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def append_block(self, data, container_name, blob_name):
        """Commits a new block of data to the end of the existing append blob.

        Args:
            data: Content of the block.
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.

        Returns:
            bool: The return value. True for success, False otherwise.
        """

        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.append_block(data)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def download_data(self, container_name, blob_name):
        """Downloads a blob.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
        Returns:
            stream: The data stream
        """
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            stream = blob_client.download_blob()
            return stream.readall()
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
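
A minimal usage sketch for the class above (the connection string, container and blob names are placeholders, not from the original):

mgr = AzBlobManagerSync(connection_string='<your connection string>')
if mgr.create_container('logs'):
    mgr.create_append_blob('logs', 'app.log')
    mgr.append_block(b'first line\n', 'logs', 'app.log')
    print(len(mgr.list_blobs('logs')))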
Example No. 29
0
class Connector:
    def __init__(self, path=None, storage_account=None, container=None):

        logging.basicConfig(level=logging.INFO)

        self.storage_account = storage_account
        self.container = container

        if path:
            parsed_path = self.parse_azure_path(path)
            self.storage_account = parsed_path["storage_account"]
            self.container = parsed_path["container"]

        # Gets credential from azure cli
        self.credential = DefaultAzureCredential()

        # Create class wide storage account and container clients if names are passed
        if self.storage_account:
            blob_storage_url = self.get_blob_storage_url(
                storage_account=self.storage_account
            )
            self.blob_service_client = BlobServiceClient(
                credential=self.credential, account_url=blob_storage_url
            )
            if self.container:
                container_names = [
                    container.name
                    for container in self.blob_service_client.list_containers()
                ]
                if self.container in container_names:
                    self.container_client = (
                        self.blob_service_client.get_container_client(
                            container=self.container
                        )
                    )
                else:
                    raise ValueError(
                        f"The container: {self.container} is not in the storage account: {self.storage_account}"
                    )

    @arguments_decorator()
    def get_blob_storage_url(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> str:
        """
        Returns the storage account url for the path or storage_account name passed

        :param path: str: optional An azure path. Defaults to None.
        :param storage_account: str: optional Storage account name. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.

        :return str: The storage account url in the form: https://{storage_account}.blob.core.windows.net/
        """
        return f"https://{storage_account}.blob.core.windows.net/"

    def parse_azure_path(self, path: str) -> dict:
        """
        Parse an azure url into : storage_account, container and filepath.
        If passing a url of the form azure://container/filepath, the storage account is
        taken from the class instance. If there is no storage account passed for the class
        the storage account will be None.

        :param path: str: The azure blob path
        :return: dict: A dictionary containing the container name and filepath
        """
        storage_account = self.storage_account
        container = self.container

        if path.startswith("https://"):
            storage_account = re.findall(
                r"https://(.*)\.blob\.core\.windows\.net", path
            )[0]
            path = path.replace(f"https://{storage_account}.blob.core.windows.net/", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)

        elif path.startswith("azure://"):
            path = path.replace("azure://", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)

        else:
            filepath = path
        return {
            "storage_account": storage_account,
            "container": container,
            "file_path": filepath,
        }
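
    # Usage sketch (hypothetical values):
    #
    #     conn.parse_azure_path("https://myaccount.blob.core.windows.net/data/a/b.csv")
    #     # -> {"storage_account": "myaccount", "container": "data", "file_path": "a/b.csv"}
    #
    #     conn.parse_azure_path("azure://data/a/b.csv")
    #     # -> storage_account falls back to the value stored on the instance (possibly None)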

    def is_azure_path(self, path: str) -> bool:
        """
        Returns true if the path is of a recognised azure path format

        :param path: str: The path to test

        :return bool: True if path is of an accepted azure path format
        """
        patterns = [r"https://.*\.blob.core.windows.net", r"azure://"]
        return any([bool(re.match(p, path)) for p in patterns])

    @arguments_decorator()
    def get_blob_service_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> BlobServiceClient:
        """
        Returns a blob service client for the specified storage account. If no parameters are passed the class values are used

        :param path: str: optional An azure path, the storage account will be used to create a client. Defaults to None.
        :param storage_account: str: optional The name of the storage account to create a client for. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.

        :return BlobServiceClient: An azure blobserviceclient for the specified storage account
        """
        if storage_account == self.storage_account:
            return self.blob_service_client
        else:
            blob_storage_url = self.get_blob_storage_url(
                storage_account=storage_account
            )
            return BlobServiceClient(
                credential=self.credential, account_url=blob_storage_url
            )

    @arguments_decorator()
    def get_container_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> ContainerClient:
        """
        Returns a container client when a container name in the storage account is passed. If no params are passed the class values will be used

        :param path: str: optional An Azure path, the container in the path will be used. Defaults to None.
        :param storage_account: str: optional A storage account name containing the container. Defaults to None.
        :param container: str: optional The name of the container to create a client for. Defaults to None.
        :param file_path: str: optional The file path will ultimately be ignored. Defaults to None.

        :exception ValueError: Raised if the container does not exist in the storage account

        :return ContainerClient: An Azure client for the container
        """
        if storage_account == self.storage_account and container == self.container:
            return self.container_client
        else:
            client = self.get_blob_service_client(storage_account=storage_account)
            container_names = [container.name for container in client.list_containers()]
            if container in container_names:
                return client.get_container_client(container=container)
            else:
                raise ValueError(
                    f"The container: {container} is not in the storage account: {storage_account}"
                )

    @arguments_decorator()
    def list_blobs(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> list:
        """
        Returns a list of blobs, with paths that match the path passed

        :param path: str: optional An azure path to search for blobs. Defaults to None.
        :param storage_account: str: optional storage account name. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional the prefix file path. Defaults to None.

        :return list: Blobs in the path passed
        """
        container_client = self.get_container_client(
            storage_account=storage_account, container=container
        )
        if file_path:
            blob_iter = container_client.list_blobs(name_starts_with=file_path)
            return [blob.name.replace(file_path, "") for blob in blob_iter]
        else:
            blob_iter = container_client.list_blobs()
            return [blob.name for blob in blob_iter]

    @multi_arguments_decorator(local_support=True)
    def download_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Copy a folder from azure to a local path

        :param source_path: str: optional An Azure path to the folder to download. Defaults to None.
        :param source_storage_account: str: optional The storage account name. Defaults to None.
        :param source_container: str: optional The container name. Defaults to None.
        :param source_file_path: str: optional The path to the folder to download. Defaults to None.
        :param dest_path: str: optional The local path to download the folder to. Defaults to None.
        :param dest_storage_account: str: optional Ignored. Defaults to None.
        :param dest_container: str: optional Ignored. Defaults to None.
        :param dest_file_path: str: optional Ignored. Defaults to None.

        :exception ValueError: Raised when destination path is an azure path
        """
        container_client = self.get_container_client(
            storage_account=source_storage_account, container=source_container
        )

        if self.is_azure_path(dest_path):
            raise ValueError(
                f"Expected destination to be local path got azure path: {dest_path}"
            )
        os.makedirs(dest_path, exist_ok=True)

        for blob in container_client.list_blobs(source_file_path):
            file_name = os.path.basename(blob.name)
            local_path = os.path.join(dest_path, file_name)
            with open(local_path, "wb") as f:
                logging.info(f"Downloading {blob.name} to {local_path}")
                blob_data = container_client.download_blob(blob.name)
                blob_data.readinto(f)
            logging.info("Completed Download")

    @arguments_decorator()
    def blob_exists(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ):
        """
        Checks if a file exists in azure, return bool

        :param path: str: optional Azure path to file to check. Defaults to None.
        :param storage_account: str: optional Storage account. Defaults to None.
        :param container: str: optional Container. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.

        :return [bool]: True if file exists
        """

        client = self.get_blob_service_client(storage_account=storage_account)
        blob_client = client.get_blob_client(container, file_path)
        return blob_client.exists()

    @multi_arguments_decorator(local_support=True)
    def upload_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Upload a directory to an Azure location. Subdirectories are not currently supported.

        :param source_path: str: optional Local path to folder to upload. Defaults to None.
        :param source_storage_account: str: optional Ignored. Defaults to None.
        :param source_container: str: optional Ignored. Defaults to None.
        :param source_file_path: str: optional Ignored. Defaults to None.
        :param dest_path: str: optional Azure path to upload to. Defaults to None.
        :param dest_storage_account: str: optional Storage account. Defaults to None.
        :param dest_container: str: optional Container name. Defaults to None.
        :param dest_file_path: str: optional Path to folder. Defaults to None.

        :exception ValueError: Raised if source is an Azure path
        """
        if self.is_azure_path(source_path):
            raise ValueError(
                f"Expected source to be a local path, got an Azure path: {source_path}"
            )

        container_client = self.get_container_client(
            storage_account=dest_storage_account, container=dest_container
        )

        for root, dirs, files in os.walk(source_path):
            if dirs:
                logging.warning(
                    "upload_folder does not support sub-directories; only files will be uploaded"
                )
            for file in files:
                file_path = os.path.join(root, file)
                blob_path = dest_file_path + file

                logging.info(f"Uploading {file_path} to {blob_path}")
                with open(file_path, "rb") as data:
                    container_client.upload_blob(name=blob_path, data=data)
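
    # Hedged usage sketch for upload_folder above (names are assumptions, not from the
    # original source):
    #   client.upload_folder(
    #       source_path="./exports/",
    #       dest_storage_account="myaccount",
    #       dest_container="mycontainer",
    #       dest_file_path="exports/",
    #   )
    # Note that dest_file_path is concatenated directly with each file name, so include a
    # trailing "/" if the blobs should land under a folder-like prefix.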

    @arguments_decorator(local_support=True)
    def open(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
        mode="r",
        *args,
        **kwargs,
    ):
        """
        Wrapper around smart_open so we don't have to pass a blob service client everywhere.

        :param path: str: optional Local or azure path. Defaults to None.
        :param storage_account: str: optional name of storage account. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.
        :param mode: str: optional open mode. Defaults to "r".

        :return [smart_open.open]: A file-like object that works for both local and Azure paths
        """
        if path and not self.is_azure_path(path) and "w" in mode:
            # if it is local write mode, check the path and create folder if needed
            subdir = os.path.dirname(path)
            if subdir:
                os.makedirs(subdir, exist_ok=True)
        if storage_account:
            transport_params = {
                "client": self.get_blob_service_client(storage_account=storage_account)
            }
        else:
            transport_params = {"client": None}
        if "transport_params" not in kwargs:
            kwargs["transport_params"] = transport_params
        path = path if path else f"azure://{container}/{file_path}"
        return smart_open.open(path, mode, *args, **kwargs)
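
    # Hedged end-to-end sketch for the wrapper class above (the class and instance names
    # are assumptions, not from the original source):
    #   az = AzureStorageWrapper()
    #   with az.open(storage_account="myaccount", container="mycontainer",
    #                file_path="data/in.csv", mode="r") as f:
    #       header = f.readline()
    # Under the hood this builds an "azure://<container>/<file_path>" URL and hands
    # smart_open a BlobServiceClient via transport_params, which is how smart_open
    # resolves azure:// paths.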
Ejemplo n.º 30
0
class AzureBlobStorage(object):
    """
    Instantiate AzureBlobStorage Class for a given Azure storage account.

    `Args:`
        account_name: str
            The name of the Azure storage account to use. Not required if ``AZURE_ACCOUNT_NAME``
            environment variable is set, or if ``account_url`` is supplied.
        credential: str
            An account shared access key with access to the Azure storage account, an SAS token
            string, or an instance of a TokenCredentials class. Not required if ``AZURE_CREDENTIAL``
            environment variable is set.
        account_domain: str
            The domain of the Azure storage account, defaults to "blob.core.windows.net".
            Not required if ``AZURE_ACCOUNT_DOMAIN`` environment variable is set or if
            ``account_url`` is supplied.
        account_url: str
            The account URL for the Azure storage account including the account name and domain.
            Not required if ``AZURE_ACCOUNT_URL`` environment variable is set.
    `Returns:`
        `AzureBlobStorage`
    """

    def __init__(self, account_name=None, credential=None, account_domain='blob.core.windows.net',
                 account_url=None):
        self.account_url = os.getenv('AZURE_ACCOUNT_URL', account_url)
        self.credential = check_env.check('AZURE_CREDENTIAL', credential)
        if not self.account_url:
            self.account_name = check_env.check('AZURE_ACCOUNT_NAME', account_name)
            self.account_domain = check_env.check('AZURE_ACCOUNT_DOMAIN', account_domain)
            self.account_url = f'https://{self.account_name}.{self.account_domain}/'
        else:
            if not self.account_url.startswith('http'):
                self.account_url = f'https://{self.account_url}'
            # Update the account name and domain if a URL is supplied
            parsed_url = urlparse(self.account_url)
            self.account_name = parsed_url.netloc.split(".")[0]
            self.account_domain = ".".join(parsed_url.netloc.split(".")[1:])
        self.client = BlobServiceClient(account_url=self.account_url, credential=self.credential)

    def list_containers(self):
        """
        Returns a list of container names for the storage account

        `Returns:`
            list[str]
                List of container names
        """

        container_names = [container.name for container in self.client.list_containers()]
        logger.info(f'Found {len(container_names)} containers.')
        return container_names

    def container_exists(self, container_name):
        """
        Verify that a container exists within the storage account

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            bool
        """

        container_client = self.get_container(container_name)
        try:
            container_client.get_container_properties()
            logger.info(f'{container_name} exists.')
            return True
        except ResourceNotFoundError:
            logger.info(f'{container_name} does not exist.')
            return False

    def get_container(self, container_name):
        """
        Returns a container client

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            `ContainerClient`
        """

        logger.info(f'Returning {container_name} container client')
        return self.client.get_container_client(container_name)

    def create_container(self, container_name, metadata=None, public_access=None, **kwargs):
        """
        Create a container

        `Args:`
            container_name: str
                The name of the container
            metadata: Optional[dict[str, str]]
                A dict of metadata to associate with the container.
            public_access: Optional[Union[PublicAccess, str]]
                Settings for public access on the container, can be 'container' or 'blob' if not
                ``None``
            kwargs:
                Additional arguments to be supplied to the Azure Blob Storage API. See `Azure Blob
                Storage SDK documentation <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python#create-container-name--metadata-none--public-access-none----kwargs->`_
                for more info.
        `Returns:`
            `ContainerClient`
        """  # noqa

        container_client = self.client.create_container(
            container_name, metadata=metadata, public_access=public_access, **kwargs
        )
        logger.info(f'Created {container_name} container.')
        return container_client

    def delete_container(self, container_name):
        """
        Delete a container.

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            ``None``
        """

        self.client.delete_container(container_name)
        logger.info(f'{container_name} container deleted.')

    def list_blobs(self, container_name, name_starts_with=None):
        """
        List all of the names of blobs in a container

        `Args:`
            container_name: str
                The name of the container
            name_starts_with: Optional[str]
                A prefix to filter blob names
        `Returns:`
            list[str]
                A list of blob names
        """

        container_client = self.get_container(container_name)
        blobs = list(container_client.list_blobs(name_starts_with=name_starts_with))
        logger.info(f'Found {len(blobs)} blobs in {container_name} container.')
        return blobs

    def blob_exists(self, container_name, blob_name):
        """
        Verify that a blob exists in the specified container

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            bool
        """

        blob_client = self.get_blob(container_name, blob_name)
        try:
            blob_client.get_blob_properties()
            logger.info(f'{blob_name} exists in {container_name} container.')
            return True
        except ResourceNotFoundError:
            logger.info(f'{blob_name} does not exist in {container_name} container.')
            return False

    def get_blob(self, container_name, blob_name):
        """
        Get a blob object

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            `BlobClient`
        """

        blob_client = self.client.get_blob_client(container_name, blob_name)
        logger.info(f'Got {blob_name} blob from {container_name} container.')
        return blob_client

    def get_blob_url(self, container_name, blob_name, account_key=None, permission=None,
                     expiry=None, start=None):
        """
        Get a URL with a shared access signature for a blob

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
            account_key: Optional[str]
                An account shared access key for the storage account. Will default to the key used
                on initialization if one was provided as the credential, but required if it was not.
            permission: Optional[Union[BlobSasPermissions, str]]
                Permissions associated with the blob URL. Can be either a BlobSasPermissions object
                or a string where 'r', 'a', 'c', 'w', and 'd' correspond to read, add, create,
                write, and delete permissions respectively.
            expiry: Optional[Union[datetime, str]]
                The datetime when the URL should expire. Naive datetimes are interpreted as UTC.
            start: Optional[Union[datetime, str]]
                The datetime when the URL should become valid. Naive datetimes are interpreted as
                UTC. If it is ``None``, the URL becomes active as soon as it is created.
        `Returns:`
            str
                URL with shared access signature for blob
        """

        if not account_key:
            if not self.credential:
                raise ValueError(
                    'An account shared access key must be provided if one was not supplied on initialization'
                )
            account_key = self.credential

        sas = generate_blob_sas(
            self.account_name,
            container_name,
            blob_name,
            account_key=account_key,
            permission=permission,
            expiry=expiry,
            start=start,
        )
        # The SAS token is itself a query string, so append it after "?" directly
        return f"{self.account_url.rstrip('/')}/{container_name}/{blob_name}?{sas}"
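
    # Hedged usage sketch for get_blob_url (values are assumptions, not from the
    # original source): a read-only link that expires in an hour could look like
    #   url = storage.get_blob_url(
    #       "mycontainer", "data/file.csv",
    #       permission="r",
    #       expiry=datetime.utcnow() + timedelta(hours=1),
    #   )
    # generate_blob_sas accepts either a permission string such as "r" or a
    # BlobSasPermissions object.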

    def _get_content_settings_from_dict(self, kwargs_dict):
        """
        Removes any keys for ``ContentSettings`` from a dict and returns a tuple of the generated
        settings or ``None`` and a dict with the settings keys removed.

        `Args:`
            kwargs_dict: dict
                A dict which should be processed and may have keys for ``ContentSettings``
        `Returns:`
            Tuple[Optional[ContentSettings], dict]
                Any created settings or ``None`` and the dict with settings keys removed
        """

        kwargs_copy = {**kwargs_dict}
        content_settings = None
        content_settings_dict = {}
        content_settings_keys = [
            'content_type', 'content_encoding', 'content_language', 'content_disposition',
            'cache_control', 'content_md5'
        ]
        kwarg_keys = list(kwargs_copy.keys())
        for key in kwarg_keys:
            if key in content_settings_keys:
                content_settings_dict[key] = kwargs_copy.pop(key)
        if content_settings_dict:
            content_settings = ContentSettings(**content_settings_dict)

        return content_settings, kwargs_copy
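
    # Illustrative sketch for the helper above (hypothetical values): given kwargs such as
    #   {"content_type": "text/csv", "overwrite": True}
    # it returns ContentSettings(content_type="text/csv") together with the remaining
    # dict {"overwrite": True}, so put_blob can forward the untouched kwargs to upload_blob.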

    def put_blob(self, container_name, blob_name, local_path, **kwargs):
        """
        Puts a blob (aka file) in a container

        `Args:`
            container_name: str
                The name of the container to store the blob
            blob_name: str
                The name of the blob to be stored
            local_path: str
                The local path of the file to upload
            kwargs:
                Additional arguments to be supplied to the Azure Blob Storage API. See `Azure Blob
                Storage SDK documentation <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobclient?view=azure-python#upload-blob-data--blob-type--blobtype-blockblob---blockblob----length-none--metadata-none----kwargs->`_
                for more info. Any keys that belong to the ``ContentSettings`` object will be
                provided to that class directly.
        `Returns:`
            `BlobClient`
        """  # noqa

        blob_client = self.get_blob(container_name, blob_name)

        # Move all content_settings keys into a ContentSettings object
        content_settings, kwargs_dict = self._get_content_settings_from_dict(kwargs)

        with open(local_path, 'rb') as f:
            data = f.read()

        blob_client = blob_client.upload_blob(
            data,
            overwrite=True,
            content_settings=content_settings,
            **kwargs_dict,
        )
        logger.info(f'{blob_name} blob put in {container_name} container')

        # Return refreshed BlobClient object
        return self.get_blob(container_name, blob_name)

    def download_blob(self, container_name, blob_name, local_path=None):
        """
        Downloads a blob from a container into the specified file path or a temporary file path

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
            local_path: Optional[str]
                The local path where the file will be downloaded. If not specified, a temporary
                file will be created and returned, and that file will be removed automatically
                when the script is done running.
        `Returns:`
            str
                The path of the downloaded file
        """

        if not local_path:
            local_path = files.create_temp_file_for_path('TEMPFILEAZURE')

        blob_client = self.get_blob(container_name, blob_name)

        logger.info(f'Downloading {blob_name} blob from {container_name} container.')
        with open(local_path, 'wb') as f:
            blob_client.download_blob().readinto(f)
        logger.info(f'{blob_name} blob saved to {local_path}.')

        return local_path
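
    # Hedged usage sketch for download_blob (names are assumptions, not from the
    # original source): download to a temporary file and read it back
    #   path = storage.download_blob("mycontainer", "data/file.csv")
    #   with open(path) as f:
    #       first_line = f.readline()
    # When local_path is omitted, files.create_temp_file_for_path supplies a temp file
    # that is cleaned up automatically when the script exits.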

    def delete_blob(self, container_name, blob_name):
        """
        Delete a blob in a specified container.

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            ``None``
        """

        blob_client = self.get_blob(container_name, blob_name)
        blob_client.delete_blob()
        logger.info(f'{blob_name} blob in {container_name} container deleted.')

    def upload_table(self, table, container_name, blob_name, data_type='csv', **kwargs):
        """
        Load the data from a Parsons table into a blob.

        `Args:`
            table: obj
                A :ref:`parsons-table`
            container_name: str
                The container name to upload the data into
            blob_name: str
                The blob name to upload the data into
            data_type: str
                The file format to use when writing the data. One of: `csv` or `json`
            kwargs:
                Additional keyword arguments to supply to ``put_blob``
        `Returns:`
            `BlobClient`
        """

        if data_type == 'csv':
            local_path = table.to_csv()
            content_type = 'text/csv'
        elif data_type == 'json':
            local_path = table.to_json()
            content_type = 'application/json'
        else:
            raise ValueError(f'Unknown data_type value ({data_type}): must be one of: csv or json')

        return self.put_blob(
            container_name, blob_name, local_path, content_type=content_type, **kwargs
        )
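
# A minimal end-to-end sketch for AzureBlobStorage (account, container and file names
# are assumptions, not from the original source):
#
#   storage = AzureBlobStorage(account_name="myaccount", credential="<shared-key>")
#   if not storage.container_exists("mycontainer"):
#       storage.create_container("mycontainer")
#   storage.put_blob("mycontainer", "data/people.csv", "people.csv", content_type="text/csv")
#   for blob in storage.list_blobs("mycontainer", name_starts_with="data/"):
#       print(blob.name)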