def create_target_container(self):
    container_name = "target" + str(uuid.uuid4())
    container_client = ContainerClient(self.storage_endpoint, container_name, self.storage_key)
    container_client.create_container()
    return self.generate_sas_url(container_name, "racwdl")
def create_container(container_name):
    container = ContainerClient(env.REPORTS_STGACCT_URI, container_name, credential=CREDENTIALS)
    try:
        container.create_container()
    except Exception as e:
        logging.info(e)
def create_container_using_client(container_client: ContainerClient):
    """Creates the container if it doesn't already exist."""
    if not container_client.exists():
        logger.debug(f" - uploading to **new** container: {container_client.container_name}")
        container_client.create_container()
def create_container_using_client(container_client: ContainerClient):
    """Creates and initializes a container."""
    try:
        container_client.get_container_properties()
        logger.debug(" - uploading to existing container")
    except ResourceNotFoundError:
        # azure.core.exceptions.ResourceNotFoundError: the container does not exist yet
        logger.debug(f" - uploading to **new** container: {container_client.container_name}")
        container_client.create_container()
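# --- Usage sketch (assumption, not part of the original snippets): both
# create_container_using_client variants above expect a ContainerClient that is
# already bound to a container name. A minimal way to build one is from a
# connection string; the connection string and container name below are
# hypothetical placeholders.
from azure.storage.blob import ContainerClient

client = ContainerClient.from_connection_string(
    conn_str="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net",  # hypothetical
    container_name="reports",  # hypothetical
)
create_container_using_client(client)  # creates the container only if it is missing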
def create_source_container(self, data):
    # for offline tests
    if not self.is_live:
        return "dummy_string"
    # for actual live tests
    container_name = "src" + str(uuid.uuid4())
    container_client = ContainerClient(self.storage_endpoint, container_name, self.storage_key)
    container_client.create_container()
    self.upload_documents(data, container_client)
    return self.generate_sas_url(container_name, "rl")
def create_form_client_and_container_sas_url(self, **kwargs):
    form_recognizer_account = self.client_kwargs.pop("form_recognizer_account", None)
    if form_recognizer_account is None:
        form_recognizer_account = kwargs.pop("form_recognizer_account")
    form_recognizer_account_key = self.client_kwargs.pop("form_recognizer_account_key", None)
    if form_recognizer_account_key is None:
        form_recognizer_account_key = kwargs.pop("form_recognizer_account_key")
    storage_account = self.client_kwargs.pop("storage_account", None)
    if storage_account is None:
        storage_account = kwargs.pop("storage_account")
    storage_account_key = self.client_kwargs.pop("storage_account_key", None)
    if storage_account_key is None:
        storage_account_key = kwargs.pop("storage_account_key")
    if self.is_live:
        container_name = self.resource_random_name.replace("_", "-")  # container names can't have underscore
        container_client = ContainerClient(storage_account.primary_endpoints.blob, container_name, storage_account_key)
        container_client.create_container()
        training_path = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "./sample_forms/training/"))
        for path, folder, files in os.walk(training_path):
            for document in files:
                with open(os.path.join(path, document), "rb") as data:
                    if document == "Form_6.jpg":
                        document = "subfolder/Form_6.jpg"  # create virtual subfolder in container
                    container_client.upload_blob(name=document, data=data)
        sas_token = generate_container_sas(
            storage_account.name,
            container_name,
            storage_account_key,
            permission=ContainerSasPermissions.from_string("rl"),
            expiry=datetime.utcnow() + timedelta(hours=1)
        )
        container_sas_url = storage_account.primary_endpoints.blob + container_name + "?" + sas_token
    else:
        container_sas_url = "containersasurl"
    return self.client_cls(
        form_recognizer_account,
        AzureKeyCredential(form_recognizer_account_key),
        **self.client_kwargs
    ), container_sas_url
def create_source_container(self, data, blob_prefix=""): container_name = "src" + str(uuid.uuid4()) container_client = ContainerClient(self.storage_endpoint, container_name, self.storage_key) container_client.create_container() if isinstance(data, list): for blob in data: container_client.upload_blob(name=blob_prefix + str(uuid.uuid4()) + ".txt", data=blob) else: container_client.upload_blob(name=blob_prefix + str(uuid.uuid4()) + ".txt", data=data) return self.generate_sas_url(container_name, "rl")
def create_target_container(self, data=None):
    # for offline tests
    if not self.is_live:
        return "dummy_string"
    # for actual live tests
    self.target_container_name = "target" + str(uuid.uuid4())
    container_client = ContainerClient(self.storage_endpoint, self.target_container_name, self.storage_key)
    container_client.create_container()
    if data:
        self.upload_documents(data, container_client)
    return self.generate_sas_url(self.target_container_name, "wl")
def generate_writable_container_sas(account_name: str,
                                    account_key: str,
                                    container_name: str,
                                    access_duration_hrs: float,
                                    account_url: Optional[str] = None) -> str:
    """Creates a container and returns a SAS URI with read/write/list permissions.

    Args:
        account_name: str, name of blob storage account
        account_key: str, account SAS token or account shared access key
        container_name: str, name of container to create, must not match an
            existing container in the given storage account
        access_duration_hrs: float, how long the generated SAS token remains valid, in hours
        account_url: str, optional, defaults to default Azure Storage URL

    Returns:
        str, URL to newly created container

    Raises:
        azure.core.exceptions.ResourceExistsError, if container already exists

    NOTE: This method currently fails on non-default Azure Storage URLs. The
    initializer for ContainerClient() assumes the default Azure Storage URL
    format, which is a bug that has been reported here:
    https://github.com/Azure/azure-sdk-for-python/issues/12568
    """
    if account_url is None:
        account_url = build_azure_storage_uri(account=account_name)
    container_client = ContainerClient(account_url=account_url,
                                       container_name=container_name,
                                       credential=account_key)
    container_client.create_container()
    permissions = ContainerSasPermissions(read=True, write=True, list=True)
    container_sas_token = generate_container_sas(
        account_name=account_name,
        container_name=container_name,
        account_key=account_key,
        permission=permissions,
        expiry=datetime.utcnow() + timedelta(hours=access_duration_hrs))
    return f'{account_url}/{container_name}?{container_sas_token}'
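# --- Usage sketch (assumption): calling generate_writable_container_sas with
# placeholder credentials. The account name, key and container name below are
# hypothetical; the returned URL embeds a SAS token valid for the requested
# number of hours.
sas_url = generate_writable_container_sas(
    account_name="myaccount",        # hypothetical storage account
    account_key="<account-key>",     # hypothetical shared access key
    container_name="batch-output",   # must not already exist
    access_duration_hrs=24.0,
)
print(sas_url)  # e.g. https://myaccount.blob.core.windows.net/batch-output?<sas-token>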
class AzureCloudInterface(CloudInterface):
    # Azure block blob limitations
    # https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
    MAX_CHUNKS_PER_FILE = 50000
    # Minimum block size allowed in Azure Blob Storage is 64KB
    MIN_CHUNK_SIZE = 64 << 10

    # Azure Blob Storage permits a maximum of 4.75TB per file.
    # This is a hard limit, while our upload procedure can go over the specified
    # MAX_ARCHIVE_SIZE - so we set a maximum of 1TB per file
    MAX_ARCHIVE_SIZE = 1 << 40

    # The size of each chunk in a single object upload when the size of the
    # object exceeds max_single_put_size. We default to 2MB in order to
    # allow the default max_concurrency of 8 to be achieved when uploading
    # uncompressed WAL segments of the default 16MB size.
    DEFAULT_MAX_BLOCK_SIZE = 2 << 20

    # The maximum number of concurrent chunks allowed in a single object upload
    # where the size exceeds max_single_put_size. We default to 8 based on
    # experiments with in-region and inter-region transfers within Azure.
    DEFAULT_MAX_CONCURRENCY = 8

    # The largest file size which will be uploaded in a single PUT request. This
    # should be lower than the size of the compressed WAL segment in order to
    # force the Azure client to use concurrent chunk upload for archiving WAL files.
    DEFAULT_MAX_SINGLE_PUT_SIZE = 4 << 20

    # The maximum size of the requests connection pool used by the Azure client
    # to upload objects.
    REQUESTS_POOL_MAXSIZE = 32

    def __init__(
        self,
        url,
        jobs=2,
        encryption_scope=None,
        credential=None,
        tags=None,
        max_block_size=DEFAULT_MAX_BLOCK_SIZE,
        max_concurrency=DEFAULT_MAX_CONCURRENCY,
        max_single_put_size=DEFAULT_MAX_SINGLE_PUT_SIZE,
    ):
        """
        Create a new Azure Blob Storage interface given the supplied account url

        :param str url: Full URL of the cloud destination/source
        :param int jobs: How many sub-processes to use for asynchronous
          uploading, defaults to 2.
        """
        super(AzureCloudInterface, self).__init__(
            url=url,
            jobs=jobs,
            tags=tags,
        )
        self.encryption_scope = encryption_scope
        self.credential = credential
        self.max_block_size = max_block_size
        self.max_concurrency = max_concurrency
        self.max_single_put_size = max_single_put_size

        parsed_url = urlparse(url)
        if parsed_url.netloc.endswith(AZURE_BLOB_STORAGE_DOMAIN):
            # We have an Azure Storage URI so we use the following form:
            # <http|https>://<account-name>.<service-name>.core.windows.net/<resource-path>
            # where <resource-path> is <container>/<blob>.
            # Note that although Azure supports an implicit root container, we
            # require that the container is always included.
            self.account_url = parsed_url.netloc
            try:
                self.bucket_name = parsed_url.path.split("/")[1]
            except IndexError:
                raise ValueError("azure blob storage URL %s is malformed" % url)
            path = parsed_url.path.split("/")[2:]
        else:
            # We are dealing with emulated storage so we use the following form:
            # http://<local-machine-address>:<port>/<account-name>/<resource-path>
            logging.info("Using emulated storage URL: %s " % url)
            if "AZURE_STORAGE_CONNECTION_STRING" not in os.environ:
                raise ValueError(
                    "A connection string must be provided when using emulated storage"
                )
            try:
                self.bucket_name = parsed_url.path.split("/")[2]
            except IndexError:
                raise ValueError("emulated storage URL %s is malformed" % url)
            path = parsed_url.path.split("/")[3:]

        self.path = "/".join(path)
        self.bucket_exists = None
        self._reinit_session()

    def _reinit_session(self):
        """
        Create a new session
        """
        if self.credential:
            # Any supplied credential takes precedence over the environment
            credential = self.credential
        elif "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
            logging.info("Authenticating to Azure with connection string")
            self.container_client = ContainerClient.from_connection_string(
                conn_str=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
                container_name=self.bucket_name,
            )
            return
        else:
            if "AZURE_STORAGE_SAS_TOKEN" in os.environ:
                logging.info("Authenticating to Azure with SAS token")
                credential = os.getenv("AZURE_STORAGE_SAS_TOKEN")
            elif "AZURE_STORAGE_KEY" in os.environ:
                logging.info("Authenticating to Azure with shared key")
                credential = os.getenv("AZURE_STORAGE_KEY")
            else:
                logging.info("Authenticating to Azure with default credentials")
                # azure-identity is not part of azure-storage-blob so only import
                # it if needed
                try:
                    from azure.identity import DefaultAzureCredential
                except ImportError:
                    raise SystemExit("Missing required python module: azure-identity")
                credential = DefaultAzureCredential()
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(pool_maxsize=self.REQUESTS_POOL_MAXSIZE)
        session.mount("https://", adapter)
        self.container_client = ContainerClient(
            account_url=self.account_url,
            container_name=self.bucket_name,
            credential=credential,
            max_single_put_size=self.max_single_put_size,
            max_block_size=self.max_block_size,
            session=session,
        )

    @property
    def _extra_upload_args(self):
        optional_args = {}
        if self.encryption_scope:
            optional_args["encryption_scope"] = self.encryption_scope
        return optional_args

    def test_connectivity(self):
        """
        Test Azure connectivity by trying to access a container
        """
        try:
            # We are not even interested in the existence of the bucket,
            # we just want to see if Azure blob service is reachable.
            self.bucket_exists = self._check_bucket_existence()
            return True
        except (HttpResponseError, ServiceRequestError) as exc:
            logging.error("Can't connect to cloud provider: %s", exc)
            return False

    def _check_bucket_existence(self):
        """
        Check Azure Blob Storage for the target container

        Although there is an `exists` function it cannot be called by
        container-level shared access tokens. We therefore check for existence
        by calling list_blobs on the container.

        :return: True if the container exists, False otherwise
        :rtype: bool
        """
        try:
            self.container_client.list_blobs().next()
        except ResourceNotFoundError:
            return False
        except StopIteration:
            # The bucket is empty but it does exist
            pass
        return True

    def _create_bucket(self):
        """
        Create the container in cloud storage
        """
        # By default public access is disabled for newly created containers.
        # Unlike S3 there is no concept of regions for containers (this is at
        # the storage account level in Azure)
        self.container_client.create_container()

    def _walk_blob_tree(self, obj, ignore=None):
        """
        Walk a blob tree in a directory manner and return a list of directories
        and files.

        :param ItemPaged[BlobProperties] obj: Iterable response of BlobProperties
          obtained from ContainerClient.walk_blobs
        :param str|None ignore: An entry to be excluded from the returned list,
          typically the top level prefix
        :return: List of objects and directories in the tree
        :rtype: List[str]
        """
        if obj.name != ignore:
            yield obj.name
        if isinstance(obj, BlobPrefix):
            # We are a prefix and not a leaf so iterate children
            for child in obj:
                for v in self._walk_blob_tree(child):
                    yield v

    def list_bucket(self, prefix="", delimiter=DEFAULT_DELIMITER):
        """
        List bucket content in a directory manner

        :param str prefix:
        :param str delimiter:
        :return: List of objects and dirs right under the prefix
        :rtype: List[str]
        """
        res = self.container_client.walk_blobs(
            name_starts_with=prefix, delimiter=delimiter
        )
        return self._walk_blob_tree(res, ignore=prefix)

    def download_file(self, key, dest_path, decompress=None):
        """
        Download a file from Azure Blob Storage

        :param str key: The key to download
        :param str dest_path: Where to put the destination file
        :param str|None decompress: Compression scheme to use for decompression
        """
        obj = self.container_client.download_blob(key)
        with open(dest_path, "wb") as dest_file:
            if decompress is None:
                obj.download_to_stream(dest_file)
                return
            blob = StreamingBlobIO(obj)
            decompress_to_file(blob, dest_file, decompress)

    def remote_open(self, key, decompressor=None):
        """
        Open a remote Azure Blob Storage object and return a readable stream

        :param str key: The key identifying the object to open
        :param barman.clients.cloud_compression.ChunkedCompressor decompressor:
          A ChunkedCompressor object which will be used to decompress chunks of
          bytes as they are read from the stream
        :return: A file-like object from which the stream can be read or None if
          the key does not exist
        """
        try:
            obj = self.container_client.download_blob(key)
            resp = StreamingBlobIO(obj)
            if decompressor:
                return DecompressingStreamingIO(resp, decompressor)
            return resp
        except ResourceNotFoundError:
            return None

    def upload_fileobj(self, fileobj, key, override_tags=None):
        """
        Synchronously upload the content of a file-like object to a cloud key

        :param fileobj IOBase: File-like object to upload
        :param str key: The key to identify the uploaded object
        :param List[tuple] override_tags: List of tags as k,v tuples to be added
          to the uploaded object
        """
        # Find length of the file so we can pass it to the Azure client
        fileobj.seek(0, SEEK_END)
        length = fileobj.tell()
        fileobj.seek(0)

        extra_args = self._extra_upload_args.copy()
        tags = override_tags or self.tags
        if tags is not None:
            extra_args["tags"] = dict(tags)
        self.container_client.upload_blob(
            name=key,
            data=fileobj,
            overwrite=True,
            length=length,
            max_concurrency=self.max_concurrency,
            **extra_args
        )

    def create_multipart_upload(self, key):
        """No-op method because Azure has no concept of multipart uploads

        Instead of multipart upload, blob blocks are staged and then committed.
        However this does not require anything to be created up front.
        This method therefore does nothing.
        """
        pass

    def _upload_part(self, upload_metadata, key, body, part_number):
        """
        Upload a single block of this block blob.

        Uses the supplied part number to generate the block ID and returns it
        as the "PartNumber" in the part metadata.

        :param dict upload_metadata: Provider-specific metadata about the upload
          (not used in Azure)
        :param str key: The key to use in the cloud service
        :param object body: A stream-like object to upload
        :param int part_number: Part number, starting from 1
        :return: The part metadata
        :rtype: dict[str, None|str]
        """
        # Block IDs must be the same length for all blocks in the blob
        # and no greater than 64 characters. Given there is a limit of
        # 50000 blocks per blob we zero-pad the part_number to five
        # places.
        block_id = str(part_number).zfill(5)
        blob_client = self.container_client.get_blob_client(key)
        blob_client.stage_block(block_id, body, **self._extra_upload_args)
        return {"PartNumber": block_id}

    def _complete_multipart_upload(self, upload_metadata, key, parts):
        """
        Finish a "multipart upload" by committing all blocks in the blob.

        :param dict upload_metadata: Provider-specific metadata about the upload
          (not used in Azure)
        :param str key: The key to use in the cloud service
        :param parts: The list of block IDs for the blocks which compose this blob
        """
        blob_client = self.container_client.get_blob_client(key)
        block_list = [part["PartNumber"] for part in parts]
        extra_args = self._extra_upload_args.copy()
        if self.tags is not None:
            extra_args["tags"] = dict(self.tags)
        blob_client.commit_block_list(block_list, **extra_args)

    def _abort_multipart_upload(self, upload_metadata, key):
        """
        Abort the upload of a block blob

        The objective of this method is to clean up any dangling resources - in
        this case those resources are uncommitted blocks.

        :param dict upload_metadata: Provider-specific metadata about the upload
          (not used in Azure)
        :param str key: The key to use in the cloud service
        """
        # Ideally we would clean up uncommitted blocks at this point
        # however there is no way of doing that.
        # Uncommitted blocks will be discarded after 7 days or when
        # the blob is committed (if they're not included in the commit).
        # We therefore create an empty blob (thereby discarding all uploaded
        # blocks for that blob) and then delete it.
        blob_client = self.container_client.get_blob_client(key)
        blob_client.commit_block_list([], **self._extra_upload_args)
        blob_client.delete_blob()

    def delete_objects(self, paths):
        """
        Delete the objects at the specified paths

        :param List[str] paths:
        """
        try:
            # If paths is empty because the files have already been deleted then
            # delete_blobs will return successfully so we just call it with
            # whatever we were given
            responses = self.container_client.delete_blobs(*paths)
        except PartialBatchErrorException as exc:
            # Although the docs imply any errors will be returned in the response
            # object, in practice a PartialBatchErrorException is raised which
            # contains the response objects in its `parts` attribute.
            # We therefore set responses to reference the response in the
            # exception and treat it the same way we would a regular response.
            logging.warning(
                "PartialBatchErrorException received from Azure: %s" % exc.message
            )
            responses = exc.parts

        # resp is an iterator of HttpResponse objects so we check the status
        # codes which should all be 202 if successful
        errors = False
        for resp in responses:
            if resp.status_code == 404:
                logging.warning(
                    "Deletion of object %s failed because it could not be found"
                    % resp.request.url
                )
            elif resp.status_code != 202:
                errors = True
                logging.error(
                    'Deletion of object %s failed with error code: "%s"'
                    % (resp.request.url, resp.status_code)
                )

        if errors:
            raise CloudProviderError(
                "Error from cloud provider while deleting objects - "
                "please check the Barman logs"
            )
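# --- Usage sketch (assumption): exercising AzureCloudInterface directly. The
# storage URL below is hypothetical, and authentication is taken from the
# environment (AZURE_STORAGE_CONNECTION_STRING, AZURE_STORAGE_SAS_TOKEN,
# AZURE_STORAGE_KEY or default credentials), as handled by _reinit_session above.
from io import BytesIO

cloud_interface = AzureCloudInterface(
    url="https://myaccount.blob.core.windows.net/backups/main",  # hypothetical container/path
    jobs=2,
)
if cloud_interface.test_connectivity():
    # upload_fileobj measures the stream length itself before handing it to the SDK
    cloud_interface.upload_fileobj(BytesIO(b"hello"), key="main/wals/example")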
class FileSystemClient(StorageAccountHostsMixin): """A client to interact with a specific file system, even if that file system may not yet exist. For operations relating to a specific directory or file within this file system, a directory client or file client can be retrieved using the :func:`~get_directory_client` or :func:`~get_file_client` functions. :ivar str url: The full endpoint URL to the file system, including SAS token if used. :ivar str primary_endpoint: The full primary endpoint URL. :ivar str primary_hostname: The hostname of the primary endpoint. :param str account_url: The URI to the storage account. :param file_system_name: The file system for the directory or files. :type file_system_name: str :param credential: The credentials with which to authenticate. This is optional if the account URL already has a SAS token. The value can be a SAS token string, and account shared access key, or an instance of a TokenCredentials class from azure.identity. If the URL already has a SAS token, specifying an explicit credential will take priority. .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START create_file_system_client_from_service] :end-before: [END create_file_system_client_from_service] :language: python :dedent: 8 :caption: Get a FileSystemClient from an existing DataLakeServiceClient. """ def __init__( self, account_url, # type: str file_system_name, # type: str credential=None, # type: Optional[Any] **kwargs # type: Any ): # type: (...) -> None try: if not account_url.lower().startswith('http'): account_url = "https://" + account_url except AttributeError: raise ValueError("account URL must be a string.") parsed_url = urlparse(account_url.rstrip('/')) if not file_system_name: raise ValueError("Please specify a file system name.") if not parsed_url.netloc: raise ValueError("Invalid URL: {}".format(account_url)) blob_account_url = convert_dfs_url_to_blob_url(account_url) # TODO: add self.account_url to base_client and remove _blob_account_url self._blob_account_url = blob_account_url datalake_hosts = kwargs.pop('_hosts', None) blob_hosts = None if datalake_hosts: blob_primary_account_url = convert_dfs_url_to_blob_url( datalake_hosts[LocationMode.PRIMARY]) blob_hosts = { LocationMode.PRIMARY: blob_primary_account_url, LocationMode.SECONDARY: "" } self._container_client = ContainerClient(blob_account_url, file_system_name, credential=credential, _hosts=blob_hosts, **kwargs) _, sas_token = parse_query(parsed_url.query) self.file_system_name = file_system_name self._query_str, self._raw_credential = self._format_query_string( sas_token, credential) super(FileSystemClient, self).__init__(parsed_url, service='dfs', credential=self._raw_credential, _hosts=datalake_hosts, **kwargs) # ADLS doesn't support secondary endpoint, make sure it's empty self._hosts[LocationMode.SECONDARY] = "" self._client = DataLakeStorageClient(self.url, file_system_name, None, pipeline=self._pipeline) def _format_url(self, hostname): file_system_name = self.file_system_name if isinstance(file_system_name, six.text_type): file_system_name = file_system_name.encode('UTF-8') return "{}://{}/{}{}".format(self.scheme, hostname, quote(file_system_name), self._query_str) def __exit__(self, *args): self._container_client.close() super(FileSystemClient, self).__exit__(*args) def close(self): # type: () -> None """ This method is to close the sockets opened by the client. It need not be used when using with a context manager. 
""" self._container_client.close() self.__exit__() @classmethod def from_connection_string( cls, conn_str, # type: str file_system_name, # type: str credential=None, # type: Optional[Any] **kwargs # type: Any ): # type: (...) -> FileSystemClient """ Create FileSystemClient from a Connection String. :param str conn_str: A connection string to an Azure Storage account. :param file_system_name: The name of file system to interact with. :type file_system_name: str :param credential: The credentials with which to authenticate. This is optional if the account URL already has a SAS token, or the connection string already has shared access key values. The value can be a SAS token string, and account shared access key, or an instance of a TokenCredentials class from azure.identity. Credentials provided here will take precedence over those in the connection string. :return a FileSystemClient :rtype ~azure.storage.filedatalake.FileSystemClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START create_file_system_client_from_connection_string] :end-before: [END create_file_system_client_from_connection_string] :language: python :dedent: 8 :caption: Create FileSystemClient from connection string """ account_url, _, credential = parse_connection_str( conn_str, credential, 'dfs') return cls(account_url, file_system_name=file_system_name, credential=credential, **kwargs) def acquire_lease( self, lease_duration=-1, # type: int lease_id=None, # type: Optional[str] **kwargs): # type: (...) -> DataLakeLeaseClient """ Requests a new lease. If the file system does not have an active lease, the DataLake service creates a lease on the file system and returns a new lease ID. :param int lease_duration: Specifies the duration of the lease, in seconds, or negative one (-1) for a lease that never expires. A non-infinite lease can be between 15 and 60 seconds. A lease duration cannot be changed using renew or change. Default is -1 (infinite lease). :param str lease_id: Proposed lease ID, in a GUID string format. The DataLake service returns 400 (Invalid request) if the proposed lease ID is not in the correct format. :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :returns: A DataLakeLeaseClient object, that can be run in a context manager. :rtype: ~azure.storage.filedatalake.DataLakeLeaseClient .. admonition:: Example: .. 
literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START acquire_lease_on_file_system] :end-before: [END acquire_lease_on_file_system] :language: python :dedent: 8 :caption: Acquiring a lease on the file system. """ lease = DataLakeLeaseClient(self, lease_id=lease_id) lease.acquire(lease_duration=lease_duration, **kwargs) return lease def create_file_system( self, metadata=None, # type: Optional[Dict[str, str]] public_access=None, # type: Optional[PublicAccess] **kwargs): # type: (...) -> Dict[str, Union[str, datetime]] """Creates a new file system under the specified account. If the file system with the same name already exists, a ResourceExistsError will be raised. This method returns a client with which to interact with the newly created file system. :param metadata: A dict with name-value pairs to associate with the file system as metadata. Example: `{'Category':'test'}` :type metadata: dict(str, str) :param public_access: To specify whether data in the file system may be accessed publicly and the level of access. :type public_access: ~azure.storage.filedatalake.PublicAccess :keyword int timeout: The timeout parameter is expressed in seconds. :rtype: ~azure.storage.filedatalake.FileSystemClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START create_file_system] :end-before: [END create_file_system] :language: python :dedent: 12 :caption: Creating a file system in the datalake service. """ return self._container_client.create_container( metadata=metadata, public_access=public_access, **kwargs) def delete_file_system(self, **kwargs): # type: (Any) -> None """Marks the specified file system for deletion. The file system and any files contained within it are later deleted during garbage collection. If the file system is not found, a ResourceNotFoundError will be raised. :keyword str or ~azure.storage.filedatalake.DataLakeLeaseClient lease: If specified, delete_file_system only succeeds if the file system's lease is active and matches this ID. Required if the file system has an active lease. :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :rtype: None .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START delete_file_system] :end-before: [END delete_file_system] :language: python :dedent: 12 :caption: Deleting a file system in the datalake service. 
""" self._container_client.delete_container(**kwargs) def get_file_system_properties(self, **kwargs): # type: (Any) -> FileSystemProperties """Returns all user-defined metadata and system properties for the specified file system. The data returned does not include the file system's list of paths. :keyword str or ~azure.storage.filedatalake.DataLakeLeaseClient lease: If specified, get_file_system_properties only succeeds if the file system's lease is active and matches this ID. :keyword int timeout: The timeout parameter is expressed in seconds. :return: Properties for the specified file system within a file system object. :rtype: ~azure.storage.filedatalake.FileSystemProperties .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START get_file_system_properties] :end-before: [END get_file_system_properties] :language: python :dedent: 12 :caption: Getting properties on the file system. """ container_properties = self._container_client.get_container_properties( **kwargs) return FileSystemProperties._convert_from_container_props( container_properties) # pylint: disable=protected-access def set_file_system_metadata( # type: ignore self, metadata, # type: Dict[str, str] **kwargs): # type: (...) -> Dict[str, Union[str, datetime]] """Sets one or more user-defined name-value pairs for the specified file system. Each call to this operation replaces all existing metadata attached to the file system. To remove all metadata from the file system, call this operation with no metadata dict. :param metadata: A dict containing name-value pairs to associate with the file system as metadata. Example: {'category':'test'} :type metadata: dict[str, str] :keyword str or ~azure.storage.filedatalake.DataLakeLeaseClient lease: If specified, set_file_system_metadata only succeeds if the file system's lease is active and matches this ID. :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :returns: filesystem-updated property dict (Etag and last modified). .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START set_file_system_metadata] :end-before: [END set_file_system_metadata] :language: python :dedent: 12 :caption: Setting metadata on the file system. 
""" return self._container_client.set_container_metadata(metadata=metadata, **kwargs) def set_file_system_access_policy( self, signed_identifiers, # type: Dict[str, AccessPolicy] public_access=None, # type: Optional[Union[str, PublicAccess]] **kwargs): # type: (...) -> Dict[str, Union[str, datetime]] """Sets the permissions for the specified file system or stored access policies that may be used with Shared Access Signatures. The permissions indicate whether files in a file system may be accessed publicly. :param signed_identifiers: A dictionary of access policies to associate with the file system. The dictionary may contain up to 5 elements. An empty dictionary will clear the access policies set on the service. :type signed_identifiers: dict[str, ~azure.storage.filedatalake.AccessPolicy] :param ~azure.storage.filedatalake.PublicAccess public_access: To specify whether data in the file system may be accessed publicly and the level of access. :keyword lease: Required if the file system has an active lease. Value can be a DataLakeLeaseClient object or the lease ID as a string. :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword ~datetime.datetime if_modified_since: A datetime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified date/time. :keyword ~datetime.datetime if_unmodified_since: A datetime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword int timeout: The timeout parameter is expressed in seconds. :returns: File System-updated property dict (Etag and last modified). :rtype: dict[str, str or ~datetime.datetime] """ return self._container_client.set_container_access_policy( signed_identifiers, public_access=public_access, **kwargs) def get_file_system_access_policy(self, **kwargs): # type: (Any) -> Dict[str, Any] """Gets the permissions for the specified file system. The permissions indicate whether file system data may be accessed publicly. :keyword lease: If specified, the operation only succeeds if the file system's lease is active and matches this ID. :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword int timeout: The timeout parameter is expressed in seconds. :returns: Access policy information in a dict. :rtype: dict[str, Any] """ access_policy = self._container_client.get_container_access_policy( **kwargs) return { 'public_access': PublicAccess._from_generated(access_policy['public_access']), # pylint: disable=protected-access 'signed_identifiers': access_policy['signed_identifiers'] } def get_paths( self, path=None, # type: Optional[str] recursive=True, # type: Optional[bool] max_results=None, # type: Optional[int] **kwargs): # type: (...) -> ItemPaged[PathProperties] """Returns a generator to list the paths(could be files or directories) under the specified file system. The generator will lazily follow the continuation tokens returned by the service. :param str path: Filters the results to return only paths under the specified path. 
:param int max_results: An optional value that specifies the maximum number of items to return per page. If omitted or greater than 5,000, the response will include up to 5,000 items per page. :keyword upn: Optional. Valid only when Hierarchical Namespace is enabled for the account. If "true", the user identity values returned in the x-ms-owner, x-ms-group, and x-ms-acl response headers will be transformed from Azure Active Directory Object IDs to User Principal Names. If "false", the values will be returned as Azure Active Directory Object IDs. The default value is false. Note that group and application Object IDs are not translated because they do not have unique friendly names. :type upn: bool :keyword int timeout: The timeout parameter is expressed in seconds. :returns: An iterable (auto-paging) response of PathProperties. :rtype: ~azure.core.paging.ItemPaged[~azure.storage.filedatalake.PathProperties] .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START get_paths_in_file_system] :end-before: [END get_paths_in_file_system] :language: python :dedent: 8 :caption: List the paths in the file system. """ timeout = kwargs.pop('timeout', None) command = functools.partial(self._client.file_system.list_paths, path=path, timeout=timeout, **kwargs) return ItemPaged(command, recursive, path=path, max_results=max_results, page_iterator_class=PathPropertiesPaged, **kwargs) def create_directory( self, directory, # type: Union[DirectoryProperties, str] metadata=None, # type: Optional[Dict[str, str]] **kwargs): # type: (...) -> DataLakeDirectoryClient """ Create directory :param directory: The directory with which to interact. This can either be the name of the directory, or an instance of DirectoryProperties. :type directory: str or ~azure.storage.filedatalake.DirectoryProperties :param metadata: Name-value pairs associated with the file as metadata. :type metadata: dict(str, str) :keyword ~azure.storage.filedatalake.ContentSettings content_settings: ContentSettings object used to set path properties. :keyword lease: Required if the file has an active lease. Value can be a DataLakeLeaseClient object or the lease ID as a string. :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword str umask: Optional and only valid if Hierarchical Namespace is enabled for the account. When creating a file or directory and the parent folder does not have a default ACL, the umask restricts the permissions of the file or directory to be created. The resulting permission is given by p & ^u, where p is the permission and u is the umask. For example, if p is 0777 and u is 0057, then the resulting permission is 0720. The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. The umask must be specified in 4-digit octal notation (e.g. 0766). :keyword str permissions: Optional and only valid if Hierarchical Namespace is enabled for the account. Sets POSIX access permissions for the file owner, the file owning group, and others. Each class may be granted read, write, or execute permission. The sticky bit is also supported. Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are supported. :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. 
Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :return: DataLakeDirectoryClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START create_directory_from_file_system] :end-before: [END create_directory_from_file_system] :language: python :dedent: 8 :caption: Create directory in the file system. """ directory_client = self.get_directory_client(directory) directory_client.create_directory(metadata=metadata, **kwargs) return directory_client def delete_directory( self, directory, # type: Union[DirectoryProperties, str] **kwargs): # type: (...) -> DataLakeDirectoryClient """ Marks the specified path for deletion. :param directory: The directory with which to interact. This can either be the name of the directory, or an instance of DirectoryProperties. :type directory: str or ~azure.storage.filedatalake.DirectoryProperties :keyword lease: Required if the file has an active lease. Value can be a LeaseClient object or the lease ID as a string. :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :return: DataLakeDirectoryClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START delete_directory_from_file_system] :end-before: [END delete_directory_from_file_system] :language: python :dedent: 8 :caption: Delete directory in the file system. """ directory_client = self.get_directory_client(directory) directory_client.delete_directory(**kwargs) return directory_client def create_file( self, file, # type: Union[FileProperties, str] **kwargs): # type: (...) 
-> DataLakeFileClient """ Create file :param file: The file with which to interact. This can either be the name of the file, or an instance of FileProperties. :type file: str or ~azure.storage.filedatalake.FileProperties :param ~azure.storage.filedatalake.ContentSettings content_settings: ContentSettings object used to set path properties. :param metadata: Name-value pairs associated with the file as metadata. :type metadata: dict(str, str) :keyword lease: Required if the file has an active lease. Value can be a DataLakeLeaseClient object or the lease ID as a string. :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword str umask: Optional and only valid if Hierarchical Namespace is enabled for the account. When creating a file or directory and the parent folder does not have a default ACL, the umask restricts the permissions of the file or directory to be created. The resulting permission is given by p & ^u, where p is the permission and u is the umask. For example, if p is 0777 and u is 0057, then the resulting permission is 0720. The default permission is 0777 for a directory and 0666 for a file. The default umask is 0027. The umask must be specified in 4-digit octal notation (e.g. 0766). :keyword str permissions: Optional and only valid if Hierarchical Namespace is enabled for the account. Sets POSIX access permissions for the file owner, the file owning group, and others. Each class may be granted read, write, or execute permission. The sticky bit is also supported. Both symbolic (rwxrw-rw-) and 4-digit octal notation (e.g. 0766) are supported. :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :return: DataLakeFileClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START create_file_from_file_system] :end-before: [END create_file_from_file_system] :language: python :dedent: 8 :caption: Create file in the file system. """ file_client = self.get_file_client(file) file_client.create_file(**kwargs) return file_client def delete_file( self, file, # type: Union[FileProperties, str] **kwargs): # type: (...) -> DataLakeFileClient """ Marks the specified file for deletion. :param file: The file with which to interact. This can either be the name of the file, or an instance of FileProperties. :type file: str or ~azure.storage.filedatalake.FileProperties :keyword lease: Required if the file has an active lease. Value can be a LeaseClient object or the lease ID as a string. 
:paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str :keyword ~datetime.datetime if_modified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has been modified since the specified time. :keyword ~datetime.datetime if_unmodified_since: A DateTime value. Azure expects the date value passed in to be UTC. If timezone is included, any non-UTC datetimes will be converted to UTC. If a date is passed in without timezone info, it is assumed to be UTC. Specify this header to perform the operation only if the resource has not been modified since the specified date/time. :keyword str etag: An ETag value, or the wildcard character (*). Used to check if the resource has changed, and act according to the condition specified by the `match_condition` parameter. :keyword ~azure.core.MatchConditions match_condition: The match condition to use upon the etag. :keyword int timeout: The timeout parameter is expressed in seconds. :return: DataLakeFileClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START delete_file_from_file_system] :end-before: [END delete_file_from_file_system] :language: python :dedent: 8 :caption: Delete file in the file system. """ file_client = self.get_file_client(file) file_client.delete_file(**kwargs) return file_client def _get_root_directory_client(self): # type: () -> DataLakeDirectoryClient """Get a client to interact with the root directory. :returns: A DataLakeDirectoryClient. :rtype: ~azure.storage.filedatalake.DataLakeDirectoryClient """ return self.get_directory_client('/') def get_directory_client( self, directory # type: Union[DirectoryProperties, str] ): # type: (...) -> DataLakeDirectoryClient """Get a client to interact with the specified directory. The directory need not already exist. :param directory: The directory with which to interact. This can either be the name of the directory, or an instance of DirectoryProperties. :type directory: str or ~azure.storage.filedatalake.DirectoryProperties :returns: A DataLakeDirectoryClient. :rtype: ~azure.storage.filedatalake.DataLakeDirectoryClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START get_directory_client_from_file_system] :end-before: [END get_directory_client_from_file_system] :language: python :dedent: 8 :caption: Getting the directory client to interact with a specific directory. """ try: directory_name = directory.name except AttributeError: directory_name = directory _pipeline = Pipeline( transport=TransportWrapper(self._pipeline._transport ), # pylint: disable = protected-access policies=self._pipeline. _impl_policies # pylint: disable = protected-access ) return DataLakeDirectoryClient( self.url, self.file_system_name, directory_name=directory_name, credential=self._raw_credential, _configuration=self._config, _pipeline=_pipeline, _hosts=self._hosts, require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function) def get_file_client( self, file_path # type: Union[FileProperties, str] ): # type: (...) -> DataLakeFileClient """Get a client to interact with the specified file. The file need not already exist. :param file_path: The file with which to interact. 
This can either be the path of the file(from root directory), or an instance of FileProperties. eg. directory/subdirectory/file :type file_path: str or ~azure.storage.filedatalake.FileProperties :returns: A DataLakeFileClient. :rtype: ~azure.storage.filedatalake..DataLakeFileClient .. admonition:: Example: .. literalinclude:: ../samples/datalake_samples_file_system.py :start-after: [START get_file_client_from_file_system] :end-before: [END get_file_client_from_file_system] :language: python :dedent: 8 :caption: Getting the file client to interact with a specific file. """ try: file_path = file_path.name except AttributeError: pass _pipeline = Pipeline( transport=TransportWrapper(self._pipeline._transport ), # pylint: disable = protected-access policies=self._pipeline. _impl_policies # pylint: disable = protected-access ) return DataLakeFileClient( self.url, self.file_system_name, file_path=file_path, credential=self._raw_credential, _hosts=self._hosts, _configuration=self._config, _pipeline=_pipeline, require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
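# --- Usage sketch (assumption): a typical round trip with FileSystemClient. The
# connection string and file system name are hypothetical placeholders; the
# methods used (from_connection_string, create_file_system, create_directory,
# create_file, get_paths) are the ones defined above.
fs_client = FileSystemClient.from_connection_string(
    conn_str="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net",  # hypothetical
    file_system_name="my-file-system",
)
fs_client.create_file_system()
fs_client.create_directory("incoming")
file_client = fs_client.create_file("incoming/report.csv")
for path in fs_client.get_paths(path="incoming"):
    print(path.name)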