from azure.storage.blob import BlobServiceClient # Setup pandas display properties pandas.set_option('display.max_rows', None) pandas.set_option('display.max_columns', None) pandas.set_option('display.width', None) pandas.set_option('display.max_colwidth', None) # Constants LATEST_COVERAGE_URL = "https://azuresdkartifacts.blob.core.windows.net/azure-sdk-for-java/test-coverage/jacoco.csv" BLOB_CONTAINER_NAME = os.getenv('BLOB_CONTAINER_NAME') STORAGE_CONNECTION_STRING = os.getenv('STORAGE_CONNECTION_STRING') AGGREGATE_REPORT_NAME = 'aggregate/jacoco_aggregate.csv' # Instantiate a new BlobServiceClient using a connection string blob_service_client = BlobServiceClient.from_connection_string( STORAGE_CONNECTION_STRING) # Instantiate a new ContainerClient container_client = blob_service_client.get_container_client( BLOB_CONTAINER_NAME) def download_latest_coverage(): jacoco_artifact_csv = requests.get(LATEST_COVERAGE_URL) print("Latest Jacoco report was generated on " + jacoco_artifact_csv.headers['Last-Modified']) last_modified_time = datetime.datetime.strptime( jacoco_artifact_csv.headers['Last-Modified'], "%a, %d %b %Y %H:%M:%S %Z") with open('jacoco.csv', 'wb') as coverage_report:
def _refresh_containers_cache_file(
    connection_string, container, cache_file, multiple_env=False, environment="base"
):
    """
    .. versionadded:: 3001

    Walk every blob in an Azure storage container and cache the listing on the local filesystem.

    The blob entries returned by the container walk are grouped as
    ``metadata[saltenv][container] = [blob, ...]`` and pickled to ``cache_file``
    (any existing cache file is removed first).

    :param connection_string: The connection string to use to access the specified Azure Blob Container.

    :param container: The name of the target Azure Blob Container.

    :param cache_file: The path of where the file will be cached.

    :param multiple_env: Specifies whether the pillar should interpret top level folders as pillar environments.

    :param environment: Specifies which environment the container represents when in single environment mode. This is
        ignored if multiple_env is set as True.

    :return: The metadata dictionary that was written to ``cache_file``, or ``False`` if the
        container client could not be created.
    """
    try:
        # Create the BlobServiceClient object which will be used to create a container client
        blob_service_client = BlobServiceClient.from_connection_string(
            connection_string
        )

        # Create the ContainerClient object
        container_client = blob_service_client.get_container_client(container)
    except Exception as exc:  # pylint: disable=broad-except
        log.error("Exception: %s", exc)
        return False

    metadata = {}

    def _walk_blobs(saltenv="base", prefix=None):
        # walk_blobs yields the entries directly under ``prefix``; virtual
        # directories come back as entries whose name ends with "/".
        for blob in container_client.walk_blobs(name_starts_with=prefix):
            log.debug("Raw blob attributes: %s", blob)

            # Directories end with "/".
            if blob.name.endswith("/"):
                # Recurse into the directory, carrying the current environment
                # along. Without this, blobs in sub-directories would always be
                # filed under "base" in single environment mode.
                _walk_blobs(saltenv=saltenv, prefix=blob.name)
                continue

            if multiple_env:
                # In multiple environment mode the enclosing folder (the prefix
                # without its trailing "/") names the environment.
                saltenv = "base" if (not prefix or prefix == ".") else prefix[:-1]

            metadata.setdefault(saltenv, {}).setdefault(container, []).append(blob)

    _walk_blobs(saltenv=environment)

    # write the metadata to disk
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    log.debug("Writing Azure blobs pillar cache file")

    with salt.utils.files.fopen(cache_file, "wb") as fp_:
        pickle.dump(metadata, fp_)

    return metadata
AUTHORITY_HOST_URI = 'https://login.microsoftonline.com'
AUTHORITY_URI = '/'.join([AUTHORITY_HOST_URI, TENANT_ID])
RESOURCE_URI = 'https://management.core.windows.net/'

# Acquire a management token with the service principal's client credentials
# and wrap it in an AADTokenCredentials object.
context = adal.AuthenticationContext(AUTHORITY_URI, api_version=None)
mgmt_token = context.acquire_token_with_client_credentials(
    RESOURCE_URI,
    CLIENT_ID,
    CLIENT_SECRET,
)
credentials = AADTokenCredentials(mgmt_token, CLIENT_ID)

# Credential handed to the storage SDK clients below.
token_credential = ClientSecretCredential(TENANT_ID, CLIENT_ID, CLIENT_SECRET)

# Module-wide service clients and shared state.
adls_service_client = DataLakeServiceClient(
    account_url="{}://{}.dfs.core.windows.net".format("https", STORAGE_ACCOUNT_NAME),
    credential=token_credential,
)
blob_service_client = BlobServiceClient(
    account_url="{}://{}.blob.core.windows.net".format("https", STORAGE_ACCOUNT_NAME),
    credential=token_credential,
)
currentTime = datetime.now()
container_client = blob_service_client.get_container_client(CONTAINER_NAME)


def list_snapshots_blob(name=""):
    """Print one "name, snapshot, size" line per listed blob.

    The listing is requested from the module-level ``container_client`` with
    snapshots and metadata included, restricted to blob names that start with
    ``name`` (the default empty string applies no restriction).
    """
    listing = container_client.list_blobs(
        name_starts_with=name,
        include=['snapshots', 'metadata'],
    )
    for entry in listing:
        columns = [entry.name, str(entry.snapshot), str(entry.size)]
        print(', '.join(columns))
def init_azure_storage() -> BlobServiceClient:
    """Build a BlobServiceClient from the AZURE_BLOB_CONNECT_STR environment variable.

    Returns:
        The client created by ``BlobServiceClient.from_connection_string``.

    Raises:
        ValueError: If AZURE_BLOB_CONNECT_STR is unset or empty. Failing here
            gives a clear message instead of an opaque error from inside the
            SDK's connection-string parser.
    """
    connect_str = os.getenv('AZURE_BLOB_CONNECT_STR')
    if not connect_str:
        raise ValueError(
            "Environment variable AZURE_BLOB_CONNECT_STR is not set or is empty"
        )
    return BlobServiceClient.from_connection_string(connect_str)
def speech_recognize_once_from_file():
    """Perform one-shot speech recognition on an audio file, then upload the output file.

    The audio is read from the module-level ``inputfilename``; the recognition
    outcome is printed. Afterwards the local file named by the module-level
    ``outputfilename`` is uploaded to the ``container_name`` container of the
    storage account identified by ``AZURE_STORAGE_CONNECTION_STRING``.

    Exits the process with status 1 if the connection string is empty or None.
    """
    # <SpeechRecognitionWithFile>
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=inputfilename)
    # Creates a speech recognizer using a file as audio input, also specify the speech language
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   language="en-US",
                                                   audio_config=audio_config)

    # Starts speech recognition, and returns after a single utterance is recognized. The end of a
    # single utterance is determined by listening for silence at the end or until a maximum of 15
    # seconds of audio is processed. It returns the recognition text as result.
    # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
    # shot recognition like command or query.
    # For long-running multi-utterance recognition, use start_continuous_recognition() instead.
    result = speech_recognizer.recognize_once()

    # Check the result
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(
            result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(
            cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(
                cancellation_details.error_details))

    # Recognized text as a one-row dataframe.
    # NOTE(review): the CSV export of this dataframe is disabled, so nothing
    # in this function writes `outputfilename` -- confirm it is produced elsewhere.
    df = pd.DataFrame()
    df['comment'] = [str(result.text)]
    # </SpeechRecognitionWithFile>

    # A plain name lookup can never raise KeyError, so validate the value
    # explicitly instead of relying on an unreachable exception handler.
    connection_string = AZURE_STORAGE_CONNECTION_STRING
    if not connection_string:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    blob_service_client = BlobServiceClient.from_connection_string(
        connection_string)

    # Instantiate a new ContainerClient
    container_client = blob_service_client.get_container_client(container_name)

    # Create new Container in the service
    # NOTE(review): this raises if the container already exists -- confirm
    # whether repeated runs against the same container are expected.
    container_client.create_container()

    # Instantiate a new BlobClient
    blob_client = container_client.get_blob_client(outputfilename)

    # [START upload_a_blob]
    # Upload content to block blob
    with open(outputfilename, "rb") as data:
        blob_client.upload_blob(data)
    # [END upload_a_blob]

    # Report success only once the upload has actually completed.
    print("")
    print("")
    print("Output file is uploaded to Blob storage")
class AzureStorage(BaseStorage):
    """Storage engine that keeps objects as blobs in one Azure Blob Storage container.

    Every object path is mapped to a blob name below ``storage_path``. Chunked
    uploads are staged as blocks on an ``uploads/<uuid>`` blob, committed, and
    then copied to their final blob name.
    """

    def __init__(
        self,
        context,
        azure_container,
        storage_path,
        azure_account_name,
        azure_account_key=None,
        sas_token=None,
        connection_string=None,
    ):
        """Create the service client and pick the maximum block size.

        The client is built from the account name and account key.
        NOTE(review): ``sas_token`` and ``connection_string`` are stored on the
        instance but are not used to build the client here -- confirm intended.

        Raises:
            Exception: If the service API version falls outside the ranges
                covered by ``_API_VERSION_LIMITS``.
        """
        super(AzureStorage, self).__init__()
        self._context = context
        self._storage_path = storage_path.lstrip("/")

        self._azure_account_name = azure_account_name
        self._azure_account_key = azure_account_key
        self._azure_sas_token = sas_token
        self._azure_container = azure_container
        self._azure_connection_string = connection_string

        self._blob_service_client = BlobServiceClient(
            AZURE_STORAGE_URL_STRING.format(self._azure_account_name),
            credential=self._azure_account_key,
        )

        # The maximum block size depends on the service API version, see
        # https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
        api_version = self._blob_service_client.api_version
        api_version_dt = datetime.strptime(api_version, "%Y-%m-%d")
        if api_version_dt < _API_VERSION_LIMITS["2016-05-31"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2016-05-31"][1]
        elif api_version_dt <= _API_VERSION_LIMITS["2019-07-07"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-07-07"][1]
        elif api_version_dt >= _API_VERSION_LIMITS["2019-12-12"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-12-12"][1]
        else:
            raise Exception("Unknown Azure api version %s" % api_version)

    def _blob_name_from_path(self, object_path):
        """Return the blob name for ``object_path``; paths containing ".." are rejected."""
        if ".." in object_path:
            raise Exception("Relative paths are not allowed; found %s" % object_path)

        return os.path.join(self._storage_path, object_path).rstrip("/")

    def _upload_blob_path_from_uuid(self, uuid):
        """Return the full blob name (storage path included) of a chunked upload."""
        return self._blob_name_from_path(
            self._upload_blob_name_from_uuid(uuid))

    def _upload_blob_name_from_uuid(self, uuid):
        """Return the storage-path-relative name of a chunked upload: ``uploads/<uuid>``."""
        return "uploads/{0}".format(uuid)

    def _blob(self, blob_name):
        """Return a blob client for ``blob_name`` in the configured container."""
        return self._blob_service_client.get_blob_client(
            self._azure_container, blob_name)

    @property
    def _container(self):
        """Container client for the configured container."""
        return self._blob_service_client.get_container_client(
            self._azure_container)

    def get_direct_download_url(self,
                                object_path,
                                request_ip=None,
                                expires_in=60,
                                requires_cors=False,
                                head=False):
        """Return the blob URL with a read-only SAS token valid for ``expires_in`` seconds.

        ``request_ip``, ``requires_cors`` and ``head`` are accepted but not
        used by this implementation.

        Raises:
            IOError: If generating the token or URL fails with an AzureError.
        """
        blob_name = self._blob_name_from_path(object_path)

        try:
            sas_token = generate_blob_sas(
                self._azure_account_name,
                self._azure_container,
                blob_name,
                account_key=self._azure_account_key,
                permission=ContainerSasPermissions.from_string("r"),
                expiry=datetime.utcnow() + timedelta(seconds=expires_in),
            )

            blob_url = "{}?{}".format(self._blob(blob_name).url, sas_token)

        except AzureError:
            logger.exception(
                "Exception when trying to get direct download for path %s",
                object_path)
            raise IOError("Exception when trying to get direct download")

        return blob_url

    def validate(self, client):
        """Delegate validation to the base storage class."""
        super(AzureStorage, self).validate(client)

    def get_content(self, path):
        """Return the full content of the blob at ``path`` as bytes (IOError on AzureError)."""
        blob_name = self._blob_name_from_path(path)
        try:
            blob_stream = self._blob(blob_name).download_blob()
        except AzureError:
            logger.exception("Exception when trying to get path %s", path)
            raise IOError("Exception when trying to get path")

        return blob_stream.content_as_bytes()

    def put_content(self, path, content):
        """Upload ``content`` as a block blob at ``path``, overwriting any existing blob."""
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).upload_blob(content,
                                              blob_type=BlobType.BlockBlob,
                                              overwrite=True)
        except AzureError:
            logger.exception("Exception when trying to put path %s", path)
            raise IOError("Exception when trying to put path")

    def stream_read(self, path):
        """Yield the blob at ``path`` in pieces of at most ``self.buffer_size`` bytes."""
        with self.stream_read_file(path) as f:
            while True:
                buf = f.read(self.buffer_size)
                if not buf:
                    break
                yield buf

    def stream_read_file(self, path):
        """Download the blob at ``path`` into an in-memory stream positioned at its start."""
        blob_name = self._blob_name_from_path(path)

        try:
            output_stream = io.BytesIO()
            self._blob(blob_name).download_blob().download_to_stream(
                output_stream)
            output_stream.seek(0)
        except AzureError:
            logger.exception(
                "Exception when trying to stream_file_read path %s", path)
            raise IOError("Exception when trying to stream_file_read path")

        return output_stream

    def stream_write(self,
                     path,
                     fp,
                     content_type=None,
                     content_encoding=None):
        """Upload the contents of the file-like ``fp`` to ``path``, overwriting any existing blob."""
        blob_name = self._blob_name_from_path(path)
        content_settings = ContentSettings(
            content_type=content_type,
            content_encoding=content_encoding,
        )

        try:
            self._blob(blob_name).upload_blob(
                fp, content_settings=content_settings, overwrite=True)
        except AzureError as ae:
            logger.exception("Exception when trying to stream_write path %s",
                             path)
            raise IOError("Exception when trying to stream_write path", ae)

    def exists(self, path):
        """Return True if a blob exists at ``path``, False if the service reports it missing."""
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).get_blob_properties()
        except ResourceNotFoundError:
            return False
        except AzureError:
            logger.exception("Exception when trying to check exists path %s",
                             path)
            raise IOError("Exception when trying to check exists path")

        return True

    def remove(self, path):
        """Delete the blob at ``path`` (IOError on AzureError)."""
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).delete_blob()
        except AzureError:
            logger.exception("Exception when trying to remove path %s", path)
            raise IOError("Exception when trying to remove path")

    def get_checksum(self, path):
        """Return the ETag of the blob at ``path`` (IOError on AzureError)."""
        blob_name = self._blob_name_from_path(path)
        try:
            blob_properties = self._blob(blob_name).get_blob_properties()
        except AzureError:
            logger.exception(
                "Exception when trying to get_checksum for path %s", path)
            raise IOError("Exception when trying to get_checksum path")
        return blob_properties.etag

    def initiate_chunked_upload(self):
        """Return a new upload id and empty upload metadata (no blocks, no content type)."""
        random_uuid = str(uuid.uuid4())
        metadata = {
            _BLOCKS_KEY: [],
            _CONTENT_TYPE_KEY: None,
        }
        return random_uuid, metadata

    def stream_upload_chunk(self,
                            uuid,
                            offset,
                            length,
                            in_fp,
                            storage_metadata,
                            content_type=None):
        """Stage up to ``length`` bytes from ``in_fp`` as blocks of the upload blob.

        Data is split into blocks of at most ``self._max_block_size`` bytes.
        ``offset`` is accepted but not used by this implementation.

        Returns:
            A tuple ``(bytes_written, new_metadata, error)``, where ``error`` is
            the AzureError that interrupted staging, or None on success. The
            input ``storage_metadata`` is not mutated; an updated deep copy is
            returned.
        """
        if length == 0:
            return 0, storage_metadata, None

        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        new_metadata = copy.deepcopy(storage_metadata)

        total_bytes_written = 0

        while True:
            current_length = length - total_bytes_written
            max_length = (min(current_length, self._max_block_size)
                          if length != READ_UNTIL_END else
                          self._max_block_size)
            if max_length <= 0:
                break

            limited = LimitingStream(in_fp, max_length, seekable=False)

            # Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
            # and, if there is none, terminate early. The pieces are joined in
            # one pass rather than concatenated one by one.
            block_data = b"".join(iter(lambda: limited.read(31457280), b""))

            if len(block_data) == 0:
                break

            # Block ids are the zero-padded position of the block in the list.
            block_index = len(new_metadata[_BLOCKS_KEY])
            block_id = format(block_index, "05")
            new_metadata[_BLOCKS_KEY].append(block_id)

            try:
                self._blob(upload_blob_path).stage_block(block_id,
                                                         block_data,
                                                         validate_content=True)
            except AzureError as ae:
                logger.exception(
                    "Exception when trying to stream_upload_chunk block %s for %s",
                    block_id, uuid)
                return total_bytes_written, new_metadata, ae

            bytes_written = len(block_data)
            total_bytes_written += bytes_written
            # A short block means the input is exhausted.
            if bytes_written == 0 or bytes_written < max_length:
                break

        if content_type is not None:
            new_metadata[_CONTENT_TYPE_KEY] = content_type

        return total_bytes_written, new_metadata, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
        """
        Complete the chunked upload and store the final results in the path indicated.

        Commits the staged blocks, copies the upload blob to ``final_path``,
        waits for the copy to finish and deletes the upload blob.
        Returns nothing. Raises IOError if any of these steps fails.
        """
        # Commit the blob's blocks.
        upload_blob_name = self._upload_blob_name_from_uuid(
            uuid)  # uploads/<uuid>
        upload_blob_path = self._upload_blob_path_from_uuid(
            uuid)  # <storage path>/uploads/<uuid>
        block_list = [
            BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]
        ]

        try:
            if storage_metadata[_CONTENT_TYPE_KEY] is not None:
                content_settings = ContentSettings(
                    content_type=storage_metadata[_CONTENT_TYPE_KEY])
                self._blob(upload_blob_path).commit_block_list(
                    block_list, content_settings=content_settings)
            else:
                self._blob(upload_blob_path).commit_block_list(block_list)
        except AzureError:
            logger.exception(
                "Exception when trying to put block list for path %s from upload %s",
                final_path,
                uuid,
            )
            raise IOError("Exception when trying to put block list")

        # Copy the blob to its final location. The relative name is passed
        # because get_direct_download_url adds the storage path itself.
        copy_source_url = self.get_direct_download_url(upload_blob_name,
                                                       expires_in=300)

        try:
            final_blob_name = self._blob_name_from_path(final_path)
            self._blob(final_blob_name).start_copy_from_url(copy_source_url)
        except AzureError:
            logger.exception(
                "Exception when trying to set copy uploaded blob %s to path %s",
                uuid, final_path)
            raise IOError("Exception when trying to copy uploaded blob")

        self._await_copy(final_blob_name)

        # Delete the original blob.
        logger.debug("Deleting chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except AzureError:
            logger.exception(
                "Exception when trying to set delete uploaded blob %s", uuid)
            raise IOError("Exception when trying to delete uploaded blob")

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """
        Cancel the chunked upload and clean up any outstanding partially uploaded data.

        Returns nothing. A missing upload blob is not treated as an error.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug("Canceling chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except ResourceNotFoundError:
            pass

    def _await_copy(self, blob_name):
        """Poll the copy status of ``blob_name`` until it is no longer pending.

        Raises:
            IOError: If the copy status is "failed" or "aborted" (including
                when that is already the first status observed), or if the
                copy is still pending after more than ``_MAX_COPY_POLL_COUNT``
                polls.
        """
        blob = self._blob(blob_name)
        count = 0

        while True:
            status = blob.get_blob_properties().copy.status

            # Terminal failure states are checked on every observation so that
            # a copy which has already failed is never mistaken for a success.
            if status == "failed" or status == "aborted":
                raise IOError("Copy of blob %s failed with status %s" %
                              (blob_name, status))

            if status != "pending":
                return

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError("Timed out waiting for copy to complete")

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        """Copy the blob at ``path`` to the same path in ``destination``.

        A server-side Azure copy is used when ``destination`` is of the same
        class; otherwise the content is streamed through this process.
        """
        if self.__class__ == destination.__class__:
            logger.debug(
                "Starting copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            copy_source_url = self.get_direct_download_url(path)
            blob_name = destination._blob_name_from_path(path)
            destination._blob(blob_name).start_copy_from_url(copy_source_url)
            destination._await_copy(blob_name)
            logger.debug(
                "Finished copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            return

        # Fallback to a slower, default copy.
        logger.debug(
            "Copying file from Azure container %s to %s via a streamed copy",
            self._azure_container,
            destination,
        )
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def setup(self):
        """Install the CORS rule on the blob service."""
        # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
        cors = [
            CorsRule(
                allowed_origins="*",
                allowed_methods=["GET", "PUT"],
                max_age_in_seconds=3000,
                exposed_headers=["x-ms-meta-*"],
                allowed_headers=[
                    "x-ms-meta-data*",
                    "x-ms-meta-target*",
                    "x-ms-meta-abc",
                    "Content-Type",
                ],
            )
        ]

        self._blob_service_client.set_service_properties(cors=cors)
class AzureBlobFileSystem(AbstractFileSystem):
    """
    Access Azure Datalake Gen2 and Azure Storage if it were a file system using Multiprotocol Access

    Parameters
    ----------
    account_name: str
        The storage account name. It is used to construct the storage
        endpoint ``https://<account_name>.blob.core.windows.net``.
    account_key: str
        The storage account key. This is used for shared key authentication.
    sas_token: str
        A shared access signature token to use to authenticate requests
        instead of the account key. If account key and sas token are both
        specified, the account key will be used.
    request_session: Session
        The session object to use for http requests.
    connection_string: str
        A storage connection string; used when no credential is given.
        See
        http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
        for the connection string format.
    socket_timeout: int
        If specified, this will override the default socket timeout. The timeout specified is in
        seconds.
    credential: TokenCredential or SAS token
        The credentials with which to authenticate.
        Can include an instance of TokenCredential class from azure.identity
    blocksize: int
        The block size to use for download/upload operations. Defaults to the
        ``max_block_size`` of ``create_configuration(storage_sdk="blob")``.
    client_id: str
        Client ID to use when authenticating using an AD Service Principal client/secret.
    client_secret: str
        Client secret to use when authenticating using an AD Service Principal client/secret.
    tenant_id: str
        Tenant ID to use when authenticating using an AD Service Principal client/secret.

    If none of credential, connection string, account key, sas token or
    client_id is specified, the connection attempt raises ``ValueError``.

    Examples
    --------
    Authentication with an account_key
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX")
    >>> abfs.ls('')

    **  Sharded Parquet & csv files can be read as: **
        ------------------------------------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY})

        ddf = dd.read_parquet('abfs://container_name/folder.parquet', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY,})

    Authentication with an Azure ServicePrincipal
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", tenant_id=TENANT_ID,
        ...    client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    >>> abfs.ls('')

    **  Read files as: **
        -------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
            'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET})
    """

    protocol = "abfs"

    def __init__(
        self,
        account_name: str,
        account_key: str = None,
        connection_string: str = None,
        credential: str = None,
        sas_token: str = None,
        request_session=None,
        socket_timeout: int = None,
        blocksize: int = create_configuration(
            storage_sdk="blob").max_block_size,
        client_id: str = None,
        client_secret: str = None,
        tenant_id: str = None,
    ):
        AbstractFileSystem.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.connection_string = connection_string
        self.credential = credential
        self.sas_token = sas_token
        self.request_session = request_session
        self.socket_timeout = socket_timeout
        self.blocksize = blocksize
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id

        # Fall back to a service principal credential only when no other
        # credential-like argument was supplied.
        if (self.credential is None and self.account_key is None
                and self.sas_token is None and self.client_id is not None):
            self.credential = self._get_credential_from_service_principal()
        self.do_connect()

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Remove the protocol from the input path

        Parameters
        ----------
        path: str
            Path to remove the protocol from

        Returns
        -------
        str
            Returns a path without the protocol
        """
        logging.debug(f"_strip_protocol for {path}")
        ops = infer_storage_options(path)

        # we need to make sure that the path retains
        # the format {host}/{path}
        # here host is the container_name
        if ops.get("host", None):
            ops["path"] = ops["host"] + ops["path"]
        ops["path"] = ops["path"].lstrip("/")

        logging.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"]

    def _get_credential_from_service_principal(self):
        """
        Create a ClientSecretCredential from tenant_id, client_id and client_secret.

        Returns
        -------
        Credential
        """
        from azure.identity import ClientSecretCredential

        return ClientSecretCredential(
            tenant_id=self.tenant_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
        )

    def do_connect(self):
        """Connect to the BlobServiceClient, using user-specified connection details.
        Tries credentials first, then connection string, then account key and
        finally the SAS token.

        Raises
        ------
        ValueError if none of the connection details are available
        """
        self.account_url: str = f"https://{self.account_name}.blob.core.windows.net"
        if self.credential is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.credential)
        elif self.connection_string is not None:
            self.service_client = BlobServiceClient.from_connection_string(
                conn_str=self.connection_string)
        elif self.account_key is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.account_key)
        elif self.sas_token is not None:
            # A SAS token on its own is a valid credential; previously it was
            # accepted by the constructor but never used for connecting.
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.sas_token)
        else:
            raise ValueError("unable to connect with provided params!!")

    def split_path(self,
                   path,
                   delimiter="/",
                   return_container: bool = False,
                   **kwargs):
        """
        Normalize ABFS path string into bucket and key.

        Parameters
        ----------
        path : string
            Input path, like `abfs://my_container/path/to/file`

        delimiter: string
            Delimiter used to split the path

        return_container: bool
            Accepted but not used by this implementation.

        Examples
        --------
        >>> split_path("abfs://my_container/path/to/file")
        ['my_container', 'path/to/file']
        """

        if path in ["", delimiter]:
            return "", ""

        path = self._strip_protocol(path)
        path = path.lstrip(delimiter)
        if "/" not in path:
            # this means path is the container_name
            return path, ""
        else:
            return path.split(delimiter, 1)

    def _render_listing(self, blobs, detail, return_glob, delimiter):
        """Format walked blobs as glob details, full details or "container/name" strings."""
        if return_glob:
            return self._details(blobs, return_glob=True)
        if detail:
            return self._details(blobs)
        return [f"{blob.container}{delimiter}{blob.name}" for blob in blobs]

    def ls(
        self,
        path: str,
        detail: bool = False,
        invalidate_cache: bool = True,
        delimiter: str = "/",
        return_glob: bool = False,
        **kwargs,
    ):
        """
        Create a list of blob names from a blob container

        Parameters
        ----------
        path: str
            Path to an Azure Blob with its container name

        detail: bool
            If False, return a list of blob names, else a list of dictionaries with blob details

        invalidate_cache: bool
            Accepted but not used by this implementation.

        delimiter: str
            Delimiter used to split paths

        return_glob: bool
            If True, return details whose names have trailing "/" removed, and
            return an empty list instead of raising when nothing matches.

        Raises
        ------
        FileNotFoundError
            If nothing matches ``path`` (and ``return_glob`` is False), if the
            listing fails, or if the single match cannot be classified.
        """

        logging.debug(f"abfs.ls() is searching for {path}")

        container, path = self.split_path(path)
        if (container in ["", delimiter]) and (path in ["", delimiter]):
            # This is the case where only the containers are being returned
            logging.info(
                "Returning a list of containers in the azure blob storage account"
            )
            if detail:
                contents = self.service_client.list_containers(
                    include_metadata=True)
                return self._details(contents)
            contents = self.service_client.list_containers()
            return [f"{c.name}{delimiter}" for c in contents]

        if container in ["", delimiter]:
            # A path without a container cannot be listed.
            raise FileNotFoundError

        # This is the case where the container name is passed
        container_client = self.service_client.get_container_client(
            container=container)
        blobs = container_client.walk_blobs(name_starts_with=path)
        try:
            # Materialize the listing; any failure while walking is reported
            # as a missing path.
            blobs = list(blobs)
        except Exception:
            raise FileNotFoundError

        if not blobs:
            if return_glob or (path in ["", delimiter]):
                return []
            raise FileNotFoundError

        if len(blobs) > 1:
            return self._render_listing(blobs, detail, return_glob, delimiter)

        # Exactly one entry matched the prefix.
        only = blobs[0]
        if (only.name.rstrip(delimiter) == path) and not only.has_key(  # NOQA
                "blob_type"):
            # The entry is the directory itself: list what is below it.
            path = only.name
            blobs = container_client.walk_blobs(name_starts_with=path)
            return self._render_listing(blobs, detail, return_glob, delimiter)
        elif isinstance(only, BlobPrefix):
            if detail:
                return self._details(only)
            return [
                f"{blob.container}{delimiter}{blob.name}" for blob in only
            ]
        elif only["blob_type"] == "BlockBlob":
            if detail:
                return self._details(blobs)
            return [
                f"{blob.container}{delimiter}{blob.name}" for blob in blobs
            ]
        elif isinstance(only, ItemPaged):
            # Return the collected entries; this branch used to build the
            # list and then fall through, implicitly returning None.
            return [b for page in blobs for b in page]
        else:
            raise FileNotFoundError(
                f"Unable to identify blobs in {path} for {only.name}")

    def _details(self,
                 contents,
                 delimiter="/",
                 return_glob: bool = False,
                 **kwargs):
        """
        Return a list of dictionaries of specifying details about the contents

        Entries that have a "container" key are reported as "file" when their
        size is non-zero and as "directory" otherwise; entries without one are
        reported as directories of size 0.

        Parameters
        ----------
        contents

        delimiter: str
            Delimiter used to separate containers and files

        return_glob: bool
            If True, trailing "/" is removed from every name.

        Returns
        -------
        List of dicts
            Returns details about the contents, such as name, size and type
        """
        pathlist = []
        for c in contents:
            data = {}
            if c.has_key("container"):  # NOQA
                data["name"] = f"{c.container}{delimiter}{c.name}"
                if c.has_key("size"):  # NOQA
                    data["size"] = c.size
                else:
                    data["size"] = 0
                if data["size"] == 0:
                    data["type"] = "directory"
                else:
                    data["type"] = "file"
            else:
                data["name"] = f"{c.name}{delimiter}"
                data["size"] = 0
                data["type"] = "directory"
            if return_glob:
                data["name"] = data["name"].rstrip("/")

            pathlist.append(data)
        return pathlist

    def walk(self, path: str, maxdepth=None, **kwargs):
        """ Return all files belows path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into

        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.

        **kwargs are passed to ``ls``
        """
        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, return_glob=True, **kwargs)
        except (FileNotFoundError, IOError):
            # Nothing to walk: end the generator without yielding anything.
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[pathname] = info
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in full_dirs:
            yield from self.walk(d, maxdepth=maxdepth, detail=detail, **kwargs)

    def mkdir(self, path, delimiter="/", exists_ok=False, **kwargs):
        """
        Create directory entry at path

        With only a container name, a new container is created. With a
        container and a path, an empty blob is uploaded at that path inside
        the (existing) container.

        Parameters
        ----------
        path: str
            The path to create

        delimiter: str
            Delimiter to use when splitting the path

        exists_ok: bool
            If False, raise RuntimeError when the request cannot be carried
            out (e.g. the container already exists, or the target container is
            missing). If True, such requests are silently ignored.
            Defaults to False

        Raises
        ------
        RuntimeError
            If ``exists_ok`` is False and the path cannot be created.
        """
        container_name, path = self.split_path(path, delimiter=delimiter)

        # ls("") returns container names with a trailing "/"; strip it so that
        # membership tests against the bare container name actually match.
        existing_containers = {
            container_path.split("/")[0] for container_path in self.ls("")
        }

        if not exists_ok:
            if (container_name not in existing_containers) and (not path):
                # create new container
                self.service_client.create_container(name=container_name)
            elif (container_name in existing_containers) and path:
                # attempt to create prefix
                container_client = self.service_client.get_container_client(
                    container=container_name)
                container_client.upload_blob(name=path, data="")
            else:
                # everything else
                raise RuntimeError(
                    f"Cannot create {container_name}{delimiter}{path}.")
        else:
            if (container_name in existing_containers) and path:
                # NOTE(review): upload_blob is called without overwrite, so it
                # may still raise if a blob already exists at `path` -- confirm.
                container_client = self.service_client.get_container_client(
                    container=container_name)
                container_client.upload_blob(name=path, data="")

    def rmdir(self, path: str, delimiter="/", **kwargs):
        """
        Remove a container; paths that point inside a container are ignored.

        Parameters
        ----------
        path: str
            Path of directory to remove

        delimiter: str
            Delimiter to use when splitting the path

        """

        container_name, path = self.split_path(path, delimiter=delimiter)
        if (container_name + delimiter in self.ls("")) and (not path):
            # delete container
            self.service_client.delete_container(container_name)

    def _rm(self, path, delimiter="/", **kwargs):
        """
        Delete a given file, or a whole container when path names one

        Parameters
        ----------
        path: str
            Path to file to delete

        delimiter: str
            Delimiter to use when splitting the path

        Raises
        ------
        RuntimeError
            If ``path`` is neither a file nor a directory.
        """
        if self.isfile(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name)
            logging.debug(f"Delete blob {path} in {container_name}")
            container_client.delete_blob(path)
        elif self.isdir(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            if (container_name + delimiter in self.ls("")) and (not path):
                logging.debug(f"Delete container {container_name}")
                # Delete through the service client, as rmdir does:
                # ContainerClient.delete_container() takes no container name.
                self.service_client.delete_container(container_name)
        else:
            raise RuntimeError(f"cannot delete {path}")

    def _open(
        self,
        path: str,
        mode: str = "rb",
        block_size: int = None,
        autocommit: bool = True,
        cache_options=None,
        **kwargs,
    ):
        """Open a file on the datalake, or a block blob

        Parameters
        ----------
        path: str
            Path to file to open

        mode: str
            What mode to open the file in - defaults to "rb"

        block_size: int
            Size per block for multi-part downloads. Falls back to
            ``self.blocksize`` when not given.

        autocommit: bool
            Whether or not to write to the destination directly

        cache_options:
            Passed through to ``AzureBlobFile`` unchanged.

        **kwargs are passed to ``AzureBlobFile``
        """
        logging.debug(f"_open:  {path}")
        return AzureBlobFile(
            fs=self,
            path=path,
            mode=mode,
            block_size=block_size or self.blocksize,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
def enumerate_prefix(prefix, sas_url, output_folder):
    """
    Write the name of every blob starting with `prefix` to a text file.

    The output file is created in `output_folder`; its name is `prefix`
    passed through ``path_utils.clean_filename``. One blob name is written
    per line. Blobs are listed page by page using continuation tokens.

    Reads these names from the enclosing scope (not visible in this block):
    ``n_blobs_per_page``, ``debug_max_files`` (a value > 0 caps the number of
    names written), ``sleep_time_per_page`` (a value > 0 sleeps that long
    after each page) and ``cnt`` (its ``increment(n=...)`` is called once per
    page with the number of names written for that page).

    :param prefix: blob-name prefix to enumerate
    :param sas_url: container SAS URL; account, container and token are
        extracted from it with ``sas_blob_utils``
    :param output_folder: folder in which the output file is written
    """
    account_name = sas_blob_utils.get_account_from_uri(sas_url)
    container_name = sas_blob_utils.get_container_from_uri(sas_url)
    ro_sas_token = sas_blob_utils.get_sas_token_from_uri(sas_url)

    # The helper is expected to return the token without its leading '?'
    assert not ro_sas_token.startswith('?')
    ro_sas_token = '?' + ro_sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    print('Starting enumeration for prefix {}'.format(prefix))

    # Open the output file
    fn = path_utils.clean_filename(prefix)
    output_file = os.path.join(output_folder, fn)

    # Create the container client
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob, credential=ro_sas_token)
    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    with open(output_file, 'w') as output_f:

        continuation_token = ''
        hit_debug_limit = False
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                    continuation_token=continuation_token)
            blobs = next(blobs_iter)

            n_blobs_this_page = 0

            for blob in blobs:
                # Check the limit *before* counting, so that both counters
                # only ever reflect names actually written to the file.
                if (debug_max_files > 0) and (i_blob >= debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    hit_debug_limit = True
                    break
                output_f.write(blob.name + '\n')
                i_blob += 1
                n_blobs_this_page += 1

            cnt.increment(n=n_blobs_this_page)

            # None once the last page has been served, which ends the loop
            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for prefix {}'.format(i_blob, prefix))
def connect_blob(connect_str):
    """
    Build a BlobServiceClient from a storage connection string.

    :param connect_str: connection string handed to
        ``BlobServiceClient.from_connection_string``
    :return: the resulting service client, from which container clients
        can be created
    """
    return BlobServiceClient.from_connection_string(connect_str)
class Connector:
    """
    Convenience wrapper around Azure blob storage clients.

    Holds an optional default storage account and container. When they are
    given (directly or through an Azure ``path``), a ``BlobServiceClient``
    and ``ContainerClient`` for them are created once and reused.

    Most public methods are wrapped by ``arguments_decorator`` /
    ``multi_arguments_decorator`` (defined elsewhere); those decorators are
    presumably what resolves ``path`` into storage account, container and
    file path and fills in the instance defaults -- confirm there.
    """

    def __init__(self, path=None, storage_account=None, container=None):
        """
        :param path: optional Azure path; when given, its storage account and
            container override the ``storage_account`` / ``container`` arguments
        :param storage_account: optional default storage account name
        :param container: optional default container name
        :exception ValueError: Raised if the container does not exist in the
            storage account
        """
        logging.basicConfig(level=logging.INFO)
        self.storage_account = storage_account
        self.container = container
        if path:
            parsed_path = self.parse_azure_path(path)
            self.storage_account = parsed_path["storage_account"]
            self.container = parsed_path["container"]

        # Gets credential from azure cli
        self.credential = DefaultAzureCredential()

        # Create class wide storage account and container clients if names are passed
        if self.storage_account:
            blob_storage_url = self.get_blob_storage_url(
                storage_account=self.storage_account
            )
            self.blob_service_client = BlobServiceClient(
                credential=self.credential, account_url=blob_storage_url
            )
            if self.container:
                container_names = [
                    c.name for c in self.blob_service_client.list_containers()
                ]
                if self.container not in container_names:
                    raise ValueError(
                        f"The container: {self.container} is not in the storage account: {self.storage_account}"
                    )
                self.container_client = self.blob_service_client.get_container_client(
                    container=self.container
                )

    @arguments_decorator()
    def get_blob_storage_url(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> str:
        """
        Returns the storage account url for the path or storage_account name passed

        :param path: str: optional An azure path. Defaults to None.
        :param storage_account: str: optional Storage account name. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.
        :return str: The storage account url in the form:
            https://{storage_account}.blob.core.windows.net/
        """
        return f"https://{storage_account}.blob.core.windows.net/"

    def parse_azure_path(self, path: str) -> dict:
        """
        Parse an azure url into: storage_account, container and filepath.

        Three forms are handled:

        * ``https://{account}.blob.core.windows.net/{container}/{file}`` --
          all three parts come from the url.
        * ``azure://{container}/{file}`` -- the storage account is taken from
          the class instance (None if the instance has none).
        * anything else -- treated as a bare file path; storage account and
          container are taken from the class instance.

        :param path: str: The azure blob path
        :return: dict: A dictionary with the keys ``storage_account``,
            ``container`` and ``file_path``
        """
        storage_account = self.storage_account
        container = self.container

        if path.startswith("https://"):
            storage_account = re.findall(
                r"https://(.*)\.blob\.core\.windows\.net", path
            )[0]
            path = path.replace(f"https://{storage_account}.blob.core.windows.net/", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)
        elif path.startswith("azure://"):
            path = path.replace("azure://", "")
            split_path = path.split("/")
            container = split_path.pop(0)
            filepath = "/".join(split_path)
        else:
            filepath = path

        return {
            "storage_account": storage_account,
            "container": container,
            "file_path": filepath,
        }

    def is_azure_path(self, path: str) -> bool:
        """
        Returns true if the path is of a recognised azure path format

        :param path: str: The path to test
        :return bool: True if path is of an accepted azure path format
        """
        # Dots are escaped so this accepts exactly the https form that
        # parse_azure_path can parse; unescaped, "." matched any character.
        patterns = [r"https://.*\.blob\.core\.windows\.net", r"azure://"]
        return any(re.match(p, path) for p in patterns)

    @arguments_decorator()
    def get_blob_service_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> BlobServiceClient:
        """
        Returns a blob service client for the specified storage account.

        The client created in ``__init__`` is reused when the storage account
        is the instance's own; otherwise a new client is built.

        :param path: str: optional An azure path, the storage account will be
            used to create a client. Defaults to None.
        :param storage_account: str: optional The name of the storage account
            to create a client for. Defaults to None.
        :param container: str: optional Ignored. Defaults to None.
        :param file_path: str: optional Ignored. Defaults to None.
        :return BlobServiceClient: An azure blobserviceclient for the
            specified storage account
        """
        if storage_account == self.storage_account:
            return self.blob_service_client

        blob_storage_url = self.get_blob_storage_url(storage_account=storage_account)
        return BlobServiceClient(
            credential=self.credential, account_url=blob_storage_url
        )

    @arguments_decorator()
    def get_container_client(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> ContainerClient:
        """
        Returns a container client when a container name in the storage account is passed.

        The client created in ``__init__`` is reused when both storage account
        and container are the instance's own.

        :param path: str: optional An Azure path, the container in the path
            will be used. Defaults to None.
        :param storage_account: str: optional A storage account name
            containing the container. Defaults to None.
        :param container: str: optional The name of the container to create a
            client for. Defaults to None.
        :param file_path: str: optional The file path will ultimately be
            ignored. Defaults to None.
        :exception ValueError: Raised if the container does not exist in the
            storage account
        :return ContainerClient: An Azure client for the container
        """
        if storage_account == self.storage_account and container == self.container:
            return self.container_client

        client = self.get_blob_service_client(storage_account=storage_account)
        container_names = [c.name for c in client.list_containers()]
        if container not in container_names:
            raise ValueError(
                f"The container: {container} is not in the storage account: {storage_account}"
            )
        return client.get_container_client(container=container)

    @arguments_decorator()
    def list_blobs(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ) -> list:
        """
        Returns a list of blobs, with paths that match the path passed

        When ``file_path`` is given, only blobs whose name starts with it are
        listed and that leading prefix is removed from each returned name.

        :param path: str: optional An azure path to search for blobs. Defaults to None.
        :param storage_account: str: optional storage account name. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional the prefix file path. Defaults to None.
        :return list: Blobs in the path passed
        """
        container_client = self.get_container_client(
            storage_account=storage_account, container=container
        )
        if not file_path:
            return [blob.name for blob in container_client.list_blobs()]

        # Strip only the leading prefix. str.replace would also delete any
        # later occurrence of the prefix inside the blob name.
        prefix_len = len(file_path)
        return [
            blob.name[prefix_len:] if blob.name.startswith(file_path) else blob.name
            for blob in container_client.list_blobs(name_starts_with=file_path)
        ]

    @multi_arguments_decorator(local_support=True)
    def download_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Copy a folder from azure to a local path

        Every blob whose name starts with ``source_file_path`` is written
        directly into ``dest_path`` under its base name (any sub-folder
        structure in the blob name is not recreated locally).

        :param source_path: str: optional An Azure path to the folder to
            download. Defaults to None.
        :param source_storage_account: str: optional The storage account
            name. Defaults to None.
        :param source_container: str: optional The container name. Defaults to None.
        :param source_file_path: str: optional The path to the folder to
            download. Defaults to None.
        :param dest_path: str: optional The local path to download the folder
            to. Defaults to None.
        :param dest_storage_account: str: optional Ignored. Defaults to None.
        :param dest_container: str: optional Ignored. Defaults to None.
        :param dest_file_path: str: optional Ignored. Defaults to None.
        :exception ValueError: Raised when destination path is an azure path
        """
        container_client = self.get_container_client(
            storage_account=source_storage_account, container=source_container
        )

        if self.is_azure_path(dest_path):
            raise ValueError(
                f"Expected destination to be local path got azure path: {dest_path}"
            )

        os.makedirs(dest_path, exist_ok=True)

        for blob in container_client.list_blobs(name_starts_with=source_file_path):
            file_name = os.path.basename(blob.name)
            local_path = os.path.join(dest_path, file_name)
            with open(local_path, "wb") as f:
                logging.info(f"Downloading {blob.name} to {local_path}")
                blob_data = container_client.download_blob(blob.name)
                blob_data.readinto(f)
        logging.info("Completed Download")

    @arguments_decorator()
    def blob_exists(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
    ):
        """
        Checks if a file exists in azure, return bool

        :param path: str: optional Azure path to file to check. Defaults to None.
        :param storage_account: str: optional Storage account. Defaults to None.
        :param container: str: optional Container. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.
        :return [bool]: True if file exists
        """
        client = self.get_blob_service_client(storage_account=storage_account)
        blob_client = client.get_blob_client(container, file_path)
        return blob_client.exists()

    @multi_arguments_decorator(local_support=True)
    def upload_folder(
        self,
        source_path: str = None,
        source_storage_account: str = None,
        source_container: str = None,
        source_file_path: str = None,
        dest_path: str = None,
        dest_storage_account: str = None,
        dest_container: str = None,
        dest_file_path: str = None,
    ):
        """
        Upload a directory to an azure location. Subdirectories are not
        currently supported

        Each file found while walking ``source_path`` is uploaded as
        ``dest_file_path + <file name>``; no separator is inserted between
        the two.

        :param source_path: str: optional Local path to folder to upload.
            Defaults to None.
        :param source_storage_account: str: optional Ignored. Defaults to None.
        :param source_container: str: optional Ignored. Defaults to None.
        :param source_file_path: str: optional Ignored. Defaults to None.
        :param dest_path: str: optional Azure path to upload to. Defaults to None.
        :param dest_storage_account: str: optional Storage account. Defaults to None.
        :param dest_container: str: optional Container name. Defaults to None.
        :param dest_file_path: str: optional Path to folder. Defaults to None.
        :exception ValueError: Raised if source is an Azure path
        """
        if self.is_azure_path(source_path):
            # The offending argument is the source, so the message names it.
            raise ValueError(
                f"Expected source to be local path got azure path: {source_path}"
            )

        container_client = self.get_container_client(
            storage_account=dest_storage_account, container=dest_container
        )

        for root, dirs, files in os.walk(source_path):
            logging.warning(
                "upload folder does not support sub-directories only files will be uploaded"
            )
            for file in files:
                file_path = os.path.join(root, file)
                blob_path = dest_file_path + file
                logging.info(f"Uploading {file_path} to {blob_path}")
                with open(file_path, "rb") as data:
                    container_client.upload_blob(name=blob_path, data=data)

    @arguments_decorator(local_support=True)
    def open(
        self,
        path: str = None,
        storage_account: str = None,
        container: str = None,
        file_path: str = None,
        mode="r",
        *args,
        **kwargs,
    ):
        """
        wrapper around smart_open so we dont have to pass a blob client everywhere.

        For a local path opened in a write mode, the parent directory is
        created first. A caller-supplied ``transport_params`` keyword is left
        untouched; otherwise one is built with a blob service client for
        ``storage_account`` (or ``None`` when there is no storage account).

        :param path: str: optional Local or azure path. Defaults to None.
        :param storage_account: str: optional name of storage account. Defaults to None.
        :param container: str: optional container name. Defaults to None.
        :param file_path: str: optional path to file. Defaults to None.
        :param mode: str: optional open mode. Defaults to "r".
        :return [smart_open.open]: Opens both local and azure files
        """
        if path and not self.is_azure_path(path) and "w" in mode:
            # if it is local write mode, check the path and create folder if needed
            subdir = os.path.dirname(path)
            if subdir:
                os.makedirs(subdir, exist_ok=True)

        if storage_account:
            transport_params = {
                "client": self.get_blob_service_client(storage_account=storage_account)
            }
        else:
            transport_params = {"client": None}

        if "transport_params" not in kwargs:
            kwargs["transport_params"] = transport_params

        path = path if path else f"azure://{container}/{file_path}"
        return smart_open.open(path, mode, *args, **kwargs)
# Set TimeZone (Change from pacific if you live in another timezone) TIME_ZONE = timezone("US/Pacific") # This is a boolean marking whether to use cloud storage or not. # Change to true and follow tutorial instructions if you would like to use it. CLOUD_STORAGE = False print("CLOUD STORAGE STATUS:", CLOUD_STORAGE) CONTAINER_NAME = "storagetest" ts = datetime.now(TIME_ZONE) timestring = ts.strftime("%Y-%m-%d %H:%M:%S") current_date = timestring.split()[0] DAILY_STRING = "timestamp,location," + labels_string + "\n" if CLOUD_STORAGE: block_blob_service = BlobServiceClient.from_connection_string( BLOB_STORAGE_CONNECTION_STRING) ts = datetime.now(TIME_ZONE) DAILY_CSV_NAME = "objectcount" + current_date + ".csv" try: container_client = block_blob_service.create_container(CONTAINER_NAME) except ResourceExistsError: pass blob_client = block_blob_service.get_blob_client(container=CONTAINER_NAME, blob=DAILY_CSV_NAME) blob_client.upload_blob(DAILY_STRING, overwrite=True) class HubManager(object): def __init__(self): self.client = IoTHubDeviceClient.create_from_connection_string(
class StorageBlockBlobTest(StorageTestCase): def _setup(self, storage_account, key): # test chunking functionality by reducing the size of each chunk, # otherwise the tests would take too long to execute self.bsc = BlobServiceClient( self.account_url(storage_account, "blob"), credential=key, connection_data_block_size=4 * 1024, max_single_put_size=32 * 1024, max_block_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') if self.is_live: self.bsc.create_container(self.container_name) def _teardown(self, FILE_PATH): if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass #--Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(b'') return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bsc.get_blob_client(container_name, blob_name) actual_data = blob.download_blob() self.assertEqual(actual_data.readall(), expected_data) class NonSeekableFile(object): def __init__(self, wrapped_file): self.wrapped_file = wrapped_file def write(self, data): self.wrapped_file.write(data) def read(self, count): return self.wrapped_file.read(count) #--Test cases for block blobs -------------------------------------------- @GlobalStorageAccountPreparer() def test_put_block(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act for i in range(5): headers = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8')) self.assertIn('content_crc64', headers) # Assert @GlobalStorageAccountPreparer() def test_put_block_with_response(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob 
= self._create_blob() def return_response(resp, _, headers): return (resp, headers) # Act resp, headers = blob.stage_block(0, 'block 0', cls=return_response) # Assert self.assertEqual(201, resp.status_code) self.assertIn('x-ms-content-crc64', headers) @GlobalStorageAccountPreparer() def test_put_block_unicode(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act headers = blob.stage_block('1', u'啊齄丂狛狜') self.assertIn('content_crc64', headers) # Assert @GlobalStorageAccountPreparer() def test_put_block_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act blob.stage_block(1, b'block', validate_content=True) # Assert @GlobalStorageAccountPreparer() def test_put_block_list(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')] put_block_list_resp = blob.commit_block_list(block_list) # Assert content = blob.download_blob() self.assertEqual(content.readall(), b'AAABBBCCC') self.assertEqual(content.properties.etag, put_block_list_resp.get('etag')) self.assertEqual(content.properties.last_modified, put_block_list_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_put_block_list_invalid_block_id(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') 
# Act try: block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='4')] blob.commit_block_list(block_list) self.fail() except HttpResponseError as e: self.assertGreaterEqual(str(e).find('specified block list is invalid'), 0) # Assert @GlobalStorageAccountPreparer() def test_put_block_list_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')] blob.commit_block_list(block_list, validate_content=True) # Assert @GlobalStorageAccountPreparer() def test_put_block_list_with_blob_tier_specified(self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob_client = self.bsc.get_blob_client(self.container_name, blob_name) blob_client.stage_block('1', b'AAA') blob_client.stage_block('2', b'BBB') blob_client.stage_block('3', b'CCC') blob_tier = StandardBlobTier.Cool # Act block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')] blob_client.commit_block_list(block_list, standard_blob_tier=blob_tier) # Assert blob_properties = blob_client.get_blob_properties() self.assertEqual(blob_properties.blob_tier, blob_tier) @GlobalStorageAccountPreparer() def test_get_block_list_no_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act block_list = blob.get_block_list('all') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 0) @GlobalStorageAccountPreparer() def 
test_get_block_list_uncommitted_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = blob.get_block_list('uncommitted') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 3) self.assertEqual(len(block_list[0]), 0) self.assertEqual(block_list[1][0].id, '1') self.assertEqual(block_list[1][0].size, 3) self.assertEqual(block_list[1][1].id, '2') self.assertEqual(block_list[1][1].size, 3) self.assertEqual(block_list[1][2].id, '3') self.assertEqual(block_list[1][2].size, 3) @GlobalStorageAccountPreparer() def test_get_block_list_committed_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3')] blob.commit_block_list(block_list) # Act block_list = blob.get_block_list('committed') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 3) self.assertEqual(block_list[0][0].id, '1') self.assertEqual(block_list[0][0].size, 3) self.assertEqual(block_list[0][1].id, '2') self.assertEqual(block_list[0][1].size, 3) self.assertEqual(block_list[0][2].id, '3') self.assertEqual(block_list[0][2].size, 3) @GlobalStorageAccountPreparer() def test_create_small_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, 
storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = b'hello world' data2 = b'hello second world' # Act create_resp = blob.upload_blob(data1, overwrite=True) with self.assertRaises(ResourceExistsError): blob.upload_blob(data2, overwrite=False) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data1) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) @GlobalStorageAccountPreparer() def test_create_small_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = b'hello world' data2 = b'hello second world' # Act create_resp = blob.upload_blob(data1, overwrite=True) update_resp = blob.upload_blob(data2, overwrite=True) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data2) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) @GlobalStorageAccountPreparer() def test_create_large_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE) # Act create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'}) with self.assertRaises(ResourceExistsError): blob.upload_blob(data2, overwrite=False, metadata={'blobdata': 'data2'}) props = 
blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data1) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) self.assertEqual(props.metadata, {'blobdata': 'data1'}) self.assertEqual(props.size, LARGE_BLOB_SIZE) @GlobalStorageAccountPreparer() def test_create_large_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'}) update_resp = blob.upload_blob(data2, overwrite=True, metadata={'blobdata': 'data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data2) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) self.assertEqual(props.metadata, {'blobdata': 'data2'}) self.assertEqual(props.size, LARGE_BLOB_SIZE + 512) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_single_put(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' # Act create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def 
test_create_blob_from_0_bytes(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'' # Act create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_from_bytes_blob_unicode(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = u'hello world' # Act create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_from_bytes_blob_unicode(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act data = u'hello world' create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data.encode('utf-8')) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_bytes_blob_with_lease_id(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live 
self._setup(storage_account, storage_account_key) blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) lease = blob.acquire_lease() # Act create_resp = blob.upload_blob(data, lease=lease) # Assert output = blob.download_blob(lease=lease) self.assertEqual(output.readall(), data) self.assertEqual(output.properties.etag, create_resp.get('etag')) self.assertEqual(output.properties.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_metadata(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) metadata = {'hello': 'world', 'number': '42'} # Act blob.upload_blob(data, metadata=metadata) # Assert md = blob.get_blob_properties().metadata self.assertDictEqual(md, metadata) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act content_settings=ContentSettings( content_type='image/png', content_language='spanish') blob.upload_blob(data, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @pytest.mark.live_test_only 
@GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_progress(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) create_resp = blob.upload_blob(data, raw_response_hook=callback) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data[3:]) # Assert self.assertEqual(data[3:], blob.download_blob().readall()) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index_and_count(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data[3:], length=5) # Assert self.assertEqual(data[3:8], 
blob.download_blob().readall()) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index_and_count_and_properties(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act content_settings=ContentSettings( content_type='image/png', content_language='spanish') blob.upload_blob(data[3:], length=5, content_settings=content_settings) # Assert self.assertEqual(data[3:8], blob.download_blob().readall()) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_non_parallel(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_blob_tier_specified(self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob_client = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' blob_tier = StandardBlobTier.Cool # Act blob_client.upload_blob(data, standard_blob_tier=blob_tier) blob_properties = blob_client.get_blob_properties() # Assert self.assertEqual(blob_properties.blob_tier, blob_tier) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def 
test_create_blob_from_path(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'create_blob_from_input.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def test_create_blob_from_path_non_parallel(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(100) FILE_PATH = 'create_blob_from_path_non_par.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream, length=100, max_concurrency=1) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def test_upload_blob_from_path_non_parallel_with_standard_blob_tier(self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(100) FILE_PATH = '_path_non_parallel_with_standard_blob.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) blob_tier = StandardBlobTier.Cool # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=100, max_concurrency=1, standard_blob_tier=blob_tier) props = blob.get_blob_properties() # Assert self.assertEqual(props.blob_tier, blob_tier) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_path_with_progress(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'create_blob_from_path_with_progr.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_path_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = 
self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_path_with_properties.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings=ContentSettings( content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_stream_chunked_up.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_frm_stream_nonseek_chunk_upld_knwn_size(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) blob_size = len(data) - 66 FILE_PATH = 'stream_nonseek_chunk_upld_knwn_size.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, length=blob_size, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_stream_nonseek_chunk_upld_unkwn_size(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'stream_nonseek_chunk_upld.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_with_progress_chunked_upload(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'stream_with_progress_chunked.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: 
stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload_with_count(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'chunked_upload_with_count.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: resp = blob.upload_blob(stream, length=blob_size) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_stream_chunk_upload_with_cntandrops(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'from_stream_chunk_upload_with_cntandrops.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings=ContentSettings( content_type='image/png', 
content_language='spanish') blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chnked_upload_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'chnked_upload_with_properti.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings=ContentSettings( content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live # Arrange self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_stream_chunked_upload.temp.{}.dat'.format(str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) blob_tier = StandardBlobTier.Cool # Act content_settings = ContentSettings( content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2, standard_blob_tier=blob_tier) properties = blob.get_blob_properties() # Assert self.assertEqual(properties.blob_tier, blob_tier) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def test_create_blob_from_text(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-8') # Act create_resp = blob.upload_blob(text) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_blob_from_text_with_encoding(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act blob.upload_blob(text, encoding='utf-16') # Assert self.assertBlobEqual(self.container_name, blob_name, data) @GlobalStorageAccountPreparer() def test_create_blob_from_text_with_encoding_and_progress(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_text_chunked_upload(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_text_data(LARGE_BLOB_SIZE) encoded_data = data.encode('utf-8') # Act blob.upload_blob(data) # Assert self.assertBlobEqual(self.container_name, blob_name, encoded_data) # Assert self.assertBlobEqual(self.container_name, blob_name, encoded_data) @GlobalStorageAccountPreparer() def test_create_blob_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' # Act blob.upload_blob(data, validate_content=True) # Assert @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_with_md5_chunked(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = 
self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data, validate_content=True)
in_asset_name, in_asset) # An AMS asset is a container with a specific id that has "asset-" prepended to the GUID. # So, you need to create the asset id to identify it as the container # where Storage is to upload the video (as a block blob) in_container = 'asset-' + input_asset.asset_id # create an output Asset print(f"Creating output asset {out_asset_name}") output_asset = client.assets.create_or_update(resource_group, account_name, out_asset_name, out_asset) ### Use the Storage SDK to upload the video ### print(f"Uploading the file {source_file}") blob_service_client = BlobServiceClient.from_connection_string( os.getenv('STORAGEACCOUNTCONNECTION')) blob_client = blob_service_client.get_blob_client(in_container, source_file) working_dir = os.getcwd() print(f"Current working directory: {working_dir}") upload_file_path = os.path.join(working_dir, source_file) # WARNING: Depending on where you are launching the sample from, the path here could be off, and not include the BasicEncoding folder. # Adjust the path as needed depending on how you are launching this python sample file. # Upload the video to storage as a block blob with open(upload_file_path, "rb") as data: blob_client.upload_blob(data) audio_transform_name = 'AudioAnalyzerTransform' video_transform_name = 'VideoAnalyzerTransform'
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the blob service client.

    All positional and keyword arguments are forwarded unchanged to the
    parent class. The client is built from ``self.connection_string``,
    which is read after the parent initialiser has run.
    """
    super().__init__(*args, **kwargs)
    conn_str = self.connection_string
    self.blob_service_client = BlobServiceClient.from_connection_string(conn_str)
def __init__(self, container='ackbarstorage'):
    """Remember the target container and connect to the storage account.

    :param container: Name of the container to work with.
    :raises KeyError: If the ``AZURE_STORAGE_CONNECTION_STRING`` environment
        variable is not set.
    """
    self.container = container
    # The connection string is taken from the environment only.
    connection_string = os.environ['AZURE_STORAGE_CONNECTION_STRING']
    self.conn_str = connection_string
    self.blob_service_client = BlobServiceClient.from_connection_string(
        connection_string)
class AzureBlobStorage(IStorage):
    """Storage backend that uses Azure Blob Storage."""

    test = False

    def __init__(
        self,
        request,
        expire_after=None,
        path_prefix=None,
        redirect_urls=None,
        storage_account_name=None,
        storage_account_key=None,
        storage_container_name=None,
    ):
        """Store the settings and create the service and container clients.

        :param request: Passed through to the parent ``IStorage`` initialiser.
        :param expire_after: Lifetime, in seconds, of generated SAS URLs.
        :param path_prefix: Prefix prepended to every blob path.
        :param redirect_urls: Stored as-is on the instance.
        :param storage_account_name: Azure storage account name.
        :param storage_account_key: Azure storage account key.
        :param storage_container_name: Container that holds the packages.
        """
        super(AzureBlobStorage, self).__init__(request)
        self.expire_after = expire_after
        self.path_prefix = path_prefix
        self.redirect_urls = redirect_urls
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.storage_container_name = storage_container_name
        self.azure_storage_account_url = "https://{}.blob.core.windows.net".format(
            storage_account_name)
        self.blob_service_client = BlobServiceClient(
            account_url=self.azure_storage_account_url,
            credential=self.storage_account_key,
        )
        self.container_client = self.blob_service_client.get_container_client(
            self.storage_container_name)

    @classmethod
    def configure(cls, settings):
        """Build the constructor kwargs from the ``storage.*`` settings.

        :raises ValueError: If the account name, account key or container
            name setting is missing.
        """
        kwargs = super(AzureBlobStorage, cls).configure(settings)
        kwargs["expire_after"] = int(
            settings.get("storage.expire_after", 60 * 60 * 24))
        kwargs["path_prefix"] = settings.get("storage.prefix", "")
        kwargs["redirect_urls"] = asbool(
            settings.get("storage.redirect_urls", True))

        kwargs["storage_account_name"] = settings.get(
            "storage.storage_account_name")
        if kwargs["storage_account_name"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_account_name'")

        kwargs["storage_account_key"] = settings.get(
            "storage.storage_account_key")
        if kwargs["storage_account_key"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_account_key'")

        kwargs["storage_container_name"] = settings.get(
            "storage.storage_container_name")
        if kwargs["storage_container_name"] is None:
            raise ValueError(
                "You must specify the 'storage.storage_container_name'")

        return kwargs

    def _generate_url(self, package: Package) -> str:
        """Return a read-only, HTTPS-only SAS URL for the package's blob."""
        from datetime import timezone

        path = self.get_path(package)
        # The expiry must be expressed in UTC: the Azure SDK treats naive
        # datetimes as UTC, so a naive local time would shift the expiry by
        # the host's UTC offset.
        expiry = datetime.now(timezone.utc) + timedelta(
            seconds=self.expire_after)
        url_params = generate_blob_sas(
            account_name=self.storage_account_name,
            container_name=self.storage_container_name,
            blob_name=path,
            account_key=self.storage_account_key,
            permission=BlobSasPermissions(read=True),
            expiry=expiry,
            protocol="https",
        )
        return "{}/{}/{}?{}".format(
            self.azure_storage_account_url,
            self.storage_container_name,
            path,
            url_params,
        )

    def download_response(self, package):
        """Redirect the client to a freshly signed URL for the package."""
        return HTTPFound(location=self._generate_url(package))

    def list(self, factory=Package):
        """Yield one ``factory`` object per blob under ``path_prefix``."""
        # List does not return metadata :( so each blob's properties are
        # fetched with a separate request.
        # NOTE(review): list_blobs may be able to return metadata directly
        # via its ``include`` argument -- confirm before changing.
        for blob_properties in self.container_client.list_blobs(
                name_starts_with=self.path_prefix):
            blob_client = self.container_client.get_blob_client(
                blob=blob_properties.name)
            metadata = blob_client.get_blob_properties()

            yield factory(metadata.metadata["name"],
                          metadata.metadata["version"],
                          posixpath.basename(blob_properties.name),
                          blob_properties.last_modified,
                          path=blob_properties.name,
                          **Package.read_metadata(metadata.metadata))

    def get_path(self, package):
        """ Get the fully-qualified bucket path for a package """
        # The computed path is cached on the package so later calls reuse it.
        if "path" not in package.data:
            package.data["path"] = (self.path_prefix + package.name + "/" +
                                    package.filename)
        return package.data["path"]

    def upload(self, package, datastream):
        """Upload ``datastream`` as the package's blob, with its metadata."""
        path = self.get_path(package)

        metadata = package.get_metadata()
        metadata["name"] = package.name
        metadata["version"] = package.version
        normalize_metadata(metadata)

        blob_client = self.container_client.get_blob_client(blob=path)
        blob_client.upload_blob(data=datastream, metadata=metadata)

    def delete(self, package):
        """Delete the blob that backs ``package``."""
        path = self.get_path(package)
        blob_client = self.container_client.get_blob_client(blob=path)
        blob_client.delete_blob()

    def check_health(self):
        """Probe the container; return ``(healthy, error_message)``."""
        try:
            self.container_client.get_blob_client(
                blob="__notexist").get_blob_properties()
        except ResourceNotFoundError:
            # A "not found" answer still proves the service is reachable.
            pass
        except Exception as e:
            return False, str(e)
        return True, ""

    @contextmanager
    def open(self, package):
        """Yield the package contents as an in-memory ``BytesIO``."""
        url = self._generate_url(package)
        handle = urlopen(url)
        try:
            yield BytesIO(handle.read())
        finally:
            handle.close()
class AzureBlobArtifactRepository(ArtifactRepository):
    """
    Stores artifacts on Azure Blob Storage.

    This repository is used with URIs of the form
    ``wasbs://<container-name>@<storage-account-name>.blob.core.windows.net/<path>``,
    following the same URI scheme as Hadoop on Azure blob storage. It requires that either
    ``AZURE_STORAGE_CONNECTION_STRING`` or ``AZURE_STORAGE_ACCESS_KEY`` be available in the
    environment; the connection string takes precedence when both are set.
    """

    def __init__(self, artifact_uri, client=None):
        """Create the repository, building a client unless one is supplied.

        :param artifact_uri: ``wasbs://`` URI of the artifact root.
        :param client: Optional pre-built service client (used for testing).
        :raises Exception: If no client is given and neither credential
            environment variable is set, or if the URI is not a valid
            WASBS URI.
        """
        super(AzureBlobArtifactRepository, self).__init__(artifact_uri)

        # Allow override for testing
        if client:
            self.client = client
            return

        from azure.storage.blob import BlobServiceClient
        (_, account, _) = AzureBlobArtifactRepository.parse_wasbs_uri(artifact_uri)
        if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
            self.client = BlobServiceClient.from_connection_string(
                conn_str=os.environ.get("AZURE_STORAGE_CONNECTION_STRING"))
        elif "AZURE_STORAGE_ACCESS_KEY" in os.environ:
            account_url = "https://{account}.blob.core.windows.net".format(
                account=account)
            self.client = BlobServiceClient(
                account_url=account_url,
                credential=os.environ.get("AZURE_STORAGE_ACCESS_KEY"))
        else:
            raise Exception(
                "You need to set one of AZURE_STORAGE_CONNECTION_STRING or "
                "AZURE_STORAGE_ACCESS_KEY to access Azure storage.")

    @staticmethod
    def parse_wasbs_uri(uri):
        """Parse a wasbs:// URI, returning (container, storage_account, path)."""
        parsed = urllib.parse.urlparse(uri)
        if parsed.scheme != "wasbs":
            raise Exception("Not a WASBS URI: %s" % uri)
        match = re.match(r"([^@]+)@([^.]+)\.blob\.core\.windows\.net",
                         parsed.netloc)
        if match is None:
            raise Exception("WASBS URI must be of the form "
                            "<container>@<account>.blob.core.windows.net")
        container = match.group(1)
        storage_account = match.group(2)
        path = parsed.path
        # Drop the single leading slash so the path is relative to the container.
        if path.startswith('/'):
            path = path[1:]
        return container, storage_account, path

    def log_artifact(self, local_file, artifact_path=None):
        """Upload one local file under the artifact root.

        :param local_file: Path of the file to upload.
        :param artifact_path: Optional sub-path below the artifact root.
        """
        (container, _, dest_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))
        with open(local_file, "rb") as file:
            container_client.upload_blob(dest_path, file)

    def log_artifacts(self, local_dir, artifact_path=None):
        """Upload a local directory tree under the artifact root.

        :param local_dir: Directory whose files are uploaded recursively.
        :param artifact_path: Optional sub-path below the artifact root.
        """
        (container, _, dest_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        local_dir = os.path.abspath(local_dir)
        for (root, _, filenames) in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                # Blob names are "/"-separated; split on the OS separator so
                # that Windows backslashes do not leak into blob names.
                upload_path = posixpath.join(dest_path,
                                             *rel_path.split(os.sep))
            for f in filenames:
                remote_file_path = posixpath.join(upload_path, f)
                local_file_path = os.path.join(root, f)
                with open(local_file_path, "rb") as file:
                    container_client.upload_blob(remote_file_path, file)

    def list_artifacts(self, path=None):
        """List the direct children of ``path`` as ``FileInfo`` objects.

        :param path: Optional sub-path below the artifact root.
        :return: ``FileInfo`` entries sorted by path; sub-directories are
            reported with ``is_dir=True`` and no size.
        :raises MlflowException: If a listed blob name does not start with
            the artifact root path.
        """
        from azure.storage.blob._models import BlobPrefix
        (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        dest_path = artifact_path
        if path:
            dest_path = posixpath.join(dest_path, path)
        infos = []
        prefix = dest_path + "/"
        results = container_client.walk_blobs(name_starts_with=prefix)
        for r in results:
            if not r.name.startswith(artifact_path):
                raise MlflowException(
                    "The name of the listed Azure blob does not begin with the specified"
                    " artifact path. Artifact path: {artifact_path}. Blob name:"
                    " {blob_name}".format(artifact_path=artifact_path,
                                          blob_name=r.name))
            if isinstance(r, BlobPrefix):
                # This is a prefix for items in a subdirectory
                subdir = posixpath.relpath(path=r.name, start=artifact_path)
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:
                # Just a plain old blob
                file_name = posixpath.relpath(path=r.name,
                                              start=artifact_path)
                infos.append(FileInfo(file_name, False, r.size))
        return sorted(infos, key=lambda f: f.path)

    def _download_file(self, remote_file_path, local_path):
        """Download one blob, relative to the artifact root, to ``local_path``."""
        (container, _, remote_root_path) = self.parse_wasbs_uri(self.artifact_uri)
        container_client = self.client.get_container_client(container)
        remote_full_path = posixpath.join(remote_root_path, remote_file_path)
        with open(local_path, "wb") as file:
            container_client.download_blob(remote_full_path).readinto(file)

    def delete_artifacts(self, artifact_path=None):
        """Not supported: always raises ``MlflowException``."""
        raise MlflowException('Not implemented yet')
def connect_service(url: str, creds: str) -> BlobServiceClient:
    '''Create a BlobServiceClient for the account at ``url``.

    ``creds`` is passed straight through as the client's credential. Other
    ways of connecting may be added later.
    '''
    service = BlobServiceClient(account_url=url, credential=creds)
    return service
class Connector():
    """Azure blob access helper.

    A container is addressed as a ``folder``. An optional ``resource_id`` acts
    as a virtual sub-folder: blobs are then named ``<resource_id>/<filename>``.
    """

    def __init__(self, **kwargs):
        """Create the underlying ``BlobServiceClient``.

        Required keyword arguments: ``account_url`` and ``account_key``
        (a ``KeyError`` is raised when either is missing).
        """
        self.account_url = kwargs["account_url"]
        self.account_key = kwargs["account_key"]
        self.client = BlobServiceClient(
            account_url=self.account_url, credential=self.account_key
        )

    @staticmethod
    def _blob_name(filename, resource_id=None):
        """Return the blob name for ``filename``, prefixed by ``resource_id`` if given."""
        if resource_id is not None:
            return f"{resource_id}/{filename}"
        return filename

    @staticmethod
    def _blob_info(blob, filename):
        """Build the file-info dict for ``blob`` using ``filename`` as its display name."""
        return {
            "filename": filename,
            "creation_date": blob["creation_time"],
            "modified_date": blob["last_modified"],
            "file_size": blob["size"],
        }

    def _get_container_client(self, folder):
        """Return the container client for ``folder``."""
        return self.client.get_container_client(container=folder)

    def _get_blob_client(self, folder, filename):
        """Return the blob client for blob ``filename`` inside ``folder``."""
        return self.client.get_blob_client(container=folder, blob=filename)

    def list_files(self, folder, resource_id=None, prefix='', sufix='', full_path=False):
        """Return the names of the blobs in ``folder``.

        :param folder: Container name.
        :param resource_id: Optional virtual sub-folder; only blobs below
            ``<resource_id>/`` are listed.
        :param prefix: Keep only names starting with this string.
        :param sufix: Keep only names ending with this string.
        :param full_path: When true return the full blob name, otherwise only
            its last ``/``-separated segment.
        :return: List of blob names.
        """
        container_client = self._get_container_client(folder)
        # Terminate the listing prefix with "/" so that resource "r1" does not
        # also match sibling resources such as "r10/...". A resource_id that
        # already ends with "/" is used as given.
        if not resource_id:
            listing_prefix = ''
        elif resource_id.endswith('/'):
            listing_prefix = resource_id
        else:
            listing_prefix = resource_id + '/'

        file_list = []
        for blob in container_client.list_blobs(name_starts_with=listing_prefix):
            if full_path:
                blob_name = blob['name']
            else:
                blob_name = blob['name'].split('/').pop()
            if blob_name.startswith(prefix) and blob_name.endswith(sufix):
                file_list.append(blob_name)
        return file_list

    def list_files_info(self, folder, resource_id=None, prefix='', sufix='', full_path=False):
        """
        Returns a list with info of all files from folder
        [{"filename": string,
          "creation_date": timestamp,
          "modified_date": timestamp,
          "file_size": int}
        ]

        Unless ``full_path`` is true, the leading ``<resource_id>/`` is removed
        from each returned filename.
        """
        container_client = self._get_container_client(folder)
        listing_prefix = resource_id + '/' if resource_id is not None else ''

        file_list = []
        for blob in container_client.list_blobs(name_starts_with=listing_prefix):
            if full_path or resource_id is None:
                blob_name = blob['name']
            else:
                blob_name = blob['name'].replace(resource_id + '/', '', 1)
            if blob_name.startswith(prefix) and blob_name.endswith(sufix):
                file_list.append(self._blob_info(blob, blob_name))
        return file_list

    def is_file(self, folder, filename, resource_id=None):
        """ Returns true if file exists """
        blob_name = self._blob_name(filename, resource_id)
        try:
            blob_client = self._get_blob_client(folder, blob_name)
            blob_client.get_blob_properties()
            return True
        except ResourceNotFoundError:
            return False

    def get_file_info(self, folder, filename, resource_id=None):
        """
        Returns file info in a dict
        {"filename": string,
         "creation_date": timestamp,
         "modified_date": timestamp,
         "file_size": int
        }

        ``filename`` in the result is the last ``/``-separated segment of the
        blob name.
        """
        blob_client = self._get_blob_client(
            folder, self._blob_name(filename, resource_id))
        blob = blob_client.get_blob_properties()
        return self._blob_info(blob, blob['name'].split('/').pop())

    def download_file(self, folder, filename, resource_id=None):
        """Return the full content of the blob as read by ``readall()``.

        :raises Exception: If starting the download fails; the original error
            is chained as ``__cause__``.
        """
        blob_client = self._get_blob_client(
            folder, self._blob_name(filename, resource_id))
        try:
            download_stream = blob_client.download_blob()
        except Exception as err:
            raise Exception(f'Error downloading {filename}: {err}') from err
        return download_stream.readall()

    def upload_file(self, folder, filename, data, resource_id=None):
        """Upload ``data`` (must be ``bytes``) as a blob.

        :raises Exception: If ``data`` is not ``bytes``, or if the upload or
            the follow-up properties lookup fails (original error chained).
        """
        if not isinstance(data, bytes):
            raise Exception(f'Data should be byte type, {type(data)} detected.')

        blob_client = self._get_blob_client(
            folder, self._blob_name(filename, resource_id))
        try:
            blob_client.upload_blob(data=data)
            # Fetch the properties right away so a blob that did not land
            # surfaces as an error here.
            blob_client.get_blob_properties()
        except Exception as err:
            raise Exception(f'Error uploading {filename}: {err}') from err

    def delete_file(self, folder, filename, resource_id=None):
        """Delete the blob.

        :raises Exception: If the deletion fails (original error chained).
        """
        blob_client = self._get_blob_client(
            folder, self._blob_name(filename, resource_id))
        try:
            blob_client.delete_blob()
        except Exception as err:
            raise Exception(f'Error deleting {filename}: {err}') from err
class StorageBlobEncryptionTest(StorageTestCase):
    """Tests for client-side encryption of blobs through ``BlobServiceClient``."""

    # --Helpers-----------------------------------------------------------------
    def _setup(self, name, key):
        """Create the service client with small transfer thresholds and test defaults."""
        self.bsc = BlobServiceClient(
            self.account_url(name, "blob"),
            credential=key,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024,
            max_page_size=4 * 1024,
            max_single_get_size=1024,
            max_chunk_get_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.blob_types = (BlobType.BlockBlob, BlobType.PageBlob, BlobType.AppendBlob)
        self.bytes = b'Foo'

        if self.is_live:
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()

    def _teardown(self, file_name):
        """Best-effort removal of a temporary file created by a test."""
        if path.isfile(file_name):
            try:
                remove(file_name)
            except OSError:
                # Cleanup is best-effort; a leftover temp file must not fail the test.
                pass

    def _get_container_reference(self):
        """Return a resource name based on the container prefix."""
        return self.get_resource_name(TEST_CONTAINER_PREFIX)

    def _get_blob_reference(self, blob_type):
        """Return a resource name based on the prefix registered for ``blob_type``."""
        return self.get_resource_name(TEST_BLOB_PREFIXES[blob_type.value])

    def _create_small_blob(self, blob_type):
        """Upload ``self.bytes`` as a new blob of ``blob_type`` and return its client."""
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(self.bytes, blob_type=blob_type)
        return blob

    # --Test cases for blob encryption ----------------------------------------

    @GlobalStorageAccountPreparer()
    def test_missing_attribute_kek_wrap(self, resource_group, location, storage_account, storage_account_key):
        """Upload raises AttributeError when the KEK lacks any wrap-side attribute."""
        # In the shared method _generate_blob_encryption_key
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')

        # Act
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute wrap_key
        self.bsc.key_encryption_key = invalid_key_1
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.wrap_key = valid_key.wrap_key
        invalid_key_2.get_kid = valid_key.get_kid
        # No attribute get_key_wrap_algorithm
        self.bsc.key_encryption_key = invalid_key_2
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_3 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_3.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_3.wrap_key = valid_key.wrap_key
        # No attribute get_kid
        # Use invalid_key_3 here; assigning invalid_key_2 again would merely
        # repeat the previous case and leave the missing-get_kid path untested.
        self.bsc.key_encryption_key = invalid_key_3
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_invalid_value_kek_wrap(self, resource_group, location, storage_account, storage_account_key):
        """Upload raises AttributeError when a wrap-side KEK attribute is None."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')

        self.bsc.key_encryption_key.get_key_wrap_algorithm = None
        try:
            self._create_small_blob(BlobType.BlockBlob)
            self.fail()
        except AttributeError as e:
            self.assertEqual(
                str(e),
                _ERROR_OBJECT_INVALID.format('key encryption key', 'get_key_wrap_algorithm'))

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.get_kid = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.wrap_key = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_missing_attribute_kek_unwrap(self, resource_group, location, storage_account, storage_account_key):
        """Download raises HttpResponseError when the KEK lacks an unwrap-side attribute."""
        # Shared between all services in decrypt_blob
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')
        self.bsc.key_encryption_key = valid_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        # Note that KeyWrapper has a default value for key_id, so these Exceptions
        # are not due to non_matching kids.
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute unwrap_key
        blob.key_encryption_key = invalid_key_1
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.unwrap_key = valid_key.unwrap_key
        blob.key_encryption_key = invalid_key_2
        # No attribute get_kid
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_invalid_value_kek_unwrap(self, resource_group, location, storage_account, storage_account_key):
        """Download fails with 'Decryption failed.' when ``unwrap_key`` is None."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = KeyWrapper('key1')
        blob.key_encryption_key.unwrap_key = None

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @GlobalStorageAccountPreparer()
    def test_get_blob_kek(self, resource_group, location, storage_account, storage_account_key):
        """An encrypted blob downloads back to the original bytes using the KEK."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content.chunks())), self.bytes)

    @GlobalStorageAccountPreparer()
    def test_get_blob_resolver(self, resource_group, location, storage_account, storage_account_key):
        """Download succeeds through the key resolver when no KEK is set."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        key_resolver = KeyResolver()
        key_resolver.put_key(self.bsc.key_encryption_key)
        self.bsc.key_resolver_function = key_resolver.resolve_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key = None
        content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, self.bytes)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_get_blob_kek_RSA(self, resource_group, location, storage_account, storage_account_key):
        """An encrypted blob round-trips when the KEK is an RSA key wrapper."""
        # We can only generate random RSA keys, so this must be run live or
        # the playback test will fail due to a change in kek values.
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = RSAKeyWrapper('key2')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content.chunks())), self.bytes)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_get_blob_nonmatching_kid(self, resource_group, location, storage_account, storage_account_key):
        """Download fails with 'Decryption failed.' when the KEK's kid was changed."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key.kid = 'Invalid'

        # Assert
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @GlobalStorageAccountPreparer()
    def test_put_blob_invalid_stream_type(self, resource_group, location, storage_account, storage_account_key):
        """Uploading text streams with encryption enabled raises TypeError."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        small_stream = StringIO(u'small')
        large_stream = StringIO(u'large' * self.config.max_single_put_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        # Block blob specific single shot
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(small_stream, length=5)
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

        # Generic blob chunked
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(large_stream)
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_mult_of_block_size(
            self, resource_group, location, storage_account, storage_account_key):
        """Chunked upload round-trips for a size of single-put limit plus one block."""
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(
            self.config.max_single_put_size + self.config.max_block_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_non_mult_of_block_size(
            self, resource_group, location, storage_account, storage_account_key):
        """Chunked upload round-trips for a size of single-put limit plus one byte."""
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = urandom(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_blob_chunking_required_range_specified(
            self, resource_group, location, storage_account, storage_account_key):
        """Chunked upload with an explicit ``length`` stores only that many bytes."""
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size * 2)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content,
            length=self.config.max_single_put_size + 53,
            max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content[:self.config.max_single_put_size + 53], blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_block_blob_single_shot(self, resource_group, location, storage_account, storage_account_key):
        """A small encrypted block blob round-trips."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b'small'
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_range(self, resource_group, location, storage_account, storage_account_key):
        """Uploading a slice with an explicit ``length`` stores exactly that range."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        content = b'Random repeats' * self.config.max_single_put_size * 5

        # All page blob uploads call _upload_chunks, so this will test the ability
        # of that function to handle ranges even though it's a small blob
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content[2:],
            length=self.config.max_single_put_size + 5,
            max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[2:2 + self.config.max_single_put_size + 5], blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_empty(self, resource_group, location, storage_account, storage_account_key):
        """An empty encrypted blob round-trips to empty bytes."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b''
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=2)

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_serial_upload_chunking(self, resource_group, location, storage_account, storage_account_key):
        """Chunked upload with ``max_concurrency=1`` round-trips."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content, blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_beginning_to_middle(self, resource_group, location, storage_account, storage_account_key):
        """Ranged download of the first 50 bytes returns ``content[:50]``."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(
            offset=0, length=50).content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[:50], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_middle_to_end(self, resource_group, location, storage_account, storage_account_key):
        """Ranged downloads from offset 100, with and without ``length``, return the tail."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(offset=100, length=28).content_as_bytes()
        blob_content2 = blob.download_blob(offset=100).content_as_bytes()

        # Assert
        self.assertEqual(content[100:], blob_content)
        self.assertEqual(content[100:], blob_content2)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_middle_to_middle(self, resource_group, location, storage_account, storage_account_key):
        """Ranged download of offset 5, length 93 returns ``content[5:98]``."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5, length=93).content_as_bytes()

        # Assert
        self.assertEqual(content[5:98], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_aligns_on_16_byte_block(self, resource_group, location, storage_account, storage_account_key):
        """Ranged download of offset 48, length 16 returns ``content[48:64]``."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=48, length=16).content_as_bytes()

        # Assert
        self.assertEqual(content[48:64], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_expanded_to_beginning_block_align(
            self, resource_group, location, storage_account, storage_account_key):
        """Ranged download of offset 5, length 50 returns ``content[5:55]``."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5, length=50).content_as_bytes()

        # Assert
        self.assertEqual(content[5:55], blob_content)

    @GlobalStorageAccountPreparer()
    def test_get_blob_range_expanded_to_beginning_iv(self, resource_group, location, storage_account, storage_account_key):
        """Ranged download of offset 22, length 20 returns ``content[22:42]``."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=22, length=20).content_as_bytes()

        # Assert
        self.assertEqual(content[22:42], blob_content)

    @GlobalStorageAccountPreparer()
    def test_put_blob_strict_mode(self, resource_group, location, storage_account, storage_account_key):
        """With ``require_encryption`` set but no KEK, every upload form raises ValueError."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        content = urandom(512)
        file_name = 'blob_strict_mode.temp.dat'

        # Assert
        try:
            for service in self.blob_types:
                blob_name = self._get_blob_reference(service)
                blob = self.bsc.get_blob_client(self.container_name, blob_name)

                with self.assertRaises(ValueError):
                    blob.upload_blob(content, blob_type=service)

                stream = BytesIO(content)
                with self.assertRaises(ValueError):
                    blob.upload_blob(stream, length=512, blob_type=service)

                with open(file_name, 'wb') as stream:
                    stream.write(content)
                with open(file_name, 'rb') as stream:
                    with self.assertRaises(ValueError):
                        blob.upload_blob(stream, blob_type=service)

                with self.assertRaises(ValueError):
                    blob.upload_blob('To encrypt', blob_type=service)
        finally:
            # Remove the temp file even when an assertion above fails.
            self._teardown(file_name)

    @GlobalStorageAccountPreparer()
    def test_get_blob_strict_mode_no_policy(self, resource_group, location, storage_account, storage_account_key):
        """Download raises ValueError when encryption is required but the KEK is removed."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = None

        # Assert
        with self.assertRaises(ValueError):
            blob.download_blob().content_as_bytes()

    @GlobalStorageAccountPreparer()
    def test_get_blob_strict_mode_unencrypted_blob(self, resource_group, location, storage_account, storage_account_key):
        """Download of an unencrypted blob fails once encryption is required."""
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = True
        blob.key_encryption_key = KeyWrapper('key1')

        # Assert
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_block(self, resource_group, location, storage_account, storage_account_key):
        """Block-level operations raise ValueError when a KEK is set."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.stage_block('block1', urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.commit_block_list(['block1'])
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_append(self, resource_group, location, storage_account, storage_account_key):
        """Append-blob operations raise ValueError when a KEK is set."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.AppendBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.append_block(urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_append_blob()
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        # All append_from operations funnel into append_from_stream, so testing one is sufficient
        with self.assertRaises(ValueError) as e:
            blob.upload_blob(b'To encrypt', blob_type=BlobType.AppendBlob)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_invalid_methods_fail_page(self, resource_group, location, storage_account, storage_account_key):
        """Page-blob operations raise ValueError when a KEK is set."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.PageBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.upload_page(urandom(512), offset=0, length=512)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_page_blob(512)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @GlobalStorageAccountPreparer()
    def test_validate_encryption(self, resource_group, location, storage_account, storage_account_key):
        """Manually decrypting the raw stored bytes yields the original content."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        kek = KeyWrapper('key1')
        self.bsc.key_encryption_key = kek
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        # Turn client-side decryption off so the download returns the stored bytes.
        blob.require_encryption = False
        blob.key_encryption_key = None
        content = blob.download_blob()
        data = content.content_as_bytes()

        encryption_data = _dict_to_encryption_data(
            loads(content.properties.metadata['encryptiondata']))
        iv = encryption_data.content_encryption_IV
        content_encryption_key = _validate_and_unwrap_cek(
            encryption_data, kek, None)
        cipher = _generate_AES_CBC_cipher(content_encryption_key, iv)
        decryptor = cipher.decryptor()
        unpadder = PKCS7(128).unpadder()

        content = decryptor.update(data) + decryptor.finalize()
        content = unpadder.update(content) + unpadder.finalize()

        self.assertEqual(self.bytes, content)

    @GlobalStorageAccountPreparer()
    def test_create_block_blob_from_star(self, resource_group, location, storage_account, storage_account_key):
        """Block blobs round-trip when uploaded from bytes, stream, file and text."""
        self._setup(storage_account.name, storage_account_key)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, self.bytes)

        stream = BytesIO(self.bytes)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        file_name = 'block_blob_from_star.temp.dat'
        try:
            with open(file_name, 'wb') as stream:
                stream.write(self.bytes)
            with open(file_name, 'rb') as stream:
                self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

            self._create_blob_from_star(BlobType.BlockBlob, b'To encrypt', 'To encrypt')
        finally:
            # Remove the temp file even when an assertion above fails.
            self._teardown(file_name)

    @GlobalStorageAccountPreparer()
    def test_create_page_blob_from_star(self, resource_group, location, storage_account, storage_account_key):
        """Page blobs round-trip when uploaded from bytes, stream and file."""
        self._setup(storage_account.name, storage_account_key)
        content = self.get_random_bytes(512)
        self._create_blob_from_star(BlobType.PageBlob, content, content)

        stream = BytesIO(content)
        self._create_blob_from_star(BlobType.PageBlob, content, stream, length=512)

        file_name = 'page_blob_from_star.temp.dat'
        try:
            with open(file_name, 'wb') as stream:
                stream.write(content)
            with open(file_name, 'rb') as stream:
                self._create_blob_from_star(BlobType.PageBlob, content, stream)
        finally:
            # Remove the temp file even when an assertion above fails.
            self._teardown(file_name)

    def _create_blob_from_star(self, blob_type, content, data, **kwargs):
        """Upload ``data`` encrypted, assert it downloads as ``content``, then delete the blob."""
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.key_encryption_key = KeyWrapper('key1')
        blob.require_encryption = True
        blob.upload_blob(data, blob_type=blob_type, **kwargs)

        blob_content = blob.download_blob().content_as_bytes()
        self.assertEqual(content, blob_content)
        blob.delete_blob()

    @GlobalStorageAccountPreparer()
    def test_get_blob_to_star(self, resource_group, location, storage_account, storage_account_key):
        """Every download form (chunks, bytes, stream, text) returns the original content."""
        self._setup(storage_account.name, storage_account_key)
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        iter_blob = b"".join(list(blob.download_blob().chunks()))
        bytes_blob = blob.download_blob().content_as_bytes()
        stream_blob = BytesIO()
        blob.download_blob().download_to_stream(stream_blob)
        stream_blob.seek(0)
        text_blob = blob.download_blob(encoding='UTF-8').readall()

        # Assert
        self.assertEqual(self.bytes, iter_blob)
        self.assertEqual(self.bytes, bytes_blob)
        self.assertEqual(self.bytes, stream_blob.read())
        self.assertEqual(self.bytes.decode(), text_blob)
import datetime import json import logging import os import fsspec from azure.storage.blob import BlobServiceClient from newstory_scraper.config import config from newstory_scraper.pub_sub import utils from newstory_scraper.scraper import get_profile logger = logging.getLogger(__name__) try: blob_service_client = BlobServiceClient.from_connection_string( os.environ["AZURE_STORAGE_CONNECTION_STRING"]) except KeyError: UserWarning("Unable to set blob_service_client") def echo(event, registry): print(event.get("message")) def scrape_profile(event, registry): user = event["user"] prefix = event.get("prefix", ".scraper") maximum = int(event.get("maximum", "0")) path = "/".join([ prefix, datetime.datetime.utcnow().strftime("%Y-%m-%d"),
from FlaskExercise import app, db
from flask import flash
from werkzeug.utils import secure_filename
from azure.storage.blob import BlobServiceClient
import uuid

# Blob storage settings are read from the Flask app configuration.
blob_container = app.config['BLOB_CONTAINER']
storage_url = "https://{}.blob.core.windows.net/".format(
    app.config['BLOB_ACCOUNT'])
# Module-level service client shared by the code below.
blob_service = BlobServiceClient(account_url=storage_url,
                                 credential=app.config['BLOB_STORAGE_KEY'])


class Animal(db.Model):
    # Mapped to the 'animals' table.
    __tablename__ = 'animals'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(75))
    scientific_name = db.Column(db.String(75))
    description = db.Column(db.String(800))
    image_path = db.Column(db.String(100))

    def __repr__(self):
        # NOTE(review): no `body` attribute is declared among the columns
        # visible here; this presumably should use `self.name` - confirm.
        return '<Animal {}>'.format(self.body)

    def save_changes(self, file):
        if file:
            # Sanitise the uploaded name, then swap its stem for a UUID while
            # keeping the original extension.
            filename = secure_filename(file.filename)
            # NOTE(review): raises IndexError when the name contains no '.'.
            fileExtension = filename.rsplit('.', 1)[1]
            randomFilename = str(uuid.uuid1())
            filename = randomFilename + '.' + fileExtension
            try:
from django.shortcuts import render
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials
import cv2
from PIL import Image, ImageDraw, ImageFont
import urllib
from azure.storage.blob import BlobServiceClient,BlobClient

# Placeholder value; must be replaced with a real connection string.
connection_string = "<Blob Storage conection string>"
service = BlobServiceClient.from_connection_string(conn_str=connection_string)


def home(request):
    """Render the ``home.html`` template."""
    return render(request,"home.html")


def resultado(request):
    # Text-drawing settings for OpenCV.
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 0.7
    fontColor = (0,0,255)
    lineType = 2
    # Both "namefile" and "link" are taken from the query string.
    name=request.GET["namefile"]
    credentials = ApiKeyCredentials(in_headers={"Prediction-key": "<Prediction Key Aquí>"})
    predictor = CustomVisionPredictionClient("<Zona regional aqui>", credentials)
    blob = BlobClient.from_connection_string(conn_str=connection_string, container_name="images", blob_name=f"{name} training.png")
    url = request.GET["link"]
    # NOTE(review): only `urllib` is imported above; `urllib.request` resolves
    # only if some other module has already imported it - confirm.
    # The image is downloaded to a fixed local file name and read back.
    urllib.request.urlretrieve(url, "python.png")
    imagen=cv2.imread("python.png")
    height, width, channels = imagen.shape
    Resultado = predictor.detect_image_url("<Prediction Key>", "<Iteration>", url)
    for prediction in Resultado.predictions:
        # Only predictions with probability above 0.4 are considered.
        if prediction.probability > 0.4:
            bbox = prediction.bounding_box
            tag = prediction.tag_name
def upload_command(coreclr_args):
    """ Upload the JIT

    Finds the primary JIT and any cross-compilation JITs in
    coreclr_args.product_location and uploads each of them to Azure blob
    storage under <root>/git-hash/OS/architecture/build-flavor/.

    Args:
        coreclr_args (CoreclrArguments): parsed args

    Raises:
        RuntimeError: if the primary JIT is missing, the host OS is not
            recognized, or the azure storage package cannot be imported.
    """

    print("JIT upload")

    def upload_blob(file, blob_name):
        # NOTE: blob_service_client is assigned further down in the enclosing
        # function; it is resolved when this helper is called, not when it is
        # defined.
        blob_client = blob_service_client.get_blob_client(
            container=az_container_name, blob=blob_name)

        # Check if the blob already exists, and delete it if it does, before uploading / replacing it.
        try:
            blob_client.get_blob_properties()
        except Exception:
            # Blob doesn't exist already; that's good
            pass
        else:
            # If no exception, then the blob already exists. Delete it!
            # The delete is deliberately outside the `try`: if it fails, the
            # upload below could not succeed either, so report the real
            # cause instead of hiding it.
            print("Warning: replacing existing blob!")
            blob_client.delete_blob()

        with open(file, "rb") as data:
            blob_client.upload_blob(data)

    # 1. Find all the JIT builds in the product directory
    # 2. Upload them
    #
    # We could also upload debug info, but it's not clear it's needed for most purposes, and it is very big:
    # it increases the upload size from about 190MB to over 900MB for each roll.
    #
    # For reference, the JIT debug info is found:
    #    a. For Windows, in the PDB subdirectory, e.g. PDB\clrjit.pdb
    #    b. For Linux .dbg files, and Mac .dwarf files, in the same directory as the jit, e.g., libcoreclr.so.dbg

    # Target directory: <root>/git-hash/OS/architecture/build-flavor/
    # Note that build-flavor will probably always be Checked.

    files = []

    # First, find the primary JIT that we expect to find.
    jit_name = determine_jit_name(coreclr_args)
    jit_path = os.path.join(coreclr_args.product_location, jit_name)
    if not os.path.isfile(jit_path):
        print("Error: Couldn't find JIT at {}".format(jit_path))
        raise RuntimeError("Missing JIT")

    files.append(jit_path)

    # Next, look for any and all cross-compilation JITs. These are named, e.g.:
    #     clrjit_unix_x64_x64.dll
    #     clrjit_universal_arm_x64.dll
    #     clrjit_universal_arm64_x64.dll
    # and so on, and live in the same product directory as the primary JIT.
    #
    # Note that the expression below explicitly filters out the primary JIT since we added that above.
    # We handle the primary JIT specially so we can error if it is missing. For the cross-compilation
    # JITs, we don't bother trying to ensure that all the ones we might expect are actually there.
    #
    # We don't do a recursive walk because the JIT is also copied to the "sharedFramework" subdirectory,
    # so we don't want to pick that up.

    if coreclr_args.host_os == "OSX":
        allowed_extensions = [".dylib"]  # Add .dwarf for debug info
    elif coreclr_args.host_os == "Linux":
        allowed_extensions = [".so"]  # Add .dbg for debug info
    elif coreclr_args.host_os == "windows":
        allowed_extensions = [".dll"]
    else:
        raise RuntimeError("Unknown OS.")

    cross_jit_paths = [
        os.path.join(coreclr_args.product_location, item)
        for item in os.listdir(coreclr_args.product_location)
        if re.match(r'.*clrjit.*', item) and item != jit_name and any(
            item.endswith(extension) for extension in allowed_extensions)
    ]
    files += cross_jit_paths

    # On Windows, grab the PDB files from a sub-directory.
    # if coreclr_args.host_os == "windows":
    #    pdb_dir = os.path.join(coreclr_args.product_location, "PDB")
    #    if os.path.isdir(pdb_dir):
    #        pdb_paths = [os.path.join(pdb_dir, item) for item in os.listdir(pdb_dir) if re.match(r'.*clrjit.*', item)]
    #        files += pdb_paths

    print("Uploading:")
    for item in files:
        print(" {}".format(item))

    # Only a failed import means the package is missing. A bare `except:`
    # here would also turn Ctrl-C or an unrelated error into a misleading
    # "please install" message.
    try:
        from azure.storage.blob import BlobServiceClient
    except ImportError:
        print("Please install:")
        print(" pip install azure-storage-blob")
        print(
            "See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python"
        )
        raise RuntimeError("Missing azure storage package.")

    blob_service_client = BlobServiceClient(
        account_url=az_blob_storage_account_uri,
        credential=coreclr_args.az_storage_key)
    blob_folder_name = "{}/{}/{}/{}/{}".format(az_builds_root_folder,
                                               coreclr_args.git_hash,
                                               coreclr_args.host_os,
                                               coreclr_args.arch,
                                               coreclr_args.build_type)

    total_bytes_uploaded = 0

    # Should we compress the JIT on upload? It would save space, but it makes it slightly more complicated to use
    # because you can't just "wget" or otherwise download the file and use it immediately -- you need to unzip first.
    # So for now, don't compress it.
    compress_jit = False

    with TempDir() as temp_location:
        for file in files:
            if compress_jit:
                # Zip compress the file we will upload
                zip_name = os.path.basename(file) + ".zip"
                zip_path = os.path.join(temp_location, zip_name)
                print("Compress {} -> {}".format(file, zip_path))
                with zipfile.ZipFile(zip_path, 'w',
                                     zipfile.ZIP_DEFLATED) as zip_file:
                    zip_file.write(file, os.path.basename(file))

                file_stat_result = os.stat(file)
                zip_stat_result = os.stat(zip_path)
                print("Compressed {:n} to {:n} bytes".format(
                    file_stat_result.st_size, zip_stat_result.st_size))
                total_bytes_uploaded += zip_stat_result.st_size

                blob_name = "{}/{}".format(blob_folder_name, zip_name)
                print("Uploading: {} ({}) -> {}".format(
                    file, zip_path,
                    az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(zip_path, blob_name)
            else:
                file_stat_result = os.stat(file)
                total_bytes_uploaded += file_stat_result.st_size
                file_name = os.path.basename(file)
                blob_name = "{}/{}".format(blob_folder_name, file_name)
                print("Uploading: {} -> {}".format(
                    file, az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(file, blob_name)

    print("Uploaded {:n} bytes".format(total_bytes_uploaded))
    print("Finished JIT upload")
class AzureClient(CloudClient):
    """
    Cloud client that manages Underlying Storage (containers) through the
    Azure Blob Storage API
    """

    def __init__(self,
                 account_name=None,
                 credential=None,
                 auth_dict=None,
                 *args,
                 **kwargs):
        """
        Args:
            account_name (str): Storage account name
            credential (str): Storage account key
            auth_dict (dict): When given (and non-empty), its
                STORAGE_ACCOUNT_NAME / STORAGE_ACCOUNT_KEY entries replace
                account_name / credential

        """
        super().__init__(*args, **kwargs)

        if auth_dict:
            account_name = auth_dict.get("STORAGE_ACCOUNT_NAME")
            credential = auth_dict.get("STORAGE_ACCOUNT_KEY")

        # Without both pieces of the credentials nothing else is set up
        if not (account_name and credential):
            return

        self.account_name = account_name
        self.credential = credential
        # create_azure_secret() reads the two attributes assigned above
        self.secret = self.create_azure_secret()

        endpoint = constants.AZURE_BLOB_ENDPOINT_TEMPLATE.format(account_name)
        self.blob_service_client = BlobServiceClient(account_url=endpoint,
                                                     credential=credential)

    def internal_create_uls(self, name, region):
        """
        Creates the Underlying Storage using the Azure API

        Args:
            name (str): The Underlying Storage name to be created
            region: Not used by this implementation

        """
        container_client = self.blob_service_client.get_container_client(name)
        container_client.create_container()

    def internal_delete_uls(self, name):
        """
        Deletes the Underlying Storage using the Azure API

        Args:
            name (str): The Underlying Storage name to be deleted

        """
        container_client = self.blob_service_client.get_container_client(name)
        container_client.delete_container()

    def get_all_uls_names(self):
        """
        Returns a set containing the names of all the containers listed by
        the blob service client

        """
        names = set()
        for container in self.blob_service_client.list_containers():
            names.add(container["name"])
        return names

    def verify_uls_exists(self, uls_name):
        """
        Verifies whether a Underlying Storage with the given uls_name exists

        Args:
            uls_name (str): The Underlying Storage name to be verified

        Returns:
            bool: True if Underlying Storage exists, False otherwise

        """
        try:
            container_client = self.blob_service_client.get_container_client(
                uls_name)
            container_client.get_container_properties()
        except ResourceNotFoundError:
            return False
        return True

    def create_azure_secret(self):
        """
        Create a Kubernetes secret to allow NooBaa to create Azure-based
        backingstores

        Returns:
            The value returned by create_resource for the filled-in secret

        """

        def _encode(text):
            # UTF-8 text -> URL-safe base64, returned as an ASCII string
            return base64.urlsafe_b64encode(
                text.encode("UTF-8")).decode("ascii")

        secret_spec = templating.load_yaml(
            constants.MCG_BACKINGSTORE_SECRET_YAML)

        secret_name = create_unique_resource_name("cldmgr-azure", "secret")
        secret_spec["metadata"]["name"] = secret_name

        namespace = config.ENV_DATA["cluster_namespace"]
        secret_spec["metadata"]["namespace"] = namespace

        encoded_key = _encode(self.credential)
        secret_spec["data"]["AccountKey"] = encoded_key

        encoded_name = _encode(self.account_name)
        secret_spec["data"]["AccountName"] = encoded_name

        return create_resource(**secret_spec)
class StorageBlockBlobTest(StorageTestCase):
    """Block blob tests: staging/committing blocks and uploading blobs.

    Each test works against a container created in setUp. Tests without the
    @record decorator return early when a recording file would be needed, so
    they only execute their body in live mode.
    """

    def setUp(self):
        super(StorageBlockBlobTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=self.settings.STORAGE_ACCOUNT_KEY,
            connection_data_block_size=4 * 1024,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        # Cleanup is best-effort, but must not swallow KeyboardInterrupt /
        # SystemExit the way a bare `except:` would.
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except OSError:
                pass

        return super(StorageBlockBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        """Return a resource name built from TEST_BLOB_PREFIX."""
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        """Upload an empty blob under a new name and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        """Download the blob and assert its content equals expected_data."""
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    class NonSeekableFile(object):
        """File wrapper that exposes only read() and write()."""

        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------

    @record
    def test_put_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8'))
            self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.stage_block('1', u'啊齄丂狛狜')
        self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        blob.stage_block(1, b'block', validate_content=True)

        # Assert: nothing explicit; the test passes if the call does not raise

    @record
    def test_put_block_list(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        put_block_list_resp = blob.commit_block_list(block_list)

        # Assert
        content = blob.download_blob()
        self.assertEqual(b"".join(list(content)), b'AAABBBCCC')
        self.assertEqual(content.properties.etag,
                         put_block_list_resp.get('etag'))
        self.assertEqual(content.properties.last_modified,
                         put_block_list_resp.get('last_modified'))

    @record
    def test_put_block_list_invalid_block_id(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        try:
            # Block '4' was never staged
            block_list = [
                BlobBlock(block_id='1'),
                BlobBlock(block_id='2'),
                BlobBlock(block_id='4')
            ]
            blob.commit_block_list(block_list)
            self.fail()
        except HttpResponseError as e:
            self.assertGreaterEqual(
                str(e).find('specified block list is invalid'), 0)

        # Assert

    @record
    def test_put_block_list_with_md5(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list, validate_content=True)

        # Assert: nothing explicit; the test passes if the call does not raise

    @record
    def test_get_block_list_no_blocks(self):
        # Arrange
        blob = self._create_blob()

        # Act
        block_list = blob.get_block_list('all')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 0)

    @record
    def test_get_block_list_uncommitted_blocks(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = blob.get_block_list('uncommitted')

        # Assert
        # The staged (uncommitted) blocks are expected at index 1.
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 3)
        self.assertEqual(len(block_list[0]), 0)
        self.assertEqual(block_list[1][0].id, '1')
        self.assertEqual(block_list[1][0].size, 3)
        self.assertEqual(block_list[1][1].id, '2')
        self.assertEqual(block_list[1][1].size, 3)
        self.assertEqual(block_list[1][2].id, '3')
        self.assertEqual(block_list[1][2].size, 3)

    @record
    def test_get_block_list_committed_blocks(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list)

        # Act
        block_list = blob.get_block_list('committed')

        # Assert
        # The committed blocks are expected at index 0.
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 3)
        self.assertEqual(block_list[0][0].id, '1')
        self.assertEqual(block_list[0][0].size, 3)
        self.assertEqual(block_list[0][1].id, '2')
        self.assertEqual(block_list[0][1].size, 3)
        self.assertEqual(block_list[0][2].id, '3')
        self.assertEqual(block_list[0][2].size, 3)

    @record
    def test_create_small_block_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_small_block_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)
        update_resp = blob.upload_blob(data2, overwrite=True)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_large_block_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2,
                             overwrite=False,
                             metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)

    @record
    def test_create_large_block_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(data2,
                                       overwrite=True,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_create_blob_from_bytes_single_put(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_from_0_bytes(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b''

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    # NOTE: an earlier definition of test_create_from_bytes_blob_unicode used
    # to precede this one. Python keeps only the last definition of a name in
    # a class body, so that copy never ran (and it compared the downloaded
    # bytes against a str). Only the effective definition is kept.
    @record
    def test_create_from_bytes_blob_unicode(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        data = u'hello world'
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name,
                             data.encode('utf-8'))
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    def test_create_from_bytes_blob_with_lease_id(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        lease = blob.acquire_lease()

        # Act
        create_resp = blob.upload_blob(data, lease=lease)

        # Assert
        output = blob.download_blob(lease=lease)
        self.assertEqual(b"".join(list(output)), data)
        self.assertEqual(output.properties.etag, create_resp.get('etag'))
        self.assertEqual(output.properties.last_modified,
                         create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_metadata(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        blob.upload_blob(data, metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    def test_create_blob_from_bytes_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_bytes_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def callback(response):
            # Record (current, total) whenever the response context carries
            # an upload position
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(data, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_index(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:])

        # Assert
        self.assertEqual(data[3:], b"".join(list(blob.download_blob())))

    @record
    def test_create_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:], length=5)

        # Assert
        self.assertEqual(data[3:8], b"".join(list(blob.download_blob())))

    @record
    def test_create_blob_from_bytes_with_index_and_count_and_properties(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data[3:], length=5, content_settings=content_settings)

        # Assert
        self.assertEqual(data[3:8], b"".join(list(blob.download_blob())))
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_create_blob_from_bytes_non_parallel(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    def test_create_blob_from_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_from_path_non_parallel(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream,
                                           length=100,
                                           max_connections=1)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    def test_create_blob_from_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            # Record (current, total) whenever the response context carries
            # an upload position
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_path_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    def test_create_blob_from_stream_non_seekable_chunked_upload_known_size(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        blob_size = len(data) - 66
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file,
                             length=blob_size,
                             max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_non_seekable_chunked_upload_unknown_size(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_blob_from_stream_with_progress_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            # Record (current, total) whenever the response context carries
            # an upload position
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_stream_chunked_upload_with_count(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_chunked_upload_with_count_and_properties(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_create_blob_from_text(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        create_resp = blob.upload_blob(text)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_from_text_with_encoding(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16')

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    @record
    def test_create_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def callback(response):
            # Record (current, total) whenever the response context carries
            # an upload position
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_blob_from_text_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, encoded_data)

    @record
    def test_create_blob_with_md5(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        blob.upload_blob(data, validate_content=True)

        # Assert: nothing explicit; the test passes if the call does not raise

    def test_create_blob_with_md5_chunked(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True)
def list_blobs(self):
    """Yield each blob in the configured container whose name starts with ``self.path``.

    A BlobServiceClient is built from ``self._connection_string`` and used as
    a context manager, so the ``with`` block stays open while the generator
    is being consumed.
    """
    service = BlobServiceClient.from_connection_string(self._connection_string)
    with service as client:
        container_client = client.get_container_client(self._container_name)
        matching = container_client.list_blobs(name_starts_with=self.path)
        for entry in matching:
            yield entry
def __init__(self, client=None):
    """Store the blob service client to use.

    :param client: Client to use as-is. When None, a BlobServiceClient is
        built from the AZURE_STORAGE_CONNECTION_STRING environment variable.
    """
    if client is not None:
        self.client = client
        return

    connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    self.client = BlobServiceClient.from_connection_string(connection_string)
from __future__ import print_function import os from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient try: # get the connection string connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING') # Create the BlobServiceClient object which will be used to create a container client blob_service_client = BlobServiceClient.from_connection_string(connect_str) # main menu loop while True: request = input( "Do you wish to [(s)earch] or [(d)ownload] or [exit]? > ") request = request.strip().lower() print("") # exit the loop if request == "exit": break #search for records elif request == "search" or request == "s": choice = input( "SEARCH> All containers [all], a container [con], or an object [obj]? > " ) choice = choice.strip().lower() print("") # list all containers and blobs
def __init__(self, azure_storage_conn_string):
    """Set up the logger and the blob service client.

    :param azure_storage_conn_string: Connection string passed to
        BlobServiceClient.from_connection_string.
    """
    logger_wrapper = Logger()
    self._logger = logger_wrapper.logger

    service_client = BlobServiceClient.from_connection_string(
        azure_storage_conn_string)
    self.blob_service_client = service_client