async def init_blob_for_streaming_upload(
    container: ContainerClient,
    blob_name: str,
    content_type: str,
    content_encoding: str,
    data: Any,
    return_sas_token: bool = True,
) -> str:
    """
    Uploads the given data to a blob record. If a blob with the given name
    already exists, it throws an error.

    Returns a URI with a SAS token to access the newly created blob.
    """
    await create_container_using_client(container)
    logger.info(
        f"Streaming blob '{blob_name}' to container "
        f"'{container.container_name}' on account: '{container.account_name}'")

    content_settings = ContentSettings(content_type=content_type,
                                       content_encoding=content_encoding)
    blob = container.get_blob_client(blob_name)
    # upload_blob stages and commits the blocks itself; calling stage_block()
    # or commit_block_list() without arguments would raise a TypeError.
    await blob.upload_blob(data, content_settings=content_settings)
    logger.debug(f"  - blob '{blob_name}' uploaded. generating sas token.")

    if return_sas_token:
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(days=14),
        )
        uri = blob.url + "?" + sas_token
    else:
        uri = remove_sas_token(blob.url)

    logger.debug(f"  - blob access url: '{uri}'.")
    return uri
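# --- Hedged usage sketch (not part of the original module) ---
# A minimal async driver for init_blob_for_streaming_upload above, assuming the
# aio ContainerClient and a connection string kept in an environment variable;
# the env var, container, and blob names are illustrative placeholders.
import asyncio
import os

from azure.storage.blob.aio import ContainerClient


async def _demo_streaming_upload() -> None:
    container = ContainerClient.from_connection_string(
        os.environ["AZURE_STORAGE_CONNECTION_STRING"],  # assumed setting name
        container_name="uploads",
    )
    async with container:
        uri = await init_blob_for_streaming_upload(
            container,
            blob_name="report.json",
            content_type="application/json",
            content_encoding="utf-8",
            data=b'{"status": "ok"}',
        )
        print(uri)  # blob URL with a 14-day read-only SAS appended


if __name__ == "__main__":
    asyncio.run(_demo_streaming_upload())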
def _upload_file(self, file_path, storage_id):
    blob_service_client = BlobServiceClient.from_connection_string(
        conn_str=self._connection_string)
    try:
        blob_service_client.create_container(
            self._blob_container, public_access=PublicAccess.Container)
    except ResourceExistsError:
        # The container already exists; nothing to do.
        pass
    blob_client = blob_service_client.get_blob_client(
        container=self._blob_container, blob=storage_id)
    with open(file_path, "rb") as data:
        blob_client.upload_blob(data)
    token = generate_blob_sas(
        account_name=self._storage_name,
        account_key=self._storage_key,
        container_name=self._blob_container,
        blob_name=storage_id,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1))
    return f"{blob_client.url}?{token}"
def create_blob_link(self, blob_folder, blob_name) -> str:
    if blob_folder:
        full_path_blob = f"{blob_folder}/{blob_name}"
    else:
        full_path_blob = blob_name
    url = (f"https://{self.account_name}.blob.core.windows.net/"
           f"{self.destination}/{full_path_blob}")
    sas_token = generate_blob_sas(
        account_name=self.account_name,
        account_key=self.account_key,
        container_name=self.destination,
        blob_name=full_path_blob,
        permission=BlobSasPermissions(read=True, delete_previous_version=False),
        expiry=datetime.utcnow() + timedelta(days=self.expiry_download_links),
    )
    url_with_sas = f"{url}?{sas_token}"
    return url_with_sas
def _generate_download_url(self, path_info, expires=3600):
    from azure.storage.blob import (  # pylint:disable=no-name-in-module
        BlobSasPermissions,
        generate_blob_sas,
    )

    expires_at = datetime.utcnow() + timedelta(seconds=expires)

    blob_client = self.blob_service.get_blob_client(path_info.bucket,
                                                    path_info.path)
    sas_token = generate_blob_sas(
        blob_client.account_name,
        blob_client.container_name,
        blob_client.blob_name,
        account_key=blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=expires_at,
    )
    return blob_client.url + "?" + sas_token
def test_append_block_from_url_with_if_match(self):
    # Arrange
    source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
    source_blob_client = self._create_source_blob(source_blob_data)
    sas = source_blob_client.generate_shared_access_signature(
        permission=BlobSasPermissions(read=True, delete=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    destination_blob_name = self._get_blob_reference()
    destination_blob_client = self.bsc.get_blob_client(
        self.container_name, destination_blob_name)
    destination_blob_properties_on_creation = \
        destination_blob_client.create_append_blob()

    # Act part 1: make append block from url calls
    resp = destination_blob_client.append_block_from_url(
        source_blob_client.url + '?' + sas,
        source_offset=0,
        source_length=LARGE_BLOB_SIZE,
        if_match=destination_blob_properties_on_creation.get('etag'))
    self.assertEqual(resp.get('blob_append_offset'), '0')
    self.assertEqual(resp.get('blob_committed_block_count'), 1)
    self.assertIsNotNone(resp.get('etag'))
    self.assertIsNotNone(resp.get('last_modified'))

    # Assert the destination blob is constructed correctly
    destination_blob_properties = destination_blob_client.get_blob_properties()
    self.assertBlobEqual(destination_blob_client, source_blob_data)
    self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
    self.assertEqual(destination_blob_properties.get('last_modified'),
                     resp.get('last_modified'))
    self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

    # Act part 2: append block from url with a failing condition
    with self.assertRaises(ResourceModifiedError):
        destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            if_match='0x111111111111111')
async def _test_append_block_from_url(self):
    # Arrange
    source_blob_name = self.get_resource_name("sourceblob")
    self.config.use_byte_buffer = True  # chunk upload
    source_blob_client, _ = await self._create_block_blob(
        blob_name=source_blob_name, data=self.byte_data)
    source_blob_sas = generate_blob_sas(
        source_blob_client.account_name,
        source_blob_client.container_name,
        source_blob_client.blob_name,
        snapshot=source_blob_client.snapshot,
        account_key=source_blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1)
    )
    source_blob_url = source_blob_client.url + "?" + source_blob_sas

    self.config.use_byte_buffer = False
    destination_blob_client = await self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

    # Act
    append_blob_prop = await destination_blob_client.append_block_from_url(
        source_blob_url,
        source_offset=0,
        source_length=4 * 1024,
        cpk=TEST_ENCRYPTION_KEY)

    # Assert
    self.assertIsNotNone(append_blob_prop['etag'])
    self.assertIsNotNone(append_blob_prop['last_modified'])
    # TODO: verify that the swagger is correct, header wasn't added for the response
    # self.assertTrue(append_blob_prop['request_server_encrypted'])
    self.assertEqual(append_blob_prop['encryption_key_sha256'],
                     TEST_ENCRYPTION_KEY.key_hash)

    # Act: getting the blob content without the cpk should fail
    with self.assertRaises(HttpResponseError):
        await destination_blob_client.download_blob()

    # Act: get the blob content
    blob = await destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

    # Assert content was retrieved with the cpk
    self.assertEqual(await blob.readall(), self.byte_data[0: 4 * 1024])
    self.assertEqual(blob.properties.encryption_key_sha256,
                     TEST_ENCRYPTION_KEY.key_hash)
def url(self, name, expire=None):
    name = self._get_valid_path(name)
    if expire is None:
        expire = self.expiration_secs

    credential = None
    if expire:
        sas_token = generate_blob_sas(
            self.account_name,
            self.azure_container,
            name,
            account_key=self.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=self._expire_at(expire))
        credential = sas_token

    container_blob_url = self.client.get_blob_client(filepath_to_uri(name)).url
    return BlobClient.from_blob_url(container_blob_url, credential=credential).url
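# --- Hedged usage sketch (not part of the original module) ---
# How a storage backend exposing the url() method above is typically called;
# `storage` stands in for a configured instance of the backend class, which
# this sketch assumes rather than constructs.
def build_download_links(storage, names):
    # One read-only link per file, each valid for ten minutes
    # (expire is a duration in seconds).
    return {name: storage.url(name, expire=600) for name in names}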
def _generate_url(self, package: Package) -> str:
    path = self.get_path(package)
    url_params = generate_blob_sas(
        account_name=self.storage_account_name,
        container_name=self.storage_container_name,
        blob_name=path,
        account_key=self.storage_account_key,
        permission=BlobSasPermissions(read=True),
        # SAS expiry times are interpreted as UTC, so compute the expiry
        # from utcnow() rather than the local-time now().
        expiry=datetime.utcnow() + timedelta(seconds=self.expire_after),
        protocol="https",
    )
    url = "{}/{}/{}?{}".format(
        self.azure_storage_account_url,
        self.storage_container_name,
        path,
        url_params,
    )
    return url
def test_append_block_from_url_with_source_if_none_match(self):
    # Arrange
    source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
    source_blob_client = self._create_source_blob(source_blob_data)
    source_blob_properties = source_blob_client.get_blob_properties()
    sas = generate_blob_sas(
        source_blob_client.account_name,
        source_blob_client.container_name,
        source_blob_client.blob_name,
        snapshot=source_blob_client.snapshot,
        account_key=source_blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True, delete=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    destination_blob_client = self._create_blob()

    # Act part 1: make append block from url calls
    resp = destination_blob_client.append_block_from_url(
        source_blob_client.url + '?' + sas,
        source_offset=0,
        source_length=LARGE_BLOB_SIZE,
        source_etag='0x111111111111111',
        source_match_condition=MatchConditions.IfModified)
    self.assertEqual(resp.get('blob_append_offset'), '0')
    self.assertEqual(resp.get('blob_committed_block_count'), 1)
    self.assertIsNotNone(resp.get('etag'))
    self.assertIsNotNone(resp.get('last_modified'))

    # Assert the destination blob is constructed correctly
    destination_blob_properties = destination_blob_client.get_blob_properties()
    self.assertBlobEqual(destination_blob_client, source_blob_data)
    self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
    self.assertEqual(destination_blob_properties.get('last_modified'),
                     resp.get('last_modified'))
    self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

    # Act part 2: append block from url with a failing condition
    with self.assertRaises(ResourceNotFoundError):
        destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            source_etag=source_blob_properties.get('etag'),
            source_match_condition=MatchConditions.IfModified)
def _get_signed_url(self, prefix: str, oid: str, expires_in: int,
                    filename: Optional[str] = None,
                    **permissions: bool) -> str:
    blob_name = self._get_blob_path(prefix, oid)
    permissions = BlobSasPermissions(**permissions)
    token_expires = (datetime.now(tz=timezone.utc)
                     + timedelta(seconds=expires_in))

    extra_args = {}
    if filename:
        extra_args['content_disposition'] = f'attachment; filename="{filename}"'

    sas_token = generate_blob_sas(
        account_name=self.blob_svc_client.account_name,
        account_key=self.blob_svc_client.credential.account_key,
        container_name=self.container_name,
        blob_name=blob_name,
        permission=permissions,
        expiry=token_expires,
        **extra_args)

    blob_client = BlobClient(self.blob_svc_client.url,
                             container_name=self.container_name,
                             blob_name=blob_name,
                             credential=sas_token)
    return blob_client.url  # type: ignore
def get_container_uri(connection_string: str, container_name: str) -> str:
    """
    Creates and initializes a container; returns a URI with a SAS
    read/write token to access it.
    """
    container = create_container(connection_string, container_name)
    logger.info(
        f"Creating SAS token for container '{container_name}' on account: "
        f"'{container.account_name}'")

    # Container-level SAS tokens take ContainerSasPermissions,
    # not BlobSasPermissions.
    sas_token = generate_container_sas(
        container.account_name,
        container.container_name,
        account_key=container.credential.account_key,
        permission=ContainerSasPermissions(read=True, add=True, write=True,
                                           create=True),
        expiry=datetime.utcnow() + timedelta(days=14))
    uri = container.url + "?" + sas_token
    logger.debug(f"  - container url: '{uri}'.")
    return uri
async def _setup(self):
    if not self.is_playback():
        try:
            # create source blob to be copied from
            self.source_blob_name = self.get_resource_name('srcblob')
            self.source_blob_data = self.get_random_bytes(4 * 1024)
            source_blob = self.bsc.get_blob_client(self.container_name,
                                                   self.source_blob_name)

            await self.bsc.create_container(self.container_name)
            await source_blob.upload_blob(self.source_blob_data)

            # generate a SAS so that it is accessible with a URL
            sas_token = source_blob.generate_shared_access_signature(
                permission=BlobSasPermissions(read=True),
                expiry=datetime.utcnow() + timedelta(hours=1),
            )
            sas_source = BlobClient.from_blob_url(source_blob.url,
                                                  credential=sas_token)
            self.source_blob_url = sas_source.url
        except Exception:
            # Setup may fail on re-runs (e.g. the container already exists);
            # any real problem surfaces in the tests themselves.
            pass
async def _setup(self, storage_account, key):
    # test chunking functionality by reducing the size of each chunk,
    # otherwise the tests would take too long to execute
    self.bsc = BlobServiceClient(
        self.account_url(storage_account, "blob"),
        credential=key,
        connection_data_block_size=4 * 1024,
        max_single_put_size=32 * 1024,
        max_block_size=4 * 1024,
        transport=AiohttpTestTransport())
    self.config = self.bsc._config
    self.container_name = self.get_resource_name('utcontainer')

    # create source blob to be copied from
    self.source_blob_name = self.get_resource_name('srcblob')
    self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
    blob = self.bsc.get_blob_client(self.container_name, self.source_blob_name)

    if self.is_live:
        try:
            await self.bsc.create_container(self.container_name)
        except ResourceExistsError:
            # container left over from a previous run
            pass
        await blob.upload_blob(self.source_blob_data, overwrite=True)

    # generate a SAS so that it is accessible with a URL
    sas_token = generate_blob_sas(
        blob.account_name,
        blob.container_name,
        blob.blob_name,
        snapshot=blob.snapshot,
        account_key=blob.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )
    self.source_blob_url = BlobClient.from_blob_url(
        blob.url, credential=sas_token).url
    self.source_blob_url_without_sas = blob.url
def generate_url(self, seconds, display_filename=None, content_type=None,
                 inline=False):
    if content_type is None:
        content_type = self.content_type
    if display_filename is not None:
        disposition = "attachment; filename=" + display_filename
    elif inline:
        disposition = "inline"
    else:
        disposition = None
    token = generate_blob_sas(
        self.blob_client.account_name,
        self.blob_client.container_name,
        self.blob_client.blob_name,
        snapshot=self.blob_client.snapshot,
        account_key=self.blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.datetime.utcnow() + datetime.timedelta(seconds=seconds),
        cache_control='no-cache',
        content_disposition=disposition,
        content_type=content_type
    )
    return self.blob_client.url + '?' + token
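# --- Hedged usage sketch (not part of the original module) ---
# Typical calls to the generate_url() method above; `attachment` stands in
# for an instance of the owning class (assumed, not constructed here).
def example_attachment_links(attachment):
    # Force a download dialog with a friendly filename, valid for one hour.
    download_url = attachment.generate_url(3600, display_filename="report.pdf")
    # Render inline (e.g. an image in an <img> tag), valid for five minutes.
    inline_url = attachment.generate_url(300, inline=True)
    return download_url, inline_url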
def create_sas(self, blob_path, hour_exp: float, **permissions):
    permissions = BlobSasPermissions(
        read=permissions.pop('read', True),
        write=permissions.pop('write', True),
        delete=permissions.pop('delete', True),
        add=permissions.pop('add', True),
        create=permissions.pop('create', True))
    sas_token = generate_blob_sas(
        self.container_client.account_name,
        self.container_client.container_name,
        blob_name=blob_path,
        permission=permissions,
        account_key=self.container_client.credential.account_key,
        expiry=datetime.utcnow() + timedelta(hours=hour_exp),
        start=datetime.utcnow() - timedelta(minutes=1))
    blob_path = urllib.parse.quote(blob_path)
    sas_url = _make_url(self.container_url, blob_path, sas_token=sas_token)
    return sas_url
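# --- Hedged usage sketch (not part of the original module) ---
# create_sas() above defaults every permission to True, so callers narrow the
# grant by passing keywords; `client` stands in for an instance of the owning
# class (assumed, not constructed here).
def read_only_link(client, blob_path):
    # Two-hour URL with everything but read switched off.
    return client.create_sas(blob_path, hour_exp=2.0,
                             write=False, delete=False,
                             add=False, create=False)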
def build_resource_file(self, file_path, container_path: str, duration_hours=24):
    """
    Uploads a local file to an Azure Blob storage container.

    :param str file_path: The local path to the file.
    :param str container_path: The path where the file should be placed in
        the container before executing the task.
    :rtype: `azure.batch.models.ResourceFile`
    :return: A ResourceFile initialized with a SAS URL appropriate for
        Batch tasks.
    """
    # print("Uploading file {} to container [{}]...".format(
    #     file_path, self.config.BLOB_CONTAINER_NAME))
    blob_name = os.path.basename(file_path)
    blob_client = self.container_client.get_blob_client(blob_name)
    try:
        blob_client.delete_blob()
    except ResourceNotFoundError:
        pass
    with open(os.path.join(self.config.BATCH_DIRECTORY, file_path), "rb") as data:
        blob_client.upload_blob(data, blob_type="BlockBlob")
    sas_token = generate_blob_sas(
        blob_client.account_name,
        blob_client.container_name,
        blob_client.blob_name,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=duration_hours),
        account_key=self.config.STORAGE_ACCOUNT_KEY,
    )
    return models.ResourceFile(http_url=blob_client.url + "?" + sas_token,
                               file_path=container_path)
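# --- Hedged usage sketch (not part of the original module) ---
# build_resource_file() above returns an azure.batch ResourceFile whose SAS
# URL a Batch task can fetch at startup; `helper` stands in for an instance
# of the owning class (assumed, not constructed here).
def stage_task_script(helper):
    # Upload ./run.py and have it appear as scripts/run.py inside the task,
    # readable for 48 hours.
    return helper.build_resource_file("run.py", "scripts/run.py",
                                      duration_hours=48)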
def test_set_blob_tags_using_blob_sas(self, resource_group, location,
                                      storage_account, storage_account_key):
    token = generate_account_sas(
        storage_account.name,
        storage_account_key,
        ResourceTypes(service=True, container=True, object=True),
        AccountSasPermissions(write=True, list=True, read=True,
                              delete_previous_version=True, tag=True,
                              filter_by_tags=True),
        datetime.utcnow() + timedelta(hours=1),
    )
    self._setup(storage_account, token)
    tags = {"year": '1000', "tag2": "secondtag", "tag3": "thirdtag",
            "habitat_type": 'Shallow Lowland Billabongs'}
    blob_client, _ = self._create_block_blob(tags=tags,
                                             container_name=self.container_name)
    token1 = generate_blob_sas(
        storage_account.name,
        self.container_name,
        blob_client.blob_name,
        account_key=storage_account_key,
        permission=BlobSasPermissions(delete_previous_version=True, tag=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )
    blob_client = BlobClient.from_blob_url(blob_client.url, token1)

    blob_client.set_blob_tags(tags=tags)

    tags_on_blob = blob_client.get_blob_tags()
    self.assertEqual(len(tags_on_blob), len(tags))

    if self.is_live:
        sleep(10)

    # To filter in a specific container use:
    # where = "@container='{}' and tag1='1000' and tag2 = 'secondtag'".format(container_name1)
    where = "\"year\"='1000' and tag2 = 'secondtag' and tag3='thirdtag'"
    blob_list = self.bsc.find_blobs_by_tags(filter_expression=where,
                                            results_per_page=2).by_page()
    first_page = next(blob_list)
    items_on_page1 = list(first_page)
    self.assertEqual(1, len(items_on_page1))
def setUp(self):
    super(StorageBlockBlobTest, self).setUp()
    url = self._get_account_url()
    credential = self._get_shared_key_credential()

    # test chunking functionality by reducing the size of each chunk,
    # otherwise the tests would take too long to execute
    self.bsc = BlobServiceClient(
        url,
        credential=credential,
        connection_data_block_size=4 * 1024,
        max_single_put_size=32 * 1024,
        max_block_size=4 * 1024)
    self.config = self.bsc._config
    self.container_name = self.get_resource_name('utcontainer')

    # create source blob to be copied from
    self.source_blob_name = self.get_resource_name('srcblob')
    self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
    blob = self.bsc.get_blob_client(self.container_name, self.source_blob_name)

    if not self.is_playback():
        self.bsc.create_container(self.container_name)
        blob.upload_blob(self.source_blob_data)

    # generate a SAS so that it is accessible with a URL
    sas_token = generate_blob_sas(
        blob.account_name,
        blob.container_name,
        blob.blob_name,
        snapshot=blob.snapshot,
        account_key=blob.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )
    self.source_blob_url = BlobClient.from_blob_url(
        blob.url, credential=sas_token).url
def test_append_block_from_url_and_validate_content_md5(self):
    # Arrange
    source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
    source_blob_client = self._create_source_blob(source_blob_data)
    src_md5 = StorageContentValidation.get_content_md5(source_blob_data)
    sas = generate_blob_sas(
        source_blob_client.account_name,
        source_blob_client.container_name,
        source_blob_client.blob_name,
        snapshot=source_blob_client.snapshot,
        account_key=source_blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True, delete=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    destination_blob_client = self._create_blob()

    # Act part 1: make append block from url calls with correct md5
    resp = destination_blob_client.append_block_from_url(
        source_blob_client.url + '?' + sas, source_content_md5=src_md5)
    self.assertEqual(resp.get('blob_append_offset'), '0')
    self.assertEqual(resp.get('blob_committed_block_count'), 1)
    self.assertIsNotNone(resp.get('etag'))
    self.assertIsNotNone(resp.get('last_modified'))

    # Assert the destination blob is constructed correctly
    destination_blob_properties = destination_blob_client.get_blob_properties()
    self.assertBlobEqual(destination_blob_client, source_blob_data)
    self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
    self.assertEqual(destination_blob_properties.get('last_modified'),
                     resp.get('last_modified'))

    # Act part 2: append block from url with wrong md5
    with self.assertRaises(HttpResponseError):
        destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_content_md5=StorageContentValidation.get_content_md5(b"POTATO"))
def _setup(self, bsc):
    self.container_name = self.get_resource_name('utcontainer')

    # create source blob to be copied from
    self.source_blob_name = self.get_resource_name('srcblob')
    self.source_blob_data = self.get_random_bytes(4 * 1024)
    source_blob = bsc.get_blob_client(self.container_name, self.source_blob_name)

    if self.is_live:
        bsc.create_container(self.container_name)
        source_blob.upload_blob(self.source_blob_data)

    # generate a SAS so that it is accessible with a URL
    sas_token = generate_blob_sas(
        source_blob.account_name,
        source_blob.container_name,
        source_blob.blob_name,
        snapshot=source_blob.snapshot,
        account_key=source_blob.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )
    sas_source = BlobClient.from_blob_url(source_blob.url, credential=sas_token)
    self.source_blob_url = sas_source.url
def aggregate_results(job_id: str, model_version: str,
                      job_name: str, job_submission_timestamp: str) -> str:
    log.info(f'server_job, aggregate_results starting, job_id: {job_id}')

    container_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API)
    # when people download this, the timestamp will have : replaced by _
    output_file_path = (f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/'
                        f'{job_id}_detections_{job_name}_{job_submission_timestamp}.json')
    with ContainerClient.from_container_url(
            container_url,
            credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
        # Check if the result blob has already been written (could be by
        # another instance of the API / worker thread). If so, skip
        # aggregating and uploading the results, and just generate the SAS
        # URL, which could still be needed if the previous request_status
        # was `problem`.
        blob_client = container_client.get_blob_client(output_file_path)
        if blob_client.exists():
            log.warning('The output file already exists, likely because '
                        'another monitoring thread already wrote it.')
        else:
            task_outputs_dir = (f'api_{api_config.API_INSTANCE_NAME}/'
                                f'job_{job_id}/task_outputs/')
            generator = container_client.list_blobs(
                name_starts_with=task_outputs_dir)

            blobs = [i for i in generator if i.name.endswith('.json')]

            all_results = []
            for blob_props in tqdm(blobs):
                with container_client.get_blob_client(blob_props) as blob_client:
                    stream = io.BytesIO()
                    blob_client.download_blob().readinto(stream)
                    stream.seek(0)
                    task_results = json.load(stream)
                    all_results.extend(task_results)

            api_output = {
                'info': {
                    'detector': f'megadetector_v{model_version}',
                    'detection_completion_time': get_utc_time(),
                    'format_version': api_config.OUTPUT_FORMAT_VERSION
                },
                'detection_categories': api_config.DETECTOR_LABEL_MAP,
                'images': all_results
            }

            # upload the output JSON to the Job folder
            api_output_as_bytes = bytes(
                json.dumps(api_output, ensure_ascii=False, indent=1),
                encoding='utf-8')
            _ = container_client.upload_blob(name=output_file_path,
                                             data=api_output_as_bytes)

    output_sas = generate_blob_sas(
        account_name=api_config.STORAGE_ACCOUNT_NAME,
        container_name=api_config.STORAGE_CONTAINER_API,
        blob_name=output_file_path,
        account_key=api_config.STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True, write=False),
        expiry=datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS)
    )
    output_sas_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API,
        blob=output_file_path,
        sas_token=output_sas)
    log.info(f'server_job, aggregate_results done, job_id: {job_id}')
    log.info(f'output_sas_url: {output_sas_url}')
    return output_sas_url
def _setup(self, storage_account_name, key, container_prefix='utcontainer'):
    account_url = self.account_url(storage_account_name, "blob")
    if not isinstance(account_url, str):
        account_url = account_url.encode('utf-8')
        key = key.encode('utf-8')
    self.bsc = BlobServiceClient(
        account_url,
        credential=key,
        connection_data_block_size=4 * 1024,
        max_single_put_size=32 * 1024,
        max_block_size=4 * 1024)
    self.config = self.bsc._config
    self.container_name = self.get_resource_name(container_prefix)

    # create source blob to be copied from
    self.source_blob_name = self.get_resource_name('srcblob')
    self.source_blob_name_with_special_chars = (
        'भारत¥test/testsubÐirÍ/' + self.get_resource_name('srcÆblob'))
    self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
    self.source_blob_with_special_chars_data = self.get_random_bytes(
        SOURCE_BLOB_SIZE)
    blob = self.bsc.get_blob_client(self.container_name, self.source_blob_name)
    blob_with_special_chars = self.bsc.get_blob_client(
        self.container_name, self.source_blob_name_with_special_chars)

    if self.is_live:
        self.bsc.create_container(self.container_name)
        blob.upload_blob(self.source_blob_data)
        blob_with_special_chars.upload_blob(
            self.source_blob_with_special_chars_data)

    # generate a SAS so that it is accessible with a URL
    sas_token = generate_blob_sas(
        blob.account_name,
        blob.container_name,
        blob.blob_name,
        snapshot=blob.snapshot,
        account_key=blob.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    # generate a SAS for the blob with special characters as well
    sas_token_for_special_chars = generate_blob_sas(
        blob_with_special_chars.account_name,
        blob_with_special_chars.container_name,
        blob_with_special_chars.blob_name,
        snapshot=blob_with_special_chars.snapshot,
        account_key=blob_with_special_chars.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    self.source_blob_url_without_sas = blob.url
    self.source_blob_url = BlobClient.from_blob_url(
        blob.url, credential=sas_token).url
    self.source_blob_url_with_special_chars = BlobClient.from_blob_url(
        blob_with_special_chars.url,
        credential=sas_token_for_special_chars).url
def generate_url(self, blob_name: str, read: bool = True, add: bool = False,
                 create: bool = False, write: bool = False,
                 delete: bool = False, sas: bool = False,
                 access_time: int = 1) -> str:
    """
    Generates a blob URL. It can also carry a Shared Access Signature (SAS)
    if ``sas=True``.

    :param bool write: Write access
        .. versionadded:: 2.0
    :param bool create: Create access
        .. versionadded:: 2.0
    :param bool add: Add access
        .. versionadded:: 2.0
    :param bool read: Read access
        .. versionadded:: 2.0
    :param bool delete: Delete access
        .. versionadded:: 2.0
    :param int access_time: Time (in hours) for which the URL is valid
    :param str blob_name: Name of the blob; this could be a path
    :param bool sas: Set ``True`` to generate a SAS key
    :return: Blob URL

    **Example without ``sas``**

    >>> import os
    >>> from azblobexplorer import AzureBlobDownload
    >>> az = AzureBlobDownload('account name', 'account key', 'container name')
    >>> az.generate_url("filename.txt")
    https://containername.blob.core.windows.net/blobname/filename.txt

    **Example with ``sas``**

    >>> import os
    >>> from azblobexplorer import AzureBlobDownload
    >>> az = AzureBlobDownload('account name', 'account key', 'container name')
    >>> az.generate_url("filename.txt", sas=True)
    https://containername.blob.core.windows.net/blobname/filename.txt?se=2019-11-05T16%3A33%3A46Z&sp=w&sv=2019-02-02&sr=b&sig=t%2BpUG2C2FQKp/Hb8SdCsmaZCZxbYXHUedwsquItGx%2BM%3D
    """
    blob = self.container_client.get_blob_client(blob_name)

    if sas:
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=read, add=add, create=create,
                                          write=write, delete=delete),
            expiry=datetime.utcnow() + timedelta(hours=access_time))
        return blob.url + '?' + sas_token
    else:
        return blob.url
def test_put_block_from_url_and_commit_with_cpk(self, resource_group, location,
                                                storage_account,
                                                storage_account_key):
    # Arrange
    # test chunking functionality by reducing the size of each chunk,
    # otherwise the tests would take too long to execute
    bsc = BlobServiceClient(
        self.account_url(storage_account.name, "blob"),
        credential=storage_account_key,
        connection_data_block_size=1024,
        max_single_put_size=1024,
        min_large_block_upload_threshold=1024,
        max_block_size=1024,
        max_page_size=1024)
    self._setup(bsc)

    # create source blob and get source blob url
    source_blob_name = self.get_resource_name("sourceblob")
    # Make sure using chunk upload, then we can record the request
    self.config.use_byte_buffer = True
    source_blob_client, _ = self._create_block_blob(
        bsc, blob_name=source_blob_name, data=self.byte_data)
    source_blob_sas = generate_blob_sas(
        source_blob_client.account_name,
        source_blob_client.container_name,
        source_blob_client.blob_name,
        snapshot=source_blob_client.snapshot,
        account_key=source_blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1)
    )
    source_blob_url = source_blob_client.url + "?" + source_blob_sas

    # create destination blob
    self.config.use_byte_buffer = False
    destination_blob_client, _ = self._create_block_blob(bsc, cpk=TEST_ENCRYPTION_KEY)

    # Act part 1: make put block from url calls
    destination_blob_client.stage_block_from_url(
        block_id=1,
        source_url=source_blob_url,
        source_offset=0,
        source_length=4 * 1024,
        cpk=TEST_ENCRYPTION_KEY)
    destination_blob_client.stage_block_from_url(
        block_id=2,
        source_url=source_blob_url,
        source_offset=4 * 1024,
        source_length=4 * 1024,
        cpk=TEST_ENCRYPTION_KEY)

    # Assert blocks
    committed, uncommitted = destination_blob_client.get_block_list('all')
    self.assertEqual(len(uncommitted), 2)
    self.assertEqual(len(committed), 0)

    # commit the blocks without cpk should fail
    block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2')]
    with self.assertRaises(HttpResponseError):
        destination_blob_client.commit_block_list(block_list)

    # Act commit the blocks with cpk should succeed
    put_block_list_resp = destination_blob_client.commit_block_list(
        block_list, cpk=TEST_ENCRYPTION_KEY)

    # Assert
    self.assertIsNotNone(put_block_list_resp['etag'])
    self.assertIsNotNone(put_block_list_resp['last_modified'])
    self.assertTrue(put_block_list_resp['request_server_encrypted'])
    self.assertEqual(put_block_list_resp['encryption_key_sha256'],
                     TEST_ENCRYPTION_KEY.key_hash)

    # Act get the blob content
    blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

    # Assert content was retrieved with the cpk
    self.assertEqual(blob.readall(), self.byte_data[0: 8 * 1024])
    self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
    self.assertEqual(blob.properties.last_modified,
                     put_block_list_resp['last_modified'])
    self.assertEqual(blob.properties.encryption_key_sha256,
                     TEST_ENCRYPTION_KEY.key_hash)
    self._teardown(bsc)
async def test_update_page_from_url(self, storage_account_name,
                                    storage_account_key):
    # Arrange
    # test chunking functionality by reducing the size of each chunk,
    # otherwise the tests would take too long to execute
    bsc = BlobServiceClient(
        self.account_url(storage_account_name, "blob"),
        storage_account_key,
        max_single_put_size=1024,
        min_large_block_upload_threshold=1024,
        max_block_size=1024,
        max_page_size=1024,
        transport=AiohttpTestTransport(connection_data_block_size=1024))
    await self._setup(bsc)
    source_blob_name = self.get_resource_name("sourceblob")
    # Make sure using chunk upload, then we can record the request
    self.config.use_byte_buffer = True
    source_blob_client, _ = await self._create_block_blob(
        bsc, blob_name=source_blob_name, data=self.byte_data)
    source_blob_sas = generate_blob_sas(
        source_blob_client.account_name,
        source_blob_client.container_name,
        source_blob_client.blob_name,
        snapshot=source_blob_client.snapshot,
        account_key=source_blob_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=1))
    source_blob_url = source_blob_client.url + "?" + source_blob_sas

    self.config.use_byte_buffer = False
    blob_client = await self._create_page_blob(bsc, cpk=TEST_ENCRYPTION_KEY)

    # Act
    page_blob_prop = await blob_client.upload_pages_from_url(
        source_blob_url,
        offset=0,
        length=len(self.byte_data),
        source_offset=0,
        cpk=TEST_ENCRYPTION_KEY)

    # Assert
    self.assertIsNotNone(page_blob_prop['etag'])
    self.assertIsNotNone(page_blob_prop['last_modified'])
    self.assertTrue(page_blob_prop['request_server_encrypted'])
    # TODO: FIX SWAGGER
    # self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

    # Act: getting the blob content without the cpk should fail
    with self.assertRaises(HttpResponseError):
        await blob_client.download_blob()

    # Act: get the blob content
    blob = await blob_client.download_blob(
        offset=0,
        length=len(self.byte_data),
        cpk=TEST_ENCRYPTION_KEY,
    )

    # Assert content was retrieved with the cpk
    self.assertEqual(await blob.readall(), self.byte_data)
    self.assertEqual(blob.properties.encryption_key_sha256,
                     TEST_ENCRYPTION_KEY.key_hash)
def aggregate_results(job_id, model_version, job_name, job_submission_timestamp):
    log.info(f'server_job, aggregate_results starting, job_id: {job_id}')

    task_outputs_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_outputs/'
    container_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API)
    all_results = []
    with ContainerClient.from_container_url(
            container_url,
            credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
        generator = container_client.list_blobs(name_starts_with=task_outputs_dir)
        blobs = [i for i in generator if i.name.endswith('.json')]
        for blob_props in tqdm(blobs):
            with container_client.get_blob_client(blob_props) as blob_client:
                stream = io.BytesIO()
                blob_client.download_blob().readinto(stream)
                stream.seek(0)
                task_results = json.load(stream)
                all_results.extend(task_results)

        api_output = {
            'info': {
                'detector': f'megadetector_v{model_version}',
                'detection_completion_time': get_utc_time(),
                'format_version': api_config.OUTPUT_FORMAT_VERSION
            },
            'detection_categories': api_config.DETECTOR_LABEL_MAP,
            'images': all_results
        }

        # upload the output JSON to the Job folder
        api_output_as_bytes = bytes(
            json.dumps(api_output, ensure_ascii=False, indent=1),
            encoding='utf-8')
        output_file_path = (f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/'
                            f'{job_id}_detections_{job_name}_{job_submission_timestamp}.json')
        _ = container_client.upload_blob(name=output_file_path,
                                         data=api_output_as_bytes)

    output_sas = generate_blob_sas(
        account_name=api_config.STORAGE_ACCOUNT_NAME,
        container_name=api_config.STORAGE_CONTAINER_API,
        blob_name=output_file_path,
        account_key=api_config.STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True, write=False),
        expiry=datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS))
    output_sas_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API,
        blob=output_file_path,
        sas_token=output_sas)
    log.info(f'server_job, aggregate_results done, job_id: {job_id}')
    log.info(f'output_sas_url: {output_sas_url}')
    return output_sas_url
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')
    try:
        category = req.params.get('category')
        if not category:
            try:
                req_body = req.get_json()
            except ValueError:
                pass
            else:
                category = req_body.get('category')

        if category:
            logging.info(f"category: {category}")
            # Never hard-code the account key in source; read the connection
            # string from application settings (the setting name here is
            # illustrative).
            connect_str: str = os.environ['AZURE_STORAGE_CONNECTION_STRING']

            # Create the BlobServiceClient object which will be used to
            # create a container client
            blob_service_client = BlobServiceClient.from_connection_string(
                connect_str)

            # The container whose blobs are listed
            container_name = "djbtest"
            container = blob_service_client.get_container_client(container_name)

            logging.info("Before List Blobs")
            blobs = container.list_blobs()
            ret = []
            for blob in blobs:
                logging.info("Generate Token")
                logging.info(blob.name)
                sas_token = generate_blob_sas(
                    container.account_name,
                    container.container_name,
                    blob.name,
                    account_key=blob_service_client.credential.account_key,
                    permission=BlobSasPermissions(read=True),
                    expiry=datetime.utcnow() + timedelta(hours=1))

                logging.info("Before create video object")
                video = {
                    "Name": blob.name,
                    "Account": container.account_name,
                    "Container": container.container_name,
                    "SasToken": sas_token
                }
                logging.info("Before append to list")
                ret.append(video)

            logging.info("Converting to JSON")
            json_dump = json.dumps(ret)
            logging.info("About to return")
            print(json_dump)
            return func.HttpResponse(json_dump)
        else:
            return func.HttpResponse(
                "https://www.google.com/logos/doodles/2020/israel-kamakawiwooles-61st-birthday-6753651837108391.2-s.png",
                status_code=200)
    except Exception:
        logging.exception('Exception while listing blobs or generating SAS tokens.')
def retrieve_transcript(identifier, language, speaker_type, service_config):
    blob_service_client = BlobServiceClient.from_connection_string(
        service_config['connection_string'])
    container_client = blob_service_client.get_container_client(identifier)
    blob_client = container_client.get_blob_client('audio.wav')
    sas_blob = generate_blob_sas(account_name=service_config['account_name'],
                                 container_name=identifier,
                                 blob_name='audio.wav',
                                 account_key=service_config['account_key'],
                                 permission=BlobSasPermissions(read=True),
                                 expiry=datetime.utcnow() + timedelta(hours=24))
    uri = blob_client.url + '?' + sas_blob

    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
    configuration.api_key["Ocp-Apim-Subscription-Key"] = service_config[
        'subscription_key']
    configuration.host = (f"https://{service_config['service_region']}"
                          f".api.cognitive.microsoft.com/speechtotext/v3.0")

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
    api = cris_client.DefaultApi(api_client=client)

    try:
        # Specify transcription properties by passing a dict to the
        # properties parameter. See
        # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
        # for supported parameters.
        properties = {
            "punctuationMode": "Automatic",
            "profanityFilterMode": "None",
            "wordLevelTimestampsEnabled": True,
            "diarizationEnabled": (speaker_type == "both"),
            "timeToLive": "PT1H"
        }

        # Use base models for transcription.
        transcription_definition = cris_client.Transcription(
            display_name="Simple transcription",
            description="Simple transcription description",
            locale=language,
            content_urls=[uri],
            properties=properties)

        created_transcription, status, headers = \
            api.create_transcription_with_http_info(
                transcription=transcription_definition)

        # get the transcription Id from the location URI
        transcription_id = headers["location"].split("/")[-1]

        # Log information about the created transcription. If you ask for
        # support, please include this information.
        logging.info(
            f"Created new transcription with id '{transcription_id}' "
            f"in region {service_config['service_region']}")

        logging.info("Checking status.")

        transcript = {}
        completed = False
        while not completed:
            # wait for 5 seconds before refreshing the transcription status
            time.sleep(5)

            transcription = api.get_transcription(transcription_id)
            logging.info(f"Transcription status: {transcription.status}")

            if transcription.status in ("Failed", "Succeeded"):
                completed = True

            if transcription.status == "Succeeded":
                pag_files = api.get_transcription_files(transcription_id)
                for file_data in _paginate(api, pag_files):
                    if file_data.kind != "Transcription":
                        continue
                    results_url = file_data.links.content_url
                    results = requests.get(results_url)
                    transcript = json.loads(results.content)
            elif transcription.status == "Failed":
                raise Exception(
                    f"Transcription failed: {transcription.properties.error.message}")
    finally:
        delete_all_transcriptions(api)

    return transcript
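# --- Hedged usage sketch (not part of the original module) ---
# The service_config mapping retrieve_transcript() expects, inferred from the
# keys the function reads above; every value here is an illustrative
# placeholder, with secrets pulled from assumed environment variables.
import os

def example_transcription():
    service_config = {
        'connection_string': os.environ['AZURE_STORAGE_CONNECTION_STRING'],
        'account_name': 'myaccount',
        'account_key': os.environ['AZURE_STORAGE_KEY'],
        'subscription_key': os.environ['SPEECH_SUBSCRIPTION_KEY'],
        'service_region': 'westus2',
    }
    # speaker_type == "both" enables diarization; the container named by the
    # first argument must already hold an 'audio.wav' blob.
    return retrieve_transcript('meeting-123', 'en-US', 'both', service_config)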