def render_video(request): template = loader.get_template('app/render_video.html') vidstatus = 'No Video Found.' queue_service = QueueService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY']) messages = queue_service.get_messages(os.environ['SVPD_STORAGE_ACCOUNT_READY_TO_ENCODE'], num_messages=1, visibility_timeout=1*60) for message in messages: vidstatus = 'Queued for Rendering: ' + message.content message_obj = json.loads(message.content) access_token = ams_authenticate()['access_token'] asset = ams_post_request(access_token, "Assets", { 'Name': message_obj['filename'], 'AlternateId': message_obj['folder']}) asset_container = urllib.parse.urlparse(asset['Uri']).path[1:] asset_file = ams_post_request(access_token, "Files", { 'IsEncrypted': 'false', 'IsPrimary': 'false', 'MimeType': 'video/mp4', 'ContentFileSize': message_obj['size'], 'Name': message_obj['filename'], 'ParentAssetId': asset['Id']}) block_blob_service = BlockBlobService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY']) from_url = block_blob_service.make_blob_url(os.environ['SVPD_STORAGE_ACCOUNT_UPLOADED'], message_obj['folder'] + '/' + message_obj['filename']) block_blob_service.copy_blob(asset_container, message_obj['filename'], from_url) job = ams_verbose_post_request(access_token, "Jobs", { 'Name': message_obj['filename'], 'InputMediaAssets': [{ '__metadata': { 'uri': os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + asset['Id'] + '\')' } }], 'Tasks': [{ 'Name': 'Adaptive Streaming Task', 'Configuration': 'Adaptive Streaming', 'MediaProcessorId': 'nb:mpid:UUID:ff4df607-d419-42f0-bc17-a481b1331e56', 'TaskBody': '<?xml version="1.0" encoding="utf-16"?><taskBody><inputAsset>JobInputAsset(0)</inputAsset><outputAsset assetCreationOptions="0" assetFormatOption="0" assetName="' + message_obj['filename'] + ' - MES v1.1" storageAccountName="' + os.environ['SVPD_STORAGE_ACCOUNT_NAME'] + '">JobOutputAsset(0)</outputAsset></taskBody>' },{ 'Name': 'Indexing Task', 'Configuration': '<?xml version="1.0" encoding="utf-8"?><configuration version="2.0"><input><metadata key="title" value="blah" /></input><settings></settings><features><feature name="ASR"><settings><add key="Language" value="English" /><add key="GenerateAIB" value="False" /><add key="GenerateKeywords" value="True" /><add key="ForceFullCaption" value="False" /><add key="CaptionFormats" value="ttml;sami;webvtt" /></settings></feature></features></configuration>', 'MediaProcessorId': 'nb:mpid:UUID:233e57fc-36bb-4f6f-8f18-3b662747a9f8', 'TaskBody': '<?xml version="1.0" encoding="utf-16"?><taskBody><inputAsset>JobInputAsset(0)</inputAsset><outputAsset assetCreationOptions="0" assetFormatOption="0" assetName="' + message_obj['filename'] + ' - Indexed" storageAccountName="' + os.environ['SVPD_STORAGE_ACCOUNT_NAME'] + '">JobOutputAsset(1)</outputAsset></taskBody>' }] }) queue_service.put_message(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], json.dumps({ 'filename': message_obj['filename'], 'folder': message_obj['folder'], 'size': message_obj['size'], 'job': job['d']})) queue_service.delete_message(os.environ['SVPD_STORAGE_ACCOUNT_READY_TO_ENCODE'], message.id, message.pop_receipt) return HttpResponse(template.render({ 'vidstatus': vidstatus, }, request))
class AzureStorage:
    def __init__(self, connectionString, container):
        self.BlobService = BlockBlobService(connection_string=connectionString)
        nameValue = UtilityHelper.connectStringToDictionary(connectionString)
        self.AccountName = nameValue['AccountName']
        self.container = container

    def getBaseURL(self):
        return 'https://' + self.AccountName + '.blob.core.windows.net/'

    def uploadByLocalFile(self, localFullFileName, remoteBlobName):
        self.BlobService.create_blob_from_path(self.container, remoteBlobName, localFullFileName)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def uploadByStream(self, streamData, remoteBlobName):
        self.BlobService.create_blob_from_stream(self.container, remoteBlobName, streamData)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def uploadByBytes(self, bytesData, remoteBlobName):
        self.BlobService.create_blob_from_bytes(self.container, remoteBlobName, bytesData)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def delete(self, blobName):
        self.BlobService.delete_blob(self.container, blobName)

    def copy(self, sourceBlobURL, targetBlobName):
        self.BlobService.copy_blob(self.container, targetBlobName, sourceBlobURL)
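# A minimal usage sketch for the AzureStorage wrapper above. The connection
# string env var, container and blob names are assumptions for illustration;
# the server-side copy works here because source and target live in the same
# storage account.
import os

storage = AzureStorage(os.environ['AZURE_STORAGE_CONNECTION_STRING'], 'uploads')
report_url = storage.uploadByLocalFile('./report.pdf', 'reports/report.pdf')
storage.copy(report_url, 'reports/report-backup.pdf')
storage.delete('reports/report.pdf')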
def create_snapshot(file_share, directory_name, file_name, container_name, correlation_guid=None):
    # Generate the correlation GUID per call; a default argument of
    # str(uuid.uuid4()) would be evaluated only once, at definition time.
    if correlation_guid is None:
        correlation_guid = str(uuid.uuid4())

    file_service = FileService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)
    blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)

    file_sas_token = file_service.generate_file_shared_access_signature(
        file_share,
        directory_name,
        file_name,
        permission=FilePermissions.READ,
        expiry=datetime.now() + timedelta(minutes=10))

    file_url = file_service.make_file_url(file_share, directory_name, file_name, sas_token=file_sas_token)

    blob_name = '{0}/{1}/{2}'.format(correlation_guid, directory_name, file_name)
    blob_service.create_container(container_name)

    try:
        blob_service.copy_blob(container_name, blob_name, file_url)
    except Exception:
        raise ValueError('Missing file ' + file_name)

    blob_sas_token = blob_service.generate_blob_shared_access_signature(
        container_name,
        blob_name,
        permission=BlobPermissions.READ,
        expiry=datetime.now() + timedelta(days=1000))

    return blob_service.make_blob_url(container_name, blob_name, sas_token=blob_sas_token)
def main(req: func.HttpRequest) -> func.HttpResponse:
    png_base_path = "https://raw.githubusercontent.com/bmorrisondev/25daysofserverless/master"
    try:
        webhook = req.get_json()
        images_to_add = []
        regex_pattern = r'[^/]*$'
        for commit in webhook.get('commits'):  # Iterate through all the commits in the push
            for added_item in commit.get('added'):  # Iterate through added items
                if added_item.endswith("png"):  # Find items with png
                    image_item = ImageItem()  # Create the object to store name and url
                    regexObj = re.search(regex_pattern, added_item)
                    image_item.name = regexObj[0]  # Parse out the file name
                    image_item.url = f"{png_base_path}/{added_item}"
                    images_to_add.append(image_item)
                    logging.info(f'found {image_item.name} at {image_item.url}')

        # Copy files to Azure Storage Account, create URLs for the blobs and return for saving to Cosmos DB
        azure_path = "images"
        container_name = 'day3'
        blob_cs = os.environ["BlobServiceCs"]
        block_blob_service = BlockBlobService(connection_string=blob_cs)
        blob_urls = []
        for file in images_to_add:
            blob_name = f"{azure_path}/{random_string()}_{file.name}"
            block_blob_service.copy_blob(container_name, blob_name, file.url)  # Server-side copy of the file into the storage account
            blob_url = block_blob_service.make_blob_url(container_name, blob_name)  # Get a direct link to the blob
            blob_urls.append(blob_url)

        # Write URLs to Cosmos DB using the Mongo API
        mongo_cs = os.environ["CosmoMongoApiCs"]
        cosmo_client = pymongo.MongoClient(mongo_cs)
        db = cosmo_client["day3"]
        images_collection = db["images"]
        for url in blob_urls:
            record = {"url": url}
            images_collection.insert_one(record)  # Add the URL to the Cosmos DB collection

        return func.HttpResponse("ok")
    except Exception as err:
        return func.HttpResponse(f"{err}", status_code=500)
def copy_azure_files():
    blob_service = BlockBlobService(account_name='labcyhw2',
                                    account_key='zy2m8C1qjV0+7A9QL6hpCeFAO3fxplOS900adzDuTGlKU9LkIERYfN/VG5STbXxYK6BMvyaRMWjPaEUNmU8GoA==')
    copy_from_container = 'prod'
    copy_to_container = 'bkp'

    listagem = blob_service.list_blobs(copy_from_container)
    for blob in listagem:
        blob_url = blob_service.make_blob_url(copy_from_container, blob.name)
        blob_service.copy_blob(copy_to_container, blob.name, blob_url)
        print(blob.name)
def run_sample(account_name, account_key, from_container_name, from_blob, to_container_name, to_blob):
    blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

    if not blob_service.exists(from_container_name, from_blob):
        sys.exit('Not exist: ' + from_blob + ' in ' + from_container_name)
    if blob_service.exists(to_container_name, to_blob):
        sys.exit('Exist: ' + to_blob + ' in ' + to_container_name)

    blob_url = blob_service.make_blob_url(from_container_name, from_blob)
    blob_service.copy_blob(to_container_name, to_blob, blob_url)
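# copy_blob only *starts* an asynchronous copy, and run_sample above returns
# without waiting. A sketch of one way to poll the copy to completion with the
# same BlockBlobService, mirroring the status checks used elsewhere in this
# section ('pending'/'success' are the service's copy status values):
import time

def wait_for_copy(blob_service, container_name, blob_name, poll_seconds=5):
    copy = blob_service.get_blob_properties(container_name, blob_name).properties.copy
    while copy.status == 'pending':
        time.sleep(poll_seconds)
        copy = blob_service.get_blob_properties(container_name, blob_name).properties.copy
    if copy.status != 'success':
        raise IOError('Copy of {0} finished with status {1}'.format(blob_name, copy.status))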
def activate_model(self, model_name, active_model_name):
    block_blob_service = BlockBlobService(
        account_name=self.config.storage_account,
        account_key=self.config.storage_account_key)

    # copy model
    blob_url = block_blob_service.make_blob_url(
        self.config.models_container_name, model_name)
    block_blob_service.copy_blob(self.config.models_container_name,
                                 active_model_name, blob_url)

    # copy model metadata
    blob_url = block_blob_service.make_blob_url(
        self.config.models_container_name, model_name + ".meta")
    block_blob_service.copy_blob(self.config.models_container_name,
                                 active_model_name + ".meta", blob_url)
class S3AzureBlobBypass(BaseS3Bypass):
    """
    Bypass executed by default when the data source is an S3 bucket and the data
    destination is an Azure blob container. It should be transparent to the user.

    Conditions are:

        - S3Reader and AzureBlobWriter are used on configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureBlobWriter does not have an items_limit set in configuration.
        - AzureBlobWriter has the default items_per_buffer_write and size_per_buffer_write.
        - AzureBlobWriter has the default write_buffer.
    """

    def __init__(self, config, metadata):
        super(S3AzureBlobBypass, self).__init__(config, metadata)
        self.container = self.read_option('writer', 'container')
        from azure.storage.blob import BlockBlobService
        self.azure_service = BlockBlobService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureBlobWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureBlobBypass, cls).meets_conditions(config)

    @retry_long
    def _copy_s3_key(self, key):
        blob_name = key.name.split('/')[-1]
        url = key.generate_url(S3_URL_EXPIRES_IN)
        # Convert the https://<bucket>.s3.amazonaws.com/<path> url format to
        # https://s3.amazonaws.com/<bucket>/<path>, since the first one gives
        # certificate errors if there are dots in the bucket name
        url = re.sub(r'^https://([^/]+)\.s3\.amazonaws\.com/',
                     r'https://s3.amazonaws.com/\1/', url)
        self.azure_service.copy_blob(
            self.container,
            blob_name,
            url,
            timeout=S3_URL_EXPIRES_IN,
        )
def main():
    argument_spec = dict(source_uri=dict(required=True),
                         source_key=dict(required=True),
                         destination_account=dict(required=True),
                         destination_key=dict(required=True),
                         destination_container=dict(required=True),
                         destination_blob=dict(required=True),
                         wait=dict(default=False, type='bool'),
                         timeout=dict(default=1000))

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_DEPS:
        module.fail_json(
            msg="requests and azure are required for this module: {}".format(
                HAS_DEPS_EXC))

    source_account, source_container, source_blob = split_uri(
        module.params.get('source_uri'))

    source = CloudStorageAccount(account_name=source_account,
                                 account_key=module.params.get('source_key'))
    source_service = source.create_block_blob_service()

    destination_service = BlockBlobService(
        account_name=module.params.get('destination_account'),
        account_key=module.params.get('destination_key'))

    source_token = source.generate_shared_access_signature(
        Services.BLOB, ResourceTypes.OBJECT, AccountPermissions.READ,
        datetime.datetime.now() + timedelta(hours=1))

    source_sas_url = source_service.make_blob_url(source_container,
                                                  source_blob, 'https',
                                                  source_token)

    destination_service.create_container(
        module.params.get('destination_container'), fail_on_exist=False)

    status = destination_service.copy_blob(
        module.params.get('destination_container'),
        module.params.get('destination_blob'), source_sas_url)

    if not module.params.get('wait'):
        data = dict(changed=True, status='started')
        module.exit_json(**data)
    else:
        copy = destination_service.get_blob_properties(
            module.params.get('destination_container'),
            module.params.get('destination_blob')).properties.copy
        count = 0
        while copy.status != 'success':
            count = count + 30
            if count > module.params.get('timeout'):
                module.fail_json(
                    msg='Timed out waiting for async copy to complete.')
            time.sleep(30)
            copy = destination_service.get_blob_properties(
                module.params.get('destination_container'),
                module.params.get('destination_blob')).properties.copy
        data = dict(changed=True, status='completed')
        module.exit_json(**data)
class BlobHelper:
    def __init__(self, blob=None):
        account_name = os.environ["AzureStorageAccountName"]
        account_key = os.environ["AzureStorageAccountKey"]
        self.blob_service = BlockBlobService(account_name=account_name,
                                             account_key=account_key)
        self.blob = blob

    def create_output_blob(self, destination_container_name):
        source_url = os.environ["StorageUrl"] + self.blob.name
        destination_blob_name = self.get_destination_blob_name()
        self.blob_service.copy_blob(
            container_name=destination_container_name,
            blob_name=destination_blob_name,
            copy_source=source_url,
        )

    def get_destination_blob_name(self):
        blob_filename = self.blob.name.split("/")[1]
        datetime_str = datetime.today().strftime("%Y%m%d-%H%M%S")
        return f"{datetime_str}-{blob_filename}"

    def get_str_file(self, storage_container_name, storage_blob_name):
        compressed_file = io.BytesIO()
        self.blob_service.get_blob_to_stream(storage_container_name,
                                             storage_blob_name,
                                             compressed_file,
                                             max_connections=1)
        compressed_file.seek(0)
        compressed_gzip = gzip.GzipFile(fileobj=compressed_file)
        decompressed_file = compressed_gzip.read()
        compressed_file.close()
        compressed_gzip.close()
        file_string = decompressed_file.decode("utf-8-sig")
        return file_string

    def write_stream_file(self, storage_container_name, storage_blob_name, encoded_file):
        self.blob_service.create_blob_from_bytes(storage_container_name,
                                                 storage_blob_name,
                                                 encoded_file,
                                                 max_connections=1)
def main(inputs: dict) -> str:
    """
    Copy the video into the audiotranscript-files container (previously
    videoindexer-files), which will trigger the system of 3 functions to
    eventually download the transcript to SQL
    """
    # Set inputs
    vidURL = inputs['fileURL']
    urlContainer, urlFileName = get_url_container_and_file_name(vidURL)
    bbs = BlockBlobService(
        connection_string=os.getenv("fsevideosConnectionString"))

    # Create SAS URL
    sasURL = get_SAS_URL(fileURL=vidURL,
                         block_blob_service=bbs,
                         container=urlContainer)

    # Copy blob
    bbs.copy_blob(container_name="audiotranscript-files",
                  blob_name=urlFileName,
                  copy_source=sasURL)

    return "done"
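# get_SAS_URL and get_url_container_and_file_name are project helpers that are
# not shown above. A rough sketch of what the SAS part might look like with the
# same BlockBlobService; the one-hour expiry and the way the blob name is taken
# from the URL are assumptions, not the original implementation:
from datetime import datetime, timedelta
from azure.storage.blob import BlobPermissions

def get_SAS_URL(fileURL, block_blob_service, container):
    blob_name = fileURL.rsplit('/', 1)[-1]
    sas_token = block_blob_service.generate_blob_shared_access_signature(
        container,
        blob_name,
        permission=BlobPermissions.READ,
        expiry=datetime.utcnow() + timedelta(hours=1))
    return block_blob_service.make_blob_url(container, blob_name, sas_token=sas_token)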
def copySnapshotToAttacker(self, storageAccount, storageKey, containerName, blobName, snapshotSas):
    blockBlobService = BlockBlobService(account_name=storageAccount,
                                        account_key=storageKey)
    copyProperties = blockBlobService.copy_blob(containerName, blobName, snapshotSas)

    while copyProperties.status != "success":
        copyProperties = blockBlobService.get_blob_properties(
            containerName, blobName).properties.copy
        print(copyProperties.status + ":" + copyProperties.progress)
        time.sleep(10)

    return copyProperties
def copy_blob_image(self, template, vm_name, storage_account, template_container, storage_container):
    # todo: weird method to refactor it later
    container_client = BlockBlobService(storage_account, self.storage_key)
    src_uri = container_client.make_blob_url(container_name=template_container,
                                             blob_name=template.split("/")[-1])
    operation = container_client.copy_blob(container_name=storage_container,
                                           blob_name=vm_name + ".vhd",
                                           copy_source=src_uri)
    wait_for(lambda: operation.status != 'pending', num_sec='10m', delay=15)  # copy operation obj.status->str
    return operation.status
def main():
    # get command line args
    account = sys.argv[1]
    secret = sys.argv[2]
    srcContainer = sys.argv[3]
    files = sys.argv[4:]

    # generate container name
    destContainer = str(uuid.uuid4()).replace('-', '')

    try:
        # connect to blob store
        bs = BlockBlobService(account_name=account, account_key=secret)

        # create and setup container, by default a container is private
        bs.create_container(destContainer)
        bs.set_container_acl(destContainer)

        # perform blob copy
        copyStartTime = int(round(time.time() * 1000))
        copyProps = {}
        for f in files:
            srcUrl = 'https://{}.blob.core.windows.net/{}/{}'.format(account, srcContainer, f)
            cp = bs.copy_blob(destContainer, f, srcUrl)
            copyProps[f] = cp

        # wait for copy to finish
        while len(copyProps.keys()) > 0:
            for f, prop in copyProps.items():
                bp = bs.get_blob_properties(destContainer, f)
                # compare with != rather than 'is not': identity comparison against a string literal is a bug
                copyProps[f] = None if bp.properties.copy.status != 'pending' else bp
            copyProps = {k: v for k, v in copyProps.items() if v}

        # copy completed
        copyEndTime = int(round(time.time() * 1000))
        print('Blob copy completed in {}ms'.format(copyEndTime - copyStartTime), file=sys.stderr)

        # generate SAS token, read only, valid for an hour
        token = bs.generate_container_shared_access_signature(
            destContainer,
            ContainerPermissions.READ | ContainerPermissions.LIST,
            datetime.utcnow() + timedelta(hours=1))

        # return information
        result = {
            'storage_account': account,
            'container': destContainer,
            'sas_token': token
        }
        print(json.dumps(result, indent=4, sort_keys=True))
    except Exception as e:
        print(e, file=sys.stderr)
destStorage_keys = {v.key_name: v.value for v in destStorage_keys.keys}
destStorage_key = destStorage_keys['key1']
print("The destination storage key is " + destStorage_key)

# In[18]:

# Create the target container in storage
block_blob_service2 = BlockBlobService(account_name=destStorageAcct,
                                       account_key=destStorage_key)

# In[19]:

block_blob_service2.create_container(destContainerName,
                                     public_access=PublicAccess.Container)

# In[21]:

# Start Asynchronous Copy
# print("Starting azure copy...")
block_blob_service2.copy_blob(destContainerName, "testBlob.json", blob_url)
print("Azure copy done.")

generator = block_blob_service2.list_blobs(destContainerName)
for blob in generator:
    blob_url2 = block_blob_service2.make_blob_url(destContainerName, blob.name)
    print("The new blob url is " + blob_url2)
def main(): sourceBlobCounter = 0 currentBlobCounter = 0 currentLabel = None block_blob_service = BlockBlobService(account_name=azureStorgeAccountName, account_key=azureStorageKeyName) # create the TargetContainer if it does not exist if (block_blob_service.exists( container_name=azureStorageTargetContainer) == False): block_blob_service.create_container( container_name=azureStorageTargetContainer) print( str(datetime.datetime.now()) + ': created target container: ' + azureStorageTargetContainer) elif emptyTargetContainer: print( str(datetime.datetime.now()) + ': deleting existing files in the container: ' + azureStorageTargetContainer) while True: delete_blob_generator = block_blob_service.list_blobs( container_name=azureStorageTargetContainer) for toDelete in block_blob_service.list_blobs( container_name=azureStorageTargetContainer): block_blob_service.delete_blob( container_name=azureStorageTargetContainer, blob_name=toDelete.name) if not delete_blob_generator.next_marker: break print( str(datetime.datetime.now()) + ': counting files in the source container: ' + azureStorageSourceContainer) while True: count_blob_generator = block_blob_service.list_blobs( container_name=azureStorageSourceContainer) for blob in count_blob_generator: sourceBlobCounter += 1 if not count_blob_generator.next_marker: break print( str(datetime.datetime.now()) + ': copying and labeling ' + str(sourceBlobCounter) + ' files to container: ' + azureStorageTargetContainer) while currentBlobCounter < sourceBlobCounter: copy_blob_generator = block_blob_service.list_blobs( container_name=azureStorageSourceContainer) for blob in copy_blob_generator: sourceBlob = block_blob_service.make_blob_url( container_name=azureStorageSourceContainer, blob_name=blob.name) label = labels[int( (currentBlobCounter / sourceBlobCounter) * len(labels))] targetName = blob.name.split('.')[0].replace( '-', '_') + '-' + str(label) + '.' + blob.name.split('.')[1] block_blob_service.copy_blob( container_name=azureStorageTargetContainer, blob_name=targetName, copy_source=sourceBlob) if currentLabel != label: print( str(datetime.datetime.now()) + ": file number: " + str(currentBlobCounter) + " begins label: " + str(label)) currentLabel = label if (currentBlobCounter % 100 == 0): sys.stdout.write('.') sys.stdout.flush() currentBlobCounter += 1 if not copy_blob_generator.next_marker: break
class BlobUtility: """ Azure blob utilities for I/O operations """ def __init__(self, account_name, account_key): """ __init__ - Initializes blob utils and establish connection to azure blob :param str account_name: Azure Blob account name. :param str account_key: Azure account key. """ self.account_name = account_name self.block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) def get_blob_to_path(self, input_container_name, input_blob_name, input_file_path): """ get_blob_to_path - Get file path in blob :param str input_container_name: BLob container name :param str input_blob_name: Blob path in the container. :param str input_file_path: File name to read. :returns: input_file_path :rtype: Blob object """ self.block_blob_service.get_blob_to_path( container_name=input_container_name, blob_name=input_blob_name, file_path=input_file_path) return input_file_path def get_blob_to_bytes(self, input_container_name, input_blob_name): """ get_blob_to_bytes - Read images from the blob :param str input_container_name: Blob container name :param str input_blob_name: Blob path in the container + input file/image name :returns: blob_byte :rtype: Blob object """ blob_byte = self.block_blob_service.get_blob_to_bytes( container_name=input_container_name, blob_name=input_blob_name) return blob_byte def create_blob_from_text(self, input_container_name, input_blob_name, data): """ create_blob_from_text - Write csv/dataframe to blob :param str input_container_name: Blob container name :param str input_blob_name: Blob path in the container to write + image name with extension. :param (csv/text file) data: csv data to write into blob. """ self.block_blob_service.create_blob_from_text( container_name=input_container_name, blob_name=input_blob_name, text=data) def make_blob_url(self, input_container_name, input_file_path): """ make_blob_url - Create blob url :param str input_container_name: Blob Container name :param str input_file_path: Blob file path to refer :returns: blob_url :rtype: Blob object """ blob_url = self.block_blob_service.make_blob_url( input_container_name, input_file_path) return blob_url def copy_blob(self, container_name, file_path, blob_url): """ copy_blob - Copy blob/data to another container using blob url :param str container_name: Target container :param str file_path: Target blob file path + target filename with extension :param blob object blob_url: Source blob url to copy data. 
""" self.block_blob_service.copy_blob(container_name, file_path, blob_url) def generate_container_signature(self, container_name, file_name): """ generate_container_signature - generate container signature :param str container_name: Blob Container name :param str file_name: File name :returns: file_url :rtype: Blob object """ container_sas_token = self.block_blob_service.generate_container_shared_access_signature( container_name, permission=ContainerPermissions.READ, expiry=datetime.utcnow() + timedelta(hours=1), start=datetime.utcnow()) file_url = [ 'https://', self.account_name, '.blob.core.windows.net/', container_name, '/', file_name, '?', container_sas_token ] file_url = ''.join(file_url) return file_url def generate_blob_signature(self, container_name, blob_name, file_extension): """ generate_blob_signature - generate blob signature :param str container_name: Blob Container name :param str blob_name: blob name :param str file_extension: file extension :returns: blob_url :rtype: Blob object """ token = self.block_blob_service.generate_blob_shared_access_signature( container_name, blob_name + file_extension, permission=BlobPermissions.READ, expiry=datetime.utcnow() + timedelta(hours=1), start=datetime.utcnow()) file = [ 'https://', self.account_name, '.blob.core.windows.net/', container_name, '/', blob_name, '.', file_extension ] file = ''.join(file) blob_url = f"{file}?{token}" return blob_url
class AzureStorage(BaseStorage): def __init__( self, context, azure_container, storage_path, azure_account_name, azure_account_key=None, sas_token=None, connection_string=None, is_emulated=False, socket_timeout=20, request_timeout=20, ): super(AzureStorage, self).__init__() self._context = context self._storage_path = storage_path.lstrip("/") self._azure_account_name = azure_account_key self._azure_account_key = azure_account_key self._azure_sas_token = sas_token self._azure_container = azure_container self._azure_connection_string = connection_string self._request_timeout = request_timeout self._blob_service = BlockBlobService( account_name=azure_account_name, account_key=azure_account_key, sas_token=sas_token, is_emulated=is_emulated, connection_string=connection_string, socket_timeout=socket_timeout, ) def _blob_name_from_path(self, object_path): if ".." in object_path: raise Exception("Relative paths are not allowed; found %s" % object_path) return os.path.join(self._storage_path, object_path).rstrip("/") def _upload_blob_path_from_uuid(self, uuid): return self._blob_name_from_path( self._upload_blob_name_from_uuid(uuid)) def _upload_blob_name_from_uuid(self, uuid): return "uploads/{0}".format(uuid) def get_direct_download_url(self, object_path, request_ip=None, expires_in=60, requires_cors=False, head=False): blob_name = self._blob_name_from_path(object_path) try: sas_token = self._blob_service.generate_blob_shared_access_signature( self._azure_container, blob_name, ContainerPermissions.READ, datetime.utcnow() + timedelta(seconds=expires_in), ) blob_url = self._blob_service.make_blob_url(self._azure_container, blob_name, sas_token=sas_token) except AzureException: logger.exception( "Exception when trying to get direct download for path %s", object_path) raise IOError("Exception when trying to get direct download") return blob_url def validate(self, client): super(AzureStorage, self).validate(client) self._blob_service.get_container_properties( self._azure_container, timeout=self._request_timeout) def get_content(self, path): blob_name = self._blob_name_from_path(path) try: blob = self._blob_service.get_blob_to_bytes( self._azure_container, blob_name) except AzureException: logger.exception("Exception when trying to get path %s", path) raise IOError("Exception when trying to get path") return blob.content def put_content(self, path, content): blob_name = self._blob_name_from_path(path) try: self._blob_service.create_blob_from_bytes(self._azure_container, blob_name, content) except AzureException: logger.exception("Exception when trying to put path %s", path) raise IOError("Exception when trying to put path") def stream_read(self, path): with self.stream_read_file(path) as f: while True: buf = f.read(self.buffer_size) if not buf: break yield buf def stream_read_file(self, path): blob_name = self._blob_name_from_path(path) try: output_stream = io.BytesIO() self._blob_service.get_blob_to_stream(self._azure_container, blob_name, output_stream) output_stream.seek(0) except AzureException: logger.exception( "Exception when trying to stream_file_read path %s", path) raise IOError("Exception when trying to stream_file_read path") return output_stream def stream_write(self, path, fp, content_type=None, content_encoding=None): blob_name = self._blob_name_from_path(path) content_settings = ContentSettings( content_type=content_type, content_encoding=content_encoding, ) try: self._blob_service.create_blob_from_stream( self._azure_container, blob_name, fp, content_settings=content_settings) except 
AzureException: logger.exception("Exception when trying to stream_write path %s", path) raise IOError("Exception when trying to stream_write path") def exists(self, path): blob_name = self._blob_name_from_path(path) try: return self._blob_service.exists(self._azure_container, blob_name, timeout=self._request_timeout) except AzureException: logger.exception("Exception when trying to check exists path %s", path) raise IOError("Exception when trying to check exists path") def remove(self, path): blob_name = self._blob_name_from_path(path) try: self._blob_service.delete_blob(self._azure_container, blob_name) except AzureException: logger.exception("Exception when trying to remove path %s", path) raise IOError("Exception when trying to remove path") def get_checksum(self, path): blob_name = self._blob_name_from_path(path) try: blob = self._blob_service.get_blob_properties( self._azure_container, blob_name) except AzureException: logger.exception( "Exception when trying to get_checksum for path %s", path) raise IOError("Exception when trying to get_checksum path") return blob.properties.etag def initiate_chunked_upload(self): random_uuid = str(uuid.uuid4()) metadata = { _BLOCKS_KEY: [], _CONTENT_TYPE_KEY: None, } return random_uuid, metadata def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None): if length == 0: return 0, storage_metadata, None upload_blob_path = self._upload_blob_path_from_uuid(uuid) new_metadata = copy.deepcopy(storage_metadata) total_bytes_written = 0 while True: current_length = length - total_bytes_written max_length = (min(current_length, _MAX_BLOCK_SIZE) if length != READ_UNTIL_END else _MAX_BLOCK_SIZE) if max_length <= 0: break limited = LimitingStream(in_fp, max_length, seekable=False) # Note: Azure fails if a zero-length block is uploaded, so we read all the data here, # and, if there is none, terminate early. block_data = b"" for chunk in iter(lambda: limited.read(4096), b""): block_data += chunk if len(block_data) == 0: break block_index = len(new_metadata[_BLOCKS_KEY]) block_id = format(block_index, "05") new_metadata[_BLOCKS_KEY].append(block_id) try: self._blob_service.put_block( self._azure_container, upload_blob_path, block_data, block_id, validate_content=True, ) except AzureException as ae: logger.exception( "Exception when trying to stream_upload_chunk block %s for %s", block_id, uuid) return total_bytes_written, new_metadata, ae bytes_written = len(block_data) total_bytes_written += bytes_written if bytes_written == 0 or bytes_written < max_length: break if content_type is not None: new_metadata[_CONTENT_TYPE_KEY] = content_type return total_bytes_written, new_metadata, None def complete_chunked_upload(self, uuid, final_path, storage_metadata): """ Complete the chunked upload and store the final results in the path indicated. Returns nothing. """ # Commit the blob's blocks. upload_blob_path = self._upload_blob_path_from_uuid(uuid) block_list = [ BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY] ] try: self._blob_service.put_block_list(self._azure_container, upload_blob_path, block_list) except AzureException: logger.exception( "Exception when trying to put block list for path %s from upload %s", final_path, uuid, ) raise IOError("Exception when trying to put block list") # Set the content type on the blob if applicable. 
if storage_metadata[_CONTENT_TYPE_KEY] is not None: content_settings = ContentSettings( content_type=storage_metadata[_CONTENT_TYPE_KEY]) try: self._blob_service.set_blob_properties( self._azure_container, upload_blob_path, content_settings=content_settings) except AzureException: logger.exception( "Exception when trying to set blob properties for path %s", final_path) raise IOError("Exception when trying to set blob properties") # Copy the blob to its final location. upload_blob_name = self._upload_blob_name_from_uuid(uuid) copy_source_url = self.get_direct_download_url(upload_blob_name, expires_in=300) try: blob_name = self._blob_name_from_path(final_path) copy_prop = self._blob_service.copy_blob(self._azure_container, blob_name, copy_source_url) except AzureException: logger.exception( "Exception when trying to set copy uploaded blob %s to path %s", uuid, final_path) raise IOError("Exception when trying to copy uploaded blob") self._await_copy(self._azure_container, blob_name, copy_prop) # Delete the original blob. logger.debug("Deleting chunked upload %s at path %s", uuid, upload_blob_path) try: self._blob_service.delete_blob(self._azure_container, upload_blob_path) except AzureException: logger.exception( "Exception when trying to set delete uploaded blob %s", uuid) raise IOError("Exception when trying to delete uploaded blob") def cancel_chunked_upload(self, uuid, storage_metadata): """ Cancel the chunked upload and clean up any outstanding partially uploaded data. Returns nothing. """ upload_blob_path = self._upload_blob_path_from_uuid(uuid) logger.debug("Canceling chunked upload %s at path %s", uuid, upload_blob_path) self._blob_service.delete_blob(self._azure_container, upload_blob_path) def _await_copy(self, container, blob_name, copy_prop): # Poll for copy completion. count = 0 while copy_prop.status == "pending": props = self._blob_service.get_blob_properties( container, blob_name) copy_prop = props.properties.copy if copy_prop.status == "success": return if copy_prop.status == "failed" or copy_prop.status == "aborted": raise IOError("Copy of blob %s failed with status %s" % (blob_name, copy_prop.status)) count = count + 1 if count > _MAX_COPY_POLL_COUNT: raise IOError("Timed out waiting for copy to complete") time.sleep(_COPY_POLL_SLEEP) def copy_to(self, destination, path): if self.__class__ == destination.__class__: logger.debug( "Starting copying file from Azure %s to Azure %s via an Azure copy", self._azure_container, destination._azure_container, ) copy_source_url = self.get_direct_download_url(path) blob_name = destination._blob_name_from_path(path) copy_prop = destination._blob_service.copy_blob( destination._azure_container, blob_name, copy_source_url) destination._await_copy(destination._azure_container, blob_name, copy_prop) logger.debug( "Finished copying file from Azure %s to Azure %s via an Azure copy", self._azure_container, destination._azure_container, ) return # Fallback to a slower, default copy. 
logger.debug( "Copying file from Azure container %s to %s via a streamed copy", self._azure_container, destination, ) with self.stream_read_file(path) as fp: destination.stream_write(path, fp) def setup(self): # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services cors = [ CorsRule( allowed_origins="*", allowed_methods=["GET", "PUT"], max_age_in_seconds=3000, exposed_headers=["x-ms-meta-*"], allowed_headers=[ "x-ms-meta-data*", "x-ms-meta-target*", "x-ms-meta-abc", "Content-Type", ], ) ] self._blob_service.set_blob_service_properties(cors=cors)
class AzureBlobStore21(implements(StoreInterface)): def __init__(self, storage_creds, max_retries=10): self.storage_id = storage_creds["name"] self.storage_key = storage_creds["key"] self.bs = BlockBlobService(account_name=self.storage_id, account_key=self.storage_key) self.append_bs = AppendBlobService(account_name=self.storage_id, account_key=self.storage_key) self.max_retries = max_retries self.set_retries(max_retries) # ---- HELPER functions ---- def set_retries(self, count): old_count = self.max_retries self.max_retries = count # bug workaround: standard Retry classes don't retry status=409 (container is being deleted) #import azure.storage.common.retry as retry #self.bs.retry = retry.LinearRetry(backoff=5, max_attempts=count).retry #self.append_bs.retry = retry.LinearRetry(backoff=5, max_attempts=count).retry self.bs.retry = utils.make_retry_func(count) self.append_bs.retry = utils.make_retry_func(count) return old_count # ---- MISC part of interface ---- def get_service_name(self): ''' return the unique name of the storage service''' return self.storage_id def get_retry(self): return self.bs.retry def set_retry(self, value): self.bs.retry = value # ---- CONTAINER interface ---- def does_container_exist(self, container): return self.bs.exists(container) def create_container(self, container): return self.bs.create_container(container) def list_containers(self): containers = self.bs.list_containers() name_list = [contain.name for contain in containers] return name_list def delete_container(self, container): return self.bs.delete_container(container) def get_container_properties(self, container): props = self.bs.get_container_properties(container) return props def get_container_metadata(self, container): md = self.bs.get_container_metadata(container) return md # def set_container_metadata(self, container, md_dict): # return self.bs.set_container_metadata(container, md_dict) # ---- BLOB interface ---- def does_blob_exist(self, container, blob_path): return self.bs.exists(container, blob_path) def create_blob(self, container, blob_path, text, fail_if_exists=False): ifn = "*" if fail_if_exists else None return self.bs.create_blob_from_text(container, blob_path, text, if_none_match=ifn) def create_blob_from_path(self, container, blob_path, source_fn, progress_callback=None): result = self.bs.create_blob_from_path( container, blob_path, source_fn, progress_callback=progress_callback) return result def append_blob(self, container, blob_path, text, append_with_rewrite=False): # create blob if it doesn't exist if not append_with_rewrite: # normal handling if not self.append_bs.exists(container, blob_path): self.append_bs.create_blob(container, blob_path) return self.append_bs.append_blob_from_text( container, blob_path, text) ''' Appends text to a normal blob blob by reading and then rewriting the entire blob. Correctly handles concurrency/race conditions. Recommended for lots of small items (like 10,000 run names). Note: we turn off retries on azure CALL-level so that we can retry on OUR CALL-level. 
''' # experimental local retry loop old_retry = self.bs.get_retry() self.bs.set_retry(utils.make_retry_func(0)) succeeded = False for i in range(20): try: if self.bs.does_blob_exist(container, blob_path): # read prev contents blob_text = self.bs.get_blob_text(container, blob_path) # append our text new_text = blob_text + text # write blob, ensuring etag matches (no one updated since above read) self.bs.create_blob(container, blob_path, new_text, if_match=blob.properties.etag) else: # if no previous blob, just try to create it self.bs.create_blob(container, blob_path, text) except BaseException as ex: logger.exception( "Error in _append_blob_with_retries, ex={}".format(ex)) sleep_time = np.random.random() * 4 console.diag( "XT store received an expected azure exception; will backoff for {:.4f} secs [retry #{}]" .format(sleep_time, i + 1)) time.sleep(sleep_time) else: succeeded = True break # restore retry self.bs.set_retry(old_retry) if not succeeded: errors.service_error( "_append_blob_with_rewrite failed (too many retries)") def list_blobs(self, container, path=None, return_names=True, recursive=True): ''' NOTE: the semantics here a tricky if recursive: - return a flat list of all full path names of all files (no directory entries) else: - return a flat list of all files and all directory names (add "/" to end of directory names) if return_names: - return list of names else: - return a list of objects with following properties: .name (file pathname) .properties .content_length (number) .modified_ns (time in ns) The delimiter trick: this is when we set the delimiter arg = "/" to tell azure to return only the blobs in the specified directory - that is, don't return blobs from child directories. In this case, azure returns the effective child directory name, followed by a "/", but not its contents (which we hope is faster). 
''' delimiter = None if recursive else "/" # specific Azure path rules for good results if path: if path.startswith("/"): path = path[ 1:] # blob API wants this part of path relative to container # we should only add a "/" if path is a folder path if path.endswith("*"): # we just need to block the addition of "/" path = path[0:-1] elif not path.endswith("/"): path += "/" # best if path ends with "/" blobs = self.bs.list_blobs(container, prefix=path, delimiter=delimiter) if return_names: blobs = [blob.name for blob in blobs] else: blobs = list(blobs) return blobs def delete_blob(self, container, blob_path, snapshot=None): dss = DeleteSnapshot() return self.bs.delete_blob(container, blob_path, delete_snapshots=dss.Include) def get_blob_text(self, container, blob_path): # watch out for 0-length blobs - they trigger an Azure RETRY error text = "" # azure storage bug workaround: avoid RETRY errors for 0-length blob blob = self.bs.get_blob_properties(container, blob_path) if blob.properties.content_length: blob = self.bs.get_blob_to_text(container, blob_path) text = blob.content return text def get_blob_to_path(self, container, blob_path, dest_fn, snapshot=None, progress_callback=None): # azure storage bug workaround: avoid RETRY errors for 0-length blob blob = self.bs.get_blob_properties(container, blob_path) if blob.properties.content_length: result = self.bs.get_blob_to_path( container, blob_path, dest_fn, snapshot=snapshot, progress_callback=progress_callback) text = result.content else: md = blob.metadata if "hdi_isfolder" in md and md["hdi_isfolder"]: # its a directory marker; do NOT create a local file for it text = "" else: # 0-length text file; just write the file outselves text = "" with open(dest_fn, "wt") as outfile: outfile.write(text) return text def get_blob_properties(self, container, blob_path): props = self.bs.get_blob_properties(container, blob_path) return props def get_blob_metadata(self, container, blob_path): return self.bs.get_blob_metadata(container, blob_path) # def set_blob_metadata(self, container, blob_path, md_dict): # return self.bs.set_blob_metadata(container, blob_path, md_dict) def copy_blob(self, source_container, source_blob_path, dest_container, dest_blob_path): source_blob_url = self.bs.make_blob_url(source_container, source_blob_path) self.bs.copy_blob(dest_container, dest_blob_path, source_blob_url) def snapshot_blob(self, container, blob_path): blob = self.bs.snapshot_blob(container, blob_path) #pd = utils.obj_to_dict(blob) return blob
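# A brief usage sketch for AzureBlobStore21. The storage_creds dict shape is the
# one __init__ above expects; the account, container and blob path names are
# hypothetical.
creds = {"name": "mystorageacct", "key": "<storage-key>"}
store = AzureBlobStore21(creds)
if not store.does_container_exist("backups"):
    store.create_container("backups")
# server-side copy within the same account (make_blob_url + copy_blob under the hood)
store.copy_blob("runs", "run42/output.txt", "backups", "run42/output.txt")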
account_key = '<>'
account_name = '<>'
blob_name = 'test1.txt'
container_name = 'container1'
copy_from = 'container1/f1'
copy_to = 'container1/f2'
blob_name = 'test1.txt'

blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

# Create a client side SAS token
sas_token1 = blob_service.generate_container_shared_access_signature(
    container_name,
    BlobPermissions.WRITE | BlobPermissions.READ,
    datetime.utcnow() + timedelta(hours=4))

# Create a SAS block service
blob_service2 = BlockBlobService(account_name=account_name, sas_token=sas_token1)

# Create a sas url
blob_url = blob_service2.make_blob_url(copy_from, blob_name, sas_token=sas_token1)

# copy from blob_url to the copy_to location
blob_service2.copy_blob(copy_to, blob_name=blob_name, copy_source=blob_url)

print('Debug: Showing contents of source file')
print(blob_service2.get_blob_to_text(copy_from, blob_name).content)

print('Debug: Showing contents of destination (copied) file')
print(blob_service2.get_blob_to_text(copy_to, blob_name).content)  # should exist now

#%%
def cluster(request, eventname): start = time.time() md = AzureMediaStorage() block_blob_service = BlockBlobService(account_name=md.account_name, account_key=md.account_key) # Download the pre trained models, unzip them and save them in the save folder as this file # predictor_path = 'shape_predictor_5_face_landmarks.dat' #'C:/Users/lenovo/Desktop/PicProcure/events/shape_predictor_5_face_landmarks.dat' face_rec_model_path = 'dlib_face_recognition_resnet_model_v1.dat' faces_folder_path = block_blob_service.list_blobs(container_name=eventname) output_folder = [] check_folder = block_blob_service.list_blobs(container_name='profile-pics') user_list = Register.objects.all().filter(event_id=Events.objects.get( event_name=eventname)) username_list = [] for user in user_list: img = user.user_id.profile_pic username_list.append(img) #for f in check_folder: #username_list.append(f.name) #print(username_list) detector = dlib.get_frontal_face_detector() #a detector to find the faces sp = dlib.shape_predictor( predictor_path) #shape predictor to find face landmarks facerec = dlib.face_recognition_model_v1( face_rec_model_path) #face recognition model descriptors = [] images = [] output_list = [] for img in check_folder: print('Processing file:{}', format(img.name)) url = "https://picprocurestorageaccount.blob.core.windows.net/profile-pics/" + img.name #img1 = dlib.load_rgb_image(urllib.request.urlopen(url).read()) #win = dlib.image_window() img1 = numpy.array( Image.open(io.BytesIO(urllib.request.urlopen(url).read()))) #win.set_image(img1) # Ask the detector to find the bounding boxes of each face. The 1 in the second argument indicates that we should upoutput_listple the image 1 time. This will make everything bigger and allow us to detect more faces. dets = detector(img1, 1) print("Number of faces detected: {}".format(len(dets))) # Now process each face we found. for k, d in enumerate(dets): # Get the landmarks/parts for the face in box d. shape = sp(img1, d) # Compute the 128D vector that describes the face in img identified by shape. face_descriptor = facerec.compute_face_descriptor(img1, shape) descriptors.append(face_descriptor) images.append(('profile-pics', img.name, img1, shape)) print('profile pics ended') for f in faces_folder_path: print("Processing file: {}".format(f.name)) url = "https://picprocurestorageaccount.blob.core.windows.net/" + eventname + '/' + f.name #img = dlib.load_rgb_image(f) #win = dlib.image_window() img = numpy.array( Image.open(io.BytesIO(urllib.request.urlopen(url).read()))) print('reading completed ' + f.name) #win.set_image(img) # Ask the detector to find the bounding boxes of each face. The 1 in the second argument indicates that we should upoutput_listple the image 1 time. This will make everything bigger and allow us to detect more faces. dets = detector(img, 1) print("Number of faces detected: {}".format(len(dets))) # Now process each face we found. for k, d in enumerate(dets): # Get the landmarks/parts for the face in box d. shape = sp(img, d) # Compute the 128D vector that describes the face in img identified by shape. face_descriptor = facerec.compute_face_descriptor(img, shape) descriptors.append(face_descriptor) images.append((eventname, f.name, img, shape)) print('image appended ' + f.name) # Cluster the faces. 
print("event load completed") labels = dlib.chinese_whispers_clustering(descriptors, 0.5) num_classes = len(set(labels)) # Total number of clusters print("Number of clusters: {}".format(num_classes)) for i in range(0, num_classes): indices = [] class_length = len([label for label in labels if label == i]) for j, label in enumerate(labels): if label == i: indices.append(j) print("Indices of images in the cluster {0} : {1}".format( str(i), str(indices))) print("Size of cluster {0} : {1}".format(str(i), str(class_length))) #output_folder_path = output_folder + '/output' + str(i) # Output folder for each cluster #os.path.normpath(output_folder_path) #os.makedirs(output_folder_path) block_blob_service.create_container(eventname + str(i), public_access='blob') # Save each face to the respective cluster folder print("Saving faces to output folder...") #img, shape = images[index] #file_path = os.path.join(output_folder_path,"face_"+str(k)+"_"+str(i)) md.azure_container = eventname + str(i) output_folder.append(md.azure_container) for k, index in enumerate(indices): container, name, img, shape = images[index] #dlib.save_face_chip(img, shape, file_path, size=1000, padding = 2) url = "https://picprocurestorageaccount.blob.core.windows.net/" + container + '/' + name block_blob_service.copy_blob(container_name=md.azure_container, blob_name=name, copy_source=url) # md._save(name,img) if 0 == k: output_list.append("ouput/output" + str(i) + "/face_0" + "_" + str(i) + ".jpg") for imgs in check_folder: for output in output_folder: try: block_blob_service.get_blob_metadata(container_name=output, blob_name=imgs.name) container_name = eventname + '-' + imgs.name.split('.')[0] block_blob_service.create_container( container_name=container_name, public_access='blob') for i in block_blob_service.list_blobs(container_name=output): url = url = "https://picprocurestorageaccount.blob.core.windows.net/" + output + '/' + i.name block_blob_service.copy_blob(container_name=container_name, blob_name=i.name, copy_source=url) block_blob_service.delete_container(output) output_folder.remove(output) break except: pass block_blob_service.delete_container(eventname) return HttpResponse("Successfull")
def main(xmlblob: func.InputStream):
    """Creates the UKRLP lookup tables for later use

    This Azure Function carries out the following steps:
    * Decompresses the XML HESA DataSet
    * Parses the INSTITUTION data from the DataSet
    * Retrieves enrichment data from the UKRLP API for each institution
    * Creates a lookup item for each Institution and writes it to CosmosDB
    * Currently, once completed successfully this function triggers the Etl
      function by copying the compressed XML passed in to a Blob storage
      monitored by the Etl function.
    """
    try:
        logging.info(f"CreateUkrlpBlobTrigger creating UKRLP lookups\n"
                     f"Name: {xmlblob.name}\n"
                     f"Blob Size: {xmlblob.length} bytes")

        create_ukrlp_start_datetime = datetime.today().strftime("%Y%m%d %H%M%S")
        logging.info(
            f"CreateUkrlp function started on {create_ukrlp_start_datetime}")

        # Read the compressed Blob into a BytesIO object
        compressed_file = io.BytesIO(xmlblob.read())

        # Read the compressed file into a GzipFile object
        compressed_gzip = gzip.GzipFile(fileobj=compressed_file)

        # Decompress the data
        decompressed_file = compressed_gzip.read()

        # Decode the bytes into a string
        xml_string = decompressed_file.decode("utf-8")

        # Parse the xml and create the lookups
        lookup_creator = LookupCreator(xml_string)
        lookup_creator.create_ukrlp_lookups()

        #
        # Copy the compressed HESA XML to the Blob storage monitored by the Etl pipeline
        #
        storage_account_name = os.environ["AzureStorageAccountName"]
        storage_account_key = os.environ["AzureStorageAccountKey"]

        # Instantiate the Block Blob Service
        blob_service = BlockBlobService(account_name=storage_account_name,
                                        account_key=storage_account_key)
        logging.info(
            f"Created Block Blob Service to Azure Storage Account {storage_account_name}")

        # Copy the dummy HESA XML we've just processed to the ETL input BLOB container
        output_container_name = os.environ["EtlInputContainerName"]
        dummy_etl_blob_name = os.environ["DummyEtlBlobName"]
        source_url = os.environ["CreateUkrlpSourceUrl"]
        source_url += xmlblob.name

        blob_filename = xmlblob.name.split("/")[1]
        destination_blob_name = f"{create_ukrlp_start_datetime}-{blob_filename}"

        logging.info(f"Copy the XML we have processed to {destination_blob_name}")
        blob_service.copy_blob(
            container_name=output_container_name,
            blob_name=destination_blob_name,
            copy_source=source_url,
        )

        create_ukrlp_end_datetime = datetime.today().strftime("%Y%m%d %H%M%S")
        logging.info(
            f"CreateUkrlp successfully finished on {create_ukrlp_end_datetime}")

    except Exception as e:
        # Unexpected exception
        logging.error("Unexpected exception")
        logging.error(traceback.format_exc())

        # Raise to Azure
        raise e
class WABS(Storage): """ A class for managing objects on Windows Azure Blob Storage. It implements the interface of Storage base class """ def __init__(self, account_name, container_name, sas_token): """Setup a Windows azure blob storage client object :param str account_name: Azure blob storage account name for connection :param str container_name: Name of container to be accessed in the account :param str sas_token: Shared access signature token for access """ self.sas_token = sas_token self.container_name = container_name # The socket_timeout is passed on to the requests session # which executes the HTTP call. Both read / connect timeouts # are set to 60s self.client = BlockBlobService(account_name=account_name, sas_token=self.sas_token) logger.debug("Created wabs client object: {0}".format(self.client)) @classmethod def get_retriable_exceptions(cls, method_name=None): """Return exceptions that should be retried for specified method of class :param str method_name: A method of class for which retriable exceptions should be searched :returns: A tuple of exception class to be retried :rtype: tuple """ if method_name == 'delete_key': return () return (AzureException, ) def get_url_prefix(self): """Returns a connection string for the client object :returns: Connection string for the client object :rtype: str """ return '{}://{}/{}/'.format(self.client.protocol, self.client.primary_endpoint, self.container_name) def list_object_keys(self, prefix='', metadata=False, pagesize=1000): """List object keys matching a prefix for the WABS client :param str prefix: A prefix string to list objects :param bool metadata: If set to True, object metadata will be fetched with object. Default is False :param int pagesize: Maximum objects to be fetched in a single WABS api call. This is limited to upto 5000 objects in WABS :returns: A generator of object dictionary with key, size and last_modified keys. 
Metadata will be returned if set to True :rtype: Iterator[dict] """ logger.debug("Listing files for prefix: {0}".format(prefix)) include = Include(metadata=metadata) marker = None while True: if marker: logger.debug("Paging objects " "from marker '{0}'".format(marker)) objects = self.client.list_blobs(self.container_name, prefix=prefix, num_results=pagesize, include=include, marker=marker) for obj in objects: yield { 'key': obj.name, 'last_modified': obj.properties.last_modified, 'size': obj.properties.content_length, 'metadata': obj.metadata } if objects.next_marker: marker = objects.next_marker else: break def download_file(self, source_key, destination_file): """Download a object from WABS container to local filesystem :param str source_key: Key for object to be downloaded :param str destination_file: Path on local filesystem to download file :returns: Nothing :rtype: None """ self.client.get_blob_to_path(self.container_name, source_key, destination_file) def upload_file(self, destination_key, source_file, metadata=None): """Upload a file from local filesystem to WABS :param str destination_key: Key where to store object :param str source_file: Path on local file system for file to be uploaded :param dict metadata: Metadata to be stored along with object :returns: Nothing :rtype: None """ metadata = metadata or {} logger.debug("Uploading file {0} to prefix {1}".format( source_file, destination_key)) self.client.create_blob_from_path(self.container_name, destination_key, source_file, metadata=metadata) def upload_file_obj(self, destination_key, source_fd, metadata=None): """Upload a file from file object to WABS :param str destination_key: Key where to store object :param file source_fd: A file object to be uploaded :param dict metadata: Metadata to be stored along with object :returns: Nothing :rtype: None """ metadata = metadata or {} self.client.create_blob_from_stream(self.container_name, destination_key, source_fd, metadata=metadata) # FIXME: Need to fix this function to abort, if another copy is already # happening it should abort, or it should follow the ec2 behaviour def copy_from_key(self, source_key, destination_key, metadata=None): """Copy a WABS object from one key to another key on server side :param str source_key: Source key for the object to be copied :param str destination_key: Destination key to store object :param dict metadata: Metadata to be stored along with object :returns: Nothing :rtype: None """ metadata = metadata or {} logger.debug("Copying key {0} -> {1}".format(source_key, destination_key)) # If a previous copy was pending cancel it before # starting another copy for blob in self.client.list_blobs(self.container_name, prefix=destination_key): # There should only be one blob with the given key, # However list_blobs is the only exposed API to check # existance of blob without failures # AzureBlobStorage doesn't allow more than one pending # copies to the destination key try: self.client.abort_copy_blob(self.container_name, destination_key, blob.properties.copy.id) except AzureConflictHttpError: logger.info(('No copy in progress,' + ' Ignoring AzureConflictHttpError')) source_uri = self.client.make_blob_url(self.container_name, source_key, sas_token=self.sas_token) copy_properties = self.client.copy_blob(self.container_name, destination_key, source_uri, metadata=metadata) # Wait for the copy to be a success while copy_properties.status == 'pending': # Wait a second before retrying time.sleep(1) properties = self.client.get_blob_properties( self.container_name, 
destination_key) copy_properties = properties.properties.copy # TODO(vin): Raise Error if copy_properties errors out def delete_key(self, destination_key): """Delete an object from WABS :param str destination_key: Destination key for the object to be deleted :returns: Nothing :rtype: None """ logger.debug("Deleting key {0}".format(destination_key)) return self.client.delete_blob(self.container_name, destination_key)
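The copy_from_key method above relies on the service-side copy pattern: request the copy, then poll the destination blob's copy properties until the status leaves 'pending'. Below is a minimal sketch of that pattern against the legacy azure-storage-blob 2.x SDK; the account credentials, container, and key names are placeholders, not values from this codebase.

import time
from azure.storage.blob import BlockBlobService

service = BlockBlobService(account_name='myaccount', account_key='<key>')
container = 'backups'
source_key, destination_key = 'daily/db.dump', 'weekly/db.dump'

# Build the source URL and start a server-side copy.
source_url = service.make_blob_url(container, source_key)
props = service.copy_blob(container, destination_key, source_url)

# copy_blob is asynchronous on the service side; poll until it settles.
while props.status == 'pending':
    time.sleep(1)
    props = service.get_blob_properties(container, destination_key).properties.copy

if props.status != 'success':
    raise RuntimeError('Copy failed: {0}'.format(props.status_description))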
class SubmissionManager: def __init__(self): self.config = Config() self.block_blob_service = BlockBlobService( account_name=self.config.account_name(), account_key=self.config.account_key()) # not processed/verified submissions self.upload_container = 'uploaded-submissions' self.block_blob_service.create_container(self.upload_container) # processed submissions self.processed_submissions_container = 'processed-submissions' self.block_blob_service.create_container( self.processed_submissions_container) if not os.path.exists(self.config.bots_test_dir()): os.makedirs(self.config.bots_test_dir()) if not os.path.exists(self.config.bots_dir()): os.makedirs(self.config.bots_dir()) def get_uploaded_submissions(self): return self.block_blob_service.list_blobs(self.upload_container) def remove_uploaded_submission(self, file_name): self.block_blob_service.delete_blob(self.upload_container, file_name) # temp_full_path_filename will be deleted after uploading to blob container def upload_submission(self, temp_full_path_filename, remove=False): print("upload " + temp_full_path_filename) self.block_blob_service.create_blob_from_path( self.upload_container, ntpath.basename(temp_full_path_filename), temp_full_path_filename) if remove: os.remove(temp_full_path_filename) def download_submission(self, file_name): blob_url = self.block_blob_service.make_blob_url( self.upload_container, file_name) print("download " + blob_url) download_file = os.path.join(self.config.bots_test_dir(), file_name) self.block_blob_service.get_blob_to_path(self.upload_container, file_name, download_file) return download_file def move_submission_to_processed(self, file_name): blob_url = self.block_blob_service.make_blob_url( self.upload_container, file_name) blob_processed_url = self.block_blob_service.make_blob_url( self.processed_submissions_container, file_name) print("move submission {} to valid submissions {}".format( blob_url, blob_processed_url)) self.block_blob_service.copy_blob(self.processed_submissions_container, file_name, blob_url) self.block_blob_service.delete_blob(self.upload_container, file_name) self.move_submission_dir(file_name) def move_submission_dir(self, file_name): bot_test_dir = self.get_test_bot_dir(file_name) bot_tournament_dir = self.get_tournament_bot_dir(file_name) if os.path.exists(bot_tournament_dir): print("delete existing dir " + bot_tournament_dir) shutil.rmtree(bot_tournament_dir) result = shutil.move(bot_test_dir, self.config.bots_dir()) print(result) print("moved bot dir from {} to {}".format(bot_test_dir, bot_tournament_dir)) def get_test_bot_dir(self, file_name): return os.path.join(self.config.bots_test_dir(), os.path.splitext(file_name)[0]) def get_tournament_bot_dir(self, file_name): return os.path.join(self.config.bots_dir(), os.path.splitext(file_name)[0]) def extract_submission(self, file_name): full_path_to_file = os.path.join(self.config.bots_test_dir(), file_name) extract_dir = self.get_test_bot_dir(file_name) zip_ref = zipfile.ZipFile(full_path_to_file, 'r') zip_ref.extractall(path=extract_dir) zip_ref.close() return extract_dir def get_processed_submissions(self): return self.block_blob_service.list_blobs( self.processed_submissions_container)
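A hedged usage sketch of SubmissionManager: it assumes Config points at a valid storage account and that every uploaded blob is a zip archive, which is what extract_submission expects.

manager = SubmissionManager()

for blob in manager.get_uploaded_submissions():
    local_zip = manager.download_submission(blob.name)   # fetch into bots_test_dir
    bot_dir = manager.extract_submission(blob.name)      # unzip next to it
    print('extracted {0} to {1}'.format(local_zip, bot_dir))
    manager.move_submission_to_processed(blob.name)      # server-side copy, then delete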
class AzureBlobService(object): """ Application interface to access <Azure Blob Storage Service>. A wrapper of the module 'BlockBlobService' from azure SDK for python. """ blob_pattern = 'http://([\w\.]+)/(\w+)/(.*)' def __init__(self, settings_dict): # Set settings for azure connections self.settings_dict = settings_dict self.widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] self.account = settings_dict['ACCOUNT'] self.host = settings_dict['ACCOUNT'] + '.blob.' + settings_dict[ 'ENDPOINT'] logger.debug("Connectings to '%s'..." % self.host) self.block_blob_service = BlockBlobService( account_name=settings_dict['ACCOUNT'], account_key=settings_dict['KEY'], endpoint_suffix=settings_dict['ENDPOINT']) logger.debug("Connection established.") def create_container(self, container_name, set_public=False): """ Create a azure blob container. """ logger.debug("Creating container [%s] on '%s'." % (container_name, self.host)) if set_public: public_access = PublicAccess.Container logger.debug("Set container [%s] access to public." % container_name) else: public_access = None try: result = self.block_blob_service.create_container( container_name, fail_on_exist=True, timeout=self.settings_dict['TIMEOUT'], public_access=public_access) except AzureConflictHttpError as e: logger.error("The specified container [%s] already exists." % container_name) result = False logger.info("Container created: %s." % container_name) return result def list_containers(self, prefix=None): logger.debug("Request sent to list all containers on '%s'." % self.host) # An iterator to list all containers on blob icontainers = self.block_blob_service.list_containers( prefix=prefix, timeout=self.settings_dict['TIMEOUT']) # Converts an iterator to list container_names = [container for container in icontainers] logger.info("%d containers found on '%s'." % (len(container_names), self.host)) return container_names def list_blobs(self, container_name, prefix=None, suffix=None): """ Lists all blobs on the container, note that the blob_names returned are posix-style path, no matter what names were when create. """ blob_names = [] logger.debug("Request to list blobs in container [%s]." % container_name) try: # An iterator to iblobs = self.block_blob_service.list_blobs( container_name, prefix=prefix, timeout=self.settings_dict['TIMEOUT']) if suffix: blob_names = [ blob.name for blob in iblobs if blob.name.endswith(suffix) ] else: blob_names = [blob.name for blob in iblobs] except AzureMissingResourceHttpError as e: logger.error("The specified container [%s] does not exist." % container_name) logger.info("%d blobs found on [%s]." % (len(blob_names), container_name)) return blob_names def create_blob_from_path(self, container_name, blob_name, filepath): """ Uploads a file to the container. Returns an instance of `Blob` with properties and metadata. """ if not os.path.exists(filepath): logger.error("File doesn't exist: %s." % filepath) return None logger.debug("Creates blob '{}'@[{}]".format(blob_name, container_name)) blob = self.block_blob_service.create_blob_from_path( container_name, blob_name, filepath) return blob def upload(self, container_name, blob_pairs, overwrite=False): """ Uploads files to the container on Azure. Note that 'blob_name' uploaded will be converted to posix-style names, which means sep for path is '/'. `blob_pairs` A tuple consists of 2 elements, blob_name and its filepath on local filesystem. 
""" if not self.block_blob_service.exists(container_name): logger.info("Container [%s] which upload to doesn't exist, " "creating now." % container_name) self.create_container(container_name, set_public=True) blobs = [] blobs_in_container = self.list_blobs(container_name) for blob_name, filepath in progressbar.progressbar(\ blob_pairs, widgets=self.widgets): posix_blob_name = ppath(blob_name) if overwrite or (posix_blob_name not in blobs_in_container): self.create_blob_from_path(container_name, posix_blob_name, filepath) blobs.append(posix_blob_name) logger.info("Uploaded %d files to [%s]." % (len(blobs), container_name)) return blobs def get_blob_to_path(self, container_name, blob_name, filepath): """ Gets a blob from the container. The filepath would be returned if gotten successfully. """ dirpath = os.path.dirname(filepath) if not os.path.exists(dirpath): logger.debug("Directory '%s' does not exist, creating now..." % dirpath) os.makedirs(dirpath) # TODO: changes filepath to local-filesystem logger.debug("Gets blob '{}' from [{}]".format(blob_name, container_name)) blob = self.block_blob_service.get_blob_to_path( container_name, blob_name, filepath) return blob def download(self, container_name, dest, blob_names=None): """ Get blobs from the container to the `dest` directory. """ blobs = [] if not self.block_blob_service.exists(container_name): logger.error("Container [%s] does not exist, aborted." % container_name) return blobs # Get the list of blobs and then do comparision would be much more efficient blobs_in_container = self.list_blobs(container_name) # Get all blobs if blob_names was not specified if not blob_names: blob_names = blobs_in_container for blob_name in progressbar.progressbar(\ blob_names, widgets=self.widgets): if ppath(blob_name) in blobs_in_container: dest_filepath = normpath(safe_join(dest, blob_name)) # TODO: not sure posix-style path works for files on container # are windows-style self.get_blob_to_path(container_name, ppath(blob_name), dest_filepath) logger.debug("Got blob '{}' to '{}'.".format( blob_name, dest_filepath)) blobs.append(blob_name) else: logger.warning( "Blob name '{}' specified does not exist.".format( blob_name)) return blobs def get_blob_to_text(self, container_name, blob_name): pass def get_blobs(self, container_name, blob_names=None): pass def set_container_acl(self, container_name, set_public=True): """ Set container access permission to Public. """ if set_public: logger.info("Set public read access to container [%s]." % container_name) public_access = PublicAccess.Container else: logger.info("Set public read access to blobs on [%s]." % container_name) public_access = PublicAccess.Blob self.block_blob_service.set_container_acl(container_name, public_access=public_access) def delete_blobs(self, container_name, blob_names): """ Removes blobs from the container. """ blobs = [] for blob_name in blob_names: try: blob = self.block_blob_service.delete_blob( container_name, blob_name) logger.info("Delete the blob '%s' from container [%s]." % (blob_name, container_name)) blobs.append(blob) except AzureMissingResourceHttpError as e: logger.warning( "The sepcified blob '%s' on [%s] does not exist." % (blob_name, container_name)) return blobs def copy_blobs(self, blob_names, container_name, src_container=None, pattern=None): """ Copy blobs listed in `blob_names` to the dest container. 
`src_container` if src_container was given, blob_names are OK to be relative path to the container, and will be extended to `http://self.host/src_container/blob_name` `pattern` if src_container and pattern was given and blob_names was None, copies blobs in the src_container meanwhile matches the pattern to dest container. """ if blob_names == None: if src_container: blobs_in_container = self.list_blobs(src_container) matchfn = get_matchfn(pattern, True) # gets blobs from the src_container which matches the pattern(with ignorecase) blob_names = filter(lambda x: matchfn(x), blobs_in_container) else: raise ImproperlyConfigured( "Method `copy_blobs` is ought to be called with " "`src_container` given if blob_names was set to None.") if src_container: urls = [] for blob_name in blob_names: # not absolute url path if not blob_name.startswith('http'): # extends with the account and container blob_name = "http://{}/{}/{}".format( self.host, src_container, blob_name) urls.append(escape_uri_path(blob_name)) blob_names = urls blobs = [] logger.info("Will copy {} blobs to [{}].".format( len(blob_names), container_name)) for copy_source in progressbar.progressbar(blob_names, widgets=self.widgets): r = re.match(self.blob_pattern, copy_source) if r: blob_name = r.group(3) else: logger.error("Blob name specified must be a url: '{}'.".format( copy_source)) continue self.block_blob_service.copy_blob(container_name, \ blob_name, copy_source) logger.debug("Copied '{}' to '{}'.".format(copy_source, blob_name)) blobs.append(blob_name) return blobs def copy_container(self, src_container, dst_container, pattern=None): """ Copies blobs in `src_container` meanwhile match the `pattern`. """ # creates container if not exists self.create_container(dst_container, set_public=True) logger.info("Copy blobs from [{}] to [{}]".format( src_container, dst_container)) self.copy_blobs(None, dst_container, src_container=src_container, pattern=pattern)
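A hedged usage sketch of AzureBlobService: the settings keys mirror the constructor, the values are placeholders, and the exact semantics of `pattern` depend on the project's get_matchfn helper, which is not shown here.

settings = {
    'ACCOUNT': 'myaccount',
    'KEY': '<storage-key>',
    'ENDPOINT': 'core.windows.net',
    'TIMEOUT': 30,
}
svc = AzureBlobService(settings)

# Upload one local file (blob_name, filepath) pair, then mirror matching
# blobs into a backup container with server-side copies.
svc.upload('images', [('base/ubuntu.vhd', '/data/ubuntu.vhd')])
svc.copy_container('images', 'images-backup', pattern='*.vhd')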
def rendered_video(request): ism_uri = '' vtt_uri = '' template = loader.get_template('app/rendered_video.html') vidstatus = 'No Running Job Found.' # Get the next message from the queue queue_service = QueueService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY']) messages = queue_service.get_messages(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], num_messages=1, visibility_timeout=1*60) for message in messages: vidstatus = 'Rendering: ' + message.content message_obj = json.loads(message.content) access_token = ams_authenticate()['access_token'] # Get the details about the job job = ams_get_request(access_token, message_obj['job']['__metadata']['uri']) # is it done? if job['State'] == 3: vidstatus = 'Done Rendering: ' + message.content #get a reference to our storage container block_blob_service = BlockBlobService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY']) #get a list of all the input and output assets associated to our job input_assets = ams_get_request(access_token, message_obj['job']['InputMediaAssets']['__deferred']['uri']) output_assets = ams_get_request(access_token, message_obj['job']['OutputMediaAssets']['__deferred']['uri']) #look through the input and output assets to figure out what one is for the indexer and for the Adaptive streaming files index_asset = '' stream_asset = '' for output_asset in output_assets['value']: if output_asset['Name'].endswith('- Indexed'): index_asset = output_asset elif output_asset['Name'].endswith('- MES v1.1'): stream_asset = output_asset #Get the storage container names for each dest_container = urllib.parse.urlparse(stream_asset['Uri']).path[1:] src_container = urllib.parse.urlparse(index_asset['Uri']).path[1:] #loop over the indexer output files copying them to the adaptive streaming container src_blobs = block_blob_service.list_blobs(src_container) for src_blob in src_blobs: block_blob_service.copy_blob(dest_container, src_blob.name, output_asset['Uri'] + '/' + src_blob.name) #create the access policy if it doen't exist access_policies = ams_get_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'AccessPolicies') access_policy_id = '' for access_policy in access_policies['value']: if access_policy['Name'] == 'StreamingAccessPolicy': access_policy_id = access_policy['Id'] if access_policy_id == '': access_policy = ams_verbose_post_request(access_token, 'AccessPolicies', { 'Name': 'StreamingAccessPolicy', 'DurationInMinutes': '52594560', 'Permissions': '9' }) access_policy_id = access_policy['d']['Id'] #create the locator locator = ams_verbose_post_request(access_token, 'Locators', { 'AccessPolicyId': access_policy_id, 'AssetId': stream_asset['Id'], 'Type': 2 }) #get the URLs to the streaming endpoint and the vtt file locator_asset_files = ams_get_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + locator['d']['AssetId'] + '\')/Files') for locator_asset_file in locator_asset_files['value']: if locator_asset_file['Name'].endswith('.ism'): ism_uri = locator['d']['Path'] + locator_asset_file['Name'] + '/manifest' vtt_uri = locator['d']['Path'] + message_obj['filename'] + '.vtt' #delete the job ams_delete_request(access_token, message_obj['job']['__metadata']['uri']) #delete the unused assets ams_delete_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + index_asset['Id'] + '\')') ams_delete_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + input_assets['value'][0]['Id'] + 
'\')') #add the video to the database client = document_client.DocumentClient(os.environ['DOCUMENT_ENDPOINT'], {'masterKey': os.environ['DOCUMENT_KEY']}) db = docdb_CreateDatabaseIfNotExists(client, 'svpd') collection = docdb_CreateCollectionIfNotExists(client, db, 'videos') doc = client.CreateDocument(collection['_self'], { 'id': message_obj['folder'].replace('/', '.'), 'filename': message_obj['filename'], 'vtt_uri': vtt_uri, 'ism_uri': ism_uri }) #remove the message from the queue queue_service.delete_message(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], message.id, message.pop_receipt) return HttpResponse(template.render({ 'vidstatus': vidstatus, 'vtt_uri': vtt_uri, 'ism_uri': ism_uri }, request))
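Both render_video and rendered_video lean on the same queue hand-off: fetch one message with a visibility timeout, do the work, and only delete the message once processing succeeds, so a crashed worker lets the message reappear and be retried. A minimal sketch of that pattern with the legacy azure-storage-queue 2.x QueueService; the queue name, credentials, and the handle_render_request worker are placeholders.

import json
from azure.storage.queue import QueueService

queue_service = QueueService(account_name='myaccount', account_key='<key>')
queue_name = 'ready-to-encode'

messages = queue_service.get_messages(queue_name, num_messages=1,
                                       visibility_timeout=60)
for message in messages:
    payload = json.loads(message.content)
    handle_render_request(payload)  # hypothetical worker; if it raises, the message
                                    # becomes visible again after the timeout
    queue_service.delete_message(queue_name, message.id, message.pop_receipt)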
class StorageHelper: def __init__(self, storage_client): self.storage_client = storage_client def is_storage_account_name_available(self, storage_account_name): return self.storage_client.storage_accounts.check_name_availability( storage_account_name) def create_storage_account_async(self, storage_account_name, resource_group, **kwargs): storage_params = azure.mgmt.storage.models.StorageAccountCreateParameters( sku=azure.mgmt.storage.models.Sku(name='standard_lrs'), kind=azure.mgmt.storage.models.Kind.storage, location=kwargs['storage_location']) async_storage_creation = self.storage_client.storage_accounts.create( resource_group, storage_account_name, storage_params) storage_account = async_storage_creation.result() def get_storage_account_names(self, resource_group): storage_account_list = self.storage_client.storage_accounts.list_by_resource_group( resource_group) return [item.name for item in storage_account_list] def get_storage_account_properties(self, storage_account_name, resource_group): return self.storage_client.storage_accounts.get_properties( resource_group, storage_account_name) def get_storage_account_key(self, storage_account_name, resource_group): storage_keys = self.storage_client.storage_accounts.list_keys( resource_group, storage_account_name) if storage_keys is not None: return {v.key_name: v.value for v in storage_keys.keys}['key1'] else: return None def initialize_block_blob_service(self, storage_account_name, storage_key, blob_container_name): self.storage_account_name = storage_account_name self.storage_key = storage_key self.blob_container_name = blob_container_name self.block_blob_service = BlockBlobService( account_name=self.storage_account_name, account_key=self.storage_key) def create_blob_container(self): return self.block_blob_service.create_container( self.blob_container_name) def get_blob_container(self): containers = self.block_blob_service.list_containers( self.blob_container_name) return next(c for c in containers if c.name == self.blob_container_name) def copy_vhd(self, file_name, file_path): status = self.block_blob_service.copy_blob(self.blob_container_name, file_name, file_path) if status.status == 'pending': time.sleep(120) def generate_blob_container_sas_url(self, expiration_in_days): container_permission = ContainerPermissions(read=True, write=True, list=True) return self.block_blob_service.generate_container_shared_access_signature( container_name=self.blob_container_name, permission=container_permission, protocol='https', start=datetime.now(), expiry=datetime.now() + timedelta(days=expiration_in_days)) def build_upload_container_path(self, target_os_type, sas_url): return 'https://{0}.blob.core.windows.net/{1}/{2}/{3}?{4}'.format( self.storage_account_name, self.blob_container_name, target_os_type.lower(), 'piresults.json', sas_url)
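StorageHelper.generate_blob_container_sas_url issues a time-limited container SAS and build_upload_container_path appends it to a blob URL. A minimal sketch of that flow with BlockBlobService directly; account, container, and blob names are placeholders.

from datetime import datetime, timedelta
from azure.storage.blob import BlockBlobService, ContainerPermissions

service = BlockBlobService(account_name='myaccount', account_key='<key>')
container = 'results'

# Sign the whole container for read/list access over HTTPS for seven days.
sas = service.generate_container_shared_access_signature(
    container_name=container,
    permission=ContainerPermissions(read=True, list=True),
    protocol='https',
    start=datetime.utcnow(),
    expiry=datetime.utcnow() + timedelta(days=7))

signed_url = 'https://{0}.blob.core.windows.net/{1}/report.json?{2}'.format(
    'myaccount', container, sas)
print(signed_url)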
class InstanceStorage(object): def __init__(self, group_name, location=None, create_if_not_exist=True): client = arm.instance() sm = client.storage.storage_accounts # Check existence of a storage account in the resource group # TODO : better with rm.list_resources for direct filtering # but issing doc on Genric filtering format # So, taking the first result of the iterator : Ouch ! new=True for sa in sm.list_by_resource_group(group_name): new=False self.name = sa.name self.location=sa.location logger.debug("Found SA %s" % self.name) break if new: logger.info("Creating storage account...") #Generating unique name for Azure unique_name = "%s%s" % ( str(group_name).translate(None, '-_.').lower(), arm.id_generator() ) # TODO : Check how to deal with account type... # Warning : the name of the storageaccount cannot be > 24 chars self.location=location result = sm.create( group_name, unique_name[:24], StorageAccountCreateParameters( location=self.location, account_type=AccountType.standard_lrs ) ) # Asysnchronous operation, so wait... res = result.result() self.name = res.name # retrieve the keys and store them in the instance self.keys = sm.list_keys(group_name, self.name) logger.debug("Key1 : %s " % repr(self.keys.key1)) logger.debug("Key2 : %s " % repr(self.keys.key2)) # retrieve the blob service self.blob = BlockBlobService( account_name=self.name, account_key=self.keys.key1 ) # Define the storage tree : # sources for the images imported to create the VM # vhds for the VM images self.sources_container= "sources" self.vhds_container= "vhds" self.blob.create_container(self.sources_container) self.blob.create_container(self.vhds_container) def list_blobs(self): for blob in self.blob.list_blobs('system'): print(blob.name) def copy_source_images_from(self, source_storage, container, filepath): # Generate a token for 10 minutes read access token = source_storage.blob.generate_blob_shared_access_signature( container, filepath, BlobPermissions.READ, datetime.utcnow() + timedelta(minutes=10), ) # Generate the sour URL of the blob source_url = source_storage.blob.make_blob_url( container, filepath, sas_token=token ) # Launch the copy from the distant storage to the current one self.blob.copy_blob( self.sources_container, os.path.basename(filepath), source_url )
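copy_source_images_from performs a cross-account copy: it signs the source blob with a short-lived read SAS, builds a URL from it, and lets the destination account pull the blob server-side. A minimal sketch of that sequence; both accounts, the container, and the blob names are placeholders.

import os
from datetime import datetime, timedelta
from azure.storage.blob import BlockBlobService, BlobPermissions

source = BlockBlobService(account_name='srcaccount', account_key='<src-key>')
dest = BlockBlobService(account_name='dstaccount', account_key='<dst-key>')

# Ten minutes of read access on the source blob is enough to start the copy.
token = source.generate_blob_shared_access_signature(
    'images', 'base/ubuntu.vhd', BlobPermissions.READ,
    datetime.utcnow() + timedelta(minutes=10))
source_url = source.make_blob_url('images', 'base/ubuntu.vhd', sas_token=token)

dest.create_container('sources')
dest.copy_blob('sources', os.path.basename('base/ubuntu.vhd'), source_url)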
class BlobStorage(Storage): _az_logger = logging.getLogger('az') def __init__(self, auth: AzureAuth, account_name: str, container_name: str, base_path: str = ""): Storage.__init__(self, base_path=base_path) self.container_name = container_name self.blob_service = BlockBlobService( account_name=account_name, account_key=auth.key_token, token_credential=TokenCredential(auth.service_principal_token), connection_string=auth.connection_string_token) ################ # OBJECT ADMIN # ################ def _list_blob_objects( self, prefix: str, filter_filename: Union[None, str] = None, filter_extension: Union[None, str, tuple] = None) -> Generator: objects_generator = self.blob_service.list_blobs(self.container_name, prefix=prefix) for key in objects_generator: filtered_key = self._filter_key(key, filter_filename, filter_extension) if filtered_key is not None: yield filtered_key @staticmethod def _filter_key(key, filter_filename, filter_extension): key_name = key.name if (filter_filename is not None and filter_filename not in key_name) or (filter_extension is not None and not key_name.endswith(filter_extension)): return None else: return key def list_objects( self, folder: Union[None, str] = None, filter_filename: Union[None, str] = None, filter_extension: Union[None, str, tuple] = None) -> Generator: return self._list_blob_objects(self._get_folder_path(folder), filter_filename=filter_filename, filter_extension=filter_extension) def _blob_copy(self, dest_container_name: str, blob_name: str, dest_object_name: Union[str, None], remove_copied: bool): if dest_object_name is None and dest_container_name == self.container_name: self._az_logger.warning(f'This config does not move the object') else: if dest_object_name is None and dest_container_name != self.container_name: dest_object_name = blob_name blob_url = self.blob_service.make_blob_url(self.container_name, blob_name) self.blob_service.copy_blob(dest_container_name, dest_object_name, blob_url) self._az_logger.debug( f'{blob_name} copied from {self.container_name} to {dest_container_name}' ) if remove_copied: self.blob_service.delete_blob(self.container_name, blob_name) self._az_logger.debug( f'{blob_name} removed from {self.container_name}') def move_object(self, dest_storage_name: str, files_to_move: Union[str, list, Generator], dest_object_name: Union[str, None] = None, remove_copied: bool = False): if isinstance(files_to_move, str): self._blob_copy(dest_storage_name, files_to_move, dest_object_name, remove_copied) else: for blob in files_to_move: if isinstance(blob, Blob): self._blob_copy(dest_storage_name, blob.name, dest_object_name, remove_copied) elif isinstance(blob, str): self._blob_copy(dest_storage_name, blob, dest_object_name, remove_copied) ########### # READERS # ########### @contextmanager def _read_to_buffer(self, path): self._az_logger.debug(f'Reading from {self.container_name}: {path}') with io.BytesIO() as buff: buff = self.blob_service.get_blob_to_bytes( container_name=self.container_name, blob_name=path).content yield io.BytesIO(buff) @contextmanager def _read_to_str_buffer(self, path): self._az_logger.debug(f'Reading from {self.container_name}: {path}') with io.StringIO() as buff: buff = self.blob_service.get_blob_to_text( container_name=self.container_name, blob_name=path).content yield io.StringIO(buff) def read_csv(self, filename: str, folder: Union[str, None] = None, **kwargs): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return pd.read_csv(buff, **kwargs) def read_excel(self, filename: str, 
folder: Union[str, None] = None, **kwargs): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return pd.read_excel(buff, **kwargs) def read_parquet(self, filename: str, folder: Union[str, None] = None, **kwargs): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return pd.read_parquet(buff, **kwargs) def read_yaml(self, filename: str, folder=None, yaml_loader=yaml.FullLoader): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return yaml.load(buff, Loader=yaml_loader) def read_json(self, filename: str, folder=None, **kwargs): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return json.load(buff, **kwargs) def read_object(self, filename: str, folder: Union[str, None] = None, **kwargs): with self._read_to_buffer(self._get_full_path(filename, folder)) as buff: return buff.read(**kwargs) def read_object_to_file(self, blob_object: Blob, filename: Union[str, None] = None, folder: Union[str, None] = None, **kwargs): object_filename_full, filename = self._create_local_path( blob_object.name, filename, folder) with open(filename, 'wb') as f: self._az_logger.debug( f'Downloading {object_filename_full} to {filename}') retrieved_blob = self.blob_service.get_blob_to_bytes( self.container_name, object_filename_full) f.write(retrieved_blob.content) ########### # WRITERS # ########### def _get_bucket_path(self, filename: str, folder: Union[str, None] = None): bucket_path = self._get_full_path(filename, folder) self._az_logger.debug(f'Writing in: {bucket_path}') return bucket_path def write_csv(self, df: pd.DataFrame, filename: str, folder: Union[str, None] = None, **kwargs): with io.StringIO() as buff: df.to_csv(buff, **kwargs) self.blob_service.create_blob_from_text( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), text=buff.getvalue()) def write_excel(self, df: pd.DataFrame, filename: str, folder: Union[str, None] = None, **kwargs): with io.BytesIO() as buff: df.to_excel(buff, **kwargs) self.blob_service.create_blob_from_bytes( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), blob=buff.getvalue()) def write_parquet(self, df: pd.DataFrame, filename: str, folder: Union[str, None] = None, **kwargs): with io.BytesIO() as buff: df.to_parquet(buff, **kwargs) self.blob_service.create_blob_from_bytes( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), blob=buff.getvalue()) def write_yaml(self, data: dict, filename: str, folder: Union[str, None] = None, **kwargs): with io.StringIO() as buff: yaml.dump(data, buff, **kwargs) self.blob_service.create_blob_from_text( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), text=buff.getvalue()) def write_json(self, data: dict, filename: str, folder: Union[str, None] = None, **kwargs): with io.StringIO() as buff: json.dump(data, buff, **kwargs) self.blob_service.create_blob_from_text( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), text=buff.getvalue()) def write_object(self, write_object, filename: str, folder: Union[str, None] = None, **kwargs): if isinstance(write_object, bytes): self.blob_service.create_blob_from_bytes( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), blob=write_object) elif isinstance(write_object, io.BytesIO): self.blob_service.create_blob_from_bytes( container_name=self.container_name, blob_name=self._get_bucket_path(filename, 
folder), blob=write_object.getvalue()) else: self.blob_service.create_blob_from_stream( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), stream=write_object) def write_object_from_file(self, object_filename: str, filename: str, folder: Union[str, None] = None, **kwargs): self.blob_service.create_blob_from_path( container_name=self.container_name, blob_name=self._get_bucket_path(filename, folder), file_path=object_filename, **kwargs)
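The readers and writers in BlobStorage all follow the same buffer round-trip: serialise into an in-memory StringIO/BytesIO, upload the buffer contents, and on the way back wrap the downloaded bytes in a buffer for pandas to parse. A minimal sketch of that round-trip with BlockBlobService and pandas; the account, container, and blob names are placeholders.

import io
import pandas as pd
from azure.storage.blob import BlockBlobService

service = BlockBlobService(account_name='myaccount', account_key='<key>')
container, blob_name = 'data', 'reports/metrics.csv'

df = pd.DataFrame({'day': ['mon', 'tue'], 'count': [3, 5]})

# Write: DataFrame -> StringIO -> blob
with io.StringIO() as buff:
    df.to_csv(buff, index=False)
    service.create_blob_from_text(container, blob_name, buff.getvalue())

# Read: blob -> bytes -> BytesIO -> DataFrame
content = service.get_blob_to_bytes(container, blob_name).content
round_tripped = pd.read_csv(io.BytesIO(content))
print(round_tripped)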
class BlobStorageService: def __init__(self, account_name, key): self.__blockblob_service = BlockBlobService(account_name=account_name, account_key=key) def create_container(self, container_name): self.__blockblob_service.create_container(container_name) def delete_container(self, container_name): self.__blockblob_service.delete_container(container_name) def upload_file(self, container_name, filename, local_file, delete_local_file=False): self.create_container(container_name) return self.__upload_file(container_name, filename, local_file, delete_local_file) def upload_file_from_bytes(self, container_name, filename, blob): self.create_container(container_name) return self.__upload_file_from_bytes(container_name, filename, blob) def upload_directory(self, container_name, directory, storage_path=""): self.create_container(container_name) files = self.__get_files(directory) directories = self.__get_directories(directory) blobs = list( map( lambda file: self.__upload_file( container_name, os.path.join(storage_path, os.path.basename(file)), os.path.join(directory, file)), files)) return blobs + list( map( lambda dir: self.upload_directory(container_name, os.path.join(directory, dir), storage_path), directories)) def list_blobs(self, container_name, prefix): return self.__blockblob_service.list_blobs(container_name, prefix) def download_blob(self, container_name, blob_name, local_file=None): local_file = blob_name if local_file == None else local_file self.__create_local_dir(os.path.split(local_file)[0]) self.__blockblob_service.get_blob_to_path(container_name, blob_name, local_file) def download_blob_bytes(self, container_name, blob_name, local_file=None): b = self.__blockblob_service.get_blob_to_bytes(container_name, blob_name) return b def download_blobs(self, container_name, local_path="", blob_path=""): blobs = self.__get_blobs_in_path(container_name, blob_path) return blobs def download_all_blobs(self, container_name, local_path="", blob_path=""): blobs = self.__get_blobs_in_path(container_name, blob_path) base = self.__create_local_dir(local_path) list( map( lambda blob: self.download_blob(container_name, blob.name, os.path.join(base, blob.name)), blobs)) def delete_blob(self, container, blob_name): self.__blockblob_service.delete_blob(container, blob_name) def __upload_file(self, container_name, filename, local_file, delete_local_file=False): blob = self.__blockblob_service.create_blob_from_path( container_name, filename, local_file, content_settings=ContentSettings( content_type=self.__get_mime_type(local_file))) if delete_local_file: os.remove(local_file) return blob def __upload_file_from_bytes(self, container_name, filename, blob): blob = self.__blockblob_service.create_blob_from_bytes( container_name, filename, blob, content_settings=ContentSettings( content_type=self.__get_mime_type(filename))) def copy_blob(self, container_name, blob_name, blob_url): self.__blockblob_service.copy_blob(container_name, blob_name, blob_url) def make_blob_url(self, container_name, blob_name, sas_token=''): return self.__blockblob_service.make_blob_url(container_name, blob_name, sas_token=sas_token) def generate_blob_shared_access_signature(self, container_name, blob_name): permission = ContainerPermissions(read=True, write=True) return self.__blockblob_service.generate_blob_shared_access_signature( container_name, blob_name, permission, protocol='https', start=datetime.datetime.utcnow(), expiry=datetime.datetime.utcnow() + timedelta(days=1)) def set_blob_metadata(self, container_name, blob_name, 
metadata): return self.__blockblob_service.set_blob_metadata( container_name, blob_name, metadata) def __get_mime_type(self, file_path): return mime_content_type(file_path) def __get_blobs_in_path(self, container_name, blob_path): blobs = self.list_blobs(container_name, None) if not blob_path: return blobs return list(filter(lambda blob: blob.name.startswith(blob_path), blobs)) def __create_local_dir(self, local_path): if local_path: os.makedirs(local_path, exist_ok=True) return os.path.join(os.getcwd(), local_path) def __get_directories(self, local_path): return [ file for file in os.listdir(local_path) if os.path.isdir(os.path.join(local_path, file)) ] def __get_files(self, local_path): return [ file for file in os.listdir(local_path) if os.path.isfile(os.path.join(local_path, file)) ]
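A hedged usage sketch of BlobStorageService: upload a local directory, list what landed in the container, and mirror it back to disk. The account name, key, and paths are placeholders, and it assumes mime_content_type is available for content-type detection as the class requires.

storage = BlobStorageService('myaccount', '<storage-key>')

storage.upload_directory('artifacts', '/tmp/build-output')      # recursive upload
for blob in storage.list_blobs('artifacts', None):
    print(blob.name)
storage.download_all_blobs('artifacts', local_path='restored')  # mirror locally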
class AzureTransfer(BaseTransfer): def __init__(self, account_name, account_key, bucket_name, prefix=None, azure_cloud=None): prefix = "{}".format(prefix.lstrip("/") if prefix else "") super().__init__(prefix=prefix) self.account_name = account_name self.account_key = account_key self.container_name = bucket_name try: endpoint_suffix = ENDPOINT_SUFFIXES[azure_cloud] except KeyError: raise InvalidConfigurationError("Unknown azure cloud {!r}".format(azure_cloud)) self.conn = BlockBlobService( account_name=self.account_name, account_key=self.account_key, endpoint_suffix=endpoint_suffix ) self.conn.socket_timeout = 120 # Default Azure socket timeout 20s is a bit short self.container = self.get_or_create_container(self.container_name) self.log.debug("AzureTransfer initialized, %r", self.container_name) def copy_file(self, *, source_key, destination_key, metadata=None, **kwargs): timeout = kwargs.get("timeout") or 15 source_path = self.format_key_for_backend(source_key, remove_slash_prefix=True, trailing_slash=False) destination_path = self.format_key_for_backend(destination_key, remove_slash_prefix=True, trailing_slash=False) source_url = self.conn.make_blob_url(self.container_name, source_path) start = time.monotonic() self.conn.copy_blob(self.container_name, destination_path, source_url, metadata=metadata, timeout=timeout) while True: blob_properties = self.conn.get_blob_properties(self.container_name, destination_path, timeout=timeout) copy_props = blob_properties.properties.copy if copy_props.status == "success": return elif copy_props.status == "pending": if time.monotonic() - start < timeout: time.sleep(0.1) else: self.conn.abort_copy_blob(self.container_name, destination_key, copy_props.id, timeout=timeout) raise StorageError( "Copying {!r} to {!r} did not complete in {} seconds".format(source_key, destination_key, timeout) ) elif copy_props.status == "failed": raise StorageError( "Copying {!r} to {!r} failed: {!r}".format(source_key, destination_key, copy_props.status_description) ) else: raise StorageError( "Copying {!r} to {!r} failed, unexpected status: {!r}".format( source_key, destination_key, copy_props.status ) ) def get_metadata_for_key(self, key): path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=False) items = list(self._iter_key(path=path, with_metadata=True, deep=False)) if not items: raise FileNotFoundFromStorageError(key) item, = items if item.type != KEY_TYPE_OBJECT: raise FileNotFoundFromStorageError(key) # it's a prefix return item.value["metadata"] def _metadata_for_key(self, path): return list(self._iter_key(path=path, with_metadata=True, deep=False))[0].value["metadata"] def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False): path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=not include_key) self.log.debug("Listing path %r", path) yield from self._iter_key(path=path, with_metadata=with_metadata, deep=deep) def _iter_key(self, *, path, with_metadata, deep): include = "metadata" if with_metadata else None kwargs = {} if path: # If you give Azure an empty path, it gives you an authentication error kwargs["prefix"] = path if not deep: kwargs["delimiter"] = "/" items = self.conn.list_blobs(self.container_name, include=include, **kwargs) for item in items: if isinstance(item, BlobPrefix): yield IterKeyItem(type=KEY_TYPE_PREFIX, value=self.format_key_from_backend(item.name).rstrip("/")) else: if with_metadata: # Azure Storage cannot handle '-' so we turn them into underscores and back again 
metadata = {k.replace("_", "-"): v for k, v in item.metadata.items()} else: metadata = None yield IterKeyItem( type=KEY_TYPE_OBJECT, value={ "last_modified": item.properties.last_modified, "metadata": metadata, "name": self.format_key_from_backend(item.name), "size": item.properties.content_length, }, ) def delete_key(self, key): key = self.format_key_for_backend(key, remove_slash_prefix=True) self.log.debug("Deleting key: %r", key) try: return self.conn.delete_blob(self.container_name, key) except azure.common.AzureMissingResourceHttpError as ex: # pylint: disable=no-member raise FileNotFoundFromStorageError(key) from ex def get_contents_to_file(self, key, filepath_to_store_to, *, progress_callback=None): key = self.format_key_for_backend(key, remove_slash_prefix=True) self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to) try: self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to) except azure.common.AzureMissingResourceHttpError as ex: # pylint: disable=no-member raise FileNotFoundFromStorageError(key) from ex if progress_callback: progress_callback(1, 1) return self._metadata_for_key(key) @classmethod def _parse_length_from_content_range(cls, content_range): """Parses the blob length from the content range header: bytes 1-3/65537""" if not content_range: raise ValueError("File size unavailable") return int(content_range.split(" ", 1)[1].split("/", 1)[1]) def _stream_blob(self, key, fileobj, progress_callback): """Streams contents of given key to given fileobj. Data is read sequentially in chunks without any seeks. This requires duplicating some functionality of the Azure SDK, which only allows reading entire blob into memory at once or returning data from random offsets""" file_size = None start_range = 0 chunk_size = self.conn.MAX_CHUNK_GET_SIZE end_range = chunk_size - 1 while True: try: # pylint: disable=protected-access blob = self.conn._get_blob(self.container_name, key, start_range=start_range, end_range=end_range) if file_size is None: file_size = self._parse_length_from_content_range(blob.properties.content_range) fileobj.write(blob.content) start_range += blob.properties.content_length if start_range == file_size: break if blob.properties.content_length == 0: raise StorageError("Empty response received for {}, range {}-{}".format(key, start_range, end_range)) end_range += blob.properties.content_length if end_range >= file_size: end_range = file_size - 1 if progress_callback: progress_callback(start_range, file_size) except azure.common.AzureHttpError as ex: # pylint: disable=no-member if ex.status_code == 416: # Empty file return raise def get_contents_to_fileobj(self, key, fileobj_to_store_to, *, progress_callback=None): key = self.format_key_for_backend(key, remove_slash_prefix=True) self.log.debug("Starting to fetch the contents of: %r", key) try: self._stream_blob(key, fileobj_to_store_to, progress_callback) except azure.common.AzureMissingResourceHttpError as ex: # pylint: disable=no-member raise FileNotFoundFromStorageError(key) from ex if progress_callback: progress_callback(1, 1) return self._metadata_for_key(key) def get_contents_to_string(self, key): key = self.format_key_for_backend(key, remove_slash_prefix=True) self.log.debug("Starting to fetch the contents of: %r", key) try: blob = self.conn.get_blob_to_bytes(self.container_name, key) return blob.content, self._metadata_for_key(key) except azure.common.AzureMissingResourceHttpError as ex: # pylint: disable=no-member raise FileNotFoundFromStorageError(key) 
from ex def get_file_size(self, key): key = self.format_key_for_backend(key, remove_slash_prefix=True) try: blob = self.conn.get_blob_properties(self.container_name, key) return blob.properties.content_length except azure.common.AzureMissingResourceHttpError as ex: # pylint: disable=no-member raise FileNotFoundFromStorageError(key) from ex def store_file_from_memory(self, key, memstring, metadata=None, cache_control=None, mimetype=None): if cache_control is not None: raise NotImplementedError("AzureTransfer: cache_control support not implemented") key = self.format_key_for_backend(key, remove_slash_prefix=True) content_settings = None if mimetype: content_settings = ContentSettings(content_type=mimetype) self.conn.create_blob_from_bytes( self.container_name, key, bytes(memstring), # azure would work with memoryview, but validates it's bytes content_settings=content_settings, metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_") ) def store_file_from_disk(self, key, filepath, metadata=None, multipart=None, cache_control=None, mimetype=None): if cache_control is not None: raise NotImplementedError("AzureTransfer: cache_control support not implemented") key = self.format_key_for_backend(key, remove_slash_prefix=True) content_settings = None if mimetype: content_settings = ContentSettings(content_type=mimetype) self.conn.create_blob_from_path( self.container_name, key, filepath, content_settings=content_settings, metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_") ) def store_file_object(self, key, fd, *, cache_control=None, metadata=None, mimetype=None, upload_progress_fn=None): if cache_control is not None: raise NotImplementedError("AzureTransfer: cache_control support not implemented") key = self.format_key_for_backend(key, remove_slash_prefix=True) content_settings = None if mimetype: content_settings = ContentSettings(content_type=mimetype) def progress_callback(bytes_sent, _): if upload_progress_fn: upload_progress_fn(bytes_sent) # Azure _BlobChunkUploader calls `tell()` on the stream even though it doesn't use the result. # We expect the input stream not to support `tell()` so use dummy implementation for it original_tell = getattr(fd, "tell", None) fd.tell = lambda: None try: self.conn.create_blob_from_stream( self.container_name, key, fd, content_settings=content_settings, metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_"), progress_callback=progress_callback ) finally: if original_tell: fd.tell = original_tell else: delattr(fd, "tell") def get_or_create_container(self, container_name): start_time = time.monotonic() self.conn.create_container(container_name) self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.monotonic() - start_time) return container_name
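AzureTransfer._stream_blob reads a large blob sequentially in ranges so the whole object never has to sit in memory, but it does so through the SDK-internal _get_blob call. Below is a minimal sketch of the same idea using only the public get_blob_to_bytes(start_range=..., end_range=...) API of azure-storage-blob 2.x; account, container, key, and the output path are placeholders.

from azure.storage.blob import BlockBlobService

service = BlockBlobService(account_name='myaccount', account_key='<key>')
container, key = 'backups', 'basebackup.tar'
chunk_size = 4 * 1024 * 1024  # 4 MiB per request

# Total size up front lets us compute the last valid byte offset.
size = service.get_blob_properties(container, key).properties.content_length

with open('/tmp/basebackup.tar', 'wb') as out:
    offset = 0
    while offset < size:
        end = min(offset + chunk_size, size) - 1
        blob = service.get_blob_to_bytes(container, key,
                                         start_range=offset, end_range=end)
        out.write(blob.content)
        offset += len(blob.content)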