Code Example #1
def render_video(request):
    template = loader.get_template('app/render_video.html')
    vidstatus = 'No Video Found.'

    queue_service = QueueService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY'])
    messages = queue_service.get_messages(os.environ['SVPD_STORAGE_ACCOUNT_READY_TO_ENCODE'], num_messages=1, visibility_timeout=1*60)
    
    for message in messages:
        vidstatus = 'Queued for Rendering: ' + message.content
        message_obj = json.loads(message.content)

        access_token = ams_authenticate()['access_token']
        
        asset = ams_post_request(access_token, "Assets", {
            'Name': message_obj['filename'], 
            'AlternateId': message_obj['folder']})
        
        asset_container = urllib.parse.urlparse(asset['Uri']).path[1:]

        asset_file = ams_post_request(access_token, "Files", {
            'IsEncrypted': 'false',
            'IsPrimary': 'false',
            'MimeType': 'video/mp4',
            'ContentFileSize': message_obj['size'],
            'Name': message_obj['filename'],
            'ParentAssetId': asset['Id']})

        block_blob_service = BlockBlobService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY'])
        from_url = block_blob_service.make_blob_url(os.environ['SVPD_STORAGE_ACCOUNT_UPLOADED'], message_obj['folder'] + '/' + message_obj['filename'])
        block_blob_service.copy_blob(asset_container, message_obj['filename'], from_url)

        job = ams_verbose_post_request(access_token, "Jobs", {
            'Name': message_obj['filename'], 
            'InputMediaAssets': [{
                '__metadata': { 'uri': os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + asset['Id'] + '\')' }
            }],
            'Tasks': [{
                'Name': 'Adaptive Streaming Task',
                'Configuration': 'Adaptive Streaming',
                'MediaProcessorId': 'nb:mpid:UUID:ff4df607-d419-42f0-bc17-a481b1331e56',
                'TaskBody': '<?xml version="1.0" encoding="utf-16"?><taskBody><inputAsset>JobInputAsset(0)</inputAsset><outputAsset assetCreationOptions="0" assetFormatOption="0" assetName="' + message_obj['filename'] + ' - MES v1.1" storageAccountName="' + os.environ['SVPD_STORAGE_ACCOUNT_NAME'] + '">JobOutputAsset(0)</outputAsset></taskBody>'
            },{
                'Name': 'Indexing Task',
                'Configuration': '<?xml version="1.0" encoding="utf-8"?><configuration version="2.0"><input><metadata key="title" value="blah" /></input><settings></settings><features><feature name="ASR"><settings><add key="Language" value="English" /><add key="GenerateAIB" value="False" /><add key="GenerateKeywords" value="True" /><add key="ForceFullCaption" value="False" /><add key="CaptionFormats" value="ttml;sami;webvtt" /></settings></feature></features></configuration>',
                'MediaProcessorId': 'nb:mpid:UUID:233e57fc-36bb-4f6f-8f18-3b662747a9f8',
                'TaskBody': '<?xml version="1.0" encoding="utf-16"?><taskBody><inputAsset>JobInputAsset(0)</inputAsset><outputAsset assetCreationOptions="0" assetFormatOption="0" assetName="' + message_obj['filename'] + ' - Indexed" storageAccountName="' + os.environ['SVPD_STORAGE_ACCOUNT_NAME'] + '">JobOutputAsset(1)</outputAsset></taskBody>'
            }]
            })

        queue_service.put_message(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], json.dumps({ 
            'filename': message_obj['filename'],
            'folder': message_obj['folder'],
            'size': message_obj['size'],
            'job': job['d']}))

        queue_service.delete_message(os.environ['SVPD_STORAGE_ACCOUNT_READY_TO_ENCODE'], message.id, message.pop_receipt)   

    return HttpResponse(template.render({
        'vidstatus': vidstatus,
    }, request))
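
Note: this view depends on helper functions (ams_authenticate, ams_post_request, ams_verbose_post_request) that are not shown in the snippet. A minimal sketch of what ams_post_request might look like, assuming the legacy Azure Media Services REST API v2 and the requests library; the header values and response handling are assumptions, not taken from the original project:

import json
import os

import requests


def ams_post_request(access_token, entity, body):
    # POST an entity to the legacy AMS REST API v2 endpoint (assumed header set).
    # A "verbose" variant would send 'Accept': 'application/json;odata=verbose',
    # whose responses are wrapped in a 'd' key as used by ams_verbose_post_request above.
    response = requests.post(
        os.environ['AMS_API_ENDPOINT'] + entity,
        headers={
            'Authorization': 'Bearer ' + access_token,
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'x-ms-version': '2.19',
            'DataServiceVersion': '3.0',
            'MaxDataServiceVersion': '3.0',
        },
        data=json.dumps(body))
    response.raise_for_status()
    return response.json()
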
Code Example #2
class AzureStorage:
    def __init__(self, connectionString, container):
        self.BlobService = BlockBlobService(connection_string=connectionString)
        nameValue = UtilityHelper.connectStringToDictionary(connectionString)
        self.AccountName = nameValue['AccountName']
        self.container = container

    def getBaseURL(self):
        return 'https://' + self.AccountName + '.blob.core.windows.net/'

    def uploadByLocalFile(self, localFullFileName, remoteBlobName):
        self.BlobService.create_blob_from_path(self.container, remoteBlobName,
                                               localFullFileName)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def uploadByStream(self, streamData, remoteBlobName):
        self.BlobService.create_blob_from_stream(self.container,
                                                 remoteBlobName, streamData)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def uploadByBytes(self, bytesData, remoteBlobName):
        self.BlobService.create_blob_from_bytes(self.container, remoteBlobName,
                                                bytesData)
        blobURL = 'https://' + self.AccountName + '.blob.core.windows.net/' + self.container + '/' + remoteBlobName
        return blobURL

    def delete(self, blobName):
        self.BlobService.delete_blob(self.container, blobName)

    def copy(self, sourceBlobURL, targetBlobName):
        self.BlobService.copy_blob(self.container, targetBlobName,
                                   sourceBlobURL)
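
The AzureStorage class above relies on UtilityHelper.connectStringToDictionary, which is not shown. A plausible stand-in that parses an Azure storage connection string of the form "Key1=Value1;Key2=Value2;..." into a dictionary; the function name and exact behavior are assumptions:

def connect_string_to_dictionary(connection_string):
    # Split on ';' and only on the first '=' of each segment, since account keys end in '=='.
    pairs = (segment.split('=', 1) for segment in connection_string.split(';') if segment)
    return {key: value for key, value in pairs}
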
Code Example #3
def create_snapshot(file_share, directory_name, file_name, container_name, correlation_guid=None):
    # Generate a fresh GUID per call; a default argument of str(uuid.uuid4()) is evaluated
    # only once at definition time and would be shared by every call.
    if correlation_guid is None:
        correlation_guid = str(uuid.uuid4())
    file_service = FileService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)
    blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)
    file_sas_token = file_service.generate_file_shared_access_signature(
        file_share,
        directory_name,
        file_name,
        permission = FilePermissions.READ,
        expiry = datetime.utcnow() + timedelta(minutes = 10))

    file_url = file_service.make_file_url(file_share, directory_name, file_name, sas_token = file_sas_token)

    blob_name = '{0}/{1}/{2}'.format(correlation_guid, directory_name, file_name)
    blob_service.create_container(container_name)

    try:
        blob_service.copy_blob(container_name, blob_name, file_url)
    except Exception as e:
        raise ValueError('Missing file ' + file_name)

    blob_sas_token = blob_service.generate_blob_shared_access_signature(
        container_name,
        blob_name,
        permission = BlobPermissions.READ,
        expiry = datetime.utcnow() + timedelta(days = 1000))

    return blob_service.make_blob_url(container_name, blob_name, sas_token = blob_sas_token)
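
Because copy_blob is asynchronous, create_snapshot can return its SAS URL while the server-side copy is still in progress. A small optional helper for callers who need to wait, built from the same get_blob_properties polling pattern used in several of the later examples (the function name and poll interval are choices made here, not part of the original):

import time

def wait_for_copy(blob_service, container_name, blob_name, poll_seconds=5):
    # Poll the destination blob until the server-side copy leaves the 'pending' state.
    props = blob_service.get_blob_properties(container_name, blob_name)
    while props.properties.copy.status == 'pending':
        time.sleep(poll_seconds)
        props = blob_service.get_blob_properties(container_name, blob_name)
    return props.properties.copy.status
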
Code Example #4
def main(req: func.HttpRequest) -> func.HttpResponse:
    png_base_path = "https://raw.githubusercontent.com/bmorrisondev/25daysofserverless/master"

    try:
        webhook = req.get_json()
        images_to_add = []
        regex_pattern = r'[^/]*$'  # everything after the last '/' (the file name)

        for commit in webhook.get(
                'commits'):  # Iterate through all the commits in the push
            for added_item in commit.get(
                    'added'):  # Iterate through added items
                if added_item.endswith("png"):  # Find items with png
                    image_item = ImageItem(
                    )  # Create the object to store name and url
                    regexObj = re.search(regex_pattern, added_item)
                    image_item.name = regexObj[0]  # Parse out the file name
                    image_item.url = f"{png_base_path}/{added_item}"
                    images_to_add.append(image_item)
                    logging.info(
                        f'found {image_item.name} at {image_item.url}')

        # Copy files to Azure Storage Account, create URLs for the blobs and return them for saving to Cosmos DB
        azure_path = "images"
        container_name = 'day3'

        blob_cs = os.environ["BlobServiceCs"]

        block_blob_service = BlockBlobService(connection_string=blob_cs)

        blob_urls = []
        for file in images_to_add:
            blob_name = f"{azure_path}/{random_string()}_{file.name}"
            block_blob_service.copy_blob(
                container_name, blob_name,
                file.url)  # Download the file into ASA
            blob_url = block_blob_service.make_blob_url(
                container_name,
                blob_name)  # Get a direct link to the blob in ASA
            blob_urls.append(blob_url)

        # Write URLs to Cosmos DB using the Mongo API
        mongo_cs = os.environ["CosmoMongoApiCs"]
        cosmo_client = pymongo.MongoClient(mongo_cs)
        db = cosmo_client["day3"]
        images_collection = db["images"]

        for url in blob_urls:
            record = {"url": url}
            images_collection.insert_one(
                record)  # Add the URL to the Cosmos DB collection

        return func.HttpResponse("ok")

    except Exception as err:
        return func.HttpResponse(f"{err}", status_code=500)
Code Example #5
def copy_azure_files():
    blob_service = BlockBlobService(account_name='labcyhw2', account_key='<storage-account-key>')

    copy_from_container = 'prod'
    copy_to_container = 'bkp'
    listagem = blob_service.list_blobs(copy_from_container)
    for blob in listagem:
        blob_url = blob_service.make_blob_url(copy_from_container, blob.name)
        blob_service.copy_blob(copy_to_container, blob.name, blob_url)
        print(blob.name)
Code Example #6
def run_sample(account_name, account_key, from_container_name, from_blob,
               to_container_name, to_blob):
    blob_service = BlockBlobService(account_name=account_name,
                                    account_key=account_key)

    if not blob_service.exists(from_container_name, from_blob):
        sys.exit('Source blob does not exist: ' + from_blob + ' in ' + from_container_name)

    if blob_service.exists(to_container_name, to_blob):
        sys.exit('Destination blob already exists: ' + to_blob + ' in ' + to_container_name)

    blob_url = blob_service.make_blob_url(from_container_name, from_blob)

    blob_service.copy_blob(to_container_name, to_blob, blob_url)
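
A possible invocation of run_sample; every value below is a placeholder rather than something taken from the original project:

if __name__ == '__main__':
    run_sample(account_name='myaccount',
               account_key='<account-key>',
               from_container_name='source-container',
               from_blob='source.txt',
               to_container_name='dest-container',
               to_blob='dest-copy.txt')
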
Code Example #7
def activate_model(self, model_name, active_model_name):
    block_blob_service = BlockBlobService(
        account_name=self.config.storage_account,
        account_key=self.config.storage_account_key)
    # copy model
    blob_url = block_blob_service.make_blob_url(
        self.config.models_container_name, model_name)
    block_blob_service.copy_blob(self.config.models_container_name,
                                 active_model_name, blob_url)
    # copy model metadata
    blob_url = block_blob_service.make_blob_url(
        self.config.models_container_name, model_name + ".meta")
    block_blob_service.copy_blob(self.config.models_container_name,
                                 active_model_name + ".meta", blob_url)
Code Example #8
class S3AzureBlobBypass(BaseS3Bypass):
    """
    Bypass executed by default when data source is an S3 bucket and data destination is
    an Azure blob container.
    It should be transparent to user. Conditions are:

        - S3Reader and AzureBlobWriter are used on configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureBlobWriter has not a items_limit set in configuration.
        - AzureBlobWriter has default items_per_buffer_write and size_per_buffer_write per default.
        - AzureBlobWriter has default write_buffer.
    """
    def __init__(self, config, metadata):
        super(S3AzureBlobBypass, self).__init__(config, metadata)
        self.container = self.read_option('writer', 'container')
        from azure.storage.blob import BlockBlobService
        self.azure_service = BlockBlobService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureBlobWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureBlobBypass, cls).meets_conditions(config)

    @retry_long
    def _copy_s3_key(self, key):
        blob_name = key.name.split('/')[-1]
        url = key.generate_url(S3_URL_EXPIRES_IN)
        # Convert the https://<bucket>.s3.aws.com/<path> url format to
        # https://s3.aws.com/<bucket>/<path> Since the first one gives
        # certificate errors if there are dots in the bucket name
        url = re.sub(r'^https://([^/]+)\.s3\.amazonaws\.com/',
                     r'https://s3.amazonaws.com/\1/', url)

        self.azure_service.copy_blob(
            self.container,
            blob_name,
            url,
            timeout=S3_URL_EXPIRES_IN,
        )
Code Example #9
def main():
    argument_spec = dict(source_uri=dict(required=True),
                         source_key=dict(required=True),
                         destination_account=dict(required=True),
                         destination_key=dict(required=True),
                         destination_container=dict(required=True),
                         destination_blob=dict(required=True),
                         wait=dict(default=False, type='bool'),
                         timeout=dict(default=1000))
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_DEPS:
        module.fail_json(
            msg="requests and azure are required for this module: {0}".format(
                HAS_DEPS_EXC))

    source_account, source_container, source_blob = split_uri(
        module.params.get('source_uri'))
    source = CloudStorageAccount(account_name=source_account,
                                 account_key=module.params.get('source_key'))
    source_service = source.create_block_blob_service()
    destination_service = BlockBlobService(
        account_name=module.params.get('destination_account'),
        account_key=module.params.get('destination_key'))

    source_token = source.generate_shared_access_signature(
        Services.BLOB, ResourceTypes.OBJECT, AccountPermissions.READ,
        datetime.datetime.now() + timedelta(hours=1))
    source_sas_url = source_service.make_blob_url(source_container,
                                                  source_blob, 'https',
                                                  source_token)

    destination_service.create_container(
        module.params.get('destination_container'), fail_on_exist=False)
    status = destination_service.copy_blob(
        module.params.get('destination_container'),
        module.params.get('destination_blob'), source_sas_url)

    if not module.params.get('wait'):
        data = dict(changed=True, status='started')
        module.exit_json(**data)
    else:
        copy = destination_service.get_blob_properties(
            module.params.get('destination_container'),
            module.params.get('destination_blob')).properties.copy
        count = 0
        while copy.status != 'success':
            count = count + 30
            if count > module.params.get('timeout'):
                module.fail_json(
                    msg='Timed out waiting for async copy to complete.')
            time.sleep(30)
            copy = destination_service.get_blob_properties(
                module.params.get('destination_container'),
                module.params.get('destination_blob')).properties.copy
        data = dict(changed=True, status='completed')
        module.exit_json(**data)
Code Example #10
class S3AzureBlobBypass(BaseS3Bypass):
    """
    Bypass executed by default when data source is an S3 bucket and data destination is
    an Azure blob container.
    It should be transparent to user. Conditions are:

        - S3Reader and AzureBlobWriter are used on configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureBlobWriter has not a items_limit set in configuration.
        - AzureBlobWriter has default items_per_buffer_write and size_per_buffer_write per default.
    """

    def __init__(self, config, metadata):
        super(S3AzureBlobBypass, self).__init__(config, metadata)
        self.container = self.read_option('writer', 'container')
        from azure.storage.blob import BlockBlobService
        self.azure_service = BlockBlobService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureBlobWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureBlobBypass, cls).meets_conditions(config)

    @retry_long
    def _copy_s3_key(self, key):
        blob_name = key.name.split('/')[-1]
        url = key.generate_url(S3_URL_EXPIRES_IN)
        # Convert the https://<bucket>.s3.aws.com/<path> url format to
        # https://s3.aws.com/<bucket>/<path> Since the first one gives
        # certificate errors if there are dots in the bucket name
        url = re.sub(r'^https://([^/]+)\.s3\.amazonaws\.com/', r'https://s3.amazonaws.com/\1/', url)

        self.azure_service.copy_blob(
            self.container,
            blob_name,
            url,
            timeout=S3_URL_EXPIRES_IN,
        )
Code Example #11
class BlobHelper:
    def __init__(self, blob=None):
        account_name = os.environ["AzureStorageAccountName"]
        account_key = os.environ["AzureStorageAccountKey"]
        self.blob_service = BlockBlobService(
            account_name=account_name, account_key=account_key
        )
        self.blob = blob

    def create_output_blob(self, destination_container_name):
        source_url = os.environ["StorageUrl"] + self.blob.name
        destination_blob_name = self.get_destination_blob_name()

        self.blob_service.copy_blob(
            container_name=destination_container_name,
            blob_name=destination_blob_name,
            copy_source=source_url,
        )

    def get_destination_blob_name(self):
        blob_filename = self.blob.name.split("/")[1]
        datetime_str = datetime.today().strftime("%Y%m%d-%H%M%S")
        return f"{datetime_str}-{blob_filename}"

    def get_str_file(self, storage_container_name, storage_blob_name):
        compressed_file = io.BytesIO()

        self.blob_service.get_blob_to_stream(storage_container_name, storage_blob_name, compressed_file, max_connections=1)

        compressed_file.seek(0)

        compressed_gzip = gzip.GzipFile(fileobj=compressed_file)

        decompressed_file = compressed_gzip.read()

        compressed_file.close()
        compressed_gzip.close()

        file_string = decompressed_file.decode("utf-8-sig")

        return file_string

    def write_stream_file(self, storage_container_name, storage_blob_name, encoded_file):
        self.blob_service.create_blob_from_bytes(storage_container_name, storage_blob_name, encoded_file, max_connections=1)
Code Example #12
def main(inputs: dict) -> str:
    """    
    Copy the video into the audiotranscript-files container(previously videoindexer-files),
    which will trigger the system of 3 functions to eventually download the transcript to SQL
    """
    ## Set inputs
    vidURL = inputs['fileURL']
    urlContainer, urlFileName = get_url_container_and_file_name(vidURL)
    bbs = BlockBlobService(
        connection_string=os.getenv("fsevideosConnectionString"))
    ## Create SAS URL
    sasURL = get_SAS_URL(fileURL=vidURL,
                         block_blob_service=bbs,
                         container=urlContainer)
    ## Copy blob
    bbs.copy_blob(container_name="audiotranscript-files",
                  blob_name=urlFileName,
                  copy_source=sasURL)

    return "done"
Code Example #13
def copySnapshotToAttacker(self, storageAccount, storageKey, containerName,
                           blobName, snapshotSas):
    blockBlobService = BlockBlobService(account_name=storageAccount,
                                        account_key=storageKey)
    copyProperties = blockBlobService.copy_blob(containerName, blobName,
                                                snapshotSas)
    while copyProperties.status != "success":
        copyProperties = blockBlobService.get_blob_properties(
            containerName, blobName).properties.copy
        print(copyProperties.status + ":" + copyProperties.progress)
        time.sleep(10)
    return copyProperties
Code Example #14
def copy_blob_image(self, template, vm_name, storage_account,
                    template_container, storage_container):
    # todo: weird method to refactor it later
    container_client = BlockBlobService(storage_account, self.storage_key)
    src_uri = container_client.make_blob_url(container_name=template_container,
                                             blob_name=template.split("/")[-1])
    operation = container_client.copy_blob(container_name=storage_container,
                                           blob_name=vm_name + ".vhd",
                                           copy_source=src_uri)
    wait_for(lambda: operation.status != 'pending', num_sec='10m', delay=15)
    # copy operation obj.status->str
    return operation.status
Code Example #15
def main():
    # get command line args
    account = sys.argv[1]
    secret = sys.argv[2]
    srcContainer = sys.argv[3]
    files = sys.argv[4:]

    # generate container name
    destContainer = str(uuid.uuid4()).replace('-', '')

    try:
        # connect to blob store
        bs = BlockBlobService(account_name=account, account_key=secret)

        # create and setup container, by default a container is private
        bs.create_container(destContainer)
        bs.set_container_acl(destContainer)

        # perform blob copy
        copyStartTime = int(round(time.time() * 1000))
        copyProps = {}
        for f in files:
            srcUrl = 'https://{}.blob.core.windows.net/{}/{}'.format(account, srcContainer, f)
            cp = bs.copy_blob(destContainer, f, srcUrl)
            copyProps[f] = cp

        # wait for copy to finish
        while len(copyProps.keys()) > 0:
            for f, prop in copyProps.items():
                bp = bs.get_blob_properties(destContainer, f)
                copyProps[f] = None if bp.properties.copy.status != 'pending' else bp
            copyProps = { k:v for k, v in copyProps.items() if v }
        
        # copy completed
        copyEndTime = int(round(time.time() * 1000))
        print('Blob copy completed in {}ms'.format(copyEndTime - copyStartTime), file=sys.stderr)

        # generate SAS token, read only, valid for an hour
        token = bs.generate_container_shared_access_signature(destContainer, ContainerPermissions.READ | ContainerPermissions.LIST, datetime.utcnow() + timedelta(hours=1))

        # return information
        result = {
            'storage_account': account,
            'container': destContainer,
            'sas_token': token
        }
        print(json.dumps(result, indent=4, sort_keys=True))

    except Exception as e:
        print(e, file=sys.stderr)
Code Example #16
destStorage_keys = {v.key_name: v.value for v in destStorage_keys.keys}

destStorage_key = destStorage_keys['key1']
print("The destination storage key is " + destStorage_key)


# In[18]:

# Create the target container in storage

block_blob_service2 = BlockBlobService(account_name=destStorageAcct, account_key=destStorage_key)


# In[19]:

block_blob_service2.create_container(destContainerName, public_access=PublicAccess.Container)


# In[21]:

# Start Asynchronous Copy #
print("Starting azure copy...")
block_blob_service2.copy_blob(destContainerName, "testBlob.json", blob_url)

print("Azure copy done.")
generator = block_blob_service2.list_blobs(destContainerName)
for blob in generator:
    blob_url2 = block_blob_service2.make_blob_url(destContainerName, blob.name)
    print("The new blob url is " + blob_url2)

Code Example #17
def main():
    sourceBlobCounter = 0
    currentBlobCounter = 0
    currentLabel = None
    block_blob_service = BlockBlobService(account_name=azureStorgeAccountName,
                                          account_key=azureStorageKeyName)

    # create the TargetContainer if it does not exist
    if (block_blob_service.exists(
            container_name=azureStorageTargetContainer) == False):
        block_blob_service.create_container(
            container_name=azureStorageTargetContainer)
        print(
            str(datetime.datetime.now()) + ': created target container: ' +
            azureStorageTargetContainer)
    elif emptyTargetContainer:
        print(
            str(datetime.datetime.now()) +
            ': deleting existing files in the container: ' +
            azureStorageTargetContainer)
        while True:
            delete_blob_generator = block_blob_service.list_blobs(
                container_name=azureStorageTargetContainer)
            for toDelete in block_blob_service.list_blobs(
                    container_name=azureStorageTargetContainer):
                block_blob_service.delete_blob(
                    container_name=azureStorageTargetContainer,
                    blob_name=toDelete.name)
            if not delete_blob_generator.next_marker:
                break

    print(
        str(datetime.datetime.now()) +
        ': counting files in the source container: ' +
        azureStorageSourceContainer)
    while True:
        count_blob_generator = block_blob_service.list_blobs(
            container_name=azureStorageSourceContainer)
        for blob in count_blob_generator:
            sourceBlobCounter += 1
        if not count_blob_generator.next_marker:
            break

    print(
        str(datetime.datetime.now()) + ': copying and labeling ' +
        str(sourceBlobCounter) + ' files to container: ' +
        azureStorageTargetContainer)
    while currentBlobCounter < sourceBlobCounter:
        copy_blob_generator = block_blob_service.list_blobs(
            container_name=azureStorageSourceContainer)
        for blob in copy_blob_generator:
            sourceBlob = block_blob_service.make_blob_url(
                container_name=azureStorageSourceContainer,
                blob_name=blob.name)
            label = labels[int(
                (currentBlobCounter / sourceBlobCounter) * len(labels))]
            targetName = blob.name.split('.')[0].replace(
                '-', '_') + '-' + str(label) + '.' + blob.name.split('.')[1]
            block_blob_service.copy_blob(
                container_name=azureStorageTargetContainer,
                blob_name=targetName,
                copy_source=sourceBlob)
            if currentLabel != label:
                print(
                    str(datetime.datetime.now()) + ": file number: " +
                    str(currentBlobCounter) + " begins label: " + str(label))
                currentLabel = label
            if (currentBlobCounter % 100 == 0):
                sys.stdout.write('.')
                sys.stdout.flush()
            currentBlobCounter += 1
        if not copy_blob_generator.next_marker:
            break
Code Example #18
class BlobUtility:
    """ Azure blob utilities for I/O operations
    """
    def __init__(self, account_name, account_key):
        """
         __init__ - Initializes blob utils and establish connection to  azure blob
        :param str account_name: Azure Blob account name.
        :param str account_key: Azure account key.

       """
        self.account_name = account_name
        self.block_blob_service = BlockBlobService(account_name=account_name,
                                                   account_key=account_key)

    def get_blob_to_path(self, input_container_name, input_blob_name,
                         input_file_path):
        """
        get_blob_to_path - Get file path in blob

        :param str input_container_name: BLob container name
        :param str input_blob_name: Blob path in the container.
        :param str input_file_path: File name to read.

        :returns: input_file_path
        :rtype: Blob object

       """
        self.block_blob_service.get_blob_to_path(
            container_name=input_container_name,
            blob_name=input_blob_name,
            file_path=input_file_path)
        return input_file_path

    def get_blob_to_bytes(self, input_container_name, input_blob_name):
        """
        get_blob_to_bytes - Read images from the blob

        :param str input_container_name: Blob container name
        :param str input_blob_name: Blob path in the container + input file/image name

        :returns: blob_byte
        :rtype: Blob object

       """
        blob_byte = self.block_blob_service.get_blob_to_bytes(
            container_name=input_container_name, blob_name=input_blob_name)
        return blob_byte

    def create_blob_from_text(self, input_container_name, input_blob_name,
                              data):
        """
        create_blob_from_text -  Write csv/dataframe to blob

        :param str input_container_name: Blob container name
        :param str input_blob_name: Blob path in the container to write + image name with extension.
        :param (csv/text file)  data: csv data to write into blob.

       """
        self.block_blob_service.create_blob_from_text(
            container_name=input_container_name,
            blob_name=input_blob_name,
            text=data)

    def make_blob_url(self, input_container_name, input_file_path):
        """
        make_blob_url - Create blob url

        :param str input_container_name: Blob Container name
        :param str input_file_path:  Blob file path to refer

        :returns: blob_url
        :rtype: Blob object

       """
        blob_url = self.block_blob_service.make_blob_url(
            input_container_name, input_file_path)
        return blob_url

    def copy_blob(self, container_name, file_path, blob_url):
        """
        copy_blob - Copy blob/data to another container using blob url

        :param str container_name: Target container
        :param str file_path:  Target blob file path + target filename with extension
        :param blob object blob_url: Source blob url to copy data.

         """
        self.block_blob_service.copy_blob(container_name, file_path, blob_url)

    def generate_container_signature(self, container_name, file_name):
        """
        generate_container_signature - generate container signature

        :param str container_name: Blob Container name
        :param str file_name:  File name

        :returns: file_url
        :rtype: Blob object

       """
        container_sas_token = self.block_blob_service.generate_container_shared_access_signature(
            container_name,
            permission=ContainerPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())
        file_url = [
            'https://', self.account_name, '.blob.core.windows.net/',
            container_name, '/', file_name, '?', container_sas_token
        ]
        file_url = ''.join(file_url)
        return file_url

    def generate_blob_signature(self, container_name, blob_name,
                                file_extension):
        """
        generate_blob_signature - generate blob signature

        :param str container_name: Blob Container name
        :param str blob_name: blob name
        :param str file_extension: file extension

        :returns: blob_url
        :rtype: Blob object

       """
        token = self.block_blob_service.generate_blob_shared_access_signature(
            container_name,
            blob_name + file_extension,
            permission=BlobPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())
        # Build the URL for the same blob name the SAS token above covers
        # (file_extension is assumed to include its leading dot).
        file = [
            'https://', self.account_name, '.blob.core.windows.net/',
            container_name, '/', blob_name, file_extension
        ]
        file = ''.join(file)
        blob_url = f"{file}?{token}"
        return blob_url
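
A possible usage of BlobUtility, copying a blob between containers via its URL; the account, container and blob names here are placeholders:

util = BlobUtility(account_name='myaccount', account_key='<account-key>')
source_url = util.make_blob_url('source-container', 'data/input.csv')
util.copy_blob('target-container', 'data/input.csv', source_url)
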
Code Example #19
File: azurestorage.py Project: rrati/quay
class AzureStorage(BaseStorage):
    def __init__(
        self,
        context,
        azure_container,
        storage_path,
        azure_account_name,
        azure_account_key=None,
        sas_token=None,
        connection_string=None,
        is_emulated=False,
        socket_timeout=20,
        request_timeout=20,
    ):
        super(AzureStorage, self).__init__()
        self._context = context
        self._storage_path = storage_path.lstrip("/")

        self._azure_account_name = azure_account_name
        self._azure_account_key = azure_account_key
        self._azure_sas_token = sas_token
        self._azure_container = azure_container
        self._azure_connection_string = connection_string
        self._request_timeout = request_timeout

        self._blob_service = BlockBlobService(
            account_name=azure_account_name,
            account_key=azure_account_key,
            sas_token=sas_token,
            is_emulated=is_emulated,
            connection_string=connection_string,
            socket_timeout=socket_timeout,
        )

    def _blob_name_from_path(self, object_path):
        if ".." in object_path:
            raise Exception("Relative paths are not allowed; found %s" %
                            object_path)

        return os.path.join(self._storage_path, object_path).rstrip("/")

    def _upload_blob_path_from_uuid(self, uuid):
        return self._blob_name_from_path(
            self._upload_blob_name_from_uuid(uuid))

    def _upload_blob_name_from_uuid(self, uuid):
        return "uploads/{0}".format(uuid)

    def get_direct_download_url(self,
                                object_path,
                                request_ip=None,
                                expires_in=60,
                                requires_cors=False,
                                head=False):
        blob_name = self._blob_name_from_path(object_path)

        try:
            sas_token = self._blob_service.generate_blob_shared_access_signature(
                self._azure_container,
                blob_name,
                ContainerPermissions.READ,
                datetime.utcnow() + timedelta(seconds=expires_in),
            )

            blob_url = self._blob_service.make_blob_url(self._azure_container,
                                                        blob_name,
                                                        sas_token=sas_token)
        except AzureException:
            logger.exception(
                "Exception when trying to get direct download for path %s",
                object_path)
            raise IOError("Exception when trying to get direct download")

        return blob_url

    def validate(self, client):
        super(AzureStorage, self).validate(client)
        self._blob_service.get_container_properties(
            self._azure_container, timeout=self._request_timeout)

    def get_content(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob = self._blob_service.get_blob_to_bytes(
                self._azure_container, blob_name)
        except AzureException:
            logger.exception("Exception when trying to get path %s", path)
            raise IOError("Exception when trying to get path")

        return blob.content

    def put_content(self, path, content):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob_service.create_blob_from_bytes(self._azure_container,
                                                      blob_name, content)
        except AzureException:
            logger.exception("Exception when trying to put path %s", path)
            raise IOError("Exception when trying to put path")

    def stream_read(self, path):
        with self.stream_read_file(path) as f:
            while True:
                buf = f.read(self.buffer_size)
                if not buf:
                    break
                yield buf

    def stream_read_file(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
            output_stream = io.BytesIO()
            self._blob_service.get_blob_to_stream(self._azure_container,
                                                  blob_name, output_stream)
            output_stream.seek(0)
        except AzureException:
            logger.exception(
                "Exception when trying to stream_file_read path %s", path)
            raise IOError("Exception when trying to stream_file_read path")

        return output_stream

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        blob_name = self._blob_name_from_path(path)
        content_settings = ContentSettings(
            content_type=content_type,
            content_encoding=content_encoding,
        )

        try:
            self._blob_service.create_blob_from_stream(
                self._azure_container,
                blob_name,
                fp,
                content_settings=content_settings)
        except AzureException:
            logger.exception("Exception when trying to stream_write path %s",
                             path)
            raise IOError("Exception when trying to stream_write path")

    def exists(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            return self._blob_service.exists(self._azure_container,
                                             blob_name,
                                             timeout=self._request_timeout)
        except AzureException:
            logger.exception("Exception when trying to check exists path %s",
                             path)
            raise IOError("Exception when trying to check exists path")

    def remove(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob_service.delete_blob(self._azure_container, blob_name)
        except AzureException:
            logger.exception("Exception when trying to remove path %s", path)
            raise IOError("Exception when trying to remove path")

    def get_checksum(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob = self._blob_service.get_blob_properties(
                self._azure_container, blob_name)
        except AzureException:
            logger.exception(
                "Exception when trying to get_checksum for path %s", path)
            raise IOError("Exception when trying to get_checksum path")
        return blob.properties.etag

    def initiate_chunked_upload(self):
        random_uuid = str(uuid.uuid4())
        metadata = {
            _BLOCKS_KEY: [],
            _CONTENT_TYPE_KEY: None,
        }
        return random_uuid, metadata

    def stream_upload_chunk(self,
                            uuid,
                            offset,
                            length,
                            in_fp,
                            storage_metadata,
                            content_type=None):
        if length == 0:
            return 0, storage_metadata, None

        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        new_metadata = copy.deepcopy(storage_metadata)

        total_bytes_written = 0

        while True:
            current_length = length - total_bytes_written
            max_length = (min(current_length, _MAX_BLOCK_SIZE)
                          if length != READ_UNTIL_END else _MAX_BLOCK_SIZE)
            if max_length <= 0:
                break

            limited = LimitingStream(in_fp, max_length, seekable=False)

            # Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
            # and, if there is none, terminate early.
            block_data = b""
            for chunk in iter(lambda: limited.read(4096), b""):
                block_data += chunk

            if len(block_data) == 0:
                break

            block_index = len(new_metadata[_BLOCKS_KEY])
            block_id = format(block_index, "05")
            new_metadata[_BLOCKS_KEY].append(block_id)

            try:
                self._blob_service.put_block(
                    self._azure_container,
                    upload_blob_path,
                    block_data,
                    block_id,
                    validate_content=True,
                )
            except AzureException as ae:
                logger.exception(
                    "Exception when trying to stream_upload_chunk block %s for %s",
                    block_id, uuid)
                return total_bytes_written, new_metadata, ae

            bytes_written = len(block_data)
            total_bytes_written += bytes_written
            if bytes_written == 0 or bytes_written < max_length:
                break

        if content_type is not None:
            new_metadata[_CONTENT_TYPE_KEY] = content_type

        return total_bytes_written, new_metadata, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
        """
        Complete the chunked upload and store the final results in the path indicated.

        Returns nothing.
        """
        # Commit the blob's blocks.
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        block_list = [
            BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]
        ]

        try:
            self._blob_service.put_block_list(self._azure_container,
                                              upload_blob_path, block_list)
        except AzureException:
            logger.exception(
                "Exception when trying to put block list for path %s from upload %s",
                final_path,
                uuid,
            )
            raise IOError("Exception when trying to put block list")

        # Set the content type on the blob if applicable.
        if storage_metadata[_CONTENT_TYPE_KEY] is not None:
            content_settings = ContentSettings(
                content_type=storage_metadata[_CONTENT_TYPE_KEY])
            try:
                self._blob_service.set_blob_properties(
                    self._azure_container,
                    upload_blob_path,
                    content_settings=content_settings)
            except AzureException:
                logger.exception(
                    "Exception when trying to set blob properties for path %s",
                    final_path)
                raise IOError("Exception when trying to set blob properties")

        # Copy the blob to its final location.
        upload_blob_name = self._upload_blob_name_from_uuid(uuid)
        copy_source_url = self.get_direct_download_url(upload_blob_name,
                                                       expires_in=300)

        try:
            blob_name = self._blob_name_from_path(final_path)
            copy_prop = self._blob_service.copy_blob(self._azure_container,
                                                     blob_name,
                                                     copy_source_url)
        except AzureException:
            logger.exception(
                "Exception when trying to set copy uploaded blob %s to path %s",
                uuid, final_path)
            raise IOError("Exception when trying to copy uploaded blob")

        self._await_copy(self._azure_container, blob_name, copy_prop)

        # Delete the original blob.
        logger.debug("Deleting chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob_service.delete_blob(self._azure_container,
                                           upload_blob_path)
        except AzureException:
            logger.exception(
                "Exception when trying to set delete uploaded blob %s", uuid)
            raise IOError("Exception when trying to delete uploaded blob")

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """
        Cancel the chunked upload and clean up any outstanding partially uploaded data.

        Returns nothing.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug("Canceling chunked upload %s at path %s", uuid,
                     upload_blob_path)
        self._blob_service.delete_blob(self._azure_container, upload_blob_path)

    def _await_copy(self, container, blob_name, copy_prop):
        # Poll for copy completion.
        count = 0
        while copy_prop.status == "pending":
            props = self._blob_service.get_blob_properties(
                container, blob_name)
            copy_prop = props.properties.copy

            if copy_prop.status == "success":
                return

            if copy_prop.status == "failed" or copy_prop.status == "aborted":
                raise IOError("Copy of blob %s failed with status %s" %
                              (blob_name, copy_prop.status))

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError("Timed out waiting for copy to complete")

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        if self.__class__ == destination.__class__:
            logger.debug(
                "Starting copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            copy_source_url = self.get_direct_download_url(path)
            blob_name = destination._blob_name_from_path(path)
            copy_prop = destination._blob_service.copy_blob(
                destination._azure_container, blob_name, copy_source_url)
            destination._await_copy(destination._azure_container, blob_name,
                                    copy_prop)
            logger.debug(
                "Finished copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            return

        # Fallback to a slower, default copy.
        logger.debug(
            "Copying file from Azure container %s to %s via a streamed copy",
            self._azure_container,
            destination,
        )
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def setup(self):
        # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
        cors = [
            CorsRule(
                allowed_origins="*",
                allowed_methods=["GET", "PUT"],
                max_age_in_seconds=3000,
                exposed_headers=["x-ms-meta-*"],
                allowed_headers=[
                    "x-ms-meta-data*",
                    "x-ms-meta-target*",
                    "x-ms-meta-abc",
                    "Content-Type",
                ],
            )
        ]

        self._blob_service.set_blob_service_properties(cors=cors)
Code Example #20
class AzureBlobStore21(implements(StoreInterface)):
    def __init__(self, storage_creds, max_retries=10):
        self.storage_id = storage_creds["name"]
        self.storage_key = storage_creds["key"]

        self.bs = BlockBlobService(account_name=self.storage_id,
                                   account_key=self.storage_key)
        self.append_bs = AppendBlobService(account_name=self.storage_id,
                                           account_key=self.storage_key)

        self.max_retries = max_retries
        self.set_retries(max_retries)

    # ---- HELPER functions ----

    def set_retries(self, count):

        old_count = self.max_retries
        self.max_retries = count

        # bug workaround: standard Retry classes don't retry status=409 (container is being deleted)
        #import azure.storage.common.retry as retry
        #self.bs.retry = retry.LinearRetry(backoff=5, max_attempts=count).retry
        #self.append_bs.retry = retry.LinearRetry(backoff=5, max_attempts=count).retry

        self.bs.retry = utils.make_retry_func(count)
        self.append_bs.retry = utils.make_retry_func(count)

        return old_count

    # ---- MISC part of interface ----

    def get_service_name(self):
        ''' return the unique name of the storage service'''
        return self.storage_id

    def get_retry(self):
        return self.bs.retry

    def set_retry(self, value):
        self.bs.retry = value

    # ---- CONTAINER interface ----

    def does_container_exist(self, container):
        return self.bs.exists(container)

    def create_container(self, container):
        return self.bs.create_container(container)

    def list_containers(self):
        containers = self.bs.list_containers()
        name_list = [contain.name for contain in containers]
        return name_list

    def delete_container(self, container):
        return self.bs.delete_container(container)

    def get_container_properties(self, container):
        props = self.bs.get_container_properties(container)
        return props

    def get_container_metadata(self, container):
        md = self.bs.get_container_metadata(container)
        return md

    # def set_container_metadata(self, container, md_dict):
    #     return self.bs.set_container_metadata(container, md_dict)

    # ---- BLOB interface ----

    def does_blob_exist(self, container, blob_path):
        return self.bs.exists(container, blob_path)

    def create_blob(self, container, blob_path, text, fail_if_exists=False):
        ifn = "*" if fail_if_exists else None

        return self.bs.create_blob_from_text(container,
                                             blob_path,
                                             text,
                                             if_none_match=ifn)

    def create_blob_from_path(self,
                              container,
                              blob_path,
                              source_fn,
                              progress_callback=None):
        result = self.bs.create_blob_from_path(
            container,
            blob_path,
            source_fn,
            progress_callback=progress_callback)
        return result

    def append_blob(self,
                    container,
                    blob_path,
                    text,
                    append_with_rewrite=False):
        # create blob if it doesn't exist

        if not append_with_rewrite:
            # normal handling
            if not self.append_bs.exists(container, blob_path):
                self.append_bs.create_blob(container, blob_path)

            return self.append_bs.append_blob_from_text(
                container, blob_path, text)
        ''' 
        Appends text to a normal blob blob by reading and then rewriting the entire blob.
        Correctly handles concurrency/race conditions.
        Recommended for lots of small items (like 10,000 run names).

        Note: we turn off retries on azure CALL-level so that we can retry on 
        OUR CALL-level.
        '''
        # experimental local retry loop
        # get_retry/set_retry/does_blob_exist/create_blob are methods of this
        # wrapper class, not of the underlying BlockBlobService
        old_retry = self.get_retry()
        self.set_retry(utils.make_retry_func(0))
        succeeded = False

        for i in range(20):

            try:
                if self.does_blob_exist(container, blob_path):
                    # read prev contents (keeping the etag of the version we read)
                    blob = self.bs.get_blob_to_text(container, blob_path)
                    # append our text
                    new_text = blob.content + text
                    # write blob, ensuring etag matches (no one updated since above read)
                    self.bs.create_blob_from_text(container,
                                                  blob_path,
                                                  new_text,
                                                  if_match=blob.properties.etag)
                else:
                    # if no previous blob, just try to create it
                    self.create_blob(container, blob_path, text)
            except BaseException as ex:
                logger.exception(
                    "Error in _append_blob_with_retries, ex={}".format(ex))
                sleep_time = np.random.random() * 4
                console.diag(
                    "XT store received an expected azure exception; will backoff for {:.4f} secs [retry #{}]"
                    .format(sleep_time, i + 1))
                time.sleep(sleep_time)
            else:
                succeeded = True
                break

        # restore retry
        self.set_retry(old_retry)

        if not succeeded:
            errors.service_error(
                "_append_blob_with_rewrite failed (too many retries)")

    def list_blobs(self,
                   container,
                   path=None,
                   return_names=True,
                   recursive=True):
        '''
        NOTE: the semantics here a tricky

        if recursive:
            - return a flat list of all full path names of all files (no directory entries)
        else: 
            - return a flat list of all files and all directory names (add "/" to end of directory names)

        if return_names:
            - return list of names
        else:
            - return a list of objects with following properties:
                .name     (file pathname)
                .properties
                    .content_length   (number)
                    .modified_ns      (time in ns)

        The delimiter trick: this is when we set the delimiter arg = "/" to tell azure to return only the blobs 
        in the specified directory - that is, don't return blobs from child directories.  In this case, azure 
        returns the effective child directory name, followed by a "/", but not its contents (which we hope is faster).
        '''
        delimiter = None if recursive else "/"

        # specific Azure path rules for good results
        if path:
            if path.startswith("/"):
                path = path[
                    1:]  # blob API wants this part of path relative to container

            # we should only add a "/" if path is a folder path
            if path.endswith("*"):
                # we just need to block the addition of "/"
                path = path[0:-1]
            elif not path.endswith("/"):
                path += "/"  # best if path ends with "/"

        blobs = self.bs.list_blobs(container, prefix=path, delimiter=delimiter)

        if return_names:
            blobs = [blob.name for blob in blobs]
        else:
            blobs = list(blobs)
        return blobs

    def delete_blob(self, container, blob_path, snapshot=None):
        dss = DeleteSnapshot()
        return self.bs.delete_blob(container,
                                   blob_path,
                                   delete_snapshots=dss.Include)

    def get_blob_text(self, container, blob_path):
        # watch out for 0-length blobs - they trigger an Azure RETRY error
        text = ""
        # azure storage bug workaround: avoid RETRY errors for 0-length blob
        blob = self.bs.get_blob_properties(container, blob_path)
        if blob.properties.content_length:
            blob = self.bs.get_blob_to_text(container, blob_path)
            text = blob.content
        return text

    def get_blob_to_path(self,
                         container,
                         blob_path,
                         dest_fn,
                         snapshot=None,
                         progress_callback=None):
        # azure storage bug workaround: avoid RETRY errors for 0-length blob
        blob = self.bs.get_blob_properties(container, blob_path)
        if blob.properties.content_length:
            result = self.bs.get_blob_to_path(
                container,
                blob_path,
                dest_fn,
                snapshot=snapshot,
                progress_callback=progress_callback)
            text = result.content
        else:
            md = blob.metadata
            if "hdi_isfolder" in md and md["hdi_isfolder"]:
                # its a directory marker; do NOT create a local file for it
                text = ""
            else:
                # 0-length text file; just write the file outselves
                text = ""
                with open(dest_fn, "wt") as outfile:
                    outfile.write(text)

        return text

    def get_blob_properties(self, container, blob_path):
        props = self.bs.get_blob_properties(container, blob_path)
        return props

    def get_blob_metadata(self, container, blob_path):
        return self.bs.get_blob_metadata(container, blob_path)

    # def set_blob_metadata(self, container, blob_path, md_dict):
    #     return self.bs.set_blob_metadata(container, blob_path, md_dict)

    def copy_blob(self, source_container, source_blob_path, dest_container,
                  dest_blob_path):
        source_blob_url = self.bs.make_blob_url(source_container,
                                                source_blob_path)
        self.bs.copy_blob(dest_container, dest_blob_path, source_blob_url)

    def snapshot_blob(self, container, blob_path):
        blob = self.bs.snapshot_blob(container, blob_path)
        #pd = utils.obj_to_dict(blob)
        return blob
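
# A minimal standalone sketch of the "delimiter trick" described in the list_blobs
# docstring above. The account credentials, the container name 'mycontainer' and the
# 'data/' prefix are placeholders only; none of them come from the original class.
from azure.storage.blob import BlockBlobService

bs = BlockBlobService(account_name='<account>', account_key='<key>')  # placeholders

# Non-recursive listing: delimiter="/" makes Azure return only the direct children;
# virtual sub-directories come back as BlobPrefix entries whose names end with "/".
for item in bs.list_blobs('mycontainer', prefix='data/', delimiter='/'):
    print(item.name)

# Recursive listing: omit the delimiter to get every blob under the prefix.
for blob in bs.list_blobs('mycontainer', prefix='data/'):
    print(blob.name, blob.properties.content_length)
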
account_key = '<>'
account_name = '<>'
blob_name = 'test1.txt'
container_name = 'container1'
copy_from = 'container1/f1'
copy_to = 'container1/f2'

blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

# Create a client-side SAS token
sas_token1 = blob_service.generate_container_shared_access_signature(container_name, BlobPermissions.WRITE | BlobPermissions.READ, datetime.utcnow() + timedelta(hours=4))

# Create a SAS-based block blob service
blob_service2 = BlockBlobService(account_name=account_name, sas_token=sas_token1)

# Create a SAS URL for the source blob
blob_url = blob_service2.make_blob_url(copy_from, blob_name, sas_token=sas_token1)

# Copy from blob_url to the copy_to location
blob_service2.copy_blob(copy_to, blob_name=blob_name, copy_source=blob_url)

print('Debug: Showing contents of source file')
print(blob_service2.get_blob_to_text(copy_from, blob_name).content)
print('Debug: Showing contents of destination (copied) file')
print(blob_service2.get_blob_to_text(copy_to, blob_name).content)  # should exist now



#%%
Code Example #22
File: views.py  Project: riya-mistry/PicProcure
def cluster(request, eventname):
    start = time.time()
    md = AzureMediaStorage()
    block_blob_service = BlockBlobService(account_name=md.account_name,
                                          account_key=md.account_key)
    # Download the pre-trained models, unzip them and save them in the same folder as this file
    predictor_path = 'shape_predictor_5_face_landmarks.dat'  #'C:/Users/lenovo/Desktop/PicProcure/events/shape_predictor_5_face_landmarks.dat'
    face_rec_model_path = 'dlib_face_recognition_resnet_model_v1.dat'

    faces_folder_path = block_blob_service.list_blobs(container_name=eventname)
    output_folder = []
    check_folder = block_blob_service.list_blobs(container_name='profile-pics')
    user_list = Register.objects.all().filter(event_id=Events.objects.get(
        event_name=eventname))
    username_list = []
    for user in user_list:
        img = user.user_id.profile_pic
        username_list.append(img)
    #for f in check_folder:
    #username_list.append(f.name)
    #print(username_list)

    detector = dlib.get_frontal_face_detector()  #a detector to find the faces
    sp = dlib.shape_predictor(
        predictor_path)  #shape predictor to find face landmarks
    facerec = dlib.face_recognition_model_v1(
        face_rec_model_path)  #face recognition model

    descriptors = []
    images = []
    output_list = []

    for img in check_folder:

        print('Processing file: {}'.format(img.name))
        url = "https://picprocurestorageaccount.blob.core.windows.net/profile-pics/" + img.name
        #img1 = dlib.load_rgb_image(urllib.request.urlopen(url).read())
        #win = dlib.image_window()
        img1 = numpy.array(
            Image.open(io.BytesIO(urllib.request.urlopen(url).read())))
        #win.set_image(img1)

        # Ask the detector to find the bounding boxes of each face. The 1 in the second argument indicates that we should upsample the image 1 time. This will make everything bigger and allow us to detect more faces.
        dets = detector(img1, 1)
        print("Number of faces detected: {}".format(len(dets)))

        # Now process each face we found.
        for k, d in enumerate(dets):
            # Get the landmarks/parts for the face in box d.
            shape = sp(img1, d)

            # Compute the 128D vector that describes the face in img identified by shape.
            face_descriptor = facerec.compute_face_descriptor(img1, shape)
            descriptors.append(face_descriptor)
            images.append(('profile-pics', img.name, img1, shape))
    print('profile pics ended')
    for f in faces_folder_path:
        print("Processing file: {}".format(f.name))
        url = "https://picprocurestorageaccount.blob.core.windows.net/" + eventname + '/' + f.name
        #img = dlib.load_rgb_image(f)
        #win = dlib.image_window()
        img = numpy.array(
            Image.open(io.BytesIO(urllib.request.urlopen(url).read())))
        print('reading completed ' + f.name)
        #win.set_image(img)
        # Ask the detector to find the bounding boxes of each face. The 1 in the second argument indicates that we should upsample the image 1 time. This will make everything bigger and allow us to detect more faces.
        dets = detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        # Now process each face we found.

        for k, d in enumerate(dets):
            # Get the landmarks/parts for the face in box d.
            shape = sp(img, d)
            # Compute the 128D vector that describes the face in img identified by shape.
            face_descriptor = facerec.compute_face_descriptor(img, shape)
            descriptors.append(face_descriptor)
            images.append((eventname, f.name, img, shape))
            print('image appended ' + f.name)

    # Cluster the faces.
    print("event load completed")
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    num_classes = len(set(labels))  # Total number of clusters
    print("Number of clusters: {}".format(num_classes))

    for i in range(0, num_classes):
        indices = []
        class_length = len([label for label in labels if label == i])
        for j, label in enumerate(labels):
            if label == i:
                indices.append(j)
        print("Indices of images in the cluster {0} : {1}".format(
            str(i), str(indices)))
        print("Size of cluster {0} : {1}".format(str(i), str(class_length)))
        #output_folder_path = output_folder + '/output' + str(i) # Output folder for each cluster
        #os.path.normpath(output_folder_path)
        #os.makedirs(output_folder_path)
        block_blob_service.create_container(eventname + str(i),
                                            public_access='blob')

        # Save each face to the respective cluster folder
        print("Saving faces to output folder...")
        #img, shape = images[index]
        #file_path = os.path.join(output_folder_path,"face_"+str(k)+"_"+str(i))
        md.azure_container = eventname + str(i)
        output_folder.append(md.azure_container)

        for k, index in enumerate(indices):
            container, name, img, shape = images[index]
            #dlib.save_face_chip(img, shape, file_path, size=1000, padding = 2)
            url = "https://picprocurestorageaccount.blob.core.windows.net/" + container + '/' + name
            block_blob_service.copy_blob(container_name=md.azure_container,
                                         blob_name=name,
                                         copy_source=url)
            # md._save(name,img)
            if 0 == k:
                output_list.append("ouput/output" + str(i) + "/face_0" + "_" +
                                   str(i) + ".jpg")

    for imgs in check_folder:

        for output in output_folder:
            try:
                block_blob_service.get_blob_metadata(container_name=output,
                                                     blob_name=imgs.name)
                container_name = eventname + '-' + imgs.name.split('.')[0]
                block_blob_service.create_container(
                    container_name=container_name, public_access='blob')
                for i in block_blob_service.list_blobs(container_name=output):
                    url = "https://picprocurestorageaccount.blob.core.windows.net/" + output + '/' + i.name
                    block_blob_service.copy_blob(container_name=container_name,
                                                 blob_name=i.name,
                                                 copy_source=url)
                block_blob_service.delete_container(output)
                output_folder.remove(output)
                break
            except Exception:
                # this profile pic is not in this output container; try the next one
                pass

    block_blob_service.delete_container(eventname)
    return HttpResponse("Successfull")
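
# A minimal sketch of the label-grouping step performed above: dlib's
# chinese_whispers_clustering returns one integer label per descriptor, and the
# per-cluster index lists can be built in a single pass. The two toy descriptors
# below are placeholders; in the view above they come from compute_face_descriptor.
from collections import defaultdict

import dlib

descriptors = [dlib.vector([0.0] * 128), dlib.vector([1.0] * 128)]  # placeholder 128-D descriptors

labels = dlib.chinese_whispers_clustering(descriptors, 0.5)

clusters = defaultdict(list)  # cluster label -> indices into the descriptor list
for index, label in enumerate(labels):
    clusters[label].append(index)

for label, indices in clusters.items():
    print("Cluster {}: {} descriptors".format(label, len(indices)))
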
Code Example #23
def main(xmlblob: func.InputStream):
    """Creates the UKRLP lookup tables for later use

    This Azure Function carries out the following steps:
    * Decompresses the XML HESA DataSet

    * Parses the INSTITUTION data from the DataSet

    * Retrieves enrichment data from the UKRLP API for each institution

    * Creates a lookup item for each Institution and writes it to CosmosDB

    * Currently, once completed successfully this function triggers the Etl function by copying
      the compressed XML passed in to a Blob storage monitored by the Etl function.

    """

    try:
        logging.info(f"CreateUkrlpBlobTrigger creating UKRLP lookups\n"
                     f"Name: {xmlblob.name}\n"
                     f"Blob Size: {xmlblob.length} bytes")

        create_ukrlp_start_datetime = datetime.today().strftime(
            "%Y%m%d %H%M%S")

        logging.info(
            f"CreateUkrlp function started on {create_ukrlp_start_datetime}")

        # Read the compressed Blob into a BytesIO object
        compressed_file = io.BytesIO(xmlblob.read())

        # Read the compressed file into a GzipFile object
        compressed_gzip = gzip.GzipFile(fileobj=compressed_file)

        # Decompress the data
        decompressed_file = compressed_gzip.read()

        # Decode the bytes into a string
        xml_string = decompressed_file.decode("utf-8")

        # Parse the xml and create the lookups
        lookup_creator = LookupCreator(xml_string)
        lookup_creator.create_ukrlp_lookups()

        #
        # Copy the compressed HESA XML to the Blob storage monitored by Etl pipeline
        #
        storage_account_name = os.environ["AzureStorageAccountName"]
        storage_account_key = os.environ["AzureStorageAccountKey"]

        # Instantiate the Block Blob Service
        blob_service = BlockBlobService(account_name=storage_account_name,
                                        account_key=storage_account_key)

        logging.info(
            f"Created Block Blob Service to Azure Storage Account {storage_account_name}"
        )

        # Copy the dummy HESA XML we've just processed to the ETL input BLOB container
        output_container_name = os.environ["EtlInputContainerName"]
        dummy_etl_blob_name = os.environ["DummyEtlBlobName"]
        source_url = os.environ["CreateUkrlpSourceUrl"]

        source_url += xmlblob.name
        blob_filename = xmlblob.name.split("/")[1]
        destination_blob_name = (
            f"{create_ukrlp_start_datetime}-{blob_filename}")
        logging.info(
            f"Copy the XML we have processed to {destination_blob_name}")

        blob_service.copy_blob(
            container_name=output_container_name,
            blob_name=destination_blob_name,
            copy_source=source_url,
        )

        create_ukrlp_end_datetime = datetime.today().strftime("%Y%m%d %H%M%S")
        logging.info(
            f"CreateUkrlp successfully finished on {create_ukrlp_end_datetime}"
        )

    except Exception as e:
        # Unexpected exception
        logging.error("Unexpected exception")
        logging.error(traceback.format_exc())

        # Raise to Azure
        raise e
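
# A minimal standalone sketch of the decompression steps listed in the docstring
# above, assuming `payload` holds the gzip-compressed XML bytes (for example, the
# result of xmlblob.read()); the function name is illustrative only.
import gzip
import io


def decompress_xml(payload: bytes) -> str:
    # wrap the raw bytes, stream them through GzipFile and decode to a string
    with gzip.GzipFile(fileobj=io.BytesIO(payload)) as gz:
        return gz.read().decode("utf-8")
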
Code Example #24
class WABS(Storage):
    """
    A class for managing objects on Windows Azure Blob Storage. It implements
    the interface of Storage base class
    """
    def __init__(self, account_name, container_name, sas_token):
        """Setup a Windows azure blob storage client object

        :param str account_name: Azure blob storage account name for connection
        :param str container_name: Name of container to be accessed in the account
        :param str sas_token: Shared access signature token for access

        """
        self.sas_token = sas_token
        self.container_name = container_name

        # The socket_timeout is passed on to the requests session
        # which executes the HTTP call. Both read / connect timeouts
        # are set to 60s
        self.client = BlockBlobService(account_name=account_name,
                                       sas_token=self.sas_token)
        logger.debug("Created wabs client object: {0}".format(self.client))

    @classmethod
    def get_retriable_exceptions(cls, method_name=None):
        """Return exceptions that should be retried for specified method of class

        :param str method_name: A method of class for which retriable exceptions should be searched
        :returns: A tuple of exception classes to be retried
        :rtype: tuple

        """
        if method_name == 'delete_key':
            return ()
        return (AzureException, )

    def get_url_prefix(self):
        """Returns a connection string for the client object

        :returns: Connection string for the client object
        :rtype: str

        """
        return '{}://{}/{}/'.format(self.client.protocol,
                                    self.client.primary_endpoint,
                                    self.container_name)

    def list_object_keys(self, prefix='', metadata=False, pagesize=1000):
        """List object keys matching a prefix for the WABS client

        :param str prefix: A prefix string to list objects
        :param bool metadata: If set to True, object metadata will be fetched with each object. Default is False
        :param int pagesize: Maximum objects to be fetched in a single WABS API call. This is limited to up to 5000 objects in WABS
        :returns: A generator of object dictionaries with key, size and last_modified keys. Metadata is included if `metadata` is True
        :rtype: Iterator[dict]

        """

        logger.debug("Listing files for prefix: {0}".format(prefix))
        include = Include(metadata=metadata)
        marker = None
        while True:
            if marker:
                logger.debug("Paging objects "
                             "from marker '{0}'".format(marker))
            objects = self.client.list_blobs(self.container_name,
                                             prefix=prefix,
                                             num_results=pagesize,
                                             include=include,
                                             marker=marker)
            for obj in objects:
                yield {
                    'key': obj.name,
                    'last_modified': obj.properties.last_modified,
                    'size': obj.properties.content_length,
                    'metadata': obj.metadata
                }

            if objects.next_marker:
                marker = objects.next_marker
            else:
                break

    def download_file(self, source_key, destination_file):
        """Download a object from WABS container to local filesystem

        :param str source_key: Key for object to be downloaded
        :param str destination_file: Path on local filesystem to download file
        :returns: Nothing
        :rtype: None

        """
        self.client.get_blob_to_path(self.container_name, source_key,
                                     destination_file)

    def upload_file(self, destination_key, source_file, metadata=None):
        """Upload a file from local filesystem to WABS

        :param str destination_key: Key where to store object
        :param str source_file: Path on local file system for file to be uploaded
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        logger.debug("Uploading file {0} to prefix {1}".format(
            source_file, destination_key))
        self.client.create_blob_from_path(self.container_name,
                                          destination_key,
                                          source_file,
                                          metadata=metadata)

    def upload_file_obj(self, destination_key, source_fd, metadata=None):
        """Upload a file from file object to WABS

        :param str destination_key: Key where to store object
        :param file source_fd: A file object to be uploaded
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        self.client.create_blob_from_stream(self.container_name,
                                            destination_key,
                                            source_fd,
                                            metadata=metadata)

    # FIXME: Need to fix this function to abort, if another copy is already
    # happening it should abort, or it should follow the ec2 behaviour
    def copy_from_key(self, source_key, destination_key, metadata=None):
        """Copy a WABS object from one key to another key on server side

        :param str source_key: Source key for the object to be copied
        :param str destination_key: Destination key to store object
        :param dict metadata: Metadata to be stored along with object
        :returns: Nothing
        :rtype: None

        """
        metadata = metadata or {}
        logger.debug("Copying key {0} -> {1}".format(source_key,
                                                     destination_key))

        # If a previous copy was pending cancel it before
        # starting another copy
        for blob in self.client.list_blobs(self.container_name,
                                           prefix=destination_key):
            # There should only be one blob with the given key;
            # however, list_blobs is the only exposed API to check
            # existence of a blob without failures.
            # Azure Blob Storage doesn't allow more than one pending
            # copy to the destination key.
            try:
                self.client.abort_copy_blob(self.container_name,
                                            destination_key,
                                            blob.properties.copy.id)
            except AzureConflictHttpError:
                logger.info(('No copy in progress,' +
                             ' Ignoring AzureConflictHttpError'))
        source_uri = self.client.make_blob_url(self.container_name,
                                               source_key,
                                               sas_token=self.sas_token)
        copy_properties = self.client.copy_blob(self.container_name,
                                                destination_key,
                                                source_uri,
                                                metadata=metadata)
        # Wait for the copy to be a success
        while copy_properties.status == 'pending':
            # Wait a second before retrying
            time.sleep(1)
            properties = self.client.get_blob_properties(
                self.container_name, destination_key)
            copy_properties = properties.properties.copy
            # TODO(vin): Raise Error if copy_properties errors out

    def delete_key(self, destination_key):
        """Delete an object from WABS

        :param str destination_key: Destination key for the object to be deleted
        :returns: Nothing
        :rtype: None

        """
        logger.debug("Deleting key {0}".format(destination_key))
        return self.client.delete_blob(self.container_name, destination_key)
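
# A hedged usage sketch for the WABS wrapper above; the account name, container
# name, SAS token and object keys below are placeholders, not values from the
# original project.
wabs = WABS(account_name='<account>',
            container_name='backups',
            sas_token='<sas-token>')

# list_object_keys pages through the container (via the marker returned by
# list_blobs) and yields plain dicts with key, size and last_modified.
for obj in wabs.list_object_keys(prefix='daily/', metadata=False):
    print(obj['key'], obj['size'], obj['last_modified'])

# Server-side copy: copy_from_key aborts any pending copy, then polls
# get_blob_properties until the copy leaves the 'pending' state.
wabs.copy_from_key('daily/db.dump', 'weekly/db.dump')
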
Code Example #25
class SubmissionManager:
    def __init__(self):
        self.config = Config()
        self.block_blob_service = BlockBlobService(
            account_name=self.config.account_name(),
            account_key=self.config.account_key())
        # not processed/verified submissions
        self.upload_container = 'uploaded-submissions'
        self.block_blob_service.create_container(self.upload_container)

        # processed submissions
        self.processed_submissions_container = 'processed-submissions'
        self.block_blob_service.create_container(
            self.processed_submissions_container)

        if not os.path.exists(self.config.bots_test_dir()):
            os.makedirs(self.config.bots_test_dir())

        if not os.path.exists(self.config.bots_dir()):
            os.makedirs(self.config.bots_dir())

    def get_uploaded_submissions(self):
        return self.block_blob_service.list_blobs(self.upload_container)

    def remove_uploaded_submission(self, file_name):
        self.block_blob_service.delete_blob(self.upload_container, file_name)

    # temp_full_path_filename will be deleted after uploading to the blob container if remove=True
    def upload_submission(self, temp_full_path_filename, remove=False):
        print("upload " + temp_full_path_filename)
        self.block_blob_service.create_blob_from_path(
            self.upload_container, ntpath.basename(temp_full_path_filename),
            temp_full_path_filename)
        if remove:
            os.remove(temp_full_path_filename)

    def download_submission(self, file_name):
        blob_url = self.block_blob_service.make_blob_url(
            self.upload_container, file_name)
        print("download " + blob_url)
        download_file = os.path.join(self.config.bots_test_dir(), file_name)
        self.block_blob_service.get_blob_to_path(self.upload_container,
                                                 file_name, download_file)
        return download_file

    def move_submission_to_processed(self, file_name):
        blob_url = self.block_blob_service.make_blob_url(
            self.upload_container, file_name)
        blob_processed_url = self.block_blob_service.make_blob_url(
            self.processed_submissions_container, file_name)
        print("move submission {} to valid submissions {}".format(
            blob_url, blob_processed_url))
        self.block_blob_service.copy_blob(self.processed_submissions_container,
                                          file_name, blob_url)
        self.block_blob_service.delete_blob(self.upload_container, file_name)
        self.move_submission_dir(file_name)

    def move_submission_dir(self, file_name):
        bot_test_dir = self.get_test_bot_dir(file_name)
        bot_tournament_dir = self.get_tournament_bot_dir(file_name)
        if os.path.exists(bot_tournament_dir):
            print("delete existing dir " + bot_tournament_dir)
            shutil.rmtree(bot_tournament_dir)
        result = shutil.move(bot_test_dir, self.config.bots_dir())
        print(result)
        print("moved bot dir from {} to {}".format(bot_test_dir,
                                                   bot_tournament_dir))

    def get_test_bot_dir(self, file_name):
        return os.path.join(self.config.bots_test_dir(),
                            os.path.splitext(file_name)[0])

    def get_tournament_bot_dir(self, file_name):
        return os.path.join(self.config.bots_dir(),
                            os.path.splitext(file_name)[0])

    def extract_submission(self, file_name):
        full_path_to_file = os.path.join(self.config.bots_test_dir(),
                                         file_name)
        extract_dir = self.get_test_bot_dir(file_name)
        zip_ref = zipfile.ZipFile(full_path_to_file, 'r')
        zip_ref.extractall(path=extract_dir)
        zip_ref.close()
        return extract_dir

    def get_processed_submissions(self):
        return self.block_blob_service.list_blobs(
            self.processed_submissions_container)
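
# A minimal sketch of the copy-then-delete "move" pattern used by
# move_submission_to_processed above, written directly against BlockBlobService.
# The account, key, container names and blob name are placeholder assumptions.
from azure.storage.blob import BlockBlobService

service = BlockBlobService(account_name='<account>', account_key='<key>')


def move_blob(service, src_container, dest_container, blob_name):
    # Blob storage has no rename/move primitive: copy to the destination first...
    source_url = service.make_blob_url(src_container, blob_name)
    service.copy_blob(dest_container, blob_name, source_url)
    # ...then delete the original once the copy has been issued.
    service.delete_blob(src_container, blob_name)


move_blob(service, 'uploaded-submissions', 'processed-submissions', 'bot.zip')
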
Code Example #26
File: cloud.py  Project: muma378/moose
class AzureBlobService(object):
    """
    Application interface to access <Azure Blob Storage Service>. A wrapper of
    the module 'BlockBlobService' from azure SDK for python.
    """

    blob_pattern = r'http://([\w\.]+)/(\w+)/(.*)'

    def __init__(self, settings_dict):
        # Set settings for azure connections
        self.settings_dict = settings_dict
        self.widgets = [
            progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ',
            progressbar.ETA()
        ]

        self.account = settings_dict['ACCOUNT']
        self.host = settings_dict['ACCOUNT'] + '.blob.' + settings_dict[
            'ENDPOINT']
        logger.debug("Connectings to '%s'..." % self.host)
        self.block_blob_service = BlockBlobService(
            account_name=settings_dict['ACCOUNT'],
            account_key=settings_dict['KEY'],
            endpoint_suffix=settings_dict['ENDPOINT'])
        logger.debug("Connection established.")

    def create_container(self, container_name, set_public=False):
        """
        Create a azure blob container.
        """
        logger.debug("Creating container [%s] on '%s'." %
                     (container_name, self.host))

        if set_public:
            public_access = PublicAccess.Container
            logger.debug("Set container [%s] access to public." %
                         container_name)
        else:
            public_access = None

        try:
            result = self.block_blob_service.create_container(
                container_name,
                fail_on_exist=True,
                timeout=self.settings_dict['TIMEOUT'],
                public_access=public_access)
        except AzureConflictHttpError as e:
            logger.error("The specified container [%s] already exists." %
                         container_name)
            result = False

        logger.info("Container created: %s." % container_name)
        return result

    def list_containers(self, prefix=None):
        logger.debug("Request sent to list all containers on '%s'." %
                     self.host)
        # An iterator to list all containers on blob
        icontainers = self.block_blob_service.list_containers(
            prefix=prefix, timeout=self.settings_dict['TIMEOUT'])

        # Converts an iterator to list
        container_names = [container for container in icontainers]

        logger.info("%d containers found on '%s'." %
                    (len(container_names), self.host))
        return container_names

    def list_blobs(self, container_name, prefix=None, suffix=None):
        """
        Lists all blobs in the container. Note that the blob names returned
        are posix-style paths, no matter what the names were when created.
        """
        blob_names = []
        logger.debug("Request to list blobs in container [%s]." %
                     container_name)

        try:
            # An iterator over the blobs in the container
            iblobs = self.block_blob_service.list_blobs(
                container_name,
                prefix=prefix,
                timeout=self.settings_dict['TIMEOUT'])

            if suffix:
                blob_names = [
                    blob.name for blob in iblobs if blob.name.endswith(suffix)
                ]
            else:
                blob_names = [blob.name for blob in iblobs]

        except AzureMissingResourceHttpError as e:
            logger.error("The specified container [%s] does not exist." %
                         container_name)

        logger.info("%d blobs found on [%s]." %
                    (len(blob_names), container_name))
        return blob_names

    def create_blob_from_path(self, container_name, blob_name, filepath):
        """
        Uploads a file to the container.

        Returns an instance of `Blob` with properties and metadata.
        """
        if not os.path.exists(filepath):
            logger.error("File doesn't exist: %s." % filepath)
            return None
        logger.debug("Creates blob '{}'@[{}]".format(blob_name,
                                                     container_name))
        blob = self.block_blob_service.create_blob_from_path(
            container_name, blob_name, filepath)
        return blob

    def upload(self, container_name, blob_pairs, overwrite=False):
        """
        Uploads files to the container on Azure. Note that each uploaded
        'blob_name' will be converted to a posix-style name, which means the
        path separator is '/'.

        `blob_pairs`
            Tuples of 2 elements each: the blob_name and its filepath on the
            local filesystem.
        """

        if not self.block_blob_service.exists(container_name):
            logger.info("Container [%s] which upload to doesn't exist, "
                        "creating now." % container_name)
            self.create_container(container_name, set_public=True)

        blobs = []
        blobs_in_container = self.list_blobs(container_name)
        for blob_name, filepath in progressbar.progressbar(\
                                    blob_pairs, widgets=self.widgets):
            posix_blob_name = ppath(blob_name)
            if overwrite or (posix_blob_name not in blobs_in_container):
                self.create_blob_from_path(container_name, posix_blob_name,
                                           filepath)
                blobs.append(posix_blob_name)

        logger.info("Uploaded %d files to [%s]." %
                    (len(blobs), container_name))
        return blobs

    def get_blob_to_path(self, container_name, blob_name, filepath):
        """
        Gets a blob from the container and saves it to `filepath`. Returns the
        retrieved Blob instance.
        """
        dirpath = os.path.dirname(filepath)
        if not os.path.exists(dirpath):
            logger.debug("Directory '%s' does not exist, creating now..." %
                         dirpath)
            os.makedirs(dirpath)

        # TODO: changes filepath to local-filesystem
        logger.debug("Gets blob '{}' from [{}]".format(blob_name,
                                                       container_name))
        blob = self.block_blob_service.get_blob_to_path(
            container_name, blob_name, filepath)
        return blob

    def download(self, container_name, dest, blob_names=None):
        """
        Get blobs from the container to the `dest` directory.
        """
        blobs = []

        if not self.block_blob_service.exists(container_name):
            logger.error("Container [%s] does not exist, aborted." %
                         container_name)
            return blobs
        # Getting the list of blobs first and then comparing is much more efficient
        blobs_in_container = self.list_blobs(container_name)

        # Get all blobs if blob_names was not specified
        if not blob_names:
            blob_names = blobs_in_container

        for blob_name in progressbar.progressbar(\
                            blob_names, widgets=self.widgets):
            if ppath(blob_name) in blobs_in_container:
                dest_filepath = normpath(safe_join(dest, blob_name))
                # TODO: not sure posix-style paths work when file names on the
                # container are windows-style
                self.get_blob_to_path(container_name, ppath(blob_name),
                                      dest_filepath)
                logger.debug("Got blob '{}' to '{}'.".format(
                    blob_name, dest_filepath))
                blobs.append(blob_name)
            else:
                logger.warning(
                    "Blob name '{}' specified does not exist.".format(
                        blob_name))

        return blobs

    def get_blob_to_text(self, container_name, blob_name):
        pass

    def get_blobs(self, container_name, blob_names=None):
        pass

    def set_container_acl(self, container_name, set_public=True):
        """
        Set container access permission to Public.
        """
        if set_public:
            logger.info("Set public read access to container [%s]." %
                        container_name)
            public_access = PublicAccess.Container
        else:
            logger.info("Set public read access to blobs on [%s]." %
                        container_name)
            public_access = PublicAccess.Blob

        self.block_blob_service.set_container_acl(container_name,
                                                  public_access=public_access)

    def delete_blobs(self, container_name, blob_names):
        """
        Removes blobs from the container.
        """
        blobs = []
        for blob_name in blob_names:
            try:
                blob = self.block_blob_service.delete_blob(
                    container_name, blob_name)
                logger.info("Delete the blob '%s' from container [%s]." %
                            (blob_name, container_name))
                blobs.append(blob)
            except AzureMissingResourceHttpError as e:
                logger.warning(
                    "The sepcified blob '%s' on [%s] does not exist." %
                    (blob_name, container_name))

        return blobs

    def copy_blobs(self,
                   blob_names,
                   container_name,
                   src_container=None,
                   pattern=None):
        """
        Copy blobs listed in `blob_names` to the dest container.

        `src_container`
            if src_container is given, blob_names may be paths relative to that
            container, and will be expanded to `http://self.host/src_container/blob_name`

        `pattern`
            if src_container and pattern are given and blob_names is None,
            copies the blobs in src_container that match the pattern to the
            dest container.

        """
        if blob_names is None:
            if src_container:
                blobs_in_container = self.list_blobs(src_container)
                matchfn = get_matchfn(pattern, True)
                # gets blobs from the src_container which matches the pattern(with ignorecase)
                blob_names = filter(lambda x: matchfn(x), blobs_in_container)
            else:
                raise ImproperlyConfigured(
                    "Method `copy_blobs` is ought to be called with "
                    "`src_container` given if blob_names was set to None.")

        if src_container:
            urls = []
            for blob_name in blob_names:
                # not absolute url path
                if not blob_name.startswith('http'):
                    # extends with the account and container
                    blob_name = "http://{}/{}/{}".format(
                        self.host, src_container, blob_name)
                urls.append(escape_uri_path(blob_name))
            blob_names = urls

        blobs = []
        logger.info("Will copy {} blobs to [{}].".format(
            len(blob_names), container_name))
        for copy_source in progressbar.progressbar(blob_names,
                                                   widgets=self.widgets):
            r = re.match(self.blob_pattern, copy_source)
            if r:
                blob_name = r.group(3)
            else:
                logger.error("Blob name specified must be a url: '{}'.".format(
                    copy_source))
                continue

            self.block_blob_service.copy_blob(container_name, \
                                            blob_name, copy_source)
            logger.debug("Copied '{}' to '{}'.".format(copy_source, blob_name))
            blobs.append(blob_name)

        return blobs

    def copy_container(self, src_container, dst_container, pattern=None):
        """
        Copies the blobs in `src_container` that match the `pattern`.
        """
        # creates container if not exists
        self.create_container(dst_container, set_public=True)
        logger.info("Copy blobs from [{}] to [{}]".format(
            src_container, dst_container))
        self.copy_blobs(None,
                        dst_container,
                        src_container=src_container,
                        pattern=pattern)
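
# A hedged usage sketch for the AzureBlobService wrapper above. Every settings
# value below is a placeholder assumption; the keys mirror what the class reads
# from settings_dict, and the pattern syntax depends on the project's get_matchfn
# helper.
settings = {
    'ACCOUNT': '<account>',
    'KEY': '<key>',
    'ENDPOINT': 'core.windows.net',
    'TIMEOUT': 300,
}
service = AzureBlobService(settings)

# Upload (blob_name, filepath) pairs; blob names are normalised to posix-style paths.
service.upload('raw-audio', [('2020/01/a.wav', '/data/a.wav')], overwrite=False)

# Copy the blobs in one container that match a pattern to another container
# (relative names are expanded to http://<host>/<src_container>/<blob_name> URLs).
service.copy_container('raw-audio', 'raw-audio-backup', pattern='*.wav')
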
Code Example #27
def rendered_video(request):
    ism_uri = ''
    vtt_uri = ''
    template = loader.get_template('app/rendered_video.html')
    vidstatus = 'No Running Job Found.'

    # Get the next message from the queue
    queue_service = QueueService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY'])
    messages = queue_service.get_messages(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], num_messages=1, visibility_timeout=1*60)
    
    for message in messages:
        vidstatus = 'Rendering: ' + message.content
        message_obj = json.loads(message.content)

        access_token = ams_authenticate()['access_token']

        # Get the details about the job
        job = ams_get_request(access_token, message_obj['job']['__metadata']['uri'])

        # is it done?
        if job['State'] == 3:
            vidstatus = 'Done Rendering: ' + message.content

            #get a reference to our storage container
            block_blob_service = BlockBlobService(account_name=os.environ['SVPD_STORAGE_ACCOUNT_NAME'], account_key=os.environ['SVPD_STORAGE_ACCOUNT_KEY'])
            
            #get a list of all the input and output assets associated to our job
            input_assets = ams_get_request(access_token, message_obj['job']['InputMediaAssets']['__deferred']['uri'])
            output_assets = ams_get_request(access_token, message_obj['job']['OutputMediaAssets']['__deferred']['uri'])

            #look through the input and output assets to figure out what one is for the indexer and for the Adaptive streaming files        
            index_asset = ''
            stream_asset = ''
            for output_asset in output_assets['value']:
                if output_asset['Name'].endswith('- Indexed'):
                    index_asset = output_asset
                elif output_asset['Name'].endswith('- MES v1.1'):
                    stream_asset = output_asset

            #Get the storage container names for each
            dest_container = urllib.parse.urlparse(stream_asset['Uri']).path[1:]
            src_container = urllib.parse.urlparse(index_asset['Uri']).path[1:]
            
            #loop over the indexer output files copying them to the adaptive streaming container
            src_blobs = block_blob_service.list_blobs(src_container)
            for src_blob in src_blobs:
                block_blob_service.copy_blob(dest_container, src_blob.name, index_asset['Uri'] + '/' + src_blob.name)

            #create the access policy if it doesn't exist
            access_policies = ams_get_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'AccessPolicies')
            access_policy_id = ''
            for access_policy in access_policies['value']:
                if access_policy['Name'] == 'StreamingAccessPolicy':
                    access_policy_id = access_policy['Id']

            if access_policy_id == '':
                access_policy = ams_verbose_post_request(access_token, 'AccessPolicies', {
                  'Name': 'StreamingAccessPolicy',
                  'DurationInMinutes': '52594560',
                  'Permissions': '9'
                })
                access_policy_id = access_policy['d']['Id']

            #create the locator
            locator = ams_verbose_post_request(access_token, 'Locators', {
                  'AccessPolicyId': access_policy_id,
                  'AssetId': stream_asset['Id'],
                  'Type': 2
                })

            #get the URLs to the streaming endpoint and the vtt file
            locator_asset_files = ams_get_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + locator['d']['AssetId']  + '\')/Files')
            for locator_asset_file in locator_asset_files['value']:
                if locator_asset_file['Name'].endswith('.ism'):
                    ism_uri = locator['d']['Path'] + locator_asset_file['Name'] + '/manifest'
                    vtt_uri = locator['d']['Path'] + message_obj['filename'] + '.vtt'

            #delete the job
            ams_delete_request(access_token, message_obj['job']['__metadata']['uri'])

            #delete the unused assets
            ams_delete_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + index_asset['Id'] + '\')')
            ams_delete_request(access_token, os.environ['AMS_API_ENDPOINT'] + 'Assets(\'' + input_assets['value'][0]['Id'] + '\')')

            #add the video to the database
            client = document_client.DocumentClient(os.environ['DOCUMENT_ENDPOINT'], {'masterKey': os.environ['DOCUMENT_KEY']})
            db = docdb_CreateDatabaseIfNotExists(client, 'svpd')
            collection = docdb_CreateCollectionIfNotExists(client, db, 'videos')

            doc = client.CreateDocument(collection['_self'],
            { 
                'id': message_obj['folder'].replace('/', '.'),
                'filename': message_obj['filename'],
                'vtt_uri': vtt_uri,
                'ism_uri': ism_uri
            })

            #remove the message from the queue
            queue_service.delete_message(os.environ['SVPD_STORAGE_ACCOUNT_ENCODING'], message.id, message.pop_receipt)   

    return HttpResponse(template.render({
        'vidstatus': vidstatus,
        'vtt_uri': vtt_uri,
        'ism_uri': ism_uri
    }, request))
Code Example #28
class StorageHelper:
    def __init__(self, storage_client):
        self.storage_client = storage_client

    def is_storage_account_name_available(self, storage_account_name):
        return self.storage_client.storage_accounts.check_name_availability(
            storage_account_name)

    def create_storage_account_async(self, storage_account_name,
                                     resource_group, **kwargs):
        storage_params = azure.mgmt.storage.models.StorageAccountCreateParameters(
            sku=azure.mgmt.storage.models.Sku(name='standard_lrs'),
            kind=azure.mgmt.storage.models.Kind.storage,
            location=kwargs['storage_location'])
        async_storage_creation = self.storage_client.storage_accounts.create(
            resource_group, storage_account_name, storage_params)
        storage_account = async_storage_creation.result()

    def get_storage_account_names(self, resource_group):
        storage_account_list = self.storage_client.storage_accounts.list_by_resource_group(
            resource_group)
        return [item.name for item in storage_account_list]

    def get_storage_account_properties(self, storage_account_name,
                                       resource_group):
        return self.storage_client.storage_accounts.get_properties(
            resource_group, storage_account_name)

    def get_storage_account_key(self, storage_account_name, resource_group):
        storage_keys = self.storage_client.storage_accounts.list_keys(
            resource_group, storage_account_name)
        if storage_keys is not None:
            return {v.key_name: v.value for v in storage_keys.keys}['key1']
        else:
            return None

    def initialize_block_blob_service(self, storage_account_name, storage_key,
                                      blob_container_name):
        self.storage_account_name = storage_account_name
        self.storage_key = storage_key
        self.blob_container_name = blob_container_name

        self.block_blob_service = BlockBlobService(
            account_name=self.storage_account_name,
            account_key=self.storage_key)

    def create_blob_container(self):
        return self.block_blob_service.create_container(
            self.blob_container_name)

    def get_blob_container(self):
        containers = self.block_blob_service.list_containers(
            self.blob_container_name)
        return next(c for c in containers
                    if c.name == self.blob_container_name)

    def copy_vhd(self, file_name, file_path):
        status = self.block_blob_service.copy_blob(self.blob_container_name,
                                                   file_name, file_path)
        if status.status == 'pending':
            time.sleep(120)

    def generate_blob_container_sas_url(self, expiration_in_days):
        container_permission = ContainerPermissions(read=True,
                                                    write=True,
                                                    list=True)
        return self.block_blob_service.generate_container_shared_access_signature(
            container_name=self.blob_container_name,
            permission=container_permission,
            protocol='https',
            start=datetime.now(),
            expiry=datetime.now() + timedelta(days=expiration_in_days))

    def build_upload_container_path(self, target_os_type, sas_url):
        return 'https://{0}.blob.core.windows.net/{1}/{2}/{3}?{4}'.format(
            self.storage_account_name, self.blob_container_name,
            target_os_type.lower(), 'piresults.json', sas_url)
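
# A hedged sketch of the SAS-URL composition performed by
# generate_blob_container_sas_url and build_upload_container_path above, written
# directly against BlockBlobService; the account, key and container names are
# placeholder assumptions.
from datetime import datetime, timedelta

from azure.storage.blob import BlockBlobService, ContainerPermissions

service = BlockBlobService(account_name='<account>', account_key='<key>')

# A container-level SAS token with read/write/list permission, valid for 7 days.
sas = service.generate_container_shared_access_signature(
    container_name='results',
    permission=ContainerPermissions(read=True, write=True, list=True),
    protocol='https',
    start=datetime.now(),
    expiry=datetime.now() + timedelta(days=7))

# The helper then appends the token as the query string of the final upload URL.
url = 'https://{0}.blob.core.windows.net/{1}/{2}/{3}?{4}'.format(
    '<account>', 'results', 'linux', 'piresults.json', sas)
print(url)
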
Code Example #29
File: storage.py  Project: rchalumeau/cockpit
class InstanceStorage(object):

    def __init__(self, group_name, location=None, create_if_not_exist=True):

        client = arm.instance()
        sm = client.storage.storage_accounts

        # Check existence of a storage account in the resource group
        # TODO: better with rm.list_resources for direct filtering,
        # but missing doc on the generic filtering format.
        # So, taking the first result of the iterator: ouch!
        new = True
        for sa in sm.list_by_resource_group(group_name):
            new = False
            self.name = sa.name
            self.location = sa.location
            logger.debug("Found SA %s" % self.name)
            break

        if new:
            logger.info("Creating storage account...")
            # Generate a unique name for Azure (strip separators, then lowercase)
            unique_name = "%s%s" % (
                str(group_name).translate(str.maketrans('', '', '-_.')).lower(),
                arm.id_generator()
            )
            # TODO : Check how to deal with account type...
            # Warning : the name of the storageaccount cannot be > 24 chars
            self.location=location
            result = sm.create(
                group_name,
                unique_name[:24],
                StorageAccountCreateParameters(
                    location=self.location,
                    account_type=AccountType.standard_lrs
                )
            )

            # Asynchronous operation, so wait...
            res = result.result()
            self.name = res.name

        # retrieve the keys and store them in the instance
        self.keys = sm.list_keys(group_name, self.name)
        logger.debug("Key1 : %s " % repr(self.keys.key1))
        logger.debug("Key2 : %s " % repr(self.keys.key2))

        # retrieve the blob service
        self.blob = BlockBlobService(
            account_name=self.name,
            account_key=self.keys.key1
        )
        # Define the storage tree :
        # sources for the images imported to create the VM
        # vhds for the VM images
        self.sources_container = "sources"
        self.vhds_container = "vhds"
        self.blob.create_container(self.sources_container)
        self.blob.create_container(self.vhds_container)

    def list_blobs(self):
        for blob in self.blob.list_blobs('system'):
            print(blob.name)

    def copy_source_images_from(self, source_storage, container, filepath):
        # Generate a token for 10 minutes read access
        token = source_storage.blob.generate_blob_shared_access_signature(
            container,
            filepath,
            BlobPermissions.READ,
            datetime.utcnow() + timedelta(minutes=10),
        )
        # Generate the source URL of the blob
        source_url = source_storage.blob.make_blob_url(
            container,
            filepath,
            sas_token=token
        )
        # Launch the copy from the distant storage to the current one
        self.blob.copy_blob(
            self.sources_container,
            os.path.basename(filepath),
            source_url
        )
Code Example #30
File: blob_storage.py  Project: dariopascu/caelus
class BlobStorage(Storage):
    _az_logger = logging.getLogger('az')

    def __init__(self,
                 auth: AzureAuth,
                 account_name: str,
                 container_name: str,
                 base_path: str = ""):
        Storage.__init__(self, base_path=base_path)

        self.container_name = container_name
        self.blob_service = BlockBlobService(
            account_name=account_name,
            account_key=auth.key_token,
            token_credential=TokenCredential(auth.service_principal_token),
            connection_string=auth.connection_string_token)

    ################
    # OBJECT ADMIN #
    ################
    def _list_blob_objects(
            self,
            prefix: str,
            filter_filename: Union[None, str] = None,
            filter_extension: Union[None, str, tuple] = None) -> Generator:
        objects_generator = self.blob_service.list_blobs(self.container_name,
                                                         prefix=prefix)

        for key in objects_generator:
            filtered_key = self._filter_key(key, filter_filename,
                                            filter_extension)
            if filtered_key is not None:
                yield filtered_key

    @staticmethod
    def _filter_key(key, filter_filename, filter_extension):
        key_name = key.name
        if (filter_filename is not None and filter_filename
                not in key_name) or (filter_extension is not None and
                                     not key_name.endswith(filter_extension)):
            return None
        else:
            return key

    def list_objects(
            self,
            folder: Union[None, str] = None,
            filter_filename: Union[None, str] = None,
            filter_extension: Union[None, str, tuple] = None) -> Generator:
        return self._list_blob_objects(self._get_folder_path(folder),
                                       filter_filename=filter_filename,
                                       filter_extension=filter_extension)

    def _blob_copy(self, dest_container_name: str, blob_name: str,
                   dest_object_name: Union[str, None], remove_copied: bool):
        if dest_object_name is None and dest_container_name == self.container_name:
            # same container and no new name: there is nothing to copy, so bail out early
            self._az_logger.warning('This config does not move the object')
            return
        if dest_object_name is None:
            dest_object_name = blob_name
        blob_url = self.blob_service.make_blob_url(self.container_name,
                                                   blob_name)
        self.blob_service.copy_blob(dest_container_name, dest_object_name,
                                    blob_url)
        self._az_logger.debug(
            f'{blob_name} copied from {self.container_name} to {dest_container_name}'
        )

        if remove_copied:
            self.blob_service.delete_blob(self.container_name, blob_name)
            self._az_logger.debug(
                f'{blob_name} removed from {self.container_name}')

    def move_object(self,
                    dest_storage_name: str,
                    files_to_move: Union[str, list, Generator],
                    dest_object_name: Union[str, None] = None,
                    remove_copied: bool = False):
        if isinstance(files_to_move, str):
            self._blob_copy(dest_storage_name, files_to_move, dest_object_name,
                            remove_copied)

        else:
            for blob in files_to_move:
                if isinstance(blob, Blob):
                    self._blob_copy(dest_storage_name, blob.name,
                                    dest_object_name, remove_copied)
                elif isinstance(blob, str):
                    self._blob_copy(dest_storage_name, blob, dest_object_name,
                                    remove_copied)

    ###########
    # READERS #
    ###########
    @contextmanager
    def _read_to_buffer(self, path):
        self._az_logger.debug(f'Reading from {self.container_name}: {path}')

        # get_blob_to_bytes returns a Blob object; wrap its raw content in a fresh buffer
        content = self.blob_service.get_blob_to_bytes(
            container_name=self.container_name, blob_name=path).content
        yield io.BytesIO(content)

    @contextmanager
    def _read_to_str_buffer(self, path):
        self._az_logger.debug(f'Reading from {self.container_name}: {path}')

        # get_blob_to_text returns a Blob object whose content is already a decoded string
        content = self.blob_service.get_blob_to_text(
            container_name=self.container_name, blob_name=path).content
        yield io.StringIO(content)

    def read_csv(self,
                 filename: str,
                 folder: Union[str, None] = None,
                 **kwargs):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return pd.read_csv(buff, **kwargs)

    def read_excel(self,
                   filename: str,
                   folder: Union[str, None] = None,
                   **kwargs):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return pd.read_excel(buff, **kwargs)

    def read_parquet(self,
                     filename: str,
                     folder: Union[str, None] = None,
                     **kwargs):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return pd.read_parquet(buff, **kwargs)

    def read_yaml(self,
                  filename: str,
                  folder=None,
                  yaml_loader=yaml.FullLoader):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return yaml.load(buff, Loader=yaml_loader)

    def read_json(self, filename: str, folder=None, **kwargs):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return json.load(buff, **kwargs)

    def read_object(self,
                    filename: str,
                    folder: Union[str, None] = None,
                    **kwargs):
        with self._read_to_buffer(self._get_full_path(filename,
                                                      folder)) as buff:
            return buff.read(**kwargs)

    def read_object_to_file(self,
                            blob_object: Blob,
                            filename: Union[str, None] = None,
                            folder: Union[str, None] = None,
                            **kwargs):
        object_filename_full, filename = self._create_local_path(
            blob_object.name, filename, folder)

        with open(filename, 'wb') as f:
            self._az_logger.debug(
                f'Downloading {object_filename_full} to {filename}')
            retrieved_blob = self.blob_service.get_blob_to_bytes(
                self.container_name, object_filename_full)
            f.write(retrieved_blob.content)

    ###########
    # WRITERS #
    ###########
    def _get_bucket_path(self, filename: str, folder: Union[str, None] = None):
        bucket_path = self._get_full_path(filename, folder)
        self._az_logger.debug(f'Writing in: {bucket_path}')

        return bucket_path

    def write_csv(self,
                  df: pd.DataFrame,
                  filename: str,
                  folder: Union[str, None] = None,
                  **kwargs):
        with io.StringIO() as buff:
            df.to_csv(buff, **kwargs)
            self.blob_service.create_blob_from_text(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                text=buff.getvalue())

    def write_excel(self,
                    df: pd.DataFrame,
                    filename: str,
                    folder: Union[str, None] = None,
                    **kwargs):
        with io.BytesIO() as buff:
            df.to_excel(buff, **kwargs)
            self.blob_service.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                blob=buff.getvalue())

    def write_parquet(self,
                      df: pd.DataFrame,
                      filename: str,
                      folder: Union[str, None] = None,
                      **kwargs):
        with io.BytesIO() as buff:
            df.to_parquet(buff, **kwargs)
            self.blob_service.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                blob=buff.getvalue())

    def write_yaml(self,
                   data: dict,
                   filename: str,
                   folder: Union[str, None] = None,
                   **kwargs):
        with io.StringIO() as buff:
            yaml.dump(data, buff, **kwargs)
            self.blob_service.create_blob_from_text(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                text=buff.getvalue())

    def write_json(self,
                   data: dict,
                   filename: str,
                   folder: Union[str, None] = None,
                   **kwargs):
        with io.StringIO() as buff:
            json.dump(data, buff, **kwargs)
            self.blob_service.create_blob_from_text(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                text=buff.getvalue())

    def write_object(self,
                     write_object,
                     filename: str,
                     folder: Union[str, None] = None,
                     **kwargs):
        if isinstance(write_object, bytes):
            self.blob_service.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                blob=write_object)
        elif isinstance(write_object, io.BytesIO):
            self.blob_service.create_blob_from_bytes(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                blob=write_object.getvalue())
        else:
            self.blob_service.create_blob_from_stream(
                container_name=self.container_name,
                blob_name=self._get_bucket_path(filename, folder),
                stream=write_object)

    def write_object_from_file(self,
                               object_filename: str,
                               filename: str,
                               folder: Union[str, None] = None,
                               **kwargs):
        self.blob_service.create_blob_from_path(
            container_name=self.container_name,
            blob_name=self._get_bucket_path(filename, folder),
            file_path=object_filename,
            **kwargs)
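# A minimal usage sketch for the read_*/write_* helpers above. The class name
# (BlobPandasStore) and its constructor arguments are hypothetical, because the
# class header is not shown in this excerpt; only the method calls mirror the
# definitions above, and **kwargs are forwarded to the underlying pandas calls.
import pandas as pd

store = BlobPandasStore(account_name='myaccount',       # hypothetical constructor
                        account_key='placeholderkey==',
                        container_name='datasets')

df = pd.DataFrame({'id': [1, 2, 3], 'value': [0.1, 0.2, 0.3]})

store.write_csv(df, 'values.csv', folder='daily', index=False)   # index=False goes to df.to_csv
round_tripped = store.read_csv('values.csv', folder='daily')

store.write_yaml({'rows': len(df)}, 'meta.yaml', folder='daily')
meta = store.read_yaml('meta.yaml', folder='daily')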
Code Example #31
class BlobStorageService:
    def __init__(self, account_name, key):
        self.__blockblob_service = BlockBlobService(account_name=account_name,
                                                    account_key=key)

    def create_container(self, container_name):
        self.__blockblob_service.create_container(container_name)

    def delete_container(self, container_name):
        self.__blockblob_service.delete_container(container_name)

    def upload_file(self,
                    container_name,
                    filename,
                    local_file,
                    delete_local_file=False):
        self.create_container(container_name)
        return self.__upload_file(container_name, filename, local_file,
                                  delete_local_file)

    def upload_file_from_bytes(self, container_name, filename, blob):
        self.create_container(container_name)
        return self.__upload_file_from_bytes(container_name, filename, blob)

    def upload_directory(self, container_name, directory, storage_path=""):
        self.create_container(container_name)
        files = self.__get_files(directory)
        directories = self.__get_directories(directory)

        blobs = list(
            map(
                lambda file: self.__upload_file(
                    container_name,
                    os.path.join(storage_path, os.path.basename(file)),
                    os.path.join(directory, file)), files))

        return blobs + list(
            map(
                lambda dir: self.upload_directory(container_name,
                                                  os.path.join(directory, dir),
                                                  storage_path), directories))

    def list_blobs(self, container_name, prefix=None):
        return self.__blockblob_service.list_blobs(container_name, prefix)

    def download_blob(self, container_name, blob_name, local_file=None):
        local_file = blob_name if local_file is None else local_file
        self.__create_local_dir(os.path.split(local_file)[0])
        self.__blockblob_service.get_blob_to_path(container_name, blob_name,
                                                  local_file)

    def download_blob_bytes(self, container_name, blob_name, local_file=None):
        b = self.__blockblob_service.get_blob_to_bytes(container_name,
                                                       blob_name)
        return b

    def download_blobs(self, container_name, local_path="", blob_path=""):
        blobs = self.__get_blobs_in_path(container_name, blob_path)
        return blobs

    def download_all_blobs(self, container_name, local_path="", blob_path=""):
        blobs = self.__get_blobs_in_path(container_name, blob_path)
        base = self.__create_local_dir(local_path)

        list(
            map(
                lambda blob: self.download_blob(container_name, blob.name,
                                                os.path.join(base, blob.name)),
                blobs))

    def delete_blob(self, container, blob_name):
        self.__blockblob_service.delete_blob(container, blob_name)

    def __upload_file(self,
                      container_name,
                      filename,
                      local_file,
                      delete_local_file=False):
        blob = self.__blockblob_service.create_blob_from_path(
            container_name,
            filename,
            local_file,
            content_settings=ContentSettings(
                content_type=self.__get_mime_type(local_file)))
        if delete_local_file:
            os.remove(local_file)
        return blob

    def __upload_file_from_bytes(self, container_name, filename, blob):
        blob = self.__blockblob_service.create_blob_from_bytes(
            container_name,
            filename,
            blob,
            content_settings=ContentSettings(
                content_type=self.__get_mime_type(filename)))
        return blob

    def copy_blob(self, container_name, blob_name, blob_url):
        self.__blockblob_service.copy_blob(container_name, blob_name, blob_url)

    def make_blob_url(self, container_name, blob_name, sas_token=''):
        return self.__blockblob_service.make_blob_url(container_name,
                                                      blob_name,
                                                      sas_token=sas_token)

    def generate_blob_shared_access_signature(self, container_name, blob_name):
        permission = ContainerPermissions(read=True, write=True)
        return self.__blockblob_service.generate_blob_shared_access_signature(
            container_name,
            blob_name,
            permission,
            protocol='https',
            start=datetime.datetime.utcnow(),
            expiry=datetime.datetime.utcnow() + timedelta(days=1))

    def set_blob_metadata(self, container_name, blob_name, metadata):
        return self.__blockblob_service.set_blob_metadata(
            container_name, blob_name, metadata)

    def __get_mime_type(self, file_path):
        return mime_content_type(file_path)

    def __get_blobs_in_path(self, container_name, blob_path):
        blobs = self.list_blobs(container_name)
        if not blob_path:
            return blobs
        return list(filter(lambda blob: blob.name.startswith(blob_path),
                           blobs))

    def __create_local_dir(self, local_path):
        if local_path:
            os.makedirs(local_path, exist_ok=True)
        return os.path.join(os.getcwd(), local_path)

    def __get_directories(self, local_path):
        return [
            file for file in os.listdir(local_path)
            if os.path.isdir(os.path.join(local_path, file))
        ]

    def __get_files(self, local_path):
        return [
            file for file in os.listdir(local_path)
            if os.path.isfile(os.path.join(local_path, file))
        ]
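# A short usage sketch for BlobStorageService above. The account name, key,
# container names and file paths are placeholders for illustration only.
svc = BlobStorageService('myaccount', 'base64accountkey==')

# upload_file creates the container if needed and sets the MIME type from the file
svc.upload_file('backups', 'report.csv', '/tmp/report.csv')
svc.upload_directory('backups', '/tmp/exports', storage_path='exports')

for blob in svc.list_blobs('backups', prefix='exports/'):
    print(blob.name)

# Read/write SAS token valid for one day, usable in a direct blob URL
sas = svc.generate_blob_shared_access_signature('backups', 'report.csv')
url = svc.make_blob_url('backups', 'report.csv', sas_token=sas)

svc.download_blob('backups', 'report.csv', local_file='downloads/report.csv')
svc.delete_blob('backups', 'report.csv')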
Code Example #32
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, bucket_name, prefix=None, azure_cloud=None):
        prefix = "{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = bucket_name
        try:
            endpoint_suffix = ENDPOINT_SUFFIXES[azure_cloud]
        except KeyError:
            raise InvalidConfigurationError("Unknown azure cloud {!r}".format(azure_cloud))

        self.conn = BlockBlobService(
            account_name=self.account_name, account_key=self.account_key, endpoint_suffix=endpoint_suffix
        )
        self.conn.socket_timeout = 120  # Default Azure socket timeout 20s is a bit short
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized, %r", self.container_name)

    def copy_file(self, *, source_key, destination_key, metadata=None, **kwargs):
        timeout = kwargs.get("timeout") or 15
        source_path = self.format_key_for_backend(source_key, remove_slash_prefix=True, trailing_slash=False)
        destination_path = self.format_key_for_backend(destination_key, remove_slash_prefix=True, trailing_slash=False)
        source_url = self.conn.make_blob_url(self.container_name, source_path)
        start = time.monotonic()
        self.conn.copy_blob(self.container_name, destination_path, source_url, metadata=metadata, timeout=timeout)
        while True:
            blob_properties = self.conn.get_blob_properties(self.container_name, destination_path, timeout=timeout)
            copy_props = blob_properties.properties.copy
            if copy_props.status == "success":
                return
            elif copy_props.status == "pending":
                if time.monotonic() - start < timeout:
                    time.sleep(0.1)
                else:
                    self.conn.abort_copy_blob(self.container_name, destination_path, copy_props.id, timeout=timeout)
                    raise StorageError(
                        "Copying {!r} to {!r} did not complete in {} seconds".format(source_key, destination_key, timeout)
                    )
            elif copy_props.status == "failed":
                raise StorageError(
                    "Copying {!r} to {!r} failed: {!r}".format(source_key, destination_key, copy_props.status_description)
                )
            else:
                raise StorageError(
                    "Copying {!r} to {!r} failed, unexpected status: {!r}".format(
                        source_key, destination_key, copy_props.status
                    )
                )

    def get_metadata_for_key(self, key):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=False)
        items = list(self._iter_key(path=path, with_metadata=True, deep=False))
        if not items:
            raise FileNotFoundFromStorageError(key)
        item, = items
        if item.type != KEY_TYPE_OBJECT:
            raise FileNotFoundFromStorageError(key)  # it's a prefix
        return item.value["metadata"]

    def _metadata_for_key(self, path):
        return list(self._iter_key(path=path, with_metadata=True, deep=False))[0].value["metadata"]

    def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False):
        path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=not include_key)
        self.log.debug("Listing path %r", path)
        yield from self._iter_key(path=path, with_metadata=with_metadata, deep=deep)

    def _iter_key(self, *, path, with_metadata, deep):
        include = "metadata" if with_metadata else None
        kwargs = {}
        if path:
            # If you give Azure an empty path, it gives you an authentication error
            kwargs["prefix"] = path
        if not deep:
            kwargs["delimiter"] = "/"
        items = self.conn.list_blobs(self.container_name, include=include, **kwargs)
        for item in items:
            if isinstance(item, BlobPrefix):
                yield IterKeyItem(type=KEY_TYPE_PREFIX, value=self.format_key_from_backend(item.name).rstrip("/"))
            else:
                if with_metadata:
                    # Azure Storage cannot handle '-' so we turn them into underscores and back again
                    metadata = {k.replace("_", "-"): v for k, v in item.metadata.items()}
                else:
                    metadata = None
                yield IterKeyItem(
                    type=KEY_TYPE_OBJECT,
                    value={
                        "last_modified": item.properties.last_modified,
                        "metadata": metadata,
                        "name": self.format_key_from_backend(item.name),
                        "size": item.properties.content_length,
                    },
                )

    def delete_key(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Deleting key: %r", key)
        try:
            return self.conn.delete_blob(self.container_name, key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_contents_to_file(self, key, filepath_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        try:
            self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)
        return self._metadata_for_key(key)

    @classmethod
    def _parse_length_from_content_range(cls, content_range):
        """Parses the blob length from the content range header: bytes 1-3/65537"""
        if not content_range:
            raise ValueError("File size unavailable")

        return int(content_range.split(" ", 1)[1].split("/", 1)[1])

    def _stream_blob(self, key, fileobj, progress_callback):
        """Streams contents of given key to given fileobj. Data is read sequentially in chunks
        without any seeks. This requires duplicating some functionality of the Azure SDK, which only
        allows reading entire blob into memory at once or returning data from random offsets"""
        file_size = None
        start_range = 0
        chunk_size = self.conn.MAX_CHUNK_GET_SIZE
        end_range = chunk_size - 1
        while True:
            try:
                # pylint: disable=protected-access
                blob = self.conn._get_blob(self.container_name, key, start_range=start_range, end_range=end_range)
                if file_size is None:
                    file_size = self._parse_length_from_content_range(blob.properties.content_range)
                fileobj.write(blob.content)
                start_range += blob.properties.content_length
                if start_range == file_size:
                    break
                if blob.properties.content_length == 0:
                    raise StorageError("Empty response received for {}, range {}-{}".format(key, start_range, end_range))
                end_range += blob.properties.content_length
                if end_range >= file_size:
                    end_range = file_size - 1
                if progress_callback:
                    progress_callback(start_range, file_size)
            except azure.common.AzureHttpError as ex:  # pylint: disable=no-member
                if ex.status_code == 416:  # Empty file
                    return
                raise

    def get_contents_to_fileobj(self, key, fileobj_to_store_to, *, progress_callback=None):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)

        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            self._stream_blob(key, fileobj_to_store_to, progress_callback)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

        if progress_callback:
            progress_callback(1, 1)

        return self._metadata_for_key(key)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        self.log.debug("Starting to fetch the contents of: %r", key)
        try:
            blob = self.conn.get_blob_to_bytes(self.container_name, key)
            return blob.content, self._metadata_for_key(key)
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def get_file_size(self, key):
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        try:
            blob = self.conn.get_blob_properties(self.container_name, key)
            return blob.properties.content_length
        except azure.common.AzureMissingResourceHttpError as ex:  # pylint: disable=no-member
            raise FileNotFoundFromStorageError(key) from ex

    def store_file_from_memory(self, key, memstring, metadata=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_bytes(
            self.container_name,
            key,
            bytes(memstring),  # azure would work with memoryview, but validates it's bytes
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None, cache_control=None, mimetype=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)
        self.conn.create_blob_from_path(
            self.container_name,
            key,
            filepath,
            content_settings=content_settings,
            metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_")
        )

    def store_file_object(self, key, fd, *, cache_control=None, metadata=None, mimetype=None, upload_progress_fn=None):
        if cache_control is not None:
            raise NotImplementedError("AzureTransfer: cache_control support not implemented")
        key = self.format_key_for_backend(key, remove_slash_prefix=True)
        content_settings = None
        if mimetype:
            content_settings = ContentSettings(content_type=mimetype)

        def progress_callback(bytes_sent, _):
            if upload_progress_fn:
                upload_progress_fn(bytes_sent)

        # Azure's _BlobChunkUploader calls `tell()` on the stream even though it doesn't use the result.
        # The input stream is not expected to support `tell()`, so substitute a dummy implementation for it.
        original_tell = getattr(fd, "tell", None)
        fd.tell = lambda: None
        try:
            self.conn.create_blob_from_stream(
                self.container_name,
                key,
                fd,
                content_settings=content_settings,
                metadata=self.sanitize_metadata(metadata, replace_hyphen_with="_"),
                progress_callback=progress_callback
            )
        finally:
            if original_tell:
                fd.tell = original_tell
            else:
                delattr(fd, "tell")

    def get_or_create_container(self, container_name):
        start_time = time.monotonic()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.monotonic() - start_time)
        return container_name
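# A usage sketch for AzureTransfer above. Credentials, the bucket name and the
# azure_cloud value are placeholders; azure_cloud=None is assumed to map to the
# default (public cloud) entry in ENDPOINT_SUFFIXES, which is defined outside
# this excerpt.
transfer = AzureTransfer(
    account_name='myaccount',
    account_key='base64accountkey==',
    bucket_name='backups',
    prefix='site1',
    azure_cloud=None,
)

# Store an object with metadata; hyphens in metadata keys are converted to
# underscores by sanitize_metadata and restored on listing.
transfer.store_file_from_memory(
    'basebackup/chunk.0',
    b'example-bytes',
    metadata={'compression-algorithm': 'snappy'},
    mimetype='application/octet-stream',
)

content, metadata = transfer.get_contents_to_string('basebackup/chunk.0')

for item in transfer.iter_key('basebackup', deep=True):
    print(item.type, item.value)

transfer.delete_key('basebackup/chunk.0')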