Example #1
    def saveCsvLogToBlob(self, fileName):

        to_location_path = fileName

        account_name = s().Data["blob"]["account_name"]
        account_key = s().Data["blob"]["account_key"]
        container_name = s().Data["blob"]["container"]

        cloud_account = CloudStorageAccount(account_name=account_name,
                                            account_key=account_key)

        append_blob_service = cloud_account.create_append_blob_service()
        append_blob_service.create_container(container_name)
        append_blob_service.set_container_acl(
            container_name, public_access=PublicAccess.Container)

        if append_blob_service.exists(container_name, self.fileName):
            # Blob already exists: append the local CSV file to it.
            append_blob_service.append_blob_from_path(
                container_name,
                self.fileName,
                to_location_path,
                progress_callback=self.progress_callback_w)
        else:
            # First upload: create the blob from the local CSV file.
            cloud_account.create_block_blob_service().create_blob_from_path(
                container_name,
                self.fileName,
                to_location_path,
                progress_callback=self.progress_callback_w)
    def container_operations_with_sas(self, account):
        container_name = 'demosasblobcontainer' + self.random_data.get_random_name(
            6)

        # Create a Block Blob Service object
        blockblob_service = account.create_block_blob_service()

        # Create a Shared Access Signature for the account
        print('1.Get account sas')

        account_sas = blockblob_service.generate_account_shared_access_signature(
            ResourceTypes.CONTAINER + ResourceTypes.OBJECT,
            AccountPermissions.READ + AccountPermissions.WRITE +
            AccountPermissions.DELETE + AccountPermissions.LIST +
            AccountPermissions.CREATE,
            datetime.datetime.utcnow() + datetime.timedelta(hours=1))

        shared_account = CloudStorageAccount(account_name=account.account_name,
                                             sas_token=account_sas)
        shared_account_block_service = shared_account.create_block_blob_service(
        )

        try:
            print('2. Create container with account sas. Container name - ' +
                  container_name)
            shared_account_block_service.create_container(container_name)

            # For the purposes of the demo, get a Container SAS
            # In a real-world application, the above Account SAS can be used
            print('3. Get container sas')
            container_sas = blockblob_service.generate_container_shared_access_signature(
                container_name,
                ContainerPermissions.READ + ContainerPermissions.WRITE +
                ContainerPermissions.DELETE + ContainerPermissions.LIST,
                datetime.datetime.utcnow() + datetime.timedelta(hours=1))

            shared_container_account = CloudStorageAccount(
                account_name=account.account_name, sas_token=container_sas)
            shared_container_block_service = shared_container_account.create_block_blob_service(
            )

            print('4. Create blob with container sas')
            shared_container_block_service.create_blob_from_text(
                container_name, 'myblob', 'blob data')

            print('5. List blobs with container sas')
            blobs = shared_container_block_service.list_blobs(container_name)
            for blob in blobs:
                print('blob ' + blob.name)

            print('6. Delete blob with container sas')
            shared_container_block_service.delete_blob(container_name,
                                                       'myblob')
        finally:
            print('7. Delete container')
            blockblob_service.delete_container(container_name)

        print("Containers Sas sample completed")
Example #3
def make_blob_client(secrets):
    """
        Creates a blob client object
        :param secrets: configuration secrets; either secrets.shared_key
            (storage account name, key and endpoint suffix) or
            secrets.service_principal (AAD credentials plus the storage
            account resource id) must be populated
    """

    if secrets.shared_key:
        # Set up SharedKeyCredentials
        blob_client = blob.BlockBlobService(
            account_name=secrets.shared_key.storage_account_name,
            account_key=secrets.shared_key.storage_account_key,
            endpoint_suffix=secrets.shared_key.storage_account_suffix)
    else:
        # Set up ServicePrincipalCredentials
        arm_credentials = ServicePrincipalCredentials(
            client_id=secrets.service_principal.client_id,
            secret=secrets.service_principal.credential,
            tenant=secrets.service_principal.tenant_id,
            resource='https://management.core.windows.net/')
        m = RESOURCE_ID_PATTERN.match(
            secrets.service_principal.storage_account_resource_id)
        accountname = m.group('account')
        subscription = m.group('subscription')
        resourcegroup = m.group('resourcegroup')
        mgmt_client = StorageManagementClient(arm_credentials, subscription)
        key = mgmt_client.storage_accounts.list_keys(
            resource_group_name=resourcegroup,
            account_name=accountname).keys[0].value
        storage_client = CloudStorageAccount(accountname, key)
        blob_client = storage_client.create_block_blob_service()

    return blob_client
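A hedged usage sketch for make_blob_client. The Secrets and SharedKey namedtuples below are hypothetical stand-ins whose attribute names are inferred from the function body; aztk's real configuration objects may differ.

from collections import namedtuple

SharedKey = namedtuple('SharedKey', ['storage_account_name',
                                     'storage_account_key',
                                     'storage_account_suffix'])
Secrets = namedtuple('Secrets', ['shared_key', 'service_principal'])

secrets = Secrets(
    shared_key=SharedKey(storage_account_name='<account_name>',
                         storage_account_key='<account_key>',
                         storage_account_suffix='core.windows.net'),
    service_principal=None)

# Shared-key path: make_blob_client returns a BlockBlobService ready to use.
blob_client = make_blob_client(secrets)
blob_client.create_container('aztk-example')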
Example #4
    def emulator(self):
        # With account
        account = CloudStorageAccount(is_emulated=True)
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(is_emulated=True)
Example #5
    def sas_auth(self):
        # With account
        account = CloudStorageAccount(account_name="<account_name>", sas_token="<sas_token>")
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name="<account_name>", sas_token="<sas_token>")
Example #6
    def key_auth(self):
        # With account
        account = CloudStorageAccount(account_name="<account_name>", account_key="<account_key>")
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name="<account_name>", account_key="<account_key>")
    def emulator(self):
        # With account
        account = CloudStorageAccount(is_emulated=True)
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(is_emulated=True)
Example #8
    def test_create_service_no_key(self):
        # Arrange

        # Act
        bad_account = CloudStorageAccount('', '')
        with self.assertRaises(ValueError):
            service = bad_account.create_block_blob_service()
def create_all(app,
               account_name=None,
               account_key=None,
               container_name=None,
               include_hidden=False):
    account_name = account_name or app.config.get('AZURE_STORAGE_ACCOUNT_NAME')
    account_key = account_key or app.config.get('AZURE_STORAGE_ACCOUNT_KEY')
    container_name = container_name or app.config.get(
        'AZURE_STORAGE_CONTAINER_NAME')
    if not container_name:
        raise ValueError("No container name provided.")

    # build list of static files
    all_files = _gather_files(app, include_hidden)
    logger.debug("All valid files: %s" % all_files)

    # connect to azure
    azure = CloudStorageAccount(account_name=account_name,
                                account_key=account_key)

    # create blob service
    blob_service = azure.create_block_blob_service()

    # get_or_create container
    if not blob_service.exists(container_name):
        blob_service.create_container(container_name)

    prefix = app.config.get('AZURE_STORAGE_PREFIX', '').lstrip('/').rstrip('/')
    for (static_folder, static_url), names in six.iteritems(all_files):
        static_upload_url = '%s/%s' % (prefix.rstrip('/'),
                                       static_url.lstrip('/'))
        _write_files(blob_service, app, static_upload_url, static_folder,
                     names, container_name)
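A hedged usage sketch for create_all, assuming a Flask app whose config carries the AZURE_STORAGE_* keys the function looks up; the account values are placeholders.

from flask import Flask

app = Flask(__name__)
app.config['AZURE_STORAGE_ACCOUNT_NAME'] = '<account_name>'
app.config['AZURE_STORAGE_ACCOUNT_KEY'] = '<account_key>'
app.config['AZURE_STORAGE_CONTAINER_NAME'] = 'static'

# Uploads every gathered static file to the configured container, creating the
# container on first run.
create_all(app)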
    def key_auth(self):
        # With account
        account = CloudStorageAccount(account_name='<account_name>',
                                      account_key='<account_key>')
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name='<account_name>',
                                  account_key='<account_key>')
def main():
    argument_spec = dict(source_uri=dict(required=True),
                         source_key=dict(required=True),
                         destination_account=dict(required=True),
                         destination_key=dict(required=True),
                         destination_container=dict(required=True),
                         destination_blob=dict(required=True),
                         wait=dict(default=False, type='bool'),
                         timeout=dict(default=1000))
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_DEPS:
        module.fail_json(
            msg="requests and azure are required for this module ".format(
                HAS_DEPS_EXC))

    source_account, source_container, source_blob = split_uri(
        module.params.get('source_uri'))
    source = CloudStorageAccount(account_name=source_account,
                                 account_key=module.params.get('source_key'))
    source_service = source.create_block_blob_service()
    destination_service = BlockBlobService(
        account_name=module.params.get('destination_account'),
        account_key=module.params.get('destination_key'))

    source_token = source.generate_shared_access_signature(
        Services.BLOB, ResourceTypes.OBJECT, AccountPermissions.READ,
        datetime.datetime.now() + timedelta(hours=1))
    source_sas_url = source_service.make_blob_url(source_container,
                                                  source_blob, 'https',
                                                  source_token)

    destination_service.create_container(
        module.params.get('destination_container'), fail_on_exist=False)
    status = destination_service.copy_blob(
        module.params.get('destination_container'),
        module.params.get('destination_blob'), source_sas_url)

    if not module.params.get('wait'):
        data = dict(changed=True, status='started')
        module.exit_json(**data)
    else:
        copy = destination_service.get_blob_properties(
            module.params.get('destination_container'),
            module.params.get('destination_blob')).properties.copy
        count = 0
        while copy.status != 'success':
            count = count + 30
            if count > module.params.get('timeout'):
                module.fail_json(
                    msg='Timed out waiting for async copy to complete.')
            time.sleep(30)
            copy = destination_service.get_blob_properties(
                module.params.get('destination_container'),
                module.params.get('destination_blob')).properties.copy
        data = dict(changed=True, status='completed')
        module.exit_json(**data)
    def sas_auth(self):
        # With account
        account = CloudStorageAccount(account_name='<account_name>',
                                      sas_token='<sas_token>')
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name='<account_name>',
                                  sas_token='<sas_token>')
Example #13
    def test_create_account_sas_and_key(self):
        # Arrange
        
        # Act
        account = CloudStorageAccount(self.account_name, self.account_key, self.sas_token)
        service = account.create_block_blob_service()

        # Assert
        self.validate_service(service, BlockBlobService)
    def public(self):
        # This applies to the blob services only
        # Public access must be enabled on the container or requests will fail

        # With account
        account = CloudStorageAccount(account_name='<account_name>')
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name='<account_name>')
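The public() samples only work once anonymous read access has been enabled on the container by its owner. A minimal sketch of that prerequisite, reusing the PublicAccess.Container pattern shown elsewhere in this listing (account values are placeholders):

from azure.storage import CloudStorageAccount
from azure.storage.blob import PublicAccess

# An authenticated owner enables full public read access on the container.
owner = CloudStorageAccount(account_name='<account_name>',
                            account_key='<account_key>')
owner_client = owner.create_block_blob_service()
owner_client.create_container('public-container',
                              public_access=PublicAccess.Container)

# A name-only (anonymous) client can now list and read the container's blobs.
anonymous_client = CloudStorageAccount(
    account_name='<account_name>').create_block_blob_service()
for blob in anonymous_client.list_blobs('public-container'):
    print(blob.name)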
Example #15
    def public(self):
        # This applies to the blob services only
        # Public access must be enabled on the container or requests will fail

        # With account
        account = CloudStorageAccount(account_name="<account_name>")
        client = account.create_block_blob_service()

        # Directly
        client = BlockBlobService(account_name="<account_name>")
Example #16
    def test_create_account_emulated(self):
        # Arrange
      
        # Act
        account = CloudStorageAccount(is_emulated=True)
        service = account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, 'devstoreaccount1')
        self.assertIsNotNone(service.account_key)
Example #17
    def test_create_account_sas(self):
        # Arrange
      
        # Act
        sas_account = CloudStorageAccount(self.account_name, sas_token=self.sas_token)
        service = sas_account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, self.account_name)
        self.assertIsNone(service.account_key)
        self.assertEqual(service.sas_token, self.sas_token)
    def __upload_model(self, model_name, service_def, storage_account: CloudStorageAccount):
        if not os.path.isfile(service_def):
            raise FileNotFoundError(service_def + ' not found')

        storage_service = storage_account.create_block_blob_service()
        container_name = "models"
        storage_service.create_container(container_name)
        hash = self.__md5(service_def)
        blob_name = urllib.parse.quote(model_name) + "_" + hash
        storage_service.create_blob_from_path(container_name, blob_name, service_def)
        sas_token = storage_service.generate_blob_shared_access_signature(container_name, blob_name, BlobPermissions.READ, datetime.utcnow() + timedelta(days=365 * 5))
        return storage_service.make_blob_url(container_name, blob_name, sas_token=sas_token)
Example #19
File: config.py Project: skepticatgit/aztk
def get_blob_client() -> blob.BlockBlobService:
    if not storage_resource_id:
        return blob.BlockBlobService(account_name=storage_account_name,
                                     account_key=storage_account_key,
                                     endpoint_suffix=storage_account_suffix)
    else:
        credentials = ServicePrincipalCredentials(
            client_id=client_id,
            secret=credential,
            tenant=tenant_id,
            resource='https://management.core.windows.net/')
        m = RESOURCE_ID_PATTERN.match(storage_resource_id)
        accountname = m.group('account')
        subscription = m.group('subscription')
        resourcegroup = m.group('resourcegroup')
        mgmt_client = StorageManagementClient(credentials, subscription)
        key = mgmt_client.storage_accounts.list_keys(
            resource_group_name=resourcegroup,
            account_name=accountname).keys[0].value
        storage_client = CloudStorageAccount(accountname, key)
        return storage_client.create_block_blob_service()
Example #20
def dump(file_url):

    # step1: download blob from storage
    storage_account = CloudStorageAccount(storage_account_name,
                                          storage_account_key)

    container_name, blob_name = parse_file_url(file_url)
    blob_service = storage_account.create_block_blob_service()
    blob = blob_service.get_blob_to_bytes(container_name, blob_name)
    f = BytesIO(blob.content)  # Avro file bytes

    reader = DataFileReader(f, DatumReader())

    event_list = []
    # step2: get the event data
    for record in reader:
        event_data = json.loads(record["Body"],
                                encoding="ascii",
                                object_hook=WindTurbineMeasure.obj_hook)
        event_list.append(event_data)

    # step3: dump to the warehouse
    batch_insert(event_list)
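A hedged usage sketch for dump(). The URL is a placeholder and assumes parse_file_url (not shown here) splits a standard blob URL into container and blob names, with the module-level storage_account_name/key and batch_insert already configured.

dump('https://<account_name>.blob.core.windows.net/telemetry/windturbine.avro')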
Example #21
File: Blob.py Project: Toomey86/Cloud_Lab2
from azure.storage import CloudStorageAccount
from azure.storage.blob import PublicAccess
from azure.storage.blob.models import ContentSettings
from azure.common.client_factory import get_client_from_cli_profile
from azure.mgmt.storage import StorageManagementClient

RESOURCE_GROUP = 'sampleStorageResourceGroup'
STORAGE_ACCOUNT_NAME = 'samplestorageaccountname'
CONTAINER_NAME = 'samplecontainername'

# log in
storage_client = get_client_from_cli_profile(StorageManagementClient)

# create a public storage container to hold the file
storage_keys = storage_client.storage_accounts.list_keys(RESOURCE_GROUP, STORAGE_ACCOUNT_NAME)
storage_keys = {v.key_name: v.value for v in storage_keys.keys}

storage_client = CloudStorageAccount(STORAGE_ACCOUNT_NAME, storage_keys['key1'])
blob_service = storage_client.create_block_blob_service()

blob_service.create_container(CONTAINER_NAME, public_access=PublicAccess.Container)

blob_service.create_blob_from_bytes(
    CONTAINER_NAME,
    'helloworld.html',
    b'<center><h1>Hello World!</h1></center>',
    content_settings=ContentSettings('text/html')
)

print(blob_service.make_blob_url(CONTAINER_NAME, 'helloworld.html'))
Example #22
def _get_service():
    account_name        = config.STORAGE_ACCOUNT_NAME
    account_key         = config.STORAGE_ACCOUNT_KEY
    account             = CloudStorageAccount(account_name = account_name, account_key = account_key)
    service             = account.create_block_blob_service()
    return service
Example #23
File: tasks.py Project: uploadcare/stump
class TransferAzure():

    transaction = None
    CDN_BASE = 'https://ucarecdn.com/'
    account = None
    service = None
    file_uuid = None
    filename = None
    make_public = None

    def __init__(self, make_public=False, transaction=None):
        account_name = settings.AZURE['account_name']
        account_key = settings.AZURE['account_key']
        sas = settings.AZURE['sas']
        self.transaction = transaction
        self.transaction_body = json.loads(transaction.body)['data']
        logger.info('copying uuid: ' + self.transaction_body['uuid'])
        self.file_uuid = self.transaction_body['uuid']
        self.filename = self.transaction_body['original_filename']
        self.make_public = make_public
        self.account = CloudStorageAccount(account_name=account_name,
                                           account_key=account_key,
                                           sas_token=sas)
        self.service = self.account.create_block_blob_service()

    def save_message_object(self):
        kwargs = {
            prop: self.transaction_body[prop]
            for prop in [
                'uuid', 'filename', 'is_stored', 'done', 'file_id',
                'original_filename', 'is_ready', 'total', 'mime_type', 'size'
            ]
        }

        if self.transaction_body['is_image']:
            MessageClass = ImageUploadMessage
            kwargs['imgformat'] = self.transaction_body['image_info']['format']
            for prop in [
                    'orientation', 'height', 'width', 'geo_location',
                    'datetime_original', 'dpi'
            ]:
                kwargs[prop] = self.transaction_body['image_info'][prop]
        else:
            MessageClass = FileUploadMessage
        return MessageClass.objects.create(
            webhook_transaction=self.transaction, **kwargs)

    def _blob_exists(self, container_name, blob_name):
        return self.service.exists(container_name, blob_name)

    def _get_resource_reference(self):
        return '{}'.format(self.file_uuid)

    def run_copy(self):
        try:
            logger.info('creating container name')
            container_name = self._get_resource_reference()
            logger.info('container name: ' + container_name)
            self.service.create_container(container_name)
            logger.info('set permission public')
            self.service.set_container_acl(
                container_name, public_access=PublicAccess.Container)
            count = 0
            source = self.CDN_BASE + self.file_uuid + '/'
            logger.info('copying the file from source: ' + source)
            copy = self.service.copy_blob(container_name, self.filename,
                                          source)
            # Poll for copy completion
            logger.info('checking status')
            while copy.status != 'success':
                count = count + 1
                if count > 20:
                    logger.info(
                        'Timed out waiting for async copy to complete on %i count'
                        % count)
                    raise Exception(
                        'Timed out waiting for async copy to complete.')
                time.sleep(3 * count)
                logger.info('get blob properties')
                copy = self.service.get_blob_properties(
                    container_name, self.filename).properties.copy
            logger.info('saved mesg object')
            return True
        except Exception as e:
            print(str(e))
            self.service.delete_container(container_name)
class KeyVaultSampleBase(object):
    """Base class for Key Vault samples, provides common functionality needed across Key Vault sample code

    :ivar config: configuration settings for the sample, including the Azure subscription id of the user running it
    :vartype config: :class: `KeyVaultSampleConfig`
    
    :ivar credentials: Azure Active Directory credentials used to authenticate with Azure services
    :vartype credentials: :class: `ServicePrincipalCredentials 
     <msrestazure.azure_active_directory.ServicePrincipalCredentials>`
    
    :ivar keyvault_data_client: Key Vault data client used for interacting with key vaults 
    :vartype keyvault_data_client: :class: `KeyVaultClient <azure.keyvault.KeyVaultClient>`
    
    :ivar keyvault_mgmt_client: Key Vault management client used for creating and managing key vaults 
    :vartype keyvault_mgmt_client:  :class: `KeyVaultManagementClient <azure.mgmt.keyvault.KeyVaultManagementClient>`
    
    :ivar resource_mgmt_client: Azure resource management client used for managing azure resources, access, and groups 
    :vartype resource_mgmt_client:  :class: `ResourceManagementClient <azure.mgmt.resource.ResourceManagementClient>`
    """
    def __init__(self):
        self.config = KeyVaultSampleConfig()
        self.credentials = None
        self.keyvault_data_client = None
        self.keyvault_mgmt_client = None
        self.resource_mgmt_client = None
        self.storage_account = None
        self.block_blob_service = None
        self._setup_complete = False
        self.samples = {(name, m)
                        for name, m in inspect.getmembers(self)
                        if getattr(m, 'kv_sample', False)}
        models = {}
        models.update({
            k: v
            for k, v in azure.keyvault.models.__dict__.items()
            if isinstance(v, type)
        })
        models.update({
            k: v
            for k, v in azure.mgmt.keyvault.models.__dict__.items()
            if isinstance(v, type)
        })
        self._serializer = Serializer(models)

    def setup_sample(self):
        """
        Provides common setup for Key Vault samples, such as creating rest clients, creating a sample resource group
        if needed, and ensuring proper access for the service principal.
         
        :return: None 
        """
        if not self._setup_complete:
            self.mgmt_creds = ServicePrincipalCredentials(
                client_id=self.config.client_id,
                secret=self.config.client_secret,
                tenant=self.config.tenant_id)
            self.data_creds = ServicePrincipalCredentials(
                client_id=self.config.client_id,
                secret=self.config.client_secret,
                tenant=self.config.tenant_id)
            self.resource_mgmt_client = ResourceManagementClient(
                self.mgmt_creds, self.config.subscription_id)

            # ensure the service principal has key vault as a valid provider
            self.resource_mgmt_client.providers.register('Microsoft.KeyVault')

            # ensure the intended resource group exists
            self.resource_mgmt_client.resource_groups.create_or_update(
                self.config.group_name, {'location': self.config.location})

            self.keyvault_mgmt_client = KeyVaultManagementClient(
                self.mgmt_creds, self.config.subscription_id)

            self.keyvault_data_client = KeyVaultClient(self.data_creds)

            self.storage_account = CloudStorageAccount(
                account_name=self.config.storage_account_name,
                account_key=self.config.storage_account_key)

            self.block_blob_service = self.storage_account.create_block_blob_service(
            )

            self._setup_complete = True

    def create_vault(self):
        """
        Creates a new key vault with a unique name, granting full permissions to the current credentials
        :return: a newly created key vault
        :rtype: :class:`Vault <azure.keyvault.generated.models.Vault>`
        """
        vault_name = get_name('vault')

        # setup vault permissions for the access policy for the sample service principal
        permissions = Permissions()
        permissions.keys = KEY_PERMISSIONS_ALL
        permissions.secrets = SECRET_PERMISSIONS_ALL
        permissions.certificates = CERTIFICATE_PERMISSIONS_ALL

        policy = AccessPolicyEntry(self.config.tenant_id,
                                   self.config.client_oid, permissions)

        properties = VaultProperties(self.config.tenant_id,
                                     Sku(name='standard'),
                                     access_policies=[policy])

        parameters = VaultCreateOrUpdateParameters(self.config.location,
                                                   properties)
        parameters.properties.enabled_for_deployment = True
        parameters.properties.enabled_for_disk_encryption = True
        parameters.properties.enabled_for_template_deployment = True

        print('creating vault {}'.format(vault_name))

        vault = self.keyvault_mgmt_client.vaults.create_or_update(
            self.config.group_name, vault_name, parameters)

        # wait for vault DNS entry to be created
        # see issue: https://github.com/Azure/azure-sdk-for-python/issues/1172
        self._poll_for_vault_connection(vault.properties.vault_uri)

        print('created vault {} {}'.format(vault_name,
                                           vault.properties.vault_uri))

        return vault

    def _poll_for_vault_connection(self,
                                   vault_uri,
                                   retry_wait=10,
                                   max_retries=4):
        """
        polls the data client 'get_secrets' method until a 200 response is received, indicating that the vault
        is available for data plane requests
        """
        last_error = None
        for x in range(max_retries - 1):
            try:
                # sleep first to avoid improper DNS caching
                time.sleep(retry_wait)
                self.keyvault_data_client.get_secrets(vault_uri)
                return
            except ClientRequestError as e:
                print('vault connection not available')
                last_error = e
        raise last_error

    def _serialize(self, obj):
        if isinstance(obj, Paged):
            serialized = [self._serialize(i) for i in list(obj)]
        else:
            serialized = self._serializer.body(obj, type(obj).__name__)
        return json.dumps(serialized, indent=4, separators=(',', ': '))
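A hedged usage sketch for KeyVaultSampleBase. It assumes config.py-style settings (subscription, service principal, storage account) are in place; concrete samples normally subclass it rather than instantiate it directly.

sample = KeyVaultSampleBase()
sample.setup_sample()          # registers the provider and creates the clients
vault = sample.create_vault()  # new vault granting the sample full permissions
sample.block_blob_service.create_container('keyvault-sample-artifacts')
print(vault.properties.vault_uri)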
Example #25
TWILIO_IPM_SERVICE_SID = 'IS2ec68050ef5e4c79b15b78c3ded7ddc5'

# old one with testchannel and general
#TWILIO_SERVICE_SID = 'IS7d421d86df064d9698e91ee6e3d4bcf5'

# Initialize the client
TWILIO_IPM_CLIENT = TwilioIpMessagingClient(TWILIO_ACCOUNT_SID,
                                            TWILIO_AUTH_TOKEN)
TWILIO_IPM_SERVICE = TWILIO_IPM_CLIENT.services.get(sid=TWILIO_IPM_SERVICE_SID)

AZURE_STORAGE_ACCOUNT = CloudStorageAccount(
    "palliassistblobstorage",  # account name
    "r9tHMEj5VV/PwJyjN3KYySUqsnq9tCrxh6kDKFvVY3vrm+GluHN/a1LQjXKYIUzoHEle7x3EyIQwoOijzRJiOA==",  # access key
    "?sv=2016-05-31&ss=b&srt=sco&sp=rwdlac&se=2017-05-25T08:02:01Z&st=2017-04-04T00:02:01Z&spr=https,http&sig=DshFBBFKzV20Ml6sN8D8ZRpbIakU8jlbj8zIBDZP4z8%3D"  # sas token
)
BLOCK_BLOB_SERVICE = AZURE_STORAGE_ACCOUNT.create_block_blob_service()

#print "AZURE_STORAGE_ACCOUNT", AZURE_STORAGE_ACCOUNT
#print "BLOCK_BLOB_SERVICE", BLOCK_BLOB_SERVICE

if sys.version_info < (3, 0):
    reload(sys)
    sys.setdefaultencoding('utf8')

ENABLE_XMPP = False

DEBUG = True
TEMPLATE_DEBUG = DEBUG

ALLOWED_HOSTS = (
    'localhost',
Example #26
    new_css = '.flair-' + str(position) + '{background-position: 0 -' + str(
        height * position) + 'px}'
    r.set_stylesheet(subreddit, css + new_css)


def log(message):
    table_service.insert_entity('logs',
                                {'PartitionKey': 'flair', 'RowKey': str(datetime.datetime.now()),
                                 'text': message})
    print('[*] ' + message)


storage_account = CloudStorageAccount(storage_account_name, storage_account_key)

table_service = storage_account.create_table_service()
blob_service = storage_account.create_block_blob_service()

blob_service.create_container('images', public_access='container')
table_service.create_table('flair')
table_service.create_table('logs')

r = praw.Reddit(user_agent)
r.login(username, password)
r.config.decode_html_entities = True

while True:
    for message in (m for m in r.get_unread(limit=None)):
        log('received message from ' + message.author.name)
        try:
            file, text = get_flair_info(message)
            if file in [blob.name for blob in list(blob_service.list_blobs('images'))]:
Example #27
#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#      THE SOFTWARE.

from azure.storage import CloudStorageAccount

import config, time

account_name = config.STORAGE_ACCOUNT_NAME
account_key = config.STORAGE_ACCOUNT_KEY

account = CloudStorageAccount(account_name=account_name,
                              account_key=account_key)

service = account.create_block_blob_service()

#   The last time a backup was dropped into the folder, it was named 'splunketccfg.tar'.
#   This is (almost) always the one to restore.

container_name = 'backups'
restore_file_name = 'splunketccfg.tar'
OUTPUT_FILE = 'splunketccfg.tar'

exists = service.exists(container_name, restore_file_name)
if exists:
    service.get_blob_to_path(container_name, restore_file_name, OUTPUT_FILE)
else:
    print('Backup file does not exist')
Example #28
class StorageAccountTest(StorageTestCase):

    def setUp(self):
        super(StorageAccountTest, self).setUp()
        self.account_name = self.settings.STORAGE_ACCOUNT_NAME
        self.account_key = self.settings.STORAGE_ACCOUNT_KEY
        self.sas_token = '?sv=2015-04-05&st=2015-04-29T22%3A18%3A26Z&se=2015-04-30T02%3A23%3A26Z&sr=b&sp=rw&sip=168.1.5.60-168.1.5.70&spr=https&sig=Z%2FRHIX5Xcg0Mq2rqI3OlWTjEg2tYkboXr1P9ZUXDtkk%3D'
        self.account = CloudStorageAccount(self.account_name, self.account_key)

    #--Helpers-----------------------------------------------------------------
    def validate_service(self, service, type):
        self.assertIsNotNone(service)
        self.assertIsInstance(service, type)
        self.assertEqual(service.account_name, self.account_name)
        self.assertEqual(service.account_key, self.account_key)

    #--Test cases --------------------------------------------------------
    def test_create_block_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_block_blob_service()

        # Assert
        self.validate_service(service, BlockBlobService)

    def test_create_page_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_page_blob_service()

        # Assert
        self.validate_service(service, PageBlobService)

    def test_create_append_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_append_blob_service()

        # Assert
        self.validate_service(service, AppendBlobService)

    def test_create_table_service(self):
        # Arrange

        # Act
        service = self.account.create_table_service()

        # Assert
        self.validate_service(service, TableService)

    def test_create_queue_service(self):
        # Arrange

        # Act
        service = self.account.create_queue_service()

        # Assert
        self.validate_service(service, QueueService)

    def test_create_file_service(self):
        # Arrange

        # Act
        service = self.account.create_file_service()

        # Assert
        self.validate_service(service, FileService)

    def test_create_service_no_key(self):
        # Arrange

        # Act
        bad_account = CloudStorageAccount('', '')
        with self.assertRaises(ValueError):
            service = bad_account.create_block_blob_service()

        # Assert

    def test_create_account_sas(self):
        # Arrange
      
        # Act
        sas_account = CloudStorageAccount(self.account_name, sas_token=self.sas_token)
        service = sas_account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, self.account_name)
        self.assertIsNone(service.account_key)
        self.assertEqual(service.sas_token, self.sas_token)

    def test_create_account_sas_and_key(self):
        # Arrange
        
        # Act
        account = CloudStorageAccount(self.account_name, self.account_key, self.sas_token)
        service = account.create_block_blob_service()

        # Assert
        self.validate_service(service, BlockBlobService)

    def test_create_account_emulated(self):
        # Arrange
      
        # Act
        account = CloudStorageAccount(is_emulated=True)
        service = account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, 'devstoreaccount1')
        self.assertIsNotNone(service.account_key)

    @record
    def test_generate_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recordingfile(self.test_mode):
            return

        # Arrange
        token = self.account.generate_shared_access_signature(
            Services.BLOB,
            ResourceTypes.OBJECT,
            AccountPermissions.READ,
            datetime.utcnow() + timedelta(hours=1),
        )

        service = self.account.create_block_blob_service()
        data = b'shared access signature with read permission on blob'
        container_name='container1'
        blob_name = 'blob1.txt'

        try:
            service.create_container(container_name)
            service.create_blob_from_bytes(container_name, blob_name, data)

            # Act
            url = service.make_blob_url(
                container_name,
                blob_name,
                sas_token=token,
            )
            response = requests.get(url)

            # Assert
            self.assertTrue(response.ok)
            self.assertEqual(data, response.content)
        finally:
            service.delete_container(container_name)
Example #29
#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#      THE SOFTWARE.

from azure.storage import CloudStorageAccount

import config

account_name = config.STORAGE_ACCOUNT_NAME
account_key  = config.STORAGE_ACCOUNT_KEY

account = CloudStorageAccount(account_name = account_name, 
                              account_key = account_key)

service = account.create_block_blob_service()

#   The last time a backup was dropped into the folder, it was named 'splunketccfg.tar'.
#   This is (almost) always the one to restore.

container_name      = 'backups'
restore_file_name   = 'splunketccfg.tar'
OUTPUT_FILE         = 'splunketccfg.tar'

exists              = service.exists(container_name, restore_file_name)
if exists:
    service.get_blob_to_path(container_name, restore_file_name, OUTPUT_FILE)
else:
    print('Backup file does not exist')
    
Example #30
class SampleTest():
	def __init__(self):
		try:
			import config as config
		except:
			raise ValueError('Please specify configuration settings in config.py.')

		if config.IS_EMULATED:
			self.account = CloudStorageAccount(is_emulated=True)
		else:
			# Note that account key and sas should not both be included
			account_name = config.STORAGE_ACCOUNT_NAME
			account_key = config.STORAGE_ACCOUNT_KEY
			sas = config.SAS
			self.account = CloudStorageAccount(account_name=account_name, 
											   account_key=account_key, 
											   sas_token=sas)
		self.service = self.account.create_block_blob_service()

	def test_container_samples(self):
		container = ContainerSamples(self.account)
		container.run_all_samples()

	def test_block_blob_samples(self):
		blob = BlockBlobSamples(self.account)
		blob.run_all_samples()

	def test_append_blob_samples(self):
		blob = AppendBlobSamples(self.account)
		blob.run_all_samples()

	def test_page_blob_samples(self):
		blob = PageBlobSamples(self.account)
		blob.run_all_samples()

	def list_containers(self):
		self.service = self.account.create_block_blob_service()
		containers = list(self.service.list_containers())
		print('All containers in your account:')
		for container in containers:
			print(container.name)     
	
	def list_all_blobs_in_all_containers(self):
		#self.service = self.account.create_block_blob_service()
		containers = list(self.service.list_containers())
		print('Full list:')
		for container in containers:
			print(container.name+':')		
			blobs = list(self.service.list_blobs(container.name))
			for blob in blobs:
				print(blob.name)
			print('')

	def test_get_put_blob(self):
		import config as config
		account_name = config.STORAGE_ACCOUNT_NAME
		account_key = config.STORAGE_ACCOUNT_KEY
		block_blob_service = BlockBlobService(account_name, account_key)
		block_blob_service.create_blob_from_path(
			'cont2',
			'sunset.png',
			'sunset.png',)	
		block_blob_service.get_blob_to_path('cont2', 'sunset.png', 'out-sunset.png')
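A hedged usage sketch for SampleTest, assuming config.py defines IS_EMULATED, STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY and SAS as the constructor expects.

if __name__ == '__main__':
    sample = SampleTest()
    sample.list_containers()
    sample.list_all_blobs_in_all_containers()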
Example #31
class NFS_Controller:
    def __init__(self, config):
        self.config = config
        self.account = CloudStorageAccount(
            account_name=config.storage_account_name,
            account_key=config.storage_account_key)
        self.service = self.account.create_block_blob_service()

    """ utility functions """

    def get_containers(self):
        containers = self.service.list_containers()
        return containers

    def get_container_directories(self, container_name):
        bloblistingresult = self.service.list_blobs(
            container_name=container_name, delimiter='/')
        return [blob.name.rsplit('/', 1)[0] for blob in bloblistingresult]

    def create_container(self, container_name):
        self.service.create_container(container_name)

    def get_parent_directory(self, path):
        return path.rsplit('/', 1)[0]

    def exists(self, container, full_blob_name=None):
        return self.service.exists(container, full_blob_name)

    def generate_uid(self):
        r_uuid = base64.urlsafe_b64encode(uuid.uuid4().bytes)
        return r_uuid.replace('=', '')

    """ Upload: """

    def parallel_chunky_upload(self,
                               container_name,
                               full_blob_name,
                               data,
                               chunks=5):
        debug = False
        threads = []
        block_ids = []
        chunk_size = len(data) / chunks
        chunks = [
            data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size)
        ]
        for chunk in chunks:
            uid = self.generate_uid()
            block_ids.append(BlobBlock(id=uid))
            t = threading.Thread(target=self._upload_block,
                                 args=(
                                     container_name,
                                     full_blob_name,
                                     chunk,
                                     uid,
                                 ))
            threads.append(t)
            t.start()
        [t.join() for t in threads]
        self.service.put_block_list(container_name, full_blob_name, block_ids)
        return full_blob_name

    def _upload_block(self, container_name, full_blob_name, chunk, uid):
        self.service.put_block(container_name, full_blob_name, chunk, uid)

    def upload_text(self, container_name, full_blob_name, data):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        self.service.create_blob_from_text(container_name, full_blob_name,
                                           data)
        return full_blob_name

    def upload_image(self, container_name, path, data):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        full_blob_name = '{}{}'.format(path, '.jpeg')
        with BytesIO() as output:
            data.save(output, 'jpeg')
            image_bytes = output.getvalue()
        self.parallel_chunky_upload(container_name, full_blob_name,
                                    image_bytes)
        return full_blob_name

    def upload_from_path(self, container_name, base_nfs_path, file_path):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        path = file_path.rsplit('/', 1)[1] if ('/' in file_path) else file_path
        if (base_nfs_path == ""):
            full_blob_name = '{}'.format(path)
        else:
            full_blob_name = '{}/{}'.format(base_nfs_path, path)
        self.service.create_blob_from_path(container_name, full_blob_name,
                                           file_path)

    def batched_parallel_directory_upload(
            self,
            container_name,
            base_nfs_path,
            dirpath,
            ext_filter_list=['.jpeg', '.png', '.jpg']):
        print(dirpath)
        if (ext_filter_list == None):
            file_paths = [
                os.path.realpath('{}/{}'.format(dirpath, fn))
                for fn in os.listdir(dirpath)
            ]
        else:
            file_paths = [
                os.path.realpath('{}/{}'.format(dirpath, fn))
                for fn in os.listdir(dirpath) if any(
                    fn.endswith(extension_filter)
                    for extension_filter in ext_filter_list)
            ]
        # print file_paths
        total_files_count = len(file_paths)
        current_index = 0
        batch_size = 30
        if not (self.exists(container_name)):
            self.create_container(container_name)
        batch_number = 1
        while (True):
            indices = [(current_index + i) for i in range(batch_size)]
            file_paths_batch = [
                file_paths[i] for i in indices if (i < total_files_count)
            ]
            current_index += len(file_paths_batch)
            if (len(file_paths_batch) == 0):
                break
            threads = []
            index = indices[0]
            for file_path in file_paths_batch:
                print(
                    '[Batch {}: Percent of total {}]Uploading image from file path: {}'
                    .format(batch_number, (((index * 1.0) /
                                            (total_files_count - 1)) * 100.0),
                            file_path))
                t = threading.Thread(target=self.upload_from_path,
                                     args=(container_name, base_nfs_path,
                                           file_path))
                threads.append(t)
                index = index + 1
                t.start()
            [t.join() for t in threads]
            batch_number = batch_number + 1

    """ Download """

    def parallel_download(self, container_name, full_blob_names):
        if (full_blob_names == None):
            return None
        threads = []
        results = []
        for full_blob_name in full_blob_names:
            result = {'blob': None}
            t = threading.Thread(target=self._download_blob_helper,
                                 args=(container_name, full_blob_name, result))
            results.append(result)
            threads.append(t)
            t.start()
        [t.join() for t in threads]
        blobs = [
            result['blob'] for result in results if result['blob'] != None
        ]
        return blobs

    def _download_blob_helper(self, container_name, full_blob_name, result):
        if (self.exists(container_name, full_blob_name)):
            result['blob'] = self.download_data(container_name, full_blob_name)
        else:
            return None

    def download_data(self, container_name, full_blob_name):
        print("Full blob name: " + full_blob_name)
        if not (self.exists(container_name)):
            self.create_container(container_name)
            return None
        blob = self.service.get_blob_to_bytes(container_name, full_blob_name)
        return blob

    def download_full_container(self,
                                container_name,
                                destination_directory=None):
        if not (destination_directory == None):
            destination_directory = os.path.realpath(destination_directory)
            if not (os.path.isdir(destination_directory)):
                os.makedirs(destination_directory)
        else:
            destination_directory = os.getcwd()
        if not (self.exists(container_name)):
            raise ValueError('Container does not exist')
        blobs = self.service.list_blobs(container_name)
        #code below lists all the blobs in the container and downloads them one after another
        for blob in blobs:
            print(blob.name)
            print("{}".format(blob.name))
            #check if the path contains a folder structure, create the folder structure
            if "/" in "{}".format(blob.name):
                print("there is a path in this")
                #extract the folder path and check if that folder exists locally, and if not create it
                head, tail = os.path.split("{}".format(blob.name))
                print(head)
                print(tail)
                if (os.path.isdir(destination_directory + "/" + head)):
                    #download the files to this directory
                    print("directory and sub directories exist")
                    self.service.get_blob_to_path(
                        container_name, blob.name,
                        destination_directory + "/" + head + "/" + tail)
                else:
                    #create the diretcory and download the file to it
                    print("directory doesn't exist, creating it now")
                    os.makedirs(destination_directory + "/" + head)
                    print("directory created, download initiated")
                    self.service.get_blob_to_path(
                        container_name, blob.name,
                        destination_directory + "/" + head + "/" + tail)
            else:
                self.service.get_blob_to_path(
                    container_name, blob.name,
                    destination_directory + "/" + blob.name)

    """ Logging """

    def retrieve_log_entities(self, container_name, path, filter=None):
        log_path = '{}/log.txt'.format(path)
        log_entries = LogEntriesBase()
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
        if (filter != None):
            log_entries = log_entries.get_logs(filter=filter)
        return log_entries

    def update_log(self, container_name, entry):
        path = self.get_parent_directory(entry[LogEntriesBase.PATH])
        log_path = '{}/log.txt'.format(path)
        log_entries = LogEntriesBase()
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
        log_entries.update(entry)
        raw = log_entries.serialize()
        self.service.create_blob_from_text(container_name, log_path, raw)

    def update_logs(self, container_name, entries):
        log_paths = {
            '{}/log.txt'.format(
                self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
            for log_entry in entries
        }
        if len(log_paths) > 1:
            raise ValueError('Logs being updated must be of the same log file')
        log_path = next(iter(log_paths))
        if not self.exists(container_name, log_path):
            raise ValueError(
                'Log file {} under container {} does not exist'.format(
                    log_path, container_name))
        log_entries = LogEntriesBase()
        log_file = self.service.get_blob_to_text(container_name, log_path)
        raw_logs = log_file.content
        log_entries.deserialize(raw_logs)
        for entry in entries:
            log_entries.update(entry)
        raw = log_entries.serialize()
        self.service.create_blob_from_text(container_name, log_path, raw)

    """ Avoid Using this: It is not efficient and you should always update a log directly after resource use """

    def update_multiple_log_files(self, container_name, entries):
        log_paths = {
            '{}/log.txt'.format(
                self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
            for log_entry in entries
        }
        for log_path in log_paths:
            entries = [
                log_entry for log_entry in entries if '{}/log.txt'.format(
                    self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
                == log_path
            ]
            self.update_logs(container_name, entries)
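A hedged usage sketch for NFS_Controller. Config below is a hypothetical stand-in for the application's settings object; __init__ only reads the two attributes shown.

class Config(object):
    storage_account_name = '<account_name>'
    storage_account_key = '<account_key>'


nfs = NFS_Controller(Config())
nfs.upload_text('example-container', 'notes/hello.txt', 'hello world')
downloaded = nfs.download_data('example-container', 'notes/hello.txt')
print(downloaded.content)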
Example #32
File: utils.py Project: jounile/nollanet
def get_azure_blob_service():
    account = app.config.get('AZURE_ACCOUNT')
    key = app.config.get('AZURE_STORAGE_KEY')
    account = CloudStorageAccount(account_name=account, account_key=key)
    return account.create_block_blob_service()
Example #33
class LogoStorageConnector:
	def __init__(self):
		try:
			import config as config
			self.config = config
		except:
			raise ValueError('Please specify configuration settings in config.py.')
		try:
			import nfs_constants as constants
			self.constants = constants
		except:
			raise ValueError('Please specify networked file system constants in nfs_constants.py.')
		self.account = CloudStorageAccount(account_name=config.STORAGE_ACCOUNT_NAME, account_key=config.STORAGE_ACCOUNT_KEY)
		self.service = self.account.create_block_blob_service()
		self._create_input_container()
		self._create_output_container()
        # self._create_checkpoints_container()

	""" Public Interfaces """
	""" Upload: input """

	def upload_brand_training_input_IPE(self, brand, IPE, isProcessed):
		return self.upload_IPE_to_bucket(self._input_container(), brand, self.constants.TRAINING_DIRECTORY_NAME, IPE, isProcessed, log = True)

	def upload_brand_operational_input_IPE(self, brand, IPE, isProcessed):
		return self.upload_IPE_to_bucket(self._input_container(), brand, self.constants.OPERATIONAL_DIRECTORY_NAME, IPE, isProcessed, log = True)

	def upload_IPE_to_bucket(self, container_name, brand, directory, IPE, isProcessed, log = False):
		bucket_path = self._create_path_to_bucket(brand, directory)
		bucket_post_entities_full_path = self._get_bucket_post_entities_file(bucket_path)
		bucket_images_base_path = self._get_bucket_image_directory(bucket_path)
		for element in IPE.posts:
			print(element.keys())
			if('picture' in element and 'picture_id' in element):
				path = '{}/{}'.format(bucket_images_base_path, element['picture_id'])
				image_path = self._upload_and_compress_image(container_name, path, element['picture'])
				element.pop('picture', None)
				element['image_path'] = image_path
		self._upload_text(container_name, bucket_post_entities_full_path, IPE.serialize())
		if(log == True):
			self.log(bucket_path, isProcessed)
		return  bucket_path

	""" Download """
	def download_brand_training_input_data(self, brand, processing_status_filter = None):
		prefix = '{}/{}'.format(brand, self.constants.TRAINING_DIRECTORY_NAME)
		return self.download_brand_data(self.constants.INPUT_CONTAINER_NAME, brand, prefix, processing_status_filter = processing_status_filter)

	def download_brand_operational_input_data(self, brand, processing_status_filter = None):
		prefix = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
		return self.download_brand_data(self.constants.INPUT_CONTAINER_NAME, brand, prefix, processing_status_filter = processing_status_filter)

	def download_brand_training_input_post_entities(self, brand, processing_status_filter = None):
		prefix = '{}/{}'.format(brand, self.constants.TRAINING_DIRECTORY_NAME)
		return self.download_brand_post_entities(self.constants.INPUT_CONTAINER_NAME, brand, prefix, processing_status_filter = processing_status_filter)

	def download_brand_operational_input_post_entities(self, brand, processing_status_filter = None):
		prefix = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
		return self.download_brand_post_entities(self.constants.INPUT_CONTAINER_NAME, brand, prefix, processing_status_filter = processing_status_filter)

	def download_brand_post_entities(self, container_name, brand, prefix, processing_status_filter = None):
		blobs = []
		logs = self.retreive_log_entities(container_name, prefix)
		if(processing_status_filter != None):
			unprocessed_entries = logs.GetLogs(processing_status_filter = processing_status_filter)
			for log in unprocessed_entries:
				blobs.append(self._download_data(container_name, '{}/{}'.format(log[PREFIX], 'post_entities.txt')))
		else:
			for log in logs:
				blobs.append(self._download_data(container_name, '{}/{}'.format(log[PREFIX], 'post_entities.txt')))
		return blobs

	def download_brand_data(self, container_name, brand, prefix, processing_status_filter = None):
		blobs = []
		if(processing_status_filter != None):
			logs = self.retreive_log_entities(container_name, prefix, processing_status_filter = processing_status_filter)
			for log in logs:
				blobs.extend(self.service.list_blobs(container_name=container_name, prefix=log[PREFIX]))
		else:
			blobs = self.service.list_blobs(container_name=container_name, prefix=prefix)
		data = []
		for blob in blobs:
			data.append(self._download_data(container_name, blob.name))
		return data

	def parallel_input_image_download(self, full_blob_names):
		return self.parallel_image_download(self.constants.INPUT_CONTAINER_NAME, full_blob_names)

	def parallel_image_download(self, container_name, full_blob_names):
		if(full_blob_names == None):
			return None
		threads = []
		results = []
		for full_blob_name in full_blob_names:
			result = {'blob': None}
			t = threading.Thread(target=self.download_image_blob, args=(container_name,full_blob_name, result))
			results.append(result)
			threads.append(t)
			t.start()
		[t.join() for t in threads]
		blobs = [result['blob'] for result in results if result['blob'] != None]
		return blobs

	def download_image_blob(self, container_name, full_blob_name, result):
		if(self.exists(container_name, full_blob_name)):
			result['blob'] = self._download_data(container_name, full_blob_name)
		else:
			return None

	def download_brand_operational_output_data(self, brand):
		path = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
		blobs = self.service.list_blobs(container_name=self.constants.OUTPUT_CONTAINER_NAME, prefix=path)
		return blobs

	def get_container_directories(self, container_name):
		bloblistingresult = self.service.list_blobs(container_name=container_name, delimiter='/')
		return [blob.name.rsplit('/', 1)[0] for blob in bloblistingresult]

	""" Pretty Print """

	def pretty_print_storage_structure(self):
		containers = self.service.list_containers()
		for container in containers:
			self.pretty_print_container_contents(container.name)

	def pretty_print_container_contents(self, container_name):
		print(container_name)
		blobs = self.service.list_blobs(container_name)
		for blob in blobs:
			print ('  {}'.format(blob.name))

	""" Private """

	def _create_path_to_bucket(self, brand_name, level):
		return '{}/{}/{}=={}'.format(brand_name, level, str(uuid.uuid4())[:8], datetime.datetime.now().strftime("%m-%d-%Y %I:%M%p"))

	def _get_bucket_image_directory(self, prefix):
		return '{}/[IMAGES]'.format(prefix)

	def _get_bucket_post_entities_file(self, prefix):
		return '{}/post_entities.txt'.format(prefix)

	def _create_input_container(self):
		self.service.create_container(self.constants.INPUT_CONTAINER_NAME)

	def _create_output_container(self):
		self.service.create_container(self.constants.OUTPUT_CONTAINER_NAME)

	def _create_checkpoints_container(self):
		self.service.create_container(self.constants.CHECKPOINTS_CONTAINER_NAME)

	def _create_container(self, container_name):
		self.service.create_container(container_name)

	def _input_container(self):
		return self.constants.INPUT_CONTAINER_NAME

	def _output_container(self):
		return self.constants.OUTPUT_CONTAINER_NAME

	def get_parent_directory(self, entity):
		return entity.rsplit('/', 1)[0]

	def exists(self, container, full_blob_name = None):
		return self.service.exists(container, full_blob_name)

	def _upload_text(self, container_name, full_blob_name, data):
		if not(self.exists(container_name)):
			self._create_container(container_name)
		print("uploading text to path", full_blob_name)
		self.service.create_blob_from_text(container_name, full_blob_name, data)
		return full_blob_name

	def _upload_and_compress_image(self, container_name, path, data):
		if not(self.exists(container_name)):
			self._create_container(container_name)
		full_blob_name = '{}{}'.format(path, '.jpeg')

		with BytesIO() as output:
			data.save(output, 'jpeg')
			image_bytes = output.getvalue()

		print("uploading image to path", path)
		self._parallel_upload(container_name, full_blob_name, image_bytes)
		return full_blob_name

	def _parallel_upload(self, container_name, full_blob_name, data):
		debug = False
		threads = []
		block_ids = []

		chunk_size = max(1, len(data) // 5)
		if debug:
			print("chunking data into even sections of length:", chunk_size)
		chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

		for chunk in chunks:
			uid = self.generate_uid()
			block_ids.append(BlobBlock(id=uid))
			if debug:
				print("spawning thread with uid:", uid)
			t = threading.Thread(target=self._upload_block, args=(container_name, full_blob_name, chunk, uid,))
			threads.append(t)
			t.start()
		if debug:
			print("all threads started...")
		[t.join() for t in threads]
		if debug:
			print("all threads have completed execution")

		if debug:
			block_list = self.service.get_block_list(container_name, full_blob_name, block_list_type=BlockListType.All)
			uncommitted = len(block_list.uncommitted_blocks)
			committed = len(block_list.committed_blocks)
			print("uncommitted:", uncommitted, " committed:", committed)

		if debug:
			print("committing blocks")

		self.service.put_block_list(container_name, full_blob_name, block_ids)

		if debug:
			block_list = self.service.get_block_list(container_name, full_blob_name, block_list_type=BlockListType.All)
			uncommitted = len(block_list.uncommitted_blocks)
			committed = len(block_list.committed_blocks)
			print("uncommitted:", uncommitted, " committed:", committed)

	def _upload_block(self, container_name, full_blob_name, chunk, uid):
		self.service.put_block(container_name, full_blob_name, chunk, uid)

	def generate_uid(self):
		r_uuid = base64.urlsafe_b64encode(uuid.uuid4().bytes).decode('ascii')
		return r_uuid.replace('=', '')

	def download_input_data(self, full_blob_name):
		return self._download_data(self.constants.INPUT_CONTAINER_NAME, full_blob_name)

	def _download_data(self, container_name, full_blob_name):
		if not(self.exists(container_name)):
			self._create_container(container_name)
		blob = self.service.get_blob_to_bytes(container_name, full_blob_name)
		return blob

	def retreive_log_entities(self, container_name, path, processing_status_filter = None):
		log_entries = LogEntries()
		log_path = path + "/log.txt"
		if self.exists(container_name,log_path):
			log_file = self.service.get_blob_to_text(container_name, log_path)
			raw_logs = log_file.content
			log_entries.deserialize(raw_logs)
		if processing_status_filter is not None:
			log_entries = log_entries.GetLogs(processing_status_filter=processing_status_filter)
		return log_entries

	def log(self, prefix, isProcessed):
		container_name = self._input_container()
		path = self.get_parent_directory(prefix)
		log_path = path + '/log.txt'
		log_entries = LogEntries()
		if self.exists(container_name,log_path):
			log_file = self.service.get_blob_to_text(container_name, log_path)
			raw_logs = log_file.content
			log_entries.deserialize(raw_logs)
		log_entries.update(prefix, isProcessed=isProcessed)
		raw = log_entries.serialize()
		self.service.create_blob_from_text(container_name, log_path, raw)

	def update_log_entries(self, bucket_names, isProcessed):
		directories = {}
		container_name = self._input_container()
		for bucket_name in bucket_names:
			print(bucket_name)
			path = self.get_parent_directory(bucket_name)
			print(path)
			log_path = path + '/log.txt'
			print(directories.keys())
			if log_path in directories:
				directories[log_path].append(bucket_name)
			else:
				print("adding new log path: ", log_path)
				directories[log_path] = []
				directories[log_path].append(bucket_name)
		for key, value in directories.items():
			log_entries = LogEntries()
			if self.exists(container_name, key):
				log_file = self.service.get_blob_to_text(container_name, key)
				raw_logs = log_file.content
				print(key)
				print(raw_logs)
				log_entries.deserialize(raw_logs)
			for bucket_name in value:
				print("updating for bucket_name:", bucket_name, "for file: ", key)
				log_entries.update(bucket_name, isProcessed=isProcessed)
				print (log_entries.serialize())
			raw = log_entries.serialize()
			self.service.create_blob_from_text(container_name, key, raw)
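
The _parallel_upload helper above stages blocks with put_block and commits them with put_block_list. Below is a minimal, self-contained sketch of that same chunk-and-commit pattern against the legacy azure-storage SDK (BlockBlobService); the account credentials, container name, blob name, and chunk count are placeholders, not values from the code above.

import base64
import threading
import uuid

from azure.storage.blob import BlockBlobService
from azure.storage.blob.models import BlobBlock


def parallel_block_upload(service, container_name, blob_name, data, chunk_count=5):
    # Split the payload into roughly equal chunks and upload each block in its own thread.
    chunk_size = max(1, len(data) // chunk_count)
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

    threads, block_list = [], []
    for chunk in chunks:
        # Block ids must be unique within the blob; uuid-derived ids keep them equal length.
        block_id = base64.urlsafe_b64encode(uuid.uuid4().bytes).decode('ascii').rstrip('=')
        block_list.append(BlobBlock(id=block_id))
        t = threading.Thread(target=service.put_block,
                             args=(container_name, blob_name, chunk, block_id))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()

    # Committing the ordered block list turns the staged blocks into the final blob.
    service.put_block_list(container_name, blob_name, block_list)


if __name__ == '__main__':
    # Placeholder credentials and names for illustration only.
    service = BlockBlobService(account_name='ACCOUNT_NAME', account_key='ACCOUNT_KEY')
    service.create_container('demo-parallel-upload', fail_on_exist=False)
    parallel_block_upload(service, 'demo-parallel-upload', 'demo.bin', b'x' * (4 * 1024 * 1024))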
Example #34
0
class StorageBlobContext():
    """Initializes the repository with the specified settings dict.
        Required settings in config dict are:
        - AZURE_STORAGE_NAME
        - AZURE_STORAGE_KEY
        - AZURE_REQUIRE_ENCRYPTION
        - AZURE_KEY_IDENTIFIER
        - AZURE_SECRET_KEY
        - AZURE_STORAGE_IS_EMULATED
    """

    _account = None
    _account_name = ''
    _account_key = ''
    _is_emulated = False

    _modeldefinitions = []
    REGISTERED = True
    """ decorators """
    def get_modeldefinition(registered=False):
        def wrap(func):
            @wraps(func)
            def wrapper(self,
                        storagemodel,
                        modeldefinition=None,
                        *args,
                        **kwargs):
                """ modeldefinition already determined """
                if modeldefinition is not None:
                    return func(self, storagemodel, modeldefinition, *args,
                                **kwargs)
                """ find modeldefinition for the given StorageBlobModel """
                if isinstance(storagemodel, StorageBlobModel):
                    definitionlist = [
                        definition for definition in self._modeldefinitions
                        if definition['modelname'] ==
                        storagemodel.__class__.__name__
                    ]
                else:
                    log.info('Argument is not a StorageBlobModel')
                    raise AzureStorageWrapException(
                        storagemodel, "Argument is not a StorageBlobModel")

                if len(definitionlist) == 1:
                    modeldefinition = definitionlist[0]

                elif len(definitionlist) > 1:
                    raise ModelRegisteredMoreThanOnceError(storagemodel)

                if registered and (not isinstance(modeldefinition, dict)):
                    raise ModelNotRegisteredError(storagemodel)

                return func(self, storagemodel, modeldefinition, *args,
                            **kwargs)

            return wrapper

        return wrap

    def __init__(self, **kwargs):
        """ parse kwargs """
        self._account_name = kwargs.get('AZURE_STORAGE_NAME', '')
        self._account_key = kwargs.get('AZURE_STORAGE_KEY', '')
        self._is_emulated = kwargs.get('AZURE_STORAGE_IS_EMULATED', False)
        self._key_identifier = kwargs.get('AZURE_KEY_IDENTIFIER', '')
        self._secret_key = kwargs.get('AZURE_SECRET_KEY', '')
        """ account & service init """
        if self._is_emulated:
            self._account = CloudStorageAccount(is_emulated=True)

        elif self._account_name != '' and self._account_key != '':
            self._account = CloudStorageAccount(self._account_name,
                                                self._account_key)

        else:
            raise AzureException
        """ registered models """
        self._modeldefinitions = []

    def __create__(self, modeldefinition: dict) -> bool:
        if modeldefinition['blobservice'] is not None:
            try:
                modeldefinition['blobservice'].create_container(
                    modeldefinition['container'])
                return True

            except Exception as e:
                msg = 'failed to create {} with error {}'.format(
                    modeldefinition['container'], e)
                raise AzureStorageWrapException(msg=msg)

        else:
            return True
        pass

    def __delete__(self, modeldefinition: dict) -> bool:
        if modeldefinition['blobservice'] is not None:
            try:
                modeldefinition['blobservice'].delete_container(
                    modeldefinition['container'])
                return True

            except Exception as e:
                msg = 'failed to delete {} with error {}'.format(
                    modeldefinition['container'], e)
                raise AzureStorageWrapException(msg=msg)

        else:
            return True
        pass

    @get_modeldefinition()
    def register_model(self, storagemodel: object, modeldefinition=None):
        """ set up an Queueservice for an StorageQueueModel in your  Azure Storage Account
            Will create the Queue if not exist!
        
            required Parameter is:
            - storagemodel: StorageQueueModel(Object)
        """
        if modeldefinition is None:
            """ test if containername already exists """
            if [
                    model for model in self._modeldefinitions
                    if model['container'] == storagemodel._containername
            ]:
                raise NameConventionError(storagemodel._containername)
            """ test if containername fits to azure naming rules """
            if not test_azurestorage_nameconventions(
                    storagemodel._containername, 'StorageBlobModel'):
                raise NameConventionError(storagemodel._containername)
            """ now register model """
            modeldefinition = {
                'modelname': storagemodel.__class__.__name__,
                'container': storagemodel._containername,
                'encrypt': storagemodel._encrypt,
                'blobservice': self._account.create_block_blob_service()
            }
            """ encrypt queue service """
            if modeldefinition['encrypt']:

                # Create the KEK used for encryption.
                # KeyWrapper is the provided sample implementation, but the user may use their own object as long as it implements the interface above.
                kek = KeyWrapper(self._key_identifier,
                                 self._secret_key)  #  Key identifier

                # Create the key resolver used for decryption.
                # KeyResolver is the provided sample implementation, but the user may use whatever implementation they choose so long as the function set on the service object behaves appropriately.
                key_resolver = KeyResolver()
                key_resolver.put_key(kek)

                # Set the require Encryption, KEK and key resolver on the service object.
                modeldefinition['blobservice'].require_encryption = True
                modeldefinition['blobservice'].key_encryption_key = kek
                modeldefinition[
                    'blobservice'].key_resolver_function = key_resolver.resolve_key

            self.__create__(modeldefinition)

            self._modeldefinitions.append(modeldefinition)

            log.info(
                'model {} registered successfully. Models are {!s}.'.format(
                    modeldefinition['modelname'],
                    [model['modelname'] for model in self._modeldefinitions]))
        else:
            log.info('model {} already registered. Models are {!s}.'.format(
                modeldefinition['modelname'],
                [model['modelname'] for model in self._modeldefinitions]))

        pass

    @get_modeldefinition(REGISTERED)
    def unregister_model(self,
                         storagemodel: object,
                         modeldefinition=None,
                         delete_blob=False):
        """ clear up an Queueservice for an StorageQueueModel in your  Azure Storage Account
            Will delete the hole Queue if delete_queue Flag is True!
        
            required Parameter is:
            - storagemodel: StorageQueueModel(Object)

            Optional Parameter is:
            - delete_queue: bool
        """
        """ remove from modeldefinitions """
        for i in range(len(self._modeldefinitions)):
            if self._modeldefinitions[i]['modelname'] == modeldefinition[
                    'modelname']:
                del self._modeldefinitions[i]
                break
        """ delete queue from storage if delete_queue == True """
        if delete_blob:
            self.__delete__(modeldefinition)

        log.info('model {} unregistered successfully. Models are {!s}'.format(
            modeldefinition['modelname'],
            [model['modelname'] for model in self._modeldefinitions]))
        pass

    @get_modeldefinition(REGISTERED)
    def upload(self, storagemodel: object, modeldefinition=None):
        """ insert blob message into storage """

        if (storagemodel.content is None) or (
                storagemodel.properties.content_settings.content_type is None):
            # No content to upload
            raise AzureStorageWrapException(
                storagemodel,
                "StorageBlobModel does not contain content nor content settings"
            )

        else:
            blobservice = modeldefinition['blobservice']
            container_name = modeldefinition['container']
            blob_name = storagemodel.name

            try:

                # refresh metadata
                storagemodel.__instance_to_metadata__()
                """ upload bytes """
                blobservice.create_blob_from_bytes(
                    container_name=container_name,
                    blob_name=blob_name,
                    blob=storagemodel.content,
                    metadata=storagemodel.metadata,
                    content_settings=storagemodel.properties.content_settings)

                storagemodel.properties = blobservice.get_blob_properties(
                    container_name=container_name,
                    blob_name=blob_name).properties

            except Exception as e:
                msg = 'can not save blob in container {} because {!s}'.format(
                    storagemodel._containername, e)
                raise AzureStorageWrapException(storagemodel, msg=msg)

        return storagemodel

    @get_modeldefinition(REGISTERED)
    def download(self, storagemodel: object, modeldefinition=None):
        """ load blob from storage into StorageBlobModelInstance """

        if (storagemodel.name is None):
            # No blob name to download
            raise AzureStorageWrapException(
                storagemodel,
                "StorageBlobModel has no blob name to download"
            )

        else:
            container_name = modeldefinition['container']
            blob_name = storagemodel.name
            try:
                if modeldefinition['blobservice'].exists(
                        container_name, blob_name):
                    """ download blob """
                    blob = modeldefinition['blobservice'].get_blob_to_bytes(
                        container_name=modeldefinition['container'],
                        blob_name=storagemodel.name)

                    storagemodel.__mergeblob__(blob)

            except Exception as e:
                msg = 'can not load blob from container {} because {!s}'.format(
                    storagemodel._containername, e)
                raise AzureStorageWrapException(storagemodel, msg=msg)

        return storagemodel

    @get_modeldefinition(REGISTERED)
    def delete(self, storagemodel: object, modeldefinition=None) -> bool:
        """ delete the blob from storage """
        deleted = False

        blobservice = modeldefinition['blobservice']
        container_name = modeldefinition['container']
        blob_name = storagemodel.name

        try:
            if blobservice.exists(container_name, blob_name):
                """ delete """
                blobservice.delete_blob(container_name, blob_name)
                deleted = True

        except Exception as e:
            msg = 'can not delete blob {} from storage because {!s}'.format(
                blob_name, e)
            raise AzureStorageWrapException(storagemodel, msg=msg)

        return deleted

    @get_modeldefinition(REGISTERED)
    def exists(self, storagemodel: object, modeldefinition=None) -> bool:
        """ delete the blob from storage """
        exists = False

        blobservice = modeldefinition['blobservice']
        container_name = modeldefinition['container']
        blob_name = storagemodel.name

        try:
            blobs = self.list(storagemodel,
                              modeldefinition,
                              where=storagemodel.name)
            if len(blobs) == 1:
                storagemodel.__mergeblob__(blobs[0])
                exists = True

        except Exception as e:
            msg = 'can not retrieve blob {} from storage because {!s}'.format(
                blob_name, e)
            raise AzureStorageWrapException(storagemodel, msg=msg)

        return exists

    @get_modeldefinition(REGISTERED)
    def list(self,
             storagemodel: object,
             modeldefinition=None,
             where=None) -> list:
        """ list blob messages in container """
        try:
            blobnames = []
            if where is None:
                generator = modeldefinition['blobservice'].list_blobs(
                    modeldefinition['container'])
            else:
                generator = modeldefinition['blobservice'].list_blobs(
                    modeldefinition['container'], prefix=where)

            for blob in generator:
                blobnames.append(blob)

        except Exception as e:
            msg = 'can not list blobs in container {} because {!s}'.format(
                storagemodel._containername, e)
            raise AzureStorageWrapException(storagemodel, msg=msg)

        finally:
            return blobnames
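
register_model above turns on client-side encryption by setting require_encryption, key_encryption_key, and key_resolver_function on the block blob service. The sketch below shows the key-encryption-key interface those properties expect (wrap_key, unwrap_key, get_key_wrap_algorithm, get_kid), assuming the legacy azure-storage SDK; LocalKeyWrapper is a toy stand-in for the SDK's sample KeyWrapper, not suitable for real key management, and the account credentials and names are placeholders.

import os

from azure.storage.blob import BlockBlobService


class LocalKeyWrapper(object):
    """Toy KEK: 'wraps' the content encryption key by XOR-ing it with a local secret."""

    def __init__(self, kid, secret):
        self.kid = kid
        self.secret = secret

    def wrap_key(self, key, algorithm='XOR'):
        return bytes(b ^ self.secret[i % len(self.secret)] for i, b in enumerate(key))

    def unwrap_key(self, key, algorithm):
        # XOR is its own inverse, so unwrapping is the same operation.
        return self.wrap_key(key)

    def get_key_wrap_algorithm(self):
        return 'XOR'

    def get_kid(self):
        return self.kid


kek = LocalKeyWrapper('local:kek:1', os.urandom(32))

# Placeholder credentials for illustration only.
service = BlockBlobService(account_name='ACCOUNT_NAME', account_key='ACCOUNT_KEY')
service.require_encryption = True
service.key_encryption_key = kek
# The resolver maps a key id back to an object that can unwrap the key at download time.
service.key_resolver_function = lambda kid: kek if kid == kek.get_kid() else None

service.create_container('encrypted-demo', fail_on_exist=False)
service.create_blob_from_bytes('encrypted-demo', 'secret.bin', b'hello world')
print(service.get_blob_to_bytes('encrypted-demo', 'secret.bin').content)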
Example #35
0
class AzureBlobObjectStore(ObjectStore):
    """
    Object store that stores objects as blobs in an Azure Blob Container. A local
    cache exists that is used as an intermediate location for files between
    Galaxy and Azure.
    """
    def __init__(self, config, config_xml):
        if BlockBlobService is None:
            raise Exception(NO_BLOBSERVICE_ERROR_MESSAGE)
        super(AzureBlobObjectStore, self).__init__(config)

        self.staging_path = self.config.file_path
        self.transfer_progress = 0
        self._parse_config_xml(config_xml)
        self._configure_connection()
        self.container_lease = self._get_container_lease()

        # Clean cache only if value is set in galaxy.ini
        if self.cache_size != -1:
            # Convert GBs to bytes for comparison
            self.cache_size = self.cache_size * 1073741824
            # Helper for interruptable sleep
            self.sleeper = Sleeper()
            self.cache_monitor_thread = threading.Thread(target=self.__cache_monitor)
            self.cache_monitor_thread.start()
            log.info("Cache cleaner manager started")

    ###################
    # Private Methods #
    ###################

    # config_xml is an ElementTree object.
    def _parse_config_xml(self, config_xml):
        try:
            auth_xml = config_xml.find('auth')
            self.account_name = auth_xml.get('account_name')
            self.account_key = auth_xml.get('account_key')
            container_xml = config_xml.find('container')
            self.container_name = container_xml.get('name')
            self.max_chunk_size = int(container_xml.get('max_chunk_size', 250))  # currently unused
            cache_xml = config_xml.find('cache')
            self.cache_size = float(cache_xml.get('size', -1))
            self.staging_path = cache_xml.get('path', self.config.object_store_cache_path)

            for d_xml in config_xml.findall('extra_dir'):
                self.extra_dirs[d_xml.get('type')] = d_xml.get('path')

            log.debug("Object cache dir:    %s", self.staging_path)
            log.debug("       job work dir: %s", self.extra_dirs['job_work'])

        except Exception:
            # Toss it back up after logging, we can't continue loading at this point.
            log.exception("Malformed ObjectStore Configuration XML -- unable to continue")
            raise

    def _configure_connection(self):
        log.debug("Configuring Connection")
        self.account = CloudStorageAccount(self.account_name, self.account_key)
        self.service = self.account.create_block_blob_service()

    def _get_container_lease(self):
        """ Sometimes a handle to a container is not established right away so try
        it a few times. Raise an error if a connection is not established. """
        for i in range(5):
            try:
                self.service.break_container_lease(self.container_name)
                container_lease = self.service.acquire_container_lease(self.container_name)
                log.debug("Using azure blob store with container '%s'", self.container_name)
                return container_lease
            except AzureHttpError:
                try:
                    log.debug("container not found, creating azure blob store container with name '%s'", self.container_name)
                    self.service.create_container(self.container_name)
                    container_lease = self.service.acquire_container_lease(self.container_name)
                    return container_lease
                except AzureHttpError:
                    log.exception("Could not get container '%s', attempt %s/5", self.container_name, i + 1)
                    time.sleep(2)
        # All the attempts have been exhausted and connection was not established,
        # raise error
        raise AzureHttpError

    def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenanigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)

        rel_path = os.path.join(*directory_hash_id(obj.id))

        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        # rel_path = '%s/' % rel_path # assume for now we don't need this in Azure blob storage.

        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

        return rel_path

    def _fix_permissions(self, rel_path):
        """ Set permissions on rel_path"""
        for basedir, _, files in os.walk(rel_path):
            umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid)
            for filename in files:
                path = os.path.join(basedir, filename)
                # Ignore symlinks
                if os.path.islink(path):
                    continue
                umask_fix_perms(path, self.config.umask, 0o666, self.config.gid)

    def _get_cache_path(self, rel_path):
        return os.path.abspath(os.path.join(self.staging_path, rel_path))

    def _get_transfer_progress(self):
        return self.transfer_progress

    def _get_size_in_azure(self, rel_path):
        try:
            properties = self.service.get_blob_properties(self.container_name, rel_path)
            # Currently this returns a blob and not a BlobProperties object
            # Similar issue for the ruby https://github.com/Azure/azure-storage-ruby/issues/13
            # The typecheck is an attempt at future-proofing this when/if the bug is fixed.
            if type(properties) is Blob:
                properties = properties.properties
            if properties:
                size_in_bytes = properties.content_length
                return size_in_bytes
        except AzureHttpError:
            log.exception("Could not get size of blob '%s' from Azure", rel_path)
            return -1

    def _in_azure(self, rel_path):
        try:
            exists = self.service.exists(self.container_name, rel_path)
        except AzureHttpError:
            log.exception("Trouble checking existence of Azure blob '%s'", rel_path)
            return False
        return exists

    def _in_cache(self, rel_path):
        """ Check if the given dataset is in the local cache. """
        cache_path = self._get_cache_path(rel_path)
        return os.path.exists(cache_path)

    def _pull_into_cache(self, rel_path):
        # Ensure the cache directory structure exists (e.g., dataset_#_files/)
        rel_path_dir = os.path.dirname(rel_path)
        if not os.path.exists(self._get_cache_path(rel_path_dir)):
            os.makedirs(self._get_cache_path(rel_path_dir))
        # Now pull in the file
        file_ok = self._download(rel_path)
        self._fix_permissions(self._get_cache_path(rel_path_dir))
        return file_ok

    def _transfer_cb(self, complete, total):
        self.transfer_progress = float(complete) / float(total) * 100  # in percent

    def _download(self, rel_path):
        local_destination = self._get_cache_path(rel_path)
        try:
            log.debug("Pulling '%s' into cache to %s", rel_path, local_destination)
            if self.cache_size > 0 and self._get_size_in_azure(rel_path) > self.cache_size:
                log.critical("File %s is larger (%s) than the cache size (%s). Cannot download.",
                             rel_path, self._get_size_in_azure(rel_path), self.cache_size)
                return False
            else:
                self.transfer_progress = 0  # Reset transfer progress counter
                self.service.get_blob_to_path(self.container_name, rel_path, local_destination, progress_callback=self._transfer_cb)
                return True
        except AzureHttpError:
            log.exception("Problem downloading '%s' from Azure", rel_path)
        return False

    def _push_to_os(self, rel_path, source_file=None, from_string=None):
        """
        Push the file pointed to by ``rel_path`` to the object store naming the blob
        ``rel_path``. If ``source_file`` is provided, push that file instead while
        still using ``rel_path`` as the blob name.
        If ``from_string`` is provided, set contents of the file to the value of
        the string.
        """
        try:
            source_file = source_file or self._get_cache_path(rel_path)

            if not os.path.exists(source_file):
                log.error("Tried updating blob '%s' from source file '%s', but source file does not exist.", rel_path, source_file)
                return False

            if os.path.getsize(source_file) == 0:
                log.debug("Wanted to push file '%s' to azure blob '%s' but its size is 0; skipping.", source_file, rel_path)
                return True

            if from_string:
                self.service.create_blob_from_text(self.container_name, rel_path, from_string, progress_callback=self._transfer_cb)
                log.debug("Pushed data from string '%s' to blob '%s'", from_string, rel_path)
            else:
                start_time = datetime.now()
                log.debug("Pushing cache file '%s' of size %s bytes to '%s'", source_file, os.path.getsize(source_file), rel_path)
                self.transfer_progress = 0  # Reset transfer progress counter
                self.service.create_blob_from_path(self.container_name, rel_path, source_file, progress_callback=self._transfer_cb)
                end_time = datetime.now()
                log.debug("Pushed cache file '%s' to blob '%s' (%s bytes transfered in %s sec)",
                          source_file, rel_path, os.path.getsize(source_file), end_time - start_time)
            return True

        except AzureHttpError:
            log.exception("Trouble pushing to Azure Blob '%s' from file '%s'", rel_path, source_file)
        return False

    ##################
    # Public Methods #
    ##################

    def exists(self, obj, **kwargs):
        in_cache = in_azure = False
        rel_path = self._construct_path(obj, **kwargs)

        in_cache = self._in_cache(rel_path)
        in_azure = self._in_azure(rel_path)
        # log.debug("~~~~~~ File '%s' exists in cache: %s; in azure: %s" % (rel_path, in_cache, in_azure))
        # dir_only does not get synced so shortcut the decision
        dir_only = kwargs.get('dir_only', False)
        base_dir = kwargs.get('base_dir', None)
        if dir_only:
            if in_cache or in_azure:
                return True
            # for JOB_WORK directory
            elif base_dir:
                if not os.path.exists(rel_path):
                    os.makedirs(rel_path)
                return True
            else:
                return False

        # TODO: Sync should probably not be done here. Add this to an async upload stack?
        if in_cache and not in_azure:
            self._push_to_os(rel_path, source_file=self._get_cache_path(rel_path))
            return True
        elif in_azure:
            return True
        else:
            return False

    def file_ready(self, obj, **kwargs):
        """
        A helper method that checks if a file corresponding to a dataset is
        ready and available to be used. Return ``True`` if so, ``False`` otherwise.
        """
        rel_path = self._construct_path(obj, **kwargs)
        # Make sure the size in cache is available in its entirety
        if self._in_cache(rel_path):
            local_size = os.path.getsize(self._get_cache_path(rel_path))
            remote_size = self._get_size_in_azure(rel_path)
            if local_size == remote_size:
                return True
            else:
                log.debug("Waiting for dataset %s to transfer from OS: %s/%s", rel_path, local_size, remote_size)

        return False

    def create(self, obj, **kwargs):

        if not self.exists(obj, **kwargs):

            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            # Although not really necessary to create S3 folders (because S3 has
            # flat namespace), do so for consistency with the regular file system
            # S3 folders are marked by having trailing '/' so add it now
            # s3_dir = '%s/' % rel_path
            # self._push_to_os(s3_dir, from_string='')
            # If instructed, create the dataset in cache & in S3
            if not dir_only:
                rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
                open(os.path.join(self.staging_path, rel_path), 'w').close()
                self._push_to_os(rel_path, from_string='')

    def empty(self, obj, **kwargs):
        if self.exists(obj, **kwargs):
            return bool(self.size(obj, **kwargs) > 0)
        else:
            raise ObjectNotFound( 'objectstore.empty, object does not exist: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) )

    def size(self, obj, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        if self._in_cache(rel_path):
            try:
                return os.path.getsize(self._get_cache_path(rel_path))
            except OSError as ex:
                log.info("Could not get size of file '%s' in local cache, will try Azure. Error: %s", rel_path, ex)
        elif self.exists(obj, **kwargs):
            return self._get_size_in_azure(rel_path)
        log.warning("Did not find dataset '%s', returning 0 for size", rel_path)
        return 0

    def delete(self, obj, entire_dir=False, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        extra_dir = kwargs.get('extra_dir', None)
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)
        try:
            if base_dir and dir_only and obj_dir:
                # Remove temporary data in JOB_WORK directory
                shutil.rmtree(os.path.abspath(rel_path))
                return True

            # For the case of extra_files, because we don't have a reference to
            # individual files/blobs we need to remove the entire directory structure
            # with all the files in it. This is easy for the local file system,
            # but requires iterating through each individual blob in Azure and deleting it.
            if entire_dir and extra_dir:
                shutil.rmtree(self._get_cache_path(rel_path))
                blobs = self.service.list_blobs(self.container_name, prefix=rel_path)
                for blob in blobs:
                    log.debug("Deleting from Azure: %s", blob)
                    self.service.delete_blob(self.container_name, blob.name)
                return True
            else:
                # Delete from cache first
                os.unlink(self._get_cache_path(rel_path))
                # Delete from S3 as well
                if self._in_azure(rel_path):
                    log.debug("Deleting from Azure: %s", rel_path)
                    self.service.delete_blob(self.container_name, rel_path)
                    return True
        except AzureHttpError:
            log.exception("Could not delete blob '%s' from Azure", rel_path)
        except OSError:
            log.exception('%s delete error', self.get_filename(obj, **kwargs))
        return False

    def get_data(self, obj, start=0, count=-1, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        # Check cache first and get file if not there
        if not self._in_cache(rel_path):
            self._pull_into_cache(rel_path)
        # Read the file content from cache
        data_file = open(self._get_cache_path(rel_path), 'r')
        data_file.seek(start)
        content = data_file.read(count)
        data_file.close()
        return content

    def get_filename(self, obj, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)

        # for JOB_WORK directory
        if base_dir and dir_only and obj_dir:
            return os.path.abspath(rel_path)

        cache_path = self._get_cache_path(rel_path)
        # S3 does not recognize directories as files so cannot check if those exist.
        # So, if checking dir only, ensure given dir exists in cache and return
        # the expected cache path.
        # dir_only = kwargs.get('dir_only', False)
        # if dir_only:
        #     if not os.path.exists(cache_path):
        #         os.makedirs(cache_path)
        #     return cache_path
        # Check if the file exists in the cache first
        if self._in_cache(rel_path):
            return cache_path
        # Check if the file exists in persistent storage and, if it does, pull it into cache
        elif self.exists(obj, **kwargs):
            if dir_only:  # Directories do not get pulled into cache
                return cache_path
            else:
                if self._pull_into_cache(rel_path):
                    return cache_path
        # For the case of retrieving a directory only, return the expected path
        # even if it does not exist.
        # if dir_only:
        #     return cache_path
        raise ObjectNotFound( 'objectstore.get_filename, no cache_path: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) )

        return cache_path  # Until the upload tool explicitly creates the dataset, return the expected path

    def update_from_file(self, obj, file_name=None, create=False, **kwargs):
        if create is True:
            self.create(obj, **kwargs)
        elif self.exists(obj, **kwargs):
            rel_path = self._construct_path(obj, **kwargs)
            # Chose whether to use the dataset file itself or an alternate file
            if file_name:
                source_file = os.path.abspath(file_name)
                # Copy into cache
                cache_file = self._get_cache_path(rel_path)
                try:
                    if source_file != cache_file:
                        # FIXME? Should this be a `move`?
                        shutil.copy2(source_file, cache_file)
                    self._fix_permissions(cache_file)
                except OSError:
                    log.exception("Trouble copying source file '%s' to cache '%s'", source_file, cache_file)
            else:
                source_file = self._get_cache_path(rel_path)

            self._push_to_os(rel_path, source_file)

        else:
            raise ObjectNotFound( 'objectstore.update_from_file, object does not exist: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) )

    def get_object_url(self, obj, **kwargs):
        if self.exists(obj, **kwargs):
            rel_path = self._construct_path(obj, **kwargs)
            try:
                url = self.service.make_blob_url(container_name=self.container_name, blob_name=rel_path)
                return url
            except AzureHttpError:
                log.exception("Trouble generating URL for dataset '%s'", rel_path)
        return None

    def get_store_usage_percent(self):
        return 0.0

    ##################
    # Secret Methods #
    ##################

    def __cache_monitor(self):
        time.sleep(2)  # Wait for things to load before starting the monitor
        while self.running:
            total_size = 0
            # Is this going to be too expensive of an operation to be done frequently?
            file_list = []
            for dirpath, _, filenames in os.walk(self.staging_path):
                for filename in filenames:
                    filepath = os.path.join(dirpath, filename)
                    file_size = os.path.getsize(filepath)
                    total_size += file_size
                    # Get the time given file was last accessed
                    last_access_time = time.localtime(os.stat(filepath)[7])
                    # Compose a tuple of the access time and the file path
                    file_tuple = last_access_time, filepath, file_size
                    file_list.append(file_tuple)
            # Sort the file list (based on access time)
            file_list.sort()
            # Initiate cleaning once within 10% of the defined cache size?
            cache_limit = self.cache_size * 0.9
            if total_size > cache_limit:
                log.info("Initiating cache cleaning: current cache size: %s; clean until smaller than: %s",
                         convert_bytes(total_size), convert_bytes(cache_limit))
                # How much to delete? If simply deleting up to the cache-10% limit,
                # is likely to be deleting frequently and may run the risk of hitting
                # the limit - maybe delete additional #%?
                # For now, delete enough to leave at least 10% of the total cache free
                delete_this_much = total_size - cache_limit
                # Keep deleting datasets from file_list until deleted_amount does not
                # exceed delete_this_much; start deleting from the front of the file list,
                # which assumes the oldest files come first on the list.
                deleted_amount = 0
                for entry in file_list:
                    if deleted_amount < delete_this_much:
                        deleted_amount += entry[2]
                        os.remove(entry[1])
                        # Debugging code for printing deleted files' stats
                        # folder, file_name = os.path.split(entry[1])
                        # file_date = time.strftime("%m/%d/%y %H:%M:%S", entry[0])
                        # log.debug("%-25s %s, size %s (deleted %s/%s)" \
                        #     % (file_name, convert_bytes(entry[2]), file_date, \
                        #     convert_bytes(deleted_amount), convert_bytes(delete_this_much)))
                    else:
                        log.debug("Cache cleaning done. Total space freed: %s", convert_bytes(deleted_amount))
                        break

            self.sleeper.sleep(30)  # Test cache size every 30 seconds?
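
_get_container_lease above retries because a freshly created container may not be leasable right away. The following is a standalone sketch of that retry pattern, assuming the legacy azure-storage SDK; acquire_lease_with_retry, the credentials, and the container name are illustrative placeholders rather than parts of the class above.

import time

from azure.common import AzureHttpError
from azure.storage.blob import BlockBlobService


def acquire_lease_with_retry(service, container_name, attempts=5, wait=2):
    """Create the container if needed and return a lease id, retrying on HTTP errors."""
    for attempt in range(attempts):
        try:
            service.create_container(container_name, fail_on_exist=False)
            return service.acquire_container_lease(container_name)
        except AzureHttpError:
            time.sleep(wait)
    raise RuntimeError('could not acquire a lease on %s after %d attempts'
                       % (container_name, attempts))


if __name__ == '__main__':
    # Placeholder credentials and container name for illustration only.
    service = BlockBlobService(account_name='ACCOUNT_NAME', account_key='ACCOUNT_KEY')
    lease_id = acquire_lease_with_retry(service, 'galaxy-demo-store')
    print('holding lease %s' % lease_id)
    service.release_container_lease('galaxy-demo-store', lease_id)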
Example #36
0
File: blobfs.py  Project: mbartoli/blobfs
class Passthrough(Operations):
	def __init__(self, root):
		self.root = root
		print root
		try:
			import config as config
		except ImportError:
			raise ValueError('Please specify configuration settings in config.py.')

		if config.IS_EMULATED:
			self.account = CloudStorageAccount(is_emulated=True)
		else:
			# Note that account key and sas should not both be included
			account_name = config.STORAGE_ACCOUNT_NAME
			account_key = config.STORAGE_ACCOUNT_KEY
			sas = config.SAS
			self.account = CloudStorageAccount(account_name=account_name,
											   account_key=account_key,
											   sas_token=sas)
		# Create the service for both the emulated and the real-account case.
		self.service = self.account.create_block_blob_service()


	def _full_path(self, partial):
		if partial.startswith("/"):
			partial = partial[1:]
		path = os.path.join(self.root, partial)
		return path

	def _get_container_reference(self, prefix='container'):
		return '{}{}'.format(prefix, str(uuid.uuid4()).replace('-', ''))

	def access(self, path, mode):
		if debug:
			print "access" 

		full_path = self._full_path(path)
		#if not os.access(full_path, mode):
		#	pass#raise FuseOSError(errno.EACCES)
		return 0

	def chmod(self, path, mode):
		pass

	def chown(self, path, uid, gid):
		pass

	def getattr(self, path, fh=None):
		if debug:
			print "getattr  " + path 
		isFolder = False
		if len(path.split('/')) == 2:
			isFolder = True

		"""link_data = {
			"st_ctime" : 1456615173,
			"st_mtime" : 1456615173,
			"st_nlink" : 2,
			"st_mode" : 16893,
			"st_size" : 2,
			"st_gid" : 1000,
			"st_uid" : 1000,
			"st_atime" : time(),
		}"""


		folder_data = {
			"st_ctime" : 1456615173,
			"st_mtime" : 1456615173,
			"st_nlink" : 2,
#			"st_mode" : 16893,
			"st_mode" : 16895,
			"st_size" : 2,
			"st_gid" : 1000,
			"st_uid" : 1000,
			"st_atime" : time(),
		}

		
		full_path = self._full_path(path)
		try:
			st = os.lstat(full_path)
			print st
			rdata = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))
		except: 
			pass
		#if os.path.isfile == True:
		#	return 
		if isFolder:
			for container in list(self.service.list_containers()):
				if container.name == path[1:]:
					return folder_data
		else:
			"""import config as config
			account_name = config.STORAGE_ACCOUNT_NAME
			account_key = config.STORAGE_ACCOUNT_KEY"""
			containername = path.split('/')[1]
			filename = path.split('/')[2]
			"""block_blob_service = BlockBlobService(account_name, account_key)
			if os.path.isfile(full_path) == False:
				fileSize = 1
			else:
				try:
					pass
					fileSize = os.path.getsize(full_path)
				except:
					fileSize = 1"""
			self.service = self.account.create_block_blob_service()
			file_data = {
				"st_ctime" : 1456615173,
				"st_mtime" : 1456615173,
				"st_nlink" : 1,
#				"st_mode" : 33188,
				"st_mode" : 33279,
				"st_size" : self.service.get_blob_properties(containername, filename).properties.content_length,
				"st_gid" : 1000,
				"st_uid" : 1000,
				"st_atime" : time(),
			}
			return file_data

		st = os.lstat(full_path)
		print st
		rdata = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))
		return rdata

	def readdir(self, path, fh):
		if debug:
			print "readdir  " + path  
		
		full_path = self._full_path(path)

		dirents = ['.', '..']
		#if os.path.isdir(full_path):
		#	dirents.extend(os.listdir(full_path))
		for r in dirents:
			yield r
		containers = list(self.service.list_containers())
		#print('All containers in your account:')
		if path == "/":
			for container in containers:
				yield container.name
		else: 
			folder = path[1:]
			blobs = list(self.service.list_blobs(folder))
			for blob in blobs:
				yield blob.name
			

	def readlink(self, path):
		if debug:
			print "readlink" 

		pathname = os.readlink(self._full_path(path))
		if pathname.startswith("/"):
			# Path name is absolute, sanitize it.
			return os.path.relpath(pathname, self.root)
		else:
			return pathname

	def mknod(self, path, mode, dev):
		return os.mknod(self._full_path(path), mode, dev)

	def rmdir(self, path):
		if debug:
			print "rmdir  " + path[1:]
		deleted = self.service.delete_container(path[1:])
		return 0

	def mkdir(self, path, mode):
		"""
		Only valid in the top level of the mounted directory.
		Creates a container to serve as the folder 

		A container name must be a valid DNS name, conforming to the following 
		naming rules:
		1) Container names must start with a letter or number, and can contain
		   only letters, numbers, and the dash (-) character.
		2) Every dash (-) character must be immediately preceded and followed 
		   by a letter or number; consecutive dashes are not permitted in
		   container names.
		3) All letters in a container name must be lowercase.
		4) Container names must be from 3 through 63 characters long.

		30 second lease timeout on deleted directory.
		"""
		if debug:
			print "mkdir  " + path[1:]

		# TODO: validate input 
		self.service.create_container(path[1:])
		return 0

	def statfs(self, path):
		full_path = self._full_path(path)
		stv = os.statvfs(full_path)
		return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree',
			'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag',
			'f_frsize', 'f_namemax'))

	def unlink(self, path):
		return os.unlink(self._full_path(path))

	def symlink(self, name, target):
		return os.symlink(name, self._full_path(target))

	def rename(self, old, new):
		"""
		1) create new container
		2) stream contents of old container to new container
		3) delete old container
		"""
		# step 1 
		self.mkdir(new, 0777)

		# step 2
		# TODO: steam contents to new container
		"""import config as config
		account_name = config.STORAGE_ACCOUNT_NAME
		account_key = config.STORAGE_ACCOUNT_KEY
		block_blob_service = BlockBlobService(account_name, account_key)
		block_blob_service.get_blob_to_path(containername, filename, tempfilename)	
		block_blob_service.create_blob_from_path(new, filename, filename)"""	
				
	
		#step 3
		self.rmdir(old)

	def link(self, target, name):
		return os.link(self._full_path(target), self._full_path(name))

	def utimens(self, path, times=None):
		return os.utime(self._full_path(path), times)

	# File methods
	# ============

	def open(self, path, flags):
		"""if debug:
			print "open:    " + path
			print flags
		full_path = self._full_path(path)
		import config as config
		account_name = config.STORAGE_ACCOUNT_NAME
		account_key = config.STORAGE_ACCOUNT_KEY
		containername = path.split('/')[1]
		filename = path.split('/')[2]
		block_blob_service = BlockBlobService(account_name, account_key)
		try:
			print "get block blob" 
			if os.path.isdir(path.split('/')[1]) == False:
				os.mkdir(full_path.split('/')[0]+'/'+containername)
			if os.path.isfile(full_path) == False:
				block_blob_service.get_blob_to_path(containername, filename, full_path)
			else:
				print "get block blob" 
				os.remove(full_path)
				block_blob_service.get_blob_to_path(containername, filename, full_path)
		except:
			pass
		print "full path:   " + full_path 
		print os.path.isfile(full_path)"""
		return 0#os.open(full_path, flags)

	def create(self, path, mode, fi=None):
		if debug:
			print "create:   " + path
		full_path = self._full_path(path)
		return os.open(full_path, os.O_WRONLY | os.O_CREAT, mode)

	def read(self, path, length, offset, fh):
		if debug:
			print "read:	   " + path
			print "offset:  " 
			print offset
			print "length: "
			print length 
			print fh
		full_path = self._full_path(path)
		print full_path
		#os.lseek(fh, offset, os.SEEK_SET)
		#if os.path.isfile(full_path) == False:
		import config as config
		account_name = config.STORAGE_ACCOUNT_NAME
		account_key = config.STORAGE_ACCOUNT_KEY
		containername = path.split('/')[1]
		filename = path.split('/')[2]
		service = baseblobservice.BaseBlobService(account_name, account_key)
		blob = service.get_blob_to_bytes(containername, filename, None, offset, offset+length-1)
		#blob = blob[offset:(offset+length)]
		bytes = blob.content 
		return bytes
		"""try:
			if os.path.isdir(path.split('/')[1]) == False:
				os.mkdir(full_path.split('/')[0]+'/'+containername)
			if os.path.isfile(full_path) == False:
				print "read block blob" 
				block_blob_service.get_blob_to_path(containername, filename, full_path)
			else:
				os.remove(full_path)
				block_blob_service.get_blob_to_path(containername, filename, full_path)
		except:
			pass
			
		fhn = os.open(full_path, 32768)
		os.lseek(fhn, offset, os.SEEK_SET)
	
		#print "os.read(fh, length)"
		#print os.read(fh, length)
		return os.read(fhn, length)"""

	def write(self, path, buf, offset, fh):
		if debug:
			print "write:   " + path
		os.lseek(fh, offset, os.SEEK_SET)
		return os.write(fh, buf)

	def truncate(self, path, length, fh=None):
		print "truncate:   " + path
		full_path = self._full_path(path)
		with open(full_path, 'r+') as f:
			f.truncate(length)

	def flush(self, path, fh):
		print "flush:   " + path
		return os.fsync(fh)

	def release(self, path, fh):
		print "release:   " + path
		return os.close(fh)

	def fsync(self, path, fdatasync, fh):
		print "fsync:   " + path
		return self.flush(path, fh)
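
Passthrough.read() above serves FUSE reads by fetching only the requested byte range with get_blob_to_bytes. Below is a minimal sketch of that ranged-read call, assuming the legacy azure-storage SDK; the helper name, credentials, and container/blob names are placeholders.

from azure.storage.blob import BlockBlobService


def read_blob_range(service, container_name, blob_name, offset, length):
    """Download only the requested byte range of a blob and return the bytes."""
    blob = service.get_blob_to_bytes(container_name, blob_name,
                                     start_range=offset,
                                     end_range=offset + length - 1)
    return blob.content


if __name__ == '__main__':
    # Placeholder credentials and names for illustration only.
    service = BlockBlobService(account_name='ACCOUNT_NAME', account_key='ACCOUNT_KEY')
    chunk = read_blob_range(service, 'mycontainer', 'myblob', offset=0, length=4096)
    print(len(chunk))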
Example #37
0
    def connection(self):
        if self._connection is None:
            account = CloudStorageAccount(self.account_name, self.account_key)
            self._connection = account.create_block_blob_service()
        return self._connection
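
The snippet above lazily creates and caches the block blob service on first use. A small sketch of the same pattern wrapped in a property follows, assuming the legacy SDK (CloudStorageAccount is imported from azure.storage.common in the 1.x packages; older versions expose it from azure.storage); the class name and credentials are placeholders.

from azure.storage.common import CloudStorageAccount


class BlobClientHolder(object):
    def __init__(self, account_name, account_key):
        self.account_name = account_name
        self.account_key = account_key
        self._connection = None

    @property
    def connection(self):
        # The BlockBlobService is created on first access and reused afterwards.
        if self._connection is None:
            account = CloudStorageAccount(self.account_name, self.account_key)
            self._connection = account.create_block_blob_service()
        return self._connection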
Example #38
0
class AzureBlobObjectStore(ObjectStore):
    """
    Object store that stores objects as blobs in an Azure Blob Container. A local
    cache exists that is used as an intermediate location for files between
    Galaxy and Azure.
    """
    def __init__(self, config, config_xml):
        if BlockBlobService is None:
            raise Exception(NO_BLOBSERVICE_ERROR_MESSAGE)
        super(AzureBlobObjectStore, self).__init__(config)

        self.staging_path = self.config.file_path
        self.transfer_progress = 0
        self._parse_config_xml(config_xml)
        self._configure_connection()
        self.container_lease = self._get_container_lease()

        # Clean cache only if value is set in galaxy.ini
        if self.cache_size != -1:
            # Convert GBs to bytes for comparison
            self.cache_size = self.cache_size * 1073741824
            # Helper for interruptable sleep
            self.sleeper = Sleeper()
            self.cache_monitor_thread = threading.Thread(
                target=self.__cache_monitor)
            self.cache_monitor_thread.start()
            log.info("Cache cleaner manager started")

    ###################
    # Private Methods #
    ###################

    # config_xml is an ElementTree object.
    def _parse_config_xml(self, config_xml):
        try:
            auth_xml = config_xml.find('auth')
            self.account_name = auth_xml.get('account_name')
            self.account_key = auth_xml.get('account_key')
            container_xml = config_xml.find('container')
            self.container_name = container_xml.get('name')
            self.max_chunk_size = int(container_xml.get(
                'max_chunk_size', 250))  # currently unused
            cache_xml = config_xml.find('cache')
            self.cache_size = float(cache_xml.get('size', -1))
            self.staging_path = cache_xml.get(
                'path', self.config.object_store_cache_path)

            for d_xml in config_xml.findall('extra_dir'):
                self.extra_dirs[d_xml.get('type')] = d_xml.get('path')

            log.debug("Object cache dir:    %s", self.staging_path)
            log.debug("       job work dir: %s", self.extra_dirs['job_work'])

        except Exception:
            # Toss it back up after logging, we can't continue loading at this point.
            log.exception(
                "Malformed ObjectStore Configuration XML -- unable to continue"
            )
            raise

    def _configure_connection(self):
        log.debug("Configuring Connection")
        self.account = CloudStorageAccount(self.account_name, self.account_key)
        self.service = self.account.create_block_blob_service()

    def _get_container_lease(self):
        """ Sometimes a handle to a container is not established right away so try
        it a few times. Raise an error if a connection is not established. """
        for i in range(5):
            try:
                self.service.break_container_lease(self.container_name)
                container_lease = self.service.acquire_container_lease(
                    self.container_name)
                log.debug("Using azure blob store with container '%s'",
                          self.container_name)
                return container_lease
            except AzureHttpError:
                try:
                    log.debug(
                        "container not found, creating azure blob store container with name '%s'",
                        self.container_name)
                    self.service.create_container(self.container_name)
                    container_lease = self.service.acquire_container_lease(
                        self.container_name)
                    return container_lease
                except AzureHttpError:
                    log.exception("Could not get container '%s', attempt %s/5",
                                  self.container_name, i + 1)
                    time.sleep(2)
        # All the attempts have been exhausted and a connection was not established;
        # raise an error. AzureHttpError expects a message and an HTTP status code
        # (500 is used here as a generic value).
        raise AzureHttpError(
            "Unable to get or create container '%s'" % self.container_name, 500)

    def _construct_path(self,
                        obj,
                        base_dir=None,
                        dir_only=None,
                        extra_dir=None,
                        extra_dir_at_root=False,
                        alt_name=None,
                        obj_dir=False,
                        **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenanigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s',
                            alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)

        rel_path = os.path.join(*directory_hash_id(obj.id))

        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        # rel_path = '%s/' % rel_path # assume for now we don't need this in Azure blob storage.

        if not dir_only:
            rel_path = os.path.join(
                rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

        return rel_path
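
    # Illustrative note (an assumption, not part of the original code): if
    # directory_hash_id(obj.id) returned ['012'] for a dataset with id 12345,
    # the default blob name built above would be '012/dataset_12345.dat';
    # with extra_dir='extra' and extra_dir_at_root=True it would become
    # 'extra/012/dataset_12345.dat', and with dir_only=True only the
    # directory portion '012' would be returned.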

    def _fix_permissions(self, rel_path):
        """ Set permissions on rel_path"""
        for basedir, _, files in os.walk(rel_path):
            umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid)
            for filename in files:
                path = os.path.join(basedir, filename)
                # Ignore symlinks
                if os.path.islink(path):
                    continue
                umask_fix_perms(path, self.config.umask, 0o666,
                                self.config.gid)

    def _get_cache_path(self, rel_path):
        return os.path.abspath(os.path.join(self.staging_path, rel_path))

    def _get_transfer_progress(self):
        return self.transfer_progress

    def _get_size_in_azure(self, rel_path):
        try:
            properties = self.service.get_blob_properties(
                self.container_name, rel_path)
            # Currently this returns a blob and not a BlobProperties object
            # Similar issue for the ruby https://github.com/Azure/azure-storage-ruby/issues/13
            # The typecheck is an attempt at future-proofing this when/if the bug is fixed.
            if type(properties) is Blob:
                properties = properties.properties
            if properties:
                size_in_bytes = properties.content_length
                return size_in_bytes
        except AzureHttpError:
            log.exception("Could not get size of blob '%s' from Azure",
                          rel_path)
            return -1

    def _in_azure(self, rel_path):
        try:
            exists = self.service.exists(self.container_name, rel_path)
        except AzureHttpError:
            log.exception("Trouble checking existence of Azure blob '%s'",
                          rel_path)
            return False
        return exists

    def _in_cache(self, rel_path):
        """ Check if the given dataset is in the local cache. """
        cache_path = self._get_cache_path(rel_path)
        return os.path.exists(cache_path)

    def _pull_into_cache(self, rel_path):
        # Ensure the cache directory structure exists (e.g., dataset_#_files/)
        rel_path_dir = os.path.dirname(rel_path)
        if not os.path.exists(self._get_cache_path(rel_path_dir)):
            os.makedirs(self._get_cache_path(rel_path_dir))
        # Now pull in the file
        file_ok = self._download(rel_path)
        self._fix_permissions(self._get_cache_path(rel_path_dir))
        return file_ok

    def _transfer_cb(self, complete, total):
        self.transfer_progress = float(complete) / float(
            total) * 100  # in percent

    def _download(self, rel_path):
        local_destination = self._get_cache_path(rel_path)
        try:
            log.debug("Pulling '%s' into cache to %s", rel_path,
                      local_destination)
            if self.cache_size > 0 and self._get_size_in_azure(
                    rel_path) > self.cache_size:
                log.critical(
                    "File %s is larger (%s) than the cache size (%s). Cannot download.",
                    rel_path, self._get_size_in_azure(rel_path),
                    self.cache_size)
                return False
            else:
                self.transfer_progress = 0  # Reset transfer progress counter
                self.service.get_blob_to_path(
                    self.container_name,
                    rel_path,
                    local_destination,
                    progress_callback=self._transfer_cb)
                return True
        except AzureHttpError:
            log.exception("Problem downloading '%s' from Azure", rel_path)
        return False

    def _push_to_os(self, rel_path, source_file=None, from_string=None):
        """
        Push the file pointed to by ``rel_path`` to the object store naming the blob
        ``rel_path``. If ``source_file`` is provided, push that file instead while
        still using ``rel_path`` as the blob name.
        If ``from_string`` is provided, set contents of the file to the value of
        the string.
        """
        try:
            source_file = source_file or self._get_cache_path(rel_path)

            if not os.path.exists(source_file):
                log.error(
                    "Tried updating blob '%s' from source file '%s', but source file does not exist.",
                    rel_path, source_file)
                return False

            if os.path.getsize(source_file) == 0:
                log.debug(
                    "Wanted to push file '%s' to azure blob '%s' but its size is 0; skipping.",
                    source_file, rel_path)
                return True

            if from_string:
                self.service.create_blob_from_text(
                    self.container_name,
                    rel_path,
                    from_string,
                    progress_callback=self._transfer_cb)
                log.debug("Pushed data from string '%s' to blob '%s'",
                          from_string, rel_path)
            else:
                start_time = datetime.now()
                log.debug("Pushing cache file '%s' of size %s bytes to '%s'",
                          source_file, os.path.getsize(source_file), rel_path)
                self.transfer_progress = 0  # Reset transfer progress counter
                self.service.create_blob_from_path(
                    self.container_name,
                    rel_path,
                    source_file,
                    progress_callback=self._transfer_cb)
                end_time = datetime.now()
                log.debug(
                    "Pushed cache file '%s' to blob '%s' (%s bytes transfered in %s sec)",
                    source_file, rel_path, os.path.getsize(source_file),
                    end_time - start_time)
            return True

        except AzureHttpError:
            log.exception("Trouble pushing to Azure Blob '%s' from file '%s'",
                          rel_path, source_file)
        return False

    ##################
    # Public Methods #
    ##################

    def exists(self, obj, **kwargs):
        in_cache = in_azure = False
        rel_path = self._construct_path(obj, **kwargs)

        in_cache = self._in_cache(rel_path)
        in_azure = self._in_azure(rel_path)
        # log.debug("~~~~~~ File '%s' exists in cache: %s; in azure: %s" % (rel_path, in_cache, in_azure))
        # dir_only does not get synced so shortcut the decision
        dir_only = kwargs.get('dir_only', False)
        base_dir = kwargs.get('base_dir', None)
        if dir_only:
            if in_cache or in_azure:
                return True
            # for JOB_WORK directory
            elif base_dir:
                if not os.path.exists(rel_path):
                    os.makedirs(rel_path)
                return True
            else:
                return False

        # TODO: Sync should probably not be done here. Add this to an async upload stack?
        if in_cache and not in_azure:
            self._push_to_os(rel_path,
                             source_file=self._get_cache_path(rel_path))
            return True
        elif in_azure:
            return True
        else:
            return False

    def file_ready(self, obj, **kwargs):
        """
        A helper method that checks if a file corresponding to a dataset is
        ready and available to be used. Return ``True`` if so, ``False`` otherwise.
        """
        rel_path = self._construct_path(obj, **kwargs)
        # Make sure the file in the cache is complete (its size matches the size in Azure)
        if self._in_cache(rel_path):
            local_size = os.path.getsize(self._get_cache_path(rel_path))
            remote_size = self._get_size_in_azure(rel_path)
            if local_size == remote_size:
                return True
            else:
                log.debug("Waiting for dataset %s to transfer from OS: %s/%s",
                          rel_path, local_size, remote_size)

        return False

    def create(self, obj, **kwargs):

        if not self.exists(obj, **kwargs):

            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            # Although not really necessary to create S3 folders (because S3 has
            # flat namespace), do so for consistency with the regular file system
            # S3 folders are marked by having trailing '/' so add it now
            # s3_dir = '%s/' % rel_path
            # self._push_to_os(s3_dir, from_string='')
            # If instructed, create the dataset in cache & in S3
            if not dir_only:
                rel_path = os.path.join(
                    rel_path,
                    alt_name if alt_name else "dataset_%s.dat" % obj.id)
                open(os.path.join(self.staging_path, rel_path), 'w').close()
                self._push_to_os(rel_path, from_string='')

    def empty(self, obj, **kwargs):
        if self.exists(obj, **kwargs):
            # A dataset is considered empty when its size is zero
            return self.size(obj, **kwargs) == 0
        else:
            raise ObjectNotFound(
                'objectstore.empty, object does not exist: %s, kwargs: %s' %
                (str(obj), str(kwargs)))

    def size(self, obj, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        if self._in_cache(rel_path):
            try:
                return os.path.getsize(self._get_cache_path(rel_path))
            except OSError as ex:
                log.info(
                    "Could not get size of file '%s' in local cache, will try Azure. Error: %s",
                    rel_path, ex)
        elif self.exists(obj, **kwargs):
            return self._get_size_in_azure(rel_path)
        log.warning("Did not find dataset '%s', returning 0 for size",
                    rel_path)
        return 0

    def delete(self, obj, entire_dir=False, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        extra_dir = kwargs.get('extra_dir', None)
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)
        try:
            if base_dir and dir_only and obj_dir:
                # Remove temporary data in JOB_WORK directory
                shutil.rmtree(os.path.abspath(rel_path))
                return True

            # For the case of extra_files, because we don't have a reference to
            # individual files/blobs we need to remove the entire directory structure
            # with all the files in it. This is easy for the local file system,
            # but requires iterating through each individual blob in Azure and deleting it.
            if entire_dir and extra_dir:
                shutil.rmtree(self._get_cache_path(rel_path))
                blobs = self.service.list_blobs(self.container_name,
                                                prefix=rel_path)
                for blob in blobs:
                    log.debug("Deleting from Azure: %s", blob)
                    self.service.delete_blob(self.container_name, blob.name)
                return True
            else:
                # Delete from cache first
                os.unlink(self._get_cache_path(rel_path))
                # Delete from Azure as well
                if self._in_azure(rel_path):
                    log.debug("Deleting from Azure: %s", rel_path)
                    self.service.delete_blob(self.container_name, rel_path)
                    return True
        except AzureHttpError:
            log.exception("Could not delete blob '%s' from Azure", rel_path)
        except OSError:
            log.exception('%s delete error', self.get_filename(obj, **kwargs))
        return False

    def get_data(self, obj, start=0, count=-1, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        # Check cache first and get file if not there
        if not self._in_cache(rel_path):
            self._pull_into_cache(rel_path)
        # Read the file content from cache
        with open(self._get_cache_path(rel_path), 'r') as data_file:
            data_file.seek(start)
            content = data_file.read(count)
        return content

    def get_filename(self, obj, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)

        # for JOB_WORK directory
        if base_dir and dir_only and obj_dir:
            return os.path.abspath(rel_path)

        cache_path = self._get_cache_path(rel_path)
        # S3 does not recognize directories as files so cannot check if those exist.
        # So, if checking dir only, ensure given dir exists in cache and return
        # the expected cache path.
        # dir_only = kwargs.get('dir_only', False)
        # if dir_only:
        #     if not os.path.exists(cache_path):
        #         os.makedirs(cache_path)
        #     return cache_path
        # Check if the file exists in the cache first
        if self._in_cache(rel_path):
            return cache_path
        # Check if the file exists in persistent storage and, if it does, pull it into cache
        elif self.exists(obj, **kwargs):
            if dir_only:  # Directories do not get pulled into cache
                return cache_path
            else:
                if self._pull_into_cache(rel_path):
                    return cache_path
        # For the case of retrieving a directory only, return the expected path
        # even if it does not exist.
        # if dir_only:
        #     return cache_path
        raise ObjectNotFound(
            'objectstore.get_filename, no cache_path: %s, kwargs: %s' %
            (str(obj), str(kwargs)))
        # return cache_path  # Until the upload tool does not explicitly create the dataset, return expected path

    def update_from_file(self, obj, file_name=None, create=False, **kwargs):
        if create is True:
            self.create(obj, **kwargs)
        elif self.exists(obj, **kwargs):
            rel_path = self._construct_path(obj, **kwargs)
            # Choose whether to use the dataset file itself or an alternate file
            if file_name:
                source_file = os.path.abspath(file_name)
                # Copy into cache
                cache_file = self._get_cache_path(rel_path)
                try:
                    if source_file != cache_file:
                        # FIXME? Should this be a `move`?
                        shutil.copy2(source_file, cache_file)
                    self._fix_permissions(cache_file)
                except OSError:
                    log.exception(
                        "Trouble copying source file '%s' to cache '%s'",
                        source_file, cache_file)
            else:
                source_file = self._get_cache_path(rel_path)

            self._push_to_os(rel_path, source_file)

        else:
            raise ObjectNotFound(
                'objectstore.update_from_file, object does not exist: %s, kwargs: %s'
                % (str(obj), str(kwargs)))

    def get_object_url(self, obj, **kwargs):
        if self.exists(obj, **kwargs):
            rel_path = self._construct_path(obj, **kwargs)
            try:
                url = self.service.make_blob_url(
                    container_name=self.container_name, blob_name=rel_path)
                return url
            except AzureHttpError:
                log.exception("Trouble generating URL for dataset '%s'",
                              rel_path)
        return None

    def get_store_usage_percent(self):
        return 0.0

    ##################
    # Secret Methods #
    ##################

    def __cache_monitor(self):
        time.sleep(2)  # Wait for things to load before starting the monitor
        while self.running:
            total_size = 0
            # Is this going to be too expensive of an operation to be done frequently?
            file_list = []
            for dirpath, _, filenames in os.walk(self.staging_path):
                for filename in filenames:
                    filepath = os.path.join(dirpath, filename)
                    file_size = os.path.getsize(filepath)
                    total_size += file_size
                    # Get the time the file was last accessed (st_atime)
                    last_access_time = time.localtime(os.stat(filepath).st_atime)
                    # Compose a tuple of the access time and the file path
                    file_tuple = last_access_time, filepath, file_size
                    file_list.append(file_tuple)
            # Sort the file list (based on access time)
            file_list.sort()
            # Initiate cleaning once within 10% of the defined cache size?
            cache_limit = self.cache_size * 0.9
            if total_size > cache_limit:
                log.info(
                    "Initiating cache cleaning: current cache size: %s; clean until smaller than: %s",
                    convert_bytes(total_size), convert_bytes(cache_limit))
                # How much to delete? If simply deleting up to the cache-10% limit,
                # is likely to be deleting frequently and may run the risk of hitting
                # the limit - maybe delete additional #%?
                # For now, delete enough to leave at least 10% of the total cache free
                delete_this_much = total_size - cache_limit
                # Keep deleting datasets from file_list until deleted_amount does not
                # exceed delete_this_much; start deleting from the front of the file list,
                # which assumes the oldest files come first on the list.
                deleted_amount = 0
                for entry in file_list:
                    if deleted_amount < delete_this_much:
                        deleted_amount += entry[2]
                        os.remove(entry[1])
                        # Debugging code for printing deleted files' stats
                        # folder, file_name = os.path.split(entry[1])
                        # file_date = time.strftime("%m/%d/%y %H:%M:%S", entry[0])
                        # log.debug("%-25s %s, size %s (deleted %s/%s)"
                        #     % (file_name, convert_bytes(entry[2]), file_date,
                        #     convert_bytes(deleted_amount), convert_bytes(delete_this_much)))
                    else:
                        log.debug("Cache cleaning done. Total space freed: %s",
                                  convert_bytes(deleted_amount))
                        break

            self.sleeper.sleep(30)  # Test cache size every 30 seconds?
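
# Configuration sketch (assumptions for illustration): the element and attribute
# names below mirror what _parse_config_xml reads, but the root element name and
# the concrete values are not from the original code; in Galaxy this XML comes
# from the object store configuration.
from xml.etree import ElementTree

EXAMPLE_CONFIG_XML = ElementTree.fromstring("""
<object_store type="azure_blob">
    <auth account_name="myaccount" account_key="mykey" />
    <container name="mycontainer" max_chunk_size="250" />
    <cache path="database/object_store_cache" size="100" />
    <extra_dir type="job_work" path="database/job_working_directory_azure" />
    <extra_dir type="temp" path="database/tmp_azure" />
</object_store>
""")

# The store would then be built roughly as:
#   store = AzureBlobObjectStore(config, EXAMPLE_CONFIG_XML)
# where `config` supplies the attributes used above: file_path,
# object_store_cache_path, umask and gid. The cache size is given in GB and
# converted to bytes in __init__.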
예제 #39
0
from app import app, db
from flask import request, render_template, url_for, redirect
from datetime import datetime
from werkzeug.utils import secure_filename
from app.models import Ad, CategoryList
from os.path import basename, splitext
import random, string

# Create storage service
from azure.storage import CloudStorageAccount
storage_account = CloudStorageAccount(
    account_name=app.config['STORAGE_ACCOUNT_NAME'],
    account_key=app.config['STORAGE_ACCOUNT_KEY'])
block_blob_service = storage_account.create_block_blob_service()

# Create container
from azure.storage.blob import PublicAccess
block_blob_service.create_container('images',
                                    public_access=PublicAccess.Container)

# Create service bus service
from azure.servicebus import ServiceBusService, Message, Queue
bus_service = ServiceBusService(
    service_namespace=app.config['SERVICEBUS_NAMESPACE'],
    shared_access_key_name=app.config['SERVICEBUS_ACCESS_KEYNAME'],
    shared_access_key_value=app.config['SERVICEBUS_ACCESS_KEYVALUE'])

# Create queue
bus_service.create_queue('adqueue', None, False)
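
# A possible upload route built on the services configured above. The URL rule,
# the 'image' form field, and the queue message format are assumptions added for
# illustration; only block_blob_service, bus_service, and the imports at the top
# of this module come from the original example.
def _random_blob_name(filename):
    # Randomize the name to avoid collisions, keeping the original extension.
    suffix = ''.join(random.choice(string.ascii_lowercase + string.digits)
                     for _ in range(16))
    return suffix + splitext(secure_filename(filename))[1]


@app.route('/upload', methods=['POST'])
def upload_image():
    image = request.files['image']
    blob_name = _random_blob_name(image.filename)

    # Store the file in the public 'images' container created above.
    block_blob_service.create_blob_from_stream('images', blob_name, image.stream)
    image_url = block_blob_service.make_blob_url('images', blob_name)

    # Notify downstream workers through the 'adqueue' queue created above.
    bus_service.send_queue_message('adqueue', Message(image_url))

    return image_url, 201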