def saveCsvLogToBlob(self, fileName):
    to_location_path = fileName
    account_name = s().Data["blob"]["account_name"]
    account_key = s().Data["blob"]["account_key"]
    container_name = s().Data["blob"]["container"]
    cloud_account = CloudStorageAccount(account_name=account_name,
                                        account_key=account_key)
    append_blob_service = cloud_account.create_append_blob_service()
    append_blob_service.create_container(container_name)
    append_blob_service.set_container_acl(
        container_name, public_access=PublicAccess.Container)
    # Append to the blob if it already exists; otherwise upload it as a new block blob.
    if append_blob_service.exists(container_name, fileName):
        append_blob_service.append_blob_from_path(
            container_name, fileName, to_location_path,
            progress_callback=self.progress_callback_w)
    else:
        cloud_account.create_block_blob_service().create_blob_from_path(
            container_name, fileName, to_location_path,
            progress_callback=self.progress_callback_w)
def container_operations_with_sas(self, account):
    container_name = 'demosasblobcontainer' + self.random_data.get_random_name(6)

    # Create a Block Blob Service object
    blockblob_service = account.create_block_blob_service()

    # Create a Shared Access Signature for the account
    print('1.Get account sas')
    account_sas = blockblob_service.generate_account_shared_access_signature(
        ResourceTypes.CONTAINER + ResourceTypes.OBJECT,
        AccountPermissions.READ + AccountPermissions.WRITE +
        AccountPermissions.DELETE + AccountPermissions.LIST +
        AccountPermissions.CREATE,
        datetime.datetime.utcnow() + datetime.timedelta(hours=1))
    shared_account = CloudStorageAccount(account_name=account.account_name,
                                         sas_token=account_sas)
    shared_account_block_service = shared_account.create_block_blob_service()

    try:
        print('2. Create container with account sas. Container name - ' + container_name)
        shared_account_block_service.create_container(container_name)

        # For the purposes of the demo, get a Container SAS
        # In a real-world application, the above Account SAS can be used
        print('3. Get container sas')
        container_sas = blockblob_service.generate_container_shared_access_signature(
            container_name,
            ContainerPermissions.READ + ContainerPermissions.WRITE +
            ContainerPermissions.DELETE + ContainerPermissions.LIST,
            datetime.datetime.utcnow() + datetime.timedelta(hours=1))
        shared_container_account = CloudStorageAccount(
            account_name=account.account_name, sas_token=container_sas)
        shared_container_block_service = shared_container_account.create_block_blob_service()

        print('4. Create blob with container sas')
        shared_container_block_service.create_blob_from_text(
            container_name, 'myblob', 'blob data')

        print('5. List blobs with container sas')
        blobs = shared_container_block_service.list_blobs(container_name)
        for blob in blobs:
            print('blob ' + blob.name)

        print('6. Delete blob with container sas')
        shared_container_block_service.delete_blob(container_name, 'myblob')
    finally:
        print('7. Delete container')
        blockblob_service.delete_container(container_name)

    print("Containers Sas sample completed")
def make_blob_client(secrets):
    """
    Creates a blob client object

    :param secrets: secrets object holding either shared-key credentials
        (storage account name, key, and endpoint suffix) or service-principal
        credentials for the storage account
    """
    if secrets.shared_key:
        # Set up SharedKeyCredentials
        blob_client = blob.BlockBlobService(
            account_name=secrets.shared_key.storage_account_name,
            account_key=secrets.shared_key.storage_account_key,
            endpoint_suffix=secrets.shared_key.storage_account_suffix)
    else:
        # Set up ServicePrincipalCredentials
        arm_credentials = ServicePrincipalCredentials(
            client_id=secrets.service_principal.client_id,
            secret=secrets.service_principal.credential,
            tenant=secrets.service_principal.tenant_id,
            resource='https://management.core.windows.net/')
        m = RESOURCE_ID_PATTERN.match(
            secrets.service_principal.storage_account_resource_id)
        accountname = m.group('account')
        subscription = m.group('subscription')
        resourcegroup = m.group('resourcegroup')
        mgmt_client = StorageManagementClient(arm_credentials, subscription)
        key = mgmt_client.storage_accounts.list_keys(
            resource_group_name=resourcegroup,
            account_name=accountname).keys[0].value
        storage_client = CloudStorageAccount(accountname, key)
        blob_client = storage_client.create_block_blob_service()

    return blob_client
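# Hedged usage sketch (not from the original source): make_blob_client() only reads
# attribute paths such as secrets.shared_key.storage_account_name, so for a quick
# local test a SimpleNamespace stand-in is enough. The values below are placeholders.
from types import SimpleNamespace

shared_key = SimpleNamespace(storage_account_name='<account_name>',
                             storage_account_key='<account_key>',
                             storage_account_suffix='core.windows.net')
secrets = SimpleNamespace(shared_key=shared_key, service_principal=None)
blob_client = make_blob_client(secrets)  # returns a BlockBlobService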
def emulator(self):
    # With account
    account = CloudStorageAccount(is_emulated=True)
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(is_emulated=True)
def sas_auth(self):
    # With account
    account = CloudStorageAccount(account_name="<account_name>",
                                  sas_token="<sas_token>")
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name="<account_name>",
                              sas_token="<sas_token>")
def key_auth(self):
    # With account
    account = CloudStorageAccount(account_name="<account_name>",
                                  account_key="<account_key>")
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name="<account_name>",
                              account_key="<account_key>")
def test_create_service_no_key(self):
    # Arrange

    # Act
    bad_account = CloudStorageAccount('', '')

    with self.assertRaises(ValueError):
        service = bad_account.create_block_blob_service()
def create_all(app, account_name=None, account_key=None, container_name=None,
               include_hidden=False):
    account_name = account_name or app.config.get('AZURE_STORAGE_ACCOUNT_NAME')
    account_key = account_key or app.config.get('AZURE_STORAGE_ACCOUNT_KEY')
    container_name = container_name or app.config.get('AZURE_STORAGE_CONTAINER_NAME')

    if not container_name:
        raise ValueError("No container name provided.")

    # build list of static files
    all_files = _gather_files(app, include_hidden)
    logger.debug("All valid files: %s" % all_files)

    # connect to azure
    azure = CloudStorageAccount(account_name=account_name,
                                account_key=account_key)

    # create blob service
    blob_service = azure.create_block_blob_service()

    # get_or_create container
    if not blob_service.exists(container_name):
        blob_service.create_container(container_name)

    prefix = app.config.get('AZURE_STORAGE_PREFIX', '').lstrip('/').rstrip('/')

    for (static_folder, static_url), names in six.iteritems(all_files):
        static_upload_url = '%s/%s' % (prefix.rstrip('/'), static_url.lstrip('/'))
        _write_files(blob_service, app, static_upload_url, static_folder,
                     names, container_name)
def key_auth(self):
    # With account
    account = CloudStorageAccount(account_name='<account_name>',
                                  account_key='<account_key>')
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name='<account_name>',
                              account_key='<account_key>')
def main():
    argument_spec = dict(source_uri=dict(required=True),
                         source_key=dict(required=True),
                         destination_account=dict(required=True),
                         destination_key=dict(required=True),
                         destination_container=dict(required=True),
                         destination_blob=dict(required=True),
                         wait=dict(default=False, type='bool'),
                         timeout=dict(default=1000))

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_DEPS:
        module.fail_json(
            msg="requests and azure are required for this module: {0}".format(
                HAS_DEPS_EXC))

    source_account, source_container, source_blob = split_uri(
        module.params.get('source_uri'))

    source = CloudStorageAccount(account_name=source_account,
                                 account_key=module.params.get('source_key'))
    source_service = source.create_block_blob_service()

    destination_service = BlockBlobService(
        account_name=module.params.get('destination_account'),
        account_key=module.params.get('destination_key'))

    # Generate a short-lived read SAS on the source blob so the destination
    # account can copy it across accounts.
    source_token = source.generate_shared_access_signature(
        Services.BLOB, ResourceTypes.OBJECT, AccountPermissions.READ,
        datetime.datetime.now() + timedelta(hours=1))

    source_sas_url = source_service.make_blob_url(source_container, source_blob,
                                                  'https', source_token)

    destination_service.create_container(
        module.params.get('destination_container'), fail_on_exist=False)

    status = destination_service.copy_blob(
        module.params.get('destination_container'),
        module.params.get('destination_blob'), source_sas_url)

    if not module.params.get('wait'):
        data = dict(changed=True, status='started')
        module.exit_json(**data)
    else:
        copy = destination_service.get_blob_properties(
            module.params.get('destination_container'),
            module.params.get('destination_blob')).properties.copy
        count = 0
        while copy.status != 'success':
            count = count + 30
            if count > module.params.get('timeout'):
                module.fail_json(
                    msg='Timed out waiting for async copy to complete.')
            time.sleep(30)
            copy = destination_service.get_blob_properties(
                module.params.get('destination_container'),
                module.params.get('destination_blob')).properties.copy
        data = dict(changed=True, status='completed')
        module.exit_json(**data)
def sas_auth(self):
    # With account
    account = CloudStorageAccount(account_name='<account_name>',
                                  sas_token='<sas_token>')
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name='<account_name>',
                              sas_token='<sas_token>')
def test_create_account_sas_and_key(self):
    # Arrange

    # Act
    account = CloudStorageAccount(self.account_name, self.account_key,
                                  self.sas_token)
    service = account.create_block_blob_service()

    # Assert
    self.validate_service(service, BlockBlobService)
def public(self):
    # This applies to the blob services only
    # Public access must be enabled on the container or requests will fail

    # With account
    account = CloudStorageAccount(account_name='<account_name>')
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name='<account_name>')
def public(self):
    # This applies to the blob services only
    # Public access must be enabled on the container or requests will fail

    # With account
    account = CloudStorageAccount(account_name="<account_name>")
    client = account.create_block_blob_service()

    # Directly
    client = BlockBlobService(account_name="<account_name>")
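# Hedged sketch (assumption, not part of the original snippets): the anonymous
# clients in the two public() examples above only work if the container was made
# publicly readable by its owner beforehand, for example with an account-key client.
from azure.storage.blob import BlockBlobService, PublicAccess

owner = BlockBlobService(account_name="<account_name>", account_key="<account_key>")
owner.create_container("public-container", public_access=PublicAccess.Container)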
def test_create_account_emulated(self):
    # Arrange

    # Act
    account = CloudStorageAccount(is_emulated=True)
    service = account.create_block_blob_service()

    # Assert
    self.assertIsNotNone(service)
    self.assertEqual(service.account_name, 'devstoreaccount1')
    self.assertIsNotNone(service.account_key)
def test_create_account_sas(self):
    # Arrange

    # Act
    sas_account = CloudStorageAccount(self.account_name,
                                      sas_token=self.sas_token)
    service = sas_account.create_block_blob_service()

    # Assert
    self.assertIsNotNone(service)
    self.assertEqual(service.account_name, self.account_name)
    self.assertIsNone(service.account_key)
    self.assertEqual(service.sas_token, self.sas_token)
def __upload_model(self, model_name, service_def,
                   storage_account: CloudStorageAccount):
    if not os.path.isfile(service_def):
        raise FileNotFoundError(service_def + ' not found')

    storage_service = storage_account.create_block_blob_service()
    container_name = "models"
    storage_service.create_container(container_name)

    hash = self.__md5(service_def)
    blob_name = urllib.parse.quote(model_name) + "_" + hash
    storage_service.create_blob_from_path(container_name, blob_name, service_def)

    sas_token = storage_service.generate_blob_shared_access_signature(
        container_name, blob_name, BlobPermissions.READ,
        datetime.utcnow() + timedelta(days=365 * 5))

    return storage_service.make_blob_url(container_name, blob_name,
                                         sas_token=sas_token)
def get_blob_client() -> blob.BlockBlobService:
    if not storage_resource_id:
        return blob.BlockBlobService(account_name=storage_account_name,
                                     account_key=storage_account_key,
                                     endpoint_suffix=storage_account_suffix)
    else:
        credentials = ServicePrincipalCredentials(
            client_id=client_id,
            secret=credential,
            tenant=tenant_id,
            resource='https://management.core.windows.net/')
        m = RESOURCE_ID_PATTERN.match(storage_resource_id)
        accountname = m.group('account')
        subscription = m.group('subscription')
        resourcegroup = m.group('resourcegroup')
        mgmt_client = StorageManagementClient(credentials, subscription)
        key = mgmt_client.storage_accounts.list_keys(
            resource_group_name=resourcegroup,
            account_name=accountname).keys[0].value
        storage_client = CloudStorageAccount(accountname, key)
        return storage_client.create_block_blob_service()
def dump(file_url):
    # step 1: download blob from storage
    storage_account = CloudStorageAccount(storage_account_name,
                                          storage_account_key)
    container_name, blob_name = parse_file_url(file_url)
    blob_service = storage_account.create_block_blob_service()
    blob = blob_service.get_blob_to_bytes(container_name, blob_name)
    f = BytesIO(blob.content)  # Avro file bytes data
    reader = DataFileReader(f, DatumReader())

    event_list = []
    # step 2: get the event data
    for record in reader:
        event_data = json.loads(record["Body"], encoding="ascii",
                                object_hook=WindTurbineMeasure.obj_hook)
        event_list.append(event_data)

    # step 3: dump to the warehouse
    batch_insert(event_list)
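# Hedged sketch (assumption): parse_file_url() is referenced above but not shown.
# One plausible implementation splits a blob URL of the form
# https://<account>.blob.core.windows.net/<container>/<blob path> into its
# container name and blob name.
from urllib.parse import urlparse

def parse_file_url(file_url):
    path = urlparse(file_url).path.lstrip('/')
    container_name, _, blob_name = path.partition('/')
    return container_name, blob_name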
from azure.storage import CloudStorageAccount
from azure.storage.blob import PublicAccess
from azure.storage.blob.models import ContentSettings
from azure.common.client_factory import get_client_from_cli_profile
from azure.mgmt.storage import StorageManagementClient

RESOURCE_GROUP = 'sampleStorageResourceGroup'
STORAGE_ACCOUNT_NAME = 'samplestorageaccountname'
CONTAINER_NAME = 'samplecontainername'

# log in
storage_client = get_client_from_cli_profile(StorageManagementClient)

# create a public storage container to hold the file
storage_keys = storage_client.storage_accounts.list_keys(RESOURCE_GROUP,
                                                         STORAGE_ACCOUNT_NAME)
storage_keys = {v.key_name: v.value for v in storage_keys.keys}

storage_client = CloudStorageAccount(STORAGE_ACCOUNT_NAME, storage_keys['key1'])
blob_service = storage_client.create_block_blob_service()

blob_service.create_container(CONTAINER_NAME,
                              public_access=PublicAccess.Container)

blob_service.create_blob_from_bytes(
    CONTAINER_NAME,
    'helloworld.html',
    b'<center><h1>Hello World!</h1></center>',
    content_settings=ContentSettings('text/html')
)

print(blob_service.make_blob_url(CONTAINER_NAME, 'helloworld.html'))
def _get_service():
    account_name = config.STORAGE_ACCOUNT_NAME
    account_key = config.STORAGE_ACCOUNT_KEY
    account = CloudStorageAccount(account_name=account_name,
                                  account_key=account_key)
    service = account.create_block_blob_service()
    return service
class TransferAzure():
    transaction = None
    CDN_BASE = 'https://ucarecdn.com/'
    account = None
    service = None
    file_uuid = None
    filename = None
    make_public = None

    def __init__(self, make_public=False, transaction=None):
        account_name = settings.AZURE['account_name']
        account_key = settings.AZURE['account_key']
        sas = settings.AZURE['sas']

        self.transaction = transaction
        self.transaction_body = json.loads(transaction.body)['data']
        logger.info('copying uuid: ' + self.transaction_body['uuid'])
        self.file_uuid = self.transaction_body['uuid']
        self.filename = self.transaction_body['original_filename']
        self.make_public = make_public

        self.account = CloudStorageAccount(account_name=account_name,
                                           account_key=account_key,
                                           sas_token=sas)
        self.service = self.account.create_block_blob_service()

    def save_message_object(self):
        kwargs = {
            prop: self.transaction_body[prop]
            for prop in [
                'uuid', 'filename', 'is_stored', 'done', 'file_id',
                'original_filename', 'is_ready', 'total', 'mime_type', 'size'
            ]
        }
        if self.transaction_body['is_image']:
            MessageClass = ImageUploadMessage
            kwargs['imgformat'] = self.transaction_body['image_info']['format']
            for prop in [
                    'orientation', 'height', 'width', 'geo_location',
                    'datetime_original', 'dpi'
            ]:
                kwargs[prop] = self.transaction_body['image_info'][prop]
        else:
            MessageClass = FileUploadMessage

        return MessageClass.objects.create(
            webhook_transaction=self.transaction, **kwargs)

    def _blob_exists(self, container_name, blob_name):
        exists = self.service.exists(container_name, blob_name)
        return exists

    def _get_resource_reference(self):
        return '{}'.format(self.file_uuid)

    def run_copy(self):
        try:
            logger.info('creating container name')
            container_name = self._get_resource_reference()
            logger.info('container name: ' + container_name)
            self.service.create_container(container_name)

            logger.info('set permission public')
            self.service.set_container_acl(
                container_name, public_access=PublicAccess.Container)

            count = 0
            source = self.CDN_BASE + self.file_uuid + '/'
            logger.info('copying the file from source: ' + source)
            copy = self.service.copy_blob(container_name, self.filename, source)

            # Poll for copy completion
            logger.info('checking status')
            while copy.status != 'success':
                count = count + 1
                if count > 20:
                    logger.info(
                        'Timed out waiting for async copy to complete on %i count'
                        % count)
                    raise Exception(
                        'Timed out waiting for async copy to complete.')
                time.sleep(3 * count)
                logger.info('get blob properties')
                copy = self.service.get_blob_properties(
                    container_name, self.filename).properties.copy

            logger.info('saved message object')
            return True
        except Exception as e:
            print(e.message)
            self.service.delete_container(container_name)
class KeyVaultSampleBase(object):
    """Base class for Key Vault samples, provides common functionality needed across Key Vault sample code

    :ivar config: Azure subscription id for the user intending to run the sample
    :vartype config: :class:`KeyVaultSampleConfig`
    :ivar credentials: Azure Active Directory credentials used to authenticate with Azure services
    :vartype credentials: :class:`ServicePrincipalCredentials <msrestazure.azure_active_directory.ServicePrincipalCredentials>`
    :ivar keyvault_data_client: Key Vault data client used for interacting with key vaults
    :vartype keyvault_data_client: :class:`KeyVaultClient <azure.keyvault.KeyVaultClient>`
    :ivar keyvault_mgmt_client: Key Vault management client used for creating and managing key vaults
    :vartype keyvault_mgmt_client: :class:`KeyVaultManagementClient <azure.mgmt.keyvault.KeyVaultManagementClient>`
    :ivar resource_mgmt_client: Azure resource management client used for managing azure resources, access, and groups
    :vartype resource_mgmt_client: :class:`ResourceManagementClient <azure.mgmt.resource.ResourceManagementClient>`
    """

    def __init__(self):
        self.config = KeyVaultSampleConfig()
        self.credentials = None
        self.keyvault_data_client = None
        self.keyvault_mgmt_client = None
        self.resource_mgmt_client = None
        self.storage_account = None
        self.block_blob_service = None
        self._setup_complete = False
        self.samples = {(name, m) for name, m in inspect.getmembers(self)
                        if getattr(m, 'kv_sample', False)}
        models = {}
        models.update({
            k: v
            for k, v in azure.keyvault.models.__dict__.items()
            if isinstance(v, type)
        })
        models.update({
            k: v
            for k, v in azure.mgmt.keyvault.models.__dict__.items()
            if isinstance(v, type)
        })
        self._serializer = Serializer(models)

    def setup_sample(self):
        """
        Provides common setup for Key Vault samples, such as creating rest clients,
        creating a sample resource group if needed, and ensuring proper access for
        the service principal.

        :return: None
        """
        if not self._setup_complete:
            self.mgmt_creds = ServicePrincipalCredentials(
                client_id=self.config.client_id,
                secret=self.config.client_secret,
                tenant=self.config.tenant_id)
            self.data_creds = ServicePrincipalCredentials(
                client_id=self.config.client_id,
                secret=self.config.client_secret,
                tenant=self.config.tenant_id)
            self.resource_mgmt_client = ResourceManagementClient(
                self.mgmt_creds, self.config.subscription_id)

            # ensure the service principal has key vault as a valid provider
            self.resource_mgmt_client.providers.register('Microsoft.KeyVault')

            # ensure the intended resource group exists
            self.resource_mgmt_client.resource_groups.create_or_update(
                self.config.group_name, {'location': self.config.location})

            self.keyvault_mgmt_client = KeyVaultManagementClient(
                self.mgmt_creds, self.config.subscription_id)

            self.keyvault_data_client = KeyVaultClient(self.data_creds)

            self.storage_account = CloudStorageAccount(
                account_name=self.config.storage_account_name,
                account_key=self.config.storage_account_key)

            self.block_blob_service = self.storage_account.create_block_blob_service()

            self._setup_complete = True

    def create_vault(self):
        """
        Creates a new key vault with a unique name, granting full permissions to the current credentials
        :return: a newly created key vault
        :rtype: :class:`Vault <azure.keyvault.generated.models.Vault>`
        """
        vault_name = get_name('vault')

        # setup vault permissions for the access policy for the sample service principal
        permissions = Permissions()
        permissions.keys = KEY_PERMISSIONS_ALL
        permissions.secrets = SECRET_PERMISSIONS_ALL
        permissions.certificates = CERTIFICATE_PERMISSIONS_ALL

        policy = AccessPolicyEntry(self.config.tenant_id,
                                   self.config.client_oid, permissions)

        properties = VaultProperties(self.config.tenant_id, Sku(name='standard'),
                                     access_policies=[policy])

        parameters = VaultCreateOrUpdateParameters(self.config.location, properties)
        parameters.properties.enabled_for_deployment = True
        parameters.properties.enabled_for_disk_encryption = True
        parameters.properties.enabled_for_template_deployment = True

        print('creating vault {}'.format(vault_name))
        vault = self.keyvault_mgmt_client.vaults.create_or_update(
            self.config.group_name, vault_name, parameters)

        # wait for vault DNS entry to be created
        # see issue: https://github.com/Azure/azure-sdk-for-python/issues/1172
        self._poll_for_vault_connection(vault.properties.vault_uri)

        print('created vault {} {}'.format(vault_name, vault.properties.vault_uri))

        return vault

    def _poll_for_vault_connection(self, vault_uri, retry_wait=10, max_retries=4):
        """
        polls the data client 'get_secrets' method until a 200 response is received,
        indicating that the vault is available for data plane requests
        """
        last_error = None
        for x in range(max_retries - 1):
            try:
                # sleep first to avoid improper DNS caching
                time.sleep(retry_wait)
                self.keyvault_data_client.get_secrets(vault_uri)
                return
            except ClientRequestError as e:
                print('vault connection not available')
                last_error = e
        raise last_error

    def _serialize(self, obj):
        if isinstance(obj, Paged):
            serialized = [self._serialize(i) for i in list(obj)]
        else:
            serialized = self._serializer.body(obj, type(obj).__name__)
        return json.dumps(serialized, indent=4, separators=(',', ': '))
TWILIO_IPM_SERVICE_SID = 'IS2ec68050ef5e4c79b15b78c3ded7ddc5'
# old one with testchannel and general
#TWILIO_SERVICE_SID = 'IS7d421d86df064d9698e91ee6e3d4bcf5'

# Initialize the client
TWILIO_IPM_CLIENT = TwilioIpMessagingClient(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
TWILIO_IPM_SERVICE = TWILIO_IPM_CLIENT.services.get(sid=TWILIO_IPM_SERVICE_SID)

AZURE_STORAGE_ACCOUNT = CloudStorageAccount(
    "palliassistblobstorage",  # account name
    "r9tHMEj5VV/PwJyjN3KYySUqsnq9tCrxh6kDKFvVY3vrm+GluHN/a1LQjXKYIUzoHEle7x3EyIQwoOijzRJiOA==",  # access key
    "?sv=2016-05-31&ss=b&srt=sco&sp=rwdlac&se=2017-05-25T08:02:01Z&st=2017-04-04T00:02:01Z&spr=https,http&sig=DshFBBFKzV20Ml6sN8D8ZRpbIakU8jlbj8zIBDZP4z8%3D"  # sas token
)
BLOCK_BLOB_SERVICE = AZURE_STORAGE_ACCOUNT.create_block_blob_service()

#print "AZURE_STORAGE_ACCOUNT", AZURE_STORAGE_ACCOUNT
#print "BLOCK_BLOB_SERVICE", BLOCK_BLOB_SERVICE

if sys.version_info < (3, 0):
    reload(sys)
    sys.setdefaultencoding('utf8')

ENABLE_XMPP = False

DEBUG = True
TEMPLATE_DEBUG = DEBUG

ALLOWED_HOSTS = (
    'localhost',
new_css = '.flair-' + str(position) + '{background-position: 0 -' + str(
    height * position) + 'px}'
r.set_stylesheet(subreddit, css + new_css)


def log(message):
    table_service.insert_entity('logs', {'PartitionKey': 'flair',
                                         'RowKey': str(datetime.datetime.now()),
                                         'text': message})
    print('[*] ' + message)


storage_account = CloudStorageAccount(storage_account_name, storage_account_key)
table_service = storage_account.create_table_service()
blob_service = storage_account.create_block_blob_service()

blob_service.create_container('images', public_access='container')
table_service.create_table('flair')
table_service.create_table('logs')

r = praw.Reddit(user_agent)
r.login(username, password)
r.config.decode_html_entities = True

while True:
    for message in (m for m in r.get_unread(limit=None)):
        log('received message from ' + message.author.name)
        try:
            file, text = get_flair_info(message)
            if file in [blob.name for blob in list(blob_service.list_blobs('images'))]:
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from azure.storage import CloudStorageAccount
import config, time

account_name = config.STORAGE_ACCOUNT_NAME
account_key = config.STORAGE_ACCOUNT_KEY

account = CloudStorageAccount(account_name=account_name, account_key=account_key)
service = account.create_block_blob_service()

# The last time a backup was dropped into the folder, it was named 'splunketccfg.tar'.
# This is (almost) always the one to restore.
container_name = 'backups'
restore_file_name = 'splunketccfg.tar'
OUTPUT_FILE = 'splunketccfg.tar'

exists = service.exists(container_name, restore_file_name)
if exists:
    service.get_blob_to_path(container_name, restore_file_name, OUTPUT_FILE)
else:
    print('Backup file does not exist')
class StorageAccountTest(StorageTestCase):
    def setUp(self):
        super(StorageAccountTest, self).setUp()
        self.account_name = self.settings.STORAGE_ACCOUNT_NAME
        self.account_key = self.settings.STORAGE_ACCOUNT_KEY
        self.sas_token = '?sv=2015-04-05&st=2015-04-29T22%3A18%3A26Z&se=2015-04-30T02%3A23%3A26Z&sr=b&sp=rw&sip=168.1.5.60-168.1.5.70&spr=https&sig=Z%2FRHIX5Xcg0Mq2rqI3OlWTjEg2tYkboXr1P9ZUXDtkk%3D'
        self.account = CloudStorageAccount(self.account_name, self.account_key)

    #--Helpers-----------------------------------------------------------------
    def validate_service(self, service, type):
        self.assertIsNotNone(service)
        self.assertIsInstance(service, type)
        self.assertEqual(service.account_name, self.account_name)
        self.assertEqual(service.account_key, self.account_key)

    #--Test cases --------------------------------------------------------
    def test_create_block_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_block_blob_service()

        # Assert
        self.validate_service(service, BlockBlobService)

    def test_create_page_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_page_blob_service()

        # Assert
        self.validate_service(service, PageBlobService)

    def test_create_append_blob_service(self):
        # Arrange

        # Act
        service = self.account.create_append_blob_service()

        # Assert
        self.validate_service(service, AppendBlobService)

    def test_create_table_service(self):
        # Arrange

        # Act
        service = self.account.create_table_service()

        # Assert
        self.validate_service(service, TableService)

    def test_create_queue_service(self):
        # Arrange

        # Act
        service = self.account.create_queue_service()

        # Assert
        self.validate_service(service, QueueService)

    def test_create_file_service(self):
        # Arrange

        # Act
        service = self.account.create_file_service()

        # Assert
        self.validate_service(service, FileService)

    def test_create_service_no_key(self):
        # Arrange

        # Act
        bad_account = CloudStorageAccount('', '')

        with self.assertRaises(ValueError):
            service = bad_account.create_block_blob_service()

        # Assert

    def test_create_account_sas(self):
        # Arrange

        # Act
        sas_account = CloudStorageAccount(self.account_name,
                                          sas_token=self.sas_token)
        service = sas_account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, self.account_name)
        self.assertIsNone(service.account_key)
        self.assertEqual(service.sas_token, self.sas_token)

    def test_create_account_sas_and_key(self):
        # Arrange

        # Act
        account = CloudStorageAccount(self.account_name, self.account_key,
                                      self.sas_token)
        service = account.create_block_blob_service()

        # Assert
        self.validate_service(service, BlockBlobService)

    def test_create_account_emulated(self):
        # Arrange

        # Act
        account = CloudStorageAccount(is_emulated=True)
        service = account.create_block_blob_service()

        # Assert
        self.assertIsNotNone(service)
        self.assertEqual(service.account_name, 'devstoreaccount1')
        self.assertIsNotNone(service.account_key)

    @record
    def test_generate_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recordingfile(self.test_mode):
            return

        # Arrange
        token = self.account.generate_shared_access_signature(
            Services.BLOB,
            ResourceTypes.OBJECT,
            AccountPermissions.READ,
            datetime.utcnow() + timedelta(hours=1),
        )

        service = self.account.create_block_blob_service()
        data = b'shared access signature with read permission on blob'
        container_name = 'container1'
        blob_name = 'blob1.txt'

        try:
            service.create_container(container_name)
            service.create_blob_from_bytes(container_name, blob_name, data)

            # Act
            url = service.make_blob_url(
                container_name,
                blob_name,
                sas_token=token,
            )
            response = requests.get(url)

            # Assert
            self.assertTrue(response.ok)
            self.assertEqual(data, response.content)
        finally:
            service.delete_container(container_name)
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from azure.storage import CloudStorageAccount
import config

account_name = config.STORAGE_ACCOUNT_NAME
account_key = config.STORAGE_ACCOUNT_KEY

account = CloudStorageAccount(account_name=account_name, account_key=account_key)
service = account.create_block_blob_service()

# The last time a backup was dropped into the folder, it was named 'splunketccfg.tar'.
# This is (almost) always the one to restore.
container_name = 'backups'
restore_file_name = 'splunketccfg.tar'
OUTPUT_FILE = 'splunketccfg.tar'

exists = service.exists(container_name, restore_file_name)
if exists:
    service.get_blob_to_path(container_name, restore_file_name, OUTPUT_FILE)
else:
    print('Backup file does not exist')
class SampleTest():
    def __init__(self):
        try:
            import config as config
        except:
            raise ValueError('Please specify configuration settings in config.py.')

        if config.IS_EMULATED:
            self.account = CloudStorageAccount(is_emulated=True)
        else:
            # Note that account key and sas should not both be included
            account_name = config.STORAGE_ACCOUNT_NAME
            account_key = config.STORAGE_ACCOUNT_KEY
            sas = config.SAS
            self.account = CloudStorageAccount(account_name=account_name,
                                               account_key=account_key,
                                               sas_token=sas)
        self.service = self.account.create_block_blob_service()

    def test_container_samples(self):
        container = ContainerSamples(self.account)
        container.run_all_samples()

    def test_block_blob_samples(self):
        blob = BlockBlobSamples(self.account)
        blob.run_all_samples()

    def test_append_blob_samples(self):
        blob = AppendBlobSamples(self.account)
        blob.run_all_samples()

    def test_page_blob_samples(self):
        blob = PageBlobSamples(self.account)
        blob.run_all_samples()

    def list_containers(self):
        self.service = self.account.create_block_blob_service()
        containers = list(self.service.list_containers())
        print('All containers in your account:')
        for container in containers:
            print(container.name)

    def list_all_blobs_in_all_containers(self):
        #self.service = self.account.create_block_blob_service()
        containers = list(self.service.list_containers())
        print('Full list:')
        for container in containers:
            print(container.name + ':')
            blobs = list(self.service.list_blobs(container.name))
            for blob in blobs:
                print(blob.name)
            print('')

    def test_get_put_blob(self):
        import config as config
        account_name = config.STORAGE_ACCOUNT_NAME
        account_key = config.STORAGE_ACCOUNT_KEY
        block_blob_service = BlockBlobService(account_name, account_key)
        block_blob_service.create_blob_from_path(
            'cont2',
            'sunset.png',
            'sunset.png',
        )
        block_blob_service.get_blob_to_path('cont2', 'sunset.png', 'out-sunset.png')
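# Hedged sketch (assumption): a minimal config.py with the names SampleTest reads
# above. Values are placeholders; account key and SAS should not both be set.
IS_EMULATED = True
STORAGE_ACCOUNT_NAME = '<account_name>'
STORAGE_ACCOUNT_KEY = '<account_key>'
SAS = None  # or '<sas_token>'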
class NFS_Controller:
    def __init__(self, config):
        self.config = config
        self.account = CloudStorageAccount(
            account_name=config.storage_account_name,
            account_key=config.storage_account_key)
        self.service = self.account.create_block_blob_service()

    """ utility functions """

    def get_containers(self):
        containers = self.service.list_containers()
        return containers

    def get_container_directories(self, container_name):
        bloblistingresult = self.service.list_blobs(
            container_name=container_name, delimiter='/')
        return [blob.name.rsplit('/', 1)[0] for blob in bloblistingresult]

    def create_container(self, container_name):
        self.service.create_container(container_name)

    def get_parent_directory(self, path):
        return path.rsplit('/', 1)[0]

    def exists(self, container, full_blob_name=None):
        return self.service.exists(container, full_blob_name)

    def generate_uid(self):
        r_uuid = base64.urlsafe_b64encode(uuid.uuid4().bytes)
        return r_uuid.replace('=', '')

    """ Upload: """

    def parallel_chunky_upload(self, container_name, full_blob_name, data,
                               chunks=5):
        debug = False
        threads = []
        block_ids = []
        chunk_size = len(data) / chunks
        chunks = [
            data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size)
        ]
        # Upload each chunk as a block on its own thread, then commit the block list.
        for chunk in chunks:
            uid = self.generate_uid()
            block_ids.append(BlobBlock(id=uid))
            t = threading.Thread(target=self._upload_block,
                                 args=(container_name, full_blob_name, chunk, uid,))
            threads.append(t)
            t.start()
        [t.join() for t in threads]
        self.service.put_block_list(container_name, full_blob_name, block_ids)
        return full_blob_name

    def _upload_block(self, container_name, full_blob_name, chunk, uid):
        self.service.put_block(container_name, full_blob_name, chunk, uid)

    def upload_text(self, container_name, full_blob_name, data):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        self.service.create_blob_from_text(container_name, full_blob_name, data)
        return full_blob_name

    def upload_image(self, container_name, path, data):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        full_blob_name = '{}{}'.format(path, '.jpeg')
        with BytesIO() as output:
            data.save(output, 'jpeg')
            image_bytes = output.getvalue()
        self.parallel_chunky_upload(container_name, full_blob_name, image_bytes)
        return full_blob_name

    def upload_from_path(self, container_name, base_nfs_path, file_path):
        if not (self.exists(container_name)):
            self.create_container(container_name)
        path = file_path.rsplit('/', 1)[1] if ('/' in file_path) else file_path
        if (base_nfs_path == ""):
            full_blob_name = '{}'.format(path)
        else:
            full_blob_name = '{}/{}'.format(base_nfs_path, path)
        self.service.create_blob_from_path(container_name, full_blob_name,
                                           file_path)

    def batched_parallel_directory_upload(
            self, container_name, base_nfs_path, dirpath,
            ext_filter_list=['.jpeg', '.png', '.jpg']):
        print(dirpath)
        if (ext_filter_list == None):
            file_paths = [
                os.path.realpath('{}/{}'.format(dirpath, fn))
                for fn in os.listdir(dirpath)
            ]
        else:
            file_paths = [
                os.path.realpath('{}/{}'.format(dirpath, fn))
                for fn in os.listdir(dirpath) if any(
                    fn.endswith(extension_filter)
                    for extension_filter in ext_filter_list)
            ]
        # print file_paths
        total_files_count = len(file_paths)
        current_index = 0
        batch_size = 30
        if not (self.exists(container_name)):
            self.create_container(container_name)
        batch_number = 1
        while (True):
            indices = [(current_index + i) for i in range(batch_size)]
            file_paths_batch = [
                file_paths[i] for i in indices if (i < total_files_count)
            ]
            current_index += len(file_paths_batch)
            if (len(file_paths_batch) == 0):
                break
            threads = []
            index = indices[0]
            for file_path in file_paths_batch:
                print('[Batch {}: Percent of total {}]Uploading image from file path: {}'
                      .format(batch_number,
                              (((index * 1.0) / (total_files_count - 1)) * 100.0),
                              file_path))
                t = threading.Thread(target=self.upload_from_path,
                                     args=(container_name, base_nfs_path, file_path))
                threads.append(t)
                index = index + 1
                t.start()
            [t.join() for t in threads]
            batch_number = batch_number + 1

    """ Download """

    def parallel_download(self, container_name, full_blob_names):
        if (full_blob_names == None):
            return None
        threads = []
        results = []
        for full_blob_name in full_blob_names:
            result = {'blob': None}
            t = threading.Thread(target=self._download_blob_helper,
                                 args=(container_name, full_blob_name, result))
            results.append(result)
            threads.append(t)
            t.start()
        [t.join() for t in threads]
        blobs = [result['blob'] for result in results if result['blob'] != None]
        return blobs

    def _download_blob_helper(self, container_name, full_blob_name, result):
        if (self.exists(container_name, full_blob_name)):
            result['blob'] = self.download_data(container_name, full_blob_name)
        else:
            return None

    def download_data(self, container_name, full_blob_name):
        print("Full blob name: " + full_blob_name)
        if not (self.exists(container_name)):
            self.create_container(container_name)
            return None
        blob = self.service.get_blob_to_bytes(container_name, full_blob_name)
        return blob

    def download_full_container(self, container_name, destination_directory=None):
        if not (destination_directory == None):
            destination_directory = os.path.realpath(destination_directory)
            if not (os.path.isdir(destination_directory)):
                os.makedirs(destination_directory)
        else:
            destination_directory = os.getcwd()
        if not (self.exists(container_name)):
            raise ValueError('Container does not exist')
        blobs = self.service.list_blobs(container_name)
        # code below lists all the blobs in the container and downloads them one after another
        for blob in blobs:
            print(blob.name)
            print("{}".format(blob.name))
            # check if the path contains a folder structure, create the folder structure
            if "/" in "{}".format(blob.name):
                print("there is a path in this")
                # extract the folder path and check if that folder exists locally, and if not create it
                head, tail = os.path.split("{}".format(blob.name))
                print(head)
                print(tail)
                if (os.path.isdir(destination_directory + "/" + head)):
                    # download the files to this directory
                    print("directory and sub directories exist")
                    self.service.get_blob_to_path(
                        container_name, blob.name,
                        destination_directory + "/" + head + "/" + tail)
                else:
                    # create the directory and download the file to it
                    print("directory doesn't exist, creating it now")
                    os.makedirs(destination_directory + "/" + head)
                    print("directory created, download initiated")
                    self.service.get_blob_to_path(
                        container_name, blob.name,
                        destination_directory + "/" + head + "/" + tail)
            else:
                self.service.get_blob_to_path(
                    container_name, blob.name,
                    destination_directory + "/" + blob.name)

    """ Logging """

    def retrieve_log_entities(self, container_name, path, filter=None):
        log_path = '{}/log.txt'.format(path)
        log_entries = LogEntriesBase()
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
            if (filter != None):
                log_entries = log_entries.get_logs(filter=filter)
        return log_entries

    def update_log(self, container_name, entry):
        path = self.get_parent_directory(entry[LogEntriesBase.PATH])
        log_path = '{}/log.txt'.format(path)
        log_entries = LogEntriesBase()
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
        log_entries.update(entry)
        raw = log_entries.serialize()
        self.service.create_blob_from_text(container_name, log_path, raw)

    def update_logs(self, container_name, entries):
        log_paths = {
            '{}/log.txt'.format(
                self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
            for log_entry in entries
        }
        if len(log_paths) > 1:
            raise ValueError('Logs being updated must be of the same log file')
        # log_paths is a set, so take its single element rather than indexing it
        log_path = next(iter(log_paths))
        if not self.exists(container_name, log_path):
            raise ValueError(
                'Log file {} under container {} does not exist'.format(
                    log_path, container_name))
        log_entries = LogEntriesBase()
        log_file = self.service.get_blob_to_text(container_name, log_path)
        raw_logs = log_file.content
        log_entries.deserialize(raw_logs)
        for entry in entries:
            log_entries.update(entry)
        raw = log_entries.serialize()
        self.service.create_blob_from_text(container_name, log_path, raw)

    """ Avoid Using this: It is not efficient and you should always update a log directly after resource use """

    def update_multiple_log_files(self, container_name, entries):
        log_paths = {
            '{}/log.txt'.format(
                self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
            for log_entry in entries
        }
        for log_path in log_paths:
            entries = [
                log_entry for log_entry in entries if '{}/log.txt'.format(
                    self.get_parent_directory(log_entry[LogEntriesBase.PATH]))
                == log_path
            ]
            self.update_logs(container_name, entries)
def get_azure_blob_service():
    account = app.config.get('AZURE_ACCOUNT')
    key = app.config.get('AZURE_STORAGE_KEY')
    account = CloudStorageAccount(account_name=account, account_key=key)
    return account.create_block_blob_service()
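# Hedged usage sketch (assumption): get_azure_blob_service() reads the Flask app
# config keys shown above, so the app only needs those two entries set. The key
# names come from the snippet; the values are placeholders.
from flask import Flask

app = Flask(__name__)
app.config['AZURE_ACCOUNT'] = '<account_name>'
app.config['AZURE_STORAGE_KEY'] = '<account_key>'

blob_service = get_azure_blob_service()  # BlockBlobService for that account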
class LogoStorageConnector:
    def __init__(self):
        try:
            import config as config
            self.config = config
        except:
            raise ValueError('Please specify configuration settings in config.py.')

        try:
            import nfs_constants as constants
            self.constants = constants
        except:
            raise ValueError('Please specify networked file system constants in nfs_constants.py.')

        self.account = CloudStorageAccount(
            account_name=config.STORAGE_ACCOUNT_NAME,
            account_key=config.STORAGE_ACCOUNT_KEY)
        self.service = self.account.create_block_blob_service()
        self._create_input_container()
        self._create_output_container()
        # self._create_checkpoints_container()

    """ Public Interfaces """

    """ Upload: input """

    def upload_brand_training_input_IPE(self, brand, IPE, isProcessed):
        return self.upload_IPE_to_bucket(self._input_container(), brand,
                                         self.constants.TRAINING_DIRECTORY_NAME,
                                         IPE, isProcessed, log=True)

    def upload_brand_operational_input_IPE(self, brand, IPE, isProcessed):
        return self.upload_IPE_to_bucket(self._input_container(), brand,
                                         self.constants.OPERATIONAL_DIRECTORY_NAME,
                                         IPE, isProcessed, log=True)

    def upload_IPE_to_bucket(self, container_name, brand, directory, IPE,
                             isProcessed, log=False):
        bucket_path = self._create_path_to_bucket(brand, directory)
        bucket_post_entities_full_path = self._get_bucket_post_entities_file(bucket_path)
        bucket_images_base_path = self._get_bucket_image_directory(bucket_path)

        for element in IPE.posts:
            print(element.keys())
            if ('picture' in element and 'picture_id' in element):
                path = '{}/{}'.format(bucket_images_base_path, element['picture_id'])
                image_path = self._upload_and_compress_image(container_name, path,
                                                             element['picture'])
                element.pop('picture', None)
                element['image_path'] = image_path

        self._upload_text(container_name, bucket_post_entities_full_path,
                          IPE.serialize())

        if (log == True):
            self.log(bucket_path, isProcessed)

        return bucket_path

    """ Download """

    def download_brand_training_input_data(self, brand, processing_status_filter=None):
        prefix = '{}/{}'.format(brand, self.constants.TRAINING_DIRECTORY_NAME)
        return self.download_brand_data(self.constants.INPUT_CONTAINER_NAME, brand,
                                        prefix,
                                        processing_status_filter=processing_status_filter)

    def download_brand_operational_input_data(self, brand, processing_status_filter=None):
        prefix = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
        return self.download_brand_data(self.constants.INPUT_CONTAINER_NAME, brand,
                                        prefix,
                                        processing_status_filter=processing_status_filter)

    def download_brand_training_input_post_entities(self, brand, processing_status_filter=None):
        prefix = '{}/{}'.format(brand, self.constants.TRAINING_DIRECTORY_NAME)
        return self.download_brand_post_entities(self.constants.INPUT_CONTAINER_NAME,
                                                 brand, prefix,
                                                 processing_status_filter=processing_status_filter)

    def download_brand_operational_input_post_entities(self, brand, processing_status_filter=None):
        prefix = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
        return self.download_brand_post_entities(self.constants.INPUT_CONTAINER_NAME,
                                                 brand, prefix,
                                                 processing_status_filter=processing_status_filter)

    def download_brand_post_entities(self, container_name, brand, prefix,
                                     processing_status_filter=None):
        blobs = []
        logs = self.retreive_log_entities(container_name, prefix)
        if (processing_status_filter != None):
            unproccessed_entries = logs.GetLogs(
                processing_status_filter=processing_status_filter)
            for log in unproccessed_entries:
                blobs.append(self._download_data(
                    container_name, '{}/{}'.format(log[PREFIX], 'post_entities.txt')))
        else:
            for log in logs:
                blobs.append(self._download_data(
                    container_name, '{}/{}'.format(log[PREFIX], 'post_entities.txt')))
        return blobs

    def download_brand_data(self, container_name, brand, prefix,
                            processing_status_filter=None):
        blobs = []
        if (processing_status_filter != None):
            logs = self.retreive_log_entities(
                container_name, prefix,
                processing_status_filter=processing_status_filter)
            for log in logs:
                blobs.extend(self.service.list_blobs(container_name=container_name,
                                                     prefix=log[PREFIX]))
        else:
            blobs = self.service.list_blobs(container_name=container_name,
                                            prefix=prefix)
        data = []
        for blob in blobs:
            data.append(self._download_data(container_name, blob.name))
        return data

    def parallel_input_image_download(self, full_blob_names):
        return self.parallel_image_download(self.constants.INPUT_CONTAINER_NAME,
                                            full_blob_names)

    def parallel_image_download(self, container_name, full_blob_names):
        if (full_blob_names == None):
            return None
        threads = []
        results = []
        for full_blob_name in full_blob_names:
            result = {'blob': None}
            t = threading.Thread(target=self.download_image_blob,
                                 args=(container_name, full_blob_name, result))
            results.append(result)
            threads.append(t)
            t.start()
        [t.join() for t in threads]
        blobs = [result['blob'] for result in results if result['blob'] != None]
        return blobs

    def download_image_blob(self, container_name, full_blob_name, result):
        if (self.exists(container_name, full_blob_name)):
            result['blob'] = self._download_data(container_name, full_blob_name)
        else:
            return None

    def download_brand_operational_output_data(self, brand):
        path = '{}/{}'.format(brand, self.constants.OPERATIONAL_DIRECTORY_NAME)
        blobs = self.service.list_blobs(
            container_name=self.constants.OUTPUT_CONTAINER_NAME, prefix=path)
        return blobs

    def get_container_directories(self, container_name):
        bloblistingresult = self.service.list_blobs(container_name=container_name,
                                                    delimiter='/')
        return [blob.name.rsplit('/', 1)[0] for blob in bloblistingresult]

    """ Pretty Print """

    def pretty_print_storage_structure(self):
        containers = self.service.list_containers()
        for container in containers:
            self.pretty_print_container_contents(container.name)

    def pretty_print_container_contents(self, container_name):
        print(container_name)
        blobs = self.service.list_blobs(container_name)
        for blob in blobs:
            print(' {}'.format(blob.name))

    """ Private """

    def _create_path_to_bucket(self, brand_name, level):
        return '{}/{}/{}=={}'.format(
            brand_name, level, str(uuid.uuid4())[:8],
            datetime.datetime.now().strftime("%m-%d-%Y %I:%M%p"))

    def _get_bucket_image_directory(self, prefix):
        return '{}/[IMAGES]'.format(prefix)

    def _get_bucket_post_entities_file(self, prefix):
        return '{}/post_entities.txt'.format(prefix)

    def _create_input_container(self):
        self.service.create_container(self.constants.INPUT_CONTAINER_NAME)

    def _create_output_container(self):
        self.service.create_container(self.constants.OUTPUT_CONTAINER_NAME)

    def _create_checkpoints_container(self):
        self.service.create_container(self.constants.CHECKPOINTS_CONTAINER_NAME)

    def _create_container(self, container_name):
        self.service.create_container(container_name)

    def _input_container(self):
        return self.constants.INPUT_CONTAINER_NAME

    def _output_container(self):
        return self.constants.OUTPUT_CONTAINER_NAME

    def get_parent_directory(self, entity):
        return entity.rsplit('/', 1)[0]

    def exists(self, container, full_blob_name=None):
        return self.service.exists(container, full_blob_name)

    def _upload_text(self, container_name, full_blob_name, data):
        if not (self.exists(container_name)):
            self._create_container(container_name)
        print("uploading text to path", full_blob_name)
        self.service.create_blob_from_text(container_name, full_blob_name, data)
        return full_blob_name

    def _upload_and_compress_image(self, container_name, path, data):
        if not (self.exists(container_name)):
            self._create_container(container_name)
        full_blob_name = '{}{}'.format(path, '.jpeg')
        with BytesIO() as output:
            data.save(output, 'jpeg')
            bytes = output.getvalue()
        print("uploading image to path", path)
        self._parallel_upload(container_name, full_blob_name, bytes)
        return full_blob_name

    def _parallel_upload(self, container_name, full_blob_name, data):
        debug = False
        threads = []
        block_ids = []
        chunk_size = len(data) / 5
        if (debug):
            print("chunking data into even sections of length: ", chunk_size)
        chunks = [data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size)]
        for chunk in chunks:
            uid = self.generate_uid()
            block_ids.append(BlobBlock(id=uid))
            if (debug):
                print("spawning thread with uid: ", uid)
            t = threading.Thread(target=self._upload_block,
                                 args=(container_name, full_blob_name, chunk, uid,))
            threads.append(t)
            t.start()
        if (debug):
            print("all threads started...")
        [t.join() for t in threads]
        if (debug):
            print("all threads have completed execution")
        if (debug):
            block_list = self.service.get_block_list(
                container_name, full_blob_name, block_list_type=BlockListType.All)
            uncommitted = len(block_list.uncommitted_blocks)
            committed = len(block_list.committed_blocks)
            print("uncommitted: ", uncommitted, " committed: ", committed)
        if (debug):
            print("committing blocks")
        self.service.put_block_list(container_name, full_blob_name, block_ids)
        if (debug):
            block_list = self.service.get_block_list(
                container_name, full_blob_name, block_list_type=BlockListType.All)
            uncommitted = len(block_list.uncommitted_blocks)
            committed = len(block_list.committed_blocks)
            print("uncommitted: ", uncommitted, " committed: ", committed)

    def _upload_block(self, container_name, full_blob_name, chunk, uid):
        self.service.put_block(container_name, full_blob_name, chunk, uid)

    def generate_uid(self):
        r_uuid = base64.urlsafe_b64encode(uuid.uuid4().bytes)
        return r_uuid.replace('=', '')

    def download_input_data(self, full_blob_name):
        return self._download_data(self.constants.INPUT_CONTAINER_NAME,
                                   full_blob_name)

    def _download_data(self, container_name, full_blob_name):
        if not (self.exists(container_name)):
            self._create_container(container_name)
        blob = self.service.get_blob_to_bytes(container_name, full_blob_name)
        return blob

    def retreive_log_entities(self, container_name, path,
                              processing_status_filter=None):
        log_entries = LogEntries()
        log_path = path + "/log.txt"
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
            if (processing_status_filter != None):
                log_entries = log_entries.GetLogs(
                    processing_status_filter=processing_status_filter)
        return log_entries

    def log(self, prefix, isProcessed):
        container_name = self._input_container()
        path = self.get_parent_directory(prefix)
        log_path = path + '/log.txt'
        log_entries = LogEntries()
        if self.exists(container_name, log_path):
            log_file = self.service.get_blob_to_text(container_name, log_path)
            raw_logs = log_file.content
            log_entries.deserialize(raw_logs)
        log_entries.update(prefix, isProcessed=isProcessed)
        raw = log_entries.serialize()
        self.service.create_blob_from_text(container_name, log_path, raw)

    def update_log_entries(self, bucket_names, isProcessed):
        directories = {}
        container_name = self._input_container()
        for bucket_name in bucket_names:
            print(bucket_name)
            path = self.get_parent_directory(bucket_name)
            print(path)
            log_path = path + '/log.txt'
            print(directories.keys())
            if log_path in directories:
                directories[log_path].append(bucket_name)
            else:
                print("adding new log path: ", log_path)
                directories[log_path] = []
                directories[log_path].append(bucket_name)
        for key, value in directories.iteritems():
            log_entries = LogEntries()
            if self.exists(container_name, key):
                log_file = self.service.get_blob_to_text(container_name, key)
                raw_logs = log_file.content
                print(key)
                print(raw_logs)
                log_entries.deserialize(raw_logs)
            for bucket_name in value:
                print("updating for bucket_name:", bucket_name, "for file: ", key)
                log_entries.update(bucket_name, isProcessed=isProcessed)
            print(log_entries.serialize())
            raw = log_entries.serialize()
            self.service.create_blob_from_text(container_name, key, raw)
class StorageBlobContext(): """Initializes the repository with the specified settings dict. Required settings in config dict are: - AZURE_STORAGE_NAME - AZURE_STORAGE_KEY - AZURE_REQUIRE_ENCRYPTION - AZURE_KEY_IDENTIFIER - AZURE_SECRET_KEY - AZURE_STORAGE_IS_EMULATED """ _account = None _account_name = '' _account_key = '' _is_emulated = False _modeldefinitions = [] REGISTERED = True """ decorators """ def get_modeldefinition(registered=False): def wrap(func): @wraps(func) def wrapper(self, storagemodel, modeldefinition=None, *args, **kwargs): """ modeldefinition already determined """ if not modeldefinition is None: return func(self, storagemodel, modeldefinition, *args, **kwargs) """ find modeldefinition for StorageQueueModel or StorageQueueModel """ if isinstance(storagemodel, StorageBlobModel): definitionlist = [ definition for definition in self._modeldefinitions if definition['modelname'] == storagemodel.__class__.__name__ ] else: log.info('Argument is not an StorageBlobModel') raise AzureStorageWrapException( storagemodel, "Argument is not an StorageBlobModel") if len(definitionlist) == 1: modeldefinition = definitionlist[0] elif len(definitionlist) > 1: raise ModelRegisteredMoreThanOnceError(storagemodel) if registered and (not isinstance(modeldefinition, dict)): raise ModelNotRegisteredError(storagemodel) return func(self, storagemodel, modeldefinition, *args, **kwargs) return wrapper return wrap def __init__(self, **kwargs): """ parse kwargs """ self._account_name = kwargs.get('AZURE_STORAGE_NAME', '') self._account_key = kwargs.get('AZURE_STORAGE_KEY', '') self._is_emulated = kwargs.get('AZURE_STORAGE_IS_EMULATED', False) self._key_identifier = kwargs.get('AZURE_KEY_IDENTIFIER', '') self._secret_key = kwargs.get('AZURE_SECRET_KEY', '') """ account & service init """ if self._is_emulated: self._account = CloudStorageAccount(is_emulated=True) elif self._account_name != '' and self._account_key != '': self._account = CloudStorageAccount(self._account_name, self._account_key) else: raise AzureException """ registered models """ self._modeldefinitions = [] def __create__(self, modeldefinition: dict) -> bool: if (not modeldefinition['blobservice'] is None): try: modeldefinition['blobservice'].create_container( modeldefinition['container']) return True except Exception as e: msg = 'failed to create {} with error {}'.format( modeldefinition['container'], e) raise AzureStorageWrapException(msg=msg) else: return True pass def __delete__(self, modeldefinition: dict) -> bool: if (not modeldefinition['blobservice'] is None): try: modeldefinition['blobservice'].delete_container( modeldefinition['container']) return True except Exception as e: msg = 'failed to delete {} with error {}'.format( modeldefinition['container'], e) raise AzureStorageWrapException(msg=msg) else: return True pass @get_modeldefinition() def register_model(self, storagemodel: object, modeldefinition=None): """ set up an Queueservice for an StorageQueueModel in your Azure Storage Account Will create the Queue if not exist! 
required Parameter is: - storagemodel: StorageQueueModel(Object) """ if modeldefinition is None: """ test if containername already exists """ if [ model for model in self._modeldefinitions if model['container'] == storagemodel._containername ]: raise NameConventionError(storagemodel._containername) """ test if containername fits to azure naming rules """ if not test_azurestorage_nameconventions( storagemodel._containername, 'StorageBlobModel'): raise NameConventionError(storagemodel._containername) """ now register model """ modeldefinition = { 'modelname': storagemodel.__class__.__name__, 'container': storagemodel._containername, 'encrypt': storagemodel._encrypt, 'blobservice': self._account.create_block_blob_service() } """ encrypt queue service """ if modeldefinition['encrypt']: # Create the KEK used for encryption. # KeyWrapper is the provided sample implementation, but the user may use their own object as long as it implements the interface above. kek = KeyWrapper(self._key_identifier, self._secret_key) # Key identifier # Create the key resolver used for decryption. # KeyResolver is the provided sample implementation, but the user may use whatever implementation they choose so long as the function set on the service object behaves appropriately. key_resolver = KeyResolver() key_resolver.put_key(kek) # Set the require Encryption, KEK and key resolver on the service object. modeldefinition['blobservice'].require_encryption = True modeldefinition['blobservice'].key_encryption_key = kek modeldefinition[ 'blobservice'].key_resolver_funcion = key_resolver.resolve_key self.__create__(modeldefinition) self._modeldefinitions.append(modeldefinition) log.info( 'model {} registered successfully. Models are {!s}.'.format( modeldefinition['modelname'], [model['modelname'] for model in self._modeldefinitions])) else: log.info('model {} already registered. Models are {!s}.'.format( modeldefinition['modelname'], [model['modelname'] for model in self._modeldefinitions])) pass @get_modeldefinition(REGISTERED) def unregister_model(self, storagemodel: object, modeldefinition=None, delete_blob=False): """ clear up an Queueservice for an StorageQueueModel in your Azure Storage Account Will delete the hole Queue if delete_queue Flag is True! required Parameter is: - storagemodel: StorageQueueModel(Object) Optional Parameter is: - delete_queue: bool """ """ remove from modeldefinitions """ for i in range(len(self._modeldefinitions)): if self._modeldefinitions[i]['modelname'] == modeldefinition[ 'modelname']: del self._modeldefinitions[i] break """ delete queue from storage if delete_queue == True """ if delete_blob: self.__delete__(modeldefinition) log.info('model {} unregistered successfully. 
Models are {!s}'.format( modeldefinition['modelname'], [model['modelname'] for model in self._modeldefinitions])) pass @get_modeldefinition(REGISTERED) def upload(self, storagemodel: object, modeldefinition=None): """ insert blob message into storage """ if (storagemodel.content is None) or ( storagemodel.properties.content_settings.content_type is None): # No content to upload raise AzureStorageWrapException( storagemodel, "StorageBlobModel does not contain content nor content settings" ) else: blobservice = modeldefinition['blobservice'] container_name = modeldefinition['container'] blob_name = storagemodel.name try: # refresh metadata storagemodel.__instance_to_metadata__() """ upload bytes """ blobservice.create_blob_from_bytes( container_name=container_name, blob_name=blob_name, blob=storagemodel.content, metadata=storagemodel.metadata, content_settings=storagemodel.properties.content_settings) storagemodel.properties = blobservice.get_blob_properties( container_name=container_name, blob_name=blob_name).properties except Exception as e: msg = 'can not save blob in container {} because {!s}'.format( storagemodel._containername, e) raise AzureStorageWrapException(storagemodel, msg=msg) return storagemodel @get_modeldefinition(REGISTERED) def download(self, storagemodel: object, modeldefinition=None): """ load blob from storage into StorageBlobModelInstance """ if (storagemodel.name is None): # No content to download raise AzureStorageWrapException( storagemodel, "StorageBlobModel does not contain content nor content settings" ) else: container_name = modeldefinition['container'] blob_name = storagemodel.name try: if modeldefinition['blobservice'].exists( container_name, blob_name): """ download blob """ blob = modeldefinition['blobservice'].get_blob_to_bytes( container_name=modeldefinition['container'], blob_name=storagemodel.name) storagemodel.__mergeblob__(blob) except Exception as e: msg = 'can not load blob from container {} because {!s}'.format( storagemodel._containername, e) raise AzureStorageWrapException(storagemodel, msg=msg) return storagemodel @get_modeldefinition(REGISTERED) def delete(self, storagemodel: object, modeldefinition=None) -> bool: """ delete the blob from storage """ deleted = False blobservice = modeldefinition['blobservice'] container_name = modeldefinition['container'] blob_name = storagemodel.name try: if blobservice.exists(container_name, blob_name): """ delete """ blob = blobservice.delete_blob(container_name, blob_name) deleted = True except Exception as e: msg = 'can not delete blob {} from storage because {!s}'.format( blob_name, e) raise AzureStorageWrapException(storagemodel, msg=msg) return deleted @get_modeldefinition(REGISTERED) def exists(self, storagemodel: object, modeldefinition=None) -> bool: """ delete the blob from storage """ exists = False blobservice = modeldefinition['blobservice'] container_name = modeldefinition['container'] blob_name = storagemodel.name try: blobs = self.list(storagemodel, modeldefinition, where=storagemodel.name) if len(blobs) == 1: storagemodel.__mergeblob__(blobs[0]) exists = True except Exception as e: msg = 'can not retireve blob {} from storage because {!s}'.format( blob_name, e) raise AzureStorageWrapException(storagemodel, msg=msg) return exists @get_modeldefinition(REGISTERED) def list(self, storagemodel: object, modeldefinition=None, where=None) -> list: """ list blob messages in container """ try: blobnames = [] if where is None: generator = modeldefinition['blobservice'].list_blobs( 
modeldefinition['container']) else: generator = modeldefinition['blobservice'].list_blobs( modeldefinition['container'], prefix=where) for blob in generator: blobnames.append(blob) except Exception as e: msg = 'can not list blobs in container {} because {!s}'.format( storagemodel._containername, e) raise AzureStorageWrapException(storagemodel, msg=msg) finally: return blobnames
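# --- Hedged usage sketch (not part of the original source) ---
# Shows how the StorageBlobContext above is typically driven: define a
# StorageBlobModel subclass, register it (register_model creates the
# container and, when _encrypt is True, wires up KeyWrapper/KeyResolver),
# then upload, download and delete instances. The attribute names used here
# (_containername, _encrypt, name, content) are taken from how the context
# accesses its models; the StorageBlobModel constructor and the way content
# settings (content_type) are populated are assumptions, not a confirmed API.
class ImageBlob(StorageBlobModel):
    _containername = 'images'
    _encrypt = False

context = StorageBlobContext(
    AZURE_STORAGE_NAME='mystorageaccount',   # placeholder account name
    AZURE_STORAGE_KEY='account-key')         # placeholder key
context.register_model(ImageBlob())

blob = ImageBlob()
blob.name = 'sample.txt'
blob.content = b'hello blob'
# upload() rejects models without content settings, so the model is assumed
# to populate properties.content_settings.content_type internally.
context.upload(blob)

fetched = ImageBlob()
fetched.name = 'sample.txt'
fetched = context.download(fetched)          # get_blob_to_bytes + __mergeblob__
context.delete(fetched)                      # removes the blob again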
class AzureBlobObjectStore(ObjectStore): """ Object store that stores objects as blobs in an Azure Blob Container. A local cache exists that is used as an intermediate location for files between Galaxy and Azure. """ def __init__(self, config, config_xml): if BlockBlobService is None: raise Exception(NO_BLOBSERVICE_ERROR_MESSAGE) super(AzureBlobObjectStore, self).__init__(config) self.staging_path = self.config.file_path self.transfer_progress = 0 self._parse_config_xml(config_xml) self._configure_connection() self.container_lease = self._get_container_lease() # Clean cache only if value is set in galaxy.ini if self.cache_size != -1: # Convert GBs to bytes for comparison self.cache_size = self.cache_size * 1073741824 # Helper for interruptable sleep self.sleeper = Sleeper() self.cache_monitor_thread = threading.Thread(target=self.__cache_monitor) self.cache_monitor_thread.start() log.info("Cache cleaner manager started") ################### # Private Methods # ################### # config_xml is an ElementTree object. def _parse_config_xml(self, config_xml): try: auth_xml = config_xml.find('auth') self.account_name = auth_xml.get('account_name') self.account_key = auth_xml.get('account_key') container_xml = config_xml.find('container') self.container_name = container_xml.get('name') self.max_chunk_size = int(container_xml.get('max_chunk_size', 250)) # currently unused cache_xml = config_xml.find('cache') self.cache_size = float(cache_xml.get('size', -1)) self.staging_path = cache_xml.get('path', self.config.object_store_cache_path) for d_xml in config_xml.findall('extra_dir'): self.extra_dirs[d_xml.get('type')] = d_xml.get('path') log.debug("Object cache dir: %s", self.staging_path) log.debug(" job work dir: %s", self.extra_dirs['job_work']) except Exception: # Toss it back up after logging, we can't continue loading at this point. log.exception("Malformed ObjectStore Configuration XML -- unable to continue") raise def _configure_connection(self): log.debug("Configuring Connection") self.account = CloudStorageAccount(self.account_name, self.account_key) self.service = self.account.create_block_blob_service() def _get_container_lease(self): """ Sometimes a handle to a container is not established right away so try it a few times. Raise error is connection is not established. 
""" for i in range(5): try: self.service.break_container_lease(self.container_name) container_lease = self.service.acquire_container_lease(self.container_name) log.debug("Using azure blob store with container '%s'", self.container_name) return container_lease except AzureHttpError: try: log.debug("container not found, creating azure blob store container with name '%s'", self.container_name) self.service.create_container(self.container_name) container_lease = self.service.acquire_container_lease(self.container_name) return container_lease except AzureHttpError: log.exception("Could not get container '%s', attempt %s/5", self.container_name, i + 1) time.sleep(2) # All the attempts have been exhausted and connection was not established, # raise error raise AzureHttpError def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs): # extra_dir should never be constructed from provided data but just # make sure there are no shenannigans afoot if extra_dir and extra_dir != os.path.normpath(extra_dir): log.warning('extra_dir is not normalized: %s', extra_dir) raise ObjectInvalid("The requested object is invalid") # ensure that any parent directory references in alt_name would not # result in a path not contained in the directory path constructed here if alt_name: if not safe_relpath(alt_name): log.warning('alt_name would locate path outside dir: %s', alt_name) raise ObjectInvalid("The requested object is invalid") # alt_name can contain parent directory references, but S3 will not # follow them, so if they are valid we normalize them out alt_name = os.path.normpath(alt_name) rel_path = os.path.join(*directory_hash_id(obj.id)) if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) else: rel_path = os.path.join(rel_path, extra_dir) # for JOB_WORK directory if obj_dir: rel_path = os.path.join(rel_path, str(obj.id)) if base_dir: base = self.extra_dirs.get(base_dir) return os.path.join(base, rel_path) # S3 folders are marked by having trailing '/' so add it now # rel_path = '%s/' % rel_path # assume for now we don't need this in Azure blob storage. if not dir_only: rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) return rel_path def _fix_permissions(self, rel_path): """ Set permissions on rel_path""" for basedir, _, files in os.walk(rel_path): umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid) for filename in files: path = os.path.join(basedir, filename) # Ignore symlinks if os.path.islink(path): continue umask_fix_perms(path, self.config.umask, 0o666, self.config.gid) def _get_cache_path(self, rel_path): return os.path.abspath(os.path.join(self.staging_path, rel_path)) def _get_transfer_progress(self): return self.transfer_progress def _get_size_in_azure(self, rel_path): try: properties = self.service.get_blob_properties(self.container_name, rel_path) # Currently this returns a blob and not a BlobProperties object # Similar issue for the ruby https://github.com/Azure/azure-storage-ruby/issues/13 # The typecheck is an attempt at future-proofing this when/if the bug is fixed. 
if type(properties) is Blob: properties = properties.properties if properties: size_in_bytes = properties.content_length return size_in_bytes except AzureHttpError: log.exception("Could not get size of blob '%s' from Azure", rel_path) return -1 def _in_azure(self, rel_path): try: exists = self.service.exists(self.container_name, rel_path) except AzureHttpError: log.exception("Trouble checking existence of Azure blob '%s'", rel_path) return False return exists def _in_cache(self, rel_path): """ Check if the given dataset is in the local cache. """ cache_path = self._get_cache_path(rel_path) return os.path.exists(cache_path) def _pull_into_cache(self, rel_path): # Ensure the cache directory structure exists (e.g., dataset_#_files/) rel_path_dir = os.path.dirname(rel_path) if not os.path.exists(self._get_cache_path(rel_path_dir)): os.makedirs(self._get_cache_path(rel_path_dir)) # Now pull in the file file_ok = self._download(rel_path) self._fix_permissions(self._get_cache_path(rel_path_dir)) return file_ok def _transfer_cb(self, complete, total): self.transfer_progress = float(complete) / float(total) * 100 # in percent def _download(self, rel_path): local_destination = self._get_cache_path(rel_path) try: log.debug("Pulling '%s' into cache to %s", rel_path, local_destination) if self.cache_size > 0 and self._get_size_in_azure(rel_path) > self.cache_size: log.critical("File %s is larger (%s) than the cache size (%s). Cannot download.", rel_path, self._get_size_in_azure(rel_path), self.cache_size) return False else: self.transfer_progress = 0 # Reset transfer progress counter self.service.get_blob_to_path(self.container_name, rel_path, local_destination, progress_callback=self._transfer_cb) return True except AzureHttpError: log.exception("Problem downloading '%s' from Azure", rel_path) return False def _push_to_os(self, rel_path, source_file=None, from_string=None): """ Push the file pointed to by ``rel_path`` to the object store naming the blob ``rel_path``. If ``source_file`` is provided, push that file instead while still using ``rel_path`` as the blob name. If ``from_string`` is provided, set contents of the file to the value of the string. 
""" try: source_file = source_file or self._get_cache_path(rel_path) if not os.path.exists(source_file): log.error("Tried updating blob '%s' from source file '%s', but source file does not exist.", rel_path, source_file) return False if os.path.getsize(source_file) == 0: log.debug("Wanted to push file '%s' to azure blob '%s' but its size is 0; skipping.", source_file, rel_path) return True if from_string: self.service.create_blob_from_text(self.container_name, rel_path, from_string, progress_callback=self._transfer_cb) log.debug("Pushed data from string '%s' to blob '%s'", from_string, rel_path) else: start_time = datetime.now() log.debug("Pushing cache file '%s' of size %s bytes to '%s'", source_file, os.path.getsize(source_file), rel_path) self.transfer_progress = 0 # Reset transfer progress counter self.service.create_blob_from_path(self.container_name, rel_path, source_file, progress_callback=self._transfer_cb) end_time = datetime.now() log.debug("Pushed cache file '%s' to blob '%s' (%s bytes transfered in %s sec)", source_file, rel_path, os.path.getsize(source_file), end_time - start_time) return True except AzureHttpError: log.exception("Trouble pushing to Azure Blob '%s' from file '%s'", rel_path, source_file) return False ################## # Public Methods # ################## def exists(self, obj, **kwargs): in_cache = in_azure = False rel_path = self._construct_path(obj, **kwargs) in_cache = self._in_cache(rel_path) in_azure = self._in_azure(rel_path) # log.debug("~~~~~~ File '%s' exists in cache: %s; in azure: %s" % (rel_path, in_cache, in_azure)) # dir_only does not get synced so shortcut the decision dir_only = kwargs.get('dir_only', False) base_dir = kwargs.get('base_dir', None) if dir_only: if in_cache or in_azure: return True # for JOB_WORK directory elif base_dir: if not os.path.exists(rel_path): os.makedirs(rel_path) return True else: return False # TODO: Sync should probably not be done here. Add this to an async upload stack? if in_cache and not in_azure: self._push_to_os(rel_path, source_file=self._get_cache_path(rel_path)) return True elif in_azure: return True else: return False def file_ready(self, obj, **kwargs): """ A helper method that checks if a file corresponding to a dataset is ready and available to be used. Return ``True`` if so, ``False`` otherwise. 
""" rel_path = self._construct_path(obj, **kwargs) # Make sure the size in cache is available in its entirety if self._in_cache(rel_path): local_size = os.path.getsize(self._get_cache_path(rel_path)) remote_size = self._get_size_in_azure(rel_path) if local_size == remote_size: return True else: log.debug("Waiting for dataset %s to transfer from OS: %s/%s", rel_path, local_size, remote_size) return False def create(self, obj, **kwargs): if not self.exists(obj, **kwargs): # Pull out locally used fields extra_dir = kwargs.get('extra_dir', None) extra_dir_at_root = kwargs.get('extra_dir_at_root', False) dir_only = kwargs.get('dir_only', False) alt_name = kwargs.get('alt_name', None) # Construct hashed path rel_path = os.path.join(*directory_hash_id(obj.id)) # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) else: rel_path = os.path.join(rel_path, extra_dir) # Create given directory in cache cache_dir = os.path.join(self.staging_path, rel_path) if not os.path.exists(cache_dir): os.makedirs(cache_dir) # Although not really necessary to create S3 folders (because S3 has # flat namespace), do so for consistency with the regular file system # S3 folders are marked by having trailing '/' so add it now # s3_dir = '%s/' % rel_path # self._push_to_os(s3_dir, from_string='') # If instructed, create the dataset in cache & in S3 if not dir_only: rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_os(rel_path, from_string='') def empty(self, obj, **kwargs): if self.exists(obj, **kwargs): return bool(self.size(obj, **kwargs) > 0) else: raise ObjectNotFound( 'objectstore.empty, object does not exist: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) ) def size(self, obj, **kwargs): rel_path = self._construct_path(obj, **kwargs) if self._in_cache(rel_path): try: return os.path.getsize(self._get_cache_path(rel_path)) except OSError as ex: log.info("Could not get size of file '%s' in local cache, will try Azure. Error: %s", rel_path, ex) elif self.exists(obj, **kwargs): return self._get_size_in_azure(rel_path) log.warning("Did not find dataset '%s', returning 0 for size", rel_path) return 0 def delete(self, obj, entire_dir=False, **kwargs): rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get('extra_dir', None) base_dir = kwargs.get('base_dir', None) dir_only = kwargs.get('dir_only', False) obj_dir = kwargs.get('obj_dir', False) try: if base_dir and dir_only and obj_dir: # Remove temporary data in JOB_WORK directory shutil.rmtree(os.path.abspath(rel_path)) return True # For the case of extra_files, because we don't have a reference to # individual files/blobs we need to remove the entire directory structure # with all the files in it. This is easy for the local file system, # but requires iterating through each individual blob in Azure and deleing it. 
if entire_dir and extra_dir: shutil.rmtree(self._get_cache_path(rel_path)) blobs = self.service.list_blobs(self.container_name, prefix=rel_path) for blob in blobs: log.debug("Deleting from Azure: %s", blob) self.service.delete_blob(self.container_name, blob.name) return True else: # Delete from cache first os.unlink(self._get_cache_path(rel_path)) # Delete from S3 as well if self._in_azure(rel_path): log.debug("Deleting from Azure: %s", rel_path) self.service.delete_blob(self.container_name, rel_path) return True except AzureHttpError: log.exception("Could not delete blob '%s' from Azure", rel_path) except OSError: log.exception('%s delete error', self.get_filename(obj, **kwargs)) return False def get_data(self, obj, start=0, count=-1, **kwargs): rel_path = self._construct_path(obj, **kwargs) # Check cache first and get file if not there if not self._in_cache(rel_path): self._pull_into_cache(rel_path) # Read the file content from cache data_file = open(self._get_cache_path(rel_path), 'r') data_file.seek(start) content = data_file.read(count) data_file.close() return content def get_filename(self, obj, **kwargs): rel_path = self._construct_path(obj, **kwargs) base_dir = kwargs.get('base_dir', None) dir_only = kwargs.get('dir_only', False) obj_dir = kwargs.get('obj_dir', False) # for JOB_WORK directory if base_dir and dir_only and obj_dir: return os.path.abspath(rel_path) cache_path = self._get_cache_path(rel_path) # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return # the expected cache path. # dir_only = kwargs.get('dir_only', False) # if dir_only: # if not os.path.exists(cache_path): # os.makedirs(cache_path) # return cache_path # Check if the file exists in the cache first if self._in_cache(rel_path): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache elif self.exists(obj, **kwargs): if dir_only: # Directories do not get pulled into cache return cache_path else: if self._pull_into_cache(rel_path): return cache_path # For the case of retrieving a directory only, return the expected path # even if it does not exist. # if dir_only: # return cache_path raise ObjectNotFound( 'objectstore.get_filename, no cache_path: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) ) return cache_path # Until the upload tool does not explicitly create the dataset, return expected path def update_from_file(self, obj, file_name=None, create=False, **kwargs): if create is True: self.create(obj, **kwargs) elif self.exists(obj, **kwargs): rel_path = self._construct_path(obj, **kwargs) # Chose whether to use the dataset file itself or an alternate file if file_name: source_file = os.path.abspath(file_name) # Copy into cache cache_file = self._get_cache_path(rel_path) try: if source_file != cache_file: # FIXME? Should this be a `move`? 
shutil.copy2(source_file, cache_file) self._fix_permissions(cache_file) except OSError: log.exception("Trouble copying source file '%s' to cache '%s'", source_file, cache_file) else: source_file = self._get_cache_path(rel_path) self._push_to_os(rel_path, source_file) else: raise ObjectNotFound( 'objectstore.update_from_file, object does not exist: %s, kwargs: %s' % ( str( obj ), str( kwargs ) ) ) def get_object_url(self, obj, **kwargs): if self.exists(obj, **kwargs): rel_path = self._construct_path(obj, **kwargs) try: url = self.service.make_blob_url(container_name=self.container_name, blob_name=rel_path) return url except AzureHttpError: log.exception("Trouble generating URL for dataset '%s'", rel_path) return None def get_store_usage_percent(self): return 0.0 ################## # Secret Methods # ################## def __cache_monitor(self): time.sleep(2) # Wait for things to load before starting the monitor while self.running: total_size = 0 # Is this going to be too expensive of an operation to be done frequently? file_list = [] for dirpath, _, filenames in os.walk(self.staging_path): for filename in filenames: filepath = os.path.join(dirpath, filename) file_size = os.path.getsize(filepath) total_size += file_size # Get the time given file was last accessed last_access_time = time.localtime(os.stat(filepath)[7]) # Compose a tuple of the access time and the file path file_tuple = last_access_time, filepath, file_size file_list.append(file_tuple) # Sort the file list (based on access time) file_list.sort() # Initiate cleaning once within 10% of the defined cache size? cache_limit = self.cache_size * 0.9 if total_size > cache_limit: log.info("Initiating cache cleaning: current cache size: %s; clean until smaller than: %s", convert_bytes(total_size), convert_bytes(cache_limit)) # How much to delete? If simply deleting up to the cache-10% limit, # is likely to be deleting frequently and may run the risk of hitting # the limit - maybe delete additional #%? # For now, delete enough to leave at least 10% of the total cache free delete_this_much = total_size - cache_limit # Keep deleting datasets from file_list until deleted_amount does not # exceed delete_this_much; start deleting from the front of the file list, # which assumes the oldest files come first on the list. deleted_amount = 0 for entry in enumerate(file_list): if deleted_amount < delete_this_much: deleted_amount += entry[2] os.remove(entry[1]) # Debugging code for printing deleted files' stats # folder, file_name = os.path.split(f[1]) # file_date = time.strftime("%m/%d/%y %H:%M:%S", f[0]) # log.debug("%s. %-25s %s, size %s (deleted %s/%s)" \ # % (i, file_name, convert_bytes(f[2]), file_date, \ # convert_bytes(deleted_amount), convert_bytes(delete_this_much))) else: log.debug("Cache cleaning done. Total space freed: %s", convert_bytes(deleted_amount)) self.sleeper.sleep(30) # Test cache size every 30 seconds?
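# --- Hedged configuration sketch (not part of the original source) ---
# _parse_config_xml above looks for <auth>, <container>, <cache> and
# <extra_dir> elements with the attributes shown below; those names are taken
# directly from the parser. The <object_store type="azure_blob"> wrapper, the
# concrete paths and the cache size (in GB, per the conversion in __init__)
# are assumptions for illustration only.
import xml.etree.ElementTree as ElementTree

AZURE_OBJECT_STORE_XML = """
<object_store type="azure_blob">
    <auth account_name="mystorageaccount" account_key="account-key" />
    <container name="galaxy-data" max_chunk_size="250" />
    <cache path="database/object_store_cache" size="100" />
    <extra_dir type="job_work" path="database/job_working_directory_azure" />
    <extra_dir type="temp" path="database/tmp_azure" />
</object_store>
"""

config_xml = ElementTree.fromstring(AZURE_OBJECT_STORE_XML)
# store = AzureBlobObjectStore(galaxy_config, config_xml)
# 'galaxy_config' must supply file_path, object_store_cache_path, umask and
# gid; constructing it is outside the scope of this sketch.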
class Passthrough(Operations): def __init__(self, root): self.root = root print root try: import config as config except: raise ValueError('Please specify configuration settings in config.py.') if config.IS_EMULATED: self.account = CloudStorageAccount(is_emulated=True) else: # Note that account key and sas should not both be included account_name = config.STORAGE_ACCOUNT_NAME account_key = config.STORAGE_ACCOUNT_KEY sas = config.SAS self.account = CloudStorageAccount(account_name=account_name, account_key=account_key, sas_token=sas) self.service = self.account.create_block_blob_service() def _full_path(self, partial): if partial.startswith("/"): partial = partial[1:] path = os.path.join(self.root, partial) return path def _get_container_reference(self, prefix='container'): return '{}{}'.format(prefix, str(uuid.uuid4()).replace('-', '')) def access(self, path, mode): if debug: print "access" full_path = self._full_path(path) #if not os.access(full_path, mode): # pass#raise FuseOSError(errno.EACCES) return 0 def chmod(self, path, mode): pass def chown(self, path, uid, gid): pass def getattr(self, path, fh=None): if debug: print "getattr " + path isFolder = False if len(path.split('/')) == 2: isFolder = True """link_data = { "st_ctime" : 1456615173, "st_mtime" : 1456615173, "st_nlink" : 2, "st_mode" : 16893, "st_size" : 2, "st_gid" : 1000, "st_uid" : 1000, "st_atime" : time(), }""" folder_data = { "st_ctime" : 1456615173, "st_mtime" : 1456615173, "st_nlink" : 2, # "st_mode" : 16893, "st_mode" : 16895, "st_size" : 2, "st_gid" : 1000, "st_uid" : 1000, "st_atime" : time(), } full_path = self._full_path(path) try: st = os.lstat(full_path) print st rdata = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid')) except: pass #if os.path.isfile == True: # return if isFolder: for container in list(self.service.list_containers()): if container.name == path[1:]: return folder_data else: """import config as config account_name = config.STORAGE_ACCOUNT_NAME account_key = config.STORAGE_ACCOUNT_KEY""" containername = path.split('/')[1] filename = path.split('/')[2] """block_blob_service = BlockBlobService(account_name, account_key) if os.path.isfile(full_path) == False: fileSize = 1 else: try: pass fileSize = os.path.getsize(full_path) except: fileSize = 1""" self.service = self.account.create_block_blob_service() file_data = { "st_ctime" : 1456615173, "st_mtime" : 1456615173, "st_nlink" : 1, # "st_mode" : 33188, "st_mode" : 33279, "st_size" : self.service.get_blob_properties(containername, filename).properties.content_length, "st_gid" : 1000, "st_uid" : 1000, "st_atime" : time(), } return file_data st = os.lstat(full_path) print st rdata = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid')) return rdata def readdir(self, path, fh): if debug: print "readdir " + path full_path = self._full_path(path) dirents = ['.', '..'] #if os.path.isdir(full_path): # dirents.extend(os.listdir(full_path)) for r in dirents: yield r containers = list(self.service.list_containers()) #print('All containers in your account:') if path == "/": for container in containers: yield container.name else: folder = path[1:] blobs = list(self.service.list_blobs(folder)) for blob in blobs: yield blob.name def readlink(self, path): if debug: print "readlink" pathname = os.readlink(self._full_path(path)) if pathname.startswith("/"): # Path name is absolute, sanitize it. 
return os.path.relpath(pathname, self.root) else: return pathname def mknod(self, path, mode, dev): return os.mknod(self._full_path(path), mode, dev) def rmdir(self, path): if debug: print "rmdir " + path[1:] deleted = self.service.delete_container(path[1:]) return 0 def mkdir(self, path, mode): """ Only valid in the top level of the mounted directory. Creates a container to serve as the folder A container name must be a valid DNS name, conforming to the following naming rules: 1) Container names must start with a letter or number, and can contain only letters, numbers, and the dash (-) character. 2) Every dash (-) character must be immediately preceded and followed by a letter or number; consecutive dashes are not permitted in container names. 3) All letters in a container name must be lowercase. 4) Container names must be from 3 through 63 characters long. 30 second lease timeout on deleted directory. """ if debug: print "mkdir " + path[1:] # TODO: validate input self.service.create_container(path[1:]) return 0 def statfs(self, path): full_path = self._full_path(path) stv = os.statvfs(full_path) return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree', 'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag', 'f_frsize', 'f_namemax')) def unlink(self, path): return os.unlink(self._full_path(path)) def symlink(self, name, target): return os.symlink(name, self._full_path(target)) def rename(self, old, new): """ 1) create new container 2) stream contents of old container to new container 3) delete old container """ # step 1 self.mkdir(new, 0777) # step 2 # TODO: steam contents to new container """import config as config account_name = config.STORAGE_ACCOUNT_NAME account_key = config.STORAGE_ACCOUNT_KEY block_blob_service = BlockBlobService(account_name, account_key) block_blob_service.get_blob_to_path(containername, filename, tempfilename) block_blob_service.create_blob_from_path(new, filename, filename)""" #step 3 self.rmdir(old) def link(self, target, name): return os.link(self._full_path(target), self._full_path(name)) def utimens(self, path, times=None): return os.utime(self._full_path(path), times) # File methods # ============ def open(self, path, flags): """if debug: print "open: " + path print flags full_path = self._full_path(path) import config as config account_name = config.STORAGE_ACCOUNT_NAME account_key = config.STORAGE_ACCOUNT_KEY containername = path.split('/')[1] filename = path.split('/')[2] block_blob_service = BlockBlobService(account_name, account_key) try: print "get block blob" if os.path.isdir(path.split('/')[1]) == False: os.mkdir(full_path.split('/')[0]+'/'+containername) if os.path.isfile(full_path) == False: block_blob_service.get_blob_to_path(containername, filename, full_path) else: print "get block blob" os.remove(full_path) block_blob_service.get_blob_to_path(containername, filename, full_path) except: pass print "full path: " + full_path print os.path.isfile(full_path)""" return 0#os.open(full_path, flags) def create(self, path, mode, fi=None): if debug: print "create: " + path full_path = self._full_path(path) return os.open(full_path, os.O_WRONLY | os.O_CREAT, mode) def read(self, path, length, offset, fh): if debug: print "read: " + path print "offset: " print offset print "length: " print length print fh full_path = self._full_path(path) print full_path #os.lseek(fh, offset, os.SEEK_SET) #if os.path.isfile(full_path) == False: import config as config account_name = config.STORAGE_ACCOUNT_NAME account_key = 
config.STORAGE_ACCOUNT_KEY containername = path.split('/')[1] filename = path.split('/')[2] service = baseblobservice.BaseBlobService(account_name, account_key) blob = service.get_blob_to_bytes(containername, filename, None, offset, offset+length-1) #blob = blob[offset:(offset+length)] bytes = blob.content return bytes """try: if os.path.isdir(path.split('/')[1]) == False: os.mkdir(full_path.split('/')[0]+'/'+containername) if os.path.isfile(full_path) == False: print "read block blob" block_blob_service.get_blob_to_path(containername, filename, full_path) else: os.remove(full_path) block_blob_service.get_blob_to_path(containername, filename, full_path) except: pass fhn = os.open(full_path, 32768) os.lseek(fhn, offset, os.SEEK_SET) #print "os.read(fh, length)" #print os.read(fh, length) return os.read(fhn, length)""" def write(self, path, buf, offset, fh): if debug: print "write: " + path os.lseek(fh, offset, os.SEEK_SET) return os.write(fh, buf) def truncate(self, path, length, fh=None): print "truncate: " + path full_path = self._full_path(path) with open(full_path, 'r+') as f: f.truncate(length) def flush(self, path, fh): print "flush: " + path return os.fsync(fh) def release(self, path, fh): print "release: " + path return os.close(fh) def fsync(self, path, fdatasync, fh): print "fsync: " + path return self.flush(path, fh)
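# --- Hedged mount sketch (not part of the original source) ---
# Passthrough implements fusepy's Operations interface, so the usual way to
# expose the blob containers as a file system is the FUSE entry point below.
# The FUSE import and the argument order (local root, then mount point) are
# assumptions; they are not part of the original snippet.
import sys
from fuse import FUSE

def mount(root, mountpoint):
    # foreground=True keeps the process attached so the debug prints above
    # stay visible; nothreads=True sidesteps concurrency in this simple port.
    FUSE(Passthrough(root), mountpoint, nothreads=True, foreground=True)

if __name__ == '__main__':
    mount(sys.argv[1], sys.argv[2])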
def connection(self):
    if self._connection is None:
        account = CloudStorageAccount(self.account_name, self.account_key)
        self._connection = account.create_block_blob_service()
    return self._connection
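# --- Hedged context sketch (not part of the original source) ---
# connection() is a lazy accessor: the BlockBlobService is created on first
# use and then cached on the instance. A minimal enclosing class consistent
# with the attributes it references might look like this; the class name and
# constructor are assumptions.
class BlobConnectionHolder(object):
    def __init__(self, account_name, account_key):
        self.account_name = account_name
        self.account_key = account_key
        self._connection = None          # filled in lazily by connection()

    def connection(self):
        if self._connection is None:
            account = CloudStorageAccount(self.account_name, self.account_key)
            self._connection = account.create_block_blob_service()
        return self._connection

# holder = BlobConnectionHolder('mystorageaccount', 'account-key')
# holder.connection() is holder.connection()   # True: created exactly once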
from app import app, db
from flask import request, render_template, url_for, redirect
from datetime import datetime
from werkzeug.utils import secure_filename
from app.models import Ad, CategoryList
from os.path import basename, splitext
import random, string

# Create storage service
from azure.storage import CloudStorageAccount
storage_account = CloudStorageAccount(
    account_name=app.config['STORAGE_ACCOUNT_NAME'],
    account_key=app.config['STORAGE_ACCOUNT_KEY'])
block_blob_service = storage_account.create_block_blob_service()

# Create container
from azure.storage.blob import PublicAccess
block_blob_service.create_container('images', public_access=PublicAccess.Container)

# Create service bus service
from azure.servicebus import ServiceBusService, Message, Queue
bus_service = ServiceBusService(
    service_namespace=app.config['SERVICEBUS_NAMESPACE'],
    shared_access_key_name=app.config['SERVICEBUS_ACCESS_KEYNAME'],
    shared_access_key_value=app.config['SERVICEBUS_ACCESS_KEYVALUE'])

# Create queue
bus_service.create_queue('adqueue', None, False)
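# --- Hedged route sketch (not part of the original source) ---
# Shows how the services created above are typically combined: store an
# uploaded image in the 'images' container, persist an Ad row, then notify a
# background worker through the 'adqueue' Service Bus queue. The route path,
# form fields, Ad columns and the 'home' endpoint are assumptions, not part
# of the original module.
@app.route('/ads/new', methods=['POST'])
def create_ad():
    image = request.files['image']
    # Randomised blob name keeps the original extension but avoids collisions.
    blob_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(16)) \
        + splitext(secure_filename(image.filename))[1]
    block_blob_service.create_blob_from_stream('images', blob_name, image.stream)

    ad = Ad(title=request.form['title'],
            image_url=block_blob_service.make_blob_url('images', blob_name),
            date_created=datetime.utcnow())
    db.session.add(ad)
    db.session.commit()

    # Let a worker know a new ad needs processing; the body is just the ad id.
    bus_service.send_queue_message('adqueue', Message(str(ad.id)))
    return redirect(url_for('home'))   # 'home' endpoint assumed to exist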