def submit():
    """Submit a Slurm array job that processes every blob in the input container.

    Reads the module-level ACCOUNT_NAME/ACCOUNT_KEY and container constants,
    publishes SAS signatures plus the blob list via environment variables,
    then shells out to ``sbatch``.
    """
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read for 24 hours) for the input container, save to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY,
                      container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (write for 24 hours) for the output container, save to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY,
                       container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs and dump the content to a string
    bloblist = [blob.name for blob in blob_service.list_blobs(INPUT_CONTAINER)]
    if not bloblist:
        # Nothing to process; an empty list would produce an invalid
        # "--array=0--1" range below.
        return

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch. Slurm --array bounds are INCLUSIVE, so the last valid
    # task index is len(bloblist) - 1; the original 0-len(bloblist) range
    # launched one task too many.
    cli = "sbatch -N 2 -n 2 --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist) - 1)
    run(cli, showoutput=True)
def submit():
    """Submit a Slurm array job that processes every blob in the input container.

    Variant without explicit node/task counts: publishes SAS signatures and
    the blob list through environment variables, then invokes ``sbatch``.
    """
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read for 24 hours) for the input container, save to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY,
                      container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (write for 24 hours) for the output container, save to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY,
                       container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs and dump the content to a string
    bloblist = [blob.name for blob in blob_service.list_blobs(INPUT_CONTAINER)]
    if not bloblist:
        # Nothing to process; an empty list would produce an invalid
        # "--array=0--1" range below.
        return

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch. Slurm --array bounds are INCLUSIVE, so the last valid
    # task index is len(bloblist) - 1; the original 0-len(bloblist) range
    # launched one task too many.
    cli = "sbatch --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist) - 1)
    run(cli, showoutput=True)
def enumerate_objects(container):
    """Return the names of all blobs stored in *container*."""
    service = BlobService(AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY)
    return [entry.name for entry in service.list_blobs(container)]
def upload_azure_blob(account, account_key, container, filename, file, file_type='file/csv'):
    """Upload the stream *file* to *container* as blob *filename*, then list the container.

    Parameters:
        account, account_key: Azure storage credentials.
        container: target container name.
        filename: blob name to create.
        file: an open stream to upload.
        file_type: accepted for backward compatibility but currently UNUSED —
            create_blob_from_stream is not given a content type here.
            TODO confirm whether it should be passed through as the blob's
            content type.
    """
    block_blob_service = BlobService(account_name=account, account_key=account_key)
    block_blob_service.create_blob_from_stream(container, filename, file)
    # Echo the container contents so the caller can confirm the upload.
    for blob in block_blob_service.list_blobs(container):
        print(blob.name)
def download_data(key):
    """Download every blob in the 'recruitingdata' container to a local CSV file."""
    blob_service = BlobService(account_name='asosdsrecruiting', account_key=key)

    # Collect the full listing; the service returns pages linked by next_marker.
    blobs = []
    marker = None
    while True:
        page = blob_service.list_blobs('recruitingdata', marker=marker)
        blobs.extend(page)
        if not page.next_marker:
            break
        marker = page.next_marker

    # Fetch each blob, flattening virtual directories into the file name.
    for blob in blobs:
        file_name = blob.name
        print('Downloading: ' + file_name)
        local_name = file_name.replace('/', '_') + '.csv'
        blob_service.get_blob_to_path('recruitingdata', file_name, local_name)
class _BlobStorageTestCase(_TestCase):
    """Base test case that removes leftover log blobs before each test runs."""

    def _get_container_name(self, handler_name):
        """Return the handler's configured container name, normalized for Azure."""
        name = _get_handler_config_value(handler_name, 'container')
        if not name:
            return name
        return name.replace('_', '-').lower()

    def setUp(self):
        """Connect to blob storage and delete any pre-existing log blobs."""
        self.service = BlobService(ACCOUNT_NAME, ACCOUNT_KEY)

        # ensure that there's no log file in the container before each test
        existing = [c.name for c in self.service.list_containers()]
        for handler in LOGGING['handlers']:
            container = self._get_container_name(handler)
            if container not in existing:
                continue
            filename = _get_handler_config_value(handler, 'filename')
            prefix = os.path.basename(filename)
            for blob in self.service.list_blobs(container, prefix=prefix):
                self.service.delete_blob(container, blob.name)
class _BlobStorageTestCase(_TestCase):
    """Base test case that wipes handler log blobs from Azure before each test."""

    def _get_container_name(self, handler_name):
        """Return the handler's configured container name, normalized.

        Underscores are replaced and the name lowercased — presumably because
        Azure container names disallow underscores and uppercase; confirm
        against the handler config conventions.
        """
        container = _get_handler_config_value(handler_name, 'container')
        if container:
            container = container.replace('_', '-').lower()
        return container

    def setUp(self):
        """Connect to blob storage and delete leftover log blobs for each handler."""
        self.service = BlobService(ACCOUNT_NAME, ACCOUNT_KEY)

        # ensure that there's no log file in the container before each test
        containers = [c.name for c in self.service.list_containers()]
        for handler in LOGGING['handlers']:
            container = self._get_container_name(handler)
            if container in containers:
                filename = _get_handler_config_value(handler, 'filename')
                basename = os.path.basename(filename)
                # Only blobs whose names start with the log file's basename
                # are removed; other blobs in the container are untouched.
                for blob in self.service.list_blobs(container, prefix=basename):
                    self.service.delete_blob(container, blob.name)
class BlobReader(Reader):
    """Reader that fetches blobs from an Azure storage container with retries."""

    def __init__(self, account, key, container):
        self.block_blob_service = BlobService(account_name=account, account_key=key)
        self.container = container

    def get_data(self, name):
        """Return blob *name* as bytes, retrying up to BLOB_RETRIES times.

        Raises RuntimeError with the last Azure error if all retries fail.
        """
        last_exc = None
        counter = BLOB_RETRIES
        while counter:
            try:
                data = self.block_blob_service.get_blob_to_bytes(
                    self.container, name)
            except AzureException as azure_exc:
                # BUG FIX: the `as azure_exc` name is deleted when the except
                # block exits (Python 3), so referencing it after the loop
                # raised NameError; keep the last exception in an outer var.
                last_exc = azure_exc
                counter -= 1
            else:
                return data
        raise RuntimeError("Couldn't read from blob, %s" % (last_exc.args[0]))

    def to_file(self, handle, blobpath):
        """Stream blob *blobpath* into the open file object *handle*, with retries."""
        last_exc = None
        counter = BLOB_RETRIES
        while counter:
            try:
                self.block_blob_service.get_blob_to_file(
                    self.container, blobpath, handle,
                    max_connections=2, progress_callback=None)
            except AzureException as azure_exc:
                # Same fix as get_data: preserve the exception beyond the handler.
                last_exc = azure_exc
                counter -= 1
            else:
                return
        raise RuntimeError("Couldn't download blob, %s" % (last_exc.args[0]))

    def list(self, prefix):
        """Return the service listing of blobs under *prefix*."""
        return self.block_blob_service.list_blobs(self.container, prefix)
def module_impl(rm, log, params, check_mode=False):
    """Implement the Azure blob/container module actions.

    Parameters:
        rm: resource-manager wrapper exposing ``storage_client``.
        log: callable used for progress/debug logging.
        params: dict of module parameters (mode, names, SAS options, ...).
        check_mode: when True, report what would change without doing it.

    Returns a ``results`` dict with at least ``changed`` and ``msg``.
    Raises Exception on parameter errors or Azure failures.
    """
    if not HAS_AZURE:
        raise Exception("The Azure python sdk is not installed (try 'pip install azure')")
    if not HAS_REQUESTS:
        raise Exception("The requests python module is not installed (try 'pip install requests')")

    resource_group = params.get('resource_group')
    account_name = params.get('account_name')
    container_name = params.get('container_name')
    mode = params.get('mode')
    x_ms_meta_name_values = params.get('x_ms_meta_name_values')
    x_ms_blob_public_access = params.get('x_ms_blob_public_access')
    x_ms_blob_cache_control = params.get('x_ms_blob_cache_control')
    x_ms_blob_content_encoding = params.get('x_ms_blob_content_encoding')
    x_ms_blob_content_language = params.get('x_ms_blob_content_language')
    x_ms_blob_content_type = params.get('x_ms_blob_content_type')
    prefix = params.get('prefix')
    marker = params.get('marker')
    max_results = params.get('max_results')
    blob_name = params.get('blob_name')
    file_path = params.get('file_path')
    overwrite = params.get('overwrite')
    permissions = params.get('permissions')
    hours = params.get('hours')
    days = params.get('days')
    access_token = params.get('access_token')

    results = dict(changed=False)

    storage_client = rm.storage_client

    if not resource_group:
        raise Exception("Parameter error: resource_group cannot be None.")
    if not account_name:
        raise Exception("Parameter error: account_name cannot be None.")
    if not container_name:
        raise Exception("Parameter error: container_name cannot be None.")
    if not NAME_PATTERN.match(container_name):
        raise Exception("Parameter error: container_name must consist of lowercase letters, "
                        "numbers and hyphens. It must begin with a letter or number. "
                        "It may not contain two consecutive hyphens.")

    # add file path validation

    results['account_name'] = account_name
    results['resource_group'] = resource_group
    results['container_name'] = container_name

    # put (upload), get (download), geturl (return download url (Ansible 1.3+),
    # getstr (download object as string (1.3+)), list (list keys (2.0+)),
    # create (bucket), delete (bucket), and delobj (delete object)
    try:
        log('Getting keys')
        keys = {}
        response = storage_client.storage_accounts.list_keys(resource_group, account_name)
        keys[KeyName.key1] = response.storage_account_keys.key1
        keys[KeyName.key2] = response.storage_account_keys.key2
    except AzureHttpError as e:
        log('Error getting keys for account %s' % account_name)
        raise Exception(str(e.message))

    try:
        log('Create blob service')
        bs = BlobService(account_name, keys[KeyName.key1])
    except Exception as e:
        log('Error creating blob service.')
        raise Exception(str(e.args[0]))

    if mode == 'create':
        container = get_container_facts(bs, container_name)
        if container is not None:
            # container exists
            results['container'] = container
            results['msg'] = "Container already exists."
            return results
        # create the container
        if not check_mode:
            log('Create container %s' % container_name)
            bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
        results['container'] = get_container_facts(bs, container_name)
        results['msg'] = "Container created successfully."
        results['changed'] = True
        return results

    if mode == 'update':
        container = get_container_facts(bs, container_name)
        if container is None:
            # container does not exist
            if not check_mode:
                log('Create container %s' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['changed'] = True
            results['msg'] = 'Container created successfully.'
            return results
        # update existing container
        results['msg'] = "Container not changed."
        if x_ms_meta_name_values:
            if not check_mode:
                log('Update x_ms_meta_name_values for container %s' % container_name)
                bs.set_container_metadata(container_name, x_ms_meta_name_values)
            results['changed'] = True
            results['msg'] = 'Container meta data updated successfully.'
        if x_ms_blob_public_access:
            access = x_ms_blob_public_access
            if x_ms_blob_public_access == 'private':
                # 'private' maps to no public access flag at the API level.
                access = None
            if not check_mode:
                log('Set access to %s for container %s' % (access, container_name))
                bs.set_container_acl(container_name=container_name, x_ms_blob_public_access=access)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        if permissions:
            if hours == 0 and days == 0:
                raise Exception("Parameter error: expecting hours > 0 or days > 0")
            # Renamed from `id` to avoid shadowing the builtin.
            rule_id = "%s-%s" % (container_name, permissions)
            si = get_identifier(rule_id, hours, days, permissions)
            identifiers = SignedIdentifiers()
            identifiers.signed_identifiers.append(si)
            if not check_mode:
                log('Set permissions to %s for container %s' % (permissions, container_name))
                bs.set_container_acl(container_name=container_name, signed_identifiers=identifiers)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        results['container'] = get_container_facts(bs, container_name)
        return results

    if mode == 'delete':
        container = get_container_facts(bs, container_name)
        if container is None:
            results['msg'] = "Container %s could not be found." % container_name
            return results
        if not check_mode:
            log('Deleting container %s' % container_name)
            bs.delete_container(container_name)
        results['changed'] = True
        results['msg'] = 'Container deleted successfully.'
        return results

    if mode == 'delete_blob':
        if blob_name is None:
            raise Exception("Parameter error: blob_name cannot be None.")
        # container_check raises if the container is missing.
        container = container_check(bs, container_name)
        blob = get_blob_facts(bs, container_name, blob_name)
        if not blob:
            results['msg'] = 'Blob %s could not be found in container %s.' % (blob_name, container_name)
            return results
        if not check_mode:
            # Typo fix: was 'Deleteing'.
            log('Deleting %s from container %s.' % (blob_name, container_name))
            bs.delete_blob(container_name, blob_name)
        results['changed'] = True
        results['msg'] = 'Blob successfully deleted.'
        return results

    if mode == 'put':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")
        if not file_path:
            raise Exception("Parameter error: file_path cannot be None.")
        if not path_check(file_path):
            raise Exception("File %s does not exist." % file_path)

        container = get_container_facts(bs, container_name)
        blob = None
        if container is not None:
            blob = get_blob_facts(bs, container_name, blob_name)

        if container is not None and blob is not None:
            # both container and blob already exist
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            results['container'] = container
            results['blob'] = blob
            if md5_local == md5_remote:
                sum_matches = True
                results['msg'] = 'File checksums match. File not uploaded.'
                if overwrite == 'always':
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                    results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                    results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force upload."
            return results

        if container is None:
            # container does not exist. create container and upload.
            if not check_mode:
                log('Creating container %s.' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
            # Typo fix: results key was 'conainer'.
            results['container'] = get_container_facts(bs, container_name)
            results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully created container and uploaded file.'
            return results

        if container is not None:
            # container exists. just upload.
            if not check_mode:
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
            results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            # Typo fix: was 'Successfully updloaded file.'
            results['msg'] = 'Successfully uploaded file.'
            return results

    if mode == 'list':
        container = container_check(bs, container_name)
        response = bs.list_blobs(
            container_name,
            prefix,
            marker,
            max_results
        )
        results['blobs'] = []
        for blob in response.blobs:
            b = dict(
                name=blob.name,
                snapshot=blob.snapshot,
                last_modified=blob.properties.last_modified,
                content_length=blob.properties.content_length,
                blob_type=blob.properties.blob_type,
            )
            results['blobs'].append(b)
        return results

    if mode == 'get':
        if file_path is None:
            raise Exception("Parameter error: file_path cannot be None.")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        path_exists = path_check(file_path)
        if not path_exists or overwrite == 'always':
            if not check_mode:
                bs.get_blob_to_path(container_name, blob_name, file_path)
            results['changed'] = True
            results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
            return results
        if path_exists:
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            if md5_local == md5_remote:
                sum_matches = True
                # NOTE: overwrite == 'always' was already handled (and
                # returned) above, so this branch is effectively dead; kept
                # to mirror the original control flow.
                if overwrite == 'always':
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force download."
            if sum_matches is True and overwrite == 'never':
                results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
        return results

    if mode == 'get_url':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        url = bs.make_blob_url(
            container_name=container_name,
            blob_name=blob_name,
            sas_token=access_token)
        results['url'] = url
        results['msg'] = "Url: %s" % url
        return results

    if mode == 'get_token':
        if hours == 0 and days == 0:
            raise Exception("Parameter error: expecting hours > 0 or days > 0")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        results['blob_name'] = blob_name
        sap = get_shared_access_policy(permissions, hours=hours, days=days)
        token = bs.generate_shared_access_signature(container_name, blob_name, sap)
        results['access_token'] = token
        return results
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    """

    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.

        If the name prefix does not end with a trailing slash, and is not
        empty, one will be added automatically.

        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        """
        # Make sure azure libraries actually loaded
        assert (have_azure)

        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix

        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"

        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)

        # This will hold out Azure blob store connection
        self.connection = None

    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """
        return (self.account_name, self.account_key, self.container_name,
                self.name_prefix)

    def __setstate__(self, state):
        """
        Set up after unpickling.
        """
        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]

        # Connection is deliberately not pickled; it is re-created lazily.
        self.connection = None

    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """
        if self.connection is None:
            RealtimeLogger.debug("Connecting to account {}, using "
                                 "container {} and prefix {}".format(
                                     self.account_name, self.container_name,
                                     self.name_prefix))
            # Connect to the blob service where we keep everything
            self.connection = BlobService(account_name=self.account_name,
                                          account_key=self.account_key)

    @backoff
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """
        self.__connect()

        RealtimeLogger.debug("Loading {} from AzureIOStore".format(input_path))

        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
                                         self.name_prefix + input_path,
                                         local_path)

    def list_input_directory(self, input_path, recursive=False,
                             with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not not have a trailing slash; if not, one will be added automatically.

        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.

        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        """
        self.__connect()

        RealtimeLogger.info(
            "Enumerating {} from AzureIOStore".format(input_path))

        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path

        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"

        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None

        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()

        while True:

            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=fake_directory,
                                                marker=marker)

            RealtimeLogger.info("Found {} files".format(len(result)))

            for blob in result:
                # Yield each result's blob name, but directory names only once

                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]

                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)

                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)

                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file
                    if with_times:
                        mtime = blob.properties.last_modified

                        if isinstance(mtime, datetime.datetime):
                            # Make sure we're getting proper localized datetimes
                            # from the new Azure Storage API.
                            assert (mtime.tzinfo is not None and
                                    mtime.tzinfo.utcoffset(mtime) is not None)
                        else:
                            # Convert mtime from a string as in the old API.
                            mtime = dateutil.parser.parse(mtime).replace(
                                tzinfo=dateutil.tz.tzutc())

                        yield relative_path, mtime
                    else:
                        yield relative_path

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """
        self.__connect()

        RealtimeLogger.debug("Saving {} to AzureIOStore".format(output_path))

        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass

        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(
            self.container_name,
            self.name_prefix + output_path,
            local_path)

    @backoff
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        """
        self.__connect()

        marker = None

        while True:
            try:
                # Make the container.
                # (Re-creating on every page of the loop is redundant after the
                # first pass but harmless, as the conflict is swallowed below.)
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return True

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return False

    @backoff
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        """
        self.__connect()

        marker = None

        while True:
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    mtime = blob.properties.last_modified

                    if isinstance(mtime, datetime.datetime):
                        # Make sure we're getting proper localized datetimes
                        # from the new Azure Storage API.
                        assert (mtime.tzinfo is not None and
                                mtime.tzinfo.utcoffset(mtime) is not None)
                    else:
                        # Convert mtime from a string as in the old API.
                        mtime = dateutil.parser.parse(mtime).replace(
                            tzinfo=dateutil.tz.tzutc())

                    return mtime

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None

    @backoff
    def get_size(self, path):
        """
        Returns the size in bytes of the given blob if it exists, or None
        otherwise.
        """
        self.__connect()

        marker = None

        while True:
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    size = blob.properties.content_length

                    return size

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    """

    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.

        If the name prefix does not end with a trailing slash, and is not
        empty, one will be added automatically.

        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        """
        # Make sure azure libraries actually loaded
        assert(have_azure)

        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix

        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"

        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)

        # This will hold out Azure blob store connection
        self.connection = None

    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """
        return (self.account_name, self.account_key, self.container_name,
                self.name_prefix)

    def __setstate__(self, state):
        """
        Set up after unpickling.
        """
        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]

        # Connection is deliberately not pickled; it is re-created lazily.
        self.connection = None

    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """
        if self.connection is None:
            RealTimeLogger.get().debug("Connecting to account {}, using "
                                       "container {} and prefix {}".format(
                                           self.account_name,
                                           self.container_name,
                                           self.name_prefix))
            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name,
                account_key=self.account_key)

    @backoff
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """
        self.__connect()

        RealTimeLogger.get().debug("Loading {} from AzureIOStore".format(
            input_path))

        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
                                         self.name_prefix + input_path,
                                         local_path)

    def list_input_directory(self, input_path, recursive=False,
                             with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not not have a trailing slash; if not, one will be added automatically.

        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.

        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        """
        self.__connect()

        RealTimeLogger.get().info("Enumerating {} from AzureIOStore".format(
            input_path))

        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path

        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"

        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None

        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()

        while True:

            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            # NOTE(review): unlike the sibling AzureIOStore in this file, this
            # listing passes no prefix= and relative_path below is not
            # stripped of fake_directory, so the whole container is walked and
            # full blob names are yielded — confirm this is intentional.
            result = self.connection.list_blobs(self.container_name,
                                                marker=marker)

            RealTimeLogger.get().info("Found {} files".format(len(result)))

            for blob in result:
                # Yield each result's blob name, but directory names only once

                # Drop the common prefix
                relative_path = blob.name

                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)

                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)

                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file
                    if with_times:
                        # Old-style API: last_modified is a string; parse it
                        # and pin it to UTC.
                        mtime = dateutil.parser.parse(
                            blob.properties.last_modified).replace(
                                tzinfo=dateutil.tz.tzutc())

                        yield relative_path, mtime
                    else:
                        yield relative_path

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """
        self.__connect()

        RealTimeLogger.get().debug("Saving {} to AzureIOStore".format(
            output_path))

        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass

        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(self.container_name,
                                                 self.name_prefix + output_path,
                                                 local_path)

    @backoff
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        """
        self.__connect()

        marker = None

        while True:
            try:
                # Make the container.
                # (Re-creating on every page of the loop is redundant after the
                # first pass but harmless, as the conflict is swallowed below.)
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return True

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return False

    @backoff
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        """
        self.__connect()

        marker = None

        while True:
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return dateutil.parser.parse(
                        blob.properties.last_modified).replace(
                            tzinfo=dateutil.tz.tzutc())

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None
# Demo script for the legacy Azure Storage SDK: create a container, upload a
# blob, list, download, and delete. Placeholder credentials must be replaced.
blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

# Create the container (private by default), then again with public
# ("container"-level) read access, and set the ACL explicitly as well —
# three variants of the same setup shown side by side.
blob_service.create_container("datacontainer")
blob_service.create_container("datacontainer", x_ms_blob_public_access="container")
blob_service.set_container_acl("datacontainer", x_ms_blob_public_access="container")

# Upload a local file as a block blob.
# NOTE(review): "phyton" in the content type looks like a typo, but it is a
# runtime value — left untouched here; confirm the intended MIME type.
blob_service.put_block_blob_from_path(
    "datacontainer",
    "datablob",
    "StorageClientPy.py",
    x_ms_blob_content_type="text/x-script.phyton"
)

# Page through the container listing; next_marker is falsy on the last page.
blobs = []
marker = None
while True:
    batch = blob_service.list_blobs("datacontainer", marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    print(blob.name)

# Download the blob back to a local file, then clean it up.
blob_service.get_blob_to_path("datacontainer", "datablob", "out-StorageClientPy.py")

blob_service.delete_blob("datacontainer", "datablob")
class Azure(object):
    '''
    A class used to connect to the Azure storage and upload/download
    files using blob storage
    '''

    def __init__(self, params=None):
        '''
        Constructor for the Azure object
        @param params - optional dict that may supply default "user"
                        (account name) and "key" (account key)
        '''
        # BUG FIX: the default used to be a shared mutable dict ({}).
        params = params if params is not None else {}
        self.user = params.get("user")
        self.key = params.get("key")

    def connect(self, host, port, user, password, secure):
        '''
        Connect to the Azure service with given user and key
        @param host - optional host; used as the ".<host>" host_base suffix
        @param port - unused, kept for interface compatibility
        @param user - account name; falls back to the constructor value
        @param password - account key; falls back to the constructor value
        @param secure - use https when truthy, http otherwise
        @raises OsakaException when the service cannot be created
        '''
        kwargs = {}
        err = None
        if host is not None:
            kwargs["host_base"] = "." + host
        if user is not None:
            kwargs["account_name"] = user
        elif self.user is not None:
            kwargs["account_name"] = self.user
        if password is not None:
            kwargs["account_key"] = password
        elif self.key is not None:
            kwargs["account_key"] = self.key
        kwargs["protocol"] = "https" if secure else "http"
        try:
            self.service = BlobService(**kwargs)
        except Exception as e:
            # BUG FIX: e.message is deprecated (and gone in Python 3).
            err = str(e)
            self.service = None
        if self.service is None:
            raise OsakaException("Failed to connect to Azure:" +
                                 ("" if err is None else err))

    @classmethod
    def getSchemes(clazz):
        '''
        Returns a list of schemes this handler handles
        Note: handling the scheme of another handler produces unknown results
        @returns list of handled schemes
        '''
        return ["azure", "azures"]

    def close(self):
        '''
        Close this service (no-op; BlobService keeps no open connection)
        '''
        pass

    def put(self, path, url):
        '''
        Put a file up to the cloud
        @param path - path to upload (directories are walked recursively)
        @param url - path in cloud to upload too
        '''
        if os.path.isdir(path):
            return walk(self.put, path, url)
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        self.service.create_container(cont)
        self.service.put_block_blob_from_path(cont, blob, path)
        return True

    def get(self, url, dest):
        '''
        Get file(s) from the cloud
        @param url - url on cloud to pull down (on cloud)
        @param dest - dest to download too
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            destination = (os.path.join(dest, os.path.relpath(b.name, blob))
                           if blob != b.name else dest)
            # NOTE(review): os.mkdir creates only one directory level;
            # deeply nested blob names would need os.makedirs — confirm
            # before changing behavior.
            if not os.path.exists(os.path.dirname(destination)):
                os.mkdir(os.path.dirname(destination))
            self.service.get_blob_to_path(cont, b.name, destination)
        return True

    def rm(self, url):
        '''
        Remove this url and all children urls
        @param url - url to remove
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            self.service.delete_blob(cont, b.name)
        return True
# Make sure the container exists and is publicly readable.
blob_service.create_container(CONTAINER, x_ms_blob_public_access='container')


def putblob(fileid, filename):
    """Upload *filename* as blob *fileid* and return its public URL."""
    global ACCOUNT
    blob_service.put_block_blob_from_path(
        CONTAINER,
        fileid,
        filename,
        x_ms_blob_content_type=guess_type(filename),
    )
    return 'https://%s.blob.core.windows.net/%s/%s' % (ACCOUNT, CONTAINER,
                                                       fileid)


putblob('quotes.pkl', 'quotes.pkl')

# Enumerate the container, following continuation markers.
blobs = []
marker = None
while True:
    page = blob_service.list_blobs(CONTAINER, marker=marker)
    blobs.extend(page)
    marker = page.next_marker
    if not marker:
        break
for blob in blobs:
    print(blob.name)

#blob_service.delete_blob(CONTAINER, 'quotes.pkl')
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 09 16:36:56 2015

@author: justin.malinchak
"""
# Download every blob in 'mycontainer' into the local batch folder.
from azure.storage.blob import BlobService

blob_service = BlobService(
    account_name='portalvhdss5m831rhl98hj',
    account_key='Z1MliCYE7p9Ks9kYQoGeM4V99hODtiJL82BVi/zIm06jLYh7n0tV8YaZHzITKixMwUUmjJ1Vp05XrgHG+gXFlg==')

# Collect the full listing, following continuation markers.
blobs = []
marker = None
while True:
    page = blob_service.list_blobs('mycontainer', marker=marker)
    blobs.extend(page)
    marker = page.next_marker
    if not marker:
        break

for blob in blobs:
    remote_name = blob.name
    print('')
    print(remote_name)
    print('')
    local_path = 'C:\\Batches\\$Work\\' + remote_name
    blob_service.get_blob_to_path('mycontainer', remote_name, local_path)
    print('')
    print('blob downloaded ' + local_path)
    print('')
class Command(BaseCommand): help = "Synchronizes static media to cloud files." option_list = BaseCommand.option_list + ( optparse.make_option('-w', '--wipe', action='store_true', dest='wipe', default=False, help="Wipes out entire contents of container first."), optparse.make_option('-t', '--test-run', action='store_true', dest='test_run', default=False, help="Performs a test run of the sync."), optparse.make_option('-c', '--container', dest='container', help="Override STATIC_CONTAINER."), ) # settings from azurite.settings ACCOUNT_NAME = AZURITE['ACCOUNT_NAME'] ACCOUNT_KEY = AZURITE['ACCOUNT_KEY'] STATIC_CONTAINER = AZURITE['STATIC_CONTAINER'] # paths DIRECTORY = os.path.abspath(settings.STATIC_ROOT) STATIC_URL = settings.STATIC_URL if not DIRECTORY.endswith('/'): DIRECTORY = DIRECTORY + '/' if STATIC_URL.startswith('/'): STATIC_URL = STATIC_URL[1:] local_object_names = [] create_count = 0 upload_count = 0 update_count = 0 skip_count = 0 delete_count = 0 service = None def handle(self, *args, **options): self.wipe = options.get('wipe') self.test_run = options.get('test_run') self.verbosity = int(options.get('verbosity')) if hasattr(options, 'container'): self.STATIC_CONTAINER = options.get('container') self.sync_files() def sync_files(self): self.service = BlobService(account_name=self.ACCOUNT_NAME, account_key=self.ACCOUNT_KEY) try: self.service.get_container_properties(self.STATIC_CONTAINER) except AzureMissingResourceHttpError: self.service.create_container(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') self.service.set_container_acl(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') # if -w option is provided, wipe out the contents of the container if self.wipe: blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER)) if self.test_run: print "Wipe would delete %d objects." % blob_count else: print "Deleting %d objects..." 
% blob_count for blob in self.service.list_blobs(self.STATIC_CONTAINER): self.service.delete_blob(self.STATIC_CONTAINER, blob.name) # walk through the directory, creating or updating files on the cloud os.path.walk(self.DIRECTORY, self.upload_files, "foo") # remove any files on remote that don't exist locally self.delete_files() # print out the final tally to the cmd line self.update_count = self.upload_count - self.create_count print if self.test_run: print "Test run complete with the following results:" print "Skipped %d. Created %d. Updated %d. Deleted %d." % ( self.skip_count, self.create_count, self.update_count, self.delete_count) def upload_files(self, arg, dirname, names): # upload or skip items for item in names: file_path = os.path.join(dirname, item) if os.path.isdir(file_path): continue # Don't try to upload directories object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1] self.local_object_names.append(object_name) try: properties = self.service.get_blob_properties(self.STATIC_CONTAINER, object_name) except AzureMissingResourceHttpError: properties = {} self.create_count += 1 cloud_datetime = None if 'last-modified' in properties: cloud_datetime = (properties['last-modified'] and datetime.datetime.strptime( properties['last-modified'], "%a, %d %b %Y %H:%M:%S %Z" ) or None) local_datetime = datetime.datetime.utcfromtimestamp( os.stat(file_path).st_mtime) if cloud_datetime and local_datetime < cloud_datetime: self.skip_count += 1 if self.verbosity > 1: print "Skipped %s: not modified." 
% object_name continue if not self.test_run: file_contents = open(file_path, 'r').read() content_type, encoding = mimetypes.guess_type(file_path) self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents, x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type, content_encoding=encoding) # sync_headers(cloud_obj) self.upload_count += 1 if self.verbosity > 1: print "Uploaded", object_name def delete_files(self): # remove any objects in the container that don't exist locally for blob in self.service.list_blobs(self.STATIC_CONTAINER): if blob.name not in self.local_object_names: self.delete_count += 1 if self.verbosity > 1: print "Deleted %s" % blob.name if not self.test_run: self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem (FUSE).

    Containers map to top-level directories; blobs directly under a
    container map to files.  Nested paths inside containers are not
    supported yet (see TODOs).
    """

    blobs = None
    # Directory cache: <'/cname', dict(stat=dict, files=None | {fname: stat})>
    containers = dict()
    # Open-file table: <fd, (path, bytes, dirty)>
    fds = dict()
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        """Refresh the cached container list (drops all file caches)."""
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0o755), st_uid=getuid(),
                         st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0o755),
                                   st_uid=getuid(), st_size=0,
                                   st_mtime=int(time.time())))

        self.containers = cmap  # destroys fs tree cache resistant to misses

    def _parse_path(self, path):  # returns </dir, file(=None)>
        """Split an absolute path into (directory, file-or-None)."""
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), \
                str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        """Return the container component of *path*: /abc/def/g --> abc."""
        base_container = path[1:]
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        """Return the cached dir entry for *path*, fetching the container's
        blob list lazily when *contents_required* is set."""
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)

        if self.containers['/' + cname]['files'] is None:
            # fetch contents of container
            log.info("------> CONTENTS NOT FOUND: %s" % cname)

            blobs = self.blobs.list_blobs(cname)

            if self.containers['/' + cname]['files'] is None:
                self.containers['/' + cname]['files'] = dict()

            for f in blobs:
                blob_name = f.name
                blob_date = f.properties.last_modified
                # int() auto-promotes on Python 2; long() was Py2-only.
                blob_size = int(f.properties.content_length)

                node = dict(st_mode=(S_IFREG | 0o644),
                            st_size=blob_size,
                            st_mtime=self.convert_to_epoch(blob_date),
                            st_uid=getuid())

                if blob_name.find('/') == -1:  # file just under container
                    self.containers['/' + cname]['files'][blob_name] = node

        return self.containers['/' + cname]

    def _get_file(self, path):
        """Return the cached stat node for a file path, or None."""
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)
            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            # BUG FIX: 'name is not name.lower()' tested identity, not
            # equality, so uppercase names could slip through.
            if name != name.lower():
                log.error("Container names cannot contain uppercase "
                          "characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive '
                          'dashes (-).')
                raise FuseOSError(EAGAIN)
            # TODO handle all "-"s must be preceded by letter or numbers
            # TODO starts with only letter or number, can contain letter, nr, '-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                # BUG FIX: this log call used to sit after the raise and
                # was unreachable.
                log.error("Invalid container name or container already "
                          "exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level rmdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                    st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)
        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)
        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:  # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)

        if fh not in self.fds:
            raise FuseOSError(EIO)

        path = self.fds[fh][0]
        data = self.fds[fh][1]
        dirty = self.fds[fh][2]

        if not dirty:
            return 0  # avoid redundant write

        d, f = self._parse_path(path)
        c_name = self.parse_container(path)

        if data is None:
            data = ''

        try:
            if len(data) < 64 * 1024 * 1024:  # 64 mb
                self.blobs.put_blob(c_name, f, data, 'BlockBlob')
            else:
                # divide file by blocks and upload
                block_size = 8 * 1024 * 1024
                num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                rd = str(random.randint(1, 10 ** 8))
                block_ids = list()

                for i in range(num_blocks):
                    part = data[i * block_size:min((i + 1) * block_size,
                                                   len(data))]
                    block_id = base64.encodestring(
                        '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i)))
                    self.blobs.put_block(c_name, f, part, block_id)
                    block_ids.append(block_id)

                self.blobs.put_block_list(c_name, f, block_ids)
        except AzureException:
            raise FuseOSError(EAGAIN)

        dir = self._get_dir(d, True)
        if not dir or f not in dir['files']:
            raise FuseOSError(EIO)

        # update local data
        dir['files'][f]['st_size'] = len(data)
        dir['files'][f]['st_mtime'] = time.time()
        self.fds[fh] = (path, data, False)  # mark as not dirty
        return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        d = self.fds[fh][1]
        if d is None:
            d = ""
        self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
        return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            # BUG FIX: "x is not '/'" was an identity test on a string.
            return ['.', '..'] + [x[1:] for x in self.containers
                                  if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + list(dir['files'])

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        # BUG FIX: 'except URLError, e' was Python-2-only syntax, and the
        # 403 branch misspelled FuseOSError as FUSEOSError (NameError).
        except URLError as e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
# Handles for the source and destination storage accounts.
source_account = BlobService(
    account_name='cngssd',
    account_key=
    'DVvOtpOVW71er9ztR3mooJk4Zc3ZNovW9YV3qu4Y6bkN0eCfHutpcNVXW6gtpfolRk4CcAlmftz/+SDwm2BQag=='
)
dest_account = BlobService(
    account_name='testcng',
    account_key=
    'piWr6zleZ1sL8aopv5Y4NRYyrVWaW2/QrXcPpsxRec4IxtEoR1IyRmZCkbdyq50Bfu0qidF8SicQahdM+OExvg=='
)

# Enumerate the 'vhds' container on the source account, page by page,
# printing each blob name as we go.
source_blobs = []
source_marker = None
while True:
    page = source_account.list_blobs('vhds', marker=source_marker)
    source_blobs.extend(page)
    source_marker = page.next_marker
    if not source_marker:
        break

for blob in source_blobs:
    print(blob.name)

# Enumerate the same container on the destination account.
dest_blobs = []
dest_marker = None
while True:
    page = dest_account.list_blobs('vhds', marker=dest_marker)
    dest_blobs.extend(page)
    dest_marker = page.next_marker
    if not dest_marker:
        break
class BlobSource(DataSource): def __init__(self): self.storage_account = getenv('STORAGE_ACCOUNT') self.blob_service = BlobService(self.storage_account, getenv('STORAGE_KEY')) def load(self, sparkContext, container, path): path = ('/' if path[0] != '/' else '') + path uri = 'wasb://%s@%s.blob.core.windows.net%s' % ( container, self.storage_account, path) print 'Loading from %s' % uri return sparkContext.textFile(uri) def download(self, container, path): print 'Downloading blob from %s/%s' % (container, path) self.blob_service.get_blob_to_path(container, path, path) print 'Downloaded blob to ' + path def saveAsJson(self, payload, container, path): path = path.lstrip('/') print path print 'Saving to %s/%s' % (container, path) json_string = json.dumps(payload, ensure_ascii=False).encode('utf-8') try: self.blob_service.put_blob( container, path, json_string, 'BlockBlob', x_ms_blob_cache_control='max-age=3600', x_ms_blob_content_type='application/json') except Exception as e: print 'Failed to save %s/%s: %s' % (container, path, str(e)) raise def saveAsText(self, rdd, container, path): path = path.lstrip('/') path = '/' + path print 'Saving rdd to %s%s' % (container, path) uri = 'wasb://%s@%s.blob.core.windows.net%s' % ( container, self.storage_account, path) try: rdd.saveAsTextFile(uri) except Exception as e: print 'Failed to save %s%s: %s' % (container, path, str(e)) raise def deleteAllBut(self, container, exceptFolderName): print 'deleteAllBut called' try: bloblistingresult = self.blob_service.list_blobs(container) for i in bloblistingresult: print i.name if not exceptFolderName in i.name: try: print 'deleting' self.blob_service.delete_blob(container, i.name) print 'deleted' except Exception as e: print 'Failed to delete %s/%s: %s' % (container, i.name, str(e)) raise except Exception as e: print 'Failed to list things in %s: %s' % (container, str(e)) raise
) blob_service.create_container(CONTAINER, x_ms_blob_public_access='container') def putblob(fileid, filename): global ACCOUNT blob_service.put_block_blob_from_path( CONTAINER, fileid, filename, x_ms_blob_content_type=guess_type(filename)) return 'https://%s.blob.core.windows.net/%s/%s' % (ACCOUNT, CONTAINER, fileid) putblob('quotes.pkl', 'quotes.pkl') blobs = [] marker = None while True: batch = blob_service.list_blobs(CONTAINER, marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker for blob in blobs: print(blob.name) #blob_service.delete_blob(CONTAINER, 'quotes.pkl')
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem (FUSE).

    Containers map to top-level directories; blobs directly under a
    container map to files.  Nested paths inside containers are not
    supported yet (see TODOs).
    """

    blobs = None
    # Directory cache: <'/cname', dict(stat=dict, files=None | {fname: stat})>
    containers = dict()
    # Open-file table: <fd, (path, bytes, dirty)>
    fds = dict()
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        """Refresh the cached container list (drops all file caches)."""
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0o755), st_uid=getuid(),
                         st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0o755),
                                   st_uid=getuid(), st_size=0,
                                   st_mtime=int(time.time())))

        self.containers = cmap  # destroys fs tree cache resistant to misses

    def _parse_path(self, path):  # returns </dir, file(=None)>
        """Split an absolute path into (directory, file-or-None)."""
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), \
                str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        """Return the container component of *path*: /abc/def/g --> abc."""
        base_container = path[1:]
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        """Return the cached dir entry for *path*, fetching the container's
        blob list lazily when *contents_required* is set."""
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)

        if self.containers['/' + cname]['files'] is None:
            # fetch contents of container
            log.info("------> CONTENTS NOT FOUND: %s" % cname)

            blobs = self.blobs.list_blobs(cname)

            if self.containers['/' + cname]['files'] is None:
                self.containers['/' + cname]['files'] = dict()

            for f in blobs:
                blob_name = f.name
                blob_date = f.properties.last_modified
                # int() auto-promotes on Python 2; long() was Py2-only.
                blob_size = int(f.properties.content_length)

                node = dict(st_mode=(S_IFREG | 0o644),
                            st_size=blob_size,
                            st_mtime=self.convert_to_epoch(blob_date),
                            st_uid=getuid())

                if blob_name.find('/') == -1:  # file just under container
                    self.containers['/' + cname]['files'][blob_name] = node

        return self.containers['/' + cname]

    def _get_file(self, path):
        """Return the cached stat node for a file path, or None."""
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)
            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            # BUG FIX: 'name is not name.lower()' tested identity, not
            # equality, so uppercase names could slip through.
            if name != name.lower():
                log.error("Container names cannot contain uppercase "
                          "characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive '
                          'dashes (-).')
                raise FuseOSError(EAGAIN)
            # TODO handle all "-"s must be preceded by letter or numbers
            # TODO starts with only letter or number, can contain letter, nr, '-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                # BUG FIX: this log call used to sit after the raise and
                # was unreachable.
                log.error("Invalid container name or container already "
                          "exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level rmdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                    st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)
        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)
        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:  # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)

        if fh not in self.fds:
            raise FuseOSError(EIO)

        path = self.fds[fh][0]
        data = self.fds[fh][1]
        dirty = self.fds[fh][2]

        if not dirty:
            return 0  # avoid redundant write

        d, f = self._parse_path(path)
        c_name = self.parse_container(path)

        if data is None:
            data = ''

        try:
            if len(data) < 64 * 1024 * 1024:  # 64 mb
                self.blobs.put_blob(c_name, f, data, 'BlockBlob')
            else:
                # divide file by blocks and upload
                block_size = 8 * 1024 * 1024
                num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                rd = str(random.randint(1, 10 ** 8))
                block_ids = list()

                for i in range(num_blocks):
                    part = data[i * block_size:min((i + 1) * block_size,
                                                   len(data))]
                    block_id = base64.encodestring(
                        '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i)))
                    self.blobs.put_block(c_name, f, part, block_id)
                    block_ids.append(block_id)

                self.blobs.put_block_list(c_name, f, block_ids)
        except AzureException:
            raise FuseOSError(EAGAIN)

        dir = self._get_dir(d, True)
        if not dir or f not in dir['files']:
            raise FuseOSError(EIO)

        # update local data
        dir['files'][f]['st_size'] = len(data)
        dir['files'][f]['st_mtime'] = time.time()
        self.fds[fh] = (path, data, False)  # mark as not dirty
        return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        d = self.fds[fh][1]
        if d is None:
            d = ""
        self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
        return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            # BUG FIX: "x is not '/'" was an identity test on a string.
            return ['.', '..'] + [x[1:] for x in self.containers
                                  if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + list(dir['files'])

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        # BUG FIX: 'except URLError, e' was Python-2-only syntax, and the
        # 403 branch misspelled FuseOSError as FUSEOSError (NameError).
        except URLError as e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
@author: justin.malinchak """ # List blobs in container from azure.storage.blob import BlobService blob_service = BlobService( account_name='portalvhdss5m831rhl98hj', account_key= 'Z1MliCYE7p9Ks9kYQoGeM4V99hODtiJL82BVi/zIm06jLYh7n0tV8YaZHzITKixMwUUmjJ1Vp05XrgHG+gXFlg==' ) blobs = [] marker = None while True: batch = blob_service.list_blobs('mycontainer', marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker for blob in blobs: bname = blob.name print('') print(bname) print('') bpathname = 'C:\\Batches\\$Work\\' + bname blob_service.get_blob_to_path('mycontainer', bname, bpathname) print('') print('blob downloaded ' + bpathname) print('')
class AzureConnector():
    """Bridges this worker VM with Azure: a service-bus queue for incoming
    commands, a topic path for status updates, blob storage for
    algorithm/project files, and service management for self-termination."""

    # Number of attempts for each remote operation before giving up.
    MAX_TRIES = 5

    def __init__(self, config):
        tree = ET.parse('SharedConfig.xml')
        self.myMachineName = tree.find('.//Instance').get("id")

        self.sms = ServiceManagementService(
            subscription_id=config.get("azure", "subscription_id"),
            cert_file=config.get("azure", "cert_file"))

        self.bus_service = ServiceBusService(
            service_namespace=config.get("azure", "bus_namespace"),
            shared_access_key_name=config.get("azure",
                                              "bus_shared_access_key_name"),
            shared_access_key_value=config.get("azure",
                                               "bus_shared_access_key_value"))

        self.command_queue = config.get("azure", "commandQueuePath")
        for tries in range(1, 10):
            try:
                self.bus_service.create_queue(self.command_queue)
                break
            except Exception:
                # Queue creation can fail transiently while the namespace
                # spins up; keep retrying.  (Was a bare 'except:'.)
                print("Esperando")

        self.status_topic = config.get("azure", "statusTopicPath")
        # NOTE(review): this creates a *queue* for a path later used with
        # send_topic_message — confirm whether create_topic was intended.
        self.bus_service.create_queue(self.status_topic)

        self.storage = BlobService(
            account_name=config.get("azure", "account_name"),
            account_key=config.get("azure", "account_key"))

        self.algo_storage_name = config.get("azure", "algorithm_storage_name")
        self.storage.create_container(self.algo_storage_name,
                                      fail_on_exist=False)

        self.proj_storage_name = config.get("azure", "project_storage_name")
        self.storage.create_container(self.proj_storage_name,
                                      fail_on_exist=False)

    def check_new_tasks(self):
        """Poll the command queue once; return a CommandMetadata built from
        the received JSON message, or None when the queue is empty."""
        message = None
        for tries in range(1, 2):  # single attempt (kept from original)
            try:
                message = self.bus_service.receive_queue_message(
                    self.command_queue, peek_lock=False, timeout=60)
                break
            except Exception:
                message = None

        if message is None or message.body is None:
            return None

        job_description = json.loads(
            message.body.replace('/AzureBlobStorage/', ''))

        command = CommandMetadata(
            command_id=job_description["command_id"],
            algorithm_directory=job_description["algorithm_prfx"],
            project_prfx=job_description["project_prfx"],
            project_input_files=job_description["project_input_files"],
            algorithm_executable_name=job_description[
                "algorithm_executable_name"],
            algorithm_parameters=job_description["algorithm_parameters"],
            sent_timestamp=datetime.datetime.strptime(
                job_description["sent_timestamp"], "%d/%m/%Y %H:%M:%S"),
            machine_size=job_description["machine_size"])

        # Return the command consumed from the queue.  (An unreachable
        # 'return None' that followed here was removed.)
        return command

    def list_algo_files(self, prfx):
        """Return the names of all algorithm blobs under prefix *prfx*."""
        blobs = self.storage.list_blobs(container_name=self.algo_storage_name,
                                        prefix=prfx)
        return [blob.name for blob in blobs]

    def download_algo_zip(self, algorithm_bin_file, tmp_file):
        """Download the algorithm package blob to *tmp_file*, with retries."""
        print("download_algo_zip(algorithm_bin_file=" + algorithm_bin_file +
              ", tmp_file=" + tmp_file + ")")
        # BUG FIX: the retry loops used range(1, 5) but compared against 5,
        # so the abort message could never fire; now 5 attempts are made
        # and the last failure reports the abort.
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.storage.get_blob_to_path(
                    self.algo_storage_name, algorithm_bin_file, tmp_file,
                    open_mode='wb', snapshot=None, x_ms_lease_id=None,
                    progress_callback=None)
                break
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())

    def download_file_to_project(self, project_name, blob_name, dir):
        """Download a project blob into *dir*/<project>/<blob>, with retries.

        (An identical duplicate definition of this method was removed; it
        silently shadowed this one.)
        """
        print("download_file_to_project(project_name=" + project_name +
              ", blob_name=" + blob_name + ", dir=" + dir + ")")
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.storage.get_blob_to_path(
                    self.proj_storage_name,
                    os.path.join(project_name, blob_name),
                    os.path.join(dir, os.path.join(project_name, blob_name)),
                    open_mode='wb', snapshot=None, x_ms_lease_id=None,
                    progress_callback=None)
                break
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())

    def upload_proj_file(self, project_name, blob_name, dir):
        """Upload *dir*/<project>/<blob> into the project container."""
        print("upload_proj_file(project_name=" + project_name +
              ", blob_name=" + blob_name + ", dir=" + dir + ")")
        if blob_name[0] == '/':
            blob_name = blob_name[1:]
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.storage.put_block_blob_from_path(
                    self.proj_storage_name,
                    os.path.join(project_name, blob_name),
                    os.path.join(dir, os.path.join(project_name, blob_name)))
                break
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())

    def download_file_to_algo(self, blob_name, dir):
        """Download an algorithm blob into *dir*, with retries."""
        print("download_file_to_algo(blob_name=" + blob_name +
              ", dir=" + dir + ")")
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.storage.get_blob_to_path(
                    container_name=self.algo_storage_name,
                    blob_name=os.path.join(blob_name),
                    file_path=os.path.join(dir, blob_name),
                    open_mode='wb', snapshot=None, x_ms_lease_id=None,
                    progress_callback=None)
                break
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())

    def send_status(self, main_status):
        """Publish a UTF-8 status message on the status topic, with retries."""
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.bus_service.send_topic_message(
                    topic_name=self.status_topic,
                    message=Message(main_status.encode('utf-8')))
                break
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())

    def shutdown_myself(self):
        """Delete this VM's own deployment (the machine self-destructs)."""
        print("Removendo máquina virtual da nuvem...")
        for tries in range(1, self.MAX_TRIES + 1):
            try:
                self.sms.delete_deployment(
                    service_name=self.myMachineName,
                    deployment_name=self.myMachineName,
                    delete_vhd=True)
                # exit(0) ends the process; the 'break' that followed it in
                # the original was unreachable and has been removed.
                exit(0)
            except Exception as e:
                if tries == self.MAX_TRIES:
                    print("Muitos erros de conexão. Operação abortada.")
                else:
                    print("Erro de conexão com serviço. Retentando..." +
                          e.__str__())