def submit():
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read for 24 hours) for the input container and save it to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (write for 24 hours) for the output container and save it to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs in the input container and collect their names
    blobs = blob_service.list_blobs(INPUT_CONTAINER)
    bloblist = []
    for blob in blobs:
        bloblist.append(blob.name)

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch; --array bounds are inclusive, so the last index is len(bloblist) - 1
    cli = "sbatch --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist) - 1)
    run(cli, showoutput=True)
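Each array task on the worker side can then select its own blob by index. A minimal sketch, assuming the SLURMDEMO_* constants above hold these literal environment variable names and that the submitting environment is propagated to the job:

# Hypothetical worker-side sketch, not part of the original source.
import json
import os

task_id = int(os.environ["SLURM_ARRAY_TASK_ID"])         # set by Slurm per array task
bloblist = json.loads(os.environ["SLURMDEMO_BLOBLIST"])  # exported by submit()
blob_name = bloblist[task_id]
print("This task processes blob: " + blob_name)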
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name):
        BaseTransfer.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")

    def get_metadata_for_key(self, key):
        key = fix_path(key)
        return self.list_path(key)[0]['metadata']

    def list_path(self, path):
        return_list = []
        path = fix_path(path)
        self.log.info("Asking for listing of: %r", path)
        for r in self.conn.list_blobs(self.container_name, prefix=path, delimiter="/",
                                      include="metadata"):
            entry = {"name": r.name, "size": r.properties.content_length,
                     "last_modified": dateutil.parser.parse(r.properties.last_modified),
                     "metadata": r.metadata}
            return_list.append(entry)
        return return_list

    def delete_key(self, key_name):
        key_name = fix_path(key_name)
        self.log.debug("Deleting key: %r", key_name)
        return self.conn.delete_blob(self.container_name, key_name)

    def get_contents_to_file(self, obj_key, filepath_to_store_to):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, obj_key, filepath_to_store_to)

    def get_contents_to_string(self, obj_key):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r", obj_key)
        return self.conn.get_blob_to_bytes(self.container_name, obj_key), \
            self.get_metadata_for_key(obj_key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        # For whatever reason Azure requires all values to be strings at the point
        # of sending; also guard against the default metadata=None
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None):
        # For whatever reason Azure requires all values to be strings at the point
        # of sending; also guard against the default metadata=None
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name, time.time() - start_time)
        return container_name
def list_files_from_path(container, path):
    blob_service = BlobService(account_name=storage_name, account_key=storage_key)
    next_marker = None
    results = []
    while True:
        blobs = blob_service.list_blobs(container, prefix=path, maxresults=2000,
                                        marker=next_marker)
        for blob in blobs:
            results.append(blob.name)
        next_marker = blobs.next_marker
        if not next_marker:
            break
    return results
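A minimal usage sketch for the pagination helper above; the container and prefix names are placeholders, and storage_name/storage_key are assumed to be module-level globals, as the helper itself expects:

# Hypothetical usage; 'mycontainer' and 'logs/' are placeholder names.
for name in list_files_from_path('mycontainer', 'logs/'):
    print(name)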
def run(itk_source_dir, externaldata_object_store, account_name, account_key):
    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blobs = blob_service.list_blobs('md5')
    current_blobs = [blob.name for blob in blobs]

    md5files = []
    for root, dirnames, filenames in os.walk(itk_source_dir):
        for filename in fnmatch.filter(filenames, '*.md5'):
            md5files.append(os.path.join(root, filename))

    for content_link in md5files:
        upload_to_azure(content_link, externaldata_object_store, blob_service,
                        current_blobs)
def process(self):
    account_name = self.parameters.azure_account_name
    account_key = self.parameters.azure_account_key
    blob_service = BlobService(account_name, account_key, protocol="https")
    proxy_setting = self.parameters.https_proxy or ""
    date_setting = self.parameters.date or ""
    date = None

    if date_setting:
        if date_setting != "yesterday":
            date = datetime.datetime.strptime(date_setting, "%Y-%m-%d").date()  # for debugging (probably)
        elif date_setting == "yesterday":
            date = datetime.date.today() - datetime.timedelta(days=1)  # for normal usage

    proxy_url = "https://" + proxy_setting if proxy_setting.find("https://") == -1 else proxy_setting
    proxy_options = urlparse(proxy_url)

    if date:
        self.logger.info("Fetching for date: %s (%s)" % (date, date_setting))
    else:
        self.logger.info("No 'date' was specified, fetching ALL")

    if proxy_options.hostname:
        self.logger.info("Using https proxy(host=%s, port=%s)" %
                         (proxy_options.hostname, proxy_options.port))
        blob_service.set_proxy(host=proxy_options.hostname, port=proxy_options.port)
    else:
        if proxy_setting:
            self.logger.info("Using NO proxy, couldn't use 'https_proxy' it was: %s" % proxy_setting)
        else:
            self.logger.info("Using NO proxy, 'https_proxy' was empty")

    for container in blob_service.list_containers():
        container_name = container.name
        if container_name == "heartbeat":
            continue
        if date and (not container_name == "processed-" + str(date)):
            self.logger.info("IGNORING container '%s' didn't match date selection" % container_name)
            continue
        for blob in blob_service.list_blobs(container_name):
            # note: blob name first, container second, to match the message text
            self.logger.info("Fetching blob %s in container %s" % (blob.name, container_name))
            data = blob_service.get_blob(container_name, blob.name)
            cs = StringIO.StringIO(data)
            report = gzip.GzipFile(fileobj=cs).read()
            self.send_message(report)
def download():
    blob_service = BlobService(account_name='squadshots',
                               account_key='UgxaWKAKv2ZvhHrPt0IHi4EQedPpZw35r+RXkAYB2eICPrG3TjSwk2G8gUzG/PNDDTV+4CVCYWCvZSiad5xMQQ==')
    try:
        blob_service.get_blob_to_path('album', 'image', 'static/output.png')
    except Exception as e:
        print e

    blobs = blob_service.list_blobs('album', None, None, None, 'metadata', None)
    for blob in blobs:
        if blob.metadata is not None:
            for key in blob.metadata:
                if blob.metadata[key] == session['username']:
                    blob_service.get_blob_to_path('album', blob.name, 'static/output.png')

    for i in blob_service.list_containers():
        print "This container is " + i.name

    return render_template('album.html', filename="static/output.png")
def upload_all_new_azure(local_folder, azure_container, account_name, account_key):
    blob_service = BlobService(account_name=os.getenv('ACC_NAME'),
                               account_key=os.getenv('ACCESS_KEY'))

    blob_list = blob_service.list_blobs(azure_container)
    blob_name_list = [b.name for b in blob_list.blobs]
    blob_name_set = set(blob_name_list)

    # Now, for each file in the local folder, see whether it's already in the container
    localfiles = os.listdir(local_folder)
    localfiles = [f for f in localfiles if "~" not in f]
    localfiles = [f for f in localfiles if f[0] != "."]
    localfiles = [f for f in localfiles if (".zip" in f or ".csv" in f)]
    localfiles = set(localfiles)

    files_to_upload = localfiles - blob_name_set

    orig_len = len(files_to_upload)
    error_counter = 0
    while len(files_to_upload) > 0:
        if error_counter > orig_len:
            logger.error("too many upload failures, exiting")
            sys.exit()
        filename = files_to_upload.pop()
        try:
            blob_service.put_block_blob_from_path(
                'csvs',
                filename,
                os.path.join(local_folder, filename))
        except Exception:
            error_counter += 1
            logging.error(filename + " failed to upload")
            files_to_upload.add(filename)
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is probably lacking
        self.log.warning("AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name, prefix=path, delimiter="/",
                                     include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key, fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_bytes(self.container_name, key), self._metadata_for_key(key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # also guard against the default metadata=None
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(
            self.container_name, key, memstring,
            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # also guard against the default metadata=None
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(
            self.container_name, key, filepath,
            x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name, time.time() - start_time)
        return container_name
class AzureStorageBlockDeviceAPI(object):
    """
    An ``IBlockDeviceAsyncAPI`` which uses Azure Storage Backed Block Devices
    Current Support: Azure SMS API
    """

    def __init__(self, **azure_config):
        """
        :param ServiceManagement azure_client: an instance of the azure
            service management api client.
        :param String service_name: The name of the cloud service
        :param names of Azure volumes to identify cluster
        :returns: A ``BlockDeviceVolume``.
        """
        self._instance_id = self.compute_instance_id()
        self._azure_service_client = ServiceManagementService(
            azure_config['subscription_id'],
            azure_config['management_certificate_path'])
        self._service_name = azure_config['service_name']
        self._azure_storage_client = BlobService(
            azure_config['storage_account_name'],
            azure_config['storage_account_key'])
        self._storage_account_name = azure_config['storage_account_name']
        self._disk_container_name = azure_config['disk_container_name']

        if azure_config['debug']:
            to_file(sys.stdout)

    def allocation_unit(self):
        """
        1GiB is the minimum allocation unit for azure disks
        return int: 1 GiB
        """
        return int(GiB(1).to_Byte().value)

    def compute_instance_id(self):
        """
        Azure Stored a UUID in the SDC kernel module.
        """
        # Node host names should be unique within a vnet
        return unicode(socket.gethostname())

    def create_volume(self, dataset_id, size):
        """
        Create a new volume.
        :param UUID dataset_id: The Flocker dataset ID of the dataset on this
            volume.
        :param int size: The size of the new volume in bytes.
        :returns: A ``Deferred`` that fires with a ``BlockDeviceVolume`` when
            the volume has been created.
        """
        size_in_gb = Byte(size).to_GiB().value

        if size_in_gb % 1 != 0:
            raise UnsupportedVolumeSize(dataset_id)

        self._create_volume_blob(size, dataset_id)

        label = self._disk_label_for_dataset_id(str(dataset_id))
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=size, attached_to=None,
            dataset_id=self._dataset_id_for_disk_label(label))

    def destroy_volume(self, blockdevice_id):
        """
        Destroy an existing volume.
        :param unicode blockdevice_id: The unique identifier for the volume to
            destroy.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :return: ``None``
        """
        log_info('Destroying block device: ' + str(blockdevice_id))
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        request = None

        if lun is not None:
            request = \
                self._azure_service_client.delete_data_disk(
                    service_name=self._service_name,
                    deployment_name=self._service_name,
                    role_name=target_disk.attached_to.role_name,
                    lun=lun, delete_vhd=True)
        else:
            if target_disk.__class__.__name__ == 'Blob':
                # unregistered disk
                self._azure_storage_client.delete_blob(
                    self._disk_container_name, target_disk.name)
            else:
                request = self._azure_service_client.delete_disk(
                    target_disk.name, True)

        if request is not None:
            self._wait_for_async(request.request_id, 5000)
            self._wait_for_detach(blockdevice_id)

    def attach_volume(self, blockdevice_id, attach_to):
        """
        Attach ``blockdevice_id`` to ``host``.
        :param unicode blockdevice_id: The unique identifier for the block
            device being attached.
        :param unicode attach_to: An identifier like the one returned by the
            ``compute_instance_id`` method indicating the node to which to
            attach the volume.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises AlreadyAttachedVolume: If the supplied ``blockdevice_id`` is
            already attached.
        :returns: A ``BlockDeviceVolume`` with a ``host`` attribute set to
            ``host``.
        """
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is not None:
            raise AlreadyAttachedVolume(blockdevice_id)

        log_info('Attempting to attach ' + str(blockdevice_id)
                 + ' to ' + str(attach_to))

        disk_size = self._attach_disk(blockdevice_id, target_disk, attach_to)

        self._wait_for_attach(blockdevice_id)

        log_info('disk attached')

        return self._blockdevicevolume_from_azure_volume(
            blockdevice_id, disk_size, attach_to)

    def detach_volume(self, blockdevice_id):
        """
        Detach ``blockdevice_id`` from whatever host it is attached to.
        :param unicode blockdevice_id: The unique identifier for the block
            device being detached.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is not
            attached to anything.
        :returns: ``None``
        """
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        # contrary to the function name it doesn't delete by default,
        # it just detaches
        request = \
            self._azure_service_client.delete_data_disk(
                service_name=self._service_name,
                deployment_name=self._service_name,
                role_name=role_name, lun=lun)

        self._wait_for_async(request.request_id, 5000)

        self._wait_for_detach(blockdevice_id)

    def get_device_path(self, blockdevice_id):
        """
        Return the device path that has been allocated to the block device on
        the host to which it is currently attached.
        :param unicode blockdevice_id: The unique identifier for the block
            device.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is
            not attached to a host.
        :returns: A ``FilePath`` for the device.
        """
        (target_disk_or_blob, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk_or_blob is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        return Lun.get_device_path_for_lun(lun)

    def list_volumes(self):
        """
        List all the block devices available via the back end API.
        :returns: A ``list`` of ``BlockDeviceVolume``s.
        """
        media_url_prefix = 'https://' + self._storage_account_name \
            + '.blob.core.windows.net/' + self._disk_container_name
        disks = self._azure_service_client.list_disks()
        disk_list = []
        all_blobs = self._get_flocker_blobs()
        for d in disks:
            if media_url_prefix not in d.media_link or \
                    'flocker-' not in d.label:
                continue
            role_name = None
            if d.attached_to is not None \
                    and d.attached_to.role_name is not None:
                role_name = d.attached_to.role_name

            disk_list.append(
                self._blockdevicevolume_from_azure_volume(
                    d.label,
                    self._gibytes_to_bytes(d.logical_disk_size_in_gb),
                    role_name))

            if d.label in all_blobs:
                del all_blobs[d.label]

        for key in all_blobs:
            # include unregistered 'disk' blobs
            disk_list.append(
                self._blockdevicevolume_from_azure_volume(
                    all_blobs[key].name,
                    all_blobs[key].properties.content_length,
                    None))

        return disk_list

    def _attach_disk(self, blockdevice_id, target_disk, attach_to):
        """
        Attaches disk to specified VM
        :param string blockdevice_id: The identifier of the disk
        :param DataVirtualHardDisk/Blob target_disk: The Blob or Disk to be
            attached
        :returns int: The size of the attached disk
        """
        lun = Lun.compute_next_lun(self._azure_service_client,
                                   self._service_name,
                                   str(attach_to))
        common_params = {
            'service_name': self._service_name,
            'deployment_name': self._service_name,
            'role_name': attach_to,
            'lun': lun
        }
        disk_size = None

        if target_disk.__class__.__name__ == 'Blob':
            # exclude 512 byte footer
            disk_size = target_disk.properties.content_length
            common_params['source_media_link'] = \
                'https://' + self._storage_account_name \
                + '.blob.core.windows.net/' + self._disk_container_name \
                + '/' + blockdevice_id
            common_params['disk_label'] = blockdevice_id
        else:
            disk_size = self._gibytes_to_bytes(
                target_disk.logical_disk_size_in_gb)
            common_params['disk_name'] = target_disk.name

        request = self._azure_service_client.add_data_disk(**common_params)
        self._wait_for_async(request.request_id, 5000)

        return disk_size

    def _create_volume_blob(self, size, dataset_id):
        # Create a new page blob as a blank disk
        self._azure_storage_client.put_blob(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            blob=None,
            x_ms_blob_type='PageBlob',
            x_ms_blob_content_type='application/octet-stream',
            x_ms_blob_content_length=size)

        # for the disk to be a valid vhd it requires a vhd footer
        # on the last 512 bytes
        vhd_footer = Vhd.generate_vhd_footer(size)

        self._azure_storage_client.put_page(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            page=vhd_footer,
            x_ms_page_write='update',
            x_ms_range='bytes=' + str(size - 512) + '-' + str(size - 1))

    def _disk_label_for_dataset_id(self, dataset_id):
        """
        Returns a disk label for a given Dataset ID
        :param unicode dataset_id: The identifier of the dataset
        :returns string: A string representing the disk label
        """
        label = 'flocker-' + str(dataset_id)
        return label

    def _dataset_id_for_disk_label(self, disk_label):
        """
        Returns a UUID representing the Dataset ID for the given disk label
        :param string disk_label: The disk label
        :returns UUID: The UUID of the dataset
        """
        return UUID(disk_label.replace('flocker-', ''))

    def _get_disk_vmname_lun(self, blockdevice_id):
        target_disk = None
        target_lun = None
        role_name = None
        disk_list = self._azure_service_client.list_disks()

        for d in disk_list:
            if 'flocker-' not in d.label:
                continue
            if d.label == str(blockdevice_id):
                target_disk = d
                break

        if target_disk is None:
            # check for unregistered disk
            blobs = self._get_flocker_blobs()
            blob = None

            if str(blockdevice_id) in blobs:
                blob = blobs[str(blockdevice_id)]

            return blob, None, None

        vm_info = None

        if hasattr(target_disk.attached_to, 'role_name'):
            vm_info = self._azure_service_client.get_role(
                self._service_name, self._service_name,
                target_disk.attached_to.role_name)

            for d in vm_info.data_virtual_hard_disks:
                if d.disk_name == target_disk.name:
                    target_lun = d.lun
                    break

            role_name = target_disk.attached_to.role_name

        return (target_disk, role_name, target_lun)

    def _get_flocker_blobs(self):
        all_blobs = {}

        blobs = self._azure_storage_client.list_blobs(
            self._disk_container_name,
            prefix='flocker-')

        for b in blobs:
            # todo - this could be big!
            all_blobs[b.name] = b

        return all_blobs

    def _wait_for_detach(self, blockdevice_id):
        role_name = ''
        lun = -1

        timeout_count = 0
        log_info('waiting for azure to report disk as detached...')

        while role_name is not None or lun is not None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(1)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

        log_info('Disk Detached')

    def _wait_for_attach(self, blockdevice_id):
        timeout_count = 0
        lun = None

        log_info('waiting for azure to report disk as attached...')

        while lun is None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(.001)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

    def _wait_for_async(self, request_id, timeout):
        count = 0
        result = self._azure_service_client.get_operation_status(request_id)
        while result.status == 'InProgress':
            count = count + 1
            if count > timeout:
                log_error('Timed out waiting for async operation to complete.')
                raise AsynchronousTimeout()
            time.sleep(.001)
            log_info('.')
            result = self._azure_service_client.get_operation_status(
                request_id)
            if result.error:
                log_error(result.error.code)
                log_error(str(result.error.message))
        log_error(result.status + ' in ' + str(count * 5) + 's')

    def _gibytes_to_bytes(self, size):
        return int(GiB(size).to_Byte().value)

    def _blockdevicevolume_from_azure_volume(self, label, size,
                                             attached_to_name):
        # azure will report the disk size excluding the 512 byte footer,
        # however flocker expects the exact value it requested for disk size,
        # so offset the reported size to flocker by 512 bytes
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=int(size),
            attached_to=attached_to_name,
            dataset_id=self._dataset_id_for_disk_label(label)
        )  # disk labels are formatted as flocker-<data_set_id>
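A hypothetical construction sketch for the driver above, showing the keys its __init__ reads out of azure_config; every value here is a placeholder:

# Hypothetical construction; all values are placeholders.
api = AzureStorageBlockDeviceAPI(
    subscription_id='00000000-0000-0000-0000-000000000000',
    management_certificate_path='/etc/flocker/azure.pem',
    service_name='my-cloud-service',
    storage_account_name='mystorageaccount',
    storage_account_key='<storage-account-key>',
    disk_container_name='flocker',
    debug=False)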
ACCOUNT_NAME = 'sounds'
ACCOUNT_KEY = AC.getAccountKey()  # primary access key
HOST_BASE = '.blob.core.windows.net'

blob_service = BlobService(account_name=ACCOUNT_NAME,
                           account_key=ACCOUNT_KEY,
                           host_base=HOST_BASE)

CONTAINER = 'bat-detective'  # or whatever else you like

created = blob_service.create_container(CONTAINER, x_ms_blob_public_access='container')
print "Created" if created else "Not created (probably already existing)"

audio_dir = '../../data/wav/'
SOUND_FILES = glob.glob(audio_dir + '*.wav')

for f in SOUND_FILES:
    print "uploading", os.path.basename(f)
    blob_service.put_block_blob_from_path(
        CONTAINER,            # container
        os.path.basename(f),  # blob
        f,                    # path
        x_ms_blob_content_type='audio/wav'
    )

blobs = blob_service.list_blobs(CONTAINER)
for blob in blobs:
    print(blob.name)
class SAzure(SyncStorage):
    def __init__(self):
        super().__init__()
        self.msg_key_na = _('Key not available')
        try:
            import alxlib.key
            key = alxlib.key.Key()
            if os.path.isfile(key.get_path()):
                sys.path.insert(0, key.get_dir())
                import alxkey
                self.key = alxkey.alxkey_azure
                """self.blob = BlobService(account_name=self.key['AZURE_STORAGE_ACCOUNT_NAME'],
                                           account_key=self.key['AZURE_ACCESS_KEY'])"""
            else:
                # raise (self.msg_key_na)
                self.key = None
        except:
            pass
            # raise (self.msg_key_na)

    def connect(self):
        try:
            self.blob = BlobService(account_name=self.key['AZURE_STORAGE_ACCOUNT_NAME'],
                                    account_key=self.key['AZURE_ACCESS_KEY'])
            return self.blob.list_containers(maxresults=1)
        except:
            return None

    def connect_blob(self, az_account_name=None, az_account_key=None):
        try:
            if az_account_name is not None:
                self.key['AZURE_STORAGE_ACCOUNT_NAME'] = az_account_name
                self.key['AZURE_ACCESS_KEY'] = az_account_key
            return self.connect()
        except:
            return None

    def path_clean(self, path: str):
        try:
            i = path.index("//") + 2
            self.container = path[0:i]
            if path[len(path) - 1] != "/":
                path += "/"
            return path[i:]
        except:
            print(_("Bad Path"))
            exit(1)

    def spath(self, container, root, b):
        spath = SyncPath()
        spath.BasePath = container

        if b.name[len(b.name) - 1] == "/":
            spath.IsDir = True
        else:
            spath.IsFile = True

        spath.AbsPath = b.name
        if len(root) > 0:
            spath.SPath = b.name[len(root) - 1:]
        else:
            spath.SPath = b.name

        spath.Size = b.properties.content_length

        import alxlib.time_help
        spath.ModifiedTS = alxlib.time_help.to_timestamp(b.properties.last_modified)

        spath.MD5 = b.properties.content_md5
        spath.sys = "azure"
        return spath

    def path_split(self, path: str):
        try:
            list = path.split("/")
            container = list[0]
            uri = ""
            if len(list) > 1:
                uri = "/".join(map(str, list[1:]))
            return container, uri
        except:
            print(_("Bad path"))
            exit(1)

    def path_list_blobs(self, container, uri):
        try:
            if len(uri) > 0:
                blobs = self.blob.list_blobs(container, prefix=uri)
            else:
                blobs = self.blob.list_blobs(container)

            """for blob in blobs:
                print(blob.properties.__dict__)
                print(blob.name)
                print(blob.url)"""
            return blobs
        except Exception as e:
            print(_("Bad connection"))
            logging.warning("container {0}, path {1}".format(container, uri))
            exit(1)

    def path_list(self, path):
        try:
            logging.debug("path_list {0}".format(path))
            container, uri = self.path_split(path)
            logging.debug("Container: {0}, Uri: {1}".format(container, uri))
            self.connect()
            self.blob.create_container(container)
            blobs = self.path_list_blobs(container, uri)

            d = {}
            for b in blobs:
                spath = self.spath(container, uri, b)
                # print(b.__dict__)
                # print(str(b.properties.last_modified.__dict__))
                # print(str(spath.ModifiedTS))
                d[spath.SPath] = spath
            # print(d)
            return d
        except Exception as e:
            print(e)

    def remove(self, src: SyncPath):
        try:
            logging.debug("Removing {0}".format(src.AbsPath))
            self.connect()
            self.blob.create_container(src.BasePath)
            self.blob.delete_blob(src.BasePath, src.AbsPath)
        except:
            pass

    def copy_local2azure(self, src, base_dir):
        try:
            container, uri = self.path_split(base_dir)
            if len(src.SPath) > 0 and src.SPath[0] == "/":
                path = uri + src.SPath[1:]
            else:
                path = uri + src.SPath

            logging.debug("copy_local2azure Spath {0}. path:{1}".format(src.SPath, path))
            self.connect()
            if not src.IsDir:
                self.blob.put_block_blob_from_path(container, path, src.AbsPath)
            else:
                self.blob.put_block_blob_from_text(container, path + "/", "")
        except Exception as e:
            print("Error Copying")
            print(e)

    def copy_azure2local(self, src, base_dir):
        try:
            if len(src.SPath) > 0 and (src.SPath[0] == "/" or src.SPath[0] == "\\"):
                path = src.SPath[1:]
            else:
                path = src.SPath

            path = os.path.normpath(os.path.join(base_dir, path))

            logging.debug("copy_azure2local basedir:{0} Spath {1}, path {2}, abs: {3}".format(
                base_dir, src.SPath, path, src.AbsPath))

            if not os.path.isdir(path):
                os.makedirs(os.path.dirname(path), exist_ok=True)
            # print(os.path.dirname(path) + "***************")

            if not (len(src.AbsPath) > 0 and src.AbsPath[len(src.AbsPath) - 1] == "/"):
                self.blob.get_blob_to_path(src.BasePath, src.AbsPath, path)

            """container, uri = self.path_split(base_dir)
            if len(src.SPath) > 0 and src.SPath[0] == "/":
                path = uri + src.SPath[1:]
            else:
                path = uri + src.SPath
            self.connect()
            if not src.IsDir:
                self.blob.get_blob_to_path(src.BasePath, path, src.AbsPath)
            else:
                self.blob.put_block_blob_from_text(container, path, "")"""
        except Exception as e:
            print("Error copying")
            print(e)
class AzureBackend(duplicity.backend.Backend):
    u"""
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, u'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                import azure.storage.blob
                if hasattr(azure.storage.blob, u'BlobService'):
                    from azure.storage.blob import BlobService
                else:
                    from azure.storage.blob.blockblobservice import BlockBlobService as BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException(u"""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        # TODO: validate container name
        self.container = parsed_url.path.lstrip(u'/')

        if u'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException(u'AZURE_ACCOUNT_NAME environment variable not set.')

        if u'AZURE_ACCOUNT_KEY' in os.environ:
            if u'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ[u'AZURE_ACCOUNT_KEY'],
                                                endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ[u'AZURE_ACCOUNT_KEY'])
            self._create_container()
        elif u'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ:
            if u'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE'],
                                                endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE'])
        else:
            raise BackendException(
                u'Neither the AZURE_ACCOUNT_KEY nor the AZURE_SHARED_ACCESS_SIGNATURE '
                u'environment variable is set.')

        if globals.azure_max_single_put_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size

        if globals.azure_max_block_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_BLOCK_SIZE
                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size

    def _create_container(self):
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError(u"Could not create Azure container: %s"
                           % str(e.message).split(u'\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        remote_filename = fsdecode(remote_filename)
        kwargs = {}
        if globals.azure_max_connections:
            kwargs[u'max_connections'] = globals.azure_max_connections

        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        try:
            self.blob_service.create_blob_from_path(self.container, remote_filename,
                                                    source_path.name, **kwargs)
        except AttributeError:  # Old versions use a different method name
            self.blob_service.put_block_blob_from_path(self.container, remote_filename,
                                                       source_path.name, **kwargs)

        self._set_tier(remote_filename)

    def _set_tier(self, remote_filename):
        if globals.azure_blob_tier is not None:
            try:
                self.blob_service.set_standard_blob_tier(self.container, remote_filename,
                                                         globals.azure_blob_tier)
            except AttributeError:  # might not be available in old API
                pass

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, fsdecode(remote_filename), local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, fsdecode(filename))

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, fsdecode(filename))
        try:
            info = {u'size': int(prop.properties.content_length)}
        except AttributeError:
            # old versions directly returned the properties
            info = {u'size': int(prop[u'content-length'])}
        return info

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
print db_name
conn = sqlite3.connect(db_name)
c = conn.cursor()

AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']
blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

total_length = 0
number = 0
next_marker = None
while True:
    blobs = blob_service.list_blobs('nexradl2', maxresults=5000, marker=next_marker)
    next_marker = blobs.next_marker
    # print(next_marker)
    print "length of blobs:"
    print(len(blobs))
    newcount = 0
    for blob in blobs:
        path = blob.name
        # print(blob.name)
        # print(blob.properties.content_length)
        # use a parameterized query instead of string interpolation, so paths
        # containing quotes can't break the statement
        c.execute("update files set azure='yes' where path = ?", (path,))
        total_length = total_length + blob.properties.content_length
        number = number + 1
        newcount = newcount + 1
    print "length of newcount:"
#!/usr/bin/python

import os
import csv
import argparse

from azure.storage import BlobService

container = 'images'
account_name = 'xxxx'
account_key = 'xxxx'

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Lists all blobs in the container and saves information about extensions')
    parser.add_argument("filename", help='File to write results')
    args = parser.parse_args()

    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blobs = blob_service.list_blobs(container)

    with open(args.filename, 'w') as f:
        writer = csv.writer(f)
        for b in blobs:
            word_data = os.path.splitext(b.name)
            writer.writerow(word_data)
class AzureBlobStorage(Storage):

    def __init__(self, account='nyxstorage', container='pxo'):
        self.base_storage_uri = 'http://%s.blob.core.windows.net/%s/' % (
            account, container)
        self.blob_service = BlobService(
            account, get_env_variable('AZURE_BLOB_STORAGE_KEY'))
        self.container = container

    def _open(self, name, mode='rb'):
        data = self.blob_service.get_blob(self.container, name)
        return ContentFile(data)

    def _save(self, name, content):
        _file = content.read()
        file_name = content.name[-35:]
        self.blob_service.put_blob(
            self.container, file_name, _file, x_ms_blob_type='BlockBlob')
        return self.base_storage_uri + file_name

    def create_container(self, container_name):
        result = self.blob_service.create_container(
            container_name, x_ms_blob_public_access='container')
        return result

    def delete(self, name):
        self.blob_service.delete_blob(self.container, name)

    def exists(self, name):
        try:
            self.blob_service.get_blob_properties(self.container, name)
        except:
            return False
        else:
            return True

    def get_available_name(self, name):
        return name

    def get_blobs(self):
        blobs = self.blob_service.list_blobs(self.container)
        return blobs

    def get_valid_name(self, name):
        return name

    def modified_time(self, name):
        metadata = self.blob_service.get_blob_metadata(self.container, name)
        modified_time = float(metadata.get('x-ms-meta-modified_time'))
        return datetime.fromtimestamp(modified_time)

    def set_public_container(self, container_name):
        result = self.blob_service.set_container_acl(
            container_name, x_ms_blob_public_access='container')
        return result

    def size(self, name):
        properties = self.blob_service.get_blob_properties(
            self.container, name)
        return properties.get('content-length')

    def url(self, name):
        blob = self.blob_service.list_blobs(self.container, prefix=name)
        return blob.blobs[0].url
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure SDK for Python library.
        try:
            import azure
            from azure.storage import BlobService
        except ImportError:
            raise BackendException('Azure backend requires Microsoft Azure SDK for Python '
                                   '(https://github.com/Azure/azure-sdk-for-python).')

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')

        if 'AZURE_ACCOUNT_KEY' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.')

        account_name = os.environ['AZURE_ACCOUNT_NAME']
        account_key = os.environ['AZURE_ACCOUNT_KEY']
        self.WindowsAzureMissingResourceError = azure.WindowsAzureMissingResourceError
        self.blob_service = BlobService(account_name=account_name, account_key=account_key)

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except azure.WindowsAzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-blob
        self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)

    def _get(self, remote_filename, local_path):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-blob
        blobs = self.blob_service.list_blobs(self.container)
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        return {'size': int(prop['content-length'])}

    def _error_code(self, operation, e):
        if isinstance(e, self.WindowsAzureMissingResourceError):
            return log.ErrorCode.backend_not_found
class Storage(driver.Base):

    supports_bytes_range = True

    def __init__(self, path=None, config=None):
        self._config = config
        self._container = self._config.azure_storage_container

        protocol = 'https' if self._config.azure_use_https else 'http'
        acct_name = self._config.azure_storage_account_name
        acct_key = self._config.azure_storage_account_key
        self._blob = BlobService(
            account_name=acct_name, account_key=acct_key, protocol=protocol)

        self._init_container()
        logger.debug("Initialized azureblob storage driver")

    def _init_container(self):
        '''Initializes image container on Azure blob storage if the container
        does not exist.
        '''
        created = self._blob.create_container(
            self._container, x_ms_blob_public_access='blob',
            fail_on_exist=False)
        if created:
            logger.info('Created blob container for image registry.')
        else:
            logger.debug('Registry container already exists.')
        return created

    @lru.get
    def get_content(self, path):
        try:
            return self._blob.get_blob(self._container, path)
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    @lru.set
    def put_content(self, path, content):
        self._blob.put_blob(self._container, path, content, 'BlockBlob')
        return path

    def stream_read(self, path, bytes_range=None):
        try:
            f = io.BytesIO()
            self._blob.get_blob_to_file(self._container, path, f)
            nb_bytes = 0  # bytes handed out so far (fix: was never initialized)

            if bytes_range:
                f.seek(bytes_range[0])
                total_size = bytes_range[1] - bytes_range[0] + 1
            else:
                f.seek(0)

            while True:
                buf = None
                if bytes_range:
                    # Bytes Range is enabled
                    buf_size = self.buffer_size
                    if nb_bytes + buf_size > total_size:
                        # We make sure we don't read out of the range
                        buf_size = total_size - nb_bytes
                    if buf_size > 0:
                        buf = f.read(buf_size)
                        nb_bytes += len(buf)
                    else:
                        # We're at the end of the range
                        buf = ''
                else:
                    buf = f.read(self.buffer_size)
                if not buf:
                    break
                yield buf
        except IOError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    def stream_write(self, path, fp):
        self._blob.put_block_blob_from_file(self._container, path, fp)

    def list_directory(self, path=None):
        if not path.endswith('/'):
            path += '/'  # path=a would list a/b.txt as well as 'abc.txt'

        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)

        return [b.name for b in blobs]

    def exists(self, path):
        try:
            self._blob.get_blob_properties(self._container, path)
            return True
        except azure.WindowsAzureMissingResourceError:
            return False

    @lru.remove
    def remove(self, path):
        is_blob = self.exists(path)
        if is_blob:
            self._blob.delete_blob(self._container, path)
            return

        exists = False
        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)
        for b in blobs:
            self._blob.delete_blob(self._container, b.name)

    def get_size(self, path):
        try:
            properties = self._blob.get_blob_properties(self._container, path)
            return int(properties['content-length'])  # auto-converted to long
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)
        thread.join(timeout)
        if thread.is_alive():
            print 'Terminating process'
            self.process.terminate()
            thread.join()
        print self.process.returncode

#command = Command("echo 'Process started'; sleep 2; echo 'Process finished'")
#print command.run(timeout=3)
#print command.run(timeout=1)
#
#command = Command('ping www.google.com')
#print command.run(timeout=1)

AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']
blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

print blob_service.put_block_blob_from_path(
    'nexradl2',
    '201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    '/snfs9/q2/levelii_tarfiles/201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    max_connections=5,
)

blobs = blob_service.list_blobs('nexradl2', maxresults=10)
for blob in blobs:
    print(blob.name)
    print(blob.url)
def generate_website_and_upload_azure(azure_csv_container, azure_web_container):
    blob_service = BlobService(account_name=os.getenv('ACC_NAME'),
                               account_key=os.getenv('ACCESS_KEY'))

    blob_list = blob_service.list_blobs(azure_csv_container)
    blob_name_list = blob_list.blobs

    keys = []
    # Only keep files whose dates can be parsed
    for k in blob_name_list:
        try:
            parser.parse(k.name[:8])
            keys.append(k)
        except:
            pass

    keys = [k for k in keys if (".zip" in k.name or ".csv" in k.name)]

    my_array = []
    for k in keys:
        my_dict = {}
        url = r"http://fhrscsvs.blob.core.windows.net/{}/{}".format(azure_csv_container, k.name)
        name = k.name
        date = parser.parse(name[:8])
        dateformat = date.strftime("%a %d %b %Y")
        my_dict["Date of data download"] = dateformat
        my_dict["Size"] = sizeof_fmt(k.properties.content_length)
        name = get_link_text(name, dateformat, my_dict)
        my_dict["File"] = "<a href='{0}'>{1}</a>".format(url, name)
        my_array.append(my_dict)

    my_array = sorted(my_array, key=lambda k: k['File'], reverse=True)

    table_array_fullsnapshot = [a for a in my_array if "__all_current" in a["File"]]
    table_array_differences = [a for a in my_array if "__diff" in a["File"]]

    template_dir = os.getenv('TEMPLATE_DIR')
    loader = jinja2.FileSystemLoader(template_dir)
    environment = jinja2.Environment(loader=loader)
    j_template = environment.get_template("template.html")

    order = ["File", "Size"]
    timestamp = datetime.datetime.now().strftime("%a %d %b %Y at %H:%M")

    import math
    sinarray = [(math.cos(math.radians(i * 5 - 180)) + 1) * 14 for i in range(0, 73)]

    html = j_template.render(table_array_fullsnapshot=table_array_fullsnapshot,
                             order=order,
                             timestamp=timestamp,
                             sinarray=sinarray,
                             table_array_differences=table_array_differences)

    blob_service.put_block_blob_from_text(
        azure_web_container,
        "index.html",
        html,
        x_ms_blob_content_type='text/html',
        text_encoding="utf-8",
    )
import os
import csv
import argparse

from azure.storage import BlobService

container = 'images'
account_name = 'xxxx'
account_key = 'xxxx'

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Lists all blobs in the container and saves information about extensions')
    parser.add_argument("filename", help='File to write results')
    args = parser.parse_args()

    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blobs = blob_service.list_blobs(container)

    with open(args.filename, 'w') as f:
        writer = csv.writer(f)
        for b in blobs:
            word_data = os.path.splitext(b.name)
            writer.writerow(word_data)
blobService = BlobService(storageAccount, accessKey)

# --------------
# Get the container and blob objects
# (resolve the Blob object from its mediaLink)
logger.debug("deleteOSandDataDisk.py: resolving container and blob from mediaLink (%s)" % mediaLink)

#
# Check that the blob to be deleted actually exists.
#
# Get the list of containers.
containerList = blobService.list_containers()
targetBlob = None
for container in containerList:
    #
    # Get the list of blobs contained in this container.
    blobList = blobService.list_blobs(container.name)
    for blob in blobList:
        #
        # Compare the URIs with the leading http*:// scheme stripped.
        blobname = blob.url.split('://')[1]
        if blobname == mediaLink.split('://')[1]:
            logger.debug('deleteOSandDataDisk.py: found target blobname: ' + blobname)
            targetBlob = blob
            targetContainer = container

#
# Exit with an error if the blob was not found.
if targetBlob is None:
    logger.error('deleteOSandDataDisk.py: target blob(%s) is not found.' % mediaLink.split('://')[1])
    sys.exit()

# -----------------
# Start the lease
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    """

    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.

        If the name prefix does not end with a trailing slash, and is not
        empty, one will be added automatically.

        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        """

        # Make sure azure libraries actually loaded
        assert(have_azure)

        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix

        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"

        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)

        # This will hold our Azure blob store connection
        self.connection = None

    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """

        return (self.account_name, self.account_key, self.container_name,
                self.name_prefix)

    def __setstate__(self, state):
        """
        Set up after unpickling.
        """

        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]

        self.connection = None

    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """

        if self.connection is None:
            RealTimeLogger.get().debug("Connecting to account {}, using "
                "container {} and prefix {}".format(self.account_name,
                self.container_name, self.name_prefix))

            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name, account_key=self.account_key)

    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """

        self.__connect()

        RealTimeLogger.get().debug("Loading {} from AzureIOStore".format(
            input_path))

        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
            self.name_prefix + input_path, local_path)

    def list_input_directory(self, input_path, recursive=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.

        Returns the names of files and fake directories in the given input
        fake directory, non-recursively.
        """

        self.__connect()

        RealTimeLogger.get().info("Enumerating {} from AzureIOStore".format(
            input_path))

        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path

        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"

        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None

        # This holds the subdirectories we found; we yield each exactly once
        # if we aren't recursing.
        subdirectories = set()

        while True:

            # Get the results from Azure. We skip the delimiter since it
            # doesn't seem to have the placeholder entries it's supposed to.
            result = self.connection.list_blobs(self.container_name,
                prefix=fake_directory, marker=marker)

            for blob in result:
                # Yield each result's blob name, but directory names only once

                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]

                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't
                    # supposed to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)

                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)

                        yield subdirectory
                else:
                    # We found an actual file
                    yield relative_path

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """

        self.__connect()

        RealTimeLogger.get().debug("Saving {} to AzureIOStore".format(
            output_path))

        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass

        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(self.container_name,
            self.name_prefix + output_path, local_path)

    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure
        already.
        """

        self.__connect()

        marker = None

        while True:
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                prefix=self.name_prefix + path, marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return True

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return False
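# A minimal usage sketch for AzureIOStore. The account, container, prefix and
# file names are hypothetical; it assumes the Toil credential lookup in
# __init__ succeeds.
store = AzureIOStore('myaccount', 'mycontainer', name_prefix='run1')
store.write_output_file('/tmp/results.tsv', 'outputs/results.tsv')
if store.exists('outputs/results.tsv'):
    store.read_input_file('outputs/results.tsv', '/tmp/copy.tsv')
for name in store.list_input_directory('outputs'):
    print(name)  # files and fake subdirectories, one level deep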
def deletefromazure(strPrefix):
    # NOTE: placeholder credentials -- never commit a real account key.
    blob_service = BlobService(account_name='wanderight', account_key='xxxx')
    blobsToDelete = blob_service.list_blobs(config['container'], prefix=strPrefix)
    for b in blobsToDelete:
        blob_service.delete_blob(config['container'], b.name)
print db_name
conn = sqlite3.connect(db_name)
c = conn.cursor()

AZURE_STORAGE_CONNECTION_STRING = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

total_length = 0
number = 0
next_marker = None
while True:
    blobs = blob_service.list_blobs("nexradl2", maxresults=5000, marker=next_marker)
    next_marker = blobs.next_marker
    # print(next_marker)
    print "length of blobs:"
    print len(blobs)
    newcount = 0
    for blob in blobs:
        path = blob.name
        # print(blob.name)
        # print(blob.properties.content_length)
        # Use a parameterized query rather than string interpolation.
        c.execute("update files set azure='yes' where path = ?", (path,))
        total_length = total_length + blob.properties.content_length
        number = number + 1
        newcount = newcount + 1
    print "length of newcount:"
    print newcount
import sys

from azure.storage import BlobService

key = raw_input("Please enter azure videopath blob storage key: ")
blob_service = BlobService(account_name='videopathmobilefiles', account_key=key)

source = sys.argv[1]
target = sys.argv[2]
print source + " -> " + target

blob_service.create_container(target, x_ms_blob_public_access='container')

# blob_service.copy_blob('test2', 'copiedkey', '/videopathmobilefiles/test/key')

blobs = blob_service.list_blobs(source)
for b in blobs:
    name = b.name
    source_path = '/videopathmobilefiles/' + source + '/' + name
    blob_service.copy_blob(target, name, source_path)
    print name
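# copy_blob only *starts* a server-side copy. A hedged sketch of polling for
# completion via the x-ms-copy-status property, the same pattern the
# crawl-data snippet further below uses:
import time

for b in blob_service.list_blobs(target):
    while True:
        props = blob_service.get_blob_properties(target, b.name)
        if props['x-ms-copy-status'] == 'success':
            break
        time.sleep(0.25)  # back off briefly before checking again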
class AzureBlobStorage(Storage):
    '''
    classdocs
    '''

    def __init__(self, azure_profile):
        '''
        Constructor
        '''
        if not azure_profile:
            raise Exception()
        else:
            container_name = azure_profile['container_name']
            account_name = azure_profile['account_name']
            account_key = azure_profile['key']
            base_url = azure_profile['base_url']

        self.blob = BlobService(
            account_name=account_name, account_key=account_key)
        self.container = container_name
        self.base_url = base_url

    def delete(self, name):
        """
        Delete file.
        """
        try:
            self.blob.delete_blob(self.container, name)
        except WindowsAzureMissingResourceError:
            return False
        else:
            return True

    def delete_files(self, files=None):
        """
        Delete files in container.
        """
        if not files:
            files = self.listdir(self.container)[1]
        for _file in files:
            self.delete(_file)

    def exists(self, name, with_properties=False):
        """
        Existence check.
        """
        result = False
        blob_properties = None
        try:
            blob_properties = self.blob.get_blob_properties(
                self.container, name)
        except WindowsAzureMissingResourceError:
            result = False
        else:
            result = True
        if with_properties:
            return result, blob_properties
        else:
            return result

    def get_available_name(self, name):
        return super(AzureBlobStorage, self).get_available_name(
            name.replace('\\', '/'))

    def get_valid_name(self, name):
        return name

    def _list(self, path, prefix, maxresults):
        result = []
        blobs = self.blob.list_blobs(path, prefix, maxresults)
        for _blob in blobs:
            result.append(_blob.name)
        return result

    def listdir(self, path=None, prefix=None, maxresults=None):
        """
        Catalog file list.
        """
        if not path:
            path = self.container
        return [], self._list(path, prefix, maxresults)

    def size(self, name):
        """
        File size.
        """
        result, properties = self.exists(name, with_properties=True)
        if result:
            return int(properties['content-length'])
        else:
            return 0

    def url(self, name, chk_exist=False):
        """
        URL for file downloading.
        """
        if chk_exist:
            if self.exists(name):
                return '%s%s/%s' % (self.base_url, self.container, name)
            else:
                return None
        else:
            return '%s%s/%s' % (self.base_url, self.container, name)

    def _open(self, name, mode='rb'):
        """
        Open file.
        """
        in_mem_file = StringIO.StringIO(
            self.blob.get_blob(self.container, name))
        in_mem_file.name = name
        in_mem_file.mode = mode
        return File(in_mem_file)

    def _save(self, name, blob_to_upload, x_ms_blob_type='BlockBlob',
              content_type=None):
        """
        Save file.
        """
        if hasattr(blob_to_upload, 'content_type'):
            content_type = blob_to_upload.content_type or None
        if content_type is None:
            content_type = mimetypes.guess_type(name)[0] or None
        blob_to_upload.seek(0)
        # put_blob expects the blob body as bytes, not a file object.
        self.blob.put_blob(self.container, name, blob_to_upload.read(),
                           x_ms_blob_type,
                           x_ms_blob_content_type=content_type)
        return name

    def modified_time(self, name):
        """
        Last modification time.
        """
        result, properties = self.exists(name, with_properties=True)
        if result:
            date_string = properties['last-modified']
            modified_dt = parser.parse(date_string)
            if timezone.is_naive(modified_dt):
                return modified_dt
            else:
                return timezone.make_naive(
                    modified_dt, timezone.get_current_timezone())
        else:
            return None

    created_time = accessed_time = modified_time
thread = threading.Thread(target=target)
thread.start()
thread.join(timeout)
if thread.is_alive():
    print 'Terminating process'
    self.process.terminate()
    thread.join()
print self.process.returncode

# command = Command("echo 'Process started'; sleep 2; echo 'Process finished'")
# print command.run(timeout=3)
# print command.run(timeout=1)
#
# command = Command('ping www.google.com')
# print command.run(timeout=1)

AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']
blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

print blob_service.put_block_blob_from_path(
    'nexradl2',
    '201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    '/snfs9/q2/levelii_tarfiles/201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    max_connections=5,
)

blobs = blob_service.list_blobs('nexradl2', maxresults=10)
for blob in blobs:
    print(blob.name)
    print(blob.url)
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name,
                                account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is
        # probably lacking
        self.log.warning("AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name, prefix=path,
                                     delimiter="/", include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r",
                       key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key,
                                          filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key,
                                          fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_bytes(self.container_name, key), \
            self._metadata_for_key(key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name, time.time() - start_time)
        return container_name
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option(
            '-w', '--wipe',
            action='store_true', dest='wipe', default=False,
            help="Wipes out entire contents of container first."),
        optparse.make_option(
            '-t', '--test-run',
            action='store_true', dest='test_run', default=False,
            help="Performs a test run of the sync."),
        optparse.make_option(
            '-c', '--container',
            dest='container',
            help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        if hasattr(options, 'container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
                                   account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except WindowsAzureMissingResourceError:
            self.service.create_container(self.STATIC_CONTAINER,
                                          x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER,
                                       x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count,
            self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue  # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(
                    self.STATIC_CONTAINER, object_name)
            except WindowsAzureMissingResourceError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (
                    properties['last-modified'] and
                    datetime.datetime.strptime(
                        properties['last-modified'],
                        "%a, %d %b %Y %H:%M:%S %Z") or
                    None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'r').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                self.service.put_blob(self.STATIC_CONTAINER, object_name,
                                      file_contents,
                                      x_ms_blob_type='BlockBlob',
                                      x_ms_blob_content_type=content_type,
                                      content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, 'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                import azure.storage.blob
                if hasattr(azure.storage.blob, 'BlobService'):
                    from azure.storage.blob import BlobService
                else:
                    from azure.storage.blob.blockblobservice import BlockBlobService as BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException("""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')

        if 'AZURE_ACCOUNT_KEY' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'])
            self._create_container()
        elif 'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'])
        else:
            raise BackendException(
                'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable is set.')

        if globals.azure_max_single_put_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size

        if globals.azure_max_block_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_BLOCK_SIZE
                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size

    def _create_container(self):
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it
            # already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        kwargs = {}
        if globals.azure_max_connections:
            kwargs['max_connections'] = globals.azure_max_connections

        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        try:
            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
        except AttributeError:  # Old versions use a different method name
            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        try:
            info = {'size': int(prop.properties.content_length)}
        except AttributeError:
            # old versions directly returned the properties
            info = {'size': int(prop['content-length'])}
        return info

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
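# How a backend like this is typically wired into duplicity -- a hedged
# sketch; the "azure" scheme name mirrors duplicity's own convention for
# registering backends:
duplicity.backend.register_backend('azure', AzureBackend)
# After registration, a URL such as azure://container-name selects this
# backend, with credentials taken from the AZURE_* environment variables
# checked in __init__ above.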
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat: dict, files: None | dict<fname, stat>)>
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                   st_uid=getuid(), st_size=0,
                                   st_mtime=int(time.time())))

        self.containers = cmap  # destroys fs tree cache resistant to misses

    def _parse_path(self, path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755), st_size=0,
                               st_uid=getuid(), st_mtime=time.time())

                if self.containers['/' + cname]['files'] is None:
                    self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]
        return None

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)
            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name != name.lower():
                log.error("Container names cannot contain uppercase "
                          "characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive '
                          'dashes (-).')
                raise FuseOSError(EAGAIN)
            # TODO handle all "-"s must be preceded by letter or numbers
            # TODO starts with only letter or number, can contain letter, nr, '-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                log.error("Invalid container name or container already "
                          "exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level rmdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                    st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:  # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except WindowsAzureMissingResourceError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except WindowsAzureError as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)

            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0  # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:  # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) * block_size,
                                                       len(data))]
                        block_id = base64.encodestring(
                            '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except WindowsAzureError:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except WindowsAzureMissingResourceError:
            raise FuseOSError(ENOENT)
        except Exception:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        # fallback to the cached handle contents (not normally reached)
        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
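# A hedged sketch of mounting the filesystem with fusepy (the mount point and
# credentials are hypothetical; assumes the same fuse module that provides
# LoggingMixIn/Operations above):
from fuse import FUSE

if __name__ == '__main__':
    fs = AzureFS('myaccount', 'mykey')
    # foreground keeps the process attached; nothreads avoids concurrent
    # callbacks into the shared fds/containers caches.
    FUSE(fs, '/mnt/azurefs', foreground=True, nothreads=True)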
# connect to your storage account
from azure.storage import BlobService
blob_service = BlobService(account_name='YourAccountName', account_key='YourKey')

# list all CSV files in your storage account
blobs = []
marker = None
while True:
    batch = blob_service.list_blobs('YourContainer', marker=marker,
                                    prefix='input_')
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker

for blob in blobs:
    print(blob.name)

# read the blob file as a text file
# I just read in the first from the previous list
data = blob_service.get_blob_to_text('YourContainer', blobs[0].name).split("\n")
print("Number of lines in CSV " + str(len(data)))

# do your stuff
# I want to filter out some lines of my CSV and only keep those having ABC or DEF in them
matchers = ['abc', 'def']
matching = [s for s in data if any(xs in s for xs in matchers)]
print("Number of lines in CSV " + str(len(matching)))

# write your text directly back to blob storage
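# The original snippet stops before the upload; a minimal sketch of the
# write-back step using put_block_blob_from_text (the same call the website
# generator above uses). The destination blob name is hypothetical.
blob_service.put_block_blob_from_text(
    'YourContainer',
    'output_filtered.csv',
    "\n".join(matching),
    x_ms_blob_content_type='text/csv',
)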
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, 'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                from azure.storage.blob import BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException("""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')
        if 'AZURE_ACCOUNT_KEY' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.')
        self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                        account_key=os.environ['AZURE_ACCOUNT_KEY'])

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it
            # already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        return {'size': int(prop['content-length'])}

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
import sys
import time

from azure.storage import BlobService
from azure import WindowsAzureMissingResourceError

from CREDENTIALS import account_name, account_key

db = BlobService(account_name=account_name, account_key=account_key)

###

bucket = 'crawl-data'
in_progress = set()
# existing = set([x.name for x in db.list_blobs(bucket, 'common-crawl/crawl-data/CC-MAIN-2014-23/')])
todo = [x.rstrip() for x in sys.stdin.readlines()]
todo = [x for x in todo if x not in existing]

###

while todo or in_progress:
    new_progress = set()
    for path in in_progress:
        props = db.get_blob_properties(bucket, path)
        if props['x-ms-copy-status'] == 'success':
            print '\t%s completed' % path
            continue
        new_progress.add(path)
        time.sleep(0.25)

    print 'Task queue length is %d' % len(new_progress)
    print 'TODO queue length is %d' % len(todo)

    # Populate the queue
    while todo and len(new_progress) < 256:
        path = todo.pop()
        # If it exists, skip it -- only add if it's missing
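        # The snippet is truncated here; a hedged sketch of how the loop
        # plausibly continues, based on the imports and the copy-status
        # polling above. SOURCE_URL_PREFIX is a hypothetical stand-in for the
        # public URL prefix of the source blobs.
        try:
            db.get_blob_properties(bucket, path)
            continue  # already copied, skip it
        except WindowsAzureMissingResourceError:
            pass
        db.copy_blob(bucket, path, SOURCE_URL_PREFIX + path)  # start async copy
        new_progress.add(path)
    in_progress = new_progress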
    account_key = options.account_key
else:
    sys.stderr.write("Azure key is missing")
    sys.exit(1)

if header and not options.output_format:
    print '\t'.join(str(h) for h in headers)

blob_service = BlobService(account_name, account_key)
for container in blob_service.list_containers():
    c = container.name
    if c == "heartbeat":
        continue
    if options.date and not (c == "processed-" + options.date):
        continue
    if debug:
        sys.stderr.write("Processing container: " + str(c) + "\n")
    for b in blob_service.list_blobs(c):
        if debug:
            sys.stderr.write("Processing blob: " + str(b.name) + "\n")
        data = blob_service.get_blob(c, b.name)
        cs = StringIO.StringIO(data)
        gzipstream = gzip.GzipFile(fileobj=cs)
        if output_format == "txt":
            print gzipstream.read()
        elif output_format == "json":
            d = {}
            i = 0
            ds = gzipstream.read()
            # some DCU entries contain more than 28 values (outside the
            # definition of the headers)
            for x in ds.strip().split("\t")[:27]:
                d[headers[i]] = x
                i = i + 1
def getContainersWithBlobs(blob_service):
    """Lists every container and the blobs it holds.

    blob_service: the blob management service
    @class BlobService
    """
    for i in blob_service.list_containers().containers:
        print("Container name: {}".format(i.name))
        print("Container URL: {}".format(i.url))
        print("##############################")
        for j in blob_service.list_blobs(i.name).blobs:
            print("\tBlob name: {}".format(j.name))
            print("\tBlob URL: {}".format(j.url))
            print("\t------------------------------")

blob_service = BlobService(credentials.account_name, credentials.account_key)
getContainersWithBlobs(blob_service)

f_blob = open('Ejercicio10.txt', "w")
for i in blob_service.list_containers().containers:
    f_blob.write("Container name: {}".format(i.name))
    f_blob.write("Container URL: {}".format(i.url))
    f_blob.write("##############################")
    for j in blob_service.list_blobs(i.name).blobs:
        f_blob.write("\tBlob name: {}".format(j.name))
        f_blob.write("\tBlob URL: {}".format(j.url))
        f_blob.write("\t------------------------------")
f_blob.close()

# put_blob expects the blob body, not a (closed) file handle, so read the
# file back in before uploading it.
blob_service.put_blob('code', 'f_blob.txt',
                      open('Ejercicio10.txt', 'rb').read(), 'BlockBlob')