def submit():
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read, 24 hours) for the input container and save it to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (read/write/list, 24 hours) for the output container and save it to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs in the input container and collect their names
    blobs = blob_service.list_blobs(INPUT_CONTAINER)

    bloblist = []
    for blob in blobs:
        bloblist.append(blob.name)

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch; --array ranges are inclusive, so the last index is len(bloblist) - 1
    cli = "sbatch --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist) - 1)
    run(cli, showoutput=True)
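The sasUrl() helper is not shown on this page. A minimal sketch of what it might look like, assuming the legacy azure-storage SDK (~0.20.x, where AccessPolicy and SharedAccessPolicy are importable from azure.storage; exact import paths and signatures vary by release):

from datetime import datetime, timedelta

from azure.storage import AccessPolicy, SharedAccessPolicy
from azure.storage.blob import BlobService

def sasUrl(account, key, container, permission):
    # Hypothetical helper: issue a container-level SAS token valid for 24 hours.
    svc = BlobService(account_name=account, account_key=key)
    expiry = (datetime.utcnow() + timedelta(hours=24)).strftime('%Y-%m-%dT%H:%M:%SZ')
    policy = SharedAccessPolicy(AccessPolicy(expiry=expiry, permission=permission))
    return svc.generate_shared_access_signature(container, None, policy)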
Example 2
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name):
        BaseTransfer.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")

    def get_metadata_for_key(self, key):
        key = fix_path(key)
        return self.list_path(key)[0]['metadata']

    def list_path(self, path):
        return_list = []
        path = fix_path(path)
        self.log.info("Asking for listing of: %r", path)
        for r in self.conn.list_blobs(self.container_name, prefix=path, delimiter="/",
                                      include="metadata"):
            entry = {"name": r.name, "size": r.properties.content_length,
                     "last_modified": dateutil.parser.parse(r.properties.last_modified),
                     "metadata": r.metadata}
            return_list.append(entry)
        return return_list

    def delete_key(self, key_name):
        key_name = fix_path(key_name)
        self.log.debug("Deleting key: %r", key_name)
        return self.conn.delete_blob(self.container_name, key_name)

    def get_contents_to_file(self, obj_key, filepath_to_store_to):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, obj_key, filepath_to_store_to)

    def get_contents_to_string(self, obj_key):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r", obj_key)
        return self.conn.get_blob_to_bytes(self.container_name, obj_key), self.get_metadata_for_key(obj_key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        # Azure requires all metadata keys and values to be strings at the point of sending
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None):
        # Azure requires all metadata keys and values to be strings at the point of sending
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.time() - start_time)
        return container_name
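A hypothetical usage sketch of the class above (account name, key, container and file paths are placeholders):

transfer = AzureTransfer("myaccount", "<account key>", "backups")
transfer.store_file_from_disk("basebackup/base.tar", "/tmp/base.tar",
                              metadata={"size": 1234})
for entry in transfer.list_path("basebackup/"):
    print(entry["name"], entry["size"], entry["metadata"])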
Example 3
def list_files_from_path(container, path):
    # storage_name and storage_key are assumed to be module-level globals
    blob_service = BlobService(account_name=storage_name, account_key=storage_key)
    next_marker = None
    results = []
    while True:
        blobs = blob_service.list_blobs(container, prefix=path, maxresults=2000, marker=next_marker)
        for blob in blobs:
            results.append(blob.name)
        next_marker = blobs.next_marker
        if not next_marker:
            break
    return results
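The next_marker loop above is the standard way to page through result sets larger than maxresults with the legacy SDK. A hypothetical call, assuming the module-level storage_name/storage_key globals are set:

names = list_files_from_path("mycontainer", "logs/2015/")
print(len(names), "blobs found")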
Example 4
def run(itk_source_dir, externaldata_object_store, account_name, account_key):
    blob_service = BlobService(account_name=account_name,
                               account_key=account_key)
    blobs = blob_service.list_blobs('md5')
    current_blobs = [blob.name for blob in blobs]

    md5files = []
    for root, dirnames, filenames in os.walk(itk_source_dir):
        for filename in fnmatch.filter(filenames, '*.md5'):
            md5files.append(os.path.join(root, filename))

    for content_link in md5files:
        upload_to_azure(content_link, externaldata_object_store,
                        blob_service, current_blobs)
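A hypothetical invocation (all argument values are placeholders; upload_to_azure is defined elsewhere):

run(itk_source_dir='/src/ITK',
    externaldata_object_store='/data/ExternalData/Objects',
    account_name='<account name>',
    account_key='<account key>')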
Example 6
    def process(self):
        account_name = self.parameters.azure_account_name
        account_key = self.parameters.azure_account_key

        blob_service = BlobService(account_name, account_key, protocol="https")
        proxy_setting = self.parameters.https_proxy or ""
        date_setting = self.parameters.date or ""
        date = None
    
        if date_setting:
            if date_setting == "yesterday":
                date = datetime.date.today() - datetime.timedelta(days=1)  # for normal usage
            else:
                date = datetime.datetime.strptime(date_setting, "%Y-%m-%d").date()  # for debugging (probably)

        proxy_url = "https://" + proxy_setting if proxy_setting.find("https://") == -1 else proxy_setting
        proxy_options = urlparse(proxy_url)

        if date:
            self.logger.info("Fetching for date: %s (%s)" %  (date, date_setting))
        else:
            self.logger.info("No 'date' was specified, fetching ALL")
        
        if proxy_options.hostname:
            self.logger.info("Using https proxy(host=%s, port=%s)" % (proxy_options.hostname, proxy_options.port))
            blob_service.set_proxy(host=proxy_options.hostname, port=proxy_options.port)
        else:
            if proxy_setting:
                self.logger.info("Using NO proxy, couldn't use 'https_proxy' it was: %s" % proxy_setting)
            else:
                self.logger.info("Using NO proxy, 'https_proxy' was empty")

        for container in blob_service.list_containers():
            container_name = container.name
            if container_name == "heartbeat":
                continue

            if date and container_name != "processed-" + str(date):
                self.logger.info("IGNORING container '%s' didn't match date selection" % container_name)
                continue

            for blob in blob_service.list_blobs(container_name):
                self.logger.info("Fetching blob %s in container %s" % (container_name, blob.name))
                data = blob_service.get_blob(container_name, blob.name)
                cs = StringIO.StringIO(data)
                report = gzip.GzipFile(fileobj=cs).read()

                self.send_message(report)
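The fetch-and-decompress step can be isolated into a small helper; a sketch using only the Python 2 standard library, matching the StringIO usage above:

import gzip
import StringIO

def gunzip_blob(data):
    # Wrap the in-memory bytes in a file object so GzipFile can read them.
    return gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()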
Example 7
def download():
    blob_service = BlobService(account_name='squadshots', account_key='<account key>')  # key redacted
    try:
        blob_service.get_blob_to_path('album','image','static/output.png')
    except Exception as e:
        print e
    blobs = blob_service.list_blobs('album', include='metadata')
    for blob in blobs:
        if blob.metadata is not None:
            for key in blob.metadata:
                if blob.metadata[key] == session['username']:
                    blob_service.get_blob_to_path('album', blob.name, 'static/output.png')
    for i in blob_service.list_containers():
        print "This container is " + i.name


    return render_template('album.html',filename="static/output.png")
Example 8
def upload_all_new_azure(local_folder, azure_container, account_name, account_key):

    # Use the credentials passed in rather than re-reading the environment
    blob_service = BlobService(account_name=account_name, account_key=account_key)

    blob_list = blob_service.list_blobs(azure_container)

    blob_name_list = [b.name for b in blob_list.blobs]

    blob_name_set = set(blob_name_list)

    # Now for each file in the local folder, see whether it's already in the Azure container
    
    localfiles = os.listdir(local_folder)
    localfiles = [f for f in localfiles if "~" not in f]
    localfiles = [f for f in localfiles if f[0] != "."]
    localfiles = [f for f in localfiles if (".zip" in f or ".csv" in f)]
    
    localfiles = set(localfiles)

    files_to_upload = localfiles - blob_name_set

    orig_len = len(files_to_upload)
    error_counter = 0
    while len(files_to_upload)>0:
        if error_counter>orig_len:
            logging.error("too many upload failures, exiting")
            sys.exit()
        filename = files_to_upload.pop()

        try:
            blob_service.put_block_blob_from_path(
                azure_container,
                filename,
                os.path.join(local_folder, filename)
            )

        except Exception:
            error_counter += 1
            logging.error(filename + " failed to upload")
            files_to_upload.add(filename)
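A hypothetical call (environment variable names and paths are placeholders):

upload_all_new_azure(local_folder='/data/csvs',
                     azure_container='csvs',
                     account_name=os.getenv('ACC_NAME'),
                     account_key=os.getenv('ACCESS_KEY'))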
Example 10
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name,
                                account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is probably lacking
        self.log.warning(
            "AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name,
                                     prefix=path,
                                     delimiter="/",
                                     include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", key,
                       filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key,
                                          filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key,
                                          fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return (self.conn.get_blob_to_bytes(self.container_name, key),
                self._metadata_for_key(key))

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(
            self.container_name,
            key,
            memstring,
            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(
            self.container_name,
            key,
            filepath,
            x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name,
                       time.time() - start_time)
        return container_name
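A hypothetical usage sketch of this variant (credentials are placeholders; keys are normalized through format_key_for_backend, so the prefix is applied automatically):

transfer = AzureTransfer("myaccount", "<account key>", "backups", prefix="site1")
transfer.store_file_from_memory("state.json", b"{}", metadata={"version": 2})
for entry in transfer.list_path(""):
    print(entry["name"], entry["size"])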
Example 11
class AzureStorageBlockDeviceAPI(object):
    """
    An ``IBlockDeviceAsyncAPI`` which uses Azure Storage Backed Block Devices
    Current Support: Azure SMS API
    """
    def __init__(self, **azure_config):
        """
        :param ServiceManagement azure_client: an instance of the azure
        serivce managment api client.
        :param String service_name: The name of the cloud service
        :param
            names of Azure volumes to identify cluster
        :returns: A ``BlockDeviceVolume``.
        """
        self._instance_id = self.compute_instance_id()
        self._azure_service_client = ServiceManagementService(
            azure_config['subscription_id'],
            azure_config['management_certificate_path'])
        self._service_name = azure_config['service_name']
        self._azure_storage_client = BlobService(
            azure_config['storage_account_name'],
            azure_config['storage_account_key'])
        self._storage_account_name = azure_config['storage_account_name']
        self._disk_container_name = azure_config['disk_container_name']

        if azure_config['debug']:
            to_file(sys.stdout)

    def allocation_unit(self):
        """
        1GiB is the minimum allocation unit for azure disks
        return int: 1 GiB
        """

        return int(GiB(1).to_Byte().value)

    def compute_instance_id(self):
        """
        Azure Stored a UUID in the SDC kernel module.
        """

        # Node host names should be unique within a vnet

        return unicode(socket.gethostname())

    def create_volume(self, dataset_id, size):
        """
        Create a new volume.
        :param UUID dataset_id: The Flocker dataset ID of the dataset on this
            volume.
        :param int size: The size of the new volume in bytes.
        :returns: A ``Deferred`` that fires with a ``BlockDeviceVolume`` when
            the volume has been created.
        """

        size_in_gb = Byte(size).to_GiB().value

        if size_in_gb % 1 != 0:
            raise UnsupportedVolumeSize(dataset_id)

        self._create_volume_blob(size, dataset_id)

        label = self._disk_label_for_dataset_id(str(dataset_id))
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=size,
            attached_to=None,
            dataset_id=self._dataset_id_for_disk_label(label))

    def destroy_volume(self, blockdevice_id):
        """
        Destroy an existing volume.
        :param unicode blockdevice_id: The unique identifier for the volume to
            destroy.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :return: ``None``
        """
        log_info('Destroying block device: ' + str(blockdevice_id))
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:

            raise UnknownVolume(blockdevice_id)

        request = None

        if lun is not None:
            request = \
                self._azure_service_client.delete_data_disk(
                    service_name=self._service_name,
                    deployment_name=self._service_name,
                    role_name=target_disk.attached_to.role_name,
                    lun=lun,
                    delete_vhd=True)
        else:
            if target_disk.__class__.__name__ == 'Blob':
                # unregistered disk
                self._azure_storage_client.delete_blob(
                    self._disk_container_name, target_disk.name)
            else:
                request = self._azure_service_client.delete_disk(
                    target_disk.name, True)

        if request is not None:
            self._wait_for_async(request.request_id, 5000)
            self._wait_for_detach(blockdevice_id)

    def attach_volume(self, blockdevice_id, attach_to):
        """
        Attach ``blockdevice_id`` to ``host``.
        :param unicode blockdevice_id: The unique identifier for the block
            device being attached.
        :param unicode attach_to: An identifier like the one returned by the
            ``compute_instance_id`` method indicating the node to which to
            attach the volume.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises AlreadyAttachedVolume: If the supplied ``blockdevice_id`` is
            already attached.
        :returns: A ``BlockDeviceVolume`` with a ``host`` attribute set to
            ``host``.
        """

        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is not None:
            raise AlreadyAttachedVolume(blockdevice_id)

        log_info('Attempting to attach ' + str(blockdevice_id) + ' to ' +
                 str(attach_to))

        disk_size = self._attach_disk(blockdevice_id, target_disk, attach_to)

        self._wait_for_attach(blockdevice_id)

        log_info('disk attached')

        return self._blockdevicevolume_from_azure_volume(
            blockdevice_id, disk_size, attach_to)

    def detach_volume(self, blockdevice_id):
        """
        Detach ``blockdevice_id`` from whatever host it is attached to.
        :param unicode blockdevice_id: The unique identifier for the block
            device being detached.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is
            not attached to anything.
        :returns: ``None``
        """

        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        # contrary to the function name it doesn't delete by default, just detaches

        request = \
            self._azure_service_client.delete_data_disk(
                service_name=self._service_name,
                deployment_name=self._service_name,
                role_name=role_name, lun=lun)

        self._wait_for_async(request.request_id, 5000)

        self._wait_for_detach(blockdevice_id)

    def get_device_path(self, blockdevice_id):
        """
        Return the device path that has been allocated to the block device on
        the host to which it is currently attached.
        :param unicode blockdevice_id: The unique identifier for the block
            device.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is
            not attached to a host.
        :returns: A ``FilePath`` for the device.
        """

        (target_disk_or_blob, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk_or_blob is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        return Lun.get_device_path_for_lun(lun)

    def list_volumes(self):
        """
        List all the block devices available via the back end API.
        :returns: A ``list`` of ``BlockDeviceVolume``s.
        """
        media_url_prefix = 'https://' + self._storage_account_name \
            + '.blob.core.windows.net/' + self._disk_container_name
        disks = self._azure_service_client.list_disks()
        disk_list = []
        all_blobs = self._get_flocker_blobs()
        for d in disks:

            if media_url_prefix not in d.media_link or \
                    'flocker-' not in d.label:
                continue

            role_name = None

            if d.attached_to is not None \
                    and d.attached_to.role_name is not None:

                role_name = d.attached_to.role_name

            disk_list.append(
                self._blockdevicevolume_from_azure_volume(
                    d.label, self._gibytes_to_bytes(d.logical_disk_size_in_gb),
                    role_name))

            if d.label in all_blobs:
                del all_blobs[d.label]

        for key in all_blobs:
            # include unregistered 'disk' blobs
            disk_list.append(
                self._blockdevicevolume_from_azure_volume(
                    all_blobs[key].name,
                    all_blobs[key].properties.content_length, None))

        return disk_list

    def _attach_disk(self, blockdevice_id, target_disk, attach_to):
        """
        Attaches disk to specified VM
        :param string blockdevice_id: The identifier of the disk
        :param DataVirtualHardDisk/Blob target_disk: The Blob
               or Disk to be attached
        :returns int: The size of the attached disk
        """

        lun = Lun.compute_next_lun(self._azure_service_client,
                                   self._service_name, str(attach_to))
        common_params = {
            'service_name': self._service_name,
            'deployment_name': self._service_name,
            'role_name': attach_to,
            'lun': lun
        }
        disk_size = None

        if target_disk.__class__.__name__ == 'Blob':
            # exclude 512 byte footer
            disk_size = target_disk.properties.content_length

            common_params['source_media_link'] = \
                'https://' + self._storage_account_name \
                + '.blob.core.windows.net/' + self._disk_container_name \
                + '/' + blockdevice_id

            common_params['disk_label'] = blockdevice_id

        else:

            disk_size = self._gibytes_to_bytes(
                target_disk.logical_disk_size_in_gb)

            common_params['disk_name'] = target_disk.name

        request = self._azure_service_client.add_data_disk(**common_params)
        self._wait_for_async(request.request_id, 5000)

        return disk_size

    def _create_volume_blob(self, size, dataset_id):
        # Create a new page blob as a blank disk
        self._azure_storage_client.put_blob(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            blob=None,
            x_ms_blob_type='PageBlob',
            x_ms_blob_content_type='application/octet-stream',
            x_ms_blob_content_length=size)

        # for disk to be a valid vhd it requires a vhd footer
        # on the last 512 bytes
        vhd_footer = Vhd.generate_vhd_footer(size)

        self._azure_storage_client.put_page(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            page=vhd_footer,
            x_ms_page_write='update',
            x_ms_range='bytes=' + str((size - 512)) + '-' + str(size - 1))

    def _disk_label_for_dataset_id(self, dataset_id):
        """
        Returns a disk label for a given Dataset ID
        :param unicode dataset_id: The identifier of the dataset
        :returns string: A string representing the disk label
        """

        label = 'flocker-' + str(dataset_id)
        return label

    def _dataset_id_for_disk_label(self, disk_label):
        """
        Returns a UUID representing the Dataset ID for the given disk
        label
        :param string disk_label: The disk label
        :returns UUID: The UUID of the dataset
        """
        return UUID(disk_label.replace('flocker-', ''))

    def _get_disk_vmname_lun(self, blockdevice_id):
        target_disk = None
        target_lun = None
        role_name = None
        disk_list = self._azure_service_client.list_disks()

        for d in disk_list:

            if 'flocker-' not in d.label:
                continue
            if d.label == str(blockdevice_id):
                target_disk = d
                break

        if target_disk is None:
            # check for unregistered disk
            blobs = self._get_flocker_blobs()
            blob = None

            if str(blockdevice_id) in blobs:
                blob = blobs[str(blockdevice_id)]

            return blob, None, None

        vm_info = None

        if hasattr(target_disk.attached_to, 'role_name'):
            vm_info = self._azure_service_client.get_role(
                self._service_name, self._service_name,
                target_disk.attached_to.role_name)

            for d in vm_info.data_virtual_hard_disks:
                if d.disk_name == target_disk.name:
                    target_lun = d.lun
                    break

            role_name = target_disk.attached_to.role_name

        return (target_disk, role_name, target_lun)

    def _get_flocker_blobs(self):
        all_blobs = {}

        blobs = self._azure_storage_client.list_blobs(
            self._disk_container_name, prefix='flocker-')

        for b in blobs:
            # todo - this could be big!
            all_blobs[b.name] = b

        return all_blobs

    def _wait_for_detach(self, blockdevice_id):
        role_name = ''
        lun = -1

        timeout_count = 0

        log_info('waiting for azure to report disk as detached...')

        while role_name is not None or lun is not None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(1)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

        log_info('Disk Detached')

    def _wait_for_attach(self, blockdevice_id):
        timeout_count = 0
        lun = None

        log_info('waiting for azure to report disk as attached...')

        while lun is None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(.001)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

    def _wait_for_async(self, request_id, timeout):
        count = 0
        result = self._azure_service_client.get_operation_status(request_id)
        while result.status == 'InProgress':
            count = count + 1
            if count > timeout:
                log_error('Timed out waiting for async operation to complete.')
                raise AsynchronousTimeout()
            time.sleep(.001)
            log_info('.')
            result = self._azure_service_client.get_operation_status(
                request_id)
            if result.error:
                log_error(result.error.code)
                log_error(str(result.error.message))

        log_error(result.status + ' after ' + str(count) + ' polls')

    def _gibytes_to_bytes(self, size):

        return int(GiB(size).to_Byte().value)

    def _blockdevicevolume_from_azure_volume(self, label, size,
                                             attached_to_name):

        # azure will report the disk size excluding the 512 byte footer
        # however flocker expects the exact value it requested for disk size
        # so offset the reported size to flocker by 512 bytes
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=int(size),
            attached_to=attached_to_name,
            dataset_id=self._dataset_id_for_disk_label(
                label))  # disk labels are formatted as flocker-<data_set_id>
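The constructor above consumes a flat azure_config mapping; a hypothetical configuration (all values are placeholders):

azure_config = {
    'subscription_id': '<subscription id>',
    'management_certificate_path': '/etc/flocker/azure.pem',
    'service_name': 'my-cloud-service',
    'storage_account_name': 'mystorageaccount',
    'storage_account_key': '<storage key>',
    'disk_container_name': 'flocker-disks',
    'debug': False,
}
api = AzureStorageBlockDeviceAPI(**azure_config)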
Example 12
ACCOUNT_NAME = 'sounds'
ACCOUNT_KEY  = AC.getAccountKey() # primary access key
HOST_BASE    = '.blob.core.windows.net'

blob_service = BlobService(account_name=ACCOUNT_NAME,
                           account_key=ACCOUNT_KEY,
                           host_base=HOST_BASE)

CONTAINER = 'bat-detective' # or whatever else you like

created = blob_service.create_container(CONTAINER, x_ms_blob_public_access='container')
print "Created" if created else "Not created (probably already existing)"

audio_dir = '../../data/wav/'
SOUND_FILES = glob.glob(audio_dir + '*.wav')

for f in SOUND_FILES:
    print "uploading", os.path.basename(f)
    blob_service.put_block_blob_from_path(
        CONTAINER,                          # container
        os.path.basename(f),                # blob
        f,                                  # path
        x_ms_blob_content_type='audio/wav'
    )


blobs = blob_service.list_blobs(CONTAINER)

for blob in blobs:
    print(blob.name)
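Because the container is created with x_ms_blob_public_access='container', every uploaded blob is publicly readable at a predictable URL, for example:

for blob in blobs:
    print('https://' + ACCOUNT_NAME + HOST_BASE + '/' + CONTAINER + '/' + blob.name)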
Example 13
File: sys.py Project: gomes-/alx
class SAzure(SyncStorage):
    def __init__(self):
        super().__init__()
        self.msg_key_na = _('Key not available')
        try:
            import alxlib.key

            key = alxlib.key.Key()
            if os.path.isfile(key.get_path()):
                sys.path.insert(0, key.get_dir())

                import alxkey

                self.key = alxkey.alxkey_azure
                """self.blob = BlobService(account_name=self.key['AZURE_STORAGE_ACCOUNT_NAME'],
                                        account_key=self.key['AZURE_ACCESS_KEY'])"""
            else:
                # raise (self.msg_key_na)
                self.key = None
        except:
            pass
            # raise (self.msg_key_na)

    def connect(self):
        try:

            self.blob = BlobService(account_name=self.key['AZURE_STORAGE_ACCOUNT_NAME'],
                                    account_key=self.key['AZURE_ACCESS_KEY'])

            return self.blob.list_containers(maxresults=1)

        except:
            return None

    def connect_blob(self, az_account_name=None, az_account_key=None):

        try:
            if az_account_name != None:
                self.key['AZURE_STORAGE_ACCOUNT_NAME'] = az_account_name
                self.key['AZURE_ACCESS_KEY'] = az_account_key

            return self.connect()

        except:
            return None

    def path_clean(self, path: str):
        try:
            i = path.index("//") + 2
            self.container = path[0:i]
            if path[len(path) - 1] != "/":
                path += "/"

            return path[i:]
        except:
            print(_("Bad Path"))
            exit(1)

    def spath(self, container, root, b):
        spath = SyncPath()
        spath.BasePath = container
        if b.name[len(b.name) - 1] == "/":
            spath.IsDir = True
        else:
            spath.IsFile = True
        spath.AbsPath = b.name
        if len(root) > 0:
            spath.SPath = b.name[len(root) - 1:]
        else:
            spath.SPath = b.name
        spath.Size = b.properties.content_length
        import alxlib.time_help

        spath.ModifiedTS = alxlib.time_help.to_timestamp(b.properties.last_modified)
        spath.MD5 = b.properties.content_md5
        spath.sys = "azure"
        return spath

    def path_split(self, path: str):
        try:
            parts = path.split("/")
            container = parts[0]
            uri = ""
            if len(parts) > 1:
                uri = "/".join(map(str, parts[1:]))

            return container, uri
        except:
            print(_("Bad path"))
            exit(1)

    def path_list_blobs(self, container, uri):

        try:
            if len(uri) > 0:
                blobs = self.blob.list_blobs(container, prefix=uri)
            else:
                blobs = self.blob.list_blobs(container)

            """for blob in blobs:
                print(blob.properties.__dict__)
                print(blob.name)
                print(blob.url)"""
            return blobs
        except Exception as e:
            print(_("Bad connection"))
            logging.warning("container {0}, path {1}".format(container, uri))
            exit(1)

    def path_list(self, path):
        try:
            logging.debug("path_list {0}".format(path))

            container, uri = self.path_split(path)
            logging.debug("Container: {0}, Uri: {1}".format(container, uri))

            self.connect()
            self.blob.create_container(container)

            blobs = self.path_list_blobs(container, uri)

            d = {}

            for b in blobs:
                spath = self.spath(container, uri, b)
                # print(b.__dict__)
                #print(str(b.properties.last_modified.__dict__))
                #print(str(spath.ModifiedTS))
                d[spath.SPath] = spath
            # print(d)
            return d
        except Exception as e:
            print(e)

    def remove(self, src: SyncPath):
        try:
            logging.debug("Removing {0}".format(src.AbsPath))
            self.connect()
            self.blob.create_container(src.BasePath)
            self.blob.delete_blob(src.BasePath, src.AbsPath)
        except:
            pass


    def copy_local2azure(self, src, base_dir):
        try:

            container, uri = self.path_split(base_dir)

            if len(src.SPath) > 0 and src.SPath[0] == "/":
                path = uri + src.SPath[1:]
            else:
                path = uri + src.SPath
            logging.debug("copy_local2azure Spath {0}. path:{1}".format(src.SPath, path))
            self.connect()
            if not src.IsDir:
                self.blob.put_block_blob_from_path(container, path, src.AbsPath)
            else:
                self.blob.put_block_blob_from_text(container, path+"/", "")
        except Exception as e:
            print("Error Copying")
            print(e)

    def copy_azure2local(self, src, base_dir):
        try:

            if len(src.SPath) > 0 and (src.SPath[0] == "/" or src.SPath[0] == "\\"):
                path = src.SPath[1:]
            else:
                path = src.SPath

            path = os.path.normpath(os.path.join(base_dir, path))
            logging.debug("copy_azure2local basedir:{0} Spath {1}, path {2}, abs: {3}".format(
                base_dir, src.SPath, path, src.AbsPath))

            if not os.path.isdir(path):
                os.makedirs(os.path.dirname(path), exist_ok=True)
            # print(os.path.dirname(path) + "***************")

            if not (len(src.AbsPath) > 0 and src.AbsPath[len(src.AbsPath) - 1] == "/"):
                self.blob.get_blob_to_path(src.BasePath, src.AbsPath, path)




            """container, uri = self.path_split(base_dir)

            if len(src.SPath)>0 and src.SPath[0]=="/":
                path= uri+ src.SPath[1:]
            else:
                path= uri+src.SPath
            self.connect()
            if not src.IsDir:
                self.blob.get_blob_to_path(src.BasePath, path, src.AbsPath)
            else:
                self.blob.put_block_blob_from_text(container, path, "")"""
        except Exception as e:
            print("Error copying")
            print(e)
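A hypothetical usage sketch (assumes the alxkey credentials are configured as expected by __init__):

s = SAzure()
if s.connect() is not None:
    for spath in s.path_list("mycontainer/backups/").values():
        print(spath.AbsPath, spath.Size, spath.MD5)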
class AzureBackend(duplicity.backend.Backend):
    u"""
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, u'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                import azure.storage.blob
                if hasattr(azure.storage.blob, u'BlobService'):
                    from azure.storage.blob import BlobService
                else:
                    from azure.storage.blob.blockblobservice import BlockBlobService as BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException(u"""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        # TODO: validate container name
        self.container = parsed_url.path.lstrip(u'/')

        if u'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException(u'AZURE_ACCOUNT_NAME environment variable not set.')

        if u'AZURE_ACCOUNT_KEY' in os.environ:
            if u'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ[u'AZURE_ACCOUNT_KEY'],
                                                endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ[u'AZURE_ACCOUNT_KEY'])
            self._create_container()
        elif u'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ:
            if u'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE'],
                                                endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE'])
        else:
            raise BackendException(
                u'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable is set.')

        if globals.azure_max_single_put_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size

        if globals.azure_max_block_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_BLOCK_SIZE
                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size

    def _create_container(self):
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError(u"Could not create Azure container: %s"
                           % str(e.message).split(u'\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        remote_filename = fsdecode(remote_filename)
        kwargs = {}
        if globals.azure_max_connections:
            kwargs[u'max_connections'] = globals.azure_max_connections

        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        try:
            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
        except AttributeError:  # Old versions use a different method name
            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)

        self._set_tier(remote_filename)

    def _set_tier(self, remote_filename):
        if globals.azure_blob_tier is not None:
            try:
                self.blob_service.set_standard_blob_tier(self.container, remote_filename, globals.azure_blob_tier)
            except AttributeError:  # might not be available in old API
                pass

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, fsdecode(remote_filename), local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, fsdecode(filename))

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, fsdecode(filename))
        try:
            info = {u'size': int(prop.properties.content_length)}
        except AttributeError:
            # old versions directly returned the properties
            info = {u'size': int(prop[u'content-length'])}
        return info

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
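
# The version-sniffing import above is the reusable core of this backend; a
# minimal standalone sketch under the same assumption (some version of the
# azure / azure-storage packages is installed), mirroring the branches of
# __init__ above:
def load_blob_service_class():
    import azure.storage
    if hasattr(azure.storage, u'BlobService'):
        # v0.11.1 and below keep BlobService at the package root
        from azure.storage import BlobService
        return BlobService
    import azure.storage.blob
    if hasattr(azure.storage.blob, u'BlobService'):
        # intermediate releases moved BlobService into azure.storage.blob
        from azure.storage.blob import BlobService
        return BlobService
    # v1.0.0 and above only ship BlockBlobService
    from azure.storage.blob.blockblobservice import BlockBlobService
    return BlockBlobService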
Esempio n. 18
0
class AzureBlobStorage(Storage):

    def __init__(self, account='nyxstorage', container='pxo'):
        self.base_storage_uri = 'http://%s.blob.core.windows.net/%s/' % (
            account, container)
        self.blob_service = BlobService(
            account, get_env_variable('AZURE_BLOB_STORAGE_KEY'))
        self.container = container

    def _open(self, name, mode='rb'):
        data = self.blob_service.get_blob(self.container, name)
        return ContentFile(data)

    def _save(self, name, content):
        _file = content.read()
        file_name = content.name[-35:]
        self.blob_service.put_blob(
            self.container, file_name, _file, x_ms_blob_type='BlockBlob')
        return self.base_storage_uri + file_name

    def create_container(self, container_name):
        result = self.blob_service.create_container(
            container_name, x_ms_blob_public_access='container')
        return result

    def delete(self, name):
        self.blob_service.delete_blob(self.container, name)

    def exists(self, name):
        try:
            self.blob_service.get_blob_properties(self.container, name)
        except Exception:
            return False
        else:
            return True

    def get_available_name(self, name):
        return name

    def get_blobs(self):
        blobs = self.blob_service.list_blobs(self.container)
        return blobs

    def get_valid_name(self, name):
        return name

    def modified_time(self, name):
        metadata = self.blob_service.get_blob_metadata(self.container, name)
        modified_time = float(metadata.get('x-ms-meta-modified_time'))
        return datetime.fromtimestamp(modified_time)

    def set_public_container(self, container_name):
        result = self.blob_service.set_container_acl(
            container_name, x_ms_blob_public_access='container')
        return result

    def size(self, name):
        properties = self.blob_service.get_blob_properties(
            self.container, name)
        return int(properties['content-length'])

    def url(self, name):
        blob = self.blob_service.list_blobs(self.container, prefix=name)
        return blob.blobs[0].url
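
# Hypothetical usage of the storage class above; the account/container
# defaults and the AZURE_BLOB_STORAGE_KEY environment variable come from the
# example itself, and ContentFile is Django's in-memory file wrapper.
from django.core.files.base import ContentFile

storage = AzureBlobStorage(account='nyxstorage', container='pxo')
url = storage._save('notes.txt', ContentFile('hello world', name='notes.txt'))
print url   # http://nyxstorage.blob.core.windows.net/pxo/notes.txt
assert storage.exists('notes.txt')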
Esempio n. 19
0
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure SDK for Python library.
        try:
            import azure
            from azure.storage import BlobService
        except ImportError:
            raise BackendException('Azure backend requires Microsoft Azure SDK for Python '
                                   '(https://github.com/Azure/azure-sdk-for-python).')

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')

        if 'AZURE_ACCOUNT_KEY' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.')

        account_name = os.environ['AZURE_ACCOUNT_NAME']
        account_key = os.environ['AZURE_ACCOUNT_KEY']
        self.WindowsAzureMissingResourceError = azure.WindowsAzureMissingResourceError
        self.blob_service = BlobService(account_name=account_name, account_key=account_key)
        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except azure.WindowsAzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-blob
        self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)

    def _get(self, remote_filename, local_path):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-blob
        blobs = self.blob_service.list_blobs(self.container)
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        return {'size': int(prop['content-length'])}

    def _error_code(self, operation, e):
        if isinstance(e, self.WindowsAzureMissingResourceError):
            return log.ErrorCode.backend_not_found
class Storage(driver.Base):

    supports_bytes_range = True

    def __init__(self, path=None, config=None):
        self._config = config
        self._container = self._config.azure_storage_container

        protocol = 'https' if self._config.azure_use_https else 'http'
        acct_name = self._config.azure_storage_account_name
        acct_key = self._config.azure_storage_account_key
        self._blob = BlobService(account_name=acct_name,
                                 account_key=acct_key,
                                 protocol=protocol)

        self._init_container()
        logger.debug("Initialized azureblob storage driver")

    def _init_container(self):
        '''Initializes image container on Azure blob storage if the container
        does not exist.
        '''
        created = self._blob.create_container(self._container,
                                              x_ms_blob_public_access='blob',
                                              fail_on_exist=False)
        if created:
            logger.info('Created blob container for image registry.')
        else:
            logger.debug('Registry container already exists.')
        return created

    @lru.get
    def get_content(self, path):
        try:
            return self._blob.get_blob(self._container, path)
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    @lru.set
    def put_content(self, path, content):
        self._blob.put_blob(self._container, path, content, 'BlockBlob')
        return path

    def stream_read(self, path, bytes_range=None):
        try:
            f = io.BytesIO()
            self._blob.get_blob_to_file(self._container, path, f)

            nb_bytes = 0  # bytes already served from the requested range
            if bytes_range:
                f.seek(bytes_range[0])
                total_size = bytes_range[1] - bytes_range[0] + 1
            else:
                f.seek(0)

            while True:
                buf = None
                if bytes_range:
                    # Bytes Range is enabled
                    buf_size = self.buffer_size
                    if nb_bytes + buf_size > total_size:
                        # We make sure we don't read out of the range
                        buf_size = total_size - nb_bytes
                    if buf_size > 0:
                        buf = f.read(buf_size)
                        nb_bytes += len(buf)
                    else:
                        # We're at the end of the range
                        buf = ''
                else:
                    buf = f.read(self.buffer_size)

                if not buf:
                    break

                yield buf
        except IOError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    def stream_write(self, path, fp):
        self._blob.put_block_blob_from_file(self._container, path, fp)

    def list_directory(self, path=None):
        if path is None:
            path = ''  # a None path previously crashed on .endswith()
        if path and not path.endswith('/'):
            path += '/'  # path=a would list a/b.txt as well as 'abc.txt'

        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)

        return [b.name for b in blobs]

    def exists(self, path):
        try:
            self._blob.get_blob_properties(self._container, path)
            return True
        except azure.WindowsAzureMissingResourceError:
            return False

    @lru.remove
    def remove(self, path):
        is_blob = self.exists(path)
        if is_blob:
            self._blob.delete_blob(self._container, path)
            return

        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)

        for b in blobs:
            self._blob.delete_blob(self._container, b.name)

    def get_size(self, path):
        try:
            properties = self._blob.get_blob_properties(self._container, path)
            return int(properties['content-length'])  # auto-converted to long
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)
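
# Hypothetical exercise of the driver above; the config object is faked with
# a namedtuple because the real docker-registry config type isn't shown, and
# the credentials are placeholders.
import collections

FakeConfig = collections.namedtuple('FakeConfig', [
    'azure_storage_container', 'azure_use_https',
    'azure_storage_account_name', 'azure_storage_account_key'])

storage = Storage(config=FakeConfig('registry', True, 'xxxx', 'xxxx'))
storage.put_content('repositories/foo/json', '{"id": "foo"}')
# Read the first five bytes back through the bytes_range support:
chunk = ''.join(storage.stream_read('repositories/foo/json', (0, 4)))
assert chunk == '{"id"'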
Esempio n. 22
0
def generate_website_and_upload_azure(azure_csv_container, azure_web_container):

    blob_service = BlobService(account_name=os.getenv('ACC_NAME'), account_key=os.getenv('ACCESS_KEY'))
    blob_list = blob_service.list_blobs(azure_csv_container)
    blob_name_list = blob_list.blobs

    keys = []
    # Only keep files whose dates can be parsed
    for k in blob_name_list:
        try:
            parser.parse(k.name[:8])
            keys.append(k)
        except (ValueError, OverflowError):
            pass

    keys = [k for k in keys if (".zip" in k.name or ".csv" in k.name)]

    my_array = []
    for k in keys:
        my_dict = {}
        url = r"http://fhrscsvs.blob.core.windows.net/{}/{}".format(azure_csv_container,k.name)
        name = k.name

        date = parser.parse(name[:8])
        dateformat = date.strftime("%a %d %b %Y")
        my_dict["Date of data download"] = dateformat

        my_dict["Size"] = sizeof_fmt(k.properties.content_length)

        name = get_link_text(name, dateformat, my_dict)

        my_dict["File"] = "<a href='{0}'>{1}</a>".format(url, name)

        my_array.append(my_dict)

    my_array = sorted(my_array, key=lambda k: k['File'], reverse=True)

    table_array_fullsnapshot = [a for a in my_array if "__all_current" in a["File"]]
    table_array_differences = [a for a in my_array if "__diff" in a["File"]]

    template_dir = os.getenv('TEMPLATE_DIR')
    loader = jinja2.FileSystemLoader(template_dir)
    environment = jinja2.Environment(loader=loader)

    j_template = environment.get_template("template.html")

    order = ["File", "Size"]

    timestamp = datetime.datetime.now().strftime("%a %d %b %Y at %H:%M")

    import math
    sinarray = [(math.cos(math.radians(i * 5 - 180)) + 1) * 14 for i in range(0, 73)]
    html = j_template.render(table_array_fullsnapshot=table_array_fullsnapshot, 
        order=order, 
        timestamp = timestamp, 
        sinarray=sinarray,
        table_array_differences=table_array_differences)


    blob_service.put_block_blob_from_text(
        azure_web_container,
        "index.html",
        html,
        x_ms_blob_content_type='text/html',
        text_encoding="utf-8",
    )
Esempio n. 23
0
import os
import csv
import argparse
from azure.storage import BlobService

container = 'images'
account_name = 'xxxx'
account_key = 'xxxx'

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Lists all blobs in the container and saves '
                    'information about their extensions')
    parser.add_argument("filename", help='File to write results')

    args = parser.parse_args()

    blob_service = BlobService(account_name=account_name,
                               account_key=account_key)

    blobs = blob_service.list_blobs(container)

    with open(args.filename, 'w') as f:
        writer = csv.writer(f)

        for b in blobs:
            word_data = os.path.splitext(b.name)
            writer.writerow(word_data)
    blobService = BlobService(storageAccount, accessKey)

    #--------------
    # Get the container and Blob objects
    # Obtain the Blob object from its mediaLink
    logger.debug("deleteOSandDataDisk.py: getting container and blob for mediaLink...(%s)" % mediaLink)

    # # Check that the blob to be deleted exists

    # # Get the list of containers
    containerList = blobService.list_containers()
    targetBlob = None

    for container in containerList:
        # # Get the list of blobs in this container
        blobList = blobService.list_blobs(container.name)
        for blob in blobList:
            # # Compare the URIs with the leading http*:// scheme stripped
            blobname = blob.url.split('://')[1]
            if blobname == mediaLink.split('://')[1]:
                logger.debug('deleteOSandDataDisk.py: find target blobname: ' + blobname)
                targetBlob = blob
                targetContainer = container

    # # Exit with an error if the blob was not found
    if (targetBlob is None):
        logger.error('deleteOSandDataDisk.py: target blob(%s) is not found.' % mediaLink.split('://')[1])
        sys.exit()

    #-----------------
    # Start the lease
Esempio n. 25
0
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    
    """
    
    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.
        
        If the name prefix does not end with a trailing slash, and is not empty,
        one will be added automatically.
        
        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        
        """
        
        # Make sure azure libraries actually loaded
        assert(have_azure)
        
        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix
        
        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"
        
        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)
            
        # This will hold our Azure blob store connection
        self.connection = None
        
    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """
     
        return (self.account_name, self.account_key, self.container_name, 
            self.name_prefix)
        
    def __setstate__(self, state):
        """
        Set up after unpickling.
        """
        
        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]
        
        self.connection = None
        
    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """
        
        if self.connection is None:
            RealTimeLogger.get().debug("Connecting to account {}, using "
                "container {} and prefix {}".format(self.account_name,
                self.container_name, self.name_prefix))
        
            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name, account_key=self.account_key)
            
            
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """
        
        self.__connect()
        
        
        RealTimeLogger.get().debug("Loading {} from AzureIOStore".format(
            input_path))
        
        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
            self.name_prefix + input_path, local_path)
            
    def list_input_directory(self, input_path, recursive=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.
        
        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.
        
        """
        
        self.__connect()
        
        RealTimeLogger.get().info("Enumerating {} from AzureIOStore".format(
            input_path))
        
        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path
        
        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"
        
        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None
        
        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()
        
        while True:
        
            # Get the results from Azure. We skip the delimiter since it doesn't
            # seem to have the placeholder entries it's supposed to.
            result = self.connection.list_blobs(self.container_name, 
                prefix=fake_directory, marker=marker)
                
            for blob in result:
                # Yield each result's blob name, but directory names only once
                
                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]
                
                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)
                    
                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)
                        
                        yield subdirectory
                else:
                    # We found an actual file  
                    yield relative_path
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
    
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """
        
        self.__connect()
        
        RealTimeLogger.get().debug("Saving {} to AzureIOStore".format(
            output_path))
        
        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass
        
        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(self.container_name,
            self.name_prefix + output_path, local_path)
            
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        
        """
        
        self.__connect()
        
        marker = None
        
        while True:
        
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name, 
                prefix=self.name_prefix + path, marker=marker)
                
            for blob in result:
                # Look at each blob
                
                if blob.name == self.name_prefix + path:
                    # Found it
                    return True
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
        
        return False
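
# Hypothetical round trip with the IO store above; the account, container and
# file names are invented, and the account key is resolved from the
# environment or ~/.toilAzureCredentials as described in the docstring.
store = AzureIOStore('myaccount', 'mycontainer', 'experiment1')

store.write_output_file('results.tsv', 'out/results.tsv')
for name in store.list_input_directory('out'):
    print name   # results.tsv
if store.exists('out/results.tsv'):
    store.read_input_file('out/results.tsv', 'local_copy.tsv')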
Esempio n. 26
0
def deletefromazure(strPrefix):
    blob_service = BlobService(account_name='wanderight', account_key='xxxx')
    blobsToDelete = blob_service.list_blobs(config['container'], prefix=strPrefix)
    for b in blobsToDelete:
        blob_service.delete_blob(config['container'], b.name)
Esempio n. 27
0
import os
import sqlite3

from azure.storage import BlobService

print db_name

conn = sqlite3.connect(db_name)
c = conn.cursor()

AZURE_STORAGE_CONNECTION_STRING = os.environ["AZURE_STORAGE_CONNECTION_STRING"]


blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)


total_length = 0
number = 0
next_marker = None
while True:
    blobs = blob_service.list_blobs("nexradl2", maxresults=5000, marker=next_marker)
    next_marker = blobs.next_marker
    # print(next_marker)
    print "length of blobs:"
    print (len(blobs))
    newcount = 0
    for blob in blobs:
        path = blob.name
        # print(blob.name)
        # print(blob.properties.content_length)
        # Parameterize the query instead of interpolating the path directly
        c.execute("update files set azure='yes' where path = ?", (path,))
        total_length = total_length + blob.properties.content_length
        number = number + 1
        newcount = newcount + 1
    print "length of newcount:"
    print newcount
Esempio n. 28
0
from azure.storage import BlobService
import sys

key = raw_input("Please enter azure videopath blob storage key: ")
blob_service = BlobService(account_name='videopathmobilefiles',
                           account_key=key)

source = sys.argv[1]
target = sys.argv[2]

print source + " -> " + target

blob_service.create_container(target, x_ms_blob_public_access='container')

# blob_service.copy_blob('test2', 'copiedkey', '/videopathmobilefiles/test/key')

blobs = blob_service.list_blobs(source)

for b in blobs:
    name = b.name
    source_path = '/videopathmobilefiles/' + source + '/' + name
    blob_service.copy_blob(target, name, source_path)
    print name
Esempio n. 29
0
class AzureBlobStorage(Storage):

    '''
    classdocs
    '''

    def __init__(self, azure_profile):
        '''
        Constructor
        '''

        if not azure_profile:
            raise ValueError('azure_profile configuration is required')
        else:
            container_name = azure_profile['container_name']
            account_name = azure_profile['account_name']
            account_key = azure_profile['key']
            base_url = azure_profile['base_url']

        self.blob = BlobService(
            account_name=account_name, account_key=account_key)

        self.container = container_name
        self.base_url = base_url

    def delete(self, name):
        """
        Delete file.
        """
        try:
            self.blob.delete_blob(self.container, name)
        except WindowsAzureMissingResourceError:
            return False
        else:
            return True

    def delete_files(self, files=None):
        """
        Delete files in container.
        """
        if not files:
            files = self.listdir(self.container)[1]

        for _file in files:
            self.delete(_file)

    def exists(self, name, with_properties=False):
        """
        Existing check.
        """
        result = False
        blob_properties = None

        try:
            blob_properties = self.blob.get_blob_properties(
                self.container, name)
        except WindowsAzureMissingResourceError:
            result = False
        else:
            result = True

        if with_properties:
            return result, blob_properties
        else:
            return result

    def get_available_name(self, name):
        return super(AzureBlobStorage, self).get_available_name(name.replace('\\', '/'))

    def get_valid_name(self, name):

        return name

    def _list(self, path, prefix, maxresults):
        result = []
        blobs = self.blob.list_blobs(path, prefix, maxresults)

        for _blob in blobs:
            result.append(_blob.name)

        return result

    def listdir(self, path=None, prefix=None, maxresults=None):
        """
        Catalog file list.
        """
        if not path:
            path = self.container
        return [], self._list(path, prefix, maxresults)

    def size(self, name):
        """
        File size.
        """

        result, properties = self.exists(name, with_properties=True)

        if result:
            return int(properties['content-length'])
        else:
            return 0

    def url(self, name, chk_exist=False):
        """
        URL for file downloading.
        """

        if chk_exist:
            if self.exists(name):
                return '%s%s/%s' % (self.base_url, self.container, name)
            else:
                return None
        else:
            return '%s%s/%s' % (self.base_url, self.container, name)

    def _open(self, name, mode='rb'):
        """
        Open file.
        """

        in_mem_file = StringIO.StringIO(
            self.blob.get_blob(self.container, name))
        in_mem_file.name = name
        in_mem_file.mode = mode
        return File(in_mem_file)

    def _save(self, name, blob_to_upload, x_ms_blob_type='BlockBlob', content_type=None):
        """
        Save file.
        """

        if hasattr(blob_to_upload, 'content_type'):
            content_type = blob_to_upload.content_type or None

        if content_type is None:
            content_type = mimetypes.guess_type(name)[0] or None

        blob_to_upload.seek(0)

        self.blob.put_blob(self.container, name, blob_to_upload,
                           x_ms_blob_type, x_ms_blob_content_type=content_type)

        return name

    def modified_time(self, name):
        """
        Last modification time.
        """

        result, properties = self.exists(name, with_properties=True)

        if result:
            date_string = properties['last-modified']
            modified_dt = parser.parse(date_string)

            if timezone.is_naive(modified_dt):
                return modified_dt
            else:
                return timezone.make_naive(modified_dt, timezone.get_current_timezone())
        else:
            return None

    created_time = accessed_time = modified_time
Esempio n. 30
0
        thread = threading.Thread(target=target)
        thread.start()

        thread.join(timeout)
        if thread.is_alive():
            print 'Terminating process'
            self.process.terminate()
            thread.join()
        print self.process.returncode

#command = Command("echo 'Process started'; sleep 2; echo 'Process finished'")
#print command.run(timeout=3)
#print command.run(timeout=1)
#
#command = Command('ping www.google.com')
#print command.run(timeout=1)

AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']

blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

print blob_service.put_block_blob_from_path(
    'nexradl2',
    '201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    '/snfs9/q2/levelii_tarfiles/201208/20120810/KSRX/NWS_NEXRAD_NXL2SR_KSRX_20120810050000_20120810055959.tar',
    max_connections=5,
)

blobs = blob_service.list_blobs('nexradl2', maxresults=10)
for blob in blobs:
    print(blob.name)
    print(blob.url)

Esempio n. 31
0
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is probably lacking
        self.log.warning("AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name, prefix=path, delimiter="/", include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key, fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_bytes(self.container_name, key), self._metadata_for_key(key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.time() - start_time)
        return container_name
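
# Hypothetical round trip with the transfer class above; the account, key and
# container names are placeholders in the style of the other examples.
transfer = AzureTransfer('xxxx', 'xxxx', 'backups', prefix='site1')

transfer.store_file_from_memory('state.json', b'{}', metadata={'version': 1})
print(transfer.get_metadata_for_key('state.json'))   # {'version': '1'}
data, metadata = transfer.get_contents_to_string('state.json')
transfer.delete_key('state.json')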
Esempio n. 32
0
import sys
import time
from azure.storage import BlobService
from azure import WindowsAzureMissingResourceError

from CREDENTIALS import account_name, account_key
db = BlobService(account_name=account_name, account_key=account_key)

###
bucket = 'crawl-data'
in_progress = set()
#
existing = set([
    x.name
    for x in db.list_blobs(bucket, 'common-crawl/crawl-data/CC-MAIN-2014-23/')
])
todo = [x.rstrip() for x in sys.stdin.readlines()]
todo = [x for x in todo if x not in existing]
###
while todo or in_progress:
    new_progress = set()
    for path in in_progress:
        props = db.get_blob_properties(bucket, path)
        if props['x-ms-copy-status'] == 'success':
            print '\t%s completed' % path
            continue
        new_progress.add(path)
        time.sleep(0.25)
    print 'Task queue length is %d' % len(new_progress)
    print 'TODO queue length is %d' % len(todo)
    # Populate the queue
Esempio n. 33
0
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option(
            '-w',
            '--wipe',
            action='store_true',
            dest='wipe',
            default=False,
            help="Wipes out entire contents of container first."),
        optparse.make_option('-t',
                             '--test-run',
                             action='store_true',
                             dest='test_run',
                             default=False,
                             help="Performs a test run of the sync."),
        optparse.make_option('-c',
                             '--container',
                             dest='container',
                             help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        if options.get('container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
                                   account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except WindowsAzureMissingResourceError:
            self.service.create_container(self.STATIC_CONTAINER,
                                          x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER,
                                       x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count,
            self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue  # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(
                    self.STATIC_CONTAINER, object_name)
            except WindowsAzureMissingResourceError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (
                    properties['last-modified']
                    and datetime.datetime.strptime(properties['last-modified'],
                                                   "%a, %d %b %Y %H:%M:%S %Z")
                    or None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'rb').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                self.service.put_blob(self.STATIC_CONTAINER,
                                      object_name,
                                      file_contents,
                                      x_ms_blob_type='BlockBlob',
                                      x_ms_blob_content_type=content_type,
                                      content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
Esempio n. 34
0
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, 'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                import azure.storage.blob
                if hasattr(azure.storage.blob, 'BlobService'):
                    from azure.storage.blob import BlobService
                else:
                    from azure.storage.blob.blockblobservice import BlockBlobService as BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException("""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')

        if 'AZURE_ACCOUNT_KEY' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'])
            self._create_container()
        elif 'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'])
        else:
            raise BackendException(
                'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable is set.')

        if globals.azure_max_single_put_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size

        if globals.azure_max_block_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_BLOCK_SIZE
                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size

    def _create_container(self):
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        kwargs = {}
        if globals.azure_max_connections:
            kwargs['max_connections'] = globals.azure_max_connections

        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        try:
            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
        except AttributeError:  # Old versions use a different method name
            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        try:
            info = {'size': int(prop.properties.content_length)}
        except AttributeError:
            # old versions directly returned the properties
            info = {'size': int(prop['content-length'])}
        return info

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
Esempio n. 35
0
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option('-w', '--wipe',
            action='store_true', dest='wipe', default=False,
            help="Wipes out entire contents of container first."),
        optparse.make_option('-t', '--test-run',
            action='store_true', dest='test_run', default=False,
            help="Performs a test run of the sync."),
        optparse.make_option('-c', '--container',
            dest='container', help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME     = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY      = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY        = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL       = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        if hasattr(options, 'container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
            account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except WindowsAzureMissingResourceError:
            self.service.create_container(self.STATIC_CONTAINER,
                x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER, x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count, self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(self.STATIC_CONTAINER,
                    object_name)
            except WindowsAzureMissingResourceError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (properties['last-modified'] and
                                  datetime.datetime.strptime(
                                    properties['last-modified'],
                                    "%a, %d %b %Y %H:%M:%S %Z"
                                  ) or None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                                               os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'rb').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                if self.verbosity > 1:
                    print "content-type:", content_type
                    print "encoding:", encoding
                self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents,
                    x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type,
                    content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
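
# A hypothetical invocation of this management command (its registered name is
# not shown in this snippet; --wipe and --test-run are assumed to be the flags
# wired to options['wipe'] and options['test_run']):
#   python manage.py syncstatic --wipe --test-run --verbosity=2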
Example no. 36
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat: dict, files: None | dict<fname, stat>)>
    fds = dict()         # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                     st_uid=getuid(), st_size=0,
                                     st_mtime=int(time.time())))

        self.containers = cmap   # rebuilds the fs tree cache from scratch

    def _parse_path(self, path):    # returns </dir, file(=None)>
        if path.count('/') > 1:     # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:                       # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]   # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:    # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name != name.lower():
                log.error("Container names cannot contain uppercase "
                          "characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error("Container names cannot contain consecutive "
                          "dashes (-).")
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                log.error("Invalid container name or container already "
                          "exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level rmdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                     st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')     # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:                    # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except WindowsAzureMissingResourceError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except WindowsAzureError as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0     # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:   # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) * block_size,
                            len(data))]
                        block_id = base64.encodestring('%s_%s' % (rd,
                            (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except WindowsAzureError:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0     # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except WindowsAzureMissingResourceError:
            raise FuseOSError(ENOENT)
        except Exception:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                    if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError as e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
Example no. 37
#connect to your storage account
from azure.storage import BlobService
blob_service = BlobService(account_name='YourAccountName', account_key='YourKey')

#list all the CSV input files (blobs prefixed 'input_') in your container

blobs = []
marker = None
while True:
    batch = blob_service.list_blobs('YourContainer', marker=marker, prefix='input_')
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    print(blob.name)

#read the blob file as a text file
#I just read in the first file from the previous list

data = blob_service.get_blob_to_text('YourContainer', blobs[0].name).split("\n")
print("Number of lines in CSV " + str(len(data)))

#do your stuff
#I want to filter my CSV and only keep lines containing 'abc' or 'def'

matchers = ['abc', 'def']
matching = [s for s in data if any(xs in s for xs in matchers)]
print("Number of lines in CSV " + str(len(matching)))

#write your text directly back to blob storage
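#a minimal sketch, assuming the same placeholder container and a hypothetical
#blob name 'output.csv'; put_block_blob_from_text uploads a string as a blob
blob_service.put_block_blob_from_text('YourContainer', 'output.csv',
                                      "\n".join(matching))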
Example no. 38
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, 'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                from azure.storage.blob import BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException("""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')
        if 'AZURE_ACCOUNT_KEY' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.')
        self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                        account_key=os.environ['AZURE_ACCOUNT_KEY'])

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        return {'size': int(prop['content-length'])}

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
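
# Duplicity backends register themselves for the URL scheme they serve;
# presumably the module ends by registering this one:
duplicity.backend.register_backend('azure', AzureBackend)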
Example no. 39
import sys
import time
from azure.storage import BlobService
from azure import WindowsAzureMissingResourceError

from CREDENTIALS import account_name, account_key
db = BlobService(account_name=account_name, account_key=account_key)

###
bucket = 'crawl-data'
in_progress = set()
#
existing = set([x.name for x in db.list_blobs(bucket, 'common-crawl/crawl-data/CC-MAIN-2014-23/')])
todo = [x.rstrip() for x in sys.stdin.readlines()]
todo = [x for x in todo if x not in existing]
###
while todo or in_progress:
  new_progress = set()
  for path in in_progress:
    props = db.get_blob_properties(bucket, path)
    if props['x-ms-copy-status'] == 'success':
      print '\t%s completed' % path
      continue
    new_progress.add(path)
    time.sleep(0.25)
  print 'Task queue length is %d' % len(new_progress)
  print 'TODO queue length is %d' % len(todo)
  # Populate the queue
  while todo and len(new_progress) < 256:
    path = todo.pop()
    # If it exists, skip it -- only add if it's missing
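    # hedged sketch of the truncated remainder: skip blobs that already
    # exist, otherwise start a server-side copy and track it (the source
    # URL prefix is an assumption)
    try:
      db.get_blob_properties(bucket, path)
      continue  # already present, skip
    except WindowsAzureMissingResourceError:
      pass
    db.copy_blob(bucket, path,
                 'https://aws-publicdatasets.s3.amazonaws.com/' + path)
    new_progress.add(path)
  in_progress = new_progress

Example no. 40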
class Storage(driver.Base):

    supports_bytes_range = True

    def __init__(self, path=None, config=None):
        self._config = config
        self._container = self._config.azure_storage_container

        protocol = 'https' if self._config.azure_use_https else 'http'
        acct_name = self._config.azure_storage_account_name
        acct_key = self._config.azure_storage_account_key
        self._blob = BlobService(
            account_name=acct_name, account_key=acct_key, protocol=protocol)

        self._init_container()
        logger.debug("Initialized azureblob storage driver")

    def _init_container(self):
        '''Initializes image container on Azure blob storage if the container
        does not exist.
        '''
        created = self._blob.create_container(
            self._container, x_ms_blob_public_access='blob',
            fail_on_exist=False)
        if created:
            logger.info('Created blob container for image registry.')
        else:
            logger.debug('Registry container already exists.')
        return created

    @lru.get
    def get_content(self, path):
        try:
            return self._blob.get_blob(self._container, path)
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    @lru.set
    def put_content(self, path, content):
        self._blob.put_blob(self._container, path, content, 'BlockBlob')
        return path

    def stream_read(self, path, bytes_range=None):
        try:
            f = io.BytesIO()
            self._blob.get_blob_to_file(self._container, path, f)

            nb_bytes = 0
            if bytes_range:
                f.seek(bytes_range[0])
                total_size = bytes_range[1] - bytes_range[0] + 1
            else:
                f.seek(0)

            while True:
                buf = None
                if bytes_range:
                    # Bytes Range is enabled
                    buf_size = self.buffer_size
                    if nb_bytes + buf_size > total_size:
                        # We make sure we don't read out of the range
                        buf_size = total_size - nb_bytes
                    if buf_size > 0:
                        buf = f.read(buf_size)
                        nb_bytes += len(buf)
                    else:
                        # We're at the end of the range
                        buf = ''
                else:
                    buf = f.read(self.buffer_size)

                if not buf:
                    break

                yield buf
        except IOError:
            raise exceptions.FileNotFoundError('%s is not there' % path)

    def stream_write(self, path, fp):
        self._blob.put_block_blob_from_file(self._container, path, fp)

    def list_directory(self, path=None):
        if path is None:
            path = ''
        if path and not path.endswith('/'):
            path += '/'  # without the trailing slash, prefix 'a' would match 'abc.txt' as well as 'a/b.txt'

        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)

        return [b.name for b in blobs]

    def exists(self, path):
        try:
            self._blob.get_blob_properties(self._container, path)
            return True
        except azure.WindowsAzureMissingResourceError:
            return False

    @lru.remove
    def remove(self, path):
        is_blob = self.exists(path)
        if is_blob:
            self._blob.delete_blob(self._container, path)
            return

        blobs = list(self._blob.list_blobs(self._container, path))
        if not blobs:
            raise exceptions.FileNotFoundError('%s is not there' % path)

        for b in blobs:
            self._blob.delete_blob(self._container, b.name)

    def get_size(self, path):
        try:
            properties = self._blob.get_blob_properties(self._container, path)
            return int(properties['content-length'])  # auto-converted to long
        except azure.WindowsAzureMissingResourceError:
            raise exceptions.FileNotFoundError('%s is not there' % path)
Example no. 41
    account_key=options.account_key
else:
    sys.stderr.write("Azure key is missing")
    sys.exit(1)

if header and not options.output_format:
    print '\t'.join(str(h) for h in headers)

blob_service = BlobService(account_name, account_key)

for container in blob_service.list_containers():
    c = container.name
    if c == "heartbeat": continue
    if options.date and not ( c == "processed-"+options.date ): continue
    if debug: sys.stderr.write("Processing container: "+str(c)+"\n")
    for b in blob_service.list_blobs(c):
        if debug: sys.stderr.write("Processing blob: "+str(b.name)+"\n")
        data = blob_service.get_blob(c, b.name)
        cs = StringIO.StringIO(data)
        gzipstream = gzip.GzipFile(fileobj=cs)
        if output_format == "txt":
            print gzipstream.read()
        elif output_format == "json":
            d = {}
            i = 0
            ds = gzipstream.read()
            # some DCU entries contain more than 28 values (outside the
            # definition of the headers)
            for x in ds.strip().split("\t")[:27]:
                d[headers[i]] = x
                i += 1
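            # the snippet is truncated here; presumably the assembled record
            # is then emitted as JSON (assuming a json import earlier):
            print json.dumps(d)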
Example no. 42
def generate_website_and_upload_azure(azure_csv_container,
                                      azure_web_container):

    blob_service = BlobService(account_name=os.getenv('ACC_NAME'),
                               account_key=os.getenv('ACCESS_KEY'))
    blob_list = blob_service.list_blobs(azure_csv_container)
    blob_name_list = blob_list.blobs

    keys = []
    #Only keep files whose dates can be parsed
    for k in blob_name_list:
        try:
            parser.parse(k.name[:8])
            keys.append(k)
        except (ValueError, OverflowError):
            pass

    keys = [k for k in keys if (".zip" in k.name or ".csv" in k.name)]

    my_array = []
    for k in keys:
        my_dict = {}
        url = r"http://fhrscsvs.blob.core.windows.net/{}/{}".format(
            azure_csv_container, k.name)
        name = k.name

        date = parser.parse(name[:8])
        dateformat = date.strftime("%a %d %b %Y")
        my_dict["Date of data download"] = dateformat

        my_dict["Size"] = sizeof_fmt(k.properties.content_length)

        name = get_link_text(name, dateformat, my_dict)

        my_dict["File"] = "<a href='{0}'>{1}</a>".format(url, name)

        my_array.append(my_dict)

    my_array = sorted(my_array, key=lambda k: k['File'], reverse=True)

    table_array_fullsnapshot = [
        a for a in my_array if "__all_current" in a["File"]
    ]
    table_array_differences = [a for a in my_array if "__diff" in a["File"]]

    template_dir = os.getenv('TEMPLATE_DIR')
    loader = jinja2.FileSystemLoader(template_dir)
    environment = jinja2.Environment(loader=loader)

    j_template = environment.get_template("template.html")

    order = ["File", "Size"]

    timestamp = datetime.datetime.now().strftime("%a %d %b %Y at %H:%M")

    import math
    sinarray = [(math.cos(math.radians(i * 5 - 180)) + 1) * 14
                for i in range(0, 73)]
    html = j_template.render(table_array_fullsnapshot=table_array_fullsnapshot,
                             order=order,
                             timestamp=timestamp,
                             sinarray=sinarray,
                             table_array_differences=table_array_differences)

    blob_service.put_block_blob_from_text(
        azure_web_container,
        "index.html",
        html,
        x_ms_blob_content_type='text/html',
        text_encoding="utf-8",
    )
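
# Hypothetical usage; the container names are placeholders, and ACC_NAME,
# ACCESS_KEY and TEMPLATE_DIR must be set in the environment:
# generate_website_and_upload_azure('csv-downloads', 'website')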
Example no. 43
    blobs.

    blob_service: the blob management service, @class BlobService
    """
    for i in blob_service.list_containers().containers:
        print("Container name: {}".format(i.name))
        print("Container URL: {}".format(i.url))
        print("##############################")
        for j in blob_service.list_blobs(i.name).blobs:
            print("\tBlob name: {}".format(j.name))
            print("\tBlob URL: {}".format(j.url))
            print("\t------------------------------")

blob_service = BlobService(credentials.account_name, credentials.account_key)

getContainersWithBlobs(blob_service)


f_blob = open('Ejercicio10.txt', "w")
for i in blob_service.list_containers().containers:
        f_blob.write("Nombre del contenedor: {}".format(i.name))
        f_blob.write("Url del contenedor: {}".format(i.url))
        f_blob.write("##############################")
        for j in blob_service.list_blobs(i.name).blobs:
            f_blob.write("\tNombre del Blob: {}".format(j.name))
            f_blob.write("\tUrl del Blob: {}".format(j.url))
            f_blob.write("\t------------------------------")
f_blob.close()

# re-open the file and upload its contents; put_blob expects the blob bytes,
# not a closed file object
with open('Ejercicio10.txt', 'rb') as f:
    blob_service.put_blob('code', 'f_blob.txt', f.read(), 'BlockBlob')